mirror of
https://github.com/ysoftdevs/DependencyCheck.git
synced 2026-01-14 07:43:40 +01:00
enhanced filter to support empty tokens
This commit is contained in:
@@ -72,7 +72,9 @@ public final class AlphaNumericFilter extends AbstractTokenizingFilter {
|
||||
skipCounter = 0;
|
||||
while (input.incrementToken()) {
|
||||
final String text = new String(termAtt.buffer(), 0, termAtt.length());
|
||||
|
||||
if (text.isEmpty()) {
|
||||
return true;
|
||||
}
|
||||
parts = text.split("[^a-zA-Z0-9]");
|
||||
if (parts.length == 0) {
|
||||
skipCounter += posIncrAttribute.getPositionIncrement();
|
||||
|
||||
@@ -18,9 +18,11 @@ package org.owasp.dependencycheck.data.lucene;
|
||||
import java.io.IOException;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkOneTerm;
|
||||
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkRandomData;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import static org.apache.lucene.util.LuceneTestCase.RANDOM_MULTIPLIER;
|
||||
import static org.apache.lucene.util.LuceneTestCase.random;
|
||||
import org.junit.Test;
|
||||
@@ -82,4 +84,21 @@ public class AlphaNumericFilterTest extends BaseTokenStreamTestCase {
|
||||
fail("Failed test random strings: " + ex.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* copied from
|
||||
* http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
public void testEmptyTerm() throws IOException {
|
||||
Analyzer a = new Analyzer() {
|
||||
@Override
|
||||
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
|
||||
Tokenizer tokenizer = new KeywordTokenizer();
|
||||
return new Analyzer.TokenStreamComponents(tokenizer, new AlphaNumericFilter(tokenizer));
|
||||
}
|
||||
};
|
||||
checkOneTerm(a, "", "");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user