enhanced filter to support empty tokens

This commit is contained in:
Jeremy Long
2017-12-29 05:58:44 -05:00
parent e8088c2bda
commit 43af96bb0f
2 changed files with 22 additions and 1 deletions

View File

@@ -72,7 +72,9 @@ public final class AlphaNumericFilter extends AbstractTokenizingFilter {
skipCounter = 0;
while (input.incrementToken()) {
final String text = new String(termAtt.buffer(), 0, termAtt.length());
if (text.isEmpty()) {
return true;
}
parts = text.split("[^a-zA-Z0-9]");
if (parts.length == 0) {
skipCounter += posIncrAttribute.getPositionIncrement();

View File

@@ -18,9 +18,11 @@ package org.owasp.dependencycheck.data.lucene;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkOneTerm;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkRandomData;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import static org.apache.lucene.util.LuceneTestCase.RANDOM_MULTIPLIER;
import static org.apache.lucene.util.LuceneTestCase.random;
import org.junit.Test;
@@ -82,4 +84,21 @@ public class AlphaNumericFilterTest extends BaseTokenStreamTestCase {
fail("Failed test random strings: " + ex.getMessage());
}
}
/**
 * Verifies that an empty input string run through the
 * {@link AlphaNumericFilter} is reported as exactly one empty term.
 * <p>
 * Copied from
 * http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
 *
 * @throws IOException thrown if analyzing the empty term fails
 */
public void testEmptyTerm() throws IOException {
    // The analyzer is Closeable; try-with-resources releases it even when
    // the assertion inside checkOneTerm fails.
    try (Analyzer a = new Analyzer() {
        @Override
        protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
            // The filter under test wraps the tokenizer directly so that
            // nothing else alters the (empty) token before it is filtered.
            Tokenizer tokenizer = new KeywordTokenizer();
            return new Analyzer.TokenStreamComponents(tokenizer, new AlphaNumericFilter(tokenizer));
        }
    }) {
        checkOneTerm(a, "", "");
    }
}
}