From 43af96bb0f086ede0b8dbc417fd92e52ea1760cc Mon Sep 17 00:00:00 2001 From: Jeremy Long Date: Fri, 29 Dec 2017 05:58:44 -0500 Subject: [PATCH] enhanced filter to support empty tokens --- .../data/lucene/AlphaNumericFilter.java | 4 +++- .../data/lucene/AlphaNumericFilterTest.java | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/dependency-check-core/src/main/java/org/owasp/dependencycheck/data/lucene/AlphaNumericFilter.java b/dependency-check-core/src/main/java/org/owasp/dependencycheck/data/lucene/AlphaNumericFilter.java index 761aaa3de..f23ee8eab 100644 --- a/dependency-check-core/src/main/java/org/owasp/dependencycheck/data/lucene/AlphaNumericFilter.java +++ b/dependency-check-core/src/main/java/org/owasp/dependencycheck/data/lucene/AlphaNumericFilter.java @@ -72,7 +72,9 @@ public final class AlphaNumericFilter extends AbstractTokenizingFilter { skipCounter = 0; while (input.incrementToken()) { final String text = new String(termAtt.buffer(), 0, termAtt.length()); - + if (text.isEmpty()) { + return true; + } parts = text.split("[^a-zA-Z0-9]"); if (parts.length == 0) { skipCounter += posIncrAttribute.getPositionIncrement(); diff --git a/dependency-check-core/src/test/java/org/owasp/dependencycheck/data/lucene/AlphaNumericFilterTest.java b/dependency-check-core/src/test/java/org/owasp/dependencycheck/data/lucene/AlphaNumericFilterTest.java index 3e279805d..32e980a6f 100644 --- a/dependency-check-core/src/test/java/org/owasp/dependencycheck/data/lucene/AlphaNumericFilterTest.java +++ b/dependency-check-core/src/test/java/org/owasp/dependencycheck/data/lucene/AlphaNumericFilterTest.java @@ -18,9 +18,11 @@ package org.owasp.dependencycheck.data.lucene; import java.io.IOException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkOneTerm; import static 
org.apache.lucene.analysis.BaseTokenStreamTestCase.checkRandomData; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.core.KeywordTokenizer; import static org.apache.lucene.util.LuceneTestCase.RANDOM_MULTIPLIER; import static org.apache.lucene.util.LuceneTestCase.random; import org.junit.Test; @@ -82,4 +84,21 @@ public class AlphaNumericFilterTest extends BaseTokenStreamTestCase { fail("Failed test random strings: " + ex.getMessage()); } } + + /** + * Runs checkOneTerm with an empty input and an empty expected term against a KeywordTokenizer wrapped in AlphaNumericFilter. Copied from + * http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java + * + * @throws IOException declared on the test signature; presumably propagated from checkOneTerm (confirm against the Lucene test framework) + */ + public void testEmptyTerm() throws IOException { + Analyzer a = new Analyzer() { + @Override + protected Analyzer.TokenStreamComponents createComponents(String fieldName) { + Tokenizer tokenizer = new KeywordTokenizer(); + return new Analyzer.TokenStreamComponents(tokenizer, new AlphaNumericFilter(tokenizer)); + } + }; + checkOneTerm(a, "", ""); + } }