mirror of
https://github.com/ysoftdevs/DependencyCheck.git
synced 2026-03-25 10:32:00 +01:00
enhanced filter to support empty tokens
This commit is contained in:
@@ -72,7 +72,9 @@ public final class AlphaNumericFilter extends AbstractTokenizingFilter {
|
|||||||
skipCounter = 0;
|
skipCounter = 0;
|
||||||
while (input.incrementToken()) {
|
while (input.incrementToken()) {
|
||||||
final String text = new String(termAtt.buffer(), 0, termAtt.length());
|
final String text = new String(termAtt.buffer(), 0, termAtt.length());
|
||||||
|
if (text.isEmpty()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
parts = text.split("[^a-zA-Z0-9]");
|
parts = text.split("[^a-zA-Z0-9]");
|
||||||
if (parts.length == 0) {
|
if (parts.length == 0) {
|
||||||
skipCounter += posIncrAttribute.getPositionIncrement();
|
skipCounter += posIncrAttribute.getPositionIncrement();
|
||||||
|
|||||||
@@ -18,9 +18,11 @@ package org.owasp.dependencycheck.data.lucene;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
|
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkOneTerm;
|
||||||
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkRandomData;
|
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkRandomData;
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||||
import static org.apache.lucene.util.LuceneTestCase.RANDOM_MULTIPLIER;
|
import static org.apache.lucene.util.LuceneTestCase.RANDOM_MULTIPLIER;
|
||||||
import static org.apache.lucene.util.LuceneTestCase.random;
|
import static org.apache.lucene.util.LuceneTestCase.random;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
@@ -82,4 +84,21 @@ public class AlphaNumericFilterTest extends BaseTokenStreamTestCase {
|
|||||||
fail("Failed test random strings: " + ex.getMessage());
|
fail("Failed test random strings: " + ex.getMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks that an empty term passes through the filter as a single empty term. Copied from
|
||||||
|
* http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
|
||||||
|
*
|
||||||
|
* @throws IOException if analyzing the empty input fails
|
||||||
|
*/
|
||||||
|
public void testEmptyTerm() throws IOException {
|
||||||
|
Analyzer a = new Analyzer() {
|
||||||
|
@Override
|
||||||
|
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
|
||||||
|
Tokenizer tokenizer = new KeywordTokenizer();
|
||||||
|
return new Analyzer.TokenStreamComponents(tokenizer, new AlphaNumericFilter(tokenizer));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
checkOneTerm(a, "", "");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user