mirror of
https://github.com/ysoftdevs/DependencyCheck.git
synced 2026-03-23 17:41:28 +01:00
upgrade lucene, bug fixes, and general cleanup
This commit is contained in:
@@ -82,9 +82,11 @@ public class CPEAnalyzer extends AbstractAnalyzer {
|
|||||||
private static final String WEIGHTING_BOOST = "^5";
|
private static final String WEIGHTING_BOOST = "^5";
|
||||||
/**
|
/**
|
||||||
* A string representation of a regular expression defining characters
|
* A string representation of a regular expression defining characters
|
||||||
* utilized within the CPE Names.
|
* utilized within the CPE Names. Note, the :/ are included so URLs are
|
||||||
|
* passed into the Lucene query so that the specialized tokenizer can parse
|
||||||
|
* them.
|
||||||
*/
|
*/
|
||||||
private static final String CLEANSE_CHARACTER_RX = "[^A-Za-z0-9 ._-]";
|
private static final String CLEANSE_CHARACTER_RX = "[^A-Za-z0-9 ._:/-]";
|
||||||
/**
|
/**
|
||||||
* A string representation of a regular expression used to remove all but
|
* A string representation of a regular expression used to remove all but
|
||||||
* alpha characters.
|
* alpha characters.
|
||||||
|
|||||||
@@ -40,7 +40,6 @@ import org.apache.lucene.search.IndexSearcher;
|
|||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.TopDocs;
|
import org.apache.lucene.search.TopDocs;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
import org.owasp.dependencycheck.data.lucene.LuceneUtils;
|
|
||||||
import org.owasp.dependencycheck.data.lucene.SearchFieldAnalyzer;
|
import org.owasp.dependencycheck.data.lucene.SearchFieldAnalyzer;
|
||||||
import org.owasp.dependencycheck.data.nvdcve.CveDB;
|
import org.owasp.dependencycheck.data.nvdcve.CveDB;
|
||||||
import org.owasp.dependencycheck.data.nvdcve.DatabaseException;
|
import org.owasp.dependencycheck.data.nvdcve.DatabaseException;
|
||||||
@@ -130,7 +129,7 @@ public final class CpeMemoryIndex implements AutoCloseable {
|
|||||||
}
|
}
|
||||||
indexSearcher = new IndexSearcher(indexReader);
|
indexSearcher = new IndexSearcher(indexReader);
|
||||||
searchingAnalyzer = createSearchingAnalyzer();
|
searchingAnalyzer = createSearchingAnalyzer();
|
||||||
queryParser = new QueryParser(LuceneUtils.CURRENT_VERSION, Fields.DOCUMENT_KEY, searchingAnalyzer);
|
queryParser = new QueryParser(Fields.DOCUMENT_KEY, searchingAnalyzer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -151,8 +150,8 @@ public final class CpeMemoryIndex implements AutoCloseable {
|
|||||||
private Analyzer createSearchingAnalyzer() {
|
private Analyzer createSearchingAnalyzer() {
|
||||||
final Map<String, Analyzer> fieldAnalyzers = new HashMap<>();
|
final Map<String, Analyzer> fieldAnalyzers = new HashMap<>();
|
||||||
fieldAnalyzers.put(Fields.DOCUMENT_KEY, new KeywordAnalyzer());
|
fieldAnalyzers.put(Fields.DOCUMENT_KEY, new KeywordAnalyzer());
|
||||||
final SearchFieldAnalyzer productFieldAnalyzer = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
|
final SearchFieldAnalyzer productFieldAnalyzer = new SearchFieldAnalyzer();
|
||||||
final SearchFieldAnalyzer vendorFieldAnalyzer = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
|
final SearchFieldAnalyzer vendorFieldAnalyzer = new SearchFieldAnalyzer();
|
||||||
fieldAnalyzers.put(Fields.PRODUCT, productFieldAnalyzer);
|
fieldAnalyzers.put(Fields.PRODUCT, productFieldAnalyzer);
|
||||||
fieldAnalyzers.put(Fields.VENDOR, vendorFieldAnalyzer);
|
fieldAnalyzers.put(Fields.VENDOR, vendorFieldAnalyzer);
|
||||||
|
|
||||||
@@ -196,7 +195,7 @@ public final class CpeMemoryIndex implements AutoCloseable {
|
|||||||
*/
|
*/
|
||||||
private void buildIndex(CveDB cve) throws IndexException {
|
private void buildIndex(CveDB cve) throws IndexException {
|
||||||
try (Analyzer analyzer = createSearchingAnalyzer();
|
try (Analyzer analyzer = createSearchingAnalyzer();
|
||||||
IndexWriter indexWriter = new IndexWriter(index, new IndexWriterConfig(LuceneUtils.CURRENT_VERSION, analyzer))) {
|
IndexWriter indexWriter = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
|
||||||
// Tip: reuse the Document and Fields for performance...
|
// Tip: reuse the Document and Fields for performance...
|
||||||
// See "Re-use Document and Field instances" from
|
// See "Re-use Document and Field instances" from
|
||||||
// http://wiki.apache.org/lucene-java/ImproveIndexingSpeed
|
// http://wiki.apache.org/lucene-java/ImproveIndexingSpeed
|
||||||
@@ -215,7 +214,7 @@ public final class CpeMemoryIndex implements AutoCloseable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
indexWriter.commit();
|
indexWriter.commit();
|
||||||
indexWriter.close(true);
|
indexWriter.close();
|
||||||
} catch (DatabaseException ex) {
|
} catch (DatabaseException ex) {
|
||||||
LOGGER.debug("", ex);
|
LOGGER.debug("", ex);
|
||||||
throw new IndexException("Error reading CPE data", ex);
|
throw new IndexException("Error reading CPE data", ex);
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
*/
|
*/
|
||||||
package org.owasp.dependencycheck.data.lucene;
|
package org.owasp.dependencycheck.data.lucene;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
import javax.annotation.concurrent.NotThreadSafe;
|
import javax.annotation.concurrent.NotThreadSafe;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
@@ -37,6 +38,11 @@ public abstract class AbstractTokenizingFilter extends TokenFilter {
|
|||||||
*/
|
*/
|
||||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A collection of tokens to add to the stream.
|
||||||
|
*/
|
||||||
|
private final LinkedList<String> tokens;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the CharTermAttribute.
|
* Gets the CharTermAttribute.
|
||||||
*
|
*
|
||||||
@@ -45,10 +51,6 @@ public abstract class AbstractTokenizingFilter extends TokenFilter {
|
|||||||
protected CharTermAttribute getTermAtt() {
|
protected CharTermAttribute getTermAtt() {
|
||||||
return termAtt;
|
return termAtt;
|
||||||
}
|
}
|
||||||
/**
|
|
||||||
* A collection of tokens to add to the stream.
|
|
||||||
*/
|
|
||||||
private final LinkedList<String> tokens;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the list of tokens.
|
* Gets the list of tokens.
|
||||||
@@ -69,6 +71,15 @@ public abstract class AbstractTokenizingFilter extends TokenFilter {
|
|||||||
tokens = new LinkedList<>();
|
tokens = new LinkedList<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
super.reset();
|
||||||
|
tokens.clear();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds a term, if one exists, from the tokens collection.
|
* Adds a term, if one exists, from the tokens collection.
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -0,0 +1,146 @@
|
|||||||
|
/*
|
||||||
|
* This file is part of dependency-check-core.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*
|
||||||
|
* Copyright (c) 2017 Jeremy Long. All Rights Reserved.
|
||||||
|
*/
|
||||||
|
package org.owasp.dependencycheck.data.lucene;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import org.apache.commons.lang3.builder.EqualsBuilder;
|
||||||
|
import org.apache.commons.lang3.builder.HashCodeBuilder;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A simple alphanumeric filter that removes non-alphanumeric characters from
|
||||||
|
* the terms. If a term contains a non-alphanumeric character it may be split
|
||||||
|
* into multiple terms:
|
||||||
|
*
|
||||||
|
* <table>
|
||||||
|
* <tr><th>term</th><th>results in</th></tr>
|
||||||
|
* <tr><td>bob</td><td>bob</td></tr>
|
||||||
|
* <tr><td>bob-cat</td><td>bob cat</td></tr>
|
||||||
|
* <tr><td>#$%</td><td>[skipped]</td></tr>
|
||||||
|
* </table>
|
||||||
|
*
|
||||||
|
* @author jeremy long
|
||||||
|
*/
|
||||||
|
public final class AlphaNumericFilter extends AbstractTokenizingFilter {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The position increment attribute.
|
||||||
|
*/
|
||||||
|
private final PositionIncrementAttribute posIncrAttribute = addAttribute(PositionIncrementAttribute.class);
|
||||||
|
/**
|
||||||
|
* Used to count the number of terms skipped as they were only made up of
|
||||||
|
* special characters.
|
||||||
|
*/
|
||||||
|
private int skipCounter;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a new AlphaNumericFilter.
|
||||||
|
*
|
||||||
|
* @param stream the TokenStream that this filter will process
|
||||||
|
*/
|
||||||
|
public AlphaNumericFilter(TokenStream stream) {
|
||||||
|
super(stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean incrementToken() throws IOException {
|
||||||
|
final LinkedList<String> tokens = getTokens();
|
||||||
|
final CharTermAttribute termAtt = getTermAtt();
|
||||||
|
if (tokens.isEmpty()) {
|
||||||
|
String[] parts;
|
||||||
|
skipCounter = 0;
|
||||||
|
while (input.incrementToken()) {
|
||||||
|
final String text = new String(termAtt.buffer(), 0, termAtt.length());
|
||||||
|
|
||||||
|
parts = text.split("[^a-zA-Z0-9]");
|
||||||
|
if (parts.length == 0) {
|
||||||
|
skipCounter += posIncrAttribute.getPositionIncrement();
|
||||||
|
} else {
|
||||||
|
if (skipCounter != 0) {
|
||||||
|
posIncrAttribute.setPositionIncrement(posIncrAttribute.getPositionIncrement() + skipCounter);
|
||||||
|
}
|
||||||
|
for (String part : parts) {
|
||||||
|
if (!part.isEmpty()) {
|
||||||
|
tokens.add(part);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return addTerm();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
super.reset();
|
||||||
|
skipCounter = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void end() throws IOException {
|
||||||
|
super.end();
|
||||||
|
posIncrAttribute.setPositionIncrement(posIncrAttribute.getPositionIncrement() + skipCounter);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
return new HashCodeBuilder(13, 27)
|
||||||
|
.appendSuper(super.hashCode())
|
||||||
|
.append(posIncrAttribute)
|
||||||
|
.append(skipCounter)
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object obj) {
|
||||||
|
if (obj == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (obj == this) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (obj.getClass() != getClass()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
final AlphaNumericFilter rhs = (AlphaNumericFilter) obj;
|
||||||
|
return new EqualsBuilder()
|
||||||
|
.appendSuper(super.equals(obj))
|
||||||
|
.append(skipCounter, rhs.skipCounter)
|
||||||
|
.append(posIncrAttribute, rhs.posIncrAttribute)
|
||||||
|
.isEquals();
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -17,39 +17,37 @@
|
|||||||
*/
|
*/
|
||||||
package org.owasp.dependencycheck.data.lucene;
|
package org.owasp.dependencycheck.data.lucene;
|
||||||
|
|
||||||
import java.io.Reader;
|
|
||||||
import javax.annotation.concurrent.NotThreadSafe;
|
import javax.annotation.concurrent.NotThreadSafe;
|
||||||
import org.apache.lucene.analysis.util.CharTokenizer;
|
import org.apache.lucene.analysis.util.CharTokenizer;
|
||||||
import org.apache.lucene.util.Version;
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tokenizes the input breaking it into tokens when non-alpha/numeric characters
|
* Tokenizes the input breaking it into tokens when non-alpha/numeric characters
|
||||||
* are found.
|
* are found.
|
||||||
*
|
*
|
||||||
|
* @deprecated This class is no longer used after re-factoring the lucene
|
||||||
|
* analysis.
|
||||||
* @author Jeremy Long
|
* @author Jeremy Long
|
||||||
*/
|
*/
|
||||||
@NotThreadSafe
|
@NotThreadSafe
|
||||||
|
@Deprecated
|
||||||
public class AlphaNumericTokenizer extends CharTokenizer {
|
public class AlphaNumericTokenizer extends CharTokenizer {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a new AlphaNumericTokenizer.
|
* Constructs a new AlphaNumericTokenizer.
|
||||||
*
|
*
|
||||||
* @param matchVersion the lucene version
|
|
||||||
* @param in the Reader
|
|
||||||
*/
|
*/
|
||||||
public AlphaNumericTokenizer(Version matchVersion, Reader in) {
|
public AlphaNumericTokenizer() {
|
||||||
super(matchVersion, in);
|
super();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a new AlphaNumericTokenizer.
|
* Constructs a new AlphaNumericTokenizer.
|
||||||
*
|
*
|
||||||
* @param matchVersion the lucene version
|
|
||||||
* @param factory the AttributeFactory
|
* @param factory the AttributeFactory
|
||||||
* @param in the Reader
|
|
||||||
*/
|
*/
|
||||||
public AlphaNumericTokenizer(Version matchVersion, AttributeFactory factory, Reader in) {
|
public AlphaNumericTokenizer(AttributeFactory factory) {
|
||||||
super(matchVersion, factory, in);
|
super(factory);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@@ -19,7 +19,6 @@ package org.owasp.dependencycheck.data.lucene;
|
|||||||
|
|
||||||
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
|
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
|
||||||
import javax.annotation.concurrent.ThreadSafe;
|
import javax.annotation.concurrent.ThreadSafe;
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>
|
* <p>
|
||||||
@@ -31,12 +30,6 @@ import org.apache.lucene.util.Version;
|
|||||||
@ThreadSafe
|
@ThreadSafe
|
||||||
public final class LuceneUtils {
|
public final class LuceneUtils {
|
||||||
|
|
||||||
/**
|
|
||||||
* The current version of Lucene being used. Declaring this one place so an
|
|
||||||
* upgrade doesn't require hunting through the code base.
|
|
||||||
*/
|
|
||||||
public static final Version CURRENT_VERSION = Version.LUCENE_47;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Private constructor as this is a utility class.
|
* Private constructor as this is a utility class.
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -17,16 +17,15 @@
|
|||||||
*/
|
*/
|
||||||
package org.owasp.dependencycheck.data.lucene;
|
package org.owasp.dependencycheck.data.lucene;
|
||||||
|
|
||||||
import java.io.Reader;
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.core.StopAnalyzer;
|
import org.apache.lucene.analysis.core.StopAnalyzer;
|
||||||
import org.apache.lucene.analysis.core.StopFilter;
|
import org.apache.lucene.analysis.core.StopFilter;
|
||||||
|
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||||
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
|
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
|
||||||
import org.apache.lucene.analysis.util.CharArraySet;
|
import org.apache.lucene.analysis.util.CharArraySet;
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A Lucene field analyzer used to analyze queries against the CPE data.
|
* A Lucene field analyzer used to analyze queries against the CPE data.
|
||||||
@@ -35,10 +34,6 @@ import org.apache.lucene.util.Version;
|
|||||||
*/
|
*/
|
||||||
public class SearchFieldAnalyzer extends Analyzer {
|
public class SearchFieldAnalyzer extends Analyzer {
|
||||||
|
|
||||||
/**
|
|
||||||
* The Lucene Version used.
|
|
||||||
*/
|
|
||||||
private final Version version;
|
|
||||||
/**
|
/**
|
||||||
* The list of additional stop words to use.
|
* The list of additional stop words to use.
|
||||||
*/
|
*/
|
||||||
@@ -55,7 +50,7 @@ public class SearchFieldAnalyzer extends Analyzer {
|
|||||||
* @return the set of stop words being used
|
* @return the set of stop words being used
|
||||||
*/
|
*/
|
||||||
public static CharArraySet getStopWords() {
|
public static CharArraySet getStopWords() {
|
||||||
final CharArraySet words = StopFilter.makeStopSet(LuceneUtils.CURRENT_VERSION, ADDITIONAL_STOP_WORDS, true);
|
final CharArraySet words = StopFilter.makeStopSet(ADDITIONAL_STOP_WORDS, true);
|
||||||
words.addAll(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
|
words.addAll(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
|
||||||
return words;
|
return words;
|
||||||
}
|
}
|
||||||
@@ -63,10 +58,8 @@ public class SearchFieldAnalyzer extends Analyzer {
|
|||||||
/**
|
/**
|
||||||
* Constructs a new SearchFieldAnalyzer.
|
* Constructs a new SearchFieldAnalyzer.
|
||||||
*
|
*
|
||||||
* @param version the Lucene version
|
|
||||||
*/
|
*/
|
||||||
public SearchFieldAnalyzer(Version version) {
|
public SearchFieldAnalyzer() {
|
||||||
this.version = version;
|
|
||||||
stopWords = getStopWords();
|
stopWords = getStopWords();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -74,15 +67,16 @@ public class SearchFieldAnalyzer extends Analyzer {
|
|||||||
* Creates the TokenStreamComponents used to analyze the stream.
|
* Creates the TokenStreamComponents used to analyze the stream.
|
||||||
*
|
*
|
||||||
* @param fieldName the field that this lucene analyzer will process
|
* @param fieldName the field that this lucene analyzer will process
|
||||||
* @param reader a reader containing the tokens
|
|
||||||
* @return the token stream filter chain
|
* @return the token stream filter chain
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
protected TokenStreamComponents createComponents(String fieldName) {
|
||||||
final Tokenizer source = new AlphaNumericTokenizer(version, reader);
|
//final Tokenizer source = new AlphaNumericTokenizer();
|
||||||
|
final Tokenizer source = new WhitespaceTokenizer();
|
||||||
TokenStream stream = source;
|
TokenStream stream = source;
|
||||||
|
|
||||||
|
stream = new UrlTokenizingFilter(stream);
|
||||||
|
stream = new AlphaNumericFilter(stream);
|
||||||
stream = new WordDelimiterFilter(stream,
|
stream = new WordDelimiterFilter(stream,
|
||||||
WordDelimiterFilter.GENERATE_WORD_PARTS
|
WordDelimiterFilter.GENERATE_WORD_PARTS
|
||||||
| WordDelimiterFilter.GENERATE_NUMBER_PARTS
|
| WordDelimiterFilter.GENERATE_NUMBER_PARTS
|
||||||
@@ -91,9 +85,9 @@ public class SearchFieldAnalyzer extends Analyzer {
|
|||||||
| WordDelimiterFilter.SPLIT_ON_NUMERICS
|
| WordDelimiterFilter.SPLIT_ON_NUMERICS
|
||||||
| WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
|
| WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
|
||||||
|
|
||||||
stream = new LowerCaseFilter(version, stream);
|
stream = new LowerCaseFilter(stream);
|
||||||
stream = new UrlTokenizingFilter(stream);
|
|
||||||
stream = new StopFilter(version, stream, stopWords);
|
stream = new StopFilter(stream, stopWords);
|
||||||
stream = new TokenPairConcatenatingFilter(stream);
|
stream = new TokenPairConcatenatingFilter(stream);
|
||||||
|
|
||||||
return new TokenStreamComponents(source, stream);
|
return new TokenStreamComponents(source, stream);
|
||||||
|
|||||||
@@ -18,8 +18,9 @@
|
|||||||
package org.owasp.dependencycheck.data.lucene;
|
package org.owasp.dependencycheck.data.lucene;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.LinkedList;
|
|
||||||
import javax.annotation.concurrent.NotThreadSafe;
|
import javax.annotation.concurrent.NotThreadSafe;
|
||||||
|
import org.apache.commons.lang3.builder.EqualsBuilder;
|
||||||
|
import org.apache.commons.lang3.builder.HashCodeBuilder;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
@@ -46,9 +47,10 @@ public final class TokenPairConcatenatingFilter extends TokenFilter {
|
|||||||
*/
|
*/
|
||||||
private String previousWord;
|
private String previousWord;
|
||||||
/**
|
/**
|
||||||
* A list of words parsed.
|
* Keeps track if we are adding a single term or concatenating with the
|
||||||
|
* previous.
|
||||||
*/
|
*/
|
||||||
private final LinkedList<String> words;
|
private boolean addSingleTerm;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a new TokenPairConcatenatingFilter.
|
* Constructs a new TokenPairConcatenatingFilter.
|
||||||
@@ -57,7 +59,8 @@ public final class TokenPairConcatenatingFilter extends TokenFilter {
|
|||||||
*/
|
*/
|
||||||
public TokenPairConcatenatingFilter(TokenStream stream) {
|
public TokenPairConcatenatingFilter(TokenStream stream) {
|
||||||
super(stream);
|
super(stream);
|
||||||
words = new LinkedList<>();
|
addSingleTerm = true;
|
||||||
|
previousWord = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -70,86 +73,83 @@ public final class TokenPairConcatenatingFilter extends TokenFilter {
|
|||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean incrementToken() throws IOException {
|
public boolean incrementToken() throws IOException {
|
||||||
|
if (addSingleTerm && previousWord != null) {
|
||||||
//collect all the terms into the words collection
|
addSingleTerm = false;
|
||||||
while (input.incrementToken()) {
|
|
||||||
final String word = new String(termAtt.buffer(), 0, termAtt.length());
|
|
||||||
words.add(word);
|
|
||||||
}
|
|
||||||
|
|
||||||
//if we have a previousTerm - write it out as its own token concatenated
|
|
||||||
// with the current word (if one is available).
|
|
||||||
if (previousWord != null && !words.isEmpty()) {
|
|
||||||
final String word = words.getFirst();
|
|
||||||
clearAttributes();
|
clearAttributes();
|
||||||
termAtt.append(previousWord).append(word);
|
termAtt.append(previousWord);
|
||||||
previousWord = null;
|
|
||||||
return true;
|
return true;
|
||||||
}
|
|
||||||
//if we have words, write it out as a single token
|
} else if (input.incrementToken()) {
|
||||||
if (!words.isEmpty()) {
|
final String word = new String(termAtt.buffer(), 0, termAtt.length());
|
||||||
final String word = words.removeFirst();
|
if (addSingleTerm) {
|
||||||
clearAttributes();
|
clearAttributes();
|
||||||
termAtt.append(word);
|
termAtt.append(word);
|
||||||
previousWord = word;
|
previousWord = word;
|
||||||
|
addSingleTerm = false;
|
||||||
|
} else {
|
||||||
|
clearAttributes();
|
||||||
|
termAtt.append(previousWord).append(word);
|
||||||
|
previousWord = word;
|
||||||
|
addSingleTerm = true;
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>
|
* {@inheritDoc}
|
||||||
* Resets the Filter and clears any internal state data that may have been
|
|
||||||
* left-over from previous uses of the Filter.</p>
|
|
||||||
* <p>
|
|
||||||
* <b>If this Filter is re-used this method must be called between
|
|
||||||
* uses.</b></p>
|
|
||||||
*
|
|
||||||
* @throws java.io.IOException thrown if there is an error resetting the
|
|
||||||
* filter
|
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void end() throws IOException {
|
public void end() throws IOException {
|
||||||
super.end();
|
super.end();
|
||||||
previousWord = null;
|
previousWord = null;
|
||||||
words.clear();
|
addSingleTerm = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Standard hash code implementation.
|
* {@inheritDoc}
|
||||||
*
|
*/
|
||||||
* @return the hash code
|
@Override
|
||||||
|
public void reset() throws IOException {
|
||||||
|
super.reset();
|
||||||
|
previousWord = null;
|
||||||
|
addSingleTerm = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@inheritDoc}
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
int hash = 3;
|
return new HashCodeBuilder(13, 27)
|
||||||
hash = 31 * hash + (this.termAtt != null ? this.termAtt.hashCode() : 0);
|
.appendSuper(super.hashCode())
|
||||||
hash = 31 * hash + (this.previousWord != null ? this.previousWord.hashCode() : 0);
|
.append(addSingleTerm)
|
||||||
hash = 31 * hash + (this.words != null ? this.words.hashCode() : 0);
|
.append(previousWord)
|
||||||
return hash;
|
.append(termAtt)
|
||||||
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Standard equals implementation.
|
* {@inheritDoc}
|
||||||
*
|
|
||||||
* @param obj the object to compare
|
|
||||||
* @return true if the objects are equal; otherwise false.
|
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object obj) {
|
public boolean equals(Object obj) {
|
||||||
if (obj == null) {
|
if (obj == null) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (getClass() != obj.getClass()) {
|
if (obj == this) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (obj.getClass() != getClass()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
final TokenPairConcatenatingFilter other = (TokenPairConcatenatingFilter) obj;
|
final TokenPairConcatenatingFilter rhs = (TokenPairConcatenatingFilter) obj;
|
||||||
if (this.termAtt != other.termAtt && (this.termAtt == null || !this.termAtt.equals(other.termAtt))) {
|
return new EqualsBuilder()
|
||||||
return false;
|
.appendSuper(super.equals(obj))
|
||||||
}
|
.append(addSingleTerm, rhs.addSingleTerm)
|
||||||
if ((this.previousWord == null) ? (other.previousWord != null) : !this.previousWord.equals(other.previousWord)) {
|
.append(previousWord, rhs.previousWord)
|
||||||
return false;
|
.append(termAtt, rhs.termAtt)
|
||||||
}
|
.isEquals();
|
||||||
return !(this.words != other.words && (this.words == null || !this.words.equals(other.words)));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ public final class UrlTokenizingFilter extends AbstractTokenizingFilter {
|
|||||||
private static final Logger LOGGER = LoggerFactory.getLogger(UrlTokenizingFilter.class);
|
private static final Logger LOGGER = LoggerFactory.getLogger(UrlTokenizingFilter.class);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructs a new VersionTokenizingFilter.
|
* Constructs a new UrlTokenizingFilter.
|
||||||
*
|
*
|
||||||
* @param stream the TokenStream that this filter will process
|
* @param stream the TokenStream that this filter will process
|
||||||
*/
|
*/
|
||||||
|
|||||||
@@ -820,6 +820,11 @@ public final class CveDB implements AutoCloseable {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the size of the batch.
|
||||||
|
*
|
||||||
|
* @return the size of the batch
|
||||||
|
*/
|
||||||
private int getBatchSize() {
|
private int getBatchSize() {
|
||||||
int max;
|
int max;
|
||||||
try {
|
try {
|
||||||
@@ -830,6 +835,12 @@ public final class CveDB implements AutoCloseable {
|
|||||||
return max;
|
return max;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines whether or not batch insert is enabled.
|
||||||
|
*
|
||||||
|
* @return <code>true</code> if batch insert is enabled; otherwise
|
||||||
|
* <code>false</code>
|
||||||
|
*/
|
||||||
private boolean isBatchInsertEnabled() {
|
private boolean isBatchInsertEnabled() {
|
||||||
boolean batch = false;
|
boolean batch = false;
|
||||||
try {
|
try {
|
||||||
@@ -841,25 +852,34 @@ public final class CveDB implements AutoCloseable {
|
|||||||
return batch;
|
return batch;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generates a logging message for batch inserts.
|
||||||
|
*
|
||||||
|
* @param pCountReferences the number of batch statements executed
|
||||||
|
* @param pFormat a Java String.format string
|
||||||
|
* @return the formatted string
|
||||||
|
*/
|
||||||
private String getLogForBatchInserts(int pCountReferences, String pFormat) {
|
private String getLogForBatchInserts(int pCountReferences, String pFormat) {
|
||||||
return String.format(pFormat, pCountReferences, new Date());
|
return String.format(pFormat, pCountReferences, new Date());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Executes batch inserts of vulnerabilities when property
|
* Executes batch inserts of vulnerabilities when property
|
||||||
* database.batchinsert.maxsize is reached
|
* database.batchinsert.maxsize is reached.
|
||||||
*
|
*
|
||||||
* @param pVulnerability
|
* @param pVulnerability the vulnerability
|
||||||
* @param pVulnerableSoftware
|
* @param pVulnerableSoftware the vulnerable software
|
||||||
* @param pInsertSoftware
|
* @param pInsertSoftware the prepared statement to batch execute
|
||||||
* @throws SQLException
|
* @throws SQLException thrown when the batch cannot be executed
|
||||||
*/
|
*/
|
||||||
private void executeBatch(Vulnerability pVulnerability, VulnerableSoftware pVulnerableSoftware, PreparedStatement pInsertSoftware) throws SQLException {
|
private void executeBatch(Vulnerability pVulnerability, VulnerableSoftware pVulnerableSoftware, PreparedStatement pInsertSoftware)
|
||||||
|
throws SQLException {
|
||||||
try {
|
try {
|
||||||
pInsertSoftware.executeBatch();
|
pInsertSoftware.executeBatch();
|
||||||
} catch (SQLException ex) {
|
} catch (SQLException ex) {
|
||||||
if (ex.getMessage().contains("Duplicate entry")) {
|
if (ex.getMessage().contains("Duplicate entry")) {
|
||||||
final String msg = String.format("Duplicate software key identified in '%s:%s'", pVulnerability.getName(), pVulnerableSoftware.getName());
|
final String msg = String.format("Duplicate software key identified in '%s:%s'",
|
||||||
|
pVulnerability.getName(), pVulnerableSoftware.getName());
|
||||||
LOGGER.info(msg, ex);
|
LOGGER.info(msg, ex);
|
||||||
} else {
|
} else {
|
||||||
throw ex;
|
throw ex;
|
||||||
|
|||||||
@@ -72,7 +72,8 @@ public class H2DBLock {
|
|||||||
private final String magic;
|
private final String magic;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The shutdown hook used to remove the lock file in case of an unexpected shutdown.
|
* The shutdown hook used to remove the lock file in case of an unexpected
|
||||||
|
* shutdown.
|
||||||
*/
|
*/
|
||||||
private H2DBShutdownHook hook = null;
|
private H2DBShutdownHook hook = null;
|
||||||
|
|
||||||
@@ -161,11 +162,19 @@ public class H2DBLock {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks the state of the custom h2 lock file and under some conditions
|
||||||
|
* will attempt to remove the lock file.
|
||||||
|
*
|
||||||
|
* @throws H2DBLockException thrown if the lock directory does not exist and
|
||||||
|
* cannot be created
|
||||||
|
*/
|
||||||
private void checkState() throws H2DBLockException {
|
private void checkState() throws H2DBLockException {
|
||||||
if (!lockFile.getParentFile().isDirectory() && !lockFile.mkdir()) {
|
if (!lockFile.getParentFile().isDirectory() && !lockFile.mkdir()) {
|
||||||
throw new H2DBLockException("Unable to create path to data directory.");
|
throw new H2DBLockException("Unable to create path to data directory.");
|
||||||
}
|
}
|
||||||
if (lockFile.isFile()) {
|
if (lockFile.isFile()) {
|
||||||
|
//TODO - this 30 minute check needs to be configurable.
|
||||||
if (getFileAge(lockFile) > 30) {
|
if (getFileAge(lockFile) > 30) {
|
||||||
LOGGER.debug("An old db update lock file was found: {}", lockFile.getAbsolutePath());
|
LOGGER.debug("An old db update lock file was found: {}", lockFile.getAbsolutePath());
|
||||||
if (!lockFile.delete()) {
|
if (!lockFile.delete()) {
|
||||||
@@ -232,6 +241,9 @@ public class H2DBLock {
|
|||||||
return time;
|
return time;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Adds the shutdown hook to the JVM.
|
||||||
|
*/
|
||||||
private void addShutdownHook() {
|
private void addShutdownHook() {
|
||||||
if (hook == null) {
|
if (hook == null) {
|
||||||
hook = H2DBShutdownHookFactory.getHook(settings);
|
hook = H2DBShutdownHookFactory.getHook(settings);
|
||||||
@@ -240,6 +252,9 @@ public class H2DBLock {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Removes the shutdown hook.
|
||||||
|
*/
|
||||||
private void removeShutdownHook() {
|
private void removeShutdownHook() {
|
||||||
if (hook != null) {
|
if (hook != null) {
|
||||||
hook.remove();
|
hook.remove();
|
||||||
|
|||||||
@@ -32,6 +32,13 @@ public final class H2DBShutdownHookFactory {
|
|||||||
*/
|
*/
|
||||||
private static final Logger LOGGER = LoggerFactory.getLogger(H2DBShutdownHookFactory.class);
|
private static final Logger LOGGER = LoggerFactory.getLogger(H2DBShutdownHookFactory.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Empty constructor for utility class.
|
||||||
|
*/
|
||||||
|
private H2DBShutdownHookFactory() {
|
||||||
|
//empty
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new H2DB Shutdown Hook.
|
* Creates a new H2DB Shutdown Hook.
|
||||||
*
|
*
|
||||||
@@ -40,8 +47,8 @@ public final class H2DBShutdownHookFactory {
|
|||||||
*/
|
*/
|
||||||
public static H2DBShutdownHook getHook(Settings settings) {
|
public static H2DBShutdownHook getHook(Settings settings) {
|
||||||
try {
|
try {
|
||||||
String className = settings.getString(Settings.KEYS.H2DB_SHUTDOWN_HOOK, "org.owasp.dependencycheck.utils.H2DBCleanupHook");
|
final String className = settings.getString(Settings.KEYS.H2DB_SHUTDOWN_HOOK, "org.owasp.dependencycheck.utils.H2DBCleanupHook");
|
||||||
Class type = Class.forName(className);
|
final Class type = Class.forName(className);
|
||||||
return (H2DBShutdownHook) type.newInstance();
|
return (H2DBShutdownHook) type.newInstance();
|
||||||
} catch (ClassNotFoundException | InstantiationException | IllegalAccessException ex) {
|
} catch (ClassNotFoundException | InstantiationException | IllegalAccessException ex) {
|
||||||
LOGGER.debug("Failed to instantiate {}, using default shutdown hook instead", ex);
|
LOGGER.debug("Failed to instantiate {}, using default shutdown hook instead", ex);
|
||||||
|
|||||||
@@ -65,6 +65,7 @@
|
|||||||
9. mail_project is ruby library
|
9. mail_project is ruby library
|
||||||
10. ldap_project is part of typo3 written in php
|
10. ldap_project is part of typo3 written in php
|
||||||
11. user import project is used in drupal (i.e. php)
|
11. user import project is used in drupal (i.e. php)
|
||||||
|
12. root is a c++ project https://github.com/root-project/root/
|
||||||
]]></notes>
|
]]></notes>
|
||||||
<filePath regex="true">.*(\.(dll|jar|ear|war|pom|nupkg|nuspec)|pom\.xml|package.json)$</filePath>
|
<filePath regex="true">.*(\.(dll|jar|ear|war|pom|nupkg|nuspec)|pom\.xml|package.json)$</filePath>
|
||||||
<cpe>cpe:/a:sandbox:sandbox</cpe>
|
<cpe>cpe:/a:sandbox:sandbox</cpe>
|
||||||
@@ -81,6 +82,7 @@
|
|||||||
<cpe>cpe:/a:mail_project:mail</cpe>
|
<cpe>cpe:/a:mail_project:mail</cpe>
|
||||||
<cpe>cpe:/a:ldap_project:ldap</cpe>
|
<cpe>cpe:/a:ldap_project:ldap</cpe>
|
||||||
<cpe>cpe:/a:user_import_project:user_import</cpe>
|
<cpe>cpe:/a:user_import_project:user_import</cpe>
|
||||||
|
<cpe>cpe:/a:root:root</cpe>
|
||||||
</suppress>
|
</suppress>
|
||||||
<suppress base="true">
|
<suppress base="true">
|
||||||
<notes><![CDATA[
|
<notes><![CDATA[
|
||||||
|
|||||||
@@ -0,0 +1,87 @@
|
|||||||
|
/*
|
||||||
|
* Copyright 2017 OWASP.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.owasp.dependencycheck.data.lucene;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
|
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkOneTerm;
|
||||||
|
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkRandomData;
|
||||||
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||||
|
import static org.apache.lucene.util.LuceneTestCase.RANDOM_MULTIPLIER;
|
||||||
|
import static org.apache.lucene.util.LuceneTestCase.random;
|
||||||
|
import org.junit.Test;
|
||||||
|
import static org.junit.Assert.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @author jeremy
|
||||||
|
*/
|
||||||
|
public class AlphaNumericFilterTest extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
|
private final Analyzer analyzer;
|
||||||
|
|
||||||
|
public AlphaNumericFilterTest() {
|
||||||
|
analyzer = new Analyzer() {
|
||||||
|
@Override
|
||||||
|
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
|
||||||
|
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
|
||||||
|
return new Analyzer.TokenStreamComponents(source, new AlphaNumericFilter(source));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of incrementToken method, of class AlphaNumericFilter.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testIncrementToken() throws Exception {
|
||||||
|
String[] expected = new String[6];
|
||||||
|
expected[0] = "http";
|
||||||
|
expected[1] = "www";
|
||||||
|
expected[2] = "domain";
|
||||||
|
expected[3] = "com";
|
||||||
|
expected[4] = "test";
|
||||||
|
expected[5] = "php";
|
||||||
|
assertAnalyzesTo(analyzer, "http://www.domain.com/test.php", expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test of incrementToken method, of class AlphaNumericFilter, using input that is mostly punctuation.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testGarbage() throws Exception {
|
||||||
|
String[] expected = new String[2];
|
||||||
|
expected[0] = "test";
|
||||||
|
expected[1] = "two";
|
||||||
|
assertAnalyzesTo(analyzer, "!@#$% !@#$ &*(@#$ test-two @#$%", expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* copied from
|
||||||
|
* http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
|
||||||
|
* blast some random strings through the analyzer
|
||||||
|
*/
|
||||||
|
public void testRandomStrings() {
|
||||||
|
try {
|
||||||
|
checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
|
||||||
|
} catch (IOException ex) {
|
||||||
|
fail("Failed test random strings: " + ex.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -52,7 +52,7 @@ public class FieldAnalyzerTest extends BaseTest {
|
|||||||
@Test
|
@Test
|
||||||
public void testAnalyzers() throws Exception {
|
public void testAnalyzers() throws Exception {
|
||||||
|
|
||||||
Analyzer analyzer = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
|
Analyzer analyzer = new SearchFieldAnalyzer();
|
||||||
Directory index = new RAMDirectory();
|
Directory index = new RAMDirectory();
|
||||||
|
|
||||||
String field1 = "product";
|
String field1 = "product";
|
||||||
@@ -68,16 +68,16 @@ public class FieldAnalyzerTest extends BaseTest {
|
|||||||
addDoc(w, field1, text1, field2, text2);
|
addDoc(w, field1, text1, field2, text2);
|
||||||
}
|
}
|
||||||
|
|
||||||
//Analyzer searchingAnalyzer = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
|
//Analyzer searchingAnalyzer = new SearchFieldAnalyzer();
|
||||||
String querystr = "product:\"(Spring Framework Core)\" vendor:(SpringSource)";
|
String querystr = "product:\"(Spring Framework Core)\" vendor:(SpringSource)";
|
||||||
|
|
||||||
SearchFieldAnalyzer searchAnalyzerProduct = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
|
SearchFieldAnalyzer searchAnalyzerProduct = new SearchFieldAnalyzer();
|
||||||
SearchFieldAnalyzer searchAnalyzerVendor = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
|
SearchFieldAnalyzer searchAnalyzerVendor = new SearchFieldAnalyzer();
|
||||||
HashMap<String, Analyzer> map = new HashMap<>();
|
HashMap<String, Analyzer> map = new HashMap<>();
|
||||||
map.put(field1, searchAnalyzerProduct);
|
map.put(field1, searchAnalyzerProduct);
|
||||||
map.put(field2, searchAnalyzerVendor);
|
map.put(field2, searchAnalyzerVendor);
|
||||||
PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(LuceneUtils.CURRENT_VERSION), map);
|
PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(), map);
|
||||||
QueryParser parser = new QueryParser(LuceneUtils.CURRENT_VERSION, field1, wrapper);
|
QueryParser parser = new QueryParser(field1, wrapper);
|
||||||
|
|
||||||
Query q = parser.parse(querystr);
|
Query q = parser.parse(querystr);
|
||||||
|
|
||||||
@@ -85,7 +85,7 @@ public class FieldAnalyzerTest extends BaseTest {
|
|||||||
|
|
||||||
IndexReader reader = DirectoryReader.open(index);
|
IndexReader reader = DirectoryReader.open(index);
|
||||||
IndexSearcher searcher = new IndexSearcher(reader);
|
IndexSearcher searcher = new IndexSearcher(reader);
|
||||||
TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
|
TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage);
|
||||||
searcher.search(q, collector);
|
searcher.search(q, collector);
|
||||||
ScoreDoc[] hits = collector.topDocs().scoreDocs;
|
ScoreDoc[] hits = collector.topDocs().scoreDocs;
|
||||||
|
|
||||||
@@ -99,7 +99,7 @@ public class FieldAnalyzerTest extends BaseTest {
|
|||||||
|
|
||||||
querystr = "product:( x-stream^5 ) AND vendor:( thoughtworks.xstream )";
|
querystr = "product:( x-stream^5 ) AND vendor:( thoughtworks.xstream )";
|
||||||
Query q3 = parser.parse(querystr);
|
Query q3 = parser.parse(querystr);
|
||||||
collector = TopScoreDocCollector.create(hitsPerPage, true);
|
collector = TopScoreDocCollector.create(hitsPerPage);
|
||||||
searcher.search(q3, collector);
|
searcher.search(q3, collector);
|
||||||
hits = collector.topDocs().scoreDocs;
|
hits = collector.topDocs().scoreDocs;
|
||||||
assertEquals("x-stream", searcher.doc(hits[0].doc).get(field1));
|
assertEquals("x-stream", searcher.doc(hits[0].doc).get(field1));
|
||||||
@@ -107,7 +107,7 @@ public class FieldAnalyzerTest extends BaseTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private IndexWriter createIndex(Analyzer analyzer, Directory index) throws IOException {
|
private IndexWriter createIndex(Analyzer analyzer, Directory index) throws IOException {
|
||||||
IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.CURRENT_VERSION, analyzer);
|
IndexWriterConfig config = new IndexWriterConfig( analyzer);
|
||||||
return new IndexWriter(index, config);
|
return new IndexWriter(index, config);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -18,6 +18,7 @@
|
|||||||
package org.owasp.dependencycheck.data.lucene;
|
package org.owasp.dependencycheck.data.lucene;
|
||||||
|
|
||||||
import static org.junit.Assert.assertEquals;
|
import static org.junit.Assert.assertEquals;
|
||||||
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.owasp.dependencycheck.BaseTest;
|
import org.owasp.dependencycheck.BaseTest;
|
||||||
|
|||||||
@@ -19,13 +19,24 @@ package org.owasp.dependencycheck.data.lucene;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.StringReader;
|
import java.io.StringReader;
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
|
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertAnalyzesTo;
|
||||||
|
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkOneTerm;
|
||||||
|
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkRandomData;
|
||||||
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.Tokenizer;
|
import org.apache.lucene.analysis.Tokenizer;
|
||||||
|
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
||||||
|
import org.apache.lucene.util.AttributeFactory;
|
||||||
|
import static org.apache.lucene.util.LuceneTestCase.RANDOM_MULTIPLIER;
|
||||||
|
import static org.apache.lucene.util.LuceneTestCase.random;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
|
import static org.junit.Assert.fail;
|
||||||
|
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
@@ -33,25 +44,60 @@ import org.junit.Before;
|
|||||||
*/
|
*/
|
||||||
public class TokenPairConcatenatingFilterTest extends BaseTokenStreamTestCase {
|
public class TokenPairConcatenatingFilterTest extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
@Override
|
private final Analyzer analyzer;
|
||||||
@Before
|
|
||||||
public void setUp() throws Exception {
|
|
||||||
super.setUp();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
public TokenPairConcatenatingFilterTest() {
|
||||||
@After
|
analyzer = new Analyzer() {
|
||||||
public void tearDown() throws Exception {
|
@Override
|
||||||
super.tearDown();
|
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
|
||||||
|
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
|
||||||
|
return new Analyzer.TokenStreamComponents(source, new TokenPairConcatenatingFilter(source));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Test of incrementToken method, of class TokenPairConcatenatingFilter.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testIncrementToken() throws Exception {
|
||||||
|
String[] expected = new String[5];
|
||||||
|
expected[0] = "red";
|
||||||
|
expected[1] = "redblue";
|
||||||
|
expected[2] = "blue";
|
||||||
|
expected[3] = "bluegreen";
|
||||||
|
expected[4] = "green";
|
||||||
|
assertAnalyzesTo(analyzer, "red blue green", expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* copied from
|
||||||
|
* http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
|
||||||
|
* blast some random strings through the analyzer
|
||||||
|
*/
|
||||||
|
public void testRandomStrings() {
|
||||||
|
try {
|
||||||
|
checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
|
||||||
|
} catch (IOException ex) {
|
||||||
|
fail("Failed test random strings: " + ex.getMessage());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* test some examples
|
* copied from
|
||||||
|
* http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
|
||||||
|
*
|
||||||
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public void testExamples() throws IOException {
|
public void testEmptyTerm() throws IOException {
|
||||||
Tokenizer wsTokenizer = new WhitespaceTokenizer(LuceneUtils.CURRENT_VERSION, new StringReader("one two three"));
|
Analyzer a = new Analyzer() {
|
||||||
TokenStream filter = new TokenPairConcatenatingFilter(wsTokenizer);
|
@Override
|
||||||
assertTokenStreamContents(filter,
|
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
|
||||||
new String[]{"one", "onetwo", "two", "twothree", "three"});
|
Tokenizer tokenizer = new KeywordTokenizer();
|
||||||
|
return new Analyzer.TokenStreamComponents(tokenizer, new TokenPairConcatenatingFilter(tokenizer));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
checkOneTerm(a, "", "");
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -18,7 +18,6 @@
|
|||||||
package org.owasp.dependencycheck.data.lucene;
|
package org.owasp.dependencycheck.data.lucene;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
||||||
import org.apache.lucene.analysis.MockTokenizer;
|
import org.apache.lucene.analysis.MockTokenizer;
|
||||||
@@ -36,9 +35,8 @@ public class UrlTokenizingFilterTest extends BaseTokenStreamTestCase {
|
|||||||
public UrlTokenizingFilterTest() {
|
public UrlTokenizingFilterTest() {
|
||||||
analyzer = new Analyzer() {
|
analyzer = new Analyzer() {
|
||||||
@Override
|
@Override
|
||||||
protected TokenStreamComponents createComponents(String fieldName,
|
protected TokenStreamComponents createComponents(String fieldName) {
|
||||||
Reader reader) {
|
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
|
||||||
Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
|
||||||
return new TokenStreamComponents(source, new UrlTokenizingFilter(source));
|
return new TokenStreamComponents(source, new UrlTokenizingFilter(source));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -77,8 +75,8 @@ public class UrlTokenizingFilterTest extends BaseTokenStreamTestCase {
|
|||||||
public void testEmptyTerm() throws IOException {
|
public void testEmptyTerm() throws IOException {
|
||||||
Analyzer a = new Analyzer() {
|
Analyzer a = new Analyzer() {
|
||||||
@Override
|
@Override
|
||||||
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
|
protected TokenStreamComponents createComponents(String fieldName) {
|
||||||
Tokenizer tokenizer = new KeywordTokenizer(reader);
|
Tokenizer tokenizer = new KeywordTokenizer();
|
||||||
return new TokenStreamComponents(tokenizer, new UrlTokenizingFilter(tokenizer));
|
return new TokenStreamComponents(tokenizer, new UrlTokenizingFilter(tokenizer));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
2
pom.xml
2
pom.xml
@@ -124,7 +124,7 @@ Copyright (c) 2012 - Jeremy Long
|
|||||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
|
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
|
||||||
<github.global.server>github</github.global.server>
|
<github.global.server>github</github.global.server>
|
||||||
<apache.lucene.version>4.7.2</apache.lucene.version>
|
<apache.lucene.version>5.5.5</apache.lucene.version>
|
||||||
<apache.ant.version>1.9.9</apache.ant.version>
|
<apache.ant.version>1.9.9</apache.ant.version>
|
||||||
<!--upgrading to the 1.8 requires Java 8 compatibility - we are maintaining 7 atm-->
|
<!--upgrading to the 1.8 requires Java 8 compatibility - we are maintaining 7 atm-->
|
||||||
<slf4j.version>1.7.25</slf4j.version>
|
<slf4j.version>1.7.25</slf4j.version>
|
||||||
|
|||||||
Reference in New Issue
Block a user