Merge pull request #1048 from jeremylong/luceneUpgrade

Lucene upgrade, bug fixes, and general cleanup
Jeremy Long
2017-12-29 06:26:25 -05:00
committed by GitHub
18 changed files with 575 additions and 149 deletions


@@ -82,9 +82,11 @@ public class CPEAnalyzer extends AbstractAnalyzer {
private static final String WEIGHTING_BOOST = "^5";
/**
* A string representation of a regular expression defining characters
* utilized within the CPE Names.
* utilized within the CPE Names. Note, the :/ are included so URLs are
* passed into the Lucene query so that the specialized tokenizer can parse
* them.
*/
private static final String CLEANSE_CHARACTER_RX = "[^A-Za-z0-9 ._-]";
private static final String CLEANSE_CHARACTER_RX = "[^A-Za-z0-9 ._:/-]";
/**
* A string representation of a regular expression used to remove all but
* alpha characters.
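
A minimal sketch of what widening CLEANSE_CHARACTER_RX changes (the CleanseSketch class and cleanse step below are illustrative, not CPEAnalyzer code, and assume matches are replaced with a space): with ':' and '/' retained, a URL in the search text survives cleansing and can reach the specialized Lucene tokenizer intact.

public class CleanseSketch {
    private static final String OLD_RX = "[^A-Za-z0-9 ._-]";
    private static final String NEW_RX = "[^A-Za-z0-9 ._:/-]";

    public static void main(String[] args) {
        String text = "apache struts http://www.domain.com/test.php";
        // old behavior: URL separators are stripped before the query is built
        System.out.println(text.replaceAll(OLD_RX, " ")); // apache struts http   www.domain.com test.php
        // new behavior: ':' and '/' survive, so the URL reaches the specialized tokenizer
        System.out.println(text.replaceAll(NEW_RX, " ")); // apache struts http://www.domain.com/test.php
    }
}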


@@ -40,7 +40,6 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
import org.owasp.dependencycheck.data.lucene.LuceneUtils;
import org.owasp.dependencycheck.data.lucene.SearchFieldAnalyzer;
import org.owasp.dependencycheck.data.nvdcve.CveDB;
import org.owasp.dependencycheck.data.nvdcve.DatabaseException;
@@ -130,7 +129,7 @@ public final class CpeMemoryIndex implements AutoCloseable {
}
indexSearcher = new IndexSearcher(indexReader);
searchingAnalyzer = createSearchingAnalyzer();
queryParser = new QueryParser(LuceneUtils.CURRENT_VERSION, Fields.DOCUMENT_KEY, searchingAnalyzer);
queryParser = new QueryParser(Fields.DOCUMENT_KEY, searchingAnalyzer);
}
}
@@ -151,8 +150,8 @@ public final class CpeMemoryIndex implements AutoCloseable {
private Analyzer createSearchingAnalyzer() {
final Map<String, Analyzer> fieldAnalyzers = new HashMap<>();
fieldAnalyzers.put(Fields.DOCUMENT_KEY, new KeywordAnalyzer());
final SearchFieldAnalyzer productFieldAnalyzer = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
final SearchFieldAnalyzer vendorFieldAnalyzer = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
final SearchFieldAnalyzer productFieldAnalyzer = new SearchFieldAnalyzer();
final SearchFieldAnalyzer vendorFieldAnalyzer = new SearchFieldAnalyzer();
fieldAnalyzers.put(Fields.PRODUCT, productFieldAnalyzer);
fieldAnalyzers.put(Fields.VENDOR, vendorFieldAnalyzer);
@@ -196,7 +195,7 @@ public final class CpeMemoryIndex implements AutoCloseable {
*/
private void buildIndex(CveDB cve) throws IndexException {
try (Analyzer analyzer = createSearchingAnalyzer();
IndexWriter indexWriter = new IndexWriter(index, new IndexWriterConfig(LuceneUtils.CURRENT_VERSION, analyzer))) {
IndexWriter indexWriter = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
// Tip: reuse the Document and Fields for performance...
// See "Re-use Document and Field instances" from
// http://wiki.apache.org/lucene-java/ImproveIndexingSpeed
@@ -215,7 +214,7 @@ public final class CpeMemoryIndex implements AutoCloseable {
}
}
indexWriter.commit();
indexWriter.close(true);
indexWriter.close();
} catch (DatabaseException ex) {
LOGGER.debug("", ex);
throw new IndexException("Error reading CPE data", ex);
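
For reference, a self-contained sketch of the Lucene 5.x API this file migrates to: IndexWriterConfig, QueryParser, and StandardAnalyzer no longer take a Version argument, and IndexWriter.close() no longer takes a boolean. The class and field names below are illustrative, not dependency-check code.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class Lucene5ApiSketch {
    public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        Analyzer analyzer = new StandardAnalyzer();                        // no Version argument in 5.x
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            Document doc = new Document();
            doc.add(new TextField("product", "spring framework", Field.Store.YES));
            writer.addDocument(doc);
            writer.commit();
        }                                                                  // close() no longer takes a boolean
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            QueryParser parser = new QueryParser("product", analyzer);     // field + analyzer only
            TopDocs hits = searcher.search(parser.parse("spring"), 10);
            System.out.println(hits.totalHits + " hit(s)");
        }
    }
}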


@@ -17,6 +17,7 @@
*/
package org.owasp.dependencycheck.data.lucene;
import java.io.IOException;
import java.util.LinkedList;
import javax.annotation.concurrent.NotThreadSafe;
import org.apache.lucene.analysis.TokenFilter;
@@ -37,6 +38,11 @@ public abstract class AbstractTokenizingFilter extends TokenFilter {
*/
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**
* A collection of tokens to add to the stream.
*/
private final LinkedList<String> tokens;
/**
* Gets the CharTermAttribute.
*
@@ -45,10 +51,6 @@ public abstract class AbstractTokenizingFilter extends TokenFilter {
protected CharTermAttribute getTermAtt() {
return termAtt;
}
/**
* A collection of tokens to add to the stream.
*/
private final LinkedList<String> tokens;
/**
* Gets the list of tokens.
@@ -69,6 +71,15 @@ public abstract class AbstractTokenizingFilter extends TokenFilter {
tokens = new LinkedList<>();
}
/**
* {@inheritDoc}
*/
@Override
public void reset() throws IOException {
super.reset();
tokens.clear();
}
/**
* Adds a term, if one exists, from the tokens collection.
*


@@ -0,0 +1,148 @@
/*
* This file is part of dependency-check-core.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright (c) 2017 Jeremy Long. All Rights Reserved.
*/
package org.owasp.dependencycheck.data.lucene;
import java.io.IOException;
import java.util.LinkedList;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
/**
* A simple alphanumeric filter that removes non-alphanumeric characters from
* the terms. If a term contains a non-alphanumeric character it may be split
* into multiple terms:
*
* <table>
* <tr><th>term</th><th>results in</th></tr>
* <tr><td>bob</td><td>bob</td></tr>
* <tr><td>bob-cat</td><td>bob cat</td></tr>
* <tr><td>#$%</td><td>[skipped]</td></tr>
* </table>
*
* @author jeremy long
*/
public final class AlphaNumericFilter extends AbstractTokenizingFilter {
/**
* The position increment attribute.
*/
private final PositionIncrementAttribute posIncrAttribute = addAttribute(PositionIncrementAttribute.class);
/**
* Used to count the number of terms skipped as they were only made up of
* special characters.
*/
private int skipCounter;
/**
* Constructs a new AlphaNumericFilter.
*
* @param stream the TokenStream that this filter will process
*/
public AlphaNumericFilter(TokenStream stream) {
super(stream);
}
/**
* {@inheritDoc}
*/
@Override
public boolean incrementToken() throws IOException {
final LinkedList<String> tokens = getTokens();
final CharTermAttribute termAtt = getTermAtt();
if (tokens.isEmpty()) {
String[] parts;
skipCounter = 0;
while (input.incrementToken()) {
final String text = new String(termAtt.buffer(), 0, termAtt.length());
if (text.isEmpty()) {
return true;
}
parts = text.split("[^a-zA-Z0-9]");
if (parts.length == 0) {
skipCounter += posIncrAttribute.getPositionIncrement();
} else {
if (skipCounter != 0) {
posIncrAttribute.setPositionIncrement(posIncrAttribute.getPositionIncrement() + skipCounter);
}
for (String part : parts) {
if (!part.isEmpty()) {
tokens.add(part);
}
}
break;
}
}
}
return addTerm();
}
/**
* {@inheritDoc}
*/
@Override
public void reset() throws IOException {
super.reset();
skipCounter = 0;
}
/**
* {@inheritDoc}
*/
@Override
public void end() throws IOException {
super.end();
posIncrAttribute.setPositionIncrement(posIncrAttribute.getPositionIncrement() + skipCounter);
}
/**
* {@inheritDoc}
*/
@Override
public int hashCode() {
return new HashCodeBuilder(13, 27)
.appendSuper(super.hashCode())
.append(posIncrAttribute)
.append(skipCounter)
.build();
}
/**
* {@inheritDoc}
*/
@Override
public boolean equals(Object obj) {
if (obj == null) {
return false;
}
if (obj == this) {
return true;
}
if (obj.getClass() != getClass()) {
return false;
}
final AlphaNumericFilter rhs = (AlphaNumericFilter) obj;
return new EqualsBuilder()
.appendSuper(super.equals(obj))
.append(skipCounter, rhs.skipCounter)
.append(posIncrAttribute, rhs.posIncrAttribute)
.isEquals();
}
}
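
A small usage sketch (illustrative, not part of the commit) consuming the filter with a plain Lucene 5.x token-stream loop; per the javadoc table above, "bob-cat #$%" should come out as the two terms bob and cat.

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.owasp.dependencycheck.data.lucene.AlphaNumericFilter;

public class AlphaNumericFilterDemo {
    public static void main(String[] args) throws Exception {
        Tokenizer source = new WhitespaceTokenizer();
        source.setReader(new StringReader("bob-cat #$%"));
        try (TokenStream stream = new AlphaNumericFilter(source)) {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                System.out.println(term.toString());   // prints "bob" then "cat"; "#$%" is skipped
            }
            stream.end();
        }
    }
}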


@@ -17,39 +17,37 @@
*/
package org.owasp.dependencycheck.data.lucene;
import java.io.Reader;
import javax.annotation.concurrent.NotThreadSafe;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.AttributeFactory;
/**
* Tokenizes the input, breaking it into tokens when non-alphanumeric
* characters are found.
*
* @deprecated This class is no longer used after re-factoring the lucene
* analysis.
* @author Jeremy Long
*/
@NotThreadSafe
@Deprecated
public class AlphaNumericTokenizer extends CharTokenizer {
/**
* Constructs a new AlphaNumericTokenizer.
*
* @param matchVersion the lucene version
* @param in the Reader
*/
public AlphaNumericTokenizer(Version matchVersion, Reader in) {
super(matchVersion, in);
public AlphaNumericTokenizer() {
super();
}
/**
* Constructs a new AlphaNumericTokenizer.
*
* @param matchVersion the lucene version
* @param factory the AttributeFactory
* @param in the Reader
*/
public AlphaNumericTokenizer(Version matchVersion, AttributeFactory factory, Reader in) {
super(matchVersion, factory, in);
public AlphaNumericTokenizer(AttributeFactory factory) {
super(factory);
}
/**


@@ -19,7 +19,6 @@ package org.owasp.dependencycheck.data.lucene;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import javax.annotation.concurrent.ThreadSafe;
import org.apache.lucene.util.Version;
/**
* <p>
@@ -31,12 +30,6 @@ import org.apache.lucene.util.Version;
@ThreadSafe
public final class LuceneUtils {
/**
* The current version of Lucene being used. Declaring this one place so an
* upgrade doesn't require hunting through the code base.
*/
public static final Version CURRENT_VERSION = Version.LUCENE_47;
/**
* Private constructor as this is a utility class.
*/


@@ -17,16 +17,15 @@
*/
package org.owasp.dependencycheck.data.lucene;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
/**
* A Lucene field analyzer used to analyze queries against the CPE data.
@@ -35,10 +34,6 @@ import org.apache.lucene.util.Version;
*/
public class SearchFieldAnalyzer extends Analyzer {
/**
* The Lucene Version used.
*/
private final Version version;
/**
* The list of additional stop words to use.
*/
@@ -55,7 +50,7 @@ public class SearchFieldAnalyzer extends Analyzer {
* @return the set of stop words being used
*/
public static CharArraySet getStopWords() {
final CharArraySet words = StopFilter.makeStopSet(LuceneUtils.CURRENT_VERSION, ADDITIONAL_STOP_WORDS, true);
final CharArraySet words = StopFilter.makeStopSet(ADDITIONAL_STOP_WORDS, true);
words.addAll(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
return words;
}
@@ -63,10 +58,8 @@ public class SearchFieldAnalyzer extends Analyzer {
/**
* Constructs a new SearchFieldAnalyzer.
*
* @param version the Lucene version
*/
public SearchFieldAnalyzer(Version version) {
this.version = version;
public SearchFieldAnalyzer() {
stopWords = getStopWords();
}
@@ -74,15 +67,16 @@ public class SearchFieldAnalyzer extends Analyzer {
* Creates the TokenStreamComponents used to analyze the stream.
*
* @param fieldName the field that this lucene analyzer will process
* @param reader a reader containing the tokens
* @return the token stream filter chain
*/
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
final Tokenizer source = new AlphaNumericTokenizer(version, reader);
protected TokenStreamComponents createComponents(String fieldName) {
//final Tokenizer source = new AlphaNumericTokenizer();
final Tokenizer source = new WhitespaceTokenizer();
TokenStream stream = source;
stream = new UrlTokenizingFilter(stream);
stream = new AlphaNumericFilter(stream);
stream = new WordDelimiterFilter(stream,
WordDelimiterFilter.GENERATE_WORD_PARTS
| WordDelimiterFilter.GENERATE_NUMBER_PARTS
@@ -91,9 +85,9 @@ public class SearchFieldAnalyzer extends Analyzer {
| WordDelimiterFilter.SPLIT_ON_NUMERICS
| WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
stream = new LowerCaseFilter(version, stream);
stream = new UrlTokenizingFilter(stream);
stream = new StopFilter(version, stream, stopWords);
stream = new LowerCaseFilter(stream);
stream = new StopFilter(stream, stopWords);
stream = new TokenPairConcatenatingFilter(stream);
return new TokenStreamComponents(source, stream);
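
A brief sketch (illustrative, not part of the commit) of exercising the rebuilt chain through the public Analyzer API; the exact terms produced depend on the WordDelimiterFilter flags and stop words configured above.

import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.owasp.dependencycheck.data.lucene.SearchFieldAnalyzer;

public class SearchFieldAnalyzerDemo {
    public static void main(String[] args) throws Exception {
        Analyzer analyzer = new SearchFieldAnalyzer();
        try (TokenStream stream = analyzer.tokenStream("product",
                new StringReader("Spring-Framework http://www.springsource.org/"))) {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                System.out.println(term.toString());   // whitespace -> URL split -> alphanumeric split
            }                                          // -> word delimiter -> lowercase -> stop words
            stream.end();                              // -> pair concatenation
        }
    }
}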


@@ -18,8 +18,9 @@
package org.owasp.dependencycheck.data.lucene;
import java.io.IOException;
import java.util.LinkedList;
import javax.annotation.concurrent.NotThreadSafe;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -46,9 +47,10 @@ public final class TokenPairConcatenatingFilter extends TokenFilter {
*/
private String previousWord;
/**
* A list of words parsed.
* Tracks whether we are adding a single term or concatenating with the
* previous term.
*/
private final LinkedList<String> words;
private boolean addSingleTerm;
/**
* Constructs a new TokenPairConcatenatingFilter.
@@ -57,7 +59,8 @@ public final class TokenPairConcatenatingFilter extends TokenFilter {
*/
public TokenPairConcatenatingFilter(TokenStream stream) {
super(stream);
words = new LinkedList<>();
addSingleTerm = true;
previousWord = null;
}
/**
@@ -70,86 +73,83 @@ public final class TokenPairConcatenatingFilter extends TokenFilter {
*/
@Override
public boolean incrementToken() throws IOException {
//collect all the terms into the words collection
while (input.incrementToken()) {
final String word = new String(termAtt.buffer(), 0, termAtt.length());
words.add(word);
}
//if we have a previousTerm - write it out as its own token concatenated
// with the current word (if one is available).
if (previousWord != null && !words.isEmpty()) {
final String word = words.getFirst();
if (addSingleTerm && previousWord != null) {
addSingleTerm = false;
clearAttributes();
termAtt.append(previousWord).append(word);
previousWord = null;
termAtt.append(previousWord);
return true;
}
//if we have words, write it out as a single token
if (!words.isEmpty()) {
final String word = words.removeFirst();
clearAttributes();
termAtt.append(word);
previousWord = word;
} else if (input.incrementToken()) {
final String word = new String(termAtt.buffer(), 0, termAtt.length());
if (addSingleTerm) {
clearAttributes();
termAtt.append(word);
previousWord = word;
addSingleTerm = false;
} else {
clearAttributes();
termAtt.append(previousWord).append(word);
previousWord = word;
addSingleTerm = true;
}
return true;
}
return false;
}
/**
* <p>
* Resets the Filter and clears any internal state data that may have been
* left-over from previous uses of the Filter.</p>
* <p>
* <b>If this Filter is re-used this method must be called between
* uses.</b></p>
*
* @throws java.io.IOException thrown if there is an error resetting the
* filter
* {@inheritDoc}
*/
@Override
public void end() throws IOException {
super.end();
previousWord = null;
words.clear();
addSingleTerm = true;
}
/**
* Standard hash code implementation.
*
* @return the hash code
* {@inheritDoc}
*/
@Override
public void reset() throws IOException {
super.reset();
previousWord = null;
addSingleTerm = true;
}
/**
* {@inheritDoc}
*/
@Override
public int hashCode() {
int hash = 3;
hash = 31 * hash + (this.termAtt != null ? this.termAtt.hashCode() : 0);
hash = 31 * hash + (this.previousWord != null ? this.previousWord.hashCode() : 0);
hash = 31 * hash + (this.words != null ? this.words.hashCode() : 0);
return hash;
return new HashCodeBuilder(13, 27)
.appendSuper(super.hashCode())
.append(addSingleTerm)
.append(previousWord)
.append(termAtt)
.build();
}
/**
* Standard equals implementation.
*
* @param obj the object to compare
* @return true if the objects are equal; otherwise false.
* {@inheritDoc}
*/
@Override
public boolean equals(Object obj) {
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
if (obj == this) {
return true;
}
if (obj.getClass() != getClass()) {
return false;
}
final TokenPairConcatenatingFilter other = (TokenPairConcatenatingFilter) obj;
if (this.termAtt != other.termAtt && (this.termAtt == null || !this.termAtt.equals(other.termAtt))) {
return false;
}
if ((this.previousWord == null) ? (other.previousWord != null) : !this.previousWord.equals(other.previousWord)) {
return false;
}
return !(this.words != other.words && (this.words == null || !this.words.equals(other.words)));
final TokenPairConcatenatingFilter rhs = (TokenPairConcatenatingFilter) obj;
return new EqualsBuilder()
.appendSuper(super.equals(obj))
.append(addSingleTerm, rhs.addSingleTerm)
.append(previousWord, rhs.previousWord)
.append(termAtt, rhs.termAtt)
.isEquals();
}
}
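
A worked trace of the rewritten incrementToken() for the input "red blue green", matching the expectations added in TokenPairConcatenatingFilterTest below (initial state: addSingleTerm = true, previousWord = null):

// call 1: read "red"      -> emit "red"        (previousWord = "red",   addSingleTerm = false)
// call 2: read "blue"     -> emit "redblue"    (previousWord = "blue",  addSingleTerm = true)
// call 3: no read         -> emit "blue"       (addSingleTerm = false)
// call 4: read "green"    -> emit "bluegreen"  (previousWord = "green", addSingleTerm = true)
// call 5: no read         -> emit "green"      (addSingleTerm = false)
// call 6: input exhausted -> return false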


@@ -43,7 +43,7 @@ public final class UrlTokenizingFilter extends AbstractTokenizingFilter {
private static final Logger LOGGER = LoggerFactory.getLogger(UrlTokenizingFilter.class);
/**
* Constructs a new VersionTokenizingFilter.
* Constructs a new UrlTokenizingFilter.
*
* @param stream the TokenStream that this filter will process
*/


@@ -820,6 +820,11 @@ public final class CveDB implements AutoCloseable {
}
}
/**
* Returns the size of the batch.
*
* @return the size of the batch
*/
private int getBatchSize() {
int max;
try {
@@ -830,6 +835,12 @@ public final class CveDB implements AutoCloseable {
return max;
}
/**
* Determines whether or not batch insert is enabled.
*
* @return <code>true</code> if batch insert is enabled; otherwise
* <code>false</code>
*/
private boolean isBatchInsertEnabled() {
boolean batch = false;
try {
@@ -841,25 +852,34 @@ public final class CveDB implements AutoCloseable {
return batch;
}
/**
* Generates a logging message for batch inserts.
*
* @param pCountReferences the number of batch statements executed
* @param pFormat a Java String.format string
* @return the formatted string
*/
private String getLogForBatchInserts(int pCountReferences, String pFormat) {
return String.format(pFormat, pCountReferences, new Date());
}
/**
* Executes batch inserts of vulnerabilities when property
* database.batchinsert.maxsize is reached
* database.batchinsert.maxsize is reached.
*
* @param pVulnerability
* @param pVulnerableSoftware
* @param pInsertSoftware
* @throws SQLException
* @param pVulnerability the vulnerability
* @param pVulnerableSoftware the vulnerable software
* @param pInsertSoftware the prepared statement to batch execute
* @throws SQLException thrown when the batch cannot be executed
*/
private void executeBatch(Vulnerability pVulnerability, VulnerableSoftware pVulnerableSoftware, PreparedStatement pInsertSoftware) throws SQLException {
private void executeBatch(Vulnerability pVulnerability, VulnerableSoftware pVulnerableSoftware, PreparedStatement pInsertSoftware)
throws SQLException {
try {
pInsertSoftware.executeBatch();
} catch (SQLException ex) {
if (ex.getMessage().contains("Duplicate entry")) {
final String msg = String.format("Duplicate software key identified in '%s:%s'", pVulnerability.getName(), pVulnerableSoftware.getName());
final String msg = String.format("Duplicate software key identified in '%s:%s'",
pVulnerability.getName(), pVulnerableSoftware.getName());
LOGGER.info(msg, ex);
} else {
throw ex;
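
A generic JDBC sketch (not CveDB's actual code; the table and column names are made up) of the batch-insert pattern these javadoc additions describe: rows accumulate via addBatch() and are flushed with executeBatch() once the configured database.batchinsert.maxsize is reached.

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;

final class BatchInsertSketch {
    /** Inserts each CPE string, flushing the batch whenever batchSize rows are pending. */
    static void insertSoftware(Connection conn, String[] cpes, int batchSize) throws SQLException {
        try (PreparedStatement ps = conn.prepareStatement("INSERT INTO software (cpe) VALUES (?)")) {
            int pending = 0;
            for (String cpe : cpes) {
                ps.setString(1, cpe);
                ps.addBatch();
                if (++pending >= batchSize) {      // database.batchinsert.maxsize reached
                    ps.executeBatch();
                    pending = 0;
                }
            }
            if (pending > 0) {
                ps.executeBatch();                 // flush the remainder
            }
        }
    }
}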


@@ -72,7 +72,8 @@ public class H2DBLock {
private final String magic;
/**
* The shutdown hook used to remove the lock file in case of an unexpected shutdown.
* The shutdown hook used to remove the lock file in case of an unexpected
* shutdown.
*/
private H2DBShutdownHook hook = null;
@@ -161,11 +162,19 @@ public class H2DBLock {
}
}
/**
* Checks the state of the custom h2 lock file and under some conditions
* will attempt to remove the lock file.
*
* @throws H2DBLockException thrown if the lock directory does not exist and
* cannot be created
*/
private void checkState() throws H2DBLockException {
if (!lockFile.getParentFile().isDirectory() && !lockFile.mkdir()) {
throw new H2DBLockException("Unable to create path to data directory.");
}
if (lockFile.isFile()) {
//TODO - this 30 minute check needs to be configurable.
if (getFileAge(lockFile) > 30) {
LOGGER.debug("An old db update lock file was found: {}", lockFile.getAbsolutePath());
if (!lockFile.delete()) {
@@ -232,6 +241,9 @@ public class H2DBLock {
return time;
}
/**
* Adds the shutdown hook to the JVM.
*/
private void addShutdownHook() {
if (hook == null) {
hook = H2DBShutdownHookFactory.getHook(settings);
@@ -240,6 +252,9 @@ public class H2DBLock {
}
}
/**
* Removes the shutdown hook.
*/
private void removeShutdownHook() {
if (hook != null) {
hook.remove();
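
A hedged sketch of the stale-lock test checkState() performs: a lock file older than the (currently hard-coded, per the TODO above) 30-minute threshold is treated as abandoned and removed. The helper below is illustrative, not H2DBLock's actual getFileAge() implementation.

import java.io.File;
import java.util.concurrent.TimeUnit;

final class StaleLockSketch {
    /** Returns true if the lock file exists and is older than maxAgeMinutes. */
    static boolean isStale(File lockFile, long maxAgeMinutes) {
        final long ageMinutes = TimeUnit.MILLISECONDS.toMinutes(
                System.currentTimeMillis() - lockFile.lastModified());
        return lockFile.isFile() && ageMinutes > maxAgeMinutes;
    }
}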


@@ -32,6 +32,13 @@ public final class H2DBShutdownHookFactory {
*/
private static final Logger LOGGER = LoggerFactory.getLogger(H2DBShutdownHookFactory.class);
/**
* Empty constructor for utility class.
*/
private H2DBShutdownHookFactory() {
//empty
}
/**
* Creates a new H2DB Shutdown Hook.
*
@@ -40,8 +47,8 @@ public final class H2DBShutdownHookFactory {
*/
public static H2DBShutdownHook getHook(Settings settings) {
try {
String className = settings.getString(Settings.KEYS.H2DB_SHUTDOWN_HOOK, "org.owasp.dependencycheck.utils.H2DBCleanupHook");
Class type = Class.forName(className);
final String className = settings.getString(Settings.KEYS.H2DB_SHUTDOWN_HOOK, "org.owasp.dependencycheck.utils.H2DBCleanupHook");
final Class type = Class.forName(className);
return (H2DBShutdownHook) type.newInstance();
} catch (ClassNotFoundException | InstantiationException | IllegalAccessException ex) {
LOGGER.debug("Failed to instantiate {}, using default shutdown hook instead", ex);


@@ -65,7 +65,8 @@
9. mail_project is ruby library
10. ldap_project is part of type3 written in php
11. user import project is used in drupal (i.e. php)
12. xml_sec is a C library for XML security
12. root is a c++ project https://github.com/root-project/root/
13. xml_sec is a C library for XML security
]]></notes>
<filePath regex="true">.*(\.(dll|jar|ear|war|pom|nupkg|nuspec)|pom\.xml|package.json)$</filePath>
<cpe>cpe:/a:sandbox:sandbox</cpe>
@@ -82,6 +83,7 @@
<cpe>cpe:/a:mail_project:mail</cpe>
<cpe>cpe:/a:ldap_project:ldap</cpe>
<cpe>cpe:/a:user_import_project:user_import</cpe>
<cpe>cpe:/a:root:root</cpe>
<cpe>cpe:/a:xmlsec_project:xmlsec</cpe>
</suppress>
<suppress base="true">
@@ -972,6 +974,97 @@
<cpe>cpe:/a:git:git</cpe>
<cpe>cpe:/a:git_project:git</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<filePath regex="true">.*winstone-?(\d*\.?){0,3}\.jar</filePath>
<cpe>cpe:/a:jetty:jetty</cpe>
<cpe>cpe:/a:eclipse:jetty</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.apache\.maven\.wagon:wagon-webdav-jackrabbit:.*$</gav>
<cpe>cpe:/a:apache:jackrabbit</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.apache\.xbean:xbean-reflect:.*$</gav>
<cpe>cpe:/a:apache:geronimo</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.eclipse\.jetty\.orbit:javax\.annotation:.*$</gav>
<cpe>cpe:/a:eclipse:jetty</cpe>
<cpe>cpe:/a:jetty:jetty</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.eclipse\.jetty\.websocket:websocket-api:.*$</gav>
<cpe>cpe:/a:eclipse:jetty</cpe>
<cpe>cpe:/a:jetty:jetty</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup: com.amazonaws is a drupal project
]]></notes>
<gav regex="true">^com\.amazonaws:jmespath-java:.*$</gav>
<cpe>cpe:/a:amazon_aws_project:amazon_aws</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup: apache_test CPE is referencing Perl code.
]]></notes>
<gav regex="true">^org\.apache\.ant:ant-testutil:.*$</gav>
<cpe>cpe:/a:apache:apache_test</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup: CPE is for git, not the git provider
]]></notes>
<gav regex="true">^org\.apache\.maven\.scm:maven-scm-provider-git-commons:.*$</gav>
<cpe>cpe:/a:git-scm:git</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.eclipse\.jetty\.orbit:org\.apache\.taglibs\.standard\.glassfish:.*$</gav>
<cpe>cpe:/a:jetty:jetty</cpe>
<cpe>cpe:/a:eclipse:jetty</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.eclipse\.jetty\.orbit:com\.sun\.el:.*$</gav>
<cpe>cpe:/a:jetty:jetty</cpe>
<cpe>cpe:/a:eclipse:jetty</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup: client vs. server mismatch
]]></notes>
<gav regex="true">^org\.samba\.jcifs:jcifs:.*$</gav>
<cpe>cpe:/a:samba:samba</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.codehaus\.plexus:plexus-utils:.*$</gav>
<cpe>cpe:/a:spice_project:spice</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[


@@ -0,0 +1,104 @@
/*
* Copyright 2017 OWASP.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.owasp.dependencycheck.data.lucene;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkOneTerm;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkRandomData;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import static org.apache.lucene.util.LuceneTestCase.RANDOM_MULTIPLIER;
import static org.apache.lucene.util.LuceneTestCase.random;
import org.junit.Test;
import static org.junit.Assert.*;
/**
*
* @author jeremy
*/
public class AlphaNumericFilterTest extends BaseTokenStreamTestCase {
private final Analyzer analyzer;
public AlphaNumericFilterTest() {
analyzer = new Analyzer() {
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new Analyzer.TokenStreamComponents(source, new AlphaNumericFilter(source));
}
};
}
/**
* Test of incrementToken method, of class AlphaNumericFilter.
*/
@Test
public void testIncrementToken() throws Exception {
String[] expected = new String[6];
expected[0] = "http";
expected[1] = "www";
expected[2] = "domain";
expected[3] = "com";
expected[4] = "test";
expected[5] = "php";
assertAnalyzesTo(analyzer, "http://www.domain.com/test.php", expected);
}
/**
* Test of incrementToken method, of class AlphaNumericFilter.
*/
@Test
public void testGarbage() throws Exception {
String[] expected = new String[2];
expected[0] = "test";
expected[1] = "two";
assertAnalyzesTo(analyzer, "!@#$% !@#$ &*(@#$ test-two @#$%", expected);
}
/**
* copied from
* http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
* blast some random strings through the analyzer
*/
public void testRandomStrings() {
try {
checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
} catch (IOException ex) {
fail("Failed test random strings: " + ex.getMessage());
}
}
/**
* copied from
* http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
*
* @throws IOException
*/
public void testEmptyTerm() throws IOException {
Analyzer a = new Analyzer() {
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new KeywordTokenizer();
return new Analyzer.TokenStreamComponents(tokenizer, new AlphaNumericFilter(tokenizer));
}
};
checkOneTerm(a, "", "");
}
}


@@ -52,7 +52,7 @@ public class FieldAnalyzerTest extends BaseTest {
@Test
public void testAnalyzers() throws Exception {
Analyzer analyzer = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
Analyzer analyzer = new SearchFieldAnalyzer();
Directory index = new RAMDirectory();
String field1 = "product";
@@ -68,16 +68,16 @@ public class FieldAnalyzerTest extends BaseTest {
addDoc(w, field1, text1, field2, text2);
}
//Analyzer searchingAnalyzer = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
//Analyzer searchingAnalyzer = new SearchFieldAnalyzer();
String querystr = "product:\"(Spring Framework Core)\" vendor:(SpringSource)";
SearchFieldAnalyzer searchAnalyzerProduct = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
SearchFieldAnalyzer searchAnalyzerVendor = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
SearchFieldAnalyzer searchAnalyzerProduct = new SearchFieldAnalyzer();
SearchFieldAnalyzer searchAnalyzerVendor = new SearchFieldAnalyzer();
HashMap<String, Analyzer> map = new HashMap<>();
map.put(field1, searchAnalyzerProduct);
map.put(field2, searchAnalyzerVendor);
PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(LuceneUtils.CURRENT_VERSION), map);
QueryParser parser = new QueryParser(LuceneUtils.CURRENT_VERSION, field1, wrapper);
PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(), map);
QueryParser parser = new QueryParser(field1, wrapper);
Query q = parser.parse(querystr);
@@ -85,7 +85,7 @@ public class FieldAnalyzerTest extends BaseTest {
IndexReader reader = DirectoryReader.open(index);
IndexSearcher searcher = new IndexSearcher(reader);
TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage);
searcher.search(q, collector);
ScoreDoc[] hits = collector.topDocs().scoreDocs;
@@ -99,7 +99,7 @@ public class FieldAnalyzerTest extends BaseTest {
querystr = "product:( x-stream^5 ) AND vendor:( thoughtworks.xstream )";
Query q3 = parser.parse(querystr);
collector = TopScoreDocCollector.create(hitsPerPage, true);
collector = TopScoreDocCollector.create(hitsPerPage);
searcher.search(q3, collector);
hits = collector.topDocs().scoreDocs;
assertEquals("x-stream", searcher.doc(hits[0].doc).get(field1));
@@ -107,7 +107,7 @@ public class FieldAnalyzerTest extends BaseTest {
}
private IndexWriter createIndex(Analyzer analyzer, Directory index) throws IOException {
IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.CURRENT_VERSION, analyzer);
IndexWriterConfig config = new IndexWriterConfig(analyzer);
return new IndexWriter(index, config);
}


@@ -18,14 +18,19 @@
package org.owasp.dependencycheck.data.lucene;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertAnalyzesTo;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkOneTerm;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkRandomData;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.junit.After;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import static org.apache.lucene.util.LuceneTestCase.RANDOM_MULTIPLIER;
import static org.apache.lucene.util.LuceneTestCase.random;
import static org.junit.Assert.fail;
import org.junit.Before;
import org.junit.Test;
/**
*
@@ -33,25 +38,64 @@ import org.junit.Before;
*/
public class TokenPairConcatenatingFilterTest extends BaseTokenStreamTestCase {
@Override
@Before
public void setUp() throws Exception {
super.setUp();
}
private final Analyzer analyzer;
@Override
@After
public void tearDown() throws Exception {
super.tearDown();
public TokenPairConcatenatingFilterTest() {
analyzer = new Analyzer() {
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new Analyzer.TokenStreamComponents(source, new TokenPairConcatenatingFilter(source));
}
};
}
/**
* test some examples
* Test of incrementToken method, of class TokenPairConcatenatingFilter.
*/
public void testExamples() throws IOException {
Tokenizer wsTokenizer = new WhitespaceTokenizer(LuceneUtils.CURRENT_VERSION, new StringReader("one two three"));
TokenStream filter = new TokenPairConcatenatingFilter(wsTokenizer);
assertTokenStreamContents(filter,
new String[]{"one", "onetwo", "two", "twothree", "three"});
@Test
public void testIncrementToken() throws Exception {
String[] expected = new String[5];
expected[0] = "red";
expected[1] = "redblue";
expected[2] = "blue";
expected[3] = "bluegreen";
expected[4] = "green";
assertAnalyzesTo(analyzer, "red blue green", expected);
}
/**
* copied from
* http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
* blast some random strings through the analyzer
*/
public void testRandomStrings() {
try {
checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
} catch (IOException ex) {
fail("Failed test random strings: " + ex.getMessage());
}
}
/**
* copied from
* http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
*
* @throws IOException
*/
public void testEmptyTerm() {
Analyzer a = new Analyzer() {
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new KeywordTokenizer();
return new Analyzer.TokenStreamComponents(tokenizer, new TokenPairConcatenatingFilter(tokenizer));
}
};
try {
checkOneTerm(a, "", "");
} catch (IOException ex) {
fail("Failed test random strings: " + ex.getMessage());
}
}
}


@@ -18,7 +18,6 @@
package org.owasp.dependencycheck.data.lucene;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
@@ -36,9 +35,8 @@ public class UrlTokenizingFilterTest extends BaseTokenStreamTestCase {
public UrlTokenizingFilterTest() {
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new UrlTokenizingFilter(source));
}
};
@@ -77,8 +75,8 @@ public class UrlTokenizingFilterTest extends BaseTokenStreamTestCase {
public void testEmptyTerm() throws IOException {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new KeywordTokenizer(reader);
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new UrlTokenizingFilter(tokenizer));
}
};


@@ -124,7 +124,7 @@ Copyright (c) 2012 - Jeremy Long
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<github.global.server>github</github.global.server>
<apache.lucene.version>4.7.2</apache.lucene.version>
<apache.lucene.version>5.5.5</apache.lucene.version>
<apache.ant.version>1.9.9</apache.ant.version>
<!--upgrading to 1.8 requires Java 8 compatibility - we are maintaining 7 atm-->
<slf4j.version>1.7.25</slf4j.version>