Merge pull request #1048 from jeremylong/luceneUpgrade

Lucene upgrade, bug fixes, and general cleanup
Jeremy Long
2017-12-29 06:26:25 -05:00
committed by GitHub
18 changed files with 575 additions and 149 deletions


@@ -82,9 +82,11 @@ public class CPEAnalyzer extends AbstractAnalyzer {
private static final String WEIGHTING_BOOST = "^5";
/**
* A string representation of a regular expression defining characters
* utilized within the CPE Names.
* utilized within the CPE Names. Note, the :/ are included so URLs are
* passed into the Lucene query so that the specialized tokenizer can parse
* them.
*/
private static final String CLEANSE_CHARACTER_RX = "[^A-Za-z0-9 ._-]";
private static final String CLEANSE_CHARACTER_RX = "[^A-Za-z0-9 ._:/-]";
/**
* A string representation of a regular expression used to remove all but
* alpha characters.
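
A minimal sketch of what widening CLEANSE_CHARACTER_RX changes (the CleanseSketch class and cleanse step below are illustrative, not CPEAnalyzer code, and assume matches are replaced with a space): with ':' and '/' retained, a URL in the search text survives cleansing and can reach the specialized Lucene tokenizer intact.

public class CleanseSketch {
    private static final String OLD_RX = "[^A-Za-z0-9 ._-]";
    private static final String NEW_RX = "[^A-Za-z0-9 ._:/-]";

    public static void main(String[] args) {
        String text = "apache struts http://www.domain.com/test.php";
        // old behavior: URL separators are stripped before the query is built
        System.out.println(text.replaceAll(OLD_RX, " ")); // apache struts http   www.domain.com test.php
        // new behavior: ':' and '/' survive, so the URL reaches the specialized tokenizer
        System.out.println(text.replaceAll(NEW_RX, " ")); // apache struts http://www.domain.com/test.php
    }
}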


@@ -40,7 +40,6 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
import org.owasp.dependencycheck.data.lucene.LuceneUtils;
import org.owasp.dependencycheck.data.lucene.SearchFieldAnalyzer;
import org.owasp.dependencycheck.data.nvdcve.CveDB;
import org.owasp.dependencycheck.data.nvdcve.DatabaseException;
@@ -130,7 +129,7 @@ public final class CpeMemoryIndex implements AutoCloseable {
}
indexSearcher = new IndexSearcher(indexReader);
searchingAnalyzer = createSearchingAnalyzer();
queryParser = new QueryParser(LuceneUtils.CURRENT_VERSION, Fields.DOCUMENT_KEY, searchingAnalyzer);
queryParser = new QueryParser(Fields.DOCUMENT_KEY, searchingAnalyzer);
}
}
@@ -151,8 +150,8 @@ public final class CpeMemoryIndex implements AutoCloseable {
private Analyzer createSearchingAnalyzer() {
final Map<String, Analyzer> fieldAnalyzers = new HashMap<>();
fieldAnalyzers.put(Fields.DOCUMENT_KEY, new KeywordAnalyzer());
final SearchFieldAnalyzer productFieldAnalyzer = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
final SearchFieldAnalyzer vendorFieldAnalyzer = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
final SearchFieldAnalyzer productFieldAnalyzer = new SearchFieldAnalyzer();
final SearchFieldAnalyzer vendorFieldAnalyzer = new SearchFieldAnalyzer();
fieldAnalyzers.put(Fields.PRODUCT, productFieldAnalyzer);
fieldAnalyzers.put(Fields.VENDOR, vendorFieldAnalyzer);
@@ -196,7 +195,7 @@ public final class CpeMemoryIndex implements AutoCloseable {
*/
private void buildIndex(CveDB cve) throws IndexException {
try (Analyzer analyzer = createSearchingAnalyzer();
IndexWriter indexWriter = new IndexWriter(index, new IndexWriterConfig(LuceneUtils.CURRENT_VERSION, analyzer))) {
IndexWriter indexWriter = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
// Tip: reuse the Document and Fields for performance...
// See "Re-use Document and Field instances" from
// http://wiki.apache.org/lucene-java/ImproveIndexingSpeed
@@ -215,7 +214,7 @@ public final class CpeMemoryIndex implements AutoCloseable {
}
}
indexWriter.commit();
indexWriter.close(true);
indexWriter.close();
} catch (DatabaseException ex) {
LOGGER.debug("", ex);
throw new IndexException("Error reading CPE data", ex);
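
For reference, a self-contained sketch of the Lucene 5.x API this file migrates to: IndexWriterConfig, QueryParser, and StandardAnalyzer no longer take a Version argument, and IndexWriter.close() no longer takes a boolean. The class and field names below are illustrative, not dependency-check code.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class Lucene5ApiSketch {
    public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        Analyzer analyzer = new StandardAnalyzer();                        // no Version argument in 5.x
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            Document doc = new Document();
            doc.add(new TextField("product", "spring framework", Field.Store.YES));
            writer.addDocument(doc);
            writer.commit();
        }                                                                  // close() no longer takes a boolean
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            QueryParser parser = new QueryParser("product", analyzer);     // field + analyzer only
            TopDocs hits = searcher.search(parser.parse("spring"), 10);
            System.out.println(hits.totalHits + " hit(s)");
        }
    }
}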


@@ -17,6 +17,7 @@
*/
package org.owasp.dependencycheck.data.lucene;
import java.io.IOException;
import java.util.LinkedList;
import javax.annotation.concurrent.NotThreadSafe;
import org.apache.lucene.analysis.TokenFilter;
@@ -37,6 +38,11 @@ public abstract class AbstractTokenizingFilter extends TokenFilter {
*/
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**
* A collection of tokens to add to the stream.
*/
private final LinkedList<String> tokens;
/**
* Gets the CharTermAttribute.
*
@@ -45,10 +51,6 @@ public abstract class AbstractTokenizingFilter extends TokenFilter {
protected CharTermAttribute getTermAtt() {
return termAtt;
}
/**
* A collection of tokens to add to the stream.
*/
private final LinkedList<String> tokens;
/**
* Gets the list of tokens.
@@ -69,6 +71,15 @@ public abstract class AbstractTokenizingFilter extends TokenFilter {
tokens = new LinkedList<>();
}
/**
* {@inheritDoc}
*/
@Override
public void reset() throws IOException {
super.reset();
tokens.clear();
}
/**
* Adds a term, if one exists, from the tokens collection.
*


@@ -0,0 +1,148 @@
/*
* This file is part of dependency-check-core.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright (c) 2017 Jeremy Long. All Rights Reserved.
*/
package org.owasp.dependencycheck.data.lucene;
import java.io.IOException;
import java.util.LinkedList;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
/**
* A simple alphanumeric filter that removes non-alphanumeric characters from
* the terms. If a term contains a non-alphanumeric character it may be split
* into multiple terms:
*
* <table>
* <tr><th>term</th><th>results in</th></tr>
* <tr><td>bob</td><td>bob</td></tr>
* <tr><td>bob-cat</td><td>bob cat</td></tr>
* <tr><td>#$%</td><td>[skipped]</td></tr>
* </table>
*
* @author jeremy long
*/
public final class AlphaNumericFilter extends AbstractTokenizingFilter {
/**
* The position increment attribute.
*/
private final PositionIncrementAttribute posIncrAttribute = addAttribute(PositionIncrementAttribute.class);
/**
* Used to count the number of terms skipped as they were only made up of
* special characters.
*/
private int skipCounter;
/**
* Constructs a new AlphaNumericFilter.
*
* @param stream the TokenStream that this filter will process
*/
public AlphaNumericFilter(TokenStream stream) {
super(stream);
}
/**
* {@inheritDoc}
*/
@Override
public boolean incrementToken() throws IOException {
final LinkedList<String> tokens = getTokens();
final CharTermAttribute termAtt = getTermAtt();
if (tokens.isEmpty()) {
String[] parts;
skipCounter = 0;
while (input.incrementToken()) {
final String text = new String(termAtt.buffer(), 0, termAtt.length());
if (text.isEmpty()) {
return true;
}
parts = text.split("[^a-zA-Z0-9]");
if (parts.length == 0) {
skipCounter += posIncrAttribute.getPositionIncrement();
} else {
if (skipCounter != 0) {
posIncrAttribute.setPositionIncrement(posIncrAttribute.getPositionIncrement() + skipCounter);
}
for (String part : parts) {
if (!part.isEmpty()) {
tokens.add(part);
}
}
break;
}
}
}
return addTerm();
}
/**
* {@inheritDoc}
*/
@Override
public void reset() throws IOException {
super.reset();
skipCounter = 0;
}
/**
* {@inheritDoc}
*/
@Override
public void end() throws IOException {
super.end();
posIncrAttribute.setPositionIncrement(posIncrAttribute.getPositionIncrement() + skipCounter);
}
/**
* {@inheritDoc}
*/
@Override
public int hashCode() {
return new HashCodeBuilder(13, 27)
.appendSuper(super.hashCode())
.append(posIncrAttribute)
.append(skipCounter)
.build();
}
/**
* {@inheritDoc}
*/
@Override
public boolean equals(Object obj) {
if (obj == null) {
return false;
}
if (obj == this) {
return true;
}
if (obj.getClass() != getClass()) {
return false;
}
final AlphaNumericFilter rhs = (AlphaNumericFilter) obj;
return new EqualsBuilder()
.appendSuper(super.equals(obj))
.append(skipCounter, rhs.skipCounter)
.append(posIncrAttribute, rhs.posIncrAttribute)
.isEquals();
}
}
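
A small usage sketch (illustrative, not part of the commit) consuming the filter with a plain Lucene 5.x token-stream loop; per the javadoc table above, "bob-cat #$%" should come out as the two terms bob and cat.

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.owasp.dependencycheck.data.lucene.AlphaNumericFilter;

public class AlphaNumericFilterDemo {
    public static void main(String[] args) throws Exception {
        Tokenizer source = new WhitespaceTokenizer();
        source.setReader(new StringReader("bob-cat #$%"));
        try (TokenStream stream = new AlphaNumericFilter(source)) {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                System.out.println(term.toString());   // prints "bob" then "cat"; "#$%" is skipped
            }
            stream.end();
        }
    }
}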


@@ -17,39 +17,37 @@
*/
package org.owasp.dependencycheck.data.lucene;
import java.io.Reader;
import javax.annotation.concurrent.NotThreadSafe;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.AttributeFactory;
/**
* Tokenizes the input, breaking it into tokens when non-alphanumeric
* characters are found.
*
* @deprecated This class is no longer used after re-factoring the lucene
* analysis.
* @author Jeremy Long
*/
@NotThreadSafe
@Deprecated
public class AlphaNumericTokenizer extends CharTokenizer {
/**
* Constructs a new AlphaNumericTokenizer.
*
* @param matchVersion the lucene version
* @param in the Reader
*/
public AlphaNumericTokenizer(Version matchVersion, Reader in) {
super(matchVersion, in);
public AlphaNumericTokenizer() {
super();
}
/**
* Constructs a new AlphaNumericTokenizer.
*
* @param matchVersion the lucene version
* @param factory the AttributeFactory
* @param in the Reader
*/
public AlphaNumericTokenizer(Version matchVersion, AttributeFactory factory, Reader in) {
super(matchVersion, factory, in);
public AlphaNumericTokenizer(AttributeFactory factory) {
super(factory);
}
/**


@@ -19,7 +19,6 @@ package org.owasp.dependencycheck.data.lucene;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import javax.annotation.concurrent.ThreadSafe;
import org.apache.lucene.util.Version;
/**
* <p>
@@ -31,12 +30,6 @@ import org.apache.lucene.util.Version;
@ThreadSafe
public final class LuceneUtils {
/**
* The current version of Lucene being used. Declaring this one place so an
* upgrade doesn't require hunting through the code base.
*/
public static final Version CURRENT_VERSION = Version.LUCENE_47;
/**
* Private constructor as this is a utility class.
*/


@@ -17,16 +17,15 @@
*/
package org.owasp.dependencycheck.data.lucene;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
/**
* A Lucene field analyzer used to analyze queries against the CPE data.
@@ -35,10 +34,6 @@ import org.apache.lucene.util.Version;
*/
public class SearchFieldAnalyzer extends Analyzer {
/**
* The Lucene Version used.
*/
private final Version version;
/**
* The list of additional stop words to use.
*/
@@ -55,7 +50,7 @@ public class SearchFieldAnalyzer extends Analyzer {
* @return the set of stop words being used
*/
public static CharArraySet getStopWords() {
final CharArraySet words = StopFilter.makeStopSet(LuceneUtils.CURRENT_VERSION, ADDITIONAL_STOP_WORDS, true);
final CharArraySet words = StopFilter.makeStopSet(ADDITIONAL_STOP_WORDS, true);
words.addAll(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
return words;
}
@@ -63,10 +58,8 @@ public class SearchFieldAnalyzer extends Analyzer {
/**
* Constructs a new SearchFieldAnalyzer.
*
* @param version the Lucene version
*/
public SearchFieldAnalyzer(Version version) {
this.version = version;
public SearchFieldAnalyzer() {
stopWords = getStopWords();
}
@@ -74,15 +67,16 @@ public class SearchFieldAnalyzer extends Analyzer {
* Creates the TokenStreamComponents used to analyze the stream.
*
* @param fieldName the field that this lucene analyzer will process
* @param reader a reader containing the tokens
* @return the token stream filter chain
*/
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
final Tokenizer source = new AlphaNumericTokenizer(version, reader);
protected TokenStreamComponents createComponents(String fieldName) {
//final Tokenizer source = new AlphaNumericTokenizer();
final Tokenizer source = new WhitespaceTokenizer();
TokenStream stream = source;
stream = new UrlTokenizingFilter(stream);
stream = new AlphaNumericFilter(stream);
stream = new WordDelimiterFilter(stream,
WordDelimiterFilter.GENERATE_WORD_PARTS
| WordDelimiterFilter.GENERATE_NUMBER_PARTS
@@ -91,9 +85,9 @@ public class SearchFieldAnalyzer extends Analyzer {
| WordDelimiterFilter.SPLIT_ON_NUMERICS
| WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
stream = new LowerCaseFilter(version, stream);
stream = new UrlTokenizingFilter(stream);
stream = new StopFilter(version, stream, stopWords);
stream = new LowerCaseFilter(stream);
stream = new StopFilter(stream, stopWords);
stream = new TokenPairConcatenatingFilter(stream);
return new TokenStreamComponents(source, stream);
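
A brief sketch (illustrative, not part of the commit) of exercising the rebuilt chain through the public Analyzer API; the exact terms produced depend on the WordDelimiterFilter flags and stop words configured above.

import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.owasp.dependencycheck.data.lucene.SearchFieldAnalyzer;

public class SearchFieldAnalyzerDemo {
    public static void main(String[] args) throws Exception {
        Analyzer analyzer = new SearchFieldAnalyzer();
        try (TokenStream stream = analyzer.tokenStream("product",
                new StringReader("Spring-Framework http://www.springsource.org/"))) {
            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                System.out.println(term.toString());   // whitespace -> URL split -> alphanumeric split
            }                                          // -> word delimiter -> lowercase -> stop words
            stream.end();                              // -> pair concatenation
        }
    }
}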


@@ -18,8 +18,9 @@
package org.owasp.dependencycheck.data.lucene;
import java.io.IOException;
import java.util.LinkedList;
import javax.annotation.concurrent.NotThreadSafe;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -46,9 +47,10 @@ public final class TokenPairConcatenatingFilter extends TokenFilter {
*/
private String previousWord;
/**
* A list of words parsed.
* Tracks whether we are adding a single term or concatenating with the
* previous term.
*/
private final LinkedList<String> words;
private boolean addSingleTerm;
/**
* Constructs a new TokenPairConcatenatingFilter.
@@ -57,7 +59,8 @@ public final class TokenPairConcatenatingFilter extends TokenFilter {
*/
public TokenPairConcatenatingFilter(TokenStream stream) {
super(stream);
words = new LinkedList<>();
addSingleTerm = true;
previousWord = null;
}
/**
@@ -70,86 +73,83 @@ public final class TokenPairConcatenatingFilter extends TokenFilter {
*/
@Override
public boolean incrementToken() throws IOException {
//collect all the terms into the words collection
while (input.incrementToken()) {
final String word = new String(termAtt.buffer(), 0, termAtt.length());
words.add(word);
}
//if we have a previousTerm - write it out as its own token concatenated
// with the current word (if one is available).
if (previousWord != null && !words.isEmpty()) {
final String word = words.getFirst();
if (addSingleTerm && previousWord != null) {
addSingleTerm = false;
clearAttributes();
termAtt.append(previousWord).append(word);
previousWord = null;
termAtt.append(previousWord);
return true;
}
//if we have words, write it out as a single token
if (!words.isEmpty()) {
final String word = words.removeFirst();
clearAttributes();
termAtt.append(word);
previousWord = word;
} else if (input.incrementToken()) {
final String word = new String(termAtt.buffer(), 0, termAtt.length());
if (addSingleTerm) {
clearAttributes();
termAtt.append(word);
previousWord = word;
addSingleTerm = false;
} else {
clearAttributes();
termAtt.append(previousWord).append(word);
previousWord = word;
addSingleTerm = true;
}
return true;
}
return false;
}
/**
* <p>
* Resets the Filter and clears any internal state data that may have been
* left-over from previous uses of the Filter.</p>
* <p>
* <b>If this Filter is re-used this method must be called between
* uses.</b></p>
*
* @throws java.io.IOException thrown if there is an error resetting the
* filter
* {@inheritDoc}
*/
@Override
public void end() throws IOException {
super.end();
previousWord = null;
words.clear();
addSingleTerm = true;
}
/**
* Standard hash code implementation.
*
* @return the hash code
* {@inheritDoc}
*/
@Override
public void reset() throws IOException {
super.reset();
previousWord = null;
addSingleTerm = true;
}
/**
* {@inheritDoc}
*/
@Override
public int hashCode() {
int hash = 3;
hash = 31 * hash + (this.termAtt != null ? this.termAtt.hashCode() : 0);
hash = 31 * hash + (this.previousWord != null ? this.previousWord.hashCode() : 0);
hash = 31 * hash + (this.words != null ? this.words.hashCode() : 0);
return hash;
return new HashCodeBuilder(13, 27)
.appendSuper(super.hashCode())
.append(addSingleTerm)
.append(previousWord)
.append(termAtt)
.build();
}
/**
* Standard equals implementation.
*
* @param obj the object to compare
* @return true if the objects are equal; otherwise false.
* {@inheritDoc}
*/
@Override
public boolean equals(Object obj) {
if (obj == null) {
return false;
}
if (getClass() != obj.getClass()) {
if (obj == this) {
return true;
}
if (obj.getClass() != getClass()) {
return false;
}
final TokenPairConcatenatingFilter other = (TokenPairConcatenatingFilter) obj;
if (this.termAtt != other.termAtt && (this.termAtt == null || !this.termAtt.equals(other.termAtt))) {
return false;
}
if ((this.previousWord == null) ? (other.previousWord != null) : !this.previousWord.equals(other.previousWord)) {
return false;
}
return !(this.words != other.words && (this.words == null || !this.words.equals(other.words)));
final TokenPairConcatenatingFilter rhs = (TokenPairConcatenatingFilter) obj;
return new EqualsBuilder()
.appendSuper(super.equals(obj))
.append(addSingleTerm, rhs.addSingleTerm)
.append(previousWord, rhs.previousWord)
.append(termAtt, rhs.termAtt)
.isEquals();
}
}
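
A worked trace of the rewritten incrementToken() for the input "red blue green", matching the expectations added in TokenPairConcatenatingFilterTest below (initial state: addSingleTerm = true, previousWord = null):

// call 1: read "red"      -> emit "red"        (previousWord = "red",   addSingleTerm = false)
// call 2: read "blue"     -> emit "redblue"    (previousWord = "blue",  addSingleTerm = true)
// call 3: no read         -> emit "blue"       (addSingleTerm = false)
// call 4: read "green"    -> emit "bluegreen"  (previousWord = "green", addSingleTerm = true)
// call 5: no read         -> emit "green"      (addSingleTerm = false)
// call 6: input exhausted -> return false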


@@ -43,7 +43,7 @@ public final class UrlTokenizingFilter extends AbstractTokenizingFilter {
private static final Logger LOGGER = LoggerFactory.getLogger(UrlTokenizingFilter.class);
/**
* Constructs a new VersionTokenizingFilter.
* Constructs a new UrlTokenizingFilter.
*
* @param stream the TokenStream that this filter will process
*/


@@ -820,6 +820,11 @@ public final class CveDB implements AutoCloseable {
}
}
/**
* Returns the size of the batch.
*
* @return the size of the batch
*/
private int getBatchSize() {
int max;
try {
@@ -830,6 +835,12 @@ public final class CveDB implements AutoCloseable {
return max;
}
/**
* Determines whether or not batch insert is enabled.
*
* @return <code>true</code> if batch insert is enabled; otherwise
* <code>false</code>
*/
private boolean isBatchInsertEnabled() {
boolean batch = false;
try {
@@ -841,25 +852,34 @@ public final class CveDB implements AutoCloseable {
return batch;
}
/**
* Generates a logging message for batch inserts.
*
* @param pCountReferences the number of batch statements executed
* @param pFormat a Java String.format string
* @return the formatted string
*/
private String getLogForBatchInserts(int pCountReferences, String pFormat) {
return String.format(pFormat, pCountReferences, new Date());
}
/**
* Executes batch inserts of vulnerabilities when property
* database.batchinsert.maxsize is reached
* database.batchinsert.maxsize is reached.
*
* @param pVulnerability
* @param pVulnerableSoftware
* @param pInsertSoftware
* @throws SQLException
* @param pVulnerability the vulnerability
* @param pVulnerableSoftware the vulnerable software
* @param pInsertSoftware the prepared statement to batch execute
* @throws SQLException thrown when the batch cannot be executed
*/
private void executeBatch(Vulnerability pVulnerability, VulnerableSoftware pVulnerableSoftware, PreparedStatement pInsertSoftware) throws SQLException {
private void executeBatch(Vulnerability pVulnerability, VulnerableSoftware pVulnerableSoftware, PreparedStatement pInsertSoftware)
throws SQLException {
try {
pInsertSoftware.executeBatch();
} catch (SQLException ex) {
if (ex.getMessage().contains("Duplicate entry")) {
final String msg = String.format("Duplicate software key identified in '%s:%s'", pVulnerability.getName(), pVulnerableSoftware.getName());
final String msg = String.format("Duplicate software key identified in '%s:%s'",
pVulnerability.getName(), pVulnerableSoftware.getName());
LOGGER.info(msg, ex);
} else {
throw ex;
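
A generic JDBC sketch (not CveDB's actual code; the table and column names are made up) of the batch-insert pattern these javadoc additions describe: rows accumulate via addBatch() and are flushed with executeBatch() once the configured database.batchinsert.maxsize is reached.

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;

final class BatchInsertSketch {
    /** Inserts each CPE string, flushing the batch whenever batchSize rows are pending. */
    static void insertSoftware(Connection conn, String[] cpes, int batchSize) throws SQLException {
        try (PreparedStatement ps = conn.prepareStatement("INSERT INTO software (cpe) VALUES (?)")) {
            int pending = 0;
            for (String cpe : cpes) {
                ps.setString(1, cpe);
                ps.addBatch();
                if (++pending >= batchSize) {      // database.batchinsert.maxsize reached
                    ps.executeBatch();
                    pending = 0;
                }
            }
            if (pending > 0) {
                ps.executeBatch();                 // flush the remainder
            }
        }
    }
}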


@@ -72,7 +72,8 @@ public class H2DBLock {
private final String magic;
/**
* The shutdown hook used to remove the lock file in case of an unexpected shutdown.
* The shutdown hook used to remove the lock file in case of an unexpected
* shutdown.
*/
private H2DBShutdownHook hook = null;
@@ -161,11 +162,19 @@ public class H2DBLock {
}
}
/**
* Checks the state of the custom h2 lock file and under some conditions
* will attempt to remove the lock file.
*
* @throws H2DBLockException thrown if the lock directory does not exist and
* cannot be created
*/
private void checkState() throws H2DBLockException {
if (!lockFile.getParentFile().isDirectory() && !lockFile.mkdir()) {
throw new H2DBLockException("Unable to create path to data directory.");
}
if (lockFile.isFile()) {
//TODO - this 30 minute check needs to be configurable.
if (getFileAge(lockFile) > 30) {
LOGGER.debug("An old db update lock file was found: {}", lockFile.getAbsolutePath());
if (!lockFile.delete()) {
@@ -232,6 +241,9 @@ public class H2DBLock {
return time;
}
/**
* Adds the shutdown hook to the JVM.
*/
private void addShutdownHook() {
if (hook == null) {
hook = H2DBShutdownHookFactory.getHook(settings);
@@ -240,6 +252,9 @@ public class H2DBLock {
}
}
/**
* Removes the shutdown hook.
*/
private void removeShutdownHook() {
if (hook != null) {
hook.remove();
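
A hedged sketch of the stale-lock test checkState() performs: a lock file older than the (currently hard-coded, per the TODO above) 30-minute threshold is treated as abandoned and removed. The helper below is illustrative, not H2DBLock's actual getFileAge() implementation.

import java.io.File;
import java.util.concurrent.TimeUnit;

final class StaleLockSketch {
    /** Returns true if the lock file exists and is older than maxAgeMinutes. */
    static boolean isStale(File lockFile, long maxAgeMinutes) {
        final long ageMinutes = TimeUnit.MILLISECONDS.toMinutes(
                System.currentTimeMillis() - lockFile.lastModified());
        return lockFile.isFile() && ageMinutes > maxAgeMinutes;
    }
}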


@@ -32,6 +32,13 @@ public final class H2DBShutdownHookFactory {
*/
private static final Logger LOGGER = LoggerFactory.getLogger(H2DBShutdownHookFactory.class);
/**
* Empty constructor for utility class.
*/
private H2DBShutdownHookFactory() {
//empty
}
/**
* Creates a new H2DB Shutdown Hook.
*
@@ -40,8 +47,8 @@ public final class H2DBShutdownHookFactory {
*/
public static H2DBShutdownHook getHook(Settings settings) {
try {
String className = settings.getString(Settings.KEYS.H2DB_SHUTDOWN_HOOK, "org.owasp.dependencycheck.utils.H2DBCleanupHook");
Class type = Class.forName(className);
final String className = settings.getString(Settings.KEYS.H2DB_SHUTDOWN_HOOK, "org.owasp.dependencycheck.utils.H2DBCleanupHook");
final Class type = Class.forName(className);
return (H2DBShutdownHook) type.newInstance();
} catch (ClassNotFoundException | InstantiationException | IllegalAccessException ex) {
LOGGER.debug("Failed to instantiate {}, using default shutdown hook instead", ex);


@@ -65,7 +65,8 @@
9. mail_project is ruby library
10. ldap_project is part of type3 written in php
11. user import project is used in drupal (i.e. php)
12. xml_sec is a C library for XML security
12. root is a c++ project https://github.com/root-project/root/
13. xml_sec is a C library for XML security
]]></notes>
<filePath regex="true">.*(\.(dll|jar|ear|war|pom|nupkg|nuspec)|pom\.xml|package.json)$</filePath>
<cpe>cpe:/a:sandbox:sandbox</cpe>
@@ -82,6 +83,7 @@
<cpe>cpe:/a:mail_project:mail</cpe>
<cpe>cpe:/a:ldap_project:ldap</cpe>
<cpe>cpe:/a:user_import_project:user_import</cpe>
<cpe>cpe:/a:root:root</cpe>
<cpe>cpe:/a:xmlsec_project:xmlsec</cpe>
</suppress>
<suppress base="true">
@@ -972,6 +974,97 @@
<cpe>cpe:/a:git:git</cpe>
<cpe>cpe:/a:git_project:git</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<filePath regex="true">.*winstone-?(\d*\.?){0,3}\.jar</filePath>
<cpe>cpe:/a:jetty:jetty</cpe>
<cpe>cpe:/a:eclipse:jetty</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.apache\.maven\.wagon:wagon-webdav-jackrabbit:.*$</gav>
<cpe>cpe:/a:apache:jackrabbit</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.apache\.xbean:xbean-reflect:.*$</gav>
<cpe>cpe:/a:apache:geronimo</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.eclipse\.jetty\.orbit:javax\.annotation:.*$</gav>
<cpe>cpe:/a:eclipse:jetty</cpe>
<cpe>cpe:/a:jetty:jetty</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.eclipse\.jetty\.websocket:websocket-api:.*$</gav>
<cpe>cpe:/a:eclipse:jetty</cpe>
<cpe>cpe:/a:jetty:jetty</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup: com.amazonaws is a drupal project
]]></notes>
<gav regex="true">^com\.amazonaws:jmespath-java:.*$</gav>
<cpe>cpe:/a:amazon_aws_project:amazon_aws</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup: apache_test CPE is referencing Perl code.
]]></notes>
<gav regex="true">^org\.apache\.ant:ant-testutil:.*$</gav>
<cpe>cpe:/a:apache:apache_test</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup: CPE is for git, not the git provider
]]></notes>
<gav regex="true">^org\.apache\.maven\.scm:maven-scm-provider-git-commons:.*$</gav>
<cpe>cpe:/a:git-scm:git</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.eclipse\.jetty\.orbit:org\.apache\.taglibs\.standard\.glassfish:.*$</gav>
<cpe>cpe:/a:jetty:jetty</cpe>
<cpe>cpe:/a:eclipse:jetty</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.eclipse\.jetty\.orbit:com\.sun\.el:.*$</gav>
<cpe>cpe:/a:jetty:jetty</cpe>
<cpe>cpe:/a:eclipse:jetty</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup: client vs. server mismatch
]]></notes>
<gav regex="true">^org\.samba\.jcifs:jcifs:.*$</gav>
<cpe>cpe:/a:samba:samba</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.codehaus\.plexus:plexus-utils:.*$</gav>
<cpe>cpe:/a:spice_project:spice</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[


@@ -0,0 +1,104 @@
/*
* Copyright 2017 OWASP.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.owasp.dependencycheck.data.lucene;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkOneTerm;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkRandomData;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import static org.apache.lucene.util.LuceneTestCase.RANDOM_MULTIPLIER;
import static org.apache.lucene.util.LuceneTestCase.random;
import org.junit.Test;
import static org.junit.Assert.*;
/**
*
* @author jeremy
*/
public class AlphaNumericFilterTest extends BaseTokenStreamTestCase {
private final Analyzer analyzer;
public AlphaNumericFilterTest() {
analyzer = new Analyzer() {
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new Analyzer.TokenStreamComponents(source, new AlphaNumericFilter(source));
}
};
}
/**
* Test of incrementToken method, of class AlphaNumericFilter.
*/
@Test
public void testIncrementToken() throws Exception {
String[] expected = new String[6];
expected[0] = "http";
expected[1] = "www";
expected[2] = "domain";
expected[3] = "com";
expected[4] = "test";
expected[5] = "php";
assertAnalyzesTo(analyzer, "http://www.domain.com/test.php", expected);
}
/**
* Test of incrementToken method, of class AlphaNumericFilter.
*/
@Test
public void testGarbage() throws Exception {
String[] expected = new String[2];
expected[0] = "test";
expected[1] = "two";
assertAnalyzesTo(analyzer, "!@#$% !@#$ &*(@#$ test-two @#$%", expected);
}
/**
* copied from
* http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
* blast some random strings through the analyzer
*/
public void testRandomStrings() {
try {
checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
} catch (IOException ex) {
fail("Failed test random strings: " + ex.getMessage());
}
}
/**
* copied from
* http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
*
* @throws IOException
*/
public void testEmptyTerm() throws IOException {
Analyzer a = new Analyzer() {
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new KeywordTokenizer();
return new Analyzer.TokenStreamComponents(tokenizer, new AlphaNumericFilter(tokenizer));
}
};
checkOneTerm(a, "", "");
}
}


@@ -52,7 +52,7 @@ public class FieldAnalyzerTest extends BaseTest {
@Test
public void testAnalyzers() throws Exception {
Analyzer analyzer = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
Analyzer analyzer = new SearchFieldAnalyzer();
Directory index = new RAMDirectory();
String field1 = "product";
@@ -68,16 +68,16 @@ public class FieldAnalyzerTest extends BaseTest {
addDoc(w, field1, text1, field2, text2);
}
//Analyzer searchingAnalyzer = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
//Analyzer searchingAnalyzer = new SearchFieldAnalyzer();
String querystr = "product:\"(Spring Framework Core)\" vendor:(SpringSource)";
SearchFieldAnalyzer searchAnalyzerProduct = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
SearchFieldAnalyzer searchAnalyzerVendor = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION);
SearchFieldAnalyzer searchAnalyzerProduct = new SearchFieldAnalyzer();
SearchFieldAnalyzer searchAnalyzerVendor = new SearchFieldAnalyzer();
HashMap<String, Analyzer> map = new HashMap<>();
map.put(field1, searchAnalyzerProduct);
map.put(field2, searchAnalyzerVendor);
PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(LuceneUtils.CURRENT_VERSION), map);
QueryParser parser = new QueryParser(LuceneUtils.CURRENT_VERSION, field1, wrapper);
PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(), map);
QueryParser parser = new QueryParser(field1, wrapper);
Query q = parser.parse(querystr);
@@ -85,7 +85,7 @@ public class FieldAnalyzerTest extends BaseTest {
IndexReader reader = DirectoryReader.open(index);
IndexSearcher searcher = new IndexSearcher(reader);
TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true);
TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage);
searcher.search(q, collector);
ScoreDoc[] hits = collector.topDocs().scoreDocs;
@@ -99,7 +99,7 @@ public class FieldAnalyzerTest extends BaseTest {
querystr = "product:( x-stream^5 ) AND vendor:( thoughtworks.xstream )";
Query q3 = parser.parse(querystr);
collector = TopScoreDocCollector.create(hitsPerPage, true);
collector = TopScoreDocCollector.create(hitsPerPage);
searcher.search(q3, collector);
hits = collector.topDocs().scoreDocs;
assertEquals("x-stream", searcher.doc(hits[0].doc).get(field1));
@@ -107,7 +107,7 @@ public class FieldAnalyzerTest extends BaseTest {
}
private IndexWriter createIndex(Analyzer analyzer, Directory index) throws IOException {
IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.CURRENT_VERSION, analyzer);
IndexWriterConfig config = new IndexWriterConfig(analyzer);
return new IndexWriter(index, config);
}


@@ -18,14 +18,19 @@
package org.owasp.dependencycheck.data.lucene;
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertAnalyzesTo;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkOneTerm;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkRandomData;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.junit.After;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import static org.apache.lucene.util.LuceneTestCase.RANDOM_MULTIPLIER;
import static org.apache.lucene.util.LuceneTestCase.random;
import static org.junit.Assert.fail;
import org.junit.Before;
import org.junit.Test;
/**
*
@@ -33,25 +38,64 @@ import org.junit.Before;
*/
public class TokenPairConcatenatingFilterTest extends BaseTokenStreamTestCase {
@Override
@Before
public void setUp() throws Exception {
super.setUp();
}
private final Analyzer analyzer;
@Override
@After
public void tearDown() throws Exception {
super.tearDown();
public TokenPairConcatenatingFilterTest() {
analyzer = new Analyzer() {
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new Analyzer.TokenStreamComponents(source, new TokenPairConcatenatingFilter(source));
}
};
}
/**
* test some examples
* Test of incrementToken method, of class TokenPairConcatenatingFilter.
*/
public void testExamples() throws IOException {
Tokenizer wsTokenizer = new WhitespaceTokenizer(LuceneUtils.CURRENT_VERSION, new StringReader("one two three"));
TokenStream filter = new TokenPairConcatenatingFilter(wsTokenizer);
assertTokenStreamContents(filter,
new String[]{"one", "onetwo", "two", "twothree", "three"});
@Test
public void testIncrementToken() throws Exception {
String[] expected = new String[5];
expected[0] = "red";
expected[1] = "redblue";
expected[2] = "blue";
expected[3] = "bluegreen";
expected[4] = "green";
assertAnalyzesTo(analyzer, "red blue green", expected);
}
/**
* copied from
* http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
* blast some random strings through the analyzer
*/
public void testRandomStrings() {
try {
checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
} catch (IOException ex) {
fail("Failed test random strings: " + ex.getMessage());
}
}
/**
* copied from
* http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
*
* @throws IOException
*/
public void testEmptyTerm() {
Analyzer a = new Analyzer() {
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new KeywordTokenizer();
return new Analyzer.TokenStreamComponents(tokenizer, new TokenPairConcatenatingFilter(tokenizer));
}
};
try {
checkOneTerm(a, "", "");
} catch (IOException ex) {
fail("Failed test random strings: " + ex.getMessage());
}
}
}


@@ -18,7 +18,6 @@
package org.owasp.dependencycheck.data.lucene;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
@@ -36,9 +35,8 @@ public class UrlTokenizingFilterTest extends BaseTokenStreamTestCase {
public UrlTokenizingFilterTest() {
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
Reader reader) {
Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new UrlTokenizingFilter(source));
}
};
@@ -77,8 +75,8 @@ public class UrlTokenizingFilterTest extends BaseTokenStreamTestCase {
public void testEmptyTerm() throws IOException {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new KeywordTokenizer(reader);
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new UrlTokenizingFilter(tokenizer));
}
};


@@ -124,7 +124,7 @@ Copyright (c) 2012 - Jeremy Long
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<github.global.server>github</github.global.server>
<apache.lucene.version>4.7.2</apache.lucene.version>
<apache.lucene.version>5.5.5</apache.lucene.version>
<apache.ant.version>1.9.9</apache.ant.version>
<!--upgrading to 1.8 requires Java 8 compatibility - we are maintaining 7 atm-->
<slf4j.version>1.7.25</slf4j.version>