Merge pull request #1048 from jeremylong/luceneUpgrade

Lucene upgrade, bug fixes, and general cleanup
This commit is contained in:
Jeremy Long
2017-12-29 06:26:25 -05:00
committed by GitHub
18 changed files with 575 additions and 149 deletions

View File

@@ -82,9 +82,11 @@ public class CPEAnalyzer extends AbstractAnalyzer {
private static final String WEIGHTING_BOOST = "^5"; private static final String WEIGHTING_BOOST = "^5";
/** /**
* A string representation of a regular expression defining characters * A string representation of a regular expression defining characters
* utilized within the CPE Names. * utilized within the CPE Names. Note, the :/ are included so URLs are
* passed into the Lucene query so that the specialized tokenizer can parse
* them.
*/ */
private static final String CLEANSE_CHARACTER_RX = "[^A-Za-z0-9 ._-]"; private static final String CLEANSE_CHARACTER_RX = "[^A-Za-z0-9 ._:/-]";
/** /**
* A string representation of a regular expression used to remove all but * A string representation of a regular expression used to remove all but
* alpha characters. * alpha characters.

View File

@@ -40,7 +40,6 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.RAMDirectory;
import org.owasp.dependencycheck.data.lucene.LuceneUtils;
import org.owasp.dependencycheck.data.lucene.SearchFieldAnalyzer; import org.owasp.dependencycheck.data.lucene.SearchFieldAnalyzer;
import org.owasp.dependencycheck.data.nvdcve.CveDB; import org.owasp.dependencycheck.data.nvdcve.CveDB;
import org.owasp.dependencycheck.data.nvdcve.DatabaseException; import org.owasp.dependencycheck.data.nvdcve.DatabaseException;
@@ -130,7 +129,7 @@ public final class CpeMemoryIndex implements AutoCloseable {
} }
indexSearcher = new IndexSearcher(indexReader); indexSearcher = new IndexSearcher(indexReader);
searchingAnalyzer = createSearchingAnalyzer(); searchingAnalyzer = createSearchingAnalyzer();
queryParser = new QueryParser(LuceneUtils.CURRENT_VERSION, Fields.DOCUMENT_KEY, searchingAnalyzer); queryParser = new QueryParser(Fields.DOCUMENT_KEY, searchingAnalyzer);
} }
} }
@@ -151,8 +150,8 @@ public final class CpeMemoryIndex implements AutoCloseable {
private Analyzer createSearchingAnalyzer() { private Analyzer createSearchingAnalyzer() {
final Map<String, Analyzer> fieldAnalyzers = new HashMap<>(); final Map<String, Analyzer> fieldAnalyzers = new HashMap<>();
fieldAnalyzers.put(Fields.DOCUMENT_KEY, new KeywordAnalyzer()); fieldAnalyzers.put(Fields.DOCUMENT_KEY, new KeywordAnalyzer());
final SearchFieldAnalyzer productFieldAnalyzer = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION); final SearchFieldAnalyzer productFieldAnalyzer = new SearchFieldAnalyzer();
final SearchFieldAnalyzer vendorFieldAnalyzer = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION); final SearchFieldAnalyzer vendorFieldAnalyzer = new SearchFieldAnalyzer();
fieldAnalyzers.put(Fields.PRODUCT, productFieldAnalyzer); fieldAnalyzers.put(Fields.PRODUCT, productFieldAnalyzer);
fieldAnalyzers.put(Fields.VENDOR, vendorFieldAnalyzer); fieldAnalyzers.put(Fields.VENDOR, vendorFieldAnalyzer);
@@ -196,7 +195,7 @@ public final class CpeMemoryIndex implements AutoCloseable {
*/ */
private void buildIndex(CveDB cve) throws IndexException { private void buildIndex(CveDB cve) throws IndexException {
try (Analyzer analyzer = createSearchingAnalyzer(); try (Analyzer analyzer = createSearchingAnalyzer();
IndexWriter indexWriter = new IndexWriter(index, new IndexWriterConfig(LuceneUtils.CURRENT_VERSION, analyzer))) { IndexWriter indexWriter = new IndexWriter(index, new IndexWriterConfig(analyzer))) {
// Tip: reuse the Document and Fields for performance... // Tip: reuse the Document and Fields for performance...
// See "Re-use Document and Field instances" from // See "Re-use Document and Field instances" from
// http://wiki.apache.org/lucene-java/ImproveIndexingSpeed // http://wiki.apache.org/lucene-java/ImproveIndexingSpeed
@@ -215,7 +214,7 @@ public final class CpeMemoryIndex implements AutoCloseable {
} }
} }
indexWriter.commit(); indexWriter.commit();
indexWriter.close(true); indexWriter.close();
} catch (DatabaseException ex) { } catch (DatabaseException ex) {
LOGGER.debug("", ex); LOGGER.debug("", ex);
throw new IndexException("Error reading CPE data", ex); throw new IndexException("Error reading CPE data", ex);

View File

@@ -17,6 +17,7 @@
*/ */
package org.owasp.dependencycheck.data.lucene; package org.owasp.dependencycheck.data.lucene;
import java.io.IOException;
import java.util.LinkedList; import java.util.LinkedList;
import javax.annotation.concurrent.NotThreadSafe; import javax.annotation.concurrent.NotThreadSafe;
import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenFilter;
@@ -37,6 +38,11 @@ public abstract class AbstractTokenizingFilter extends TokenFilter {
*/ */
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**
* A collection of tokens to add to the stream.
*/
private final LinkedList<String> tokens;
/** /**
* Gets the CharTermAttribute. * Gets the CharTermAttribute.
* *
@@ -45,10 +51,6 @@ public abstract class AbstractTokenizingFilter extends TokenFilter {
protected CharTermAttribute getTermAtt() { protected CharTermAttribute getTermAtt() {
return termAtt; return termAtt;
} }
/**
* A collection of tokens to add to the stream.
*/
private final LinkedList<String> tokens;
/** /**
* Gets the list of tokens. * Gets the list of tokens.
@@ -69,6 +71,15 @@ public abstract class AbstractTokenizingFilter extends TokenFilter {
tokens = new LinkedList<>(); tokens = new LinkedList<>();
} }
/**
* {@inheritDoc}
*/
@Override
public void reset() throws IOException {
super.reset();
tokens.clear();
}
/** /**
* Adds a term, if one exists, from the tokens collection. * Adds a term, if one exists, from the tokens collection.
* *

View File

@@ -0,0 +1,148 @@
/*
* This file is part of dependency-check-core.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright (c) 2017 Jeremy Long. All Rights Reserved.
*/
package org.owasp.dependencycheck.data.lucene;
import java.io.IOException;
import java.util.LinkedList;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
/**
* A simple alphanumeric filter that removes non-alphanumeric characters from
* the terms. If a term contains a non-alphanumeric character it may be split
* into multiple terms:
*
* <table>
* <tr><th>term</th><th>results in</th></tr>
* <tr><td>bob</td><td>bob</td></tr>
* <tr><td>bob-cat</td><td>bob cat</td></tr>
* <tr><td>#$%</td><td>[skipped]</td></tr>
* </table>
*
* @author jeremy long
*/
public final class AlphaNumericFilter extends AbstractTokenizingFilter {
    /**
     * The position increment attribute; manipulated so that the positions
     * consumed by skipped special-character-only terms are carried over to the
     * next emitted term.
     */
    private final PositionIncrementAttribute posIncrAttribute = addAttribute(PositionIncrementAttribute.class);
    /**
     * Used to count the number of terms skipped as they were only made up of
     * special characters.
     */
    private int skipCounter;

    /**
     * Constructs a new AlphaNumericFilter.
     *
     * @param stream the TokenStream that this filter will process
     */
    public AlphaNumericFilter(TokenStream stream) {
        super(stream);
    }

    /**
     * Advances the stream: pulls terms from the input, splits each on
     * non-alphanumeric characters, and buffers the resulting parts for
     * emission one at a time via {@code addTerm()}. Terms made up entirely of
     * special characters are skipped and their position increments
     * accumulated in {@code skipCounter}.
     *
     * @return {@code true} if a token is available; otherwise {@code false}
     * @throws IOException thrown if there is an error reading the stream
     */
    @Override
    public boolean incrementToken() throws IOException {
        final LinkedList<String> tokens = getTokens();
        final CharTermAttribute termAtt = getTermAtt();
        if (tokens.isEmpty()) {
            String[] parts;
            skipCounter = 0;
            while (input.incrementToken()) {
                final String text = new String(termAtt.buffer(), 0, termAtt.length());
                if (text.isEmpty()) {
                    //NOTE(review): passes the empty term through unchanged —
                    //presumably upstream tokenizers never emit empty terms; confirm
                    return true;
                }
                //split on any character that is not a letter or digit
                parts = text.split("[^a-zA-Z0-9]");
                if (parts.length == 0) {
                    //term consisted only of special characters (e.g. "#$%");
                    //remember the positions it occupied so they are not lost
                    skipCounter += posIncrAttribute.getPositionIncrement();
                } else {
                    if (skipCounter != 0) {
                        //fold the skipped positions into the next real term
                        posIncrAttribute.setPositionIncrement(posIncrAttribute.getPositionIncrement() + skipCounter);
                    }
                    for (String part : parts) {
                        //split() can yield empty leading strings (e.g. "-bob" -> ["", "bob"]); drop them
                        if (!part.isEmpty()) {
                            tokens.add(part);
                        }
                    }
                    break;
                }
            }
        }
        //emits the next buffered token, if any (inherited behavior)
        return addTerm();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void reset() throws IOException {
        super.reset();
        //clear per-stream state so the filter can be safely re-used
        skipCounter = 0;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void end() throws IOException {
        super.end();
        //account for any trailing skipped terms in the final position
        posIncrAttribute.setPositionIncrement(posIncrAttribute.getPositionIncrement() + skipCounter);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int hashCode() {
        return new HashCodeBuilder(13, 27)
                .appendSuper(super.hashCode())
                .append(posIncrAttribute)
                .append(skipCounter)
                .build();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == null) {
            return false;
        }
        if (obj == this) {
            return true;
        }
        if (obj.getClass() != getClass()) {
            return false;
        }
        final AlphaNumericFilter rhs = (AlphaNumericFilter) obj;
        return new EqualsBuilder()
                .appendSuper(super.equals(obj))
                .append(skipCounter, rhs.skipCounter)
                .append(posIncrAttribute, rhs.posIncrAttribute)
                .isEquals();
    }
}

View File

@@ -17,39 +17,37 @@
*/ */
package org.owasp.dependencycheck.data.lucene; package org.owasp.dependencycheck.data.lucene;
import java.io.Reader;
import javax.annotation.concurrent.NotThreadSafe; import javax.annotation.concurrent.NotThreadSafe;
import org.apache.lucene.analysis.util.CharTokenizer; import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.Version; import org.apache.lucene.util.AttributeFactory;
/** /**
* Tokenizes the input breaking it into tokens when non-alpha/numeric characters * Tokenizes the input breaking it into tokens when non-alpha/numeric characters
* are found. * are found.
* *
* @deprecated This class is no longer used after re-factoring the lucene
* analysis.
* @author Jeremy Long * @author Jeremy Long
*/ */
@NotThreadSafe @NotThreadSafe
@Deprecated
public class AlphaNumericTokenizer extends CharTokenizer { public class AlphaNumericTokenizer extends CharTokenizer {
/** /**
* Constructs a new AlphaNumericTokenizer. * Constructs a new AlphaNumericTokenizer.
* *
* @param matchVersion the lucene version
* @param in the Reader
*/ */
public AlphaNumericTokenizer(Version matchVersion, Reader in) { public AlphaNumericTokenizer() {
super(matchVersion, in); super();
} }
/** /**
* Constructs a new AlphaNumericTokenizer. * Constructs a new AlphaNumericTokenizer.
* *
* @param matchVersion the lucene version
* @param factory the AttributeFactory * @param factory the AttributeFactory
* @param in the Reader
*/ */
public AlphaNumericTokenizer(Version matchVersion, AttributeFactory factory, Reader in) { public AlphaNumericTokenizer(AttributeFactory factory) {
super(matchVersion, factory, in); super(factory);
} }
/** /**

View File

@@ -19,7 +19,6 @@ package org.owasp.dependencycheck.data.lucene;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import javax.annotation.concurrent.ThreadSafe; import javax.annotation.concurrent.ThreadSafe;
import org.apache.lucene.util.Version;
/** /**
* <p> * <p>
@@ -31,12 +30,6 @@ import org.apache.lucene.util.Version;
@ThreadSafe @ThreadSafe
public final class LuceneUtils { public final class LuceneUtils {
/**
* The current version of Lucene being used. Declaring this one place so an
* upgrade doesn't require hunting through the code base.
*/
public static final Version CURRENT_VERSION = Version.LUCENE_47;
/** /**
* Private constructor as this is a utility class. * Private constructor as this is a utility class.
*/ */

View File

@@ -17,16 +17,15 @@
*/ */
package org.owasp.dependencycheck.data.lucene; package org.owasp.dependencycheck.data.lucene;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter; import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
/** /**
* A Lucene field analyzer used to analyze queries against the CPE data. * A Lucene field analyzer used to analyze queries against the CPE data.
@@ -35,10 +34,6 @@ import org.apache.lucene.util.Version;
*/ */
public class SearchFieldAnalyzer extends Analyzer { public class SearchFieldAnalyzer extends Analyzer {
/**
* The Lucene Version used.
*/
private final Version version;
/** /**
* The list of additional stop words to use. * The list of additional stop words to use.
*/ */
@@ -55,7 +50,7 @@ public class SearchFieldAnalyzer extends Analyzer {
* @return the set of stop words being used * @return the set of stop words being used
*/ */
public static CharArraySet getStopWords() { public static CharArraySet getStopWords() {
final CharArraySet words = StopFilter.makeStopSet(LuceneUtils.CURRENT_VERSION, ADDITIONAL_STOP_WORDS, true); final CharArraySet words = StopFilter.makeStopSet(ADDITIONAL_STOP_WORDS, true);
words.addAll(StopAnalyzer.ENGLISH_STOP_WORDS_SET); words.addAll(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
return words; return words;
} }
@@ -63,10 +58,8 @@ public class SearchFieldAnalyzer extends Analyzer {
/** /**
* Constructs a new SearchFieldAnalyzer. * Constructs a new SearchFieldAnalyzer.
* *
* @param version the Lucene version
*/ */
public SearchFieldAnalyzer(Version version) { public SearchFieldAnalyzer() {
this.version = version;
stopWords = getStopWords(); stopWords = getStopWords();
} }
@@ -74,15 +67,16 @@ public class SearchFieldAnalyzer extends Analyzer {
* Creates the TokenStreamComponents used to analyze the stream. * Creates the TokenStreamComponents used to analyze the stream.
* *
* @param fieldName the field that this lucene analyzer will process * @param fieldName the field that this lucene analyzer will process
* @param reader a reader containing the tokens
* @return the token stream filter chain * @return the token stream filter chain
*/ */
@Override @Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) { protected TokenStreamComponents createComponents(String fieldName) {
final Tokenizer source = new AlphaNumericTokenizer(version, reader); //final Tokenizer source = new AlphaNumericTokenizer();
final Tokenizer source = new WhitespaceTokenizer();
TokenStream stream = source; TokenStream stream = source;
stream = new UrlTokenizingFilter(stream);
stream = new AlphaNumericFilter(stream);
stream = new WordDelimiterFilter(stream, stream = new WordDelimiterFilter(stream,
WordDelimiterFilter.GENERATE_WORD_PARTS WordDelimiterFilter.GENERATE_WORD_PARTS
| WordDelimiterFilter.GENERATE_NUMBER_PARTS | WordDelimiterFilter.GENERATE_NUMBER_PARTS
@@ -91,9 +85,9 @@ public class SearchFieldAnalyzer extends Analyzer {
| WordDelimiterFilter.SPLIT_ON_NUMERICS | WordDelimiterFilter.SPLIT_ON_NUMERICS
| WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null); | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
stream = new LowerCaseFilter(version, stream); stream = new LowerCaseFilter(stream);
stream = new UrlTokenizingFilter(stream);
stream = new StopFilter(version, stream, stopWords); stream = new StopFilter(stream, stopWords);
stream = new TokenPairConcatenatingFilter(stream); stream = new TokenPairConcatenatingFilter(stream);
return new TokenStreamComponents(source, stream); return new TokenStreamComponents(source, stream);

View File

@@ -18,8 +18,9 @@
package org.owasp.dependencycheck.data.lucene; package org.owasp.dependencycheck.data.lucene;
import java.io.IOException; import java.io.IOException;
import java.util.LinkedList;
import javax.annotation.concurrent.NotThreadSafe; import javax.annotation.concurrent.NotThreadSafe;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -46,9 +47,10 @@ public final class TokenPairConcatenatingFilter extends TokenFilter {
*/ */
private String previousWord; private String previousWord;
/** /**
* A list of words parsed. * Keeps track of whether we are adding a single term or concatenating with the
* previous.
*/ */
private final LinkedList<String> words; private boolean addSingleTerm;
/** /**
* Constructs a new TokenPairConcatenatingFilter. * Constructs a new TokenPairConcatenatingFilter.
@@ -57,7 +59,8 @@ public final class TokenPairConcatenatingFilter extends TokenFilter {
*/ */
public TokenPairConcatenatingFilter(TokenStream stream) { public TokenPairConcatenatingFilter(TokenStream stream) {
super(stream); super(stream);
words = new LinkedList<>(); addSingleTerm = true;
previousWord = null;
} }
/** /**
@@ -70,86 +73,83 @@ public final class TokenPairConcatenatingFilter extends TokenFilter {
*/ */
@Override @Override
public boolean incrementToken() throws IOException { public boolean incrementToken() throws IOException {
if (addSingleTerm && previousWord != null) {
//collect all the terms into the words collection addSingleTerm = false;
while (input.incrementToken()) {
final String word = new String(termAtt.buffer(), 0, termAtt.length());
words.add(word);
}
//if we have a previousTerm - write it out as its own token concatenated
// with the current word (if one is available).
if (previousWord != null && !words.isEmpty()) {
final String word = words.getFirst();
clearAttributes(); clearAttributes();
termAtt.append(previousWord).append(word); termAtt.append(previousWord);
previousWord = null;
return true; return true;
}
//if we have words, write it out as a single token } else if (input.incrementToken()) {
if (!words.isEmpty()) { final String word = new String(termAtt.buffer(), 0, termAtt.length());
final String word = words.removeFirst(); if (addSingleTerm) {
clearAttributes(); clearAttributes();
termAtt.append(word); termAtt.append(word);
previousWord = word; previousWord = word;
addSingleTerm = false;
} else {
clearAttributes();
termAtt.append(previousWord).append(word);
previousWord = word;
addSingleTerm = true;
}
return true; return true;
} }
return false; return false;
} }
/** /**
* <p> * {@inheritDoc}
* Resets the Filter and clears any internal state data that may have been
* left-over from previous uses of the Filter.</p>
* <p>
* <b>If this Filter is re-used this method must be called between
* uses.</b></p>
*
* @throws java.io.IOException thrown if there is an error resetting the
* filter
*/ */
@Override @Override
public void end() throws IOException { public void end() throws IOException {
super.end(); super.end();
previousWord = null; previousWord = null;
words.clear(); addSingleTerm = true;
} }
/** /**
* Standard hash code implementation. * {@inheritDoc}
* */
* @return the hash code @Override
public void reset() throws IOException {
super.reset();
previousWord = null;
addSingleTerm = true;
}
/**
* {@inheritDoc}
*/ */
@Override @Override
public int hashCode() { public int hashCode() {
int hash = 3; return new HashCodeBuilder(13, 27)
hash = 31 * hash + (this.termAtt != null ? this.termAtt.hashCode() : 0); .appendSuper(super.hashCode())
hash = 31 * hash + (this.previousWord != null ? this.previousWord.hashCode() : 0); .append(addSingleTerm)
hash = 31 * hash + (this.words != null ? this.words.hashCode() : 0); .append(previousWord)
return hash; .append(termAtt)
.build();
} }
/** /**
* Standard equals implementation. * {@inheritDoc}
*
* @param obj the object to compare
* @return true if the objects are equal; otherwise false.
*/ */
@Override @Override
public boolean equals(Object obj) { public boolean equals(Object obj) {
if (obj == null) { if (obj == null) {
return false; return false;
} }
if (getClass() != obj.getClass()) { if (obj == this) {
return true;
}
if (obj.getClass() != getClass()) {
return false; return false;
} }
final TokenPairConcatenatingFilter other = (TokenPairConcatenatingFilter) obj; final TokenPairConcatenatingFilter rhs = (TokenPairConcatenatingFilter) obj;
if (this.termAtt != other.termAtt && (this.termAtt == null || !this.termAtt.equals(other.termAtt))) { return new EqualsBuilder()
return false; .appendSuper(super.equals(obj))
} .append(addSingleTerm, rhs.addSingleTerm)
if ((this.previousWord == null) ? (other.previousWord != null) : !this.previousWord.equals(other.previousWord)) { .append(previousWord, rhs.previousWord)
return false; .append(termAtt, rhs.termAtt)
} .isEquals();
return !(this.words != other.words && (this.words == null || !this.words.equals(other.words)));
} }
} }

View File

@@ -43,7 +43,7 @@ public final class UrlTokenizingFilter extends AbstractTokenizingFilter {
private static final Logger LOGGER = LoggerFactory.getLogger(UrlTokenizingFilter.class); private static final Logger LOGGER = LoggerFactory.getLogger(UrlTokenizingFilter.class);
/** /**
* Constructs a new VersionTokenizingFilter. * Constructs a new UrlTokenizingFilter.
* *
* @param stream the TokenStream that this filter will process * @param stream the TokenStream that this filter will process
*/ */

View File

@@ -820,6 +820,11 @@ public final class CveDB implements AutoCloseable {
} }
} }
/**
* Returns the size of the batch.
*
* @return the size of the batch
*/
private int getBatchSize() { private int getBatchSize() {
int max; int max;
try { try {
@@ -830,6 +835,12 @@ public final class CveDB implements AutoCloseable {
return max; return max;
} }
/**
* Determines whether or not batch insert is enabled.
*
* @return <code>true</code> if batch insert is enabled; otherwise
* <code>false</code>
*/
private boolean isBatchInsertEnabled() { private boolean isBatchInsertEnabled() {
boolean batch = false; boolean batch = false;
try { try {
@@ -841,25 +852,34 @@ public final class CveDB implements AutoCloseable {
return batch; return batch;
} }
/**
* Generates a logging message for batch inserts.
*
* @param pCountReferences the number of batch statements executed
* @param pFormat a Java String.format string
* @return the formatted string
*/
private String getLogForBatchInserts(int pCountReferences, String pFormat) { private String getLogForBatchInserts(int pCountReferences, String pFormat) {
return String.format(pFormat, pCountReferences, new Date()); return String.format(pFormat, pCountReferences, new Date());
} }
/** /**
* Executes batch inserts of vulnerabilities when property * Executes batch inserts of vulnerabilities when property
* database.batchinsert.maxsize is reached * database.batchinsert.maxsize is reached.
* *
* @param pVulnerability * @param pVulnerability the vulnerability
* @param pVulnerableSoftware * @param pVulnerableSoftware the vulnerable software
* @param pInsertSoftware * @param pInsertSoftware the prepared statement to batch execute
* @throws SQLException * @throws SQLException thrown when the batch cannot be executed
*/ */
private void executeBatch(Vulnerability pVulnerability, VulnerableSoftware pVulnerableSoftware, PreparedStatement pInsertSoftware) throws SQLException { private void executeBatch(Vulnerability pVulnerability, VulnerableSoftware pVulnerableSoftware, PreparedStatement pInsertSoftware)
throws SQLException {
try { try {
pInsertSoftware.executeBatch(); pInsertSoftware.executeBatch();
} catch (SQLException ex) { } catch (SQLException ex) {
if (ex.getMessage().contains("Duplicate entry")) { if (ex.getMessage().contains("Duplicate entry")) {
final String msg = String.format("Duplicate software key identified in '%s:%s'", pVulnerability.getName(), pVulnerableSoftware.getName()); final String msg = String.format("Duplicate software key identified in '%s:%s'",
pVulnerability.getName(), pVulnerableSoftware.getName());
LOGGER.info(msg, ex); LOGGER.info(msg, ex);
} else { } else {
throw ex; throw ex;

View File

@@ -72,7 +72,8 @@ public class H2DBLock {
private final String magic; private final String magic;
/** /**
* The shutdown hook used to remove the lock file in case of an unexpected shutdown. * The shutdown hook used to remove the lock file in case of an unexpected
* shutdown.
*/ */
private H2DBShutdownHook hook = null; private H2DBShutdownHook hook = null;
@@ -161,11 +162,19 @@ public class H2DBLock {
} }
} }
/**
* Checks the state of the custom h2 lock file and under some conditions
* will attempt to remove the lock file.
*
* @throws H2DBLockException thrown if the lock directory does not exist and
* cannot be created
*/
private void checkState() throws H2DBLockException { private void checkState() throws H2DBLockException {
if (!lockFile.getParentFile().isDirectory() && !lockFile.mkdir()) { if (!lockFile.getParentFile().isDirectory() && !lockFile.mkdir()) {
throw new H2DBLockException("Unable to create path to data directory."); throw new H2DBLockException("Unable to create path to data directory.");
} }
if (lockFile.isFile()) { if (lockFile.isFile()) {
//TODO - this 30 minute check needs to be configurable.
if (getFileAge(lockFile) > 30) { if (getFileAge(lockFile) > 30) {
LOGGER.debug("An old db update lock file was found: {}", lockFile.getAbsolutePath()); LOGGER.debug("An old db update lock file was found: {}", lockFile.getAbsolutePath());
if (!lockFile.delete()) { if (!lockFile.delete()) {
@@ -232,6 +241,9 @@ public class H2DBLock {
return time; return time;
} }
/**
* Adds the shutdown hook to the JVM.
*/
private void addShutdownHook() { private void addShutdownHook() {
if (hook == null) { if (hook == null) {
hook = H2DBShutdownHookFactory.getHook(settings); hook = H2DBShutdownHookFactory.getHook(settings);
@@ -240,6 +252,9 @@ public class H2DBLock {
} }
} }
/**
* Removes the shutdown hook.
*/
private void removeShutdownHook() { private void removeShutdownHook() {
if (hook != null) { if (hook != null) {
hook.remove(); hook.remove();

View File

@@ -32,6 +32,13 @@ public final class H2DBShutdownHookFactory {
*/ */
private static final Logger LOGGER = LoggerFactory.getLogger(H2DBShutdownHookFactory.class); private static final Logger LOGGER = LoggerFactory.getLogger(H2DBShutdownHookFactory.class);
/**
* Empty constructor for utility class.
*/
private H2DBShutdownHookFactory() {
//empty
}
/** /**
* Creates a new H2DB Shutdown Hook. * Creates a new H2DB Shutdown Hook.
* *
@@ -40,8 +47,8 @@ public final class H2DBShutdownHookFactory {
*/ */
public static H2DBShutdownHook getHook(Settings settings) { public static H2DBShutdownHook getHook(Settings settings) {
try { try {
String className = settings.getString(Settings.KEYS.H2DB_SHUTDOWN_HOOK, "org.owasp.dependencycheck.utils.H2DBCleanupHook"); final String className = settings.getString(Settings.KEYS.H2DB_SHUTDOWN_HOOK, "org.owasp.dependencycheck.utils.H2DBCleanupHook");
Class type = Class.forName(className); final Class type = Class.forName(className);
return (H2DBShutdownHook) type.newInstance(); return (H2DBShutdownHook) type.newInstance();
} catch (ClassNotFoundException | InstantiationException | IllegalAccessException ex) { } catch (ClassNotFoundException | InstantiationException | IllegalAccessException ex) {
LOGGER.debug("Failed to instantiate {}, using default shutdown hook instead", ex); LOGGER.debug("Failed to instantiate {}, using default shutdown hook instead", ex);

View File

@@ -65,7 +65,8 @@
9. mail_project is ruby library 9. mail_project is ruby library
10. ldap_project is part of type3 written in php 10. ldap_project is part of type3 written in php
11. user import project is used in drupal (i.e. php) 11. user import project is used in drupal (i.e. php)
12. xml_sec is a C library for XML security 12. root is a c++ project https://github.com/root-project/root/
13. xml_sec is a C library for XML security
]]></notes> ]]></notes>
<filePath regex="true">.*(\.(dll|jar|ear|war|pom|nupkg|nuspec)|pom\.xml|package.json)$</filePath> <filePath regex="true">.*(\.(dll|jar|ear|war|pom|nupkg|nuspec)|pom\.xml|package.json)$</filePath>
<cpe>cpe:/a:sandbox:sandbox</cpe> <cpe>cpe:/a:sandbox:sandbox</cpe>
@@ -82,6 +83,7 @@
<cpe>cpe:/a:mail_project:mail</cpe> <cpe>cpe:/a:mail_project:mail</cpe>
<cpe>cpe:/a:ldap_project:ldap</cpe> <cpe>cpe:/a:ldap_project:ldap</cpe>
<cpe>cpe:/a:user_import_project:user_import</cpe> <cpe>cpe:/a:user_import_project:user_import</cpe>
<cpe>cpe:/a:root:root</cpe>
<cpe>cpe:/a:xmlsec_project:xmlsec</cpe> <cpe>cpe:/a:xmlsec_project:xmlsec</cpe>
</suppress> </suppress>
<suppress base="true"> <suppress base="true">
@@ -973,6 +975,97 @@
<cpe>cpe:/a:git_project:git</cpe> <cpe>cpe:/a:git_project:git</cpe>
</suppress> </suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<filePath regex="true">.*winstone-?(\d*\.?){0,3}\.jar</filePath>
<cpe>cpe:/a:jetty:jetty</cpe>
<cpe>cpe:/a:eclipse:jetty</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.apache\.maven\.wagon:wagon-webdav-jackrabbit:.*$</gav>
<cpe>cpe:/a:apache:jackrabbit</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.apache\.xbean:xbean-reflect:.*$</gav>
<cpe>cpe:/a:apache:geronimo</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.eclipse\.jetty\.orbit:javax\.annotation:.*$</gav>
<cpe>cpe:/a:eclipse:jetty</cpe>
<cpe>cpe:/a:jetty:jetty</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.eclipse\.jetty\.websocket:websocket-api:.*$</gav>
<cpe>cpe:/a:eclipse:jetty</cpe>
<cpe>cpe:/a:jetty:jetty</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup: com.amazonaws is a drupal project
]]></notes>
<gav regex="true">^com\.amazonaws:jmespath-java:.*$</gav>
<cpe>cpe:/a:amazon_aws_project:amazon_aws</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup: apache_test CPE is referencing Perl code.
]]></notes>
<gav regex="true">^org\.apache\.ant:ant-testutil:.*$</gav>
<cpe>cpe:/a:apache:apache_test</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup: CPE is for git, not the git provider
]]></notes>
<gav regex="true">^org\.apache\.maven\.scm:maven-scm-provider-git-commons:.*$</gav>
<cpe>cpe:/a:git-scm:git</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.eclipse\.jetty\.orbit:org\.apache\.taglibs\.standard\.glassfish:.*$</gav>
<cpe>cpe:/a:jetty:jetty</cpe>
<cpe>cpe:/a:eclipse:jetty</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.eclipse\.jetty\.orbit:com\.sun\.el:.*$</gav>
<cpe>cpe:/a:jetty:jetty</cpe>
<cpe>cpe:/a:eclipse:jetty</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup: client vs. server mismatch
]]></notes>
<gav regex="true">^org\.samba\.jcifs:jcifs:.*$</gav>
<cpe>cpe:/a:samba:samba</cpe>
</suppress>
<suppress base="true">
<notes><![CDATA[
general FP cleanup
]]></notes>
<gav regex="true">^org\.codehaus\.plexus:plexus-utils:.*$</gav>
<cpe>cpe:/a:spice_project:spice</cpe>
</suppress>
<suppress base="true"> <suppress base="true">
<notes><![CDATA[ <notes><![CDATA[
FP per issue #952 - instead of suppressing the whole thing, we will just FP per issue #952 - instead of suppressing the whole thing, we will just

View File

@@ -0,0 +1,104 @@
/*
* Copyright 2017 OWASP.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.owasp.dependencycheck.data.lucene;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkOneTerm;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkRandomData;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import static org.apache.lucene.util.LuceneTestCase.RANDOM_MULTIPLIER;
import static org.apache.lucene.util.LuceneTestCase.random;
import org.junit.Test;
import static org.junit.Assert.*;
/**
*
* @author jeremy
*/
public class AlphaNumericFilterTest extends BaseTokenStreamTestCase {

    /**
     * The analyzer under test: whitespace tokenization followed by the
     * {@link AlphaNumericFilter}.
     */
    private final Analyzer analyzer;

    /**
     * Constructs the test case, wiring a whitespace MockTokenizer into an
     * AlphaNumericFilter.
     */
    public AlphaNumericFilterTest() {
        analyzer = new Analyzer() {
            @Override
            protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
                Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
                return new Analyzer.TokenStreamComponents(source, new AlphaNumericFilter(source));
            }
        };
    }

    /**
     * Test of incrementToken method, of class AlphaNumericFilter; verifies a
     * URL is split into its alpha-numeric parts.
     */
    @Test
    public void testIncrementToken() throws Exception {
        final String[] expected = {"http", "www", "domain", "com", "test", "php"};
        assertAnalyzesTo(analyzer, "http://www.domain.com/test.php", expected);
    }

    /**
     * Test of incrementToken method, of class AlphaNumericFilter; verifies
     * that tokens consisting solely of non-alphanumeric characters are
     * dropped while real words survive.
     */
    @Test
    public void testGarbage() throws Exception {
        final String[] expected = {"test", "two"};
        assertAnalyzesTo(analyzer, "!@#$% !@#$ &*(@#$ test-two @#$%", expected);
    }

    /**
     * Blasts some random strings through the analyzer; copied from
     * http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
     *
     * @throws IOException thrown if the analysis fails; rethrown (rather than
     * caught and converted to fail(ex.getMessage())) so the full stack trace
     * is preserved in the test report
     */
    @Test
    public void testRandomStrings() throws IOException {
        checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
    }

    /**
     * Verifies the filter handles an empty term without error; copied from
     * http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
     *
     * @throws IOException thrown if the analysis fails
     */
    @Test
    public void testEmptyTerm() throws IOException {
        Analyzer a = new Analyzer() {
            @Override
            protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
                Tokenizer tokenizer = new KeywordTokenizer();
                return new Analyzer.TokenStreamComponents(tokenizer, new AlphaNumericFilter(tokenizer));
            }
        };
        checkOneTerm(a, "", "");
    }
}

View File

@@ -52,7 +52,7 @@ public class FieldAnalyzerTest extends BaseTest {
@Test @Test
public void testAnalyzers() throws Exception { public void testAnalyzers() throws Exception {
Analyzer analyzer = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION); Analyzer analyzer = new SearchFieldAnalyzer();
Directory index = new RAMDirectory(); Directory index = new RAMDirectory();
String field1 = "product"; String field1 = "product";
@@ -68,16 +68,16 @@ public class FieldAnalyzerTest extends BaseTest {
addDoc(w, field1, text1, field2, text2); addDoc(w, field1, text1, field2, text2);
} }
//Analyzer searchingAnalyzer = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION); //Analyzer searchingAnalyzer = new SearchFieldAnalyzer();
String querystr = "product:\"(Spring Framework Core)\" vendor:(SpringSource)"; String querystr = "product:\"(Spring Framework Core)\" vendor:(SpringSource)";
SearchFieldAnalyzer searchAnalyzerProduct = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION); SearchFieldAnalyzer searchAnalyzerProduct = new SearchFieldAnalyzer();
SearchFieldAnalyzer searchAnalyzerVendor = new SearchFieldAnalyzer(LuceneUtils.CURRENT_VERSION); SearchFieldAnalyzer searchAnalyzerVendor = new SearchFieldAnalyzer();
HashMap<String, Analyzer> map = new HashMap<>(); HashMap<String, Analyzer> map = new HashMap<>();
map.put(field1, searchAnalyzerProduct); map.put(field1, searchAnalyzerProduct);
map.put(field2, searchAnalyzerVendor); map.put(field2, searchAnalyzerVendor);
PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(LuceneUtils.CURRENT_VERSION), map); PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(), map);
QueryParser parser = new QueryParser(LuceneUtils.CURRENT_VERSION, field1, wrapper); QueryParser parser = new QueryParser(field1, wrapper);
Query q = parser.parse(querystr); Query q = parser.parse(querystr);
@@ -85,7 +85,7 @@ public class FieldAnalyzerTest extends BaseTest {
IndexReader reader = DirectoryReader.open(index); IndexReader reader = DirectoryReader.open(index);
IndexSearcher searcher = new IndexSearcher(reader); IndexSearcher searcher = new IndexSearcher(reader);
TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage);
searcher.search(q, collector); searcher.search(q, collector);
ScoreDoc[] hits = collector.topDocs().scoreDocs; ScoreDoc[] hits = collector.topDocs().scoreDocs;
@@ -99,7 +99,7 @@ public class FieldAnalyzerTest extends BaseTest {
querystr = "product:( x-stream^5 ) AND vendor:( thoughtworks.xstream )"; querystr = "product:( x-stream^5 ) AND vendor:( thoughtworks.xstream )";
Query q3 = parser.parse(querystr); Query q3 = parser.parse(querystr);
collector = TopScoreDocCollector.create(hitsPerPage, true); collector = TopScoreDocCollector.create(hitsPerPage);
searcher.search(q3, collector); searcher.search(q3, collector);
hits = collector.topDocs().scoreDocs; hits = collector.topDocs().scoreDocs;
assertEquals("x-stream", searcher.doc(hits[0].doc).get(field1)); assertEquals("x-stream", searcher.doc(hits[0].doc).get(field1));
@@ -107,7 +107,7 @@ public class FieldAnalyzerTest extends BaseTest {
} }
private IndexWriter createIndex(Analyzer analyzer, Directory index) throws IOException { private IndexWriter createIndex(Analyzer analyzer, Directory index) throws IOException {
IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.CURRENT_VERSION, analyzer); IndexWriterConfig config = new IndexWriterConfig( analyzer);
return new IndexWriter(index, config); return new IndexWriter(index, config);
} }

View File

@@ -18,14 +18,19 @@
package org.owasp.dependencycheck.data.lucene; package org.owasp.dependencycheck.data.lucene;
import java.io.IOException; import java.io.IOException;
import java.io.StringReader; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream; import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertAnalyzesTo;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkOneTerm;
import static org.apache.lucene.analysis.BaseTokenStreamTestCase.checkRandomData;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.junit.After; import static org.apache.lucene.util.LuceneTestCase.RANDOM_MULTIPLIER;
import static org.apache.lucene.util.LuceneTestCase.random;
import static org.junit.Assert.fail;
import org.junit.Before; import org.junit.Test;
/** /**
* *
@@ -33,25 +38,64 @@ import org.junit.Before;
*/ */
public class TokenPairConcatenatingFilterTest extends BaseTokenStreamTestCase { public class TokenPairConcatenatingFilterTest extends BaseTokenStreamTestCase {
@Override private final Analyzer analyzer;
@Before
public void setUp() throws Exception {
super.setUp();
}
public TokenPairConcatenatingFilterTest() {
analyzer = new Analyzer() {
@Override @Override
@After protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
public void tearDown() throws Exception { Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
super.tearDown(); return new Analyzer.TokenStreamComponents(source, new TokenPairConcatenatingFilter(source));
}
};
} }
/** /**
* test some examples * Test of incrementToken method, of class TokenPairConcatenatingFilter.
*/ */
public void testExamples() throws IOException { @Test
Tokenizer wsTokenizer = new WhitespaceTokenizer(LuceneUtils.CURRENT_VERSION, new StringReader("one two three")); public void testIncrementToken() throws Exception {
TokenStream filter = new TokenPairConcatenatingFilter(wsTokenizer); String[] expected = new String[5];
assertTokenStreamContents(filter, expected[0] = "red";
new String[]{"one", "onetwo", "two", "twothree", "three"}); expected[1] = "redblue";
expected[2] = "blue";
expected[3] = "bluegreen";
expected[4] = "green";
assertAnalyzesTo(analyzer, "red blue green", expected);
} }
/**
* copied from
* http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
* blast some random strings through the analyzer
*/
public void testRandomStrings() {
try {
checkRandomData(random(), analyzer, 1000 * RANDOM_MULTIPLIER);
} catch (IOException ex) {
fail("Failed test random strings: " + ex.getMessage());
}
}
/**
* copied from
* http://svn.apache.org/repos/asf/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishMinimalStemFilter.java
*
* @throws IOException
*/
public void testEmptyTerm() {
Analyzer a = new Analyzer() {
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new KeywordTokenizer();
return new Analyzer.TokenStreamComponents(tokenizer, new TokenPairConcatenatingFilter(tokenizer));
}
};
try {
checkOneTerm(a, "", "");
} catch (IOException ex) {
fail("Failed test random strings: " + ex.getMessage());
}
}
} }

View File

@@ -18,7 +18,6 @@
package org.owasp.dependencycheck.data.lucene; package org.owasp.dependencycheck.data.lucene;
import java.io.IOException; import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.MockTokenizer;
@@ -36,9 +35,8 @@ public class UrlTokenizingFilterTest extends BaseTokenStreamTestCase {
public UrlTokenizingFilterTest() { public UrlTokenizingFilterTest() {
analyzer = new Analyzer() { analyzer = new Analyzer() {
@Override @Override
protected TokenStreamComponents createComponents(String fieldName, protected TokenStreamComponents createComponents(String fieldName) {
Reader reader) { Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
return new TokenStreamComponents(source, new UrlTokenizingFilter(source)); return new TokenStreamComponents(source, new UrlTokenizingFilter(source));
} }
}; };
@@ -77,8 +75,8 @@ public class UrlTokenizingFilterTest extends BaseTokenStreamTestCase {
public void testEmptyTerm() throws IOException { public void testEmptyTerm() throws IOException {
Analyzer a = new Analyzer() { Analyzer a = new Analyzer() {
@Override @Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) { protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new KeywordTokenizer(reader); Tokenizer tokenizer = new KeywordTokenizer();
return new TokenStreamComponents(tokenizer, new UrlTokenizingFilter(tokenizer)); return new TokenStreamComponents(tokenizer, new UrlTokenizingFilter(tokenizer));
} }
}; };

View File

@@ -124,7 +124,7 @@ Copyright (c) 2012 - Jeremy Long
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding> <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<github.global.server>github</github.global.server> <github.global.server>github</github.global.server>
<apache.lucene.version>4.7.2</apache.lucene.version> <apache.lucene.version>5.5.5</apache.lucene.version>
<apache.ant.version>1.9.9</apache.ant.version> <apache.ant.version>1.9.9</apache.ant.version>
<!--upgrading to the 1.8 requires Java 8 compatability - we are maintaining 7 atm--> <!--upgrading to the 1.8 requires Java 8 compatability - we are maintaining 7 atm-->
<slf4j.version>1.7.25</slf4j.version> <slf4j.version>1.7.25</slf4j.version>