upgrade to lucene 4.0

Former-commit-id: 97ae6c47d6498fea873202dae257a2dfab0b683f
This commit is contained in:
Jeremy Long
2012-12-16 21:26:30 -05:00
parent 0027e75a45
commit 36ecf7c7fd
12 changed files with 62 additions and 82 deletions

13
pom.xml
View File

@@ -389,7 +389,18 @@ along with DependencyCheck. If not, see <http://www.gnu.org/licenses/>.
<dependency> <dependency>
<groupId>org.apache.lucene</groupId> <groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId> <artifactId>lucene-core</artifactId>
<version>3.5.0</version> <version>4.0.0</version>
<!--<version>3.5.0</version>-->
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>4.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>4.0.0</version>
</dependency> </dependency>
<dependency> <dependency>
<groupId>org.apache.commons</groupId> <groupId>org.apache.commons</groupId>

View File

@@ -27,8 +27,10 @@ import java.util.StringTokenizer;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.queryparser.classic.QueryParser;
//TODO convert to the analyzing query parser
//import org.apache.lucene.queryparser.analyzing.AnalyzingQueryParser;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
@@ -98,8 +100,7 @@ public class CPEAnalyzer implements org.codesecure.dependencycheck.analyzer.Anal
cpe.open(); cpe.open();
indexSearcher = cpe.getIndexSearcher(); indexSearcher = cpe.getIndexSearcher();
Analyzer analyzer = cpe.getAnalyzer(); Analyzer analyzer = cpe.getAnalyzer();
//TITLE is the default field because it contains vendor, product, and version all in one. queryParser = new QueryParser(Version.LUCENE_40, Fields.NAME, analyzer);
queryParser = new QueryParser(Version.LUCENE_35, Fields.TITLE, analyzer);
} }
/** /**

View File

@@ -46,7 +46,6 @@ public class Entry {
Entry entry = new Entry(); Entry entry = new Entry();
try { try {
entry.parseName(doc.get(Fields.NAME)); entry.parseName(doc.get(Fields.NAME));
entry.setTitle(doc.get(Fields.TITLE));
} catch (UnsupportedEncodingException ex) { } catch (UnsupportedEncodingException ex) {
Logger.getLogger(Entry.class.getName()).log(Level.SEVERE, null, ex); Logger.getLogger(Entry.class.getName()).log(Level.SEVERE, null, ex);
entry.name = doc.get(Fields.NAME); entry.name = doc.get(Fields.NAME);

View File

@@ -34,22 +34,13 @@ public abstract class Fields {
* The key for the vendor field. * The key for the vendor field.
*/ */
public static final String VENDOR = "vendor"; public static final String VENDOR = "vendor";
/**
* The key for the version field.
*/
public static final String VERSION = "version";
//public static final String REVISION = "revision";
/** /**
* The key for the product field. * The key for the product field.
*/ */
public static final String PRODUCT = "product"; public static final String PRODUCT = "product";
/** /**
* The key for the title field. This is a field combining vendor, product, * The key for the version field.
* and version.
*/ */
public static final String TITLE = "title"; public static final String VERSION = "version";
/** //public static final String REVISION = "revision";
* The key for the nvdId field.
*/
public static final String NVDID = "nvdid";
} }

View File

@@ -36,8 +36,8 @@ import java.util.logging.Level;
import java.util.logging.Logger; import java.util.logging.Logger;
import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.ParserConfigurationException;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer; import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper; import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.FSDirectory;
@@ -121,7 +121,7 @@ public class Index extends AbstractIndex implements CachedWebDataSource {
fieldAnalyzers.put(Fields.NAME, new KeywordAnalyzer()); fieldAnalyzers.put(Fields.NAME, new KeywordAnalyzer());
PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper( PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
new StandardAnalyzer(Version.LUCENE_35), fieldAnalyzers); new StandardAnalyzer(Version.LUCENE_40), fieldAnalyzers);
return wrapper; return wrapper;
} }

View File

@@ -21,6 +21,8 @@ package org.codesecure.dependencycheck.data.cpe.xml;
import java.io.IOException; import java.io.IOException;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
@@ -59,44 +61,30 @@ public class Indexer extends Index implements EntrySaveDelegate {
protected Document convertEntryToDoc(Entry entry) { protected Document convertEntryToDoc(Entry entry) {
Document doc = new Document(); Document doc = new Document();
Field name = new Field(Fields.NAME, entry.getName(), Field.Store.YES, Field.Index.ANALYZED); Field name = new StoredField(Fields.NAME, entry.getName());
name.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(name); doc.add(name);
Field nvdId = new Field(Fields.NVDID, entry.getNvdId(), Field.Store.NO, Field.Index.ANALYZED); Field vendor = new TextField(Fields.VENDOR, entry.getVendor(), Field.Store.NO);
nvdId.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(nvdId);
Field vendor = new Field(Fields.VENDOR, entry.getVendor(), Field.Store.NO, Field.Index.ANALYZED);
vendor.setIndexOptions(IndexOptions.DOCS_ONLY);
vendor.setBoost(5.0F); vendor.setBoost(5.0F);
doc.add(vendor); doc.add(vendor);
Field product = new Field(Fields.PRODUCT, entry.getProduct(), Field.Store.NO, Field.Index.ANALYZED); Field product = new TextField(Fields.PRODUCT, entry.getProduct(), Field.Store.NO);
product.setIndexOptions(IndexOptions.DOCS_ONLY);
product.setBoost(5.0F); product.setBoost(5.0F);
doc.add(product); doc.add(product);
Field title = new Field(Fields.TITLE, entry.getTitle(), Field.Store.YES, Field.Index.ANALYZED);
title.setIndexOptions(IndexOptions.DOCS_ONLY);
//title.setBoost(1.0F);
doc.add(title);
//TODO revision should likely be its own field //TODO revision should likely be its own field
if (entry.getVersion() != null) { if (entry.getVersion() != null) {
Field version = null; Field version = null;
if (entry.getRevision() != null) { if (entry.getRevision() != null) {
version = new Field(Fields.VERSION, entry.getVersion() + " " version = new TextField(Fields.VERSION, entry.getVersion() + " "
+ entry.getRevision(), Field.Store.NO, Field.Index.ANALYZED); + entry.getRevision(), Field.Store.NO);
} else { } else {
version = new Field(Fields.VERSION, entry.getVersion(), version = new TextField(Fields.VERSION, entry.getVersion(),
Field.Store.NO, Field.Index.ANALYZED); Field.Store.NO);
} }
version.setIndexOptions(IndexOptions.DOCS_ONLY);
version.setBoost(0.8F); version.setBoost(0.8F);
doc.add(version); doc.add(version);
} }
return doc; return doc;
} }
} }

View File

@@ -23,6 +23,7 @@ import java.util.logging.Level;
import java.util.logging.Logger; import java.util.logging.Logger;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
@@ -98,13 +99,7 @@ public abstract class AbstractIndex {
} }
} }
if (indexSearcher != null) { if (indexSearcher != null) {
try { indexSearcher = null;
indexSearcher.close();
} catch (IOException ex) {
Logger.getLogger(AbstractIndex.class.getName()).log(Level.SEVERE, null, ex);
} finally {
indexSearcher = null;
}
} }
if (analyzer != null) { if (analyzer != null) {
@@ -140,7 +135,7 @@ public abstract class AbstractIndex {
if (!isOpen()) { if (!isOpen()) {
open(); open();
} }
IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, analyzer); IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer);
indexWriter = new IndexWriter(directory, conf); indexWriter = new IndexWriter(directory, conf);
} }
@@ -170,7 +165,8 @@ public abstract class AbstractIndex {
if (!isOpen()) { if (!isOpen()) {
open(); open();
} }
indexReader = IndexReader.open(directory, true); //indexReader = IndexReader.open(directory, true);
indexReader = DirectoryReader.open(directory);
} }
/** /**

View File

@@ -18,7 +18,7 @@ package org.codesecure.dependencycheck.data.lucene;
* Copyright (c) 2012 Jeremy Long. All Rights Reserved. * Copyright (c) 2012 Jeremy Long. All Rights Reserved.
*/ */
import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.similarities.DefaultSimilarity;
/** /**
* *
@@ -41,7 +41,7 @@ public class DependencySimilarity extends DefaultSimilarity {
* @return 1 * @return 1
*/ */
@Override @Override
public float idf(int docFreq, int numDocs) { public float idf(long docFreq, long numDocs) {
return 1; return 1;
} }
} }

View File

@@ -28,8 +28,8 @@ import java.util.logging.Logger;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer; import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper; import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.FSDirectory;
@@ -119,7 +119,7 @@ public class Index extends AbstractIndex implements CachedWebDataSource {
fieldAnalyzers.put(Fields.VULNERABLE_CPE, new KeywordAnalyzer()); fieldAnalyzers.put(Fields.VULNERABLE_CPE, new KeywordAnalyzer());
PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper( PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
new StandardAnalyzer(Version.LUCENE_35), fieldAnalyzers); new StandardAnalyzer(Version.LUCENE_40), fieldAnalyzers);
return wrapper; return wrapper;
} }

View File

@@ -27,8 +27,9 @@ import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller; import javax.xml.bind.Marshaller;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.codesecure.dependencycheck.data.lucene.LuceneUtils; import org.codesecure.dependencycheck.data.lucene.LuceneUtils;
import org.codesecure.dependencycheck.data.nvdcve.generated.VulnerabilityType; import org.codesecure.dependencycheck.data.nvdcve.generated.VulnerabilityType;
@@ -101,14 +102,11 @@ public class Indexer extends Index implements EntrySaveDelegate {
return null; return null;
} }
Field name = new Field(Fields.CVE_ID, vulnerability.getId(), Field.Store.NO, Field.Index.ANALYZED); Field name = new StringField(Fields.CVE_ID, vulnerability.getId(), Field.Store.NO);
name.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(name); doc.add(name);
Field description = new Field(Fields.DESCRIPTION, vulnerability.getSummary(), Field.Store.NO, Field.Index.ANALYZED); // Field description = new Field(Fields.DESCRIPTION, vulnerability.getSummary(), Field.Store.NO, Field.Index.ANALYZED);
description.setIndexOptions(IndexOptions.DOCS_ONLY); // doc.add(description);
doc.add(description);
JAXBContext context = JAXBContext.newInstance("org.codesecure.dependencycheck.data.nvdcve.generated"); JAXBContext context = JAXBContext.newInstance("org.codesecure.dependencycheck.data.nvdcve.generated");
@@ -119,7 +117,7 @@ public class Indexer extends Index implements EntrySaveDelegate {
m.marshal(vulnerability, out); m.marshal(vulnerability, out);
Field xml = new Field(Fields.XML, out.toString(), Field.Store.YES, Field.Index.NO); Field xml = new StoredField(Fields.XML, out.toString());
doc.add(xml); doc.add(xml);
return doc; return doc;
@@ -141,8 +139,7 @@ public class Indexer extends Index implements EntrySaveDelegate {
} }
private void addVulnerableCpe(String cpe, Document doc) { private void addVulnerableCpe(String cpe, Document doc) {
Field vulnerable = new Field(Fields.VULNERABLE_CPE, cpe, Field.Store.NO, Field.Index.ANALYZED); Field vulnerable = new StringField(Fields.VULNERABLE_CPE, cpe, Field.Store.NO);
vulnerable.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(vulnerable); doc.add(vulnerable);
} }
} }

View File

@@ -30,6 +30,8 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field; import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
@@ -121,23 +123,21 @@ public class NvdCveParser extends Index {
sb.append("id=\"").append(id).append("\">"); sb.append("id=\"").append(id).append("\">");
//sb.append(str); //need to do the above to get the correct schema generated from files. //sb.append(str); //need to do the above to get the correct schema generated from files.
Field name = new Field(Fields.CVE_ID, id, Field.Store.NO, Field.Index.ANALYZED); Field name = new StringField(Fields.CVE_ID, id, Field.Store.NO);
name.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(name); doc.add(name);
continue; continue;
} }
Matcher matcherSummary = rxSummary.matcher(str); // Matcher matcherSummary = rxSummary.matcher(str);
if (matcherSummary.matches()) { // if (matcherSummary.matches()) {
String summary = matcherSummary.group(1); // String summary = matcherSummary.group(1);
Field description = new Field(Fields.DESCRIPTION, summary, Field.Store.NO, Field.Index.ANALYZED); // Field description = new Field(Fields.DESCRIPTION, summary, Field.Store.NO);
description.setIndexOptions(IndexOptions.DOCS_ONLY); // doc.add(description);
doc.add(description); // continue;
continue; // }
}
if (matcherEntryEnd.matches()) { if (matcherEntryEnd.matches()) {
sb.append("</vulnerabilityType>"); sb.append("</vulnerabilityType>");
Field xml = new Field(Fields.XML, sb.toString(), Field.Store.YES, Field.Index.NO); Field xml = new StoredField(Fields.XML, sb.toString());
doc.add(xml); doc.add(xml);
if (!skipEntry) { if (!skipEntry) {
@@ -184,8 +184,7 @@ public class NvdCveParser extends Index {
* @throws IOException is thrown if there is an IO Exception while writing to the CPE Index * @throws IOException is thrown if there is an IO Exception while writing to the CPE Index
*/ */
private void addVulnerableCpe(String cpe, Document doc) throws CorruptIndexException, IOException { private void addVulnerableCpe(String cpe, Document doc) throws CorruptIndexException, IOException {
Field vulnerable = new Field(Fields.VULNERABLE_CPE, cpe, Field.Store.NO, Field.Index.ANALYZED); Field vulnerable = new StringField(Fields.VULNERABLE_CPE, cpe, Field.Store.NO);
vulnerable.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(vulnerable); doc.add(vulnerable);
//HACK - this has initially been placed here as a hack because not all //HACK - this has initially been placed here as a hack because not all
@@ -194,8 +193,6 @@ public class NvdCveParser extends Index {
Entry cpeEntry = new Entry(); Entry cpeEntry = new Entry();
try { try {
cpeEntry.parseName(cpe); cpeEntry.parseName(cpe);
cpeEntry.setNvdId("0");
cpeEntry.setTitle(cpe);
} catch (UnsupportedEncodingException ex) { } catch (UnsupportedEncodingException ex) {
Logger.getLogger(NvdCveParser.class.getName()).log(Level.SEVERE, null, ex); Logger.getLogger(NvdCveParser.class.getName()).log(Level.SEVERE, null, ex);
} }

View File

@@ -10,7 +10,7 @@ import java.util.HashSet;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryparser.classic.ParseException;
import org.codesecure.dependencycheck.dependency.Dependency; import org.codesecure.dependencycheck.dependency.Dependency;
import org.codesecure.dependencycheck.analyzer.JarAnalyzer; import org.codesecure.dependencycheck.analyzer.JarAnalyzer;
import org.junit.Test; import org.junit.Test;