upgrade to lucene 4.0

Former-commit-id: 0822d5816b603d8017b2fe8aa2a592aa3263c51c
This commit is contained in:
Jeremy Long
2012-12-16 21:26:30 -05:00
parent 2fcc325af7
commit a16bcfbc10
12 changed files with 62 additions and 82 deletions

13
pom.xml
View File

@@ -389,7 +389,18 @@ along with DependencyCheck. If not, see <http://www.gnu.org/licenses/>.
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>3.5.0</version>
<version>4.0.0</version>
<!--<version>3.5.0</version>-->
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>4.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>4.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>

View File

@@ -27,8 +27,10 @@ import java.util.StringTokenizer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
//TODO convert to the analyzing query parser
//import org.apache.lucene.queryparser.analyzing.AnalyzingQueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
@@ -98,8 +100,7 @@ public class CPEAnalyzer implements org.codesecure.dependencycheck.analyzer.Anal
cpe.open();
indexSearcher = cpe.getIndexSearcher();
Analyzer analyzer = cpe.getAnalyzer();
//TITLE is the default field because it contains vendor, product, and version all in one.
queryParser = new QueryParser(Version.LUCENE_35, Fields.TITLE, analyzer);
queryParser = new QueryParser(Version.LUCENE_40, Fields.NAME, analyzer);
}
/**

View File

@@ -46,7 +46,6 @@ public class Entry {
Entry entry = new Entry();
try {
entry.parseName(doc.get(Fields.NAME));
entry.setTitle(doc.get(Fields.TITLE));
} catch (UnsupportedEncodingException ex) {
Logger.getLogger(Entry.class.getName()).log(Level.SEVERE, null, ex);
entry.name = doc.get(Fields.NAME);

View File

@@ -34,22 +34,13 @@ public abstract class Fields {
* The key for the vendor field.
*/
public static final String VENDOR = "vendor";
/**
* The key for the version field.
*/
public static final String VERSION = "version";
//public static final String REVISION = "revision";
/**
* The key for the product field.
*/
public static final String PRODUCT = "product";
/**
* The key for the title field. This is a field combining vendor, product,
* and version.
* The key for the version field.
*/
public static final String TITLE = "title";
/**
* The key for the nvdId field.
*/
public static final String NVDID = "nvdid";
public static final String VERSION = "version";
//public static final String REVISION = "revision";
}

View File

@@ -36,8 +36,8 @@ import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -121,7 +121,7 @@ public class Index extends AbstractIndex implements CachedWebDataSource {
fieldAnalyzers.put(Fields.NAME, new KeywordAnalyzer());
PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
new StandardAnalyzer(Version.LUCENE_35), fieldAnalyzers);
new StandardAnalyzer(Version.LUCENE_40), fieldAnalyzers);
return wrapper;
}

View File

@@ -21,6 +21,8 @@ package org.codesecure.dependencycheck.data.cpe.xml;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
@@ -59,44 +61,30 @@ public class Indexer extends Index implements EntrySaveDelegate {
protected Document convertEntryToDoc(Entry entry) {
Document doc = new Document();
Field name = new Field(Fields.NAME, entry.getName(), Field.Store.YES, Field.Index.ANALYZED);
name.setIndexOptions(IndexOptions.DOCS_ONLY);
Field name = new StoredField(Fields.NAME, entry.getName());
doc.add(name);
Field nvdId = new Field(Fields.NVDID, entry.getNvdId(), Field.Store.NO, Field.Index.ANALYZED);
nvdId.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(nvdId);
Field vendor = new Field(Fields.VENDOR, entry.getVendor(), Field.Store.NO, Field.Index.ANALYZED);
vendor.setIndexOptions(IndexOptions.DOCS_ONLY);
Field vendor = new TextField(Fields.VENDOR, entry.getVendor(), Field.Store.NO);
vendor.setBoost(5.0F);
doc.add(vendor);
Field product = new Field(Fields.PRODUCT, entry.getProduct(), Field.Store.NO, Field.Index.ANALYZED);
product.setIndexOptions(IndexOptions.DOCS_ONLY);
Field product = new TextField(Fields.PRODUCT, entry.getProduct(), Field.Store.NO);
product.setBoost(5.0F);
doc.add(product);
Field title = new Field(Fields.TITLE, entry.getTitle(), Field.Store.YES, Field.Index.ANALYZED);
title.setIndexOptions(IndexOptions.DOCS_ONLY);
//title.setBoost(1.0F);
doc.add(title);
//TODO revision should likely be its own field
if (entry.getVersion() != null) {
Field version = null;
if (entry.getRevision() != null) {
version = new Field(Fields.VERSION, entry.getVersion() + " "
+ entry.getRevision(), Field.Store.NO, Field.Index.ANALYZED);
version = new TextField(Fields.VERSION, entry.getVersion() + " "
+ entry.getRevision(), Field.Store.NO);
} else {
version = new Field(Fields.VERSION, entry.getVersion(),
Field.Store.NO, Field.Index.ANALYZED);
version = new TextField(Fields.VERSION, entry.getVersion(),
Field.Store.NO);
}
version.setIndexOptions(IndexOptions.DOCS_ONLY);
version.setBoost(0.8F);
doc.add(version);
}
return doc;
}
}

View File

@@ -23,6 +23,7 @@ import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
@@ -98,13 +99,7 @@ public abstract class AbstractIndex {
}
}
if (indexSearcher != null) {
try {
indexSearcher.close();
} catch (IOException ex) {
Logger.getLogger(AbstractIndex.class.getName()).log(Level.SEVERE, null, ex);
} finally {
indexSearcher = null;
}
indexSearcher = null;
}
if (analyzer != null) {
@@ -140,7 +135,7 @@ public abstract class AbstractIndex {
if (!isOpen()) {
open();
}
IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, analyzer);
IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer);
indexWriter = new IndexWriter(directory, conf);
}
@@ -170,7 +165,8 @@ public abstract class AbstractIndex {
if (!isOpen()) {
open();
}
indexReader = IndexReader.open(directory, true);
//indexReader = IndexReader.open(directory, true);
indexReader = DirectoryReader.open(directory);
}
/**

View File

@@ -18,7 +18,7 @@ package org.codesecure.dependencycheck.data.lucene;
* Copyright (c) 2012 Jeremy Long. All Rights Reserved.
*/
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.similarities.DefaultSimilarity;
/**
*
@@ -41,7 +41,7 @@ public class DependencySimilarity extends DefaultSimilarity {
* @return 1
*/
@Override
public float idf(int docFreq, int numDocs) {
public float idf(long docFreq, long numDocs) {
return 1;
}
}

View File

@@ -28,8 +28,8 @@ import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -119,7 +119,7 @@ public class Index extends AbstractIndex implements CachedWebDataSource {
fieldAnalyzers.put(Fields.VULNERABLE_CPE, new KeywordAnalyzer());
PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
new StandardAnalyzer(Version.LUCENE_35), fieldAnalyzers);
new StandardAnalyzer(Version.LUCENE_40), fieldAnalyzers);
return wrapper;
}

View File

@@ -27,8 +27,9 @@ import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
import org.codesecure.dependencycheck.data.lucene.LuceneUtils;
import org.codesecure.dependencycheck.data.nvdcve.generated.VulnerabilityType;
@@ -101,14 +102,11 @@ public class Indexer extends Index implements EntrySaveDelegate {
return null;
}
Field name = new Field(Fields.CVE_ID, vulnerability.getId(), Field.Store.NO, Field.Index.ANALYZED);
name.setIndexOptions(IndexOptions.DOCS_ONLY);
Field name = new StringField(Fields.CVE_ID, vulnerability.getId(), Field.Store.NO);
doc.add(name);
Field description = new Field(Fields.DESCRIPTION, vulnerability.getSummary(), Field.Store.NO, Field.Index.ANALYZED);
description.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(description);
// Field description = new Field(Fields.DESCRIPTION, vulnerability.getSummary(), Field.Store.NO, Field.Index.ANALYZED);
// doc.add(description);
JAXBContext context = JAXBContext.newInstance("org.codesecure.dependencycheck.data.nvdcve.generated");
@@ -119,7 +117,7 @@ public class Indexer extends Index implements EntrySaveDelegate {
m.marshal(vulnerability, out);
Field xml = new Field(Fields.XML, out.toString(), Field.Store.YES, Field.Index.NO);
Field xml = new StoredField(Fields.XML, out.toString());
doc.add(xml);
return doc;
@@ -141,8 +139,7 @@ public class Indexer extends Index implements EntrySaveDelegate {
}
private void addVulnerableCpe(String cpe, Document doc) {
Field vulnerable = new Field(Fields.VULNERABLE_CPE, cpe, Field.Store.NO, Field.Index.ANALYZED);
vulnerable.setIndexOptions(IndexOptions.DOCS_ONLY);
Field vulnerable = new StringField(Fields.VULNERABLE_CPE, cpe, Field.Store.NO);
doc.add(vulnerable);
}
}

View File

@@ -30,6 +30,8 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
@@ -121,23 +123,21 @@ public class NvdCveParser extends Index {
sb.append("id=\"").append(id).append("\">");
//sb.append(str); //need to do the above to get the correct schema generated from files.
Field name = new Field(Fields.CVE_ID, id, Field.Store.NO, Field.Index.ANALYZED);
name.setIndexOptions(IndexOptions.DOCS_ONLY);
Field name = new StringField(Fields.CVE_ID, id, Field.Store.NO);
doc.add(name);
continue;
}
Matcher matcherSummary = rxSummary.matcher(str);
if (matcherSummary.matches()) {
String summary = matcherSummary.group(1);
Field description = new Field(Fields.DESCRIPTION, summary, Field.Store.NO, Field.Index.ANALYZED);
description.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(description);
continue;
}
// Matcher matcherSummary = rxSummary.matcher(str);
// if (matcherSummary.matches()) {
// String summary = matcherSummary.group(1);
// Field description = new Field(Fields.DESCRIPTION, summary, Field.Store.NO);
// doc.add(description);
// continue;
// }
if (matcherEntryEnd.matches()) {
sb.append("</vulnerabilityType>");
Field xml = new Field(Fields.XML, sb.toString(), Field.Store.YES, Field.Index.NO);
Field xml = new StoredField(Fields.XML, sb.toString());
doc.add(xml);
if (!skipEntry) {
@@ -184,8 +184,7 @@ public class NvdCveParser extends Index {
* @throws IOException is thrown if there is an IO Exception while writing to the CPE Index
*/
private void addVulnerableCpe(String cpe, Document doc) throws CorruptIndexException, IOException {
Field vulnerable = new Field(Fields.VULNERABLE_CPE, cpe, Field.Store.NO, Field.Index.ANALYZED);
vulnerable.setIndexOptions(IndexOptions.DOCS_ONLY);
Field vulnerable = new StringField(Fields.VULNERABLE_CPE, cpe, Field.Store.NO);
doc.add(vulnerable);
//HACK - this has initially been placed here as a hack because not all
@@ -194,8 +193,6 @@ public class NvdCveParser extends Index {
Entry cpeEntry = new Entry();
try {
cpeEntry.parseName(cpe);
cpeEntry.setNvdId("0");
cpeEntry.setTitle(cpe);
} catch (UnsupportedEncodingException ex) {
Logger.getLogger(NvdCveParser.class.getName()).log(Level.SEVERE, null, ex);
}

View File

@@ -10,7 +10,7 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryparser.classic.ParseException;
import org.codesecure.dependencycheck.dependency.Dependency;
import org.codesecure.dependencycheck.analyzer.JarAnalyzer;
import org.junit.Test;