Upgrade to Lucene 4.0

Former-commit-id: 97ae6c47d6498fea873202dae257a2dfab0b683f
2026-04-30 20:24:32 +02:00 · 2012-12-16 21:26:30 -05:00
parent 0027e75a45
commit 36ecf7c7fd
12 changed files with 62 additions and 82 deletions
--- a/pom.xml
+++ b/pom.xml
@@ -389,7 +389,18 @@ along with DependencyCheck.  If not, see <http://www.gnu.org/licenses/>.
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
-            <version>3.5.0</version>
+            <version>4.0.0</version>
+            <!--<version>3.5.0</version>-->
+        </dependency>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-analyzers-common</artifactId>
+            <version>4.0.0</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-queryparser</artifactId>
+            <version>4.0.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
--- a/src/main/java/org/codesecure/dependencycheck/data/cpe/CPEAnalyzer.java
+++ b/src/main/java/org/codesecure/dependencycheck/data/cpe/CPEAnalyzer.java
@@ -27,8 +27,10 @@ import java.util.StringTokenizer;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.queryParser.ParseException;
-import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.classic.QueryParser;
+//TODO convert to the analyzing query parser
+//import org.apache.lucene.queryparser.analyzing.AnalyzingQueryParser;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
@@ -98,8 +100,7 @@ public class CPEAnalyzer implements org.codesecure.dependencycheck.analyzer.Anal
        cpe.open();
        indexSearcher = cpe.getIndexSearcher();
        Analyzer analyzer = cpe.getAnalyzer();
-        //TITLE is the default field because it contains venddor, product, and version all in one.
-        queryParser = new QueryParser(Version.LUCENE_35, Fields.TITLE, analyzer);
+        queryParser = new QueryParser(Version.LUCENE_40, Fields.NAME, analyzer);
    }

    /**
--- a/src/main/java/org/codesecure/dependencycheck/data/cpe/Entry.java
+++ b/src/main/java/org/codesecure/dependencycheck/data/cpe/Entry.java
@@ -46,7 +46,6 @@ public class Entry {
        Entry entry = new Entry();
        try {
            entry.parseName(doc.get(Fields.NAME));
-            entry.setTitle(doc.get(Fields.TITLE));
        } catch (UnsupportedEncodingException ex) {
            Logger.getLogger(Entry.class.getName()).log(Level.SEVERE, null, ex);
            entry.name = doc.get(Fields.NAME);
--- a/src/main/java/org/codesecure/dependencycheck/data/cpe/Fields.java
+++ b/src/main/java/org/codesecure/dependencycheck/data/cpe/Fields.java
@@ -34,22 +34,13 @@ public abstract class Fields {
     * The key for the vendor field.
     */
    public static final String VENDOR = "vendor";
-    /**
-     * The key for the version field.
-     */
-    public static final String VERSION = "version";
-    //public static final String REVISION = "revision";
    /**
     * The key for the product field.
     */
    public static final String PRODUCT = "product";
    /**
-     * The key for the title field. This is a field combining vendor, product,
-     * and version.
+     * The key for the version field.
     */
-    public static final String TITLE = "title";
-    /**
-     * The key for the nvdId field.
-     */
-    public static final String NVDID = "nvdid";
+    public static final String VERSION = "version";
+    //public static final String REVISION = "revision";
 }
--- a/src/main/java/org/codesecure/dependencycheck/data/cpe/Index.java
+++ b/src/main/java/org/codesecure/dependencycheck/data/cpe/Index.java
@@ -36,8 +36,8 @@ import java.util.logging.Level;
 import java.util.logging.Logger;
 import javax.xml.parsers.ParserConfigurationException;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.KeywordAnalyzer;
-import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
+import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
@@ -121,7 +121,7 @@ public class Index extends AbstractIndex implements CachedWebDataSource {
        fieldAnalyzers.put(Fields.NAME, new KeywordAnalyzer());

        PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
-                new StandardAnalyzer(Version.LUCENE_35), fieldAnalyzers);
+                new StandardAnalyzer(Version.LUCENE_40), fieldAnalyzers);

        return wrapper;
    }
--- a/src/main/java/org/codesecure/dependencycheck/data/cpe/xml/Indexer.java
+++ b/src/main/java/org/codesecure/dependencycheck/data/cpe/xml/Indexer.java
@@ -21,6 +21,8 @@ package org.codesecure.dependencycheck.data.cpe.xml;
 import java.io.IOException;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.Term;
@@ -59,44 +61,30 @@ public class Indexer extends Index implements EntrySaveDelegate {
    protected Document convertEntryToDoc(Entry entry) {
        Document doc = new Document();

-        Field name = new Field(Fields.NAME, entry.getName(), Field.Store.YES, Field.Index.ANALYZED);
-        name.setIndexOptions(IndexOptions.DOCS_ONLY);
+        Field name = new StoredField(Fields.NAME, entry.getName());
        doc.add(name);

-        Field nvdId = new Field(Fields.NVDID, entry.getNvdId(), Field.Store.NO, Field.Index.ANALYZED);
-        nvdId.setIndexOptions(IndexOptions.DOCS_ONLY);
-        doc.add(nvdId);
-
-        Field vendor = new Field(Fields.VENDOR, entry.getVendor(), Field.Store.NO, Field.Index.ANALYZED);
-        vendor.setIndexOptions(IndexOptions.DOCS_ONLY);
+        Field vendor = new TextField(Fields.VENDOR, entry.getVendor(), Field.Store.NO);
        vendor.setBoost(5.0F);
        doc.add(vendor);

-        Field product = new Field(Fields.PRODUCT, entry.getProduct(), Field.Store.NO, Field.Index.ANALYZED);
-        product.setIndexOptions(IndexOptions.DOCS_ONLY);
+        Field product = new TextField(Fields.PRODUCT, entry.getProduct(), Field.Store.NO);
        product.setBoost(5.0F);
        doc.add(product);

-        Field title = new Field(Fields.TITLE, entry.getTitle(), Field.Store.YES, Field.Index.ANALYZED);
-        title.setIndexOptions(IndexOptions.DOCS_ONLY);
-        //title.setBoost(1.0F);
-        doc.add(title);
-
        //TODO revision should likely be its own field
        if (entry.getVersion() != null) {
            Field version = null;
            if (entry.getRevision() != null) {
-                version = new Field(Fields.VERSION, entry.getVersion() + " "
-                        + entry.getRevision(), Field.Store.NO, Field.Index.ANALYZED);
+                version = new TextField(Fields.VERSION, entry.getVersion() + " "
+                        + entry.getRevision(), Field.Store.NO);
            } else {
-                version = new Field(Fields.VERSION, entry.getVersion(),
-                        Field.Store.NO, Field.Index.ANALYZED);
+                version = new TextField(Fields.VERSION, entry.getVersion(),
+                        Field.Store.NO);
            }
-            version.setIndexOptions(IndexOptions.DOCS_ONLY);
            version.setBoost(0.8F);
            doc.add(version);
        }
-
        return doc;
    }
 }
--- a/src/main/java/org/codesecure/dependencycheck/data/lucene/AbstractIndex.java
+++ b/src/main/java/org/codesecure/dependencycheck/data/lucene/AbstractIndex.java
@@ -23,6 +23,7 @@ import java.util.logging.Level;
 import java.util.logging.Logger;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -98,13 +99,7 @@ public abstract class AbstractIndex {
            }
        }
        if (indexSearcher != null) {
-            try {
-                indexSearcher.close();
-            } catch (IOException ex) {
-                Logger.getLogger(AbstractIndex.class.getName()).log(Level.SEVERE, null, ex);
-            } finally {
-                indexSearcher = null;
-            }
+            indexSearcher = null;
        }

        if (analyzer != null) {
@@ -140,7 +135,7 @@ public abstract class AbstractIndex {
        if (!isOpen()) {
            open();
        }
-        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_35, analyzer);
+        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        indexWriter = new IndexWriter(directory, conf);
    }

@@ -170,7 +165,8 @@ public abstract class AbstractIndex {
        if (!isOpen()) {
            open();
        }
-        indexReader = IndexReader.open(directory, true);
+        //indexReader = IndexReader.open(directory, true);
+        indexReader = DirectoryReader.open(directory);
    }

    /**
--- a/src/main/java/org/codesecure/dependencycheck/data/lucene/DependencySimilarity.java
+++ b/src/main/java/org/codesecure/dependencycheck/data/lucene/DependencySimilarity.java
@@ -18,7 +18,7 @@ package org.codesecure.dependencycheck.data.lucene;
 * Copyright (c) 2012 Jeremy Long. All Rights Reserved.
 */

-import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.search.similarities.DefaultSimilarity;

 /**
 *
@@ -41,7 +41,7 @@ public class DependencySimilarity extends DefaultSimilarity {
     * @return 1
     */
    @Override
-    public float idf(int docFreq, int numDocs) {
+    public float idf(long docFreq, long numDocs) {
        return 1;
    }
 }
--- a/src/main/java/org/codesecure/dependencycheck/data/nvdcve/Index.java
+++ b/src/main/java/org/codesecure/dependencycheck/data/nvdcve/Index.java
@@ -28,8 +28,8 @@ import java.util.logging.Logger;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.KeywordAnalyzer;
-import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
+import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
@@ -119,7 +119,7 @@ public class Index extends AbstractIndex implements CachedWebDataSource {
        fieldAnalyzers.put(Fields.VULNERABLE_CPE, new KeywordAnalyzer());

        PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
-                new StandardAnalyzer(Version.LUCENE_35), fieldAnalyzers);
+                new StandardAnalyzer(Version.LUCENE_40), fieldAnalyzers);

        return wrapper;
    }
--- a/src/main/java/org/codesecure/dependencycheck/data/nvdcve/xml/Indexer.java
+++ b/src/main/java/org/codesecure/dependencycheck/data/nvdcve/xml/Indexer.java
@@ -27,8 +27,9 @@ import javax.xml.bind.JAXBException;
 import javax.xml.bind.Marshaller;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.Term;
 import org.codesecure.dependencycheck.data.lucene.LuceneUtils;
 import org.codesecure.dependencycheck.data.nvdcve.generated.VulnerabilityType;
@@ -101,14 +102,11 @@ public class Indexer extends Index implements EntrySaveDelegate {
            return null;
        }

-        Field name = new Field(Fields.CVE_ID, vulnerability.getId(), Field.Store.NO, Field.Index.ANALYZED);
-        name.setIndexOptions(IndexOptions.DOCS_ONLY);
+        Field name = new StringField(Fields.CVE_ID, vulnerability.getId(), Field.Store.NO);
        doc.add(name);

-        Field description = new Field(Fields.DESCRIPTION, vulnerability.getSummary(), Field.Store.NO, Field.Index.ANALYZED);
-        description.setIndexOptions(IndexOptions.DOCS_ONLY);
-        doc.add(description);
-
+//        Field description = new Field(Fields.DESCRIPTION, vulnerability.getSummary(), Field.Store.NO, Field.Index.ANALYZED);
+//        doc.add(description);

        JAXBContext context = JAXBContext.newInstance("org.codesecure.dependencycheck.data.nvdcve.generated");

@@ -119,7 +117,7 @@ public class Indexer extends Index implements EntrySaveDelegate {

        m.marshal(vulnerability, out);

-        Field xml = new Field(Fields.XML, out.toString(), Field.Store.YES, Field.Index.NO);
+        Field xml = new StoredField(Fields.XML, out.toString());
        doc.add(xml);

        return doc;
@@ -141,8 +139,7 @@ public class Indexer extends Index implements EntrySaveDelegate {
    }

    private void addVulnerableCpe(String cpe, Document doc) {
-        Field vulnerable = new Field(Fields.VULNERABLE_CPE, cpe, Field.Store.NO, Field.Index.ANALYZED);
-        vulnerable.setIndexOptions(IndexOptions.DOCS_ONLY);
+        Field vulnerable = new StringField(Fields.VULNERABLE_CPE, cpe, Field.Store.NO);
        doc.add(vulnerable);
    }
 }
--- a/src/main/java/org/codesecure/dependencycheck/data/nvdcve/xml/NvdCveParser.java
+++ b/src/main/java/org/codesecure/dependencycheck/data/nvdcve/xml/NvdCveParser.java
@@ -30,6 +30,8 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.Term;
@@ -121,23 +123,21 @@ public class NvdCveParser extends Index {
                    sb.append("id=\"").append(id).append("\">");
                    //sb.append(str); //need to do the above to get the correct schema generated from files.

-                    Field name = new Field(Fields.CVE_ID, id, Field.Store.NO, Field.Index.ANALYZED);
-                    name.setIndexOptions(IndexOptions.DOCS_ONLY);
+                    Field name = new StringField(Fields.CVE_ID, id, Field.Store.NO);
                    doc.add(name);
                    continue;
                }
-                Matcher matcherSummary = rxSummary.matcher(str);
-                if (matcherSummary.matches()) {
-                    String summary = matcherSummary.group(1);
-                    Field description = new Field(Fields.DESCRIPTION, summary, Field.Store.NO, Field.Index.ANALYZED);
-                    description.setIndexOptions(IndexOptions.DOCS_ONLY);
-                    doc.add(description);
-                    continue;
-                }
+//                Matcher matcherSummary = rxSummary.matcher(str);
+//                if (matcherSummary.matches()) {
+//                    String summary = matcherSummary.group(1);
+//                    Field description = new Field(Fields.DESCRIPTION, summary, Field.Store.NO);
+//                    doc.add(description);
+//                    continue;
+//                }

                if (matcherEntryEnd.matches()) {
                    sb.append("</vulnerabilityType>");
-                    Field xml = new Field(Fields.XML, sb.toString(), Field.Store.YES, Field.Index.NO);
+                    Field xml = new StoredField(Fields.XML, sb.toString());
                    doc.add(xml);

                    if (!skipEntry) {
@@ -184,8 +184,7 @@ public class NvdCveParser extends Index {
     * @throws IOException is thrown if there is an IO Exception while writting to the CPE Index
     */
    private void addVulnerableCpe(String cpe, Document doc) throws CorruptIndexException, IOException {
-        Field vulnerable = new Field(Fields.VULNERABLE_CPE, cpe, Field.Store.NO, Field.Index.ANALYZED);
-        vulnerable.setIndexOptions(IndexOptions.DOCS_ONLY);
+        Field vulnerable = new StringField(Fields.VULNERABLE_CPE, cpe, Field.Store.NO);
        doc.add(vulnerable);

        //HACK - this has initially been placed here as a hack because not all
@@ -194,8 +193,6 @@ public class NvdCveParser extends Index {
        Entry cpeEntry = new Entry();
        try {
            cpeEntry.parseName(cpe);
-            cpeEntry.setNvdId("0");
-            cpeEntry.setTitle(cpe);
        } catch (UnsupportedEncodingException ex) {
            Logger.getLogger(NvdCveParser.class.getName()).log(Level.SEVERE, null, ex);
        }
--- a/src/test/java/org/codesecure/dependencycheck/data/cpe/CPEAnalyzerTest.java
+++ b/src/test/java/org/codesecure/dependencycheck/data/cpe/CPEAnalyzerTest.java
@@ -10,7 +10,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import org.apache.lucene.index.CorruptIndexException;
-import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryparser.classic.ParseException;
 import org.codesecure.dependencycheck.dependency.Dependency;
 import org.codesecure.dependencycheck.analyzer.JarAnalyzer;
 import org.junit.Test;