From 33b8da888b7b7d07a3c3ccd180c1060e71313d4a Mon Sep 17 00:00:00 2001 From: Jeremy Long Date: Tue, 30 Oct 2012 00:35:13 -0400 Subject: [PATCH] Major improvements in NVD CVE Import speed Former-commit-id: 918f7e6a6d0336b7620962bc909bef204653346d --- .../dependencycheck/data/nvdcve/Index.java | 2 +- .../data/nvdcve/InvalidDataException.java | 2 +- .../data/nvdcve/xml/Importer.java | 66 ++++++++++++------ .../data/nvdcve/xml/NvdCveParser.java | 53 +++++++++++---- .../data/nvdcve/xml/NvdCveXmlFilter.java | 4 +- .../data/nvdcve/xml/NvdCveParserTest.java | 67 +++++++++++++++++++ .../data/nvdcve/xml/NvdCveXmlFilterTest.java | 54 +++++++++++---- 7 files changed, 194 insertions(+), 54 deletions(-) create mode 100644 src/test/java/org/codesecure/dependencycheck/data/nvdcve/xml/NvdCveParserTest.java diff --git a/src/main/java/org/codesecure/dependencycheck/data/nvdcve/Index.java b/src/main/java/org/codesecure/dependencycheck/data/nvdcve/Index.java index f3e9d7856..50508a04c 100644 --- a/src/main/java/org/codesecure/dependencycheck/data/nvdcve/Index.java +++ b/src/main/java/org/codesecure/dependencycheck/data/nvdcve/Index.java @@ -115,7 +115,7 @@ public class Index extends AbstractIndex implements CachedWebDataSource { } } if (maxUpdates > 3) { - Logger.getLogger(Index.class.getName()).log(Level.WARNING, "NVD CVE requires several updates. This could take a couple of hours. To avoid this in the future, ensure that an update is run at least every seven days."); + Logger.getLogger(Index.class.getName()).log(Level.WARNING, "NVD CVE requires several updates; this could take a couple of minutes."); } int count = 0; for (NvdCveUrl cve : update.values()) { diff --git a/src/main/java/org/codesecure/dependencycheck/data/nvdcve/InvalidDataException.java b/src/main/java/org/codesecure/dependencycheck/data/nvdcve/InvalidDataException.java index 1dd3442ed..e9bdcbcfe 100644 --- a/src/main/java/org/codesecure/dependencycheck/data/nvdcve/InvalidDataException.java +++ b/src/main/java/org/codesecure/dependencycheck/data/nvdcve/InvalidDataException.java @@ -24,7 +24,7 @@ package org.codesecure.dependencycheck.data.nvdcve; * * @author Jeremy */ -class InvalidDataException extends Exception { +public class InvalidDataException extends Exception { /** * Creates an InvalidDataException diff --git a/src/main/java/org/codesecure/dependencycheck/data/nvdcve/xml/Importer.java b/src/main/java/org/codesecure/dependencycheck/data/nvdcve/xml/Importer.java index f555e08bd..f9d8b7731 100644 --- a/src/main/java/org/codesecure/dependencycheck/data/nvdcve/xml/Importer.java +++ b/src/main/java/org/codesecure/dependencycheck/data/nvdcve/xml/Importer.java @@ -25,6 +25,7 @@ import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBException; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParserFactory; +import org.apache.lucene.index.CorruptIndexException; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; @@ -54,34 +55,55 @@ public class Importer { * @throws JAXBException is thrown when there is a JAXBException. * @throws SAXException is thrown when there is a SAXException. */ - public static void importXML(File file) throws FileNotFoundException, IOException, JAXBException, - ParserConfigurationException, SAXException { - - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - XMLReader reader = factory.newSAXParser().getXMLReader(); - - JAXBContext context = JAXBContext.newInstance("org.codesecure.dependencycheck.data.nvdcve.generated"); - NvdCveXmlFilter filter = new NvdCveXmlFilter(context); - - Indexer indexer = new Indexer(); - indexer.openIndexWriter(); - - filter.registerSaveDelegate(indexer); - - reader.setContentHandler(filter); - Reader fileReader = new FileReader(file); - InputSource is = new InputSource(fileReader); + public static void importXML(File file) { + NvdCveParser indexer = null; try { - reader.parse(is); + + indexer = new NvdCveParser(); + + indexer.openIndexWriter(); + + + indexer.parse(file); + + } catch (CorruptIndexException ex) { + Logger.getLogger(Importer.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(Importer.class.getName()).log(Level.SEVERE, null, ex); - } catch (SAXException ex) { - Logger.getLogger(Importer.class.getName()).log(Level.SEVERE, null, ex); } finally { - indexer.close(); + if (indexer != null) { + indexer.close(); + } } } +// public static void importXML(File file) throws FileNotFoundException, IOException, JAXBException, +// ParserConfigurationException, SAXException { +// +// SAXParserFactory factory = SAXParserFactory.newInstance(); +// factory.setNamespaceAware(true); +// XMLReader reader = factory.newSAXParser().getXMLReader(); +// +// JAXBContext context = JAXBContext.newInstance("org.codesecure.dependencycheck.data.nvdcve.generated"); +// NvdCveXmlFilter filter = new NvdCveXmlFilter(context); +// +// Indexer indexer = new Indexer(); +// indexer.openIndexWriter(); +// +// filter.registerSaveDelegate(indexer); +// +// reader.setContentHandler(filter); +// Reader fileReader = new FileReader(file); +// InputSource is = new InputSource(fileReader); +// try { +// reader.parse(is); +// } catch (IOException ex) { +// Logger.getLogger(Importer.class.getName()).log(Level.SEVERE, null, ex); +// } catch (SAXException ex) { +// Logger.getLogger(Importer.class.getName()).log(Level.SEVERE, null, ex); +// } finally { +// indexer.close(); +// } +// } /** * Imports the CPE XML File into the Lucene Index. diff --git a/src/main/java/org/codesecure/dependencycheck/data/nvdcve/xml/NvdCveParser.java b/src/main/java/org/codesecure/dependencycheck/data/nvdcve/xml/NvdCveParser.java index 5648978f8..b33640136 100644 --- a/src/main/java/org/codesecure/dependencycheck/data/nvdcve/xml/NvdCveParser.java +++ b/src/main/java/org/codesecure/dependencycheck/data/nvdcve/xml/NvdCveParser.java @@ -30,10 +30,10 @@ public class NvdCveParser extends Index { public void parse(File file) { FileReader fr = null; BufferedReader br = null; - Pattern rxEntry = Pattern.compile("^\\s*\\([^\\<]+"); + Pattern rxEntry = Pattern.compile("^\\s*.*$"); + Pattern rxFact = Pattern.compile("^\\s*([^\\<]+).*$"); try { fr = new FileReader(file); br = new BufferedReader(fr); @@ -41,17 +41,35 @@ public class NvdCveParser extends Index { String str = null; String id = null; Document doc = new Document(); + boolean skipEntry = true; + boolean started = false; + while ((str = br.readLine()) != null) { - sb.append(str); + Matcher matcherEntryEnd = rxEntryEnd.matcher(str); + + if (started && !matcherEntryEnd.matches()) { + sb.append(str); + } //facts occur more often, do them first. Matcher matcherFact = rxFact.matcher(str); if (matcherFact.matches()) { - addVulnerableCpe(matcherFact.group(0), doc); + String cpe = matcherFact.group(1); + if (cpe != null && cpe.startsWith("cpe:/a:")) { + skipEntry = false; + addVulnerableCpe(cpe, doc); + } continue; } Matcher matcherEntry = rxEntry.matcher(str); if (matcherEntry.matches()) { - id = matcherEntry.group(0); + started = true; + id = matcherEntry.group(1); + + sb.append(""); + sb.append(""); + //sb.append(str); //need to do the above to get the correct schema generated from files. + Field name = new Field(Fields.CVE_ID, id, Field.Store.NO, Field.Index.ANALYZED); name.setIndexOptions(IndexOptions.DOCS_ONLY); doc.add(name); @@ -59,23 +77,30 @@ public class NvdCveParser extends Index { } Matcher matcherSummary = rxSummary.matcher(str); if (matcherSummary.matches()) { - String summary = matcherSummary.group(0); + String summary = matcherSummary.group(1); Field description = new Field(Fields.DESCRIPTION, summary, Field.Store.NO, Field.Index.ANALYZED); description.setIndexOptions(IndexOptions.DOCS_ONLY); doc.add(description); continue; } - Matcher matcherEntryEnd = rxEntryEnd.matcher(str); + if (matcherEntryEnd.matches()) { - + sb.append(""); Field xml = new Field(Fields.XML, sb.toString(), Field.Store.YES, Field.Index.NO); doc.add(xml); - Term name = new Term(Fields.CVE_ID, LuceneUtils.escapeLuceneQuery(id)); - indexWriter.updateDocument(name, doc); - + if (!skipEntry) { + Term name = new Term(Fields.CVE_ID, id); + indexWriter.deleteDocuments(name); + indexWriter.addDocument(doc); + //indexWriter.updateDocument(name, doc); + } + //reset the document doc = new Document(); - + sb = new StringBuilder(7000); + id = null; + skipEntry = true; + started = false; } } diff --git a/src/main/java/org/codesecure/dependencycheck/data/nvdcve/xml/NvdCveXmlFilter.java b/src/main/java/org/codesecure/dependencycheck/data/nvdcve/xml/NvdCveXmlFilter.java index 662536387..abbb1e8dc 100644 --- a/src/main/java/org/codesecure/dependencycheck/data/nvdcve/xml/NvdCveXmlFilter.java +++ b/src/main/java/org/codesecure/dependencycheck/data/nvdcve/xml/NvdCveXmlFilter.java @@ -222,9 +222,9 @@ public class NvdCveXmlFilter extends XMLFilterImpl { // then retrieve the fully unmarshalled object try { - JAXBElement result = (JAXBElement) unmarshallerHandler.getResult(); - VulnerabilityType entry = result.getValue(); if (saveDelegate != null) { + JAXBElement result = (JAXBElement) unmarshallerHandler.getResult(); + VulnerabilityType entry = result.getValue(); saveDelegate.saveEntry(entry); } } catch (JAXBException je) { //we can continue with this exception. diff --git a/src/test/java/org/codesecure/dependencycheck/data/nvdcve/xml/NvdCveParserTest.java b/src/test/java/org/codesecure/dependencycheck/data/nvdcve/xml/NvdCveParserTest.java new file mode 100644 index 000000000..81a18403a --- /dev/null +++ b/src/test/java/org/codesecure/dependencycheck/data/nvdcve/xml/NvdCveParserTest.java @@ -0,0 +1,67 @@ +/* + * To change this template, choose Tools | Templates + * and open the template in the editor. + */ +package org.codesecure.dependencycheck.data.nvdcve.xml; + +import java.io.File; +import java.io.IOException; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.apache.lucene.index.CorruptIndexException; +import org.codesecure.dependencycheck.data.nvdcve.InvalidDataException; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import static org.junit.Assert.*; + +/** + * + * @author Jeremy Long (jeremy.long@gmail.com) + */ +public class NvdCveParserTest { + + public NvdCveParserTest() { + } + + @BeforeClass + public static void setUpClass() throws Exception { + } + + @AfterClass + public static void tearDownClass() throws Exception { + } + + @Before + public void setUp() { + } + + @After + public void tearDown() { + } + + /** + * Test of parse method, of class NvdCveParser. + */ + @Test + public void testParse() throws InvalidDataException { + NvdCveParser instance = null; + try { + System.out.println("parse"); + File file = new File(this.getClass().getClassLoader().getResource("nvdcve-2.0-2012.xml").getPath()); + instance = new NvdCveParser(); + instance.openIndexWriter(); + instance.parse(file); + } catch (CorruptIndexException ex) { + throw new InvalidDataException("corrupt index", ex); + } catch (IOException ex) { + throw new InvalidDataException("IO Exception", ex); + } finally { + if (instance != null) { + instance.close(); + } + } + } +} diff --git a/src/test/java/org/codesecure/dependencycheck/data/nvdcve/xml/NvdCveXmlFilterTest.java b/src/test/java/org/codesecure/dependencycheck/data/nvdcve/xml/NvdCveXmlFilterTest.java index de95cbb11..c93c265ce 100644 --- a/src/test/java/org/codesecure/dependencycheck/data/nvdcve/xml/NvdCveXmlFilterTest.java +++ b/src/test/java/org/codesecure/dependencycheck/data/nvdcve/xml/NvdCveXmlFilterTest.java @@ -12,10 +12,14 @@ import java.io.IOException; import java.io.InputStream; import java.io.Reader; import java.net.MalformedURLException; +import java.util.logging.Level; +import java.util.logging.Logger; import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBException; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParserFactory; +import org.apache.lucene.index.CorruptIndexException; +import org.codesecure.dependencycheck.data.nvdcve.InvalidDataException; import org.codesecure.dependencycheck.data.nvdcve.generated.VulnerabilityType; import org.junit.After; import org.junit.AfterClass; @@ -58,20 +62,42 @@ public class NvdCveXmlFilterTest { * Test of process method, of class NvdCveXmlFilter. */ @Test - public void testFilter() throws JAXBException, SAXException, ParserConfigurationException, MalformedURLException, IOException { - System.out.println("filter"); - - SAXParserFactory factory = SAXParserFactory.newInstance(); - factory.setNamespaceAware(true); - XMLReader reader = factory.newSAXParser().getXMLReader(); - - JAXBContext context = JAXBContext.newInstance("org.codesecure.dependencycheck.data.nvdcve.generated"); - NvdCveXmlFilter filter = new NvdCveXmlFilter(context); + public void testFilter() throws InvalidDataException { + Indexer indexer = null; + try { + System.out.println("filter"); + + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + XMLReader reader = factory.newSAXParser().getXMLReader(); + + JAXBContext context = JAXBContext.newInstance("org.codesecure.dependencycheck.data.nvdcve.generated"); + NvdCveXmlFilter filter = new NvdCveXmlFilter(context); + + indexer = new Indexer(); + indexer.openIndexWriter(); - reader.setContentHandler(filter); - File file = new File(this.getClass().getClassLoader().getResource("nvdcve-2.0-2012.xml").getPath()); - Reader fileReader = new FileReader(file); - InputSource is = new InputSource(fileReader); - reader.parse(is); + filter.registerSaveDelegate(indexer); + + reader.setContentHandler(filter); + File file = new File(this.getClass().getClassLoader().getResource("nvdcve-2.0-2012.xml").getPath()); + Reader fileReader = new FileReader(file); + InputSource is = new InputSource(fileReader); + reader.parse(is); + } catch (JAXBException ex) { + throw new InvalidDataException("JAXBException", ex); + } catch (SAXException ex) { + throw new InvalidDataException("SAXException", ex); + } catch (ParserConfigurationException ex) { + throw new InvalidDataException("ParserConfigurationException", ex); + } catch (CorruptIndexException ex) { + throw new InvalidDataException("CorruptIndexException", ex); + } catch (IOException ex) { + throw new InvalidDataException("IOException", ex); + } finally { + if (indexer != null) { + indexer.close(); + } + } } }