Major improvements in NVD CVE Import speed

Former-commit-id: 918f7e6a6d0336b7620962bc909bef204653346d
This commit is contained in:
Jeremy Long
2012-10-30 00:35:13 -04:00
parent 7ba6a731ff
commit 33b8da888b
7 changed files with 194 additions and 54 deletions

View File

@@ -115,7 +115,7 @@ public class Index extends AbstractIndex implements CachedWebDataSource {
}
}
if (maxUpdates > 3) {
Logger.getLogger(Index.class.getName()).log(Level.WARNING, "NVD CVE requires several updates. This could take a couple of hours. To avoid this in the future, ensure that an update is run at least every seven days.");
Logger.getLogger(Index.class.getName()).log(Level.WARNING, "NVD CVE requires several updates; this could take a couple of minutes.");
}
int count = 0;
for (NvdCveUrl cve : update.values()) {

View File

@@ -24,7 +24,7 @@ package org.codesecure.dependencycheck.data.nvdcve;
*
* @author Jeremy
*/
class InvalidDataException extends Exception {
public class InvalidDataException extends Exception {
/**
* Creates an InvalidDataException

View File

@@ -25,6 +25,7 @@ import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import org.apache.lucene.index.CorruptIndexException;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
@@ -54,34 +55,55 @@ public class Importer {
* @throws JAXBException is thrown when there is a JAXBException.
* @throws SAXException is thrown when there is a SAXException.
*/
public static void importXML(File file) throws FileNotFoundException, IOException, JAXBException,
ParserConfigurationException, SAXException {
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(true);
XMLReader reader = factory.newSAXParser().getXMLReader();
JAXBContext context = JAXBContext.newInstance("org.codesecure.dependencycheck.data.nvdcve.generated");
NvdCveXmlFilter filter = new NvdCveXmlFilter(context);
Indexer indexer = new Indexer();
indexer.openIndexWriter();
filter.registerSaveDelegate(indexer);
reader.setContentHandler(filter);
Reader fileReader = new FileReader(file);
InputSource is = new InputSource(fileReader);
public static void importXML(File file) {
NvdCveParser indexer = null;
try {
reader.parse(is);
indexer = new NvdCveParser();
indexer.openIndexWriter();
indexer.parse(file);
} catch (CorruptIndexException ex) {
Logger.getLogger(Importer.class.getName()).log(Level.SEVERE, null, ex);
} catch (IOException ex) {
Logger.getLogger(Importer.class.getName()).log(Level.SEVERE, null, ex);
} catch (SAXException ex) {
Logger.getLogger(Importer.class.getName()).log(Level.SEVERE, null, ex);
} finally {
indexer.close();
if (indexer != null) {
indexer.close();
}
}
}
// public static void importXML(File file) throws FileNotFoundException, IOException, JAXBException,
// ParserConfigurationException, SAXException {
//
// SAXParserFactory factory = SAXParserFactory.newInstance();
// factory.setNamespaceAware(true);
// XMLReader reader = factory.newSAXParser().getXMLReader();
//
// JAXBContext context = JAXBContext.newInstance("org.codesecure.dependencycheck.data.nvdcve.generated");
// NvdCveXmlFilter filter = new NvdCveXmlFilter(context);
//
// Indexer indexer = new Indexer();
// indexer.openIndexWriter();
//
// filter.registerSaveDelegate(indexer);
//
// reader.setContentHandler(filter);
// Reader fileReader = new FileReader(file);
// InputSource is = new InputSource(fileReader);
// try {
// reader.parse(is);
// } catch (IOException ex) {
// Logger.getLogger(Importer.class.getName()).log(Level.SEVERE, null, ex);
// } catch (SAXException ex) {
// Logger.getLogger(Importer.class.getName()).log(Level.SEVERE, null, ex);
// } finally {
// indexer.close();
// }
// }
/**
* Imports the CPE XML File into the Lucene Index.

View File

@@ -30,10 +30,10 @@ public class NvdCveParser extends Index {
public void parse(File file) {
FileReader fr = null;
BufferedReader br = null;
Pattern rxEntry = Pattern.compile("^\\s*\\<entry\\s*id\\=\\\"([^\\\"]+)\\\"");
Pattern rxEntryEnd = Pattern.compile("^\\s*\\</entry");
Pattern rxFact = Pattern.compile("^\\s*\\<cpe\\-lang\\:fact\\-ref name=\\\"([^\\\"]+)");
Pattern rxSummary = Pattern.compile("^\\s*\\<vuln:summary>([^\\<]+");
Pattern rxEntry = Pattern.compile("^\\s*<entry\\s*id\\=\\\"([^\\\"]+)\\\".*$");
Pattern rxEntryEnd = Pattern.compile("^\\s*</entry>.*$");
Pattern rxFact = Pattern.compile("^\\s*<cpe\\-lang\\:fact\\-ref name=\\\"([^\\\"]+).*$");
Pattern rxSummary = Pattern.compile("^\\s*<vuln:summary>([^\\<]+).*$");
try {
fr = new FileReader(file);
br = new BufferedReader(fr);
@@ -41,17 +41,35 @@ public class NvdCveParser extends Index {
String str = null;
String id = null;
Document doc = new Document();
boolean skipEntry = true;
boolean started = false;
while ((str = br.readLine()) != null) {
sb.append(str);
Matcher matcherEntryEnd = rxEntryEnd.matcher(str);
if (started && !matcherEntryEnd.matches()) {
sb.append(str);
}
//facts occur more often, do them first.
Matcher matcherFact = rxFact.matcher(str);
if (matcherFact.matches()) {
addVulnerableCpe(matcherFact.group(0), doc);
String cpe = matcherFact.group(1);
if (cpe != null && cpe.startsWith("cpe:/a:")) {
skipEntry = false;
addVulnerableCpe(cpe, doc);
}
continue;
}
Matcher matcherEntry = rxEntry.matcher(str);
if (matcherEntry.matches()) {
id = matcherEntry.group(0);
started = true;
id = matcherEntry.group(1);
sb.append("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>");
sb.append("<vulnerabilityType xmlns=\"http://scap.nist.gov/schema/vulnerability/0.4\" xmlns:vuln=\"http://scap.nist.gov/schema/vulnerability/0.4\" xmlns:cpe-lang=\"http://cpe.mitre.org/language/2.0\" xmlns:cvss=\"http://scap.nist.gov/schema/cvss-v2/0.2\" xmlns:scap-core=\"http://scap.nist.gov/schema/scap-core/0.1\" xmlns:patch=\"http://scap.nist.gov/schema/patch/0.1\" xmlns:ns6=\"http://scap.nist.gov/schema/cve/0.1\" xmlns:cce=\"http://scap.nist.gov/schema/cce/0.1\" xmlns:vulnerability=\"http://scap.nist.gov/schema/feed/vulnerability/2.0\"");
sb.append(" id=\"").append(id).append("\">");
//sb.append(str); //need to do the above to get the correct schema generated from files.
Field name = new Field(Fields.CVE_ID, id, Field.Store.NO, Field.Index.ANALYZED);
name.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(name);
@@ -59,23 +77,30 @@ public class NvdCveParser extends Index {
}
Matcher matcherSummary = rxSummary.matcher(str);
if (matcherSummary.matches()) {
String summary = matcherSummary.group(0);
String summary = matcherSummary.group(1);
Field description = new Field(Fields.DESCRIPTION, summary, Field.Store.NO, Field.Index.ANALYZED);
description.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(description);
continue;
}
Matcher matcherEntryEnd = rxEntryEnd.matcher(str);
if (matcherEntryEnd.matches()) {
sb.append("</vulnerabilityType>");
Field xml = new Field(Fields.XML, sb.toString(), Field.Store.YES, Field.Index.NO);
doc.add(xml);
Term name = new Term(Fields.CVE_ID, LuceneUtils.escapeLuceneQuery(id));
indexWriter.updateDocument(name, doc);
if (!skipEntry) {
Term name = new Term(Fields.CVE_ID, id);
indexWriter.deleteDocuments(name);
indexWriter.addDocument(doc);
//indexWriter.updateDocument(name, doc);
}
//reset the document
doc = new Document();
sb = new StringBuilder(7000);
id = null;
skipEntry = true;
started = false;
}
}

View File

@@ -222,9 +222,9 @@ public class NvdCveXmlFilter extends XMLFilterImpl {
// then retrieve the fully unmarshalled object
try {
JAXBElement<VulnerabilityType> result = (JAXBElement<VulnerabilityType>) unmarshallerHandler.getResult();
VulnerabilityType entry = result.getValue();
if (saveDelegate != null) {
JAXBElement<VulnerabilityType> result = (JAXBElement<VulnerabilityType>) unmarshallerHandler.getResult();
VulnerabilityType entry = result.getValue();
saveDelegate.saveEntry(entry);
}
} catch (JAXBException je) { //we can continue with this exception.

View File

@@ -0,0 +1,67 @@
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.codesecure.dependencycheck.data.nvdcve.xml;
import java.io.File;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.index.CorruptIndexException;
import org.codesecure.dependencycheck.data.nvdcve.InvalidDataException;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.*;
/**
 * Runs {@link NvdCveParser} against the {@code nvdcve-2.0-2012.xml} sample
 * feed that is loaded from the test classpath.
 *
 * @author Jeremy Long (jeremy.long@gmail.com)
 */
public class NvdCveParserTest {

    /** The resource name of the NVD CVE feed parsed by {@link #testParse()}. */
    private static final String SAMPLE_FEED = "nvdcve-2.0-2012.xml";

    public NvdCveParserTest() {
    }

    @BeforeClass
    public static void setUpClass() throws Exception {
    }

    @AfterClass
    public static void tearDownClass() throws Exception {
    }

    @Before
    public void setUp() {
    }

    @After
    public void tearDown() {
    }

    /**
     * Test of parse method, of class NvdCveParser.
     *
     * Opens the index writer, parses the sample feed and always closes the
     * parser again, even when opening or parsing fails.
     *
     * @throws InvalidDataException wraps any URI, index or I/O failure so the
     * original cause is reported by the test runner.
     */
    @Test
    public void testParse() throws InvalidDataException {
        System.out.println("parse");

        // Fail with a readable message instead of a NullPointerException when
        // the sample feed is missing from the classpath.
        java.net.URL resource = this.getClass().getClassLoader().getResource(SAMPLE_FEED);
        assertNotNull("test resource " + SAMPLE_FEED + " was not found on the classpath", resource);

        NvdCveParser instance = null;
        try {
            // URL.getPath() keeps percent-encoding (e.g. "%20" for a space), which
            // produces a non-existent file name; going through a URI decodes it.
            File file = new File(resource.toURI());
            instance = new NvdCveParser();
            instance.openIndexWriter();
            instance.parse(file);
        } catch (java.net.URISyntaxException ex) {
            throw new InvalidDataException("invalid resource URI", ex);
        } catch (CorruptIndexException ex) {
            throw new InvalidDataException("corrupt index", ex);
        } catch (IOException ex) {
            throw new InvalidDataException("IO Exception", ex);
        } finally {
            if (instance != null) {
                instance.close();
            }
        }
    }
}

View File

@@ -12,10 +12,14 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.net.MalformedURLException;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import org.apache.lucene.index.CorruptIndexException;
import org.codesecure.dependencycheck.data.nvdcve.InvalidDataException;
import org.codesecure.dependencycheck.data.nvdcve.generated.VulnerabilityType;
import org.junit.After;
import org.junit.AfterClass;
@@ -58,20 +62,42 @@ public class NvdCveXmlFilterTest {
* Test of process method, of class NvdCveXmlFilter.
*/
@Test
public void testFilter() throws JAXBException, SAXException, ParserConfigurationException, MalformedURLException, IOException {
System.out.println("filter");
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(true);
XMLReader reader = factory.newSAXParser().getXMLReader();
JAXBContext context = JAXBContext.newInstance("org.codesecure.dependencycheck.data.nvdcve.generated");
NvdCveXmlFilter filter = new NvdCveXmlFilter(context);
public void testFilter() throws InvalidDataException {
Indexer indexer = null;
try {
System.out.println("filter");
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(true);
XMLReader reader = factory.newSAXParser().getXMLReader();
JAXBContext context = JAXBContext.newInstance("org.codesecure.dependencycheck.data.nvdcve.generated");
NvdCveXmlFilter filter = new NvdCveXmlFilter(context);
indexer = new Indexer();
indexer.openIndexWriter();
reader.setContentHandler(filter);
File file = new File(this.getClass().getClassLoader().getResource("nvdcve-2.0-2012.xml").getPath());
Reader fileReader = new FileReader(file);
InputSource is = new InputSource(fileReader);
reader.parse(is);
filter.registerSaveDelegate(indexer);
reader.setContentHandler(filter);
File file = new File(this.getClass().getClassLoader().getResource("nvdcve-2.0-2012.xml").getPath());
Reader fileReader = new FileReader(file);
InputSource is = new InputSource(fileReader);
reader.parse(is);
} catch (JAXBException ex) {
throw new InvalidDataException("JAXBException", ex);
} catch (SAXException ex) {
throw new InvalidDataException("SAXException", ex);
} catch (ParserConfigurationException ex) {
throw new InvalidDataException("ParserConfigurationException", ex);
} catch (CorruptIndexException ex) {
throw new InvalidDataException("CorruptIndexException", ex);
} catch (IOException ex) {
throw new InvalidDataException("IOException", ex);
} finally {
if (indexer != null) {
indexer.close();
}
}
}
}