performance enhancement for nvd cve import.

Former-commit-id: 5a40d5798c804c4632e6ed2dd04d6d7c9e0f51b8
This commit is contained in:
Jeremy Long
2012-10-30 21:13:47 -04:00
parent c694461abc
commit 7971c42814
10 changed files with 174 additions and 173 deletions

View File

@@ -26,8 +26,6 @@ import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.bind.JAXBException;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
@@ -42,7 +40,6 @@ import org.codesecure.dependencycheck.data.nvdcve.xml.Importer;
import org.codesecure.dependencycheck.utils.DownloadFailedException;
import org.codesecure.dependencycheck.utils.Downloader;
import org.codesecure.dependencycheck.utils.Settings;
import org.xml.sax.SAXException;
/**
* The Index class is used to utilize and maintain the NVD CVE Index.
@@ -134,15 +131,6 @@ public class Index extends AbstractIndex implements CachedWebDataSource {
} catch (FileNotFoundException ex) {
//Logger.getLogger(Index.class.getName()).log(Level.SEVERE, null, ex);
throw new UpdateException(ex);
} catch (JAXBException ex) {
//Logger.getLogger(Index.class.getName()).log(Level.SEVERE, null, ex);
throw new UpdateException(ex);
} catch (ParserConfigurationException ex) {
//Logger.getLogger(Index.class.getName()).log(Level.SEVERE, null, ex);
throw new UpdateException(ex);
} catch (SAXException ex) {
//Logger.getLogger(Index.class.getName()).log(Level.SEVERE, null, ex);
throw new UpdateException(ex);
} catch (IOException ex) {
//Logger.getLogger(Index.class.getName()).log(Level.SEVERE, null, ex);
throw new UpdateException(ex);

View File

@@ -21,14 +21,7 @@ package org.codesecure.dependencycheck.data.nvdcve.xml;
import java.io.*;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import org.apache.lucene.index.CorruptIndexException;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
/**
* Imports a NVD CVE XML file into the Lucene NVD CVE Index.
@@ -47,13 +40,6 @@ public class Importer {
* Imports the NVD CVE XML File into the Lucene Index.
*
* @param file containing the path to the NVD CVE XML file.
* @throws ParserConfigurationException is thrown if the parser is
* misconfigured.
* @throws FileNotFoundException is thrown when there is a
* FileNotFoundException.
* @throws IOException is thrown when there is an IOException.
* @throws JAXBException is thrown when there is a JAXBException.
* @throws SAXException is thrown when there is a SAXException.
*/
public static void importXML(File file) {
NvdCveParser indexer = null;
@@ -109,16 +95,8 @@ public class Importer {
* Imports the CPE XML File into the Lucene Index.
*
* @param path the path to the CPE XML file.
* @throws ParserConfigurationException is thrown if the parser is
* misconfigured.
* @throws FileNotFoundException is thrown when there is a
* FileNotFoundException.
* @throws IOException is thrown when there is an IOException.
* @throws JAXBException is thrown when there is a JAXBException.
* @throws SAXException is thrown when there is a SAXException.
*/
public static void importXML(String path) throws FileNotFoundException, IOException, JAXBException,
ParserConfigurationException, SAXException {
public static void importXML(String path) {
File f = new File(path);
if (!f.exists()) {
f.mkdirs();

View File

@@ -1,8 +1,22 @@
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.codesecure.dependencycheck.data.nvdcve.xml;
/*
* This file is part of DependencyCheck.
*
* DependencyCheck is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation, either version 3 of the License, or (at your option) any
* later version.
*
* DependencyCheck is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* DependencyCheck. If not, see http://www.gnu.org/licenses/.
*
* Copyright (c) 2012 Jeremy Long. All Rights Reserved.
*/
import java.io.BufferedReader;
import java.io.File;
@@ -17,7 +31,6 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.Term;
import org.codesecure.dependencycheck.data.lucene.LuceneUtils;
import org.codesecure.dependencycheck.data.nvdcve.Fields;
import org.codesecure.dependencycheck.data.nvdcve.Index;
@@ -26,7 +39,15 @@ import org.codesecure.dependencycheck.data.nvdcve.Index;
* @author Jeremy Long (jeremy.long@gmail.com)
*/
public class NvdCveParser extends Index {
/**
* Parses an NVD CVE XML file using a buffered reader. This
* method may be more fragile than using a partial-unmarshalling SAX
* Parser (aka the deprecated NvdCveXmlFilter) - but this method is
* orders of magnitude faster.
*
* @param file the reference to the NVD CVE file
*/
public void parse(File file) {
FileReader fr = null;
BufferedReader br = null;
@@ -43,10 +64,10 @@ public class NvdCveParser extends Index {
Document doc = new Document();
boolean skipEntry = true;
boolean started = false;
while ((str = br.readLine()) != null) {
Matcher matcherEntryEnd = rxEntryEnd.matcher(str);
if (started && !matcherEntryEnd.matches()) {
sb.append(str);
}
@@ -58,18 +79,32 @@ public class NvdCveParser extends Index {
skipEntry = false;
addVulnerableCpe(cpe, doc);
}
continue;
continue;
}
Matcher matcherEntry = rxEntry.matcher(str);
if (matcherEntry.matches()) {
started = true;
id = matcherEntry.group(1);
sb.append("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>");
sb.append("<vulnerabilityType xmlns=\"http://scap.nist.gov/schema/vulnerability/0.4\" xmlns:vuln=\"http://scap.nist.gov/schema/vulnerability/0.4\" xmlns:cpe-lang=\"http://cpe.mitre.org/language/2.0\" xmlns:cvss=\"http://scap.nist.gov/schema/cvss-v2/0.2\" xmlns:scap-core=\"http://scap.nist.gov/schema/scap-core/0.1\" xmlns:patch=\"http://scap.nist.gov/schema/patch/0.1\" xmlns:ns6=\"http://scap.nist.gov/schema/cve/0.1\" xmlns:cce=\"http://scap.nist.gov/schema/cce/0.1\" xmlns:vulnerability=\"http://scap.nist.gov/schema/feed/vulnerability/2.0\"");
sb.append(" id=\"").append(id).append("\">");
sb.append("<vulnerabilityType ");
//sb.append("xmlns=\"http://scap.nist.gov/schema/feed/vulnerability/2.0\" ");
//sb.append("xmlns:vuln=\"http://scap.nist.gov/schema/vulnerability/0.4\" ");
sb.append("xmlns=\"http://scap.nist.gov/schema/vulnerability/0.4\" ");
sb.append("xmlns:vuln=\"http://scap.nist.gov/schema/vulnerability/0.4\" ");
//sb.append("xmlns:vulnerability=\"http://scap.nist.gov/schema/feed/vulnerability/2.0\" ");
sb.append("xmlns:cpe-lang=\"http://cpe.mitre.org/language/2.0\" ");
sb.append("xmlns:cvss2=\"http://scap.nist.gov/schema/cvss-v2/0.2\" ");
sb.append("xmlns:cvss=\"http://scap.nist.gov/schema/cvss-v2/0.2\" ");
sb.append("xmlns:scap-core=\"http://scap.nist.gov/schema/scap-core/0.1\" ");
sb.append("xmlns:scap_core=\"http://scap.nist.gov/schema/scap-core/0.1\" ");
sb.append("xmlns:patch=\"http://scap.nist.gov/schema/patch/0.1\" ");
sb.append("xmlns:cve=\"http://scap.nist.gov/schema/cve/0.1\" ");
sb.append("xmlns:cce=\"http://scap.nist.gov/schema/cce/0.1\" ");
sb.append("id=\"").append(id).append("\">");
//sb.append(str); //need to do the above to get the correct schema generated from files.
Field name = new Field(Fields.CVE_ID, id, Field.Store.NO, Field.Index.ANALYZED);
name.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(name);
@@ -77,18 +112,18 @@ public class NvdCveParser extends Index {
}
Matcher matcherSummary = rxSummary.matcher(str);
if (matcherSummary.matches()) {
String summary = matcherSummary.group(1);
Field description = new Field(Fields.DESCRIPTION, summary, Field.Store.NO, Field.Index.ANALYZED);
description.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(description);
continue;
String summary = matcherSummary.group(1);
Field description = new Field(Fields.DESCRIPTION, summary, Field.Store.NO, Field.Index.ANALYZED);
description.setIndexOptions(IndexOptions.DOCS_ONLY);
doc.add(description);
continue;
}
if (matcherEntryEnd.matches()) {
sb.append("</vulnerabilityType>");
Field xml = new Field(Fields.XML, sb.toString(), Field.Store.YES, Field.Index.NO);
doc.add(xml);
if (!skipEntry) {
Term name = new Term(Fields.CVE_ID, id);
indexWriter.deleteDocuments(name);
@@ -103,8 +138,8 @@ public class NvdCveParser extends Index {
started = false;
}
}
} catch (FileNotFoundException ex) {
Logger.getLogger(NvdCveParser.class.getName()).log(Level.SEVERE, null, ex);
} catch (IOException ex) {
@@ -124,8 +159,12 @@ public class NvdCveParser extends Index {
}
}
}
/**
* Adds a CPE to the Lucene Document
* @param cpe a string representing a CPE
* @param doc a lucene document
*/
private void addVulnerableCpe(String cpe, Document doc) {
Field vulnerable = new Field(Fields.VULNERABLE_CPE, cpe, Field.Store.NO, Field.Index.ANALYZED);
vulnerable.setIndexOptions(IndexOptions.DOCS_ONLY);

View File

@@ -46,6 +46,7 @@ import org.xml.sax.helpers.XMLFilterImpl;
*
* @author Jeremy
*/
@Deprecated
public class NvdCveXmlFilter extends XMLFilterImpl {
EntrySaveDelegate saveDelegate = null;

View File

@@ -126,7 +126,7 @@ public class Downloader {
throw new DownloadFailedException("Error downloading file.", ex);
}
String encoding = conn.getContentEncoding();
BufferedOutputStream writer = null;
try {
InputStream reader;

View File

@@ -353,7 +353,7 @@ Copyright (c) 2012 Jeremy Long. All Rights Reserved.
#end
#end
<h4 id="header$cnt" class="subsectionheader white">Identifiers</h4>
##:&nbsp;<a href="http://web.nvd.nist.gov/view/vuln/search-results?cpe=$esc.url($cpevalue)" target="blank">$esc.html($cpevalue)</a></h4>
##:&nbsp;<a href="http://web.nvd.nist.gov/view/vuln/search-results?cpe=$esc.url($cpevalue)" target="_blank">$esc.html($cpevalue)</a></h4>
<div id="content$cnt" class="subsectioncontent standardsubsection">
#if($cpeCount>1)
Several possible CPEs were identified. If one of the following is correct please update the configuration
@@ -366,7 +366,7 @@ Copyright (c) 2012 Jeremy Long. All Rights Reserved.
<ul>
#foreach($id in $dependency.getIdentifiers())
##yes, we are HTML Encoding the href. this is okay. We can't URL encode as we have to trust the analyzer here...
<li><b>$esc.html($id.type):</b>&nbsp;$esc.html($id.title)&nbsp;:&nbsp;<a href="$esc.html($id.url)" target="blank">$esc.html($id.value)</a>
<li><b>$esc.html($id.type):</b>&nbsp;$esc.html($id.title)&nbsp;:&nbsp;<a href="$esc.html($id.url)" target="_blank">$esc.html($id.value)</a>
## Render the optional description only when the identifier provides one.
## (The condition previously tested a misspelled property, so it never matched.)
#if( $id.description )
<br/>$esc.html($id.description)
#end
@@ -380,12 +380,12 @@ Copyright (c) 2012 Jeremy Long. All Rights Reserved.
<h4 id="header$cnt" class="subsectionheader white">Published Vulnerabilities</h4>
<div id="content$cnt" class="subsectioncontent standardsubsection">
#foreach($vuln in $dependency.getVulnerabilities())
<p><b><a target="blank" href="http://web.nvd.nist.gov/view/vuln/detail?vulnId=$esc.url($vuln.name)">$esc.html($vuln.name)</a></b></p>
<p><b><a target="_blank" href="http://web.nvd.nist.gov/view/vuln/detail?vulnId=$esc.url($vuln.name)">$esc.html($vuln.name)</a></b></p>
<p>$esc.html($vuln.description)
#if ($vuln.getReferences().size()>0)
<ul>
#foreach($ref in $vuln.getReferences())
<li>$esc.html($ref.source) - <a target="blank" href="$esc.html($ref.url)">$ref.name</a></li>
<li>$esc.html($ref.source) - <a target="_blank" href="$esc.html($ref.url)">$ref.name</a></li>
#end
</ul>
#end

View File

@@ -22,7 +22,7 @@ import static org.junit.Assert.*;
* @author Jeremy Long (jeremy.long@gmail.com)
*/
public class NvdCveParserTest {
public NvdCveParserTest() {
}
@@ -33,11 +33,11 @@ public class NvdCveParserTest {
@AfterClass
public static void tearDownClass() throws Exception {
}
@Before
public void setUp() {
}
@After
public void tearDown() {
}

View File

@@ -3,101 +3,101 @@
* and open the template in the editor.
*/
package org.codesecure.dependencycheck.data.nvdcve.xml;
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.net.MalformedURLException;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import org.apache.lucene.index.CorruptIndexException;
import org.codesecure.dependencycheck.data.nvdcve.InvalidDataException;
import org.codesecure.dependencycheck.data.nvdcve.generated.VulnerabilityType;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.*;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
/**
* Smoke test for NvdCveXmlFilter: streams a sample NVD CVE XML resource
* through a SAX reader whose content handler is the filter, with an Indexer
* registered as the filter's save delegate.
*
* @author Jeremy
*/
public class NvdCveXmlFilterTest {
// No fixture state is needed; the constructor and lifecycle hooks below are
// intentionally empty.
public NvdCveXmlFilterTest() {
}
@BeforeClass
public static void setUpClass() {
}
@AfterClass
public static void tearDownClass() {
}
@Before
public void setUp() {
}
@After
public void tearDown() {
}
/**
* Test of process method, of class NvdCveXmlFilter.
*
* Parses the nvdcve-2.0-2012.xml classpath resource through the filter.
* The method contains no assertions, so it passes whenever parsing
* completes without throwing.
*
* @throws InvalidDataException wrapping any JAXB, SAX, parser
* configuration, corrupt index, or I/O failure raised while parsing.
*/
@Test
public void testFilter() throws InvalidDataException {
// Declared outside the try so the finally block can close it.
Indexer indexer = null;
try {
System.out.println("filter");
// Namespace awareness is switched on before the XMLReader is created.
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(true);
XMLReader reader = factory.newSAXParser().getXMLReader();
// The filter is built from a JAXB context over the generated NVD CVE classes.
JAXBContext context = JAXBContext.newInstance("org.codesecure.dependencycheck.data.nvdcve.generated");
NvdCveXmlFilter filter = new NvdCveXmlFilter(context);
indexer = new Indexer();
indexer.openIndexWriter();
// The indexer is handed to the filter as its save delegate.
filter.registerSaveDelegate(indexer);
reader.setContentHandler(filter);
// Resolve the sample feed from the test classpath.
File file = new File(this.getClass().getClassLoader().getResource("nvdcve-2.0-2012.xml").getPath());
// NOTE(review): fileReader is never closed in this method - consider closing it in the finally block.
Reader fileReader = new FileReader(file);
InputSource is = new InputSource(fileReader);
reader.parse(is);
// Every checked failure is rethrown as InvalidDataException with the original cause attached.
} catch (JAXBException ex) {
throw new InvalidDataException("JAXBException", ex);
} catch (SAXException ex) {
throw new InvalidDataException("SAXException", ex);
} catch (ParserConfigurationException ex) {
throw new InvalidDataException("ParserConfigurationException", ex);
} catch (CorruptIndexException ex) {
throw new InvalidDataException("CorruptIndexException", ex);
} catch (IOException ex) {
throw new InvalidDataException("IOException", ex);
} finally {
// Close the indexer even when parsing failed part-way through.
if (indexer != null) {
indexer.close();
}
}
}
}
//
//import java.io.BufferedInputStream;
//import java.io.DataInputStream;
//import java.io.File;
//import java.io.FileReader;
//import java.io.IOException;
//import java.io.InputStream;
//import java.io.Reader;
//import java.net.MalformedURLException;
//import java.util.logging.Level;
//import java.util.logging.Logger;
//import javax.xml.bind.JAXBContext;
//import javax.xml.bind.JAXBException;
//import javax.xml.parsers.ParserConfigurationException;
//import javax.xml.parsers.SAXParserFactory;
//import org.apache.lucene.index.CorruptIndexException;
//import org.codesecure.dependencycheck.data.nvdcve.InvalidDataException;
//import org.codesecure.dependencycheck.data.nvdcve.generated.VulnerabilityType;
//import org.junit.After;
//import org.junit.AfterClass;
//import org.junit.Before;
//import org.junit.BeforeClass;
//import org.junit.Test;
//import static org.junit.Assert.*;
//import org.xml.sax.Attributes;
//import org.xml.sax.InputSource;
//import org.xml.sax.Locator;
//import org.xml.sax.SAXException;
//import org.xml.sax.XMLReader;
//
///**
// *
// * @author Jeremy
// */
//public class NvdCveXmlFilterTest {
//
// public NvdCveXmlFilterTest() {
// }
//
// @BeforeClass
// public static void setUpClass() {
// }
//
// @AfterClass
// public static void tearDownClass() {
// }
//
// @Before
// public void setUp() {
// }
//
// @After
// public void tearDown() {
// }
//
// /**
// * Test of process method, of class NvdCveXmlFilter.
// */
// @Test
// public void testFilter() throws InvalidDataException {
// Indexer indexer = null;
// try {
// System.out.println("filter");
//
// SAXParserFactory factory = SAXParserFactory.newInstance();
// factory.setNamespaceAware(true);
// XMLReader reader = factory.newSAXParser().getXMLReader();
//
// JAXBContext context = JAXBContext.newInstance("org.codesecure.dependencycheck.data.nvdcve.generated");
// NvdCveXmlFilter filter = new NvdCveXmlFilter(context);
//
// indexer = new Indexer();
// indexer.openIndexWriter();
//
// filter.registerSaveDelegate(indexer);
//
// reader.setContentHandler(filter);
// File file = new File(this.getClass().getClassLoader().getResource("nvdcve-2.0-2012.xml").getPath());
// Reader fileReader = new FileReader(file);
// InputSource is = new InputSource(fileReader);
// reader.parse(is);
// } catch (JAXBException ex) {
// throw new InvalidDataException("JAXBException", ex);
// } catch (SAXException ex) {
// throw new InvalidDataException("SAXException", ex);
// } catch (ParserConfigurationException ex) {
// throw new InvalidDataException("ParserConfigurationException", ex);
// } catch (CorruptIndexException ex) {
// throw new InvalidDataException("CorruptIndexException", ex);
// } catch (IOException ex) {
// throw new InvalidDataException("IOException", ex);
// } finally {
// if (indexer != null) {
// indexer.close();
// }
// }
// }
//}