add jsoup to help convert the HTML, specified within nodes in the POM, into text.

Former-commit-id: d32fbfe3e1d9a26f053773d7b9566acead1acc1c
This commit is contained in:
Jeremy Long
2013-06-02 21:45:49 -04:00
parent c8e6e8eb32
commit 0ea29b3d7c
4 changed files with 222 additions and 131 deletions

View File

@@ -3,11 +3,11 @@ Copyright (c) 2012-2013 Jeremy Long. All Rights Reserved.
The licenses for the software listed below can be found in the META-INF/licenses/[dependency name]. The licenses for the software listed below can be found in the META-INF/licenses/[dependency name].
This product includes software developed by This product includes software developed by The Apache Software Foundation (http://www.apache.org/).
The Apache Software Foundation (http://www.apache.org/).
This product includes software developed by This product includes software developed by Jquery.com (http://jquery.com/).
Jquery.com (http://jquery.com/).
This product includes software developed by Jonathan Hedley (http://jsoup.org/).
This software contains unmodified binary redistributions for H2 database engine (http://www.h2database.com/), which is dual licensed and available under a modified version of the MPL 1.1 (Mozilla Public License) or under the (unmodified) EPL 1.0 (Eclipse Public License). This software contains unmodified binary redistributions for H2 database engine (http://www.h2database.com/), which is dual licensed and available under a modified version of the MPL 1.1 (Mozilla Public License) or under the (unmodified) EPL 1.0 (Eclipse Public License).
An original copy of the license agreement can be found at: http://www.h2database.com/html/license.html An original copy of the license agreement can be found at: http://www.h2database.com/html/license.html

View File

@@ -503,6 +503,12 @@ along with DependencyCheck. If not, see <http://www.gnu.org/licenses />.
<artifactId>h2</artifactId> <artifactId>h2</artifactId>
<version>1.3.172</version> <version>1.3.172</version>
</dependency> </dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.7.2</version>
<type>jar</type>
</dependency>
<!-- The following dependencies are only scanned during integration testing --> <!-- The following dependencies are only scanned during integration testing -->
<!--<dependency> <!--<dependency>

View File

@@ -39,6 +39,7 @@ import java.util.Properties;
import java.util.Set; import java.util.Set;
import java.util.StringTokenizer; import java.util.StringTokenizer;
import java.util.jar.Attributes; import java.util.jar.Attributes;
import java.util.jar.JarEntry;
import java.util.jar.JarFile; import java.util.jar.JarFile;
import java.util.jar.Manifest; import java.util.jar.Manifest;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@@ -47,6 +48,7 @@ import java.util.zip.ZipInputStream;
import javax.xml.bind.JAXBContext; import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBElement; import javax.xml.bind.JAXBElement;
import javax.xml.bind.Unmarshaller; import javax.xml.bind.Unmarshaller;
import org.jsoup.Jsoup;
import org.owasp.dependencycheck.analyzer.pom.generated.License; import org.owasp.dependencycheck.analyzer.pom.generated.License;
import org.owasp.dependencycheck.analyzer.pom.generated.Model; import org.owasp.dependencycheck.analyzer.pom.generated.Model;
import org.owasp.dependencycheck.analyzer.pom.generated.Organization; import org.owasp.dependencycheck.analyzer.pom.generated.Organization;
@@ -208,6 +210,10 @@ public class JarAnalyzer extends AbstractAnalyzer implements Analyzer {
throw new AnalysisException("Exception occurred reading the JAR file.", ex); throw new AnalysisException("Exception occurred reading the JAR file.", ex);
} }
} }
/**
 * A case-insensitive pattern used to detect HTML within text. It matches an
 * opening angle bracket followed by one or more ASCII letters, any further
 * characters, an optional slash, and a closing angle bracket.
 */
final Pattern htmlDetection = Pattern.compile("\\<[a-z]+.*/?\\>", Pattern.CASE_INSENSITIVE);
/** /**
* Attempts to find a pom.xml within the JAR file. If found it extracts * Attempts to find a pom.xml within the JAR file. If found it extracts
@@ -215,77 +221,125 @@ public class JarAnalyzer extends AbstractAnalyzer implements Analyzer {
* the strings contained within the pom.properties if one exists. * the strings contained within the pom.properties if one exists.
* *
* @param dependency the dependency being analyzed. * @param dependency the dependency being analyzed.
* @throws IOException is thrown if there is an error reading the zip file. * @throws AnalysisException is thrown if there is an exception parsing the pom.
* @throws AnalysisException is thrown if there is an exception parsing the
* pom.
* @return whether or not evidence was added to the dependency * @return whether or not evidence was added to the dependency
*/ */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "OS_OPEN_STREAM", //@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "OS_OPEN_STREAM",
justification = "The reader on line 259 is closed by closing the zipEntry") //justification = "The reader on line 259 is closed by closing the zipEntry")
protected boolean analyzePOM(Dependency dependency) throws IOException, AnalysisException { protected boolean analyzePOM(Dependency dependency) throws AnalysisException {
boolean foundSomething = false; boolean foundSomething = false;
Properties pomProperties = null; final JarFile jar;
final List<Model> poms = new ArrayList<Model>();
FileInputStream fs = null;
try { try {
fs = new FileInputStream(dependency.getActualFilePath()); jar = new JarFile(dependency.getActualFilePath());
final ZipInputStream zin = new ZipInputStream(fs); } catch (IOException ex) {
ZipEntry entry = zin.getNextEntry(); final String msg = String.format("Unable to read JarFile '%s'.", dependency.getActualFilePath());
while (entry != null) {
final String entryName = (new File(entry.getName())).getName().toLowerCase();
if (!entry.isDirectory() && "pom.xml".equals(entryName)) {
final NonClosingStream stream = new NonClosingStream(zin);
Model p = null;
try {
final JAXBElement obj = (JAXBElement) pomUnmarshaller.unmarshal(stream);
p = (Model) obj.getValue();
} catch (JAXBException ex) {
final String msg = String.format("Unable to parse POM '%s' in '%s'",
entry.getName(), dependency.getFilePath());
final AnalysisException ax = new AnalysisException(msg, ex); final AnalysisException ax = new AnalysisException(msg, ex);
dependency.getAnalysisExceptions().add(ax); dependency.getAnalysisExceptions().add(ax);
Logger.getLogger(JarAnalyzer.class.getName()).log(Level.INFO, msg); Logger.getLogger(JarAnalyzer.class.getName()).log(Level.WARNING, msg, ex);
return foundSomething;
} }
if (p != null) { final List<Model> poms = new ArrayList<Model>();
poms.add(p); List<String> pomEntries;
}
zin.closeEntry();
} else if (!entry.isDirectory() && "pom.properties".equals(entryName)) {
//TODO what if there is more then one pom.properties?
// need to find the POM, then look to see if there is a sibling
// pom.properties and use those together.
if (pomProperties == null) {
Reader reader;
try { try {
reader = new InputStreamReader(zin, "UTF-8"); pomEntries = retrievePomListing(jar);
} catch (IOException ex) {
final String msg = String.format("Unable to read JarEntries in '%s'.", dependency.getActualFilePath());
final AnalysisException ax = new AnalysisException(msg, ex);
dependency.getAnalysisExceptions().add(ax);
Logger.getLogger(JarAnalyzer.class.getName()).log(Level.WARNING, msg, ex);
return foundSomething;
}
for (String path : pomEntries) {
Properties pomProperties = null;
try {
pomProperties = retrievePomProperties(path, jar);
} catch (IOException ex) {
Logger.getLogger(JarAnalyzer.class.getName()).log(Level.FINEST, "ignore this, failed reading a non-existent pom.properties", ex);
}
Model pom = null;
try {
pom = retrievePom(path, jar);
} catch (JAXBException ex) {
final String msg = String.format("Unable to parse POM '%s' in '%s'",
path, dependency.getFilePath());
final AnalysisException ax = new AnalysisException(msg, ex);
dependency.getAnalysisExceptions().add(ax);
Logger.getLogger(JarAnalyzer.class.getName()).log(Level.WARNING, msg);
Logger.getLogger(JarAnalyzer.class.getName()).log(Level.SEVERE, msg, ax);
} catch (IOException ex) {
Logger.getLogger(JarAnalyzer.class.getName()).log(Level.SEVERE, null, ex);
}
foundSomething |= setPomEvidence(dependency, pom, pomProperties);
}
return foundSomething;
}
/**
* Given a path to a pom.xml within a JarFile, this method attempts to load
* a sibling pom.properties if one exists.
* @param path the path to the pom.xml within the JarFile
* @param jar the JarFile to load the pom.properties from
* @return a Properties object or null if no pom.properties was found
* @throws IOException thrown if there is an exception reading the pom.properties
*/
private Properties retrievePomProperties(String path, final JarFile jar) throws IOException {
Properties pomProperties = null;
String propPath = path.substring(0, path.length() - 7) + "pom.properies";
ZipEntry propEntry = jar.getEntry(propPath);
if (propEntry != null) {
Reader reader = new InputStreamReader(jar.getInputStream(propEntry), "UTF-8");
pomProperties = new Properties(); pomProperties = new Properties();
pomProperties.load(reader); pomProperties.load(reader);
} finally {
//zin.closeEntry closes the reader
//reader.close();
zin.closeEntry();
} }
} else { return pomProperties;
final String msg = "JAR file contains multiple pom.properties files - unable to process POM";
final AnalysisException ax = new AnalysisException(msg);
dependency.getAnalysisExceptions().add(ax);
Logger.getLogger(JarAnalyzer.class.getName()).log(Level.INFO, msg);
} }
/**
 * Searches a JarFile for pom.xml entries and returns a listing of these entries.
 * An entry is reported when it is not a directory and its file name, compared
 * case-insensitively, is "pom.xml".
 *
 * @param jar the JarFile to search
 * @return a list of the full entry names of the pom.xml entries; empty if
 * none were found
 * @throws IOException thrown if there is an exception reading a JarEntry
 */
private List<String> retrievePomListing(final JarFile jar) throws IOException {
    final List<String> pomEntries = new ArrayList<String>();
    // Obtain the enumeration once: every call to jar.entries() starts over at the
    // first entry, so it must not be re-created inside the loop.
    final java.util.Enumeration<JarEntry> entries = jar.entries();
    while (entries.hasMoreElements()) {
        final JarEntry entry = entries.nextElement();
        final String entryName = (new File(entry.getName())).getName().toLowerCase();
        if (!entry.isDirectory() && "pom.xml".equals(entryName)) {
            pomEntries.add(entry.getName());
        }
    }
    return pomEntries;
}
/**
* Retrieves the specified POM from a jar file and converts it to a Model.
* @param path the path to the pom.xml file within the jar file
* @param jar the jar file to extract the pom from
* @return returns a {@link org.owasp.dependencycheck.analyzer.pom.generated.Model} object
* @throws JAXBException is thrown if there is an exception parsing the pom
* @throws IOException is thrown if there is an exception reading the jar
*/
private Model retrievePom(String path, JarFile jar) throws JAXBException, IOException {
ZipEntry entry = jar.getEntry(path);
if (entry != null) { //should never be null
NonClosingStream stream = new NonClosingStream(jar.getInputStream(entry));
Model p = null;
final JAXBElement obj = (JAXBElement) pomUnmarshaller.unmarshal(stream);
return (Model) obj.getValue();
}
return null;
} }
entry = zin.getNextEntry(); /**
} * Sets evidence from the pom on the supplied dependency.
} catch (IOException ex) { * @param dependency the dependency to set data on
throw new AnalysisException("Error reading JAR file as zip.", ex); * @param pom the information from the pom
} finally { * @param pomProperties the pom properties file (null if none exists)
if (fs != null) { * @return true if there was evidence within the pom that we could use; otherwise false
fs.close(); */
} private boolean setPomEvidence(Dependency dependency, Model pom, Properties pomProperties) {
} boolean foundSomething = false;
for (Model pom : poms) {
//group id //group id
final String groupid = interpolateString(pom.getGroupId(), pomProperties); final String groupid = interpolateString(pom.getGroupId(), pomProperties);
if (groupid != null) { if (groupid != null) {
@@ -324,7 +378,12 @@ public class JarAnalyzer extends AbstractAnalyzer implements Analyzer {
//Description //Description
if (pom.getDescription() != null) { if (pom.getDescription() != null) {
foundSomething = true; foundSomething = true;
final String description = interpolateString(pom.getDescription(), pomProperties); String description = interpolateString(pom.getDescription(), pomProperties);
if (htmlDetection.matcher(description).find()) {
description = Jsoup.parse(description).text();
}
dependency.setDescription(description); dependency.setDescription(description);
dependency.getProductEvidence().addEvidence("pom", "description", description, Evidence.Confidence.MEDIUM); dependency.getProductEvidence().addEvidence("pom", "description", description, Evidence.Confidence.MEDIUM);
dependency.getVendorEvidence().addEvidence("pom", "description", description, Evidence.Confidence.MEDIUM); dependency.getVendorEvidence().addEvidence("pom", "description", description, Evidence.Confidence.MEDIUM);
@@ -348,6 +407,9 @@ public class JarAnalyzer extends AbstractAnalyzer implements Analyzer {
if (tmp == null) { if (tmp == null) {
continue; continue;
} }
if (htmlDetection.matcher(tmp).find()) {
tmp = Jsoup.parse(tmp).text();
}
if (license == null) { if (license == null) {
license = tmp; license = tmp;
} else { } else {
@@ -358,7 +420,6 @@ public class JarAnalyzer extends AbstractAnalyzer implements Analyzer {
dependency.setLicense(license); dependency.setLicense(license);
} }
} }
}
return foundSomething; return foundSomething;
} }
@@ -530,7 +591,10 @@ public class JarAnalyzer extends AbstractAnalyzer implements Analyzer {
for (Entry<Object, Object> entry : atts.entrySet()) { for (Entry<Object, Object> entry : atts.entrySet()) {
String key = entry.getKey().toString(); String key = entry.getKey().toString();
final String value = atts.getValue(key); String value = atts.getValue(key);
if (htmlDetection.matcher(value).find()) {
value = Jsoup.parse(value).text();
}
if (key.equals(Attributes.Name.IMPLEMENTATION_TITLE.toString())) { if (key.equals(Attributes.Name.IMPLEMENTATION_TITLE.toString())) {
foundSomething = true; foundSomething = true;
productEvidence.addEvidence(source, key, value, Evidence.Confidence.HIGH); productEvidence.addEvidence(source, key, value, Evidence.Confidence.HIGH);
@@ -662,7 +726,7 @@ public class JarAnalyzer extends AbstractAnalyzer implements Analyzer {
* within the text. * within the text.
* @return the interpolated text. * @return the interpolated text.
*/ */
protected String interpolateString(String text, Properties properties) { private String interpolateString(String text, Properties properties) {
//${project.build.directory} //${project.build.directory}
if (properties == null || text == null) { if (properties == null || text == null) {
return text; return text;

View File

@@ -0,0 +1,21 @@
The MIT License
Copyright (c) 2009, 2010, 2011, 2012, 2013 Jonathan Hedley <jonathan@hedley.net>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.