add jsoup to help convert the HTML, specified within nodes in the POM, into text.

Former-commit-id: d32fbfe3e1d9a26f053773d7b9566acead1acc1c
This commit is contained in:
Jeremy Long
2013-06-02 21:45:49 -04:00
parent c8e6e8eb32
commit 0ea29b3d7c
4 changed files with 222 additions and 131 deletions

View File

@@ -3,11 +3,11 @@ Copyright (c) 2012-2013 Jeremy Long. All Rights Reserved.
The licenses for the software listed below can be found in the META-INF/licenses/[dependency name].
This product includes software developed by
The Apache Software Foundation (http://www.apache.org/).
This product includes software developed by The Apache Software Foundation (http://www.apache.org/).
This product includes software developed by
Jquery.com (http://jquery.com/).
This product includes software developed by Jquery.com (http://jquery.com/).
This product includs software developed by Jonathan Hedley (jsoup.org)
This software contains unmodified binary redistributions for H2 database engine (http://www.h2database.com/), which is dual licensed and available under a modified version of the MPL 1.1 (Mozilla Public License) or under the (unmodified) EPL 1.0 (Eclipse Public License).
An original copy of the license agreement can be found at: http://www.h2database.com/html/license.html

View File

@@ -503,6 +503,12 @@ along with DependencyCheck. If not, see <http://www.gnu.org/licenses />.
<artifactId>h2</artifactId>
<version>1.3.172</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.7.2</version>
<type>jar</type>
</dependency>
<!-- The following dependencies are only scanned during integration testing -->
<!--<dependency>

View File

@@ -39,6 +39,7 @@ import java.util.Properties;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.jar.Attributes;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import java.util.jar.Manifest;
import java.util.regex.Pattern;
@@ -47,6 +48,7 @@ import java.util.zip.ZipInputStream;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.Unmarshaller;
import org.jsoup.Jsoup;
import org.owasp.dependencycheck.analyzer.pom.generated.License;
import org.owasp.dependencycheck.analyzer.pom.generated.Model;
import org.owasp.dependencycheck.analyzer.pom.generated.Organization;
@@ -208,6 +210,10 @@ public class JarAnalyzer extends AbstractAnalyzer implements Analyzer {
throw new AnalysisException("Exception occurred reading the JAR file.", ex);
}
}
/**
* A pattern to detect HTML within text.
*/
final Pattern htmlDetection = Pattern.compile("\\<[a-z]+.*/?\\>", Pattern.CASE_INSENSITIVE);
/**
* Attempts to find a pom.xml within the JAR file. If found it extracts
@@ -215,148 +221,203 @@ public class JarAnalyzer extends AbstractAnalyzer implements Analyzer {
* the strings contained within the pom.properties if one exists.
*
* @param dependency the dependency being analyzed.
* @throws IOException is thrown if there is an error reading the zip file.
* @throws AnalysisException is thrown if there is an exception parsing the
* pom.
* @throws AnalysisException is thrown if there is an exception parsing the pom.
* @return whether or not evidence was added to the dependency
*/
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "OS_OPEN_STREAM",
justification = "The reader on line 259 is closed by closing the zipEntry")
protected boolean analyzePOM(Dependency dependency) throws IOException, AnalysisException {
//@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "OS_OPEN_STREAM",
//justification = "The reader on line 259 is closed by closing the zipEntry")
protected boolean analyzePOM(Dependency dependency) throws AnalysisException {
boolean foundSomething = false;
Properties pomProperties = null;
final List<Model> poms = new ArrayList<Model>();
FileInputStream fs = null;
final JarFile jar;
try {
fs = new FileInputStream(dependency.getActualFilePath());
final ZipInputStream zin = new ZipInputStream(fs);
ZipEntry entry = zin.getNextEntry();
while (entry != null) {
final String entryName = (new File(entry.getName())).getName().toLowerCase();
if (!entry.isDirectory() && "pom.xml".equals(entryName)) {
final NonClosingStream stream = new NonClosingStream(zin);
Model p = null;
try {
final JAXBElement obj = (JAXBElement) pomUnmarshaller.unmarshal(stream);
p = (Model) obj.getValue();
} catch (JAXBException ex) {
final String msg = String.format("Unable to parse POM '%s' in '%s'",
entry.getName(), dependency.getFilePath());
final AnalysisException ax = new AnalysisException(msg, ex);
dependency.getAnalysisExceptions().add(ax);
Logger.getLogger(JarAnalyzer.class.getName()).log(Level.INFO, msg);
}
if (p != null) {
poms.add(p);
}
zin.closeEntry();
} else if (!entry.isDirectory() && "pom.properties".equals(entryName)) {
//TODO what if there is more then one pom.properties?
// need to find the POM, then look to see if there is a sibling
// pom.properties and use those together.
if (pomProperties == null) {
Reader reader;
try {
reader = new InputStreamReader(zin, "UTF-8");
pomProperties = new Properties();
pomProperties.load(reader);
} finally {
//zin.closeEntry closes the reader
//reader.close();
zin.closeEntry();
}
} else {
final String msg = "JAR file contains multiple pom.properties files - unable to process POM";
final AnalysisException ax = new AnalysisException(msg);
dependency.getAnalysisExceptions().add(ax);
Logger.getLogger(JarAnalyzer.class.getName()).log(Level.INFO, msg);
}
}
entry = zin.getNextEntry();
}
jar = new JarFile(dependency.getActualFilePath());
} catch (IOException ex) {
throw new AnalysisException("Error reading JAR file as zip.", ex);
} finally {
if (fs != null) {
fs.close();
}
final String msg = String.format("Unable to read JarFile '%s'.", dependency.getActualFilePath());
final AnalysisException ax = new AnalysisException(msg, ex);
dependency.getAnalysisExceptions().add(ax);
Logger.getLogger(JarAnalyzer.class.getName()).log(Level.WARNING, msg, ex);
return foundSomething;
}
final List<Model> poms = new ArrayList<Model>();
List<String> pomEntries;
try {
pomEntries = retrievePomListing(jar);
} catch (IOException ex) {
final String msg = String.format("Unable to read JarEntries in '%s'.", dependency.getActualFilePath());
final AnalysisException ax = new AnalysisException(msg, ex);
dependency.getAnalysisExceptions().add(ax);
Logger.getLogger(JarAnalyzer.class.getName()).log(Level.WARNING, msg, ex);
return foundSomething;
}
for (Model pom : poms) {
//group id
final String groupid = interpolateString(pom.getGroupId(), pomProperties);
if (groupid != null) {
foundSomething = true;
dependency.getVendorEvidence().addEvidence("pom", "groupid", groupid, Evidence.Confidence.HIGH);
dependency.getProductEvidence().addEvidence("pom", "groupid", groupid, Evidence.Confidence.LOW);
for (String path : pomEntries) {
Properties pomProperties = null;
try {
pomProperties = retrievePomProperties(path, jar);
} catch (IOException ex) {
Logger.getLogger(JarAnalyzer.class.getName()).log(Level.FINEST, "ignore this, failed reading a non-existent pom.properties", ex);
}
//artifact id
final String artifactid = interpolateString(pom.getArtifactId(), pomProperties);
if (artifactid != null) {
foundSomething = true;
dependency.getProductEvidence().addEvidence("pom", "artifactid", artifactid, Evidence.Confidence.HIGH);
dependency.getVendorEvidence().addEvidence("pom", "artifactid", artifactid, Evidence.Confidence.LOW);
Model pom = null;
try {
pom = retrievePom(path, jar);
} catch (JAXBException ex) {
final String msg = String.format("Unable to parse POM '%s' in '%s'",
path, dependency.getFilePath());
final AnalysisException ax = new AnalysisException(msg, ex);
dependency.getAnalysisExceptions().add(ax);
Logger.getLogger(JarAnalyzer.class.getName()).log(Level.WARNING, msg);
Logger.getLogger(JarAnalyzer.class.getName()).log(Level.SEVERE, msg, ax);
} catch (IOException ex) {
Logger.getLogger(JarAnalyzer.class.getName()).log(Level.SEVERE, null, ex);
}
//version
final String version = interpolateString(pom.getVersion(), pomProperties);
if (version != null) {
foundSomething = true;
dependency.getVersionEvidence().addEvidence("pom", "version", version, Evidence.Confidence.HIGH);
}
// org name
final Organization org = pom.getOrganization();
if (org != null && org.getName() != null) {
foundSomething = true;
final String orgName = interpolateString(org.getName(), pomProperties);
dependency.getVendorEvidence().addEvidence("pom", "organization name", orgName, Evidence.Confidence.HIGH);
}
//pom name
final String pomName = interpolateString(pom.getName(), pomProperties);
if (pomName != null) {
foundSomething = true;
dependency.getProductEvidence().addEvidence("pom", "name", pomName, Evidence.Confidence.HIGH);
dependency.getVendorEvidence().addEvidence("pom", "name", pomName, Evidence.Confidence.HIGH);
foundSomething |= setPomEvidence(dependency, pom, pomProperties);
}
return foundSomething;
}
/**
* Given a path to a pom.xml within a JarFile, this method attempts to load
* a sibling pom.properties if one exists.
* @param path the path to the pom.xml within the JarFile
* @param jar the JarFile to load the pom.properties from
* @return a Properties object or null if no pom.properties was found
* @throws IOException thrown if there is an exception reading the pom.properties
*/
private Properties retrievePomProperties(String path, final JarFile jar) throws IOException {
Properties pomProperties = null;
String propPath = path.substring(0, path.length() - 7) + "pom.properies";
ZipEntry propEntry = jar.getEntry(propPath);
if (propEntry != null) {
Reader reader = new InputStreamReader(jar.getInputStream(propEntry), "UTF-8");
pomProperties = new Properties();
pomProperties.load(reader);
}
return pomProperties;
}
/**
* Searches a JarFile for pom.xml entries and returns a listing of these entries.
* @param jar the JarFile to search
* @return a list of pom.xml entries
* @throws IOException thrown if there is an exception reading a JarEntry
*/
private List<String> retrievePomListing(final JarFile jar) throws IOException {
List<String> pomEntries = new ArrayList<String>();
JarEntry entry = jar.entries().nextElement();
while (entry != null) {
final String entryName = (new File(entry.getName())).getName().toLowerCase();
if (!entry.isDirectory() && "pom.xml".equals(entryName)) {
pomEntries.add(entry.getName());
}
entry = jar.entries().nextElement();
}
return pomEntries;
}
/**
* Retrieves the specified POM from a jar file and converts it to a Model.
* @param path the path to the pom.xml file within the jar file
* @param jar the jar file to extract the pom from
* @return returns a {@link org.owasp.dependencycheck.analyzer.pom.generated.Model} object
* @throws JAXBException is thrown if there is an exception parsing the pom
* @throws IOException is thrown if there is an exception reading the jar
*/
private Model retrievePom(String path, JarFile jar) throws JAXBException, IOException {
ZipEntry entry = jar.getEntry(path);
if (entry != null) { //should never be null
NonClosingStream stream = new NonClosingStream(jar.getInputStream(entry));
Model p = null;
final JAXBElement obj = (JAXBElement) pomUnmarshaller.unmarshal(stream);
return (Model) obj.getValue();
}
return null;
}
/**
* Sets evidence from the pom on the supplied dependency.
* @param dependency the dependency to set data on
* @param pom the information from the pom
* @param pomProperties the pom properties file (null if none exists)
* @return true if there was evidence within the pom that we could use; otherwise false
*/
private boolean setPomEvidence(Dependency dependency, Model pom, Properties pomProperties) {
boolean foundSomething = false;
//group id
final String groupid = interpolateString(pom.getGroupId(), pomProperties);
if (groupid != null) {
foundSomething = true;
dependency.getVendorEvidence().addEvidence("pom", "groupid", groupid, Evidence.Confidence.HIGH);
dependency.getProductEvidence().addEvidence("pom", "groupid", groupid, Evidence.Confidence.LOW);
}
//artifact id
final String artifactid = interpolateString(pom.getArtifactId(), pomProperties);
if (artifactid != null) {
foundSomething = true;
dependency.getProductEvidence().addEvidence("pom", "artifactid", artifactid, Evidence.Confidence.HIGH);
dependency.getVendorEvidence().addEvidence("pom", "artifactid", artifactid, Evidence.Confidence.LOW);
}
//version
final String version = interpolateString(pom.getVersion(), pomProperties);
if (version != null) {
foundSomething = true;
dependency.getVersionEvidence().addEvidence("pom", "version", version, Evidence.Confidence.HIGH);
}
// org name
final Organization org = pom.getOrganization();
if (org != null && org.getName() != null) {
foundSomething = true;
final String orgName = interpolateString(org.getName(), pomProperties);
dependency.getVendorEvidence().addEvidence("pom", "organization name", orgName, Evidence.Confidence.HIGH);
}
//pom name
final String pomName = interpolateString(pom.getName(), pomProperties);
if (pomName != null) {
foundSomething = true;
dependency.getProductEvidence().addEvidence("pom", "name", pomName, Evidence.Confidence.HIGH);
dependency.getVendorEvidence().addEvidence("pom", "name", pomName, Evidence.Confidence.HIGH);
}
//Description
if (pom.getDescription() != null) {
foundSomething = true;
String description = interpolateString(pom.getDescription(), pomProperties);
if (htmlDetection.matcher(description).find()) {
description = Jsoup.parse(description).text();
}
//Description
if (pom.getDescription() != null) {
foundSomething = true;
final String description = interpolateString(pom.getDescription(), pomProperties);
dependency.setDescription(description);
dependency.getProductEvidence().addEvidence("pom", "description", description, Evidence.Confidence.MEDIUM);
dependency.getVendorEvidence().addEvidence("pom", "description", description, Evidence.Confidence.MEDIUM);
}
dependency.setDescription(description);
dependency.getProductEvidence().addEvidence("pom", "description", description, Evidence.Confidence.MEDIUM);
dependency.getVendorEvidence().addEvidence("pom", "description", description, Evidence.Confidence.MEDIUM);
}
//license
if (pom.getLicenses() != null) {
String license = null;
for (License lic : pom.getLicenses().getLicense()) {
String tmp = null;
if (lic.getName() != null) {
tmp = interpolateString(lic.getName(), pomProperties);
}
if (lic.getUrl() != null) {
if (tmp == null) {
tmp = interpolateString(lic.getUrl(), pomProperties);
} else {
tmp += ": " + interpolateString(lic.getUrl(), pomProperties);
}
}
//license
if (pom.getLicenses() != null) {
String license = null;
for (License lic : pom.getLicenses().getLicense()) {
String tmp = null;
if (lic.getName() != null) {
tmp = interpolateString(lic.getName(), pomProperties);
}
if (lic.getUrl() != null) {
if (tmp == null) {
continue;
}
if (license == null) {
license = tmp;
tmp = interpolateString(lic.getUrl(), pomProperties);
} else {
license += "\n" + tmp;
tmp += ": " + interpolateString(lic.getUrl(), pomProperties);
}
}
if (license != null) {
dependency.setLicense(license);
if (tmp == null) {
continue;
}
if (htmlDetection.matcher(tmp).find()) {
tmp = Jsoup.parse(tmp).text();
}
if (license == null) {
license = tmp;
} else {
license += "\n" + tmp;
}
}
if (license != null) {
dependency.setLicense(license);
}
}
return foundSomething;
@@ -530,7 +591,10 @@ public class JarAnalyzer extends AbstractAnalyzer implements Analyzer {
for (Entry<Object, Object> entry : atts.entrySet()) {
String key = entry.getKey().toString();
final String value = atts.getValue(key);
String value = atts.getValue(key);
if (htmlDetection.matcher(value).find()) {
value = Jsoup.parse(value).text();
}
if (key.equals(Attributes.Name.IMPLEMENTATION_TITLE.toString())) {
foundSomething = true;
productEvidence.addEvidence(source, key, value, Evidence.Confidence.HIGH);
@@ -662,7 +726,7 @@ public class JarAnalyzer extends AbstractAnalyzer implements Analyzer {
* within the text.
* @return the interpolated text.
*/
protected String interpolateString(String text, Properties properties) {
private String interpolateString(String text, Properties properties) {
//${project.build.directory}
if (properties == null || text == null) {
return text;

View File

@@ -0,0 +1,21 @@
The MIT License
Copyright (c) 2009, 2010, 2011, 2012, 2013 Jonathan Hedley <jonathan@hedley.net>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.