Added PythonPackageAnalyzer, for directly analyzing Python library, a.k.a.,

package, source code.


Former-commit-id: 3154ea4ecddd794cb3e7f3686972fd7a6cc2177c
This commit is contained in:
Dale Visser
2015-04-02 19:31:34 -04:00
parent bf96c24ec3
commit 511d2b9457
13 changed files with 439 additions and 43 deletions

3
.gitignore vendored
View File

@@ -10,6 +10,7 @@
.settings
maven-eclipse.xml
.externalToolBuilders
.pmd
# Netbeans configuration
nb-configuration.xml
/target/
@@ -22,4 +23,4 @@ _site/**
#unknown as to why these are showing up... but need to be ignored.
.LCKpom.xml~
#coverity
/cov-int/
/cov-int/

View File

@@ -22,15 +22,18 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FilenameFilter;
import java.net.MalformedURLException;
import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.mail.MessagingException;
import javax.mail.internet.InternetHeaders;
import org.apache.commons.collections.iterators.ReverseListIterator;
import org.apache.commons.io.filefilter.NameFileFilter;
import org.apache.commons.io.filefilter.SuffixFileFilter;
import org.apache.commons.io.input.AutoCloseInputStream;
@@ -44,10 +47,12 @@ import org.owasp.dependencycheck.utils.ExtractionException;
import org.owasp.dependencycheck.utils.ExtractionUtil;
import org.owasp.dependencycheck.utils.FileUtils;
import org.owasp.dependencycheck.utils.Settings;
import org.owasp.dependencycheck.utils.UrlStringUtils;
/**
* Used to analyze a Wheel distriution file or *.dist-info folder, and collect
* information that can be used to determine the associated CPE.
* Used to analyze Wheel or egg distribution files, or their contents in
* unzipped form, and collect information that can be used to determine the
* associated CPE.
*
* @author Dale Visser <dvisser@ida.org>
*/
@@ -90,17 +95,6 @@ public class PythonDistributionAnalyzer extends AbstractFileTypeAnalyzer {
private static final Set<String> EXTENSIONS = newHashSet("whl", "egg",
"zip", METADATA, PKG_INFO);
/**
* Pattern that captures the vendor from a home page URL.
*/
private static final Pattern HOMEPAGE_VENDOR = Pattern
.compile("^[a-zA-Z]+://(.*)$");
/**
* Used to split the subdomains of a host name.
*/
private static final Pattern DOT = Pattern.compile("\\.");
/**
* Used to match on egg archive candidate extensions.
*/
@@ -265,8 +259,10 @@ public class PythonDistributionAnalyzer extends AbstractFileTypeAnalyzer {
*
* @param dependency
* the dependency being analyzed
* @throws AnalysisException
*             thrown if the analysis fails
*/
private static void collectWheelMetadata(Dependency dependency, File file) {
private static void collectWheelMetadata(Dependency dependency, File file)
throws AnalysisException {
final InternetHeaders headers = getManifestProperties(file);
addPropertyToEvidence(headers, dependency.getVersionEvidence(),
"Version", Confidence.HIGHEST);
@@ -276,12 +272,17 @@ public class PythonDistributionAnalyzer extends AbstractFileTypeAnalyzer {
final EvidenceCollection vendorEvidence = dependency
.getVendorEvidence();
if (StringUtils.isNotBlank(url)) {
final Matcher matcher = HOMEPAGE_VENDOR.matcher(url);
if (matcher.matches()) {
final String[] subdomains = DOT.split(matcher.group(1));
vendorEvidence.addEvidence(METADATA, "vendor",
subdomains[Math.max(0, subdomains.length - 2)],
Confidence.MEDIUM);
if (UrlStringUtils.isUrl(url)) {
try {
vendorEvidence.addEvidence(METADATA, "vendor",
(String) (new ReverseListIterator(
Arrays.asList(UrlStringUtils
.extractImportantUrlData(url).get(0)
.split(Pattern.quote("."))))).next(),
Confidence.MEDIUM);
} catch (MalformedURLException mue) {
LOGGER.fine("URL didn't parse: " + mue.getMessage());
}
}
}
addPropertyToEvidence(headers, vendorEvidence, "Author", Confidence.LOW);

View File

@@ -0,0 +1,288 @@
/*
* This file is part of dependency-check-core.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright (c) 2015 Institute for Defense Analyses. All Rights Reserved.
*/
package org.owasp.dependencycheck.analyzer;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.filefilter.NameFileFilter;
import org.apache.commons.io.filefilter.SuffixFileFilter;
import org.owasp.dependencycheck.Engine;
import org.owasp.dependencycheck.analyzer.exception.AnalysisException;
import org.owasp.dependencycheck.dependency.Confidence;
import org.owasp.dependencycheck.dependency.Dependency;
import org.owasp.dependencycheck.dependency.EvidenceCollection;
import org.owasp.dependencycheck.utils.Settings;
import org.owasp.dependencycheck.utils.UrlStringUtils;
/**
* Used to analyze a Python package, and collect information that can be used to
* determine the associated CPE.
*
* @author Dale Visser <dvisser@ida.org>
*/
public class PythonPackageAnalyzer extends AbstractFileTypeAnalyzer {

    /**
     * Used when compiling file scanning regex patterns.
     */
    private static final int REGEX_OPTIONS = Pattern.DOTALL
            | Pattern.CASE_INSENSITIVE;

    /**
     * The logger.
     */
    private static final Logger LOGGER = Logger
            .getLogger(PythonPackageAnalyzer.class.getName());

    /**
     * Filename extensions for files to be analyzed.
     */
    private static final Set<String> EXTENSIONS = Collections.singleton("py");

    /**
     * Pattern for matching the module docstring in a source file. Group 2
     * holds the docstring text; the pattern is anchored at the start of the
     * (trimmed) file contents.
     */
    private static final Pattern MODULE_DOCSTRING = Pattern.compile(
            "^(['\\\"]{3})(.*?)\\1", REGEX_OPTIONS);

    /**
     * Matches assignments to version variables in Python source code. Group 4
     * holds the version string, which must start with {@code digits.digits}.
     */
    private static final Pattern VERSION_PATTERN = Pattern.compile(
            "\\b(__)?version(__)? *= *(['\"]+)(\\d+\\.\\d+.*?)\\3",
            REGEX_OPTIONS);

    /**
     * Matches assignments to title variables in Python source code.
     */
    private static final Pattern TITLE_PATTERN = compileAssignPattern("title");

    /**
     * Matches assignments to summary variables in Python source code.
     */
    private static final Pattern SUMMARY_PATTERN = compileAssignPattern("summary");

    /**
     * Matches assignments to URI/URL variables in Python source code.
     */
    private static final Pattern URI_PATTERN = compileAssignPattern("ur[il]");

    /**
     * Matches assignments to home page variables in Python source code.
     */
    private static final Pattern HOMEPAGE_PATTERN = compileAssignPattern("home_?page");

    /**
     * Matches assignments to author variables in Python source code.
     */
    private static final Pattern AUTHOR_PATTERN = compileAssignPattern("author");

    /**
     * Filter that detects files named "__init__.py".
     */
    private static final FileFilter INIT_PY_FILTER = new NameFileFilter(
            "__init__.py");

    /**
     * Filter that detects files whose name ends with ".py".
     */
    private static final FileFilter PY_FILTER = new SuffixFileFilter(".py");

    /**
     * Returns the name of the Python Package Analyzer.
     *
     * @return the name of the analyzer
     */
    @Override
    public String getName() {
        return "Python Package Analyzer";
    }

    /**
     * Tell that we are used for information collection.
     *
     * @return INFORMATION_COLLECTION
     */
    @Override
    public AnalysisPhase getAnalysisPhase() {
        return AnalysisPhase.INFORMATION_COLLECTION;
    }

    /**
     * Return the set of supported file extensions.
     *
     * @return the set of supported file extensions
     */
    @Override
    protected Set<String> getSupportedExtensions() {
        return EXTENSIONS;
    }

    /**
     * No-op initializer implementation.
     *
     * @throws Exception
     *             never thrown by this implementation
     */
    @Override
    protected void initializeFileTypeAnalyzer() throws Exception {
        // Nothing to do here.
    }

    /**
     * Builds a pattern that matches a string assignment to the named
     * variable, optionally wrapped in double underscores. Group 4 of a match
     * holds the assigned string value.
     *
     * @param name
     *            regex fragment matching the variable name
     * @return the compiled pattern
     */
    private static Pattern compileAssignPattern(String name) {
        return Pattern.compile(
                String.format("\\b(__)?%s(__)?\\b *= *(['\"]+)(.*?)\\3", name),
                REGEX_OPTIONS);
    }

    /**
     * Analyzes a Python package. Only files named "__init__.py" trigger a
     * scan; in that case every ".py" file in the same directory is searched
     * for evidence. When no evidence is found (or the file is not an
     * "__init__.py"), the dependency is removed from the engine.
     *
     * @param dependency
     *            the dependency being analyzed
     * @param engine
     *            the engine holding the list of dependencies
     * @throws AnalysisException
     *             thrown if a source file cannot be read
     */
    @Override
    protected void analyzeFileType(Dependency dependency, Engine engine)
            throws AnalysisException {
        final File file = dependency.getActualFile();
        final File parent = file.getParentFile();
        boolean found = false;
        // getParentFile() returns null when the path has no parent component.
        if (parent != null && INIT_PY_FILTER.accept(file)) {
            // listFiles() returns null on an I/O error or when the path is
            // not a directory, so it must be checked before iterating.
            final File[] sourceFiles = parent.listFiles(PY_FILTER);
            if (sourceFiles != null) {
                for (final File sourcefile : sourceFiles) {
                    found |= analyzeFileContents(dependency, sourcefile);
                }
            }
        }
        if (found) {
            // found can only be true when parent is non-null.
            final String parentName = parent.getName();
            dependency.setDisplayFileName(parentName + "/__init__.py");
            dependency.getProductEvidence().addEvidence(file.getName(),
                    "PackageName", parentName, Confidence.MEDIUM);
        } else {
            // copy, alter and set in case some other thread is iterating over
            final List<Dependency> deps = new ArrayList<Dependency>(
                    engine.getDependencies());
            deps.remove(dependency);
            engine.setDependencies(deps);
        }
    }

    /**
     * This should gather information from leading docstrings, file comments,
     * and assignments to __version__, __title__, __summary__, __uri__, __url__,
     * __home*page__, __author__, and their all caps equivalents.
     *
     * @param dependency
     *            the dependency that receives the evidence
     * @param file
     *            the Python source file to scan
     * @return whether evidence was found
     * @throws AnalysisException
     *             thrown if the file cannot be read
     */
    private boolean analyzeFileContents(Dependency dependency, File file)
            throws AnalysisException {
        final String contents;
        try {
            // NOTE(review): this overload reads with the platform default
            // charset; confirm that is acceptable for non-ASCII sources.
            contents = FileUtils.readFileToString(file).trim();
        } catch (IOException e) {
            throw new AnalysisException(
                    "Problem occured while reading dependency file.", e);
        }
        if (contents.isEmpty()) {
            return false;
        }
        final String source = file.getName();
        boolean found = gatherEvidence(VERSION_PATTERN, contents, source,
                dependency.getVersionEvidence(), "SourceVersion",
                Confidence.MEDIUM);
        found |= addSummaryInfo(dependency, SUMMARY_PATTERN, 4, contents,
                source, "summary");
        // The module docstring is only consulted for the package's
        // __init__.py itself, not for sibling modules.
        if (INIT_PY_FILTER.accept(file)) {
            found |= addSummaryInfo(dependency, MODULE_DOCSTRING, 2,
                    contents, source, "docstring");
        }
        found |= gatherEvidence(TITLE_PATTERN, contents, source,
                dependency.getProductEvidence(), "SourceTitle",
                Confidence.LOW);
        final EvidenceCollection vendorEvidence = dependency
                .getVendorEvidence();
        found |= gatherEvidence(AUTHOR_PATTERN, contents, source,
                vendorEvidence, "SourceAuthor", Confidence.MEDIUM);
        try {
            found |= gatherHomePageEvidence(URI_PATTERN, vendorEvidence,
                    source, "URL", contents);
            found |= gatherHomePageEvidence(HOMEPAGE_PATTERN,
                    vendorEvidence, source, "HomePage", contents);
        } catch (MalformedURLException e) {
            LOGGER.warning(e.getMessage());
        }
        return found;
    }

    /**
     * Adds the text captured by the given pattern to the dependency's
     * description.
     *
     * @param dependency
     *            the dependency that receives the description
     * @param pattern
     *            to scan contents with
     * @param group
     *            index of the capture group holding the description text
     * @param contents
     *            of Python source file
     * @param source
     *            for storing evidence
     * @param key
     *            name under which the description is stored
     * @return whether the pattern matched
     */
    private boolean addSummaryInfo(Dependency dependency, Pattern pattern,
            int group, String contents, String source, String key) {
        final Matcher matcher = pattern.matcher(contents);
        final boolean found = matcher.find();
        if (found) {
            JarAnalyzer.addDescription(dependency, matcher.group(group),
                    source, key);
        }
        return found;
    }

    /**
     * Gathers evidence from a URL assigned to a variable in Python source.
     * Evidence is only recorded when the assigned value is recognized as a
     * URL.
     *
     * @param pattern
     *            to scan contents with
     * @param evidence
     *            to store evidence in
     * @param source
     *            for storing evidence
     * @param name
     *            of evidence
     * @param contents
     *            of Python source file
     * @return whether evidence was found
     * @throws MalformedURLException
     *             thrown if the URL cannot be parsed
     */
    private boolean gatherHomePageEvidence(Pattern pattern,
            EvidenceCollection evidence, String source, String name,
            String contents) throws MalformedURLException {
        final Matcher matcher = pattern.matcher(contents);
        boolean found = false;
        if (matcher.find()) {
            final String value = matcher.group(4);
            if (UrlStringUtils.isUrl(value)) {
                found = true;
                final List<String> urlData = UrlStringUtils
                        .extractImportantUrlData(value);
                for (final String part : urlData) {
                    evidence.addEvidence(source, name, part, Confidence.MEDIUM);
                }
            }
        }
        return found;
    }

    /**
     * Gather evidence from a Python source file using the given string
     * assignment regex pattern.
     *
     * @param pattern
     *            to scan contents with
     * @param contents
     *            of Python source file
     * @param source
     *            for storing evidence
     * @param evidence
     *            to store evidence in
     * @param name
     *            of evidence
     * @param confidence
     *            in evidence
     * @return whether evidence was found
     */
    private boolean gatherEvidence(Pattern pattern, String contents,
            String source, EvidenceCollection evidence, String name,
            Confidence confidence) {
        final Matcher matcher = pattern.matcher(contents);
        final boolean found = matcher.find();
        if (found) {
            evidence.addEvidence(source, name, matcher.group(4), confidence);
        }
        return found;
    }

    /**
     * Returns the settings key that controls whether this analyzer is
     * enabled.
     *
     * @return the enabled-setting key for this analyzer
     */
    @Override
    protected String getAnalyzerEnabledSettingKey() {
        return Settings.KEYS.ANALYZER_PYTHON_PACKAGE_ENABLED;
    }
}

View File

@@ -12,4 +12,5 @@ org.owasp.dependencycheck.analyzer.CentralAnalyzer
org.owasp.dependencycheck.analyzer.NexusAnalyzer
org.owasp.dependencycheck.analyzer.NuspecAnalyzer
org.owasp.dependencycheck.analyzer.AssemblyAnalyzer
org.owasp.dependencycheck.analyzer.PythonDistributionAnalyzer
org.owasp.dependencycheck.analyzer.PythonDistributionAnalyzer
org.owasp.dependencycheck.analyzer.PythonPackageAnalyzer

View File

@@ -47,7 +47,8 @@ public class PythonDistributionAnalyzerTest extends BaseTest {
}
/**
* Test of getSupportedExtensions method, of class JarAnalyzer.
* Test of getSupportedExtensions method, of class
* PythonDistributionAnalyzer.
*/
@Test
public void testGetSupportedExtensions() {
@@ -57,7 +58,7 @@ public class PythonDistributionAnalyzerTest extends BaseTest {
new HashSet<String>(Arrays.asList(expected)),
new PythonDistributionAnalyzer().getSupportedExtensions());
}
/**
* Test of supportsExtension method, of class PythonDistributionAnalyzer.
*/
@@ -76,9 +77,8 @@ public class PythonDistributionAnalyzerTest extends BaseTest {
analyzer.supportsExtension("PKG-INFO"));
}
/**
* Test of inspect method, of class JarAnalyzer.
* Test of inspect method, of class PythonDistributionAnalyzer.
*
* @throws Exception
* is thrown when an exception occurs.
@@ -90,7 +90,7 @@ public class PythonDistributionAnalyzerTest extends BaseTest {
}
/**
* Test of inspect method, of class JarAnalyzer.
* Test of inspect method, of class PythonDistributionAnalyzer.
*
* @throws Exception
* is thrown when an exception occurs.
@@ -121,34 +121,41 @@ public class PythonDistributionAnalyzerTest extends BaseTest {
@Test
public void testAnalyzeEggInfoFolder() throws AnalysisException {
eggtestAssertions("python/site-packages/EggTest.egg-info/PKG-INFO");
eggtestAssertions(this,
"python/site-packages/EggTest.egg-info/PKG-INFO",
new PythonDistributionAnalyzer());
}
@Test
public void testAnalyzeEggArchive() throws AnalysisException {
eggtestAssertions("python/dist/EggTest-0.0.1-py2.7.egg");
eggtestAssertions(this, "python/dist/EggTest-0.0.1-py2.7.egg",
new PythonDistributionAnalyzer());
}
@Test
public void testAnalyzeEggArchiveNamedZip() throws AnalysisException {
eggtestAssertions("python/dist/EggTest-0.0.1-py2.7.zip");
eggtestAssertions(this, "python/dist/EggTest-0.0.1-py2.7.zip",
new PythonDistributionAnalyzer());
}
@Test
public void testAnalyzeEggFolder() throws AnalysisException {
eggtestAssertions("python/site-packages/EggTest-0.0.1-py2.7.egg/EGG-INFO/PKG-INFO");
eggtestAssertions(
this,
"python/site-packages/EggTest-0.0.1-py2.7.egg/EGG-INFO/PKG-INFO",
new PythonDistributionAnalyzer());
}
private void eggtestAssertions(final String resource)
throws AnalysisException {
public static void eggtestAssertions(Object context, final String resource,
Analyzer analyzer) throws AnalysisException {
final Dependency result = new Dependency(BaseTest.getResourceAsFile(
this, resource));
new PythonDistributionAnalyzer().analyze(result, null);
context, resource));
analyzer.analyze(result, null);
assertTrue("Expected vendor evidence to contain \"example\".", result
.getVendorEvidence().toString().contains("example"));
boolean found = false;
for (final Evidence e : result.getVersionEvidence()) {
if ("Version".equals(e.getName()) && "0.0.1".equals(e.getValue())) {
if ("0.0.1".equals(e.getValue())) {
found = true;
break;
}

View File

@@ -0,0 +1,73 @@
/*
* This file is part of dependency-check-core.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright (c) 2015 Institute for Defense Analyses. All Rights Reserved.
*/
package org.owasp.dependencycheck.analyzer;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.util.Arrays;
import java.util.HashSet;
import org.apache.commons.lang.StringUtils;
import org.junit.Test;
import org.owasp.dependencycheck.BaseTest;
import org.owasp.dependencycheck.analyzer.exception.AnalysisException;
/**
* Unit tests for PythonPackageAnalyzer.
*
* @author Dale Visser <dvisser@ida.org>
*/
public class PythonPackageAnalyzerTest extends BaseTest {

    /**
     * Test of getName method, of class PythonPackageAnalyzer.
     */
    @Test
    public void testGetName() {
        // Exercise the class under test; the original asserted on
        // PythonDistributionAnalyzer by mistake.
        assertEquals("Analyzer name wrong.", "Python Package Analyzer",
                new PythonPackageAnalyzer().getName());
    }

    /**
     * Test of getSupportedExtensions method, of class PythonPackageAnalyzer.
     */
    @Test
    public void testGetSupportedExtensions() {
        final String[] expected = { "py" };
        assertEquals("Supported extensions should just have the following: "
                + StringUtils.join(expected, ", "),
                new HashSet<String>(Arrays.asList(expected)),
                new PythonPackageAnalyzer().getSupportedExtensions());
    }

    /**
     * Test of supportsExtension method, of class PythonPackageAnalyzer.
     */
    @Test
    public void testSupportsExtension() {
        assertTrue("Should support \"py\" extension.",
                new PythonPackageAnalyzer().supportsExtension("py"));
    }

    /**
     * Test of analyze method, of class PythonPackageAnalyzer, run against the
     * package's "__init__.py" using the assertions shared with
     * PythonDistributionAnalyzerTest.
     *
     * @throws AnalysisException
     *             is thrown when an exception occurs.
     */
    @Test
    public void testAnalyzeSourceMetadata() throws AnalysisException {
        PythonDistributionAnalyzerTest.eggtestAssertions(this,
                "python/eggtest/__init__.py", new PythonPackageAnalyzer());
    }
}

View File

@@ -0,0 +1,9 @@
# Metadata describing the EggTest package, kept as plain module-level
# string constants.

# Names made available by a wildcard import of this module.
__all__ = ["__title__", "__summary__", "__uri__", "__version__", "__author__",
"__email__" ]
# Project name.
__title__ = "EggTest"
# One-line description of the project.
__summary__ = "Simple project for producing an .egg."
# Project home page.
__uri__ = "http://example.org/eggtest"
# Release number.
__version__ = "0.0.1"
# Author name and contact address.
__author__ = "Dale Visser"
__email__ = "dvisser@ida.org"

View File

@@ -1,9 +1,11 @@
from setuptools import setup
setup(name = 'EggTest',
about = {}
execfile('eggtest/__about__.py', about)
setup(name = about['__title__'],
packages = ['eggtest'],
version = '0.0.1',
description = 'Simple project for producing an .egg.',
url = 'http://example.org/eggtest',
author = 'Dale Visser',
author_email = 'dvisser@ida.org')
version = about['__version__'],
description = about['__summary__'],
url = about['__uri__'],
author = about['__author__'],
author_email = about['__email__'])

View File

@@ -3,5 +3,6 @@ EggTest.egg-info/PKG-INFO
EggTest.egg-info/SOURCES.txt
EggTest.egg-info/dependency_links.txt
EggTest.egg-info/top_level.txt
eggtest/__about__.py
eggtest/__init__.py
eggtest/main.py

View File

@@ -0,0 +1,9 @@
# Metadata describing the EggTest package, kept as plain module-level
# string constants.

# Names made available by a wildcard import of this module.
__all__ = ["__title__", "__summary__", "__uri__", "__version__", "__author__",
"__email__" ]
# Project name.
__title__ = "EggTest"
# One-line description of the project.
__summary__ = "Simple project for producing an .egg."
# Project home page.
__uri__ = "http://example.org/eggtest"
# Release number.
__version__ = "0.0.1"
# Author name and contact address.
__author__ = "Dale Visser"
__email__ = "dvisser@ida.org"

View File

@@ -180,6 +180,10 @@ public final class Settings {
* The properties key for whether the Python Distribution analyzer is enabled.
*/
public static final String ANALYZER_PYTHON_DISTRIBUTION_ENABLED = "analyzer.python.distribution.enabled";
/**
* The properties key for whether the Python Package analyzer is enabled.
*/
public static final String ANALYZER_PYTHON_PACKAGE_ENABLED = "analyzer.python.package.enabled";
/**
* The properties key for whether the .NET Assembly analyzer is enabled.
*/