Coverage Report - org.owasp.dependencycheck.analyzer.PythonPackageAnalyzer
 
Classes in this File | Line Coverage | Branch Coverage | Complexity
PythonPackageAnalyzer | 90% (63/70) | 77% (14/18) | 2.091
 
 1  
 /*
 2  
  * This file is part of dependency-check-core.
 3  
  *
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  *
 8  
  *     http://www.apache.org/licenses/LICENSE-2.0
 9  
  *
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  *
 16  
  * Copyright (c) 2015 Institute for Defense Analyses. All Rights Reserved.
 17  
  */
 18  
 package org.owasp.dependencycheck.analyzer;
 19  
 
 20  
 import org.apache.commons.io.FileUtils;
 21  
 import org.apache.commons.io.filefilter.NameFileFilter;
 22  
 import org.apache.commons.io.filefilter.SuffixFileFilter;
 23  
 import org.owasp.dependencycheck.Engine;
 24  
 import org.owasp.dependencycheck.analyzer.exception.AnalysisException;
 25  
 import org.owasp.dependencycheck.dependency.Confidence;
 26  
 import org.owasp.dependencycheck.dependency.Dependency;
 27  
 import org.owasp.dependencycheck.dependency.EvidenceCollection;
 28  
 import org.owasp.dependencycheck.utils.FileFilterBuilder;
 29  
 import org.owasp.dependencycheck.utils.Settings;
 30  
 import org.owasp.dependencycheck.utils.UrlStringUtils;
 31  
 import org.slf4j.Logger;
 32  
 import org.slf4j.LoggerFactory;
 33  
 
 34  
 import java.io.File;
 35  
 import java.io.FileFilter;
 36  
 import java.io.IOException;
 37  
 import java.net.MalformedURLException;
 38  
 import java.util.ArrayList;
 39  
 import java.util.List;
 40  
 import java.util.regex.Matcher;
 41  
 import java.util.regex.Pattern;
 42  
 
 43  
 /**
 44  
  * Used to analyze a Python package, and collect information that can be used to determine the associated CPE.
 45  
  *
 46  
  * @author Dale Visser <dvisser@ida.org>
 47  
  */
 48  48
 public class PythonPackageAnalyzer extends AbstractFileTypeAnalyzer {
 49  
 
 50  
     /**
 51  
      * Regex flags shared by the file scanning patterns: DOTALL lets '.' span line breaks and CASE_INSENSITIVE ignores letter case.
 52  
      */
 53  
     private static final int REGEX_OPTIONS = Pattern.DOTALL
 54  
             | Pattern.CASE_INSENSITIVE;
 55  
 
 56  
     /**
 57  
      * The logger; used to warn about malformed URLs found while scanning source files.
 58  
      */
 59  8
     private static final Logger LOGGER = LoggerFactory
 60  
             .getLogger(PythonPackageAnalyzer.class);
 61  
 
 62  
     /**
 63  
      * Filename extension (without the leading dot) of the files to be analyzed.
 64  
      */
 65  
     private static final String EXTENSIONS = "py";
 66  
 
 67  
     /**
 68  
      * Pattern for matching the module docstring at the very start of a source file; group 2 holds the text between the three-character quote delimiters.
 69  
      */
 70  8
     private static final Pattern MODULE_DOCSTRING = Pattern.compile(
 71  
             "^(['\\\"]{3})(.*?)\\1", REGEX_OPTIONS);
 72  
 
 73  
     /**
 74  
      * Matches assignments to version variables in Python source code; group 4 holds the quoted value, which must begin with digits, a dot and more digits.
 75  
      */
 76  8
     private static final Pattern VERSION_PATTERN = Pattern.compile(
 77  
             "\\b(__)?version(__)? *= *(['\"]+)(\\d+\\.\\d+.*?)\\3",
 78  
             REGEX_OPTIONS);
 79  
 
 80  
     /**
 81  
      * Matches assignments to title variables in Python source code.
 82  
      */
 83  8
     private static final Pattern TITLE_PATTERN = compileAssignPattern("title");
 84  
 
 85  
     /**
 86  
      * Matches assignments to summary variables in Python source code.
 87  
      */
 88  8
     private static final Pattern SUMMARY_PATTERN = compileAssignPattern("summary");
 89  
 
 90  
     /**
 91  
      * Matches assignments to URI/URL variables in Python source code.
 92  
      */
 93  8
     private static final Pattern URI_PATTERN = compileAssignPattern("ur[il]");
 94  
 
 95  
     /**
 96  
      * Matches assignments to home page variables ("homepage" or "home_page") in Python source code.
 97  
      */
 98  8
     private static final Pattern HOMEPAGE_PATTERN = compileAssignPattern("home_?page");
 99  
 
 100  
     /**
 101  
      * Matches assignments to author variables in Python source code.
 102  
      */
 103  8
     private static final Pattern AUTHOR_PATTERN = compileAssignPattern("author");
 104  
 
 105  
     /**
 106  
      * Filter that detects files named "__init__.py".
 107  
      */
 108  8
     private static final FileFilter INIT_PY_FILTER = new NameFileFilter("__init__.py");
 109  
 
 110  
     /**
 111  
      * Filter that accepts files whose names end in ".py".
 112  
      */
 113  8
     private static final FileFilter PY_FILTER = new SuffixFileFilter(".py");
 114  
 
 115  
     /**
 116  
      * Returns the name of the Python Package Analyzer.
 117  
      *
 118  
      * @return the fixed, human-readable name of the analyzer
 119  
      */
 120  
     @Override
 121  
     public String getName() {
 122  40
         return "Python Package Analyzer";
 123  
     }
 124  
 
 125  
     /**
 126  
      * Indicates that this analyzer runs during the information collection phase.
 127  
      *
 128  
      * @return {@link AnalysisPhase#INFORMATION_COLLECTION}
 129  
      */
 130  
     @Override
 131  
     public AnalysisPhase getAnalysisPhase() {
 132  16
         return AnalysisPhase.INFORMATION_COLLECTION;
 133  
     }
 134  
 
 135  
     /**
 136  
      * The file filter used to determine which files this analyzer supports; built from {@link #EXTENSIONS}.
 137  
      */
 138  8
     private static final FileFilter FILTER = FileFilterBuilder.newInstance().addExtensions(EXTENSIONS).build();
 139  
 
 140  
     /**
 141  
      * Returns the file filter that selects the files this analyzer supports.
 142  
      *
 143  
      * @return the shared {@link #FILTER} instance
 144  
      */
 145  
     @Override
 146  
     protected FileFilter getFileFilter() {
 147  6832
         return FILTER;
 148  
     }
 149  
 
 150  
     /**
 151  
      * No-op initializer implementation; this analyzer performs no setup of its own.
 152  
      *
 153  
      * @throws Exception never thrown
 154  
      */
 155  
     @Override
 156  
     protected void initializeFileTypeAnalyzer() throws Exception {
 157  
         // Nothing to do here.
 158  24
     }
 159  
 
 160  
     /**
 161  
      * Utility function that builds the regex for a quoted-string assignment to the named variable, with optional double underscores around the name; group 4 captures the assigned text.
 162  
      *
 163  
      * @param name the variable name, inserted unescaped as a regex fragment into the assignment pattern
 164  
      * @return the Pattern compiled with {@link #REGEX_OPTIONS}
 165  
      */
 166  
     private static Pattern compileAssignPattern(String name) {
 167  40
         return Pattern.compile(
 168  
                 String.format("\\b(__)?%s(__)?\\b *= *(['\"]+)(.*?)\\3", name),
 169  
                 REGEX_OPTIONS);
 170  
     }
 171  
 
 172  
     /**
 173  
      * Analyzes python packages and adds evidence to the dependency: for an __init__.py file every .py file in its directory is scanned; a dependency that yields no evidence (including any file not named __init__.py) is removed from the engine.
 174  
      *
 175  
      * @param dependency the dependency being analyzed
 176  
      * @param engine the engine being used to perform the scan
 177  
      * @throws AnalysisException thrown if there is an unrecoverable error analyzing the dependency
 178  
      */
 179  
     @Override
 180  
     protected void analyzeFileType(Dependency dependency, Engine engine)
 181  
             throws AnalysisException {
 182  8
         final File file = dependency.getActualFile();
 183  8
         final File parent = file.getParentFile(); // NOTE(review): getParentFile() returns null for a path without a parent; the next line assumes it does not
 184  8
         final String parentName = parent.getName();
 185  8
         boolean found = false;
 186  8
         if (INIT_PY_FILTER.accept(file)) {
 187  32
             for (final File sourcefile : parent.listFiles(PY_FILTER)) { // NOTE(review): File.listFiles returns null on an I/O error, which would raise a NullPointerException here
 188  24
                 found |= analyzeFileContents(dependency, sourcefile);
 189  
             }
 190  
         }
 191  8
         if (found) {
 192  8
             dependency.setDisplayFileName(parentName + "/__init__.py");
 193  8
             dependency.getProductEvidence().addEvidence(file.getName(),
 194  
                     "PackageName", parentName, Confidence.MEDIUM);
 195  
         } else {
 196  
             // copy, alter and set in case some other thread is iterating over the engine's dependency list
 197  0
             final List<Dependency> deps = new ArrayList<Dependency>(
 198  
                     engine.getDependencies());
 199  0
             deps.remove(dependency);
 200  0
             engine.setDependencies(deps);
 201  
         }
 202  8
     }
 203  
 
 204  
     /**
 205  
      * Gathers evidence from one Python source file: the leading docstring (__init__.py only) and assignments to __version__, __title__,
 206  
      * __summary__, __uri__, __url__, __home*page__ and __author__, matched case-insensitively with the double underscores optional.
 207  
      *
 208  
      * @param dependency the dependency being analyzed
 209  
      * @param file the file to analyze
 210  
      * @return whether evidence was found
 211  
      * @throws AnalysisException thrown if the file cannot be read
 212  
      */
 213  
     private boolean analyzeFileContents(Dependency dependency, File file)
 214  
             throws AnalysisException {
 215  
         String contents;
 216  
         try {
 217  24
             contents = FileUtils.readFileToString(file).trim(); // NOTE(review): no charset is passed, so presumably the platform default encoding is used - confirm
 218  0
         } catch (IOException e) {
 219  0
             throw new AnalysisException(
 220  
                     "Problem occurred while reading dependency file.", e);
 221  24
         }
 222  24
         boolean found = false;
 223  24
         if (!contents.isEmpty()) {
 224  24
             final String source = file.getName();
 225  24
             found = gatherEvidence(VERSION_PATTERN, contents, source,
 226  
                     dependency.getVersionEvidence(), "SourceVersion",
 227  
                     Confidence.MEDIUM);
 228  24
             found |= addSummaryInfo(dependency, SUMMARY_PATTERN, 4, contents,
 229  
                     source, "summary");
 230  24
             if (INIT_PY_FILTER.accept(file)) {
 231  8
                 found |= addSummaryInfo(dependency, MODULE_DOCSTRING, 2,
 232  
                         contents, source, "docstring");
 233  
             }
 234  24
             found |= gatherEvidence(TITLE_PATTERN, contents, source,
 235  
                     dependency.getProductEvidence(), "SourceTitle",
 236  
                     Confidence.LOW);
 237  24
             final EvidenceCollection vendorEvidence = dependency
 238  
                     .getVendorEvidence();
 239  24
             found |= gatherEvidence(AUTHOR_PATTERN, contents, source,
 240  
                     vendorEvidence, "SourceAuthor", Confidence.MEDIUM);
 241  
             try {
 242  24
                 found |= gatherHomePageEvidence(URI_PATTERN, vendorEvidence,
 243  
                         source, "URL", contents);
 244  24
                 found |= gatherHomePageEvidence(HOMEPAGE_PATTERN,
 245  
                         vendorEvidence, source, "HomePage", contents);
 246  0
             } catch (MalformedURLException e) {
 247  0
                 LOGGER.warn(e.getMessage()); // a malformed URL is only logged; if the first lookup throws, the second lookup is skipped
 248  24
             }
 249  
         }
 250  24
         return found;
 251  
     }
 252  
 
 253  
     /**
 254  
      * Adds summary information to the dependency by passing the requested group of the first pattern match to JarAnalyzer.addDescription.
 255  
      *
 256  
      * @param dependency the dependency being analyzed
 257  
      * @param pattern the pattern used to perform analysis
 258  
      * @param group the group from the pattern that indicates the data to use
 259  
      * @param contents the data being analyzed
 260  
      * @param source the source name to use when recording the evidence
 261  
      * @param key the key name to use when recording the evidence
 262  
      * @return true if the pattern matched and a description was added; otherwise false
 263  
      */
 264  
     private boolean addSummaryInfo(Dependency dependency, Pattern pattern,
 265  
             int group, String contents, String source, String key) {
 266  32
         final Matcher matcher = pattern.matcher(contents);
 267  32
         final boolean found = matcher.find();
 268  32
         if (found) {
 269  8
             JarAnalyzer.addDescription(dependency, matcher.group(group),
 270  
                     source, key);
 271  
         }
 272  32
         return found;
 273  
     }
 274  
 
 275  
     /**
 276  
      * Collects home page evidence: takes group 4 of the first pattern match and records it with MEDIUM confidence only if it is a URL.
 277  
      *
 278  
      * @param pattern the pattern to match
 279  
      * @param evidence the evidence collection to add the evidence to
 280  
      * @param source the source of the evidence
 281  
      * @param name the name of the evidence
 282  
      * @param contents the file contents to search for the URL assignment
 283  
      * @return true if evidence was collected; otherwise false
 284  
      * @throws MalformedURLException thrown if the URL is malformed
 285  
      */
 286  
     private boolean gatherHomePageEvidence(Pattern pattern,
 287  
             EvidenceCollection evidence, String source, String name,
 288  
             String contents) throws MalformedURLException {
 289  48
         final Matcher matcher = pattern.matcher(contents);
 290  48
         boolean found = false;
 291  48
         if (matcher.find()) {
 292  8
             final String url = matcher.group(4);
 293  8
             if (UrlStringUtils.isUrl(url)) {
 294  8
                 found = true;
 295  8
                 evidence.addEvidence(source, name, url, Confidence.MEDIUM);
 296  
             }
 297  
         }
 298  48
         return found;
 299  
     }
 300  
 
 301  
     /**
 302  
      * Gather evidence from a Python source file using the given string assignment regex pattern; group 4 of the first match is stored as the evidence value.
 303  
      *
 304  
      * @param pattern to scan contents with
 305  
      * @param contents of Python source file
 306  
      * @param source for storing evidence
 307  
      * @param evidence to store evidence in
 308  
      * @param name of evidence
 309  
      * @param confidence in evidence
 310  
      * @return whether evidence was found
 311  
      */
 312  
     private boolean gatherEvidence(Pattern pattern, String contents,
 313  
             String source, EvidenceCollection evidence, String name,
 314  
             Confidence confidence) {
 315  72
         final Matcher matcher = pattern.matcher(contents);
 316  72
         final boolean found = matcher.find();
 317  72
         if (found) {
 318  24
             evidence.addEvidence(source, name, matcher.group(4), confidence);
 319  
         }
 320  72
         return found;
 321  
     }
 322  
 
 323  
     @Override
 324  
     protected String getAnalyzerEnabledSettingKey() {
 325  48
         return Settings.KEYS.ANALYZER_PYTHON_PACKAGE_ENABLED; // presumably the settings key that switches this analyzer on or off - confirm against the base class
 326  
     }
 327  
 }