Coverage Report - org.owasp.dependencycheck.analyzer.PythonPackageAnalyzer
 
Classes in this File Line Coverage Branch Coverage Complexity
PythonPackageAnalyzer
92%
61/66
77%
14/18
2
 
 1  
 /*
 2  
  * This file is part of dependency-check-core.
 3  
  *
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  *
 8  
  *     http://www.apache.org/licenses/LICENSE-2.0
 9  
  *
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  *
 16  
  * Copyright (c) 2015 Institute for Defense Analyses. All Rights Reserved.
 17  
  */
 18  
 package org.owasp.dependencycheck.analyzer;
 19  
 
 20  
 import org.apache.commons.io.FileUtils;
 21  
 import org.apache.commons.io.filefilter.NameFileFilter;
 22  
 import org.apache.commons.io.filefilter.SuffixFileFilter;
 23  
 import org.owasp.dependencycheck.Engine;
 24  
 import org.owasp.dependencycheck.analyzer.exception.AnalysisException;
 25  
 import org.owasp.dependencycheck.dependency.Confidence;
 26  
 import org.owasp.dependencycheck.dependency.Dependency;
 27  
 import org.owasp.dependencycheck.dependency.EvidenceCollection;
 28  
 import org.owasp.dependencycheck.utils.FileFilterBuilder;
 29  
 import org.owasp.dependencycheck.utils.Settings;
 30  
 import org.owasp.dependencycheck.utils.UrlStringUtils;
 31  
 
 32  
 import java.io.File;
 33  
 import java.io.FileFilter;
 34  
 import java.io.IOException;
 35  
 import java.util.ArrayList;
 36  
 import java.util.List;
 37  
 import java.util.regex.Matcher;
 38  
 import java.util.regex.Pattern;
 39  
 
 40  
 /**
 41  
  * Used to analyze a Python package, and collect information that can be used to determine the associated CPE.
 42  
  *
 43  
  * @author Dale Visser <dvisser@ida.org>
 44  
  */
 45  7
 public class PythonPackageAnalyzer extends AbstractFileTypeAnalyzer {
 46  
 
 47  
     /**
 48  
      * Used when compiling file scanning regex patterns.
 49  
      */
 50  
     private static final int REGEX_OPTIONS = Pattern.DOTALL
 51  
             | Pattern.CASE_INSENSITIVE;
 52  
 
 53  
     /**
 54  
      * Filename extensions for files to be analyzed.
 55  
      */
 56  
     private static final String EXTENSIONS = "py";
 57  
 
 58  
     /**
 59  
      * Pattern for matching the module docstring in a source file.
 60  
      */
 61  1
     private static final Pattern MODULE_DOCSTRING = Pattern.compile(
 62  
             "^(['\\\"]{3})(.*?)\\1", REGEX_OPTIONS);
 63  
 
 64  
     /**
 65  
      * Matches assignments to version variables in Python source code.
 66  
      */
 67  1
     private static final Pattern VERSION_PATTERN = Pattern.compile(
 68  
             "\\b(__)?version(__)? *= *(['\"]+)(\\d+\\.\\d+.*?)\\3",
 69  
             REGEX_OPTIONS);
 70  
 
 71  
     /**
 72  
      * Matches assignments to title variables in Python source code.
 73  
      */
 74  1
     private static final Pattern TITLE_PATTERN = compileAssignPattern("title");
 75  
 
 76  
     /**
 77  
      * Matches assignments to summary variables in Python source code.
 78  
      */
 79  1
     private static final Pattern SUMMARY_PATTERN = compileAssignPattern("summary");
 80  
 
 81  
     /**
 82  
      * Matches assignments to URL/URL variables in Python source code.
 83  
      */
 84  1
     private static final Pattern URI_PATTERN = compileAssignPattern("ur[il]");
 85  
 
 86  
     /**
 87  
      * Matches assignments to home page variables in Python source code.
 88  
      */
 89  1
     private static final Pattern HOMEPAGE_PATTERN = compileAssignPattern("home_?page");
 90  
 
 91  
     /**
 92  
      * Matches assignments to author variables in Python source code.
 93  
      */
 94  1
     private static final Pattern AUTHOR_PATTERN = compileAssignPattern("author");
 95  
 
 96  
     /**
 97  
      * Filter that detects files named "__init__.py".
 98  
      */
 99  1
     private static final FileFilter INIT_PY_FILTER = new NameFileFilter("__init__.py");
 100  
 
 101  
     /**
 102  
      * The file filter for python files.
 103  
      */
 104  1
     private static final FileFilter PY_FILTER = new SuffixFileFilter(".py");
 105  
 
 106  
     /**
 107  
      * Returns the name of the Python Package Analyzer.
 108  
      *
 109  
      * @return the name of the analyzer
 110  
      */
 111  
     @Override
 112  
     public String getName() {
 113  5
         return "Python Package Analyzer";
 114  
     }
 115  
 
 116  
     /**
 117  
      * Tell that we are used for information collection.
 118  
      *
 119  
      * @return INFORMATION_COLLECTION
 120  
      */
 121  
     @Override
 122  
     public AnalysisPhase getAnalysisPhase() {
 123  3
         return AnalysisPhase.INFORMATION_COLLECTION;
 124  
     }
 125  
 
 126  
     /**
 127  
      * The file filter used to determine which files this analyzer supports.
 128  
      */
 129  1
     private static final FileFilter FILTER = FileFilterBuilder.newInstance().addExtensions(EXTENSIONS).build();
 130  
 
 131  
     /**
 132  
      * Returns the FileFilter
 133  
      *
 134  
      * @return the FileFilter
 135  
      */
 136  
     @Override
 137  
     protected FileFilter getFileFilter() {
 138  854
         return FILTER;
 139  
     }
 140  
 
 141  
     /**
 142  
      * No-op initializer implementation.
 143  
      *
 144  
      * @throws Exception never thrown
 145  
      */
 146  
     @Override
 147  
     protected void initializeFileTypeAnalyzer() throws Exception {
 148  
         // Nothing to do here.
 149  3
     }
 150  
 
 151  
     /**
 152  
      * Utility function to create a regex pattern matcher.
 153  
      *
 154  
      * @param name the value to use when constructing the assignment pattern
 155  
      * @return the compiled Pattern
 156  
      */
 157  
     private static Pattern compileAssignPattern(String name) {
 158  5
         return Pattern.compile(
 159  
                 String.format("\\b(__)?%s(__)?\\b *= *(['\"]+)(.*?)\\3", name),
 160  
                 REGEX_OPTIONS);
 161  
     }
 162  
 
 163  
     /**
 164  
      * Analyzes python packages and adds evidence to the dependency.
 165  
      *
 166  
      * @param dependency the dependency being analyzed
 167  
      * @param engine     the engine being used to perform the scan
 168  
      * @throws AnalysisException thrown if there is an unrecoverable error analyzing the dependency
 169  
      */
 170  
     @Override
 171  
     protected void analyzeFileType(Dependency dependency, Engine engine)
 172  
             throws AnalysisException {
 173  1
         final File file = dependency.getActualFile();
 174  1
         final File parent = file.getParentFile();
 175  1
         final String parentName = parent.getName();
 176  1
         boolean found = false;
 177  1
         if (INIT_PY_FILTER.accept(file)) {
 178  4
             for (final File sourceFile : parent.listFiles(PY_FILTER)) {
 179  3
                 found |= analyzeFileContents(dependency, sourceFile);
 180  
             }
 181  
         }
 182  1
         if (found) {
 183  1
             dependency.setDisplayFileName(parentName + "/__init__.py");
 184  1
             dependency.getProductEvidence().addEvidence(file.getName(),
 185  
                     "PackageName", parentName, Confidence.MEDIUM);
 186  
         } else {
 187  
             // copy, alter and set in case some other thread is iterating over
 188  0
             final List<Dependency> dependencies = new ArrayList<Dependency>(
 189  
                     engine.getDependencies());
 190  0
             dependencies.remove(dependency);
 191  0
             engine.setDependencies(dependencies);
 192  
         }
 193  1
     }
 194  
 
 195  
     /**
 196  
      * This should gather information from leading docstrings, file comments, and assignments to __version__, __title__,
 197  
      * __summary__, __uri__, __url__, __home*page__, __author__, and their all caps equivalents.
 198  
      *
 199  
      * @param dependency the dependency being analyzed
 200  
      * @param file       the file name to analyze
 201  
      * @return whether evidence was found
 202  
      * @throws AnalysisException thrown if there is an unrecoverable error
 203  
      */
 204  
     private boolean analyzeFileContents(Dependency dependency, File file)
 205  
             throws AnalysisException {
 206  
         String contents;
 207  
         try {
 208  3
             contents = FileUtils.readFileToString(file).trim();
 209  0
         } catch (IOException e) {
 210  0
             throw new AnalysisException(
 211  
                     "Problem occurred while reading dependency file.", e);
 212  3
         }
 213  3
         boolean found = false;
 214  3
         if (!contents.isEmpty()) {
 215  3
             final String source = file.getName();
 216  3
             found = gatherEvidence(VERSION_PATTERN, contents, source,
 217  
                     dependency.getVersionEvidence(), "SourceVersion",
 218  
                     Confidence.MEDIUM);
 219  3
             found |= addSummaryInfo(dependency, SUMMARY_PATTERN, 4, contents,
 220  
                     source, "summary");
 221  3
             if (INIT_PY_FILTER.accept(file)) {
 222  1
                 found |= addSummaryInfo(dependency, MODULE_DOCSTRING, 2,
 223  
                         contents, source, "docstring");
 224  
             }
 225  3
             found |= gatherEvidence(TITLE_PATTERN, contents, source,
 226  
                     dependency.getProductEvidence(), "SourceTitle",
 227  
                     Confidence.LOW);
 228  3
             final EvidenceCollection vendorEvidence = dependency
 229  
                     .getVendorEvidence();
 230  3
             found |= gatherEvidence(AUTHOR_PATTERN, contents, source,
 231  
                     vendorEvidence, "SourceAuthor", Confidence.MEDIUM);
 232  3
             found |= gatherHomePageEvidence(URI_PATTERN, vendorEvidence,
 233  
                     source, "URL", contents);
 234  3
             found |= gatherHomePageEvidence(HOMEPAGE_PATTERN,
 235  
                     vendorEvidence, source, "HomePage", contents);
 236  
         }
 237  3
         return found;
 238  
     }
 239  
 
 240  
     /**
 241  
      * Adds summary information to the dependency
 242  
      *
 243  
      * @param dependency the dependency being analyzed
 244  
      * @param pattern    the pattern used to perform analysis
 245  
      * @param group      the group from the pattern that indicates the data to use
 246  
      * @param contents   the data being analyzed
 247  
      * @param source     the source name to use when recording the evidence
 248  
      * @param key        the key name to use when recording the evidence
 249  
      * @return true if evidence was collected; otherwise false
 250  
      */
 251  
     private boolean addSummaryInfo(Dependency dependency, Pattern pattern,
 252  
                                    int group, String contents, String source, String key) {
 253  4
         final Matcher matcher = pattern.matcher(contents);
 254  4
         final boolean found = matcher.find();
 255  4
         if (found) {
 256  1
             JarAnalyzer.addDescription(dependency, matcher.group(group),
 257  
                     source, key);
 258  
         }
 259  4
         return found;
 260  
     }
 261  
 
 262  
     /**
 263  
      * Collects evidence from the home page URL.
 264  
      *
 265  
      * @param pattern  the pattern to match
 266  
      * @param evidence the evidence collection to add the evidence to
 267  
      * @param source   the source of the evidence
 268  
      * @param name     the name of the evidence
 269  
      * @param contents the home page URL
 270  
      * @return true if evidence was collected; otherwise false
 271  
      */
 272  
     private boolean gatherHomePageEvidence(Pattern pattern,
 273  
                                            EvidenceCollection evidence, String source, String name,
 274  
                                            String contents) {
 275  6
         final Matcher matcher = pattern.matcher(contents);
 276  6
         boolean found = false;
 277  6
         if (matcher.find()) {
 278  1
             final String url = matcher.group(4);
 279  1
             if (UrlStringUtils.isUrl(url)) {
 280  1
                 found = true;
 281  1
                 evidence.addEvidence(source, name, url, Confidence.MEDIUM);
 282  
             }
 283  
         }
 284  6
         return found;
 285  
     }
 286  
 
 287  
     /**
 288  
      * Gather evidence from a Python source file using the given string assignment regex pattern.
 289  
      *
 290  
      * @param pattern    to scan contents with
 291  
      * @param contents   of Python source file
 292  
      * @param source     for storing evidence
 293  
      * @param evidence   to store evidence in
 294  
      * @param name       of evidence
 295  
      * @param confidence in evidence
 296  
      * @return whether evidence was found
 297  
      */
 298  
     private boolean gatherEvidence(Pattern pattern, String contents,
 299  
                                    String source, EvidenceCollection evidence, String name,
 300  
                                    Confidence confidence) {
 301  9
         final Matcher matcher = pattern.matcher(contents);
 302  9
         final boolean found = matcher.find();
 303  9
         if (found) {
 304  3
             evidence.addEvidence(source, name, matcher.group(4), confidence);
 305  
         }
 306  9
         return found;
 307  
     }
 308  
 
 309  
     @Override
 310  
     protected String getAnalyzerEnabledSettingKey() {
 311  7
         return Settings.KEYS.ANALYZER_PYTHON_PACKAGE_ENABLED;
 312  
     }
 313  
 }