View Javadoc
1   /*
2    * This file is part of dependency-check-core.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   * Copyright (c) 2015 Institute for Defense Analyses. All Rights Reserved.
17   */
18  package org.owasp.dependencycheck.analyzer;
19  
20  import org.apache.commons.io.FileUtils;
21  import org.apache.commons.io.filefilter.NameFileFilter;
22  import org.apache.commons.io.filefilter.SuffixFileFilter;
23  import org.owasp.dependencycheck.Engine;
24  import org.owasp.dependencycheck.analyzer.exception.AnalysisException;
25  import org.owasp.dependencycheck.dependency.Confidence;
26  import org.owasp.dependencycheck.dependency.Dependency;
27  import org.owasp.dependencycheck.dependency.EvidenceCollection;
28  import org.owasp.dependencycheck.utils.FileFilterBuilder;
29  import org.owasp.dependencycheck.utils.Settings;
30  import org.owasp.dependencycheck.utils.UrlStringUtils;
31  
32  import java.io.File;
33  import java.io.FileFilter;
34  import java.io.IOException;
35  import java.nio.charset.Charset;
36  import java.util.ArrayList;
37  import java.util.List;
38  import java.util.regex.Matcher;
39  import java.util.regex.Pattern;
40  
41  /**
42   * Used to analyze a Python package, and collect information that can be used to
43   * determine the associated CPE.
44   *
45   * @author Dale Visser
46   */
47  @Experimental
48  public class PythonPackageAnalyzer extends AbstractFileTypeAnalyzer {
49  
50      /**
51       * Used when compiling file scanning regex patterns.
52       */
53      private static final int REGEX_OPTIONS = Pattern.DOTALL
54              | Pattern.CASE_INSENSITIVE;
55  
56      /**
57       * Filename extensions for files to be analyzed.
58       */
59      private static final String EXTENSIONS = "py";
60  
61      /**
62       * Pattern for matching the module docstring in a source file.
63       */
64      private static final Pattern MODULE_DOCSTRING = Pattern.compile(
65              "^(['\\\"]{3})(.*?)\\1", REGEX_OPTIONS);
66  
67      /**
68       * Matches assignments to version variables in Python source code.
69       */
70      private static final Pattern VERSION_PATTERN = Pattern.compile(
71              "\\b(__)?version(__)? *= *(['\"]+)(\\d+\\.\\d+.*?)\\3",
72              REGEX_OPTIONS);
73  
74      /**
75       * Matches assignments to title variables in Python source code.
76       */
77      private static final Pattern TITLE_PATTERN = compileAssignPattern("title");
78  
79      /**
80       * Matches assignments to summary variables in Python source code.
81       */
82      private static final Pattern SUMMARY_PATTERN = compileAssignPattern("summary");
83  
84      /**
85       * Matches assignments to URL/URL variables in Python source code.
86       */
87      private static final Pattern URI_PATTERN = compileAssignPattern("ur[il]");
88  
89      /**
90       * Matches assignments to home page variables in Python source code.
91       */
92      private static final Pattern HOMEPAGE_PATTERN = compileAssignPattern("home_?page");
93  
94      /**
95       * Matches assignments to author variables in Python source code.
96       */
97      private static final Pattern AUTHOR_PATTERN = compileAssignPattern("author");
98  
99      /**
100      * Filter that detects files named "__init__.py".
101      */
102     private static final FileFilter INIT_PY_FILTER = new NameFileFilter("__init__.py");
103 
104     /**
105      * The file filter for python files.
106      */
107     private static final FileFilter PY_FILTER = new SuffixFileFilter(".py");
108 
109     /**
110      * Returns the name of the Python Package Analyzer.
111      *
112      * @return the name of the analyzer
113      */
114     @Override
115     public String getName() {
116         return "Python Package Analyzer";
117     }
118 
119     /**
120      * Tell that we are used for information collection.
121      *
122      * @return INFORMATION_COLLECTION
123      */
124     @Override
125     public AnalysisPhase getAnalysisPhase() {
126         return AnalysisPhase.INFORMATION_COLLECTION;
127     }
128 
129     /**
130      * The file filter used to determine which files this analyzer supports.
131      */
132     private static final FileFilter FILTER = FileFilterBuilder.newInstance().addExtensions(EXTENSIONS).build();
133 
134     /**
135      * Returns the FileFilter
136      *
137      * @return the FileFilter
138      */
139     @Override
140     protected FileFilter getFileFilter() {
141         return FILTER;
142     }
143 
144     /**
145      * No-op initializer implementation.
146      *
147      * @throws Exception never thrown
148      */
149     @Override
150     protected void initializeFileTypeAnalyzer() throws Exception {
151         // Nothing to do here.
152     }
153 
154     /**
155      * Utility function to create a regex pattern matcher.
156      *
157      * @param name the value to use when constructing the assignment pattern
158      * @return the compiled Pattern
159      */
160     private static Pattern compileAssignPattern(String name) {
161         return Pattern.compile(
162                 String.format("\\b(__)?%s(__)?\\b *= *(['\"]+)(.*?)\\3", name),
163                 REGEX_OPTIONS);
164     }
165 
166     /**
167      * Analyzes python packages and adds evidence to the dependency.
168      *
169      * @param dependency the dependency being analyzed
170      * @param engine the engine being used to perform the scan
171      * @throws AnalysisException thrown if there is an unrecoverable error
172      * analyzing the dependency
173      */
174     @Override
175     protected void analyzeFileType(Dependency dependency, Engine engine)
176             throws AnalysisException {
177         final File file = dependency.getActualFile();
178         final File parent = file.getParentFile();
179         final String parentName = parent.getName();
180         if (INIT_PY_FILTER.accept(file)) {
181             //by definition, the containing folder of __init__.py is considered the package, even the file is empty:
182             //"The __init__.py files are required to make Python treat the directories as containing packages"
183             //see section "6.4 Packages" from https://docs.python.org/2/tutorial/modules.html;
184             dependency.setDisplayFileName(parentName + "/__init__.py");
185             dependency.getProductEvidence().addEvidence(file.getName(),
186                     "PackageName", parentName, Confidence.HIGHEST);
187 
188             final File[] fileList = parent.listFiles(PY_FILTER);
189             if (fileList != null) {
190                 for (final File sourceFile : fileList) {
191                     analyzeFileContents(dependency, sourceFile);
192                 }
193             }
194         } else {
195             // copy, alter and set in case some other thread is iterating over
196             final List<Dependency> dependencies = new ArrayList<Dependency>(
197                     engine.getDependencies());
198             dependencies.remove(dependency);
199             engine.setDependencies(dependencies);
200         }
201     }
202 
203     /**
204      * This should gather information from leading docstrings, file comments,
205      * and assignments to __version__, __title__, __summary__, __uri__, __url__,
206      * __home*page__, __author__, and their all caps equivalents.
207      *
208      * @param dependency the dependency being analyzed
209      * @param file the file name to analyze
210      * @return whether evidence was found
211      * @throws AnalysisException thrown if there is an unrecoverable error
212      */
213     private boolean analyzeFileContents(Dependency dependency, File file)
214             throws AnalysisException {
215         String contents;
216         try {
217             contents = FileUtils.readFileToString(file, Charset.defaultCharset()).trim();
218         } catch (IOException e) {
219             throw new AnalysisException(
220                     "Problem occurred while reading dependency file.", e);
221         }
222         boolean found = false;
223         if (!contents.isEmpty()) {
224             final String source = file.getName();
225             found = gatherEvidence(VERSION_PATTERN, contents, source,
226                     dependency.getVersionEvidence(), "SourceVersion",
227                     Confidence.MEDIUM);
228             found |= addSummaryInfo(dependency, SUMMARY_PATTERN, 4, contents,
229                     source, "summary");
230             if (INIT_PY_FILTER.accept(file)) {
231                 found |= addSummaryInfo(dependency, MODULE_DOCSTRING, 2,
232                         contents, source, "docstring");
233             }
234             found |= gatherEvidence(TITLE_PATTERN, contents, source,
235                     dependency.getProductEvidence(), "SourceTitle",
236                     Confidence.LOW);
237             final EvidenceCollection vendorEvidence = dependency
238                     .getVendorEvidence();
239             found |= gatherEvidence(AUTHOR_PATTERN, contents, source,
240                     vendorEvidence, "SourceAuthor", Confidence.MEDIUM);
241             found |= gatherHomePageEvidence(URI_PATTERN, vendorEvidence,
242                     source, "URL", contents);
243             found |= gatherHomePageEvidence(HOMEPAGE_PATTERN,
244                     vendorEvidence, source, "HomePage", contents);
245         }
246         return found;
247     }
248 
249     /**
250      * Adds summary information to the dependency
251      *
252      * @param dependency the dependency being analyzed
253      * @param pattern the pattern used to perform analysis
254      * @param group the group from the pattern that indicates the data to use
255      * @param contents the data being analyzed
256      * @param source the source name to use when recording the evidence
257      * @param key the key name to use when recording the evidence
258      * @return true if evidence was collected; otherwise false
259      */
260     private boolean addSummaryInfo(Dependency dependency, Pattern pattern,
261             int group, String contents, String source, String key) {
262         final Matcher matcher = pattern.matcher(contents);
263         final boolean found = matcher.find();
264         if (found) {
265             JarAnalyzer.addDescription(dependency, matcher.group(group),
266                     source, key);
267         }
268         return found;
269     }
270 
271     /**
272      * Collects evidence from the home page URL.
273      *
274      * @param pattern the pattern to match
275      * @param evidence the evidence collection to add the evidence to
276      * @param source the source of the evidence
277      * @param name the name of the evidence
278      * @param contents the home page URL
279      * @return true if evidence was collected; otherwise false
280      */
281     private boolean gatherHomePageEvidence(Pattern pattern,
282             EvidenceCollection evidence, String source, String name,
283             String contents) {
284         final Matcher matcher = pattern.matcher(contents);
285         boolean found = false;
286         if (matcher.find()) {
287             final String url = matcher.group(4);
288             if (UrlStringUtils.isUrl(url)) {
289                 found = true;
290                 evidence.addEvidence(source, name, url, Confidence.MEDIUM);
291             }
292         }
293         return found;
294     }
295 
296     /**
297      * Gather evidence from a Python source file using the given string
298      * assignment regex pattern.
299      *
300      * @param pattern to scan contents with
301      * @param contents of Python source file
302      * @param source for storing evidence
303      * @param evidence to store evidence in
304      * @param name of evidence
305      * @param confidence in evidence
306      * @return whether evidence was found
307      */
308     private boolean gatherEvidence(Pattern pattern, String contents,
309             String source, EvidenceCollection evidence, String name,
310             Confidence confidence) {
311         final Matcher matcher = pattern.matcher(contents);
312         final boolean found = matcher.find();
313         if (found) {
314             evidence.addEvidence(source, name, matcher.group(4), confidence);
315         }
316         return found;
317     }
318 
319     @Override
320     protected String getAnalyzerEnabledSettingKey() {
321         return Settings.KEYS.ANALYZER_PYTHON_PACKAGE_ENABLED;
322     }
323 }