View Javadoc
1   /*
2    * This file is part of dependency-check-core.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   * Copyright (c) 2015 Institute for Defense Analyses. All Rights Reserved.
17   */
18  package org.owasp.dependencycheck.analyzer;
19  
20  import org.apache.commons.io.FileUtils;
21  import org.apache.commons.io.filefilter.NameFileFilter;
22  import org.apache.commons.io.filefilter.SuffixFileFilter;
23  import org.owasp.dependencycheck.Engine;
24  import org.owasp.dependencycheck.analyzer.exception.AnalysisException;
25  import org.owasp.dependencycheck.dependency.Confidence;
26  import org.owasp.dependencycheck.dependency.Dependency;
27  import org.owasp.dependencycheck.dependency.EvidenceCollection;
28  import org.owasp.dependencycheck.utils.FileFilterBuilder;
29  import org.owasp.dependencycheck.utils.Settings;
30  import org.owasp.dependencycheck.utils.UrlStringUtils;
31  
32  import java.io.File;
33  import java.io.FileFilter;
34  import java.io.IOException;
35  import java.nio.charset.Charset;
36  import java.util.ArrayList;
37  import java.util.List;
38  import java.util.regex.Matcher;
39  import java.util.regex.Pattern;
40  import org.owasp.dependencycheck.exception.InitializationException;
41  
42  /**
43   * Used to analyze a Python package, and collect information that can be used to
44   * determine the associated CPE.
45   *
46   * @author Dale Visser
47   */
48  @Experimental
49  public class PythonPackageAnalyzer extends AbstractFileTypeAnalyzer {
50  
51      /**
52       * Used when compiling file scanning regex patterns.
53       */
54      private static final int REGEX_OPTIONS = Pattern.DOTALL
55              | Pattern.CASE_INSENSITIVE;
56  
57      /**
58       * Filename extensions for files to be analyzed.
59       */
60      private static final String EXTENSIONS = "py";
61  
62      /**
63       * Pattern for matching the module docstring in a source file.
64       */
65      private static final Pattern MODULE_DOCSTRING = Pattern.compile(
66              "^(['\\\"]{3})(.*?)\\1", REGEX_OPTIONS);
67  
68      /**
69       * Matches assignments to version variables in Python source code.
70       */
71      private static final Pattern VERSION_PATTERN = Pattern.compile(
72              "\\b(__)?version(__)? *= *(['\"]+)(\\d+\\.\\d+.*?)\\3",
73              REGEX_OPTIONS);
74  
75      /**
76       * Matches assignments to title variables in Python source code.
77       */
78      private static final Pattern TITLE_PATTERN = compileAssignPattern("title");
79  
80      /**
81       * Matches assignments to summary variables in Python source code.
82       */
83      private static final Pattern SUMMARY_PATTERN = compileAssignPattern("summary");
84  
85      /**
86       * Matches assignments to URL/URL variables in Python source code.
87       */
88      private static final Pattern URI_PATTERN = compileAssignPattern("ur[il]");
89  
90      /**
91       * Matches assignments to home page variables in Python source code.
92       */
93      private static final Pattern HOMEPAGE_PATTERN = compileAssignPattern("home_?page");
94  
95      /**
96       * Matches assignments to author variables in Python source code.
97       */
98      private static final Pattern AUTHOR_PATTERN = compileAssignPattern("author");
99  
100     /**
101      * Filter that detects files named "__init__.py".
102      */
103     private static final FileFilter INIT_PY_FILTER = new NameFileFilter("__init__.py");
104 
105     /**
106      * The file filter for python files.
107      */
108     private static final FileFilter PY_FILTER = new SuffixFileFilter(".py");
109 
110     /**
111      * Returns the name of the Python Package Analyzer.
112      *
113      * @return the name of the analyzer
114      */
115     @Override
116     public String getName() {
117         return "Python Package Analyzer";
118     }
119 
120     /**
121      * Tell that we are used for information collection.
122      *
123      * @return INFORMATION_COLLECTION
124      */
125     @Override
126     public AnalysisPhase getAnalysisPhase() {
127         return AnalysisPhase.INFORMATION_COLLECTION;
128     }
129 
130     /**
131      * The file filter used to determine which files this analyzer supports.
132      */
133     private static final FileFilter FILTER = FileFilterBuilder.newInstance().addExtensions(EXTENSIONS).build();
134 
135     /**
136      * Returns the FileFilter
137      *
138      * @return the FileFilter
139      */
140     @Override
141     protected FileFilter getFileFilter() {
142         return FILTER;
143     }
144 
145     /**
146      * No-op initializer implementation.
147      *
148      * @throws InitializationException never thrown
149      */
150     @Override
151     protected void initializeFileTypeAnalyzer() throws InitializationException {
152         // Nothing to do here.
153     }
154 
155     /**
156      * Utility function to create a regex pattern matcher.
157      *
158      * @param name the value to use when constructing the assignment pattern
159      * @return the compiled Pattern
160      */
161     private static Pattern compileAssignPattern(String name) {
162         return Pattern.compile(
163                 String.format("\\b(__)?%s(__)?\\b *= *(['\"]+)(.*?)\\3", name),
164                 REGEX_OPTIONS);
165     }
166 
167     /**
168      * Analyzes python packages and adds evidence to the dependency.
169      *
170      * @param dependency the dependency being analyzed
171      * @param engine the engine being used to perform the scan
172      * @throws AnalysisException thrown if there is an unrecoverable error
173      * analyzing the dependency
174      */
175     @Override
176     protected void analyzeFileType(Dependency dependency, Engine engine)
177             throws AnalysisException {
178         final File file = dependency.getActualFile();
179         final File parent = file.getParentFile();
180         final String parentName = parent.getName();
181         if (INIT_PY_FILTER.accept(file)) {
182             //by definition, the containing folder of __init__.py is considered the package, even the file is empty:
183             //"The __init__.py files are required to make Python treat the directories as containing packages"
184             //see section "6.4 Packages" from https://docs.python.org/2/tutorial/modules.html;
185             dependency.setDisplayFileName(parentName + "/__init__.py");
186             dependency.getProductEvidence().addEvidence(file.getName(),
187                     "PackageName", parentName, Confidence.HIGHEST);
188 
189             final File[] fileList = parent.listFiles(PY_FILTER);
190             if (fileList != null) {
191                 for (final File sourceFile : fileList) {
192                     analyzeFileContents(dependency, sourceFile);
193                 }
194             }
195         } else {
196             // copy, alter and set in case some other thread is iterating over
197             final List<Dependency> dependencies = new ArrayList<Dependency>(
198                     engine.getDependencies());
199             dependencies.remove(dependency);
200             engine.setDependencies(dependencies);
201         }
202     }
203 
204     /**
205      * This should gather information from leading docstrings, file comments,
206      * and assignments to __version__, __title__, __summary__, __uri__, __url__,
207      * __home*page__, __author__, and their all caps equivalents.
208      *
209      * @param dependency the dependency being analyzed
210      * @param file the file name to analyze
211      * @return whether evidence was found
212      * @throws AnalysisException thrown if there is an unrecoverable error
213      */
214     private boolean analyzeFileContents(Dependency dependency, File file)
215             throws AnalysisException {
216         String contents;
217         try {
218             contents = FileUtils.readFileToString(file, Charset.defaultCharset()).trim();
219         } catch (IOException e) {
220             throw new AnalysisException(
221                     "Problem occurred while reading dependency file.", e);
222         }
223         boolean found = false;
224         if (!contents.isEmpty()) {
225             final String source = file.getName();
226             found = gatherEvidence(VERSION_PATTERN, contents, source,
227                     dependency.getVersionEvidence(), "SourceVersion",
228                     Confidence.MEDIUM);
229             found |= addSummaryInfo(dependency, SUMMARY_PATTERN, 4, contents,
230                     source, "summary");
231             if (INIT_PY_FILTER.accept(file)) {
232                 found |= addSummaryInfo(dependency, MODULE_DOCSTRING, 2,
233                         contents, source, "docstring");
234             }
235             found |= gatherEvidence(TITLE_PATTERN, contents, source,
236                     dependency.getProductEvidence(), "SourceTitle",
237                     Confidence.LOW);
238             final EvidenceCollection vendorEvidence = dependency
239                     .getVendorEvidence();
240             found |= gatherEvidence(AUTHOR_PATTERN, contents, source,
241                     vendorEvidence, "SourceAuthor", Confidence.MEDIUM);
242             found |= gatherHomePageEvidence(URI_PATTERN, vendorEvidence,
243                     source, "URL", contents);
244             found |= gatherHomePageEvidence(HOMEPAGE_PATTERN,
245                     vendorEvidence, source, "HomePage", contents);
246         }
247         return found;
248     }
249 
250     /**
251      * Adds summary information to the dependency
252      *
253      * @param dependency the dependency being analyzed
254      * @param pattern the pattern used to perform analysis
255      * @param group the group from the pattern that indicates the data to use
256      * @param contents the data being analyzed
257      * @param source the source name to use when recording the evidence
258      * @param key the key name to use when recording the evidence
259      * @return true if evidence was collected; otherwise false
260      */
261     private boolean addSummaryInfo(Dependency dependency, Pattern pattern,
262             int group, String contents, String source, String key) {
263         final Matcher matcher = pattern.matcher(contents);
264         final boolean found = matcher.find();
265         if (found) {
266             JarAnalyzer.addDescription(dependency, matcher.group(group),
267                     source, key);
268         }
269         return found;
270     }
271 
272     /**
273      * Collects evidence from the home page URL.
274      *
275      * @param pattern the pattern to match
276      * @param evidence the evidence collection to add the evidence to
277      * @param source the source of the evidence
278      * @param name the name of the evidence
279      * @param contents the home page URL
280      * @return true if evidence was collected; otherwise false
281      */
282     private boolean gatherHomePageEvidence(Pattern pattern,
283             EvidenceCollection evidence, String source, String name,
284             String contents) {
285         final Matcher matcher = pattern.matcher(contents);
286         boolean found = false;
287         if (matcher.find()) {
288             final String url = matcher.group(4);
289             if (UrlStringUtils.isUrl(url)) {
290                 found = true;
291                 evidence.addEvidence(source, name, url, Confidence.MEDIUM);
292             }
293         }
294         return found;
295     }
296 
297     /**
298      * Gather evidence from a Python source file using the given string
299      * assignment regex pattern.
300      *
301      * @param pattern to scan contents with
302      * @param contents of Python source file
303      * @param source for storing evidence
304      * @param evidence to store evidence in
305      * @param name of evidence
306      * @param confidence in evidence
307      * @return whether evidence was found
308      */
309     private boolean gatherEvidence(Pattern pattern, String contents,
310             String source, EvidenceCollection evidence, String name,
311             Confidence confidence) {
312         final Matcher matcher = pattern.matcher(contents);
313         final boolean found = matcher.find();
314         if (found) {
315             evidence.addEvidence(source, name, matcher.group(4), confidence);
316         }
317         return found;
318     }
319 
320     @Override
321     protected String getAnalyzerEnabledSettingKey() {
322         return Settings.KEYS.ANALYZER_PYTHON_PACKAGE_ENABLED;
323     }
324 }