View Javadoc
1   /*
2    * This file is part of dependency-check-core.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   * Copyright (c) 2015 Institute for Defense Analyses. All Rights Reserved.
17   */
18  package org.owasp.dependencycheck.analyzer;
19  
20  import org.apache.commons.io.FileUtils;
21  import org.apache.commons.io.filefilter.NameFileFilter;
22  import org.apache.commons.io.filefilter.SuffixFileFilter;
23  import org.owasp.dependencycheck.Engine;
24  import org.owasp.dependencycheck.analyzer.exception.AnalysisException;
25  import org.owasp.dependencycheck.dependency.Confidence;
26  import org.owasp.dependencycheck.dependency.Dependency;
27  import org.owasp.dependencycheck.dependency.EvidenceCollection;
28  import org.owasp.dependencycheck.utils.FileFilterBuilder;
29  import org.owasp.dependencycheck.utils.Settings;
30  import org.owasp.dependencycheck.utils.UrlStringUtils;
31  
32  import java.io.File;
33  import java.io.FileFilter;
34  import java.io.IOException;
35  import java.nio.charset.Charset;
36  import java.util.regex.Matcher;
37  import java.util.regex.Pattern;
38  import org.owasp.dependencycheck.exception.InitializationException;
39  
40  /**
41   * Used to analyze a Python package, and collect information that can be used to
42   * determine the associated CPE.
43   *
44   * @author Dale Visser
45   */
46  @Experimental
47  public class PythonPackageAnalyzer extends AbstractFileTypeAnalyzer {
48  
49      /**
50       * Used when compiling file scanning regex patterns.
51       */
52      private static final int REGEX_OPTIONS = Pattern.DOTALL
53              | Pattern.CASE_INSENSITIVE;
54  
55      /**
56       * Filename extensions for files to be analyzed.
57       */
58      private static final String EXTENSIONS = "py";
59  
60      /**
61       * Pattern for matching the module docstring in a source file.
62       */
63      private static final Pattern MODULE_DOCSTRING = Pattern.compile(
64              "^(['\\\"]{3})(.*?)\\1", REGEX_OPTIONS);
65  
66      /**
67       * Matches assignments to version variables in Python source code.
68       */
69      private static final Pattern VERSION_PATTERN = Pattern.compile(
70              "\\b(__)?version(__)? *= *(['\"]+)(\\d+\\.\\d+.*?)\\3",
71              REGEX_OPTIONS);
72  
73      /**
74       * Matches assignments to title variables in Python source code.
75       */
76      private static final Pattern TITLE_PATTERN = compileAssignPattern("title");
77  
78      /**
79       * Matches assignments to summary variables in Python source code.
80       */
81      private static final Pattern SUMMARY_PATTERN = compileAssignPattern("summary");
82  
83      /**
84       * Matches assignments to URL/URL variables in Python source code.
85       */
86      private static final Pattern URI_PATTERN = compileAssignPattern("ur[il]");
87  
88      /**
89       * Matches assignments to home page variables in Python source code.
90       */
91      private static final Pattern HOMEPAGE_PATTERN = compileAssignPattern("home_?page");
92  
93      /**
94       * Matches assignments to author variables in Python source code.
95       */
96      private static final Pattern AUTHOR_PATTERN = compileAssignPattern("author");
97  
98      /**
99       * Filter that detects files named "__init__.py".
100      */
101     private static final FileFilter INIT_PY_FILTER = new NameFileFilter("__init__.py");
102 
103     /**
104      * The file filter for python files.
105      */
106     private static final FileFilter PY_FILTER = new SuffixFileFilter(".py");
107 
108     /**
109      * Returns the name of the Python Package Analyzer.
110      *
111      * @return the name of the analyzer
112      */
113     @Override
114     public String getName() {
115         return "Python Package Analyzer";
116     }
117 
118     /**
119      * Tell that we are used for information collection.
120      *
121      * @return INFORMATION_COLLECTION
122      */
123     @Override
124     public AnalysisPhase getAnalysisPhase() {
125         return AnalysisPhase.INFORMATION_COLLECTION;
126     }
127 
128     /**
129      * The file filter used to determine which files this analyzer supports.
130      */
131     private static final FileFilter FILTER = FileFilterBuilder.newInstance().addExtensions(EXTENSIONS).build();
132 
133     /**
134      * Returns the FileFilter
135      *
136      * @return the FileFilter
137      */
138     @Override
139     protected FileFilter getFileFilter() {
140         return FILTER;
141     }
142 
143     /**
144      * No-op initializer implementation.
145      *
146      * @throws InitializationException never thrown
147      */
148     @Override
149     protected void initializeFileTypeAnalyzer() throws InitializationException {
150         // Nothing to do here.
151     }
152 
153     /**
154      * Utility function to create a regex pattern matcher.
155      *
156      * @param name the value to use when constructing the assignment pattern
157      * @return the compiled Pattern
158      */
159     private static Pattern compileAssignPattern(String name) {
160         return Pattern.compile(
161                 String.format("\\b(__)?%s(__)?\\b *= *(['\"]+)(.*?)\\3", name),
162                 REGEX_OPTIONS);
163     }
164 
165     /**
166      * Analyzes python packages and adds evidence to the dependency.
167      *
168      * @param dependency the dependency being analyzed
169      * @param engine the engine being used to perform the scan
170      * @throws AnalysisException thrown if there is an unrecoverable error
171      * analyzing the dependency
172      */
173     @Override
174     protected void analyzeDependency(Dependency dependency, Engine engine)
175             throws AnalysisException {
176         final File file = dependency.getActualFile();
177         final File parent = file.getParentFile();
178         final String parentName = parent.getName();
179         if (INIT_PY_FILTER.accept(file)) {
180             //by definition, the containing folder of __init__.py is considered the package, even the file is empty:
181             //"The __init__.py files are required to make Python treat the directories as containing packages"
182             //see section "6.4 Packages" from https://docs.python.org/2/tutorial/modules.html;
183             dependency.setDisplayFileName(parentName + "/__init__.py");
184             dependency.getProductEvidence().addEvidence(file.getName(),
185                     "PackageName", parentName, Confidence.HIGHEST);
186 
187             final File[] fileList = parent.listFiles(PY_FILTER);
188             if (fileList != null) {
189                 for (final File sourceFile : fileList) {
190                     analyzeFileContents(dependency, sourceFile);
191                 }
192             }
193         } else {
194             engine.getDependencies().remove(dependency);
195         }
196     }
197 
198     /**
199      * This should gather information from leading docstrings, file comments,
200      * and assignments to __version__, __title__, __summary__, __uri__, __url__,
201      * __home*page__, __author__, and their all caps equivalents.
202      *
203      * @param dependency the dependency being analyzed
204      * @param file the file name to analyze
205      * @return whether evidence was found
206      * @throws AnalysisException thrown if there is an unrecoverable error
207      */
208     private boolean analyzeFileContents(Dependency dependency, File file)
209             throws AnalysisException {
210         String contents;
211         try {
212             contents = FileUtils.readFileToString(file, Charset.defaultCharset()).trim();
213         } catch (IOException e) {
214             throw new AnalysisException(
215                     "Problem occurred while reading dependency file.", e);
216         }
217         boolean found = false;
218         if (!contents.isEmpty()) {
219             final String source = file.getName();
220             found = gatherEvidence(VERSION_PATTERN, contents, source,
221                     dependency.getVersionEvidence(), "SourceVersion",
222                     Confidence.MEDIUM);
223             found |= addSummaryInfo(dependency, SUMMARY_PATTERN, 4, contents,
224                     source, "summary");
225             if (INIT_PY_FILTER.accept(file)) {
226                 found |= addSummaryInfo(dependency, MODULE_DOCSTRING, 2,
227                         contents, source, "docstring");
228             }
229             found |= gatherEvidence(TITLE_PATTERN, contents, source,
230                     dependency.getProductEvidence(), "SourceTitle",
231                     Confidence.LOW);
232             final EvidenceCollection vendorEvidence = dependency
233                     .getVendorEvidence();
234             found |= gatherEvidence(AUTHOR_PATTERN, contents, source,
235                     vendorEvidence, "SourceAuthor", Confidence.MEDIUM);
236             found |= gatherHomePageEvidence(URI_PATTERN, vendorEvidence,
237                     source, "URL", contents);
238             found |= gatherHomePageEvidence(HOMEPAGE_PATTERN,
239                     vendorEvidence, source, "HomePage", contents);
240         }
241         return found;
242     }
243 
244     /**
245      * Adds summary information to the dependency
246      *
247      * @param dependency the dependency being analyzed
248      * @param pattern the pattern used to perform analysis
249      * @param group the group from the pattern that indicates the data to use
250      * @param contents the data being analyzed
251      * @param source the source name to use when recording the evidence
252      * @param key the key name to use when recording the evidence
253      * @return true if evidence was collected; otherwise false
254      */
255     private boolean addSummaryInfo(Dependency dependency, Pattern pattern,
256             int group, String contents, String source, String key) {
257         final Matcher matcher = pattern.matcher(contents);
258         final boolean found = matcher.find();
259         if (found) {
260             JarAnalyzer.addDescription(dependency, matcher.group(group),
261                     source, key);
262         }
263         return found;
264     }
265 
266     /**
267      * Collects evidence from the home page URL.
268      *
269      * @param pattern the pattern to match
270      * @param evidence the evidence collection to add the evidence to
271      * @param source the source of the evidence
272      * @param name the name of the evidence
273      * @param contents the home page URL
274      * @return true if evidence was collected; otherwise false
275      */
276     private boolean gatherHomePageEvidence(Pattern pattern,
277             EvidenceCollection evidence, String source, String name,
278             String contents) {
279         final Matcher matcher = pattern.matcher(contents);
280         boolean found = false;
281         if (matcher.find()) {
282             final String url = matcher.group(4);
283             if (UrlStringUtils.isUrl(url)) {
284                 found = true;
285                 evidence.addEvidence(source, name, url, Confidence.MEDIUM);
286             }
287         }
288         return found;
289     }
290 
291     /**
292      * Gather evidence from a Python source file using the given string
293      * assignment regex pattern.
294      *
295      * @param pattern to scan contents with
296      * @param contents of Python source file
297      * @param source for storing evidence
298      * @param evidence to store evidence in
299      * @param name of evidence
300      * @param confidence in evidence
301      * @return whether evidence was found
302      */
303     private boolean gatherEvidence(Pattern pattern, String contents,
304             String source, EvidenceCollection evidence, String name,
305             Confidence confidence) {
306         final Matcher matcher = pattern.matcher(contents);
307         final boolean found = matcher.find();
308         if (found) {
309             evidence.addEvidence(source, name, matcher.group(4), confidence);
310         }
311         return found;
312     }
313 
314     @Override
315     protected String getAnalyzerEnabledSettingKey() {
316         return Settings.KEYS.ANALYZER_PYTHON_PACKAGE_ENABLED;
317     }
318 }