View Javadoc
1   /*
2    * This file is part of dependency-check-core.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   * Copyright (c) 2015 Institute for Defense Analyses. All Rights Reserved.
17   */
18  package org.owasp.dependencycheck.analyzer;
19  
20  import org.apache.commons.io.FileUtils;
21  import org.apache.commons.io.filefilter.NameFileFilter;
22  import org.apache.commons.io.filefilter.SuffixFileFilter;
23  import org.owasp.dependencycheck.Engine;
24  import org.owasp.dependencycheck.analyzer.exception.AnalysisException;
25  import org.owasp.dependencycheck.dependency.Confidence;
26  import org.owasp.dependencycheck.dependency.Dependency;
27  import org.owasp.dependencycheck.dependency.EvidenceCollection;
28  import org.owasp.dependencycheck.utils.FileFilterBuilder;
29  import org.owasp.dependencycheck.utils.Settings;
30  import org.owasp.dependencycheck.utils.UrlStringUtils;
31  
32  import java.io.File;
33  import java.io.FileFilter;
34  import java.io.IOException;
35  import java.util.ArrayList;
36  import java.util.List;
37  import java.util.regex.Matcher;
38  import java.util.regex.Pattern;
39  
40  /**
41   * Used to analyze a Python package, and collect information that can be used to determine the associated CPE.
42   *
43   * @author Dale Visser
44   */
45  public class PythonPackageAnalyzer extends AbstractFileTypeAnalyzer {
46  
47      /**
48       * Used when compiling file scanning regex patterns.
49       */
50      private static final int REGEX_OPTIONS = Pattern.DOTALL
51              | Pattern.CASE_INSENSITIVE;
52  
53      /**
54       * Filename extensions for files to be analyzed.
55       */
56      private static final String EXTENSIONS = "py";
57  
58      /**
59       * Pattern for matching the module docstring in a source file.
60       */
61      private static final Pattern MODULE_DOCSTRING = Pattern.compile(
62              "^(['\\\"]{3})(.*?)\\1", REGEX_OPTIONS);
63  
64      /**
65       * Matches assignments to version variables in Python source code.
66       */
67      private static final Pattern VERSION_PATTERN = Pattern.compile(
68              "\\b(__)?version(__)? *= *(['\"]+)(\\d+\\.\\d+.*?)\\3",
69              REGEX_OPTIONS);
70  
71      /**
72       * Matches assignments to title variables in Python source code.
73       */
74      private static final Pattern TITLE_PATTERN = compileAssignPattern("title");
75  
76      /**
77       * Matches assignments to summary variables in Python source code.
78       */
79      private static final Pattern SUMMARY_PATTERN = compileAssignPattern("summary");
80  
81      /**
82       * Matches assignments to URL/URL variables in Python source code.
83       */
84      private static final Pattern URI_PATTERN = compileAssignPattern("ur[il]");
85  
86      /**
87       * Matches assignments to home page variables in Python source code.
88       */
89      private static final Pattern HOMEPAGE_PATTERN = compileAssignPattern("home_?page");
90  
91      /**
92       * Matches assignments to author variables in Python source code.
93       */
94      private static final Pattern AUTHOR_PATTERN = compileAssignPattern("author");
95  
96      /**
97       * Filter that detects files named "__init__.py".
98       */
99      private static final FileFilter INIT_PY_FILTER = new NameFileFilter("__init__.py");
100 
101     /**
102      * The file filter for python files.
103      */
104     private static final FileFilter PY_FILTER = new SuffixFileFilter(".py");
105 
106     /**
107      * Returns the name of the Python Package Analyzer.
108      *
109      * @return the name of the analyzer
110      */
111     @Override
112     public String getName() {
113         return "Python Package Analyzer";
114     }
115 
116     /**
117      * Tell that we are used for information collection.
118      *
119      * @return INFORMATION_COLLECTION
120      */
121     @Override
122     public AnalysisPhase getAnalysisPhase() {
123         return AnalysisPhase.INFORMATION_COLLECTION;
124     }
125 
126     /**
127      * The file filter used to determine which files this analyzer supports.
128      */
129     private static final FileFilter FILTER = FileFilterBuilder.newInstance().addExtensions(EXTENSIONS).build();
130 
131     /**
132      * Returns the FileFilter
133      *
134      * @return the FileFilter
135      */
136     @Override
137     protected FileFilter getFileFilter() {
138         return FILTER;
139     }
140 
141     /**
142      * No-op initializer implementation.
143      *
144      * @throws Exception never thrown
145      */
146     @Override
147     protected void initializeFileTypeAnalyzer() throws Exception {
148         // Nothing to do here.
149     }
150 
151     /**
152      * Utility function to create a regex pattern matcher.
153      *
154      * @param name the value to use when constructing the assignment pattern
155      * @return the compiled Pattern
156      */
157     private static Pattern compileAssignPattern(String name) {
158         return Pattern.compile(
159                 String.format("\\b(__)?%s(__)?\\b *= *(['\"]+)(.*?)\\3", name),
160                 REGEX_OPTIONS);
161     }
162 
163     /**
164      * Analyzes python packages and adds evidence to the dependency.
165      *
166      * @param dependency the dependency being analyzed
167      * @param engine the engine being used to perform the scan
168      * @throws AnalysisException thrown if there is an unrecoverable error analyzing the dependency
169      */
170     @Override
171     protected void analyzeFileType(Dependency dependency, Engine engine)
172             throws AnalysisException {
173         final File file = dependency.getActualFile();
174         final File parent = file.getParentFile();
175         final String parentName = parent.getName();
176         boolean found = false;
177         if (INIT_PY_FILTER.accept(file)) {
178             final File[] fileList = parent.listFiles(PY_FILTER);
179             if (fileList != null) {
180                 for (final File sourceFile : fileList) {
181                     found |= analyzeFileContents(dependency, sourceFile);
182                 }
183             }
184         }
185         if (found) {
186             dependency.setDisplayFileName(parentName + "/__init__.py");
187             dependency.getProductEvidence().addEvidence(file.getName(),
188                     "PackageName", parentName, Confidence.HIGH);
189         } else {
190             // copy, alter and set in case some other thread is iterating over
191             final List<Dependency> dependencies = new ArrayList<Dependency>(
192                     engine.getDependencies());
193             dependencies.remove(dependency);
194             engine.setDependencies(dependencies);
195         }
196     }
197 
198     /**
199      * This should gather information from leading docstrings, file comments, and assignments to __version__, __title__,
200      * __summary__, __uri__, __url__, __home*page__, __author__, and their all caps equivalents.
201      *
202      * @param dependency the dependency being analyzed
203      * @param file the file name to analyze
204      * @return whether evidence was found
205      * @throws AnalysisException thrown if there is an unrecoverable error
206      */
207     private boolean analyzeFileContents(Dependency dependency, File file)
208             throws AnalysisException {
209         String contents;
210         try {
211             contents = FileUtils.readFileToString(file).trim();
212         } catch (IOException e) {
213             throw new AnalysisException(
214                     "Problem occurred while reading dependency file.", e);
215         }
216         boolean found = false;
217         if (!contents.isEmpty()) {
218             final String source = file.getName();
219             found = gatherEvidence(VERSION_PATTERN, contents, source,
220                     dependency.getVersionEvidence(), "SourceVersion",
221                     Confidence.MEDIUM);
222             found |= addSummaryInfo(dependency, SUMMARY_PATTERN, 4, contents,
223                     source, "summary");
224             if (INIT_PY_FILTER.accept(file)) {
225                 found |= addSummaryInfo(dependency, MODULE_DOCSTRING, 2,
226                         contents, source, "docstring");
227             }
228             found |= gatherEvidence(TITLE_PATTERN, contents, source,
229                     dependency.getProductEvidence(), "SourceTitle",
230                     Confidence.LOW);
231             final EvidenceCollection vendorEvidence = dependency
232                     .getVendorEvidence();
233             found |= gatherEvidence(AUTHOR_PATTERN, contents, source,
234                     vendorEvidence, "SourceAuthor", Confidence.MEDIUM);
235             found |= gatherHomePageEvidence(URI_PATTERN, vendorEvidence,
236                     source, "URL", contents);
237             found |= gatherHomePageEvidence(HOMEPAGE_PATTERN,
238                     vendorEvidence, source, "HomePage", contents);
239         }
240         return found;
241     }
242 
243     /**
244      * Adds summary information to the dependency
245      *
246      * @param dependency the dependency being analyzed
247      * @param pattern the pattern used to perform analysis
248      * @param group the group from the pattern that indicates the data to use
249      * @param contents the data being analyzed
250      * @param source the source name to use when recording the evidence
251      * @param key the key name to use when recording the evidence
252      * @return true if evidence was collected; otherwise false
253      */
254     private boolean addSummaryInfo(Dependency dependency, Pattern pattern,
255             int group, String contents, String source, String key) {
256         final Matcher matcher = pattern.matcher(contents);
257         final boolean found = matcher.find();
258         if (found) {
259             JarAnalyzer.addDescription(dependency, matcher.group(group),
260                     source, key);
261         }
262         return found;
263     }
264 
265     /**
266      * Collects evidence from the home page URL.
267      *
268      * @param pattern the pattern to match
269      * @param evidence the evidence collection to add the evidence to
270      * @param source the source of the evidence
271      * @param name the name of the evidence
272      * @param contents the home page URL
273      * @return true if evidence was collected; otherwise false
274      */
275     private boolean gatherHomePageEvidence(Pattern pattern,
276             EvidenceCollection evidence, String source, String name,
277             String contents) {
278         final Matcher matcher = pattern.matcher(contents);
279         boolean found = false;
280         if (matcher.find()) {
281             final String url = matcher.group(4);
282             if (UrlStringUtils.isUrl(url)) {
283                 found = true;
284                 evidence.addEvidence(source, name, url, Confidence.MEDIUM);
285             }
286         }
287         return found;
288     }
289 
290     /**
291      * Gather evidence from a Python source file using the given string assignment regex pattern.
292      *
293      * @param pattern to scan contents with
294      * @param contents of Python source file
295      * @param source for storing evidence
296      * @param evidence to store evidence in
297      * @param name of evidence
298      * @param confidence in evidence
299      * @return whether evidence was found
300      */
301     private boolean gatherEvidence(Pattern pattern, String contents,
302             String source, EvidenceCollection evidence, String name,
303             Confidence confidence) {
304         final Matcher matcher = pattern.matcher(contents);
305         final boolean found = matcher.find();
306         if (found) {
307             evidence.addEvidence(source, name, matcher.group(4), confidence);
308         }
309         return found;
310     }
311 
312     @Override
313     protected String getAnalyzerEnabledSettingKey() {
314         return Settings.KEYS.ANALYZER_PYTHON_PACKAGE_ENABLED;
315     }
316 }