View Javadoc
1   /*
2    * This file is part of dependency-check-core.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   * Copyright (c) 2015 Institute for Defense Analyses. All Rights Reserved.
17   */
18  package org.owasp.dependencycheck.analyzer;
19  
20  import org.apache.commons.io.FileUtils;
21  import org.apache.commons.io.filefilter.NameFileFilter;
22  import org.apache.commons.io.filefilter.SuffixFileFilter;
23  import org.owasp.dependencycheck.Engine;
24  import org.owasp.dependencycheck.analyzer.exception.AnalysisException;
25  import org.owasp.dependencycheck.dependency.Confidence;
26  import org.owasp.dependencycheck.dependency.Dependency;
27  import org.owasp.dependencycheck.dependency.EvidenceCollection;
28  import org.owasp.dependencycheck.utils.FileFilterBuilder;
29  import org.owasp.dependencycheck.utils.Settings;
30  import org.owasp.dependencycheck.utils.UrlStringUtils;
31  
32  import java.io.File;
33  import java.io.FileFilter;
34  import java.io.IOException;
35  import java.util.ArrayList;
36  import java.util.List;
37  import java.util.regex.Matcher;
38  import java.util.regex.Pattern;
39  
40  /**
41   * Used to analyze a Python package, and collect information that can be used to determine the associated CPE.
42   *
43   * @author Dale Visser <dvisser@ida.org>
44   */
45  public class PythonPackageAnalyzer extends AbstractFileTypeAnalyzer {
46  
47      /**
48       * Used when compiling file scanning regex patterns.
49       */
50      private static final int REGEX_OPTIONS = Pattern.DOTALL
51              | Pattern.CASE_INSENSITIVE;
52  
53      /**
54       * Filename extensions for files to be analyzed.
55       */
56      private static final String EXTENSIONS = "py";
57  
58      /**
59       * Pattern for matching the module docstring in a source file.
60       */
61      private static final Pattern MODULE_DOCSTRING = Pattern.compile(
62              "^(['\\\"]{3})(.*?)\\1", REGEX_OPTIONS);
63  
64      /**
65       * Matches assignments to version variables in Python source code.
66       */
67      private static final Pattern VERSION_PATTERN = Pattern.compile(
68              "\\b(__)?version(__)? *= *(['\"]+)(\\d+\\.\\d+.*?)\\3",
69              REGEX_OPTIONS);
70  
71      /**
72       * Matches assignments to title variables in Python source code.
73       */
74      private static final Pattern TITLE_PATTERN = compileAssignPattern("title");
75  
76      /**
77       * Matches assignments to summary variables in Python source code.
78       */
79      private static final Pattern SUMMARY_PATTERN = compileAssignPattern("summary");
80  
81      /**
82       * Matches assignments to URL/URL variables in Python source code.
83       */
84      private static final Pattern URI_PATTERN = compileAssignPattern("ur[il]");
85  
86      /**
87       * Matches assignments to home page variables in Python source code.
88       */
89      private static final Pattern HOMEPAGE_PATTERN = compileAssignPattern("home_?page");
90  
91      /**
92       * Matches assignments to author variables in Python source code.
93       */
94      private static final Pattern AUTHOR_PATTERN = compileAssignPattern("author");
95  
96      /**
97       * Filter that detects files named "__init__.py".
98       */
99      private static final FileFilter INIT_PY_FILTER = new NameFileFilter("__init__.py");
100 
101     /**
102      * The file filter for python files.
103      */
104     private static final FileFilter PY_FILTER = new SuffixFileFilter(".py");
105 
106     /**
107      * Returns the name of the Python Package Analyzer.
108      *
109      * @return the name of the analyzer
110      */
111     @Override
112     public String getName() {
113         return "Python Package Analyzer";
114     }
115 
116     /**
117      * Tell that we are used for information collection.
118      *
119      * @return INFORMATION_COLLECTION
120      */
121     @Override
122     public AnalysisPhase getAnalysisPhase() {
123         return AnalysisPhase.INFORMATION_COLLECTION;
124     }
125 
126     /**
127      * The file filter used to determine which files this analyzer supports.
128      */
129     private static final FileFilter FILTER = FileFilterBuilder.newInstance().addExtensions(EXTENSIONS).build();
130 
131     /**
132      * Returns the FileFilter
133      *
134      * @return the FileFilter
135      */
136     @Override
137     protected FileFilter getFileFilter() {
138         return FILTER;
139     }
140 
141     /**
142      * No-op initializer implementation.
143      *
144      * @throws Exception never thrown
145      */
146     @Override
147     protected void initializeFileTypeAnalyzer() throws Exception {
148         // Nothing to do here.
149     }
150 
151     /**
152      * Utility function to create a regex pattern matcher.
153      *
154      * @param name the value to use when constructing the assignment pattern
155      * @return the compiled Pattern
156      */
157     private static Pattern compileAssignPattern(String name) {
158         return Pattern.compile(
159                 String.format("\\b(__)?%s(__)?\\b *= *(['\"]+)(.*?)\\3", name),
160                 REGEX_OPTIONS);
161     }
162 
163     /**
164      * Analyzes python packages and adds evidence to the dependency.
165      *
166      * @param dependency the dependency being analyzed
167      * @param engine     the engine being used to perform the scan
168      * @throws AnalysisException thrown if there is an unrecoverable error analyzing the dependency
169      */
170     @Override
171     protected void analyzeFileType(Dependency dependency, Engine engine)
172             throws AnalysisException {
173         final File file = dependency.getActualFile();
174         final File parent = file.getParentFile();
175         final String parentName = parent.getName();
176         boolean found = false;
177         if (INIT_PY_FILTER.accept(file)) {
178             for (final File sourceFile : parent.listFiles(PY_FILTER)) {
179                 found |= analyzeFileContents(dependency, sourceFile);
180             }
181         }
182         if (found) {
183             dependency.setDisplayFileName(parentName + "/__init__.py");
184             dependency.getProductEvidence().addEvidence(file.getName(),
185                     "PackageName", parentName, Confidence.MEDIUM);
186         } else {
187             // copy, alter and set in case some other thread is iterating over
188             final List<Dependency> dependencies = new ArrayList<Dependency>(
189                     engine.getDependencies());
190             dependencies.remove(dependency);
191             engine.setDependencies(dependencies);
192         }
193     }
194 
195     /**
196      * This should gather information from leading docstrings, file comments, and assignments to __version__, __title__,
197      * __summary__, __uri__, __url__, __home*page__, __author__, and their all caps equivalents.
198      *
199      * @param dependency the dependency being analyzed
200      * @param file       the file name to analyze
201      * @return whether evidence was found
202      * @throws AnalysisException thrown if there is an unrecoverable error
203      */
204     private boolean analyzeFileContents(Dependency dependency, File file)
205             throws AnalysisException {
206         String contents;
207         try {
208             contents = FileUtils.readFileToString(file).trim();
209         } catch (IOException e) {
210             throw new AnalysisException(
211                     "Problem occurred while reading dependency file.", e);
212         }
213         boolean found = false;
214         if (!contents.isEmpty()) {
215             final String source = file.getName();
216             found = gatherEvidence(VERSION_PATTERN, contents, source,
217                     dependency.getVersionEvidence(), "SourceVersion",
218                     Confidence.MEDIUM);
219             found |= addSummaryInfo(dependency, SUMMARY_PATTERN, 4, contents,
220                     source, "summary");
221             if (INIT_PY_FILTER.accept(file)) {
222                 found |= addSummaryInfo(dependency, MODULE_DOCSTRING, 2,
223                         contents, source, "docstring");
224             }
225             found |= gatherEvidence(TITLE_PATTERN, contents, source,
226                     dependency.getProductEvidence(), "SourceTitle",
227                     Confidence.LOW);
228             final EvidenceCollection vendorEvidence = dependency
229                     .getVendorEvidence();
230             found |= gatherEvidence(AUTHOR_PATTERN, contents, source,
231                     vendorEvidence, "SourceAuthor", Confidence.MEDIUM);
232             found |= gatherHomePageEvidence(URI_PATTERN, vendorEvidence,
233                     source, "URL", contents);
234             found |= gatherHomePageEvidence(HOMEPAGE_PATTERN,
235                     vendorEvidence, source, "HomePage", contents);
236         }
237         return found;
238     }
239 
240     /**
241      * Adds summary information to the dependency
242      *
243      * @param dependency the dependency being analyzed
244      * @param pattern    the pattern used to perform analysis
245      * @param group      the group from the pattern that indicates the data to use
246      * @param contents   the data being analyzed
247      * @param source     the source name to use when recording the evidence
248      * @param key        the key name to use when recording the evidence
249      * @return true if evidence was collected; otherwise false
250      */
251     private boolean addSummaryInfo(Dependency dependency, Pattern pattern,
252                                    int group, String contents, String source, String key) {
253         final Matcher matcher = pattern.matcher(contents);
254         final boolean found = matcher.find();
255         if (found) {
256             JarAnalyzer.addDescription(dependency, matcher.group(group),
257                     source, key);
258         }
259         return found;
260     }
261 
262     /**
263      * Collects evidence from the home page URL.
264      *
265      * @param pattern  the pattern to match
266      * @param evidence the evidence collection to add the evidence to
267      * @param source   the source of the evidence
268      * @param name     the name of the evidence
269      * @param contents the home page URL
270      * @return true if evidence was collected; otherwise false
271      */
272     private boolean gatherHomePageEvidence(Pattern pattern,
273                                            EvidenceCollection evidence, String source, String name,
274                                            String contents) {
275         final Matcher matcher = pattern.matcher(contents);
276         boolean found = false;
277         if (matcher.find()) {
278             final String url = matcher.group(4);
279             if (UrlStringUtils.isUrl(url)) {
280                 found = true;
281                 evidence.addEvidence(source, name, url, Confidence.MEDIUM);
282             }
283         }
284         return found;
285     }
286 
287     /**
288      * Gather evidence from a Python source file using the given string assignment regex pattern.
289      *
290      * @param pattern    to scan contents with
291      * @param contents   of Python source file
292      * @param source     for storing evidence
293      * @param evidence   to store evidence in
294      * @param name       of evidence
295      * @param confidence in evidence
296      * @return whether evidence was found
297      */
298     private boolean gatherEvidence(Pattern pattern, String contents,
299                                    String source, EvidenceCollection evidence, String name,
300                                    Confidence confidence) {
301         final Matcher matcher = pattern.matcher(contents);
302         final boolean found = matcher.find();
303         if (found) {
304             evidence.addEvidence(source, name, matcher.group(4), confidence);
305         }
306         return found;
307     }
308 
309     @Override
310     protected String getAnalyzerEnabledSettingKey() {
311         return Settings.KEYS.ANALYZER_PYTHON_PACKAGE_ENABLED;
312     }
313 }