/*
 * This file is part of dependency-check-core.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Copyright (c) 2012 Jeremy Long. All Rights Reserved.
 */
18  package org.owasp.dependencycheck.dependency;
19  
import java.io.Serializable;
import java.net.MalformedURLException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.owasp.dependencycheck.utils.DependencyVersion;
import org.owasp.dependencycheck.utils.DependencyVersionUtil;
import org.owasp.dependencycheck.utils.Filter;
import org.owasp.dependencycheck.utils.UrlStringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
34  
35  /**
36   * Used to maintain a collection of Evidence.
37   *
38   * @author Jeremy Long
39   */
40  public class EvidenceCollection implements Serializable, Iterable<Evidence> {
41  
42      /**
43       * The serial version UID for serialization.
44       */
45      private static final long serialVersionUID = 1L;
46      /**
47       * The logger.
48       */
49      private static final Logger LOGGER = LoggerFactory.getLogger(EvidenceCollection.class);
50      /**
51       * Used to iterate over highest confidence evidence contained in the collection.
52       */
53      private static final Filter<Evidence> HIGHEST_CONFIDENCE = new Filter<Evidence>() {
54          @Override
55          public boolean passes(Evidence evidence) {
56              return evidence.getConfidence() == Confidence.HIGHEST;
57          }
58      };
59      /**
60       * Used to iterate over high confidence evidence contained in the collection.
61       */
62      private static final Filter<Evidence> HIGH_CONFIDENCE = new Filter<Evidence>() {
63          @Override
64          public boolean passes(Evidence evidence) {
65              return evidence.getConfidence() == Confidence.HIGH;
66          }
67      };
68      /**
69       * Used to iterate over medium confidence evidence contained in the collection.
70       */
71      private static final Filter<Evidence> MEDIUM_CONFIDENCE = new Filter<Evidence>() {
72          @Override
73          public boolean passes(Evidence evidence) {
74              return evidence.getConfidence() == Confidence.MEDIUM;
75          }
76      };
77      /**
78       * Used to iterate over low confidence evidence contained in the collection.
79       */
80      private static final Filter<Evidence> LOW_CONFIDENCE = new Filter<Evidence>() {
81          @Override
82          public boolean passes(Evidence evidence) {
83              return evidence.getConfidence() == Confidence.LOW;
84          }
85      };
86      /**
87       * Used to iterate over evidence that has was used (aka read) from the collection.
88       */
89      private static final Filter<Evidence> EVIDENCE_USED = new Filter<Evidence>() {
90          @Override
91          public boolean passes(Evidence evidence) {
92              return evidence.isUsed();
93          }
94      };
95  
96      /**
97       * Used to iterate over evidence of the specified confidence.
98       *
99       * @param confidence the confidence level for the evidence to be iterated over.
100      * @return Iterable&lt;Evidence&gt; an iterable collection of evidence
101      */
102     public final Iterable<Evidence> iterator(Confidence confidence) {
103         if (confidence == Confidence.HIGHEST) {
104             return EvidenceCollection.HIGHEST_CONFIDENCE.filter(this.list);
105         } else if (confidence == Confidence.HIGH) {
106             return EvidenceCollection.HIGH_CONFIDENCE.filter(this.list);
107         } else if (confidence == Confidence.MEDIUM) {
108             return EvidenceCollection.MEDIUM_CONFIDENCE.filter(this.list);
109         } else {
110             return EvidenceCollection.LOW_CONFIDENCE.filter(this.list);
111         }
112     }
113     /**
114      * A collection of evidence.
115      */
116     private final Set<Evidence> list;
117     /**
118      * A collection of strings used to adjust Lucene's term weighting.
119      */
120     private final Set<String> weightedStrings;
121 
122     /**
123      * Creates a new EvidenceCollection.
124      */
125     public EvidenceCollection() {
126         list = new TreeSet<Evidence>();
127         weightedStrings = new HashSet<String>();
128     }
129 
130     /**
131      * Adds evidence to the collection.
132      *
133      * @param e Evidence.
134      */
135     public void addEvidence(Evidence e) {
136         list.add(e);
137     }
138 
139     /**
140      * Creates an Evidence object from the parameters and adds the resulting object to the collection.
141      *
142      * @param source the source of the Evidence.
143      * @param name the name of the Evidence.
144      * @param value the value of the Evidence.
145      * @param confidence the confidence of the Evidence.
146      */
147     public void addEvidence(String source, String name, String value, Confidence confidence) {
148         final Evidence e = new Evidence(source, name, value, confidence);
149         addEvidence(e);
150     }
151 
152     /**
153      * Adds term to the weighting collection. The terms added here are used later to boost the score of other terms. This is a way
154      * of combining evidence from multiple sources to boost the confidence of the given evidence.
155      *
156      * Example: The term 'Apache' is found in the manifest of a JAR and is added to the Collection. When we parse the package
157      * names within the JAR file we may add these package names to the "weighted" strings collection to boost the score in the
158      * Lucene query. That way when we construct the Lucene query we find the term Apache in the collection AND in the weighted
159      * strings; as such, we will boost the confidence of the term Apache.
160      *
161      * @param str to add to the weighting collection.
162      */
163     public void addWeighting(String str) {
164         weightedStrings.add(str);
165     }
166 
167     /**
168      * Returns a set of Weightings - a list of terms that are believed to be of higher confidence when also found in another
169      * location.
170      *
171      * @return Set&lt;String&gt;
172      */
173     public Set<String> getWeighting() {
174         return weightedStrings;
175     }
176 
177     /**
178      * Returns the set of evidence.
179      *
180      * @return the set of evidence.
181      */
182     public Set<Evidence> getEvidence() {
183         return list;
184     }
185 
186     /**
187      * Returns the set of evidence from a given source.
188      *
189      * @param source the source of the evidence
190      * @return the set of evidence.
191      */
192     public Set<Evidence> getEvidence(String source) {
193         if (source == null) {
194             return null;
195         }
196         final Set<Evidence> ret = new HashSet<Evidence>();
197         for (Evidence e : list) {
198             if (source.equals(e.getSource())) {
199                 ret.add(e);
200             }
201         }
202         return ret;
203     }
204 
205     /**
206      * Returns the set of evidence from a given source and name.
207      *
208      * @param source the source of the evidence
209      * @param name the name of the evidence to return
210      * @return the set of evidence.
211      */
212     public Set<Evidence> getEvidence(String source, String name) {
213         if (source == null || name == null) {
214             return null;
215         }
216         final Set<Evidence> ret = new HashSet<Evidence>();
217         for (Evidence e : list) {
218             if (source.equals(e.getSource()) && name.equals(e.getName())) {
219                 ret.add(e);
220             }
221         }
222         return ret;
223     }
224 
225     /**
226      * Implements the iterator interface for the Evidence Collection.
227      *
228      * @return an Iterator&lt;Evidence&gt;
229      */
230     @Override
231     public Iterator<Evidence> iterator() {
232         return list.iterator();
233     }
234 
235     /**
236      * Used to determine if a given string was used (aka read).
237      *
238      * @param text the string to search for.
239      * @return whether or not the string was used.
240      */
241     public boolean containsUsedString(String text) {
242         if (text == null) {
243             return false;
244         }
245         final String textToTest = text.toLowerCase();
246 
247         for (Evidence e : EvidenceCollection.EVIDENCE_USED.filter(this)) {
248             //TODO consider changing the regex to only compare alpha-numeric (i.e. strip everything else)
249             final String value = urlCorrection(e.getValue().toLowerCase()).replaceAll("[\\s_-]", "");
250             if (value.contains(textToTest)) {
251                 return true;
252             }
253         }
254         return false;
255     }
256 
257     /**
258      * Used to determine if a given version was used (aka read) from the EvidenceCollection.
259      *
260      * @param version the version to search for within the collected evidence.
261      * @return whether or not the string was used.
262      */
263     public boolean containsUsedVersion(DependencyVersion version) {
264         if (version == null) {
265             return false;
266         }
267 
268         for (Evidence e : EvidenceCollection.EVIDENCE_USED.filter(this)) {
269             final DependencyVersion value = DependencyVersionUtil.parseVersion(e.getValue());
270             if (value != null && value.matchesAtLeastThreeLevels(version)) {
271                 return true;
272             }
273         }
274         return false;
275     }
276 
277     /**
278      * Returns whether or not the collection contains evidence of a specified Confidence.
279      *
280      * @param confidence A Confidence value.
281      * @return boolean.
282      */
283     public boolean contains(Confidence confidence) {
284         for (Evidence e : list) {
285             if (e.getConfidence().equals(confidence)) {
286                 return true;
287             }
288         }
289         return false;
290     }
291 
292     /**
293      * Merges multiple EvidenceCollections together, only merging evidence that was used, into a new EvidenceCollection.
294      *
295      * @param ec One or more EvidenceCollections.
296      * @return a new EvidenceCollection containing the used evidence.
297      */
298     public static EvidenceCollection mergeUsed(EvidenceCollection... ec) {
299         final EvidenceCollection ret = new EvidenceCollection();
300         for (EvidenceCollection col : ec) {
301             for (Evidence e : col.list) {
302                 if (e.isUsed()) {
303                     ret.addEvidence(e);
304                 }
305             }
306         }
307         return ret;
308     }
309 
310     /**
311      * Merges multiple EvidenceCollections together.
312      *
313      * @param ec One or more EvidenceCollections.
314      * @return a new EvidenceCollection.
315      */
316     public static EvidenceCollection merge(EvidenceCollection... ec) {
317         final EvidenceCollection ret = new EvidenceCollection();
318         for (EvidenceCollection col : ec) {
319             ret.list.addAll(col.list);
320             ret.weightedStrings.addAll(col.weightedStrings);
321         }
322         return ret;
323     }
324 
325     /**
326      * Merges multiple EvidenceCollections together; flattening all of the evidence items by removing the confidence.
327      *
328      * @param ec One or more EvidenceCollections
329      * @return new set of evidence resulting from merging the evidence in the collections
330      */
331     public static Set<Evidence> mergeForDisplay(EvidenceCollection... ec) {
332         final Set<Evidence> ret = new TreeSet<Evidence>();
333         for (EvidenceCollection col : ec) {
334             for (Evidence e : col) {
335                 //if (e.isUsed()) {
336                 final Evidence newEvidence = new Evidence(e.getSource(), e.getName(), e.getValue(), null);
337                 newEvidence.setUsed(true);
338                 ret.add(newEvidence);
339                 //}
340             }
341         }
342         return ret;
343     }
344 
345     /**
346      * Returns a string of evidence 'values'.
347      *
348      * @return a string containing the evidence.
349      */
350     @Override
351     public String toString() {
352         final StringBuilder sb = new StringBuilder();
353         for (Evidence e : this.list) {
354             sb.append(e.getValue()).append(' ');
355         }
356         return sb.toString();
357     }
358 
359     /**
360      * Returns the number of elements in the EvidenceCollection.
361      *
362      * @return the number of elements in the collection.
363      */
364     public int size() {
365         return list.size();
366     }
367 
368     /**
369      * <p>
370      * Takes a string that may contain a fully qualified domain and it will return the string having removed the query string, the
371      * protocol, the sub-domain of 'www', and the file extension of the path.</p>
372      * <p>
373      * This is useful for checking if the evidence contains a specific string. The presence of the protocol, file extension, etc.
374      * may produce false positives.
375      *
376      * <p>
377      * Example, given the following input:</p>
378      * <code>'Please visit https://www.somedomain.com/path1/path2/file.php?id=439'</code>
379      * <p>
380      * The function would return:</p>
381      * <code>'Please visit somedomain path1 path2 file'</code>
382      *
383      * @param value the value that may contain a url
384      * @return the modified string
385      */
386     private String urlCorrection(String value) {
387         if (value == null || !UrlStringUtils.containsUrl(value)) {
388             return value;
389         }
390         final StringBuilder sb = new StringBuilder(value.length());
391         final String[] parts = value.split("\\s");
392         for (String part : parts) {
393             if (UrlStringUtils.isUrl(part)) {
394                 try {
395                     final List<String> data = UrlStringUtils.extractImportantUrlData(part);
396                     sb.append(' ').append(StringUtils.join(data, ' '));
397                 } catch (MalformedURLException ex) {
398                     LOGGER.debug("error parsing {}", part, ex);
399                     sb.append(' ').append(part);
400                 }
401             } else {
402                 sb.append(' ').append(part);
403             }
404         }
405         return sb.toString().trim();
406     }
407 }