1 /*
2 * This file is part of dependency-check-core.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 * Copyright (c) 2012 Jeremy Long. All Rights Reserved.
17 */
18 package org.owasp.dependencycheck.dependency;
19
import java.io.Serializable;
import java.net.MalformedURLException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.TreeSet;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.owasp.dependencycheck.utils.DependencyVersion;
import org.owasp.dependencycheck.utils.DependencyVersionUtil;
import org.owasp.dependencycheck.utils.Filter;
import org.owasp.dependencycheck.utils.UrlStringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
34
35 /**
36 * Used to maintain a collection of Evidence.
37 *
38 * @author Jeremy Long
39 */
40 public class EvidenceCollection implements Serializable, Iterable<Evidence> {
41
42 /**
43 * The serial version UID for serialization.
44 */
45 private static final long serialVersionUID = 1L;
46 /**
47 * The logger.
48 */
49 private static final Logger LOGGER = LoggerFactory.getLogger(EvidenceCollection.class);
50 /**
51 * Used to iterate over highest confidence evidence contained in the collection.
52 */
53 private static final Filter<Evidence> HIGHEST_CONFIDENCE = new Filter<Evidence>() {
54 @Override
55 public boolean passes(Evidence evidence) {
56 return evidence.getConfidence() == Confidence.HIGHEST;
57 }
58 };
59 /**
60 * Used to iterate over high confidence evidence contained in the collection.
61 */
62 private static final Filter<Evidence> HIGH_CONFIDENCE = new Filter<Evidence>() {
63 @Override
64 public boolean passes(Evidence evidence) {
65 return evidence.getConfidence() == Confidence.HIGH;
66 }
67 };
68 /**
69 * Used to iterate over medium confidence evidence contained in the collection.
70 */
71 private static final Filter<Evidence> MEDIUM_CONFIDENCE = new Filter<Evidence>() {
72 @Override
73 public boolean passes(Evidence evidence) {
74 return evidence.getConfidence() == Confidence.MEDIUM;
75 }
76 };
77 /**
78 * Used to iterate over low confidence evidence contained in the collection.
79 */
80 private static final Filter<Evidence> LOW_CONFIDENCE = new Filter<Evidence>() {
81 @Override
82 public boolean passes(Evidence evidence) {
83 return evidence.getConfidence() == Confidence.LOW;
84 }
85 };
86 /**
87 * Used to iterate over evidence that has was used (aka read) from the collection.
88 */
89 private static final Filter<Evidence> EVIDENCE_USED = new Filter<Evidence>() {
90 @Override
91 public boolean passes(Evidence evidence) {
92 return evidence.isUsed();
93 }
94 };
95
96 /**
97 * Used to iterate over evidence of the specified confidence.
98 *
99 * @param confidence the confidence level for the evidence to be iterated over.
100 * @return Iterable<Evidence> an iterable collection of evidence
101 */
102 public final Iterable<Evidence> iterator(Confidence confidence) {
103 if (confidence == Confidence.HIGHEST) {
104 return EvidenceCollection.HIGHEST_CONFIDENCE.filter(this.list);
105 } else if (confidence == Confidence.HIGH) {
106 return EvidenceCollection.HIGH_CONFIDENCE.filter(this.list);
107 } else if (confidence == Confidence.MEDIUM) {
108 return EvidenceCollection.MEDIUM_CONFIDENCE.filter(this.list);
109 } else {
110 return EvidenceCollection.LOW_CONFIDENCE.filter(this.list);
111 }
112 }
113 /**
114 * A collection of evidence.
115 */
116 private final Set<Evidence> list;
117 /**
118 * A collection of strings used to adjust Lucene's term weighting.
119 */
120 private final Set<String> weightedStrings;
121
122 /**
123 * Creates a new EvidenceCollection.
124 */
125 public EvidenceCollection() {
126 list = new TreeSet<Evidence>();
127 weightedStrings = new HashSet<String>();
128 }
129
130 /**
131 * Adds evidence to the collection.
132 *
133 * @param e Evidence.
134 */
135 public void addEvidence(Evidence e) {
136 list.add(e);
137 }
138
139 /**
140 * Creates an Evidence object from the parameters and adds the resulting object to the collection.
141 *
142 * @param source the source of the Evidence.
143 * @param name the name of the Evidence.
144 * @param value the value of the Evidence.
145 * @param confidence the confidence of the Evidence.
146 */
147 public void addEvidence(String source, String name, String value, Confidence confidence) {
148 final Evidence e = new Evidence(source, name, value, confidence);
149 addEvidence(e);
150 }
151
152 /**
153 * Adds term to the weighting collection. The terms added here are used later to boost the score of other terms. This is a way
154 * of combining evidence from multiple sources to boost the confidence of the given evidence.
155 *
156 * Example: The term 'Apache' is found in the manifest of a JAR and is added to the Collection. When we parse the package
157 * names within the JAR file we may add these package names to the "weighted" strings collection to boost the score in the
158 * Lucene query. That way when we construct the Lucene query we find the term Apache in the collection AND in the weighted
159 * strings; as such, we will boost the confidence of the term Apache.
160 *
161 * @param str to add to the weighting collection.
162 */
163 public void addWeighting(String str) {
164 weightedStrings.add(str);
165 }
166
167 /**
168 * Returns a set of Weightings - a list of terms that are believed to be of higher confidence when also found in another
169 * location.
170 *
171 * @return Set<String>
172 */
173 public Set<String> getWeighting() {
174 return weightedStrings;
175 }
176
177 /**
178 * Returns the set of evidence.
179 *
180 * @return the set of evidence.
181 */
182 public Set<Evidence> getEvidence() {
183 return list;
184 }
185
186 /**
187 * Returns the set of evidence from a given source.
188 *
189 * @param source the source of the evidence
190 * @return the set of evidence.
191 */
192 public Set<Evidence> getEvidence(String source) {
193 if (source == null) {
194 return null;
195 }
196 final Set<Evidence> ret = new HashSet<Evidence>();
197 for (Evidence e : list) {
198 if (source.equals(e.getSource())) {
199 ret.add(e);
200 }
201 }
202 return ret;
203 }
204
205 /**
206 * Returns the set of evidence from a given source and name.
207 *
208 * @param source the source of the evidence
209 * @param name the name of the evidence to return
210 * @return the set of evidence.
211 */
212 public Set<Evidence> getEvidence(String source, String name) {
213 if (source == null || name == null) {
214 return null;
215 }
216 final Set<Evidence> ret = new HashSet<Evidence>();
217 for (Evidence e : list) {
218 if (source.equals(e.getSource()) && name.equals(e.getName())) {
219 ret.add(e);
220 }
221 }
222 return ret;
223 }
224
225 /**
226 * Implements the iterator interface for the Evidence Collection.
227 *
228 * @return an Iterator<Evidence>
229 */
230 @Override
231 public Iterator<Evidence> iterator() {
232 return list.iterator();
233 }
234
235 /**
236 * Used to determine if a given string was used (aka read).
237 *
238 * @param text the string to search for.
239 * @return whether or not the string was used.
240 */
241 public boolean containsUsedString(String text) {
242 if (text == null) {
243 return false;
244 }
245 final String textToTest = text.toLowerCase();
246
247 for (Evidence e : EvidenceCollection.EVIDENCE_USED.filter(this)) {
248 //TODO consider changing the regex to only compare alpha-numeric (i.e. strip everything else)
249 final String value = urlCorrection(e.getValue().toLowerCase()).replaceAll("[\\s_-]", "");
250 if (value.contains(textToTest)) {
251 return true;
252 }
253 }
254 return false;
255 }
256
257 /**
258 * Used to determine if a given version was used (aka read) from the EvidenceCollection.
259 *
260 * @param version the version to search for within the collected evidence.
261 * @return whether or not the string was used.
262 */
263 public boolean containsUsedVersion(DependencyVersion version) {
264 if (version == null) {
265 return false;
266 }
267
268 for (Evidence e : EvidenceCollection.EVIDENCE_USED.filter(this)) {
269 final DependencyVersion value = DependencyVersionUtil.parseVersion(e.getValue());
270 if (value != null && value.matchesAtLeastThreeLevels(version)) {
271 return true;
272 }
273 }
274 return false;
275 }
276
277 /**
278 * Returns whether or not the collection contains evidence of a specified Confidence.
279 *
280 * @param confidence A Confidence value.
281 * @return boolean.
282 */
283 public boolean contains(Confidence confidence) {
284 for (Evidence e : list) {
285 if (e.getConfidence().equals(confidence)) {
286 return true;
287 }
288 }
289 return false;
290 }
291
292 /**
293 * Merges multiple EvidenceCollections together, only merging evidence that was used, into a new EvidenceCollection.
294 *
295 * @param ec One or more EvidenceCollections.
296 * @return a new EvidenceCollection containing the used evidence.
297 */
298 public static EvidenceCollection mergeUsed(EvidenceCollection... ec) {
299 final EvidenceCollection ret = new EvidenceCollection();
300 for (EvidenceCollection col : ec) {
301 for (Evidence e : col.list) {
302 if (e.isUsed()) {
303 ret.addEvidence(e);
304 }
305 }
306 }
307 return ret;
308 }
309
310 /**
311 * Merges multiple EvidenceCollections together.
312 *
313 * @param ec One or more EvidenceCollections.
314 * @return a new EvidenceCollection.
315 */
316 public static EvidenceCollection merge(EvidenceCollection... ec) {
317 final EvidenceCollection ret = new EvidenceCollection();
318 for (EvidenceCollection col : ec) {
319 ret.list.addAll(col.list);
320 ret.weightedStrings.addAll(col.weightedStrings);
321 }
322 return ret;
323 }
324
325 /**
326 * Merges multiple EvidenceCollections together; flattening all of the evidence items by removing the confidence.
327 *
328 * @param ec One or more EvidenceCollections
329 * @return new set of evidence resulting from merging the evidence in the collections
330 */
331 public static Set<Evidence> mergeForDisplay(EvidenceCollection... ec) {
332 final Set<Evidence> ret = new TreeSet<Evidence>();
333 for (EvidenceCollection col : ec) {
334 for (Evidence e : col) {
335 //if (e.isUsed()) {
336 final Evidence newEvidence = new Evidence(e.getSource(), e.getName(), e.getValue(), null);
337 newEvidence.setUsed(true);
338 ret.add(newEvidence);
339 //}
340 }
341 }
342 return ret;
343 }
344
345 /**
346 * Returns a string of evidence 'values'.
347 *
348 * @return a string containing the evidence.
349 */
350 @Override
351 public String toString() {
352 final StringBuilder sb = new StringBuilder();
353 for (Evidence e : this.list) {
354 sb.append(e.getValue()).append(' ');
355 }
356 return sb.toString();
357 }
358
359 /**
360 * Returns the number of elements in the EvidenceCollection.
361 *
362 * @return the number of elements in the collection.
363 */
364 public int size() {
365 return list.size();
366 }
367
368 /**
369 * <p>
370 * Takes a string that may contain a fully qualified domain and it will return the string having removed the query string, the
371 * protocol, the sub-domain of 'www', and the file extension of the path.</p>
372 * <p>
373 * This is useful for checking if the evidence contains a specific string. The presence of the protocol, file extension, etc.
374 * may produce false positives.
375 *
376 * <p>
377 * Example, given the following input:</p>
378 * <code>'Please visit https://www.somedomain.com/path1/path2/file.php?id=439'</code>
379 * <p>
380 * The function would return:</p>
381 * <code>'Please visit somedomain path1 path2 file'</code>
382 *
383 * @param value the value that may contain a url
384 * @return the modified string
385 */
386 private String urlCorrection(String value) {
387 if (value == null || !UrlStringUtils.containsUrl(value)) {
388 return value;
389 }
390 final StringBuilder sb = new StringBuilder(value.length());
391 final String[] parts = value.split("\\s");
392 for (String part : parts) {
393 if (UrlStringUtils.isUrl(part)) {
394 try {
395 final List<String> data = UrlStringUtils.extractImportantUrlData(part);
396 sb.append(' ').append(StringUtils.join(data, ' '));
397 } catch (MalformedURLException ex) {
398 LOGGER.debug("error parsing {}", part, ex);
399 sb.append(' ').append(part);
400 }
401 } else {
402 sb.append(' ').append(part);
403 }
404 }
405 return sb.toString().trim();
406 }
407 }