Coverage Report

Coverage Report - org.owasp.dependencycheck.dependency.EvidenceCollection

Classes in this File

Line Coverage

Branch Coverage

Complexity

EvidenceCollection

69%

67/96

53%

31/58

2.783

EvidenceCollection$1

100%

2/2

100%

2/2

2.783

EvidenceCollection$2

100%

2/2

100%

2/2

2.783

EvidenceCollection$3

100%

2/2

100%

2/2

2.783

EvidenceCollection$4

100%

2/2

100%

2/2

2.783

EvidenceCollection$5

100%

2/2

N/A

2.783

 /*
  * This file is part of dependency-check-core.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  *
  * Copyright (c) 2012 Jeremy Long. All Rights Reserved.
  */
 package org.owasp.dependencycheck.dependency;
 
 import java.net.MalformedURLException;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Locale;
 import java.util.Set;
 import java.util.TreeSet;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 import org.apache.commons.lang.StringUtils;
 import org.owasp.dependencycheck.utils.DependencyVersion;
 import org.owasp.dependencycheck.utils.DependencyVersionUtil;
 import org.owasp.dependencycheck.utils.Filter;
 import org.owasp.dependencycheck.utils.UrlStringUtils;
 
 /**
  * Used to maintain a collection of Evidence.
  *
  * @author Jeremy Long <jeremy.long@owasp.org>
  */
 public class EvidenceCollection implements Iterable<Evidence> {
 
     /**
      * Used to iterate over highest confidence evidence contained in the collection.
      */
     private static final Filter<Evidence> HIGHEST_CONFIDENCE = new Filter<Evidence>() {
         public boolean passes(Evidence evidence) {
             return evidence.getConfidence() == Confidence.HIGHEST;
         }
     };
     /**
      * Used to iterate over high confidence evidence contained in the collection.
      */
     private static final Filter<Evidence> HIGH_CONFIDENCE = new Filter<Evidence>() {
         public boolean passes(Evidence evidence) {
             return evidence.getConfidence() == Confidence.HIGH;
         }
     };
     /**
      * Used to iterate over medium confidence evidence contained in the collection.
      */
     private static final Filter<Evidence> MEDIUM_CONFIDENCE = new Filter<Evidence>() {
         public boolean passes(Evidence evidence) {
             return evidence.getConfidence() == Confidence.MEDIUM;
         }
     };
     /**
      * Used to iterate over low confidence evidence contained in the collection.
      */
     private static final Filter<Evidence> LOW_CONFIDENCE = new Filter<Evidence>() {
         public boolean passes(Evidence evidence) {
             return evidence.getConfidence() == Confidence.LOW;
         }
     };
     /**
      * Used to iterate over evidence that has was used (aka read) from the collection.
      */
     private static final Filter<Evidence> EVIDENCE_USED = new Filter<Evidence>() {
         public boolean passes(Evidence evidence) {
             return evidence.isUsed();
         }
     };
 
     /**
      * Used to iterate over evidence of the specified confidence.
      *
      * @param confidence the confidence level for the evidence to be iterated over.
      * @return Iterable<Evidence> an iterable collection of evidence
      */
     public final Iterable<Evidence> iterator(Confidence confidence) {
         if (confidence == Confidence.HIGHEST) {
             return EvidenceCollection.HIGHEST_CONFIDENCE.filter(this.list);
         } else if (confidence == Confidence.HIGH) {
             return EvidenceCollection.HIGH_CONFIDENCE.filter(this.list);
         } else if (confidence == Confidence.MEDIUM) {
             return EvidenceCollection.MEDIUM_CONFIDENCE.filter(this.list);
         } else {
             return EvidenceCollection.LOW_CONFIDENCE.filter(this.list);
         }
     }
     /**
      * A collection of evidence.
      */
     private final Set<Evidence> list;
     /**
      * A collection of strings used to adjust Lucene's term weighting.
      */
     private final Set<String> weightedStrings;
 
     /**
      * Creates a new EvidenceCollection.
      */
     public EvidenceCollection() {
         list = new TreeSet<Evidence>();
         weightedStrings = new HashSet<String>();
     }
 
     /**
      * Adds evidence to the collection.
      *
      * @param e Evidence.
      */
     public void addEvidence(Evidence e) {
         list.add(e);
     }
 
     /**
      * Creates an Evidence object from the parameters and adds the resulting object to the collection.
      *
      * @param source the source of the Evidence.
      * @param name the name of the Evidence.
      * @param value the value of the Evidence.
      * @param confidence the confidence of the Evidence.
      */
     public void addEvidence(String source, String name, String value, Confidence confidence) {
         final Evidence e = new Evidence(source, name, value, confidence);
         addEvidence(e);
     }
 
     /**
      * Adds term to the weighting collection. The terms added here are used later to boost the score of other terms.
      * This is a way of combining evidence from multiple sources to boost the confidence of the given evidence.
      *
      * Example: The term 'Apache' is found in the manifest of a JAR and is added to the Collection. When we parse the
      * package names within the JAR file we may add these package names to the "weighted" strings collection to boost
      * the score in the Lucene query. That way when we construct the Lucene query we find the term Apache in the
      * collection AND in the weighted strings; as such, we will boost the confidence of the term Apache.
      *
      * @param str to add to the weighting collection.
      */
     public void addWeighting(String str) {
         weightedStrings.add(str);
     }
 
     /**
      * Returns a set of Weightings - a list of terms that are believed to be of higher confidence when also found in
      * another location.
      *
      * @return Set<String>
      */
     public Set<String> getWeighting() {
         return weightedStrings;
     }
 
     /**
      * Returns the set of evidence.
      *
      * @return the set of evidence.
      */
     public Set<Evidence> getEvidence() {
         return list;
     }
 
     /**
      * Returns the set of evidence from a given source.
      *
      * @param source the source of the evidence
      * @return the set of evidence.
      */
     public Set<Evidence> getEvidence(String source) {
         if (source == null) {
             return null;
         }
         final Set<Evidence> ret = new HashSet<Evidence>();
         for (Evidence e : list) {
             if (source.equals(e.getSource())) {
                 ret.add(e);
             }
         }
         return ret;
     }
 
     /**
      * Returns the set of evidence from a given source and name.
      *
      * @param source the source of the evidence
      * @param name the name of the evidence to return
      * @return the set of evidence.
      */
     public Set<Evidence> getEvidence(String source, String name) {
         if (source == null || name == null) {
             return null;
         }
         final Set<Evidence> ret = new HashSet<Evidence>();
         for (Evidence e : list) {
             if (source.equals(e.getSource()) && name.equals(e.getName())) {
                 ret.add(e);
             }
         }
         return ret;
     }
 
     /**
      * Implements the iterator interface for the Evidence Collection.
      *
      * @return an Iterator<Evidence>.
      */
     public Iterator<Evidence> iterator() {
         return list.iterator();
     }
 
     /**
      * Used to determine if a given string was used (aka read).
      *
      * @param text the string to search for.
      * @return whether or not the string was used.
      */
     public boolean containsUsedString(String text) {
         if (text == null) {
             return false;
         }
         final String textToTest = text.toLowerCase();
 
         for (Evidence e : EvidenceCollection.EVIDENCE_USED.filter(this)) {
             //TODO consider changing the regex to only compare alpha-numeric (i.e. strip everything else)
             final String value = urlCorrection(e.getValue().toLowerCase()).replaceAll("[\\s_-]", "");
             if (value.contains(textToTest)) {
                 return true;
             }
         }
         return false;
     }
 
     /**
      * Used to determine if a given version was used (aka read) from the EvidenceCollection.
      *
      * @param version the version to search for within the collected evidence.
      * @return whether or not the string was used.
      */
     public boolean containsUsedVersion(DependencyVersion version) {
         if (version == null) {
             return false;
         }
 
         for (Evidence e : EvidenceCollection.EVIDENCE_USED.filter(this)) {
             final DependencyVersion value = DependencyVersionUtil.parseVersion(e.getValue());
             if (value != null && value.matchesAtLeastThreeLevels(version)) {
                 return true;
             }
         }
         return false;
     }
 
     /**
      * Returns whether or not the collection contains evidence of a specified Confidence.
      *
      * @param confidence A Confidence value.
      * @return boolean.
      */
     public boolean contains(Confidence confidence) {
         for (Evidence e : list) {
             if (e.getConfidence().equals(confidence)) {
                 return true;
             }
         }
         return false;
     }
 
     /**
      * Merges multiple EvidenceCollections together, only merging evidence that was used, into a new EvidenceCollection.
      *
      * @param ec One or more EvidenceCollections.
      * @return a new EvidenceCollection containing the used evidence.
      */
     public static EvidenceCollection mergeUsed(EvidenceCollection... ec) {
         final EvidenceCollection ret = new EvidenceCollection();
         for (EvidenceCollection col : ec) {
             for (Evidence e : col.list) {
                 if (e.isUsed()) {
                     ret.addEvidence(e);
                 }
             }
         }
         return ret;
     }
 
     /**
      * Merges multiple EvidenceCollections together.
      *
      * @param ec One or more EvidenceCollections.
      * @return a new EvidenceCollection.
      */
     public static EvidenceCollection merge(EvidenceCollection... ec) {
         final EvidenceCollection ret = new EvidenceCollection();
         for (EvidenceCollection col : ec) {
             ret.list.addAll(col.list);
             ret.weightedStrings.addAll(col.weightedStrings);
         }
         return ret;
     }
 
     /**
      * Returns a string of evidence 'values'.
      *
      * @return a string containing the evidence.
      */
     @Override
     public String toString() {
         final StringBuilder sb = new StringBuilder();
         for (Evidence e : this.list) {
             sb.append(e.getValue()).append(' ');
         }
         return sb.toString();
     }
 
     /**
      * Returns the number of elements in the EvidenceCollection.
      *
      * @return the number of elements in the collection.
      */
     public int size() {
         return list.size();
     }
 
     /**
      * <p>
      * Takes a string that may contain a fully qualified domain and it will return the string having removed the query
      * string, the protocol, the sub-domain of 'www', and the file extension of the path.</p>
      * <p>
      * This is useful for checking if the evidence contains a specific string. The presence of the protocol, file
      * extension, etc. may produce false positives.
      *
      * <p>
      * Example, given the following input:</p>
      * <code>'Please visit https://www.somedomain.com/path1/path2/file.php?id=439'</code>
      * <p>
      * The function would return:</p>
      * <code>'Please visit somedomain path1 path2 file'</code>
      *
      * @param value the value that may contain a url
      * @return the modified string
      */
     private String urlCorrection(String value) {
         if (value == null || !UrlStringUtils.containsUrl(value)) {
             return value;
         }
         final StringBuilder sb = new StringBuilder(value.length());
         final String[] parts = value.split("\\s");
         for (String part : parts) {
             if (UrlStringUtils.isUrl(part)) {
                 try {
                     final List<String> data = UrlStringUtils.extractImportantUrlData(part);
                     sb.append(' ').append(StringUtils.join(data, ' '));
                 } catch (MalformedURLException ex) {
                     Logger.getLogger(EvidenceCollection.class.getName()).log(Level.INFO, "error parsing " + part, ex);
                     sb.append(' ').append(part);
                 }
             } else {
                 sb.append(' ').append(part);
             }
         }
         return sb.toString().trim();
     }
 }

1		/*
2		* This file is part of dependency-check-core.
3		*
4		* Licensed under the Apache License, Version 2.0 (the "License");
5		* you may not use this file except in compliance with the License.
6		* You may obtain a copy of the License at
7		*
8		* http://www.apache.org/licenses/LICENSE-2.0
9		*
10		* Unless required by applicable law or agreed to in writing, software
11		* distributed under the License is distributed on an "AS IS" BASIS,
12		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13		* See the License for the specific language governing permissions and
14		* limitations under the License.
15		*
16		* Copyright (c) 2012 Jeremy Long. All Rights Reserved.
17		*/
18		package org.owasp.dependencycheck.dependency;
19
20		import java.net.MalformedURLException;
21		import java.util.HashSet;
22		import java.util.Iterator;
23		import java.util.List;
24		import java.util.Set;
25		import java.util.TreeSet;
26		import java.util.logging.Level;
27		import java.util.logging.Logger;
28		import org.apache.commons.lang.StringUtils;
29		import org.owasp.dependencycheck.utils.DependencyVersion;
30		import org.owasp.dependencycheck.utils.DependencyVersionUtil;
31		import org.owasp.dependencycheck.utils.Filter;
32		import org.owasp.dependencycheck.utils.UrlStringUtils;
33
34		/**
35		* Used to maintain a collection of Evidence.
36		*
37		* @author Jeremy Long <jeremy.long@owasp.org>
38		*/
39		public class EvidenceCollection implements Iterable<Evidence> {
40
41		/**
42		* Used to iterate over highest confidence evidence contained in the collection.
43		*/
44	489	private static final Filter<Evidence> HIGHEST_CONFIDENCE = new Filter<Evidence>() {
45		public boolean passes(Evidence evidence) {
46	488	return evidence.getConfidence() == Confidence.HIGHEST;
47		}
48		};
49		/**
50		* Used to iterate over high confidence evidence contained in the collection.
51		*/
52	483	private static final Filter<Evidence> HIGH_CONFIDENCE = new Filter<Evidence>() {
53		public boolean passes(Evidence evidence) {
54	482	return evidence.getConfidence() == Confidence.HIGH;
55		}
56		};
57		/**
58		* Used to iterate over medium confidence evidence contained in the collection.
59		*/
60	1	private static final Filter<Evidence> MEDIUM_CONFIDENCE = new Filter<Evidence>() {
61		public boolean passes(Evidence evidence) {
62	452	return evidence.getConfidence() == Confidence.MEDIUM;
63		}
64		};
65		/**
66		* Used to iterate over low confidence evidence contained in the collection.
67		*/
68	1	private static final Filter<Evidence> LOW_CONFIDENCE = new Filter<Evidence>() {
69		public boolean passes(Evidence evidence) {
70	748	return evidence.getConfidence() == Confidence.LOW;
71		}
72		};
73		/**
74		* Used to iterate over evidence that has was used (aka read) from the collection.
75		*/
76	1	private static final Filter<Evidence> EVIDENCE_USED = new Filter<Evidence>() {
77		public boolean passes(Evidence evidence) {
78	9254	return evidence.isUsed();
79		}
80		};
81
82		/**
83		* Used to iterate over evidence of the specified confidence.
84		*
85		* @param confidence the confidence level for the evidence to be iterated over.
86		* @return Iterable<Evidence> an iterable collection of evidence
87		*/
88		public final Iterable<Evidence> iterator(Confidence confidence) {
89	504	if (confidence == Confidence.HIGHEST) {
90	128	return EvidenceCollection.HIGHEST_CONFIDENCE.filter(this.list);
91	376	} else if (confidence == Confidence.HIGH) {
92	123	return EvidenceCollection.HIGH_CONFIDENCE.filter(this.list);
93	253	} else if (confidence == Confidence.MEDIUM) {
94	108	return EvidenceCollection.MEDIUM_CONFIDENCE.filter(this.list);
95		} else {
96	145	return EvidenceCollection.LOW_CONFIDENCE.filter(this.list);
97		}
98		}
99		/**
100		* A collection of evidence.
101		*/
102		private final Set<Evidence> list;
103		/**
104		* A collection of strings used to adjust Lucene's term weighting.
105		*/
106		private final Set<String> weightedStrings;
107
108		/**
109		* Creates a new EvidenceCollection.
110		*/
111	212	public EvidenceCollection() {
112	212	list = new TreeSet<Evidence>();
113	212	weightedStrings = new HashSet<String>();
114	212	}
115
116		/**
117		* Adds evidence to the collection.
118		*
119		* @param e Evidence.
120		*/
121		public void addEvidence(Evidence e) {
122	70895	list.add(e);
123	70895	}
124
125		/**
126		* Creates an Evidence object from the parameters and adds the resulting object to the collection.
127		*
128		* @param source the source of the Evidence.
129		* @param name the name of the Evidence.
130		* @param value the value of the Evidence.
131		* @param confidence the confidence of the Evidence.
132		*/
133		public void addEvidence(String source, String name, String value, Confidence confidence) {
134	70835	final Evidence e = new Evidence(source, name, value, confidence);
135	70835	addEvidence(e);
136	70835	}
137
138		/**
139		* Adds term to the weighting collection. The terms added here are used later to boost the score of other terms.
140		* This is a way of combining evidence from multiple sources to boost the confidence of the given evidence.
141		*
142		* Example: The term 'Apache' is found in the manifest of a JAR and is added to the Collection. When we parse the
143		* package names within the JAR file we may add these package names to the "weighted" strings collection to boost
144		* the score in the Lucene query. That way when we construct the Lucene query we find the term Apache in the
145		* collection AND in the weighted strings; as such, we will boost the confidence of the term Apache.
146		*
147		* @param str to add to the weighting collection.
148		*/
149		public void addWeighting(String str) {
150	57	weightedStrings.add(str);
151	57	}
152
153		/**
154		* Returns a set of Weightings - a list of terms that are believed to be of higher confidence when also found in
155		* another location.
156		*
157		* @return Set<String>
158		*/
159		public Set<String> getWeighting() {
160	145	return weightedStrings;
161		}
162
163		/**
164		* Returns the set of evidence.
165		*
166		* @return the set of evidence.
167		*/
168		public Set<Evidence> getEvidence() {
169	30	return list;
170		}
171
172		/**
173		* Returns the set of evidence from a given source.
174		*
175		* @param source the source of the evidence
176		* @return the set of evidence.
177		*/
178		public Set<Evidence> getEvidence(String source) {
179	0	if (source == null) {
180	0	return null;
181		}
182	0	final Set<Evidence> ret = new HashSet<Evidence>();
183	0	for (Evidence e : list) {
184	0	if (source.equals(e.getSource())) {
185	0	ret.add(e);
186		}
187	0	}
188	0	return ret;
189		}
190
191		/**
192		* Returns the set of evidence from a given source and name.
193		*
194		* @param source the source of the evidence
195		* @param name the name of the evidence to return
196		* @return the set of evidence.
197		*/
198		public Set<Evidence> getEvidence(String source, String name) {
199	0	if (source == null || name == null) {
200	0	return null;
201		}
202	0	final Set<Evidence> ret = new HashSet<Evidence>();
203	0	for (Evidence e : list) {
204	0	if (source.equals(e.getSource()) && name.equals(e.getName())) {
205	0	ret.add(e);
206		}
207	0	}
208	0	return ret;
209		}
210
211		/**
212		* Implements the iterator interface for the Evidence Collection.
213		*
214		* @return an Iterator<Evidence>.
215		*/
216		public Iterator<Evidence> iterator() {
217	1465	return list.iterator();
218		}
219
220		/**
221		* Used to determine if a given string was used (aka read).
222		*
223		* @param text the string to search for.
224		* @return whether or not the string was used.
225		*/
226		public boolean containsUsedString(String text) {
227	1444	if (text == null) {
228	0	return false;
229		}
230	1444	final String textToTest = text.toLowerCase();
231
232	1444	for (Evidence e : EvidenceCollection.EVIDENCE_USED.filter(this)) {
233		//TODO consider changing the regex to only compare alpha-numeric (i.e. strip everything else)
234	6183	final String value = urlCorrection(e.getValue().toLowerCase()).replaceAll("[\\s_-]", "");
235	6183	if (value.contains(textToTest)) {
236	510	return true;
237		}
238	5673	}
239	934	return false;
240		}
241
242		/**
243		* Used to determine if a given version was used (aka read) from the EvidenceCollection.
244		*
245		* @param version the version to search for within the collected evidence.
246		* @return whether or not the string was used.
247		*/
248		public boolean containsUsedVersion(DependencyVersion version) {
249	0	if (version == null) {
250	0	return false;
251		}
252
253	0	for (Evidence e : EvidenceCollection.EVIDENCE_USED.filter(this)) {
254	0	final DependencyVersion value = DependencyVersionUtil.parseVersion(e.getValue());
255	0	if (value != null && value.matchesAtLeastThreeLevels(version)) {
256	0	return true;
257		}
258	0	}
259	0	return false;
260		}
261
262		/**
263		* Returns whether or not the collection contains evidence of a specified Confidence.
264		*
265		* @param confidence A Confidence value.
266		* @return boolean.
267		*/
268		public boolean contains(Confidence confidence) {
269	228	for (Evidence e : list) {
270	701	if (e.getConfidence().equals(confidence)) {
271	162	return true;
272		}
273	539	}
274	66	return false;
275		}
276
277		/**
278		* Merges multiple EvidenceCollections together, only merging evidence that was used, into a new EvidenceCollection.
279		*
280		* @param ec One or more EvidenceCollections.
281		* @return a new EvidenceCollection containing the used evidence.
282		*/
283		public static EvidenceCollection mergeUsed(EvidenceCollection... ec) {
284	4	final EvidenceCollection ret = new EvidenceCollection();
285	16	for (EvidenceCollection col : ec) {
286	12	for (Evidence e : col.list) {
287	59	if (e.isUsed()) {
288	58	ret.addEvidence(e);
289		}
290	59	}
291		}
292	4	return ret;
293		}
294
295		/**
296		* Merges multiple EvidenceCollections together.
297		*
298		* @param ec One or more EvidenceCollections.
299		* @return a new EvidenceCollection.
300		*/
301		public static EvidenceCollection merge(EvidenceCollection... ec) {
302	1	final EvidenceCollection ret = new EvidenceCollection();
303	4	for (EvidenceCollection col : ec) {
304	3	ret.list.addAll(col.list);
305	3	ret.weightedStrings.addAll(col.weightedStrings);
306		}
307	1	return ret;
308		}
309
310		/**
311		* Returns a string of evidence 'values'.
312		*
313		* @return a string containing the evidence.
314		*/
315		@Override
316		public String toString() {
317	3	final StringBuilder sb = new StringBuilder();
318	3	for (Evidence e : this.list) {
319	11	sb.append(e.getValue()).append(' ');
320	11	}
321	3	return sb.toString();
322		}
323
324		/**
325		* Returns the number of elements in the EvidenceCollection.
326		*
327		* @return the number of elements in the collection.
328		*/
329		public int size() {
330	221	return list.size();
331		}
332
333		/**
334		* <p>
335		* Takes a string that may contain a fully qualified domain and it will return the string having removed the query
336		* string, the protocol, the sub-domain of 'www', and the file extension of the path.</p>
337		* <p>
338		* This is useful for checking if the evidence contains a specific string. The presence of the protocol, file
339		* extension, etc. may produce false positives.
340		*
341		* <p>
342		* Example, given the following input:</p>
343		* <code>'Please visit https://www.somedomain.com/path1/path2/file.php?id=439'</code>
344		* <p>
345		* The function would return:</p>
346		* <code>'Please visit somedomain path1 path2 file'</code>
347		*
348		* @param value the value that may contain a url
349		* @return the modified string
350		*/
351		private String urlCorrection(String value) {
352	6183	if (value == null || !UrlStringUtils.containsUrl(value)) {
353	6050	return value;
354		}
355	133	final StringBuilder sb = new StringBuilder(value.length());
356	133	final String[] parts = value.split("\\s");
357	266	for (String part : parts) {
358	133	if (UrlStringUtils.isUrl(part)) {
359		try {
360	133	final List<String> data = UrlStringUtils.extractImportantUrlData(part);
361	133	sb.append(' ').append(StringUtils.join(data, ' '));
362	0	} catch (MalformedURLException ex) {
363	0	Logger.getLogger(EvidenceCollection.class.getName()).log(Level.INFO, "error parsing " + part, ex);
364	0	sb.append(' ').append(part);
365	133	}
366		} else {
367	0	sb.append(' ').append(part);
368		}
369		}
370	133	return sb.toString().trim();
371		}
372		}