Coverage Report - org.owasp.dependencycheck.dependency.EvidenceCollection
 
Classes in this File Line Coverage Branch Coverage Complexity
EvidenceCollection
76%
73/95
63%
37/58
2.783
EvidenceCollection$1
100%
2/2
100%
2/2
2.783
EvidenceCollection$2
100%
2/2
100%
2/2
2.783
EvidenceCollection$3
100%
2/2
100%
2/2
2.783
EvidenceCollection$4
100%
2/2
100%
2/2
2.783
EvidenceCollection$5
100%
2/2
N/A
2.783
 
 1  
 /*
 2  
  * This file is part of dependency-check-core.
 3  
  *
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  *
 8  
  *     http://www.apache.org/licenses/LICENSE-2.0
 9  
  *
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  *
 16  
  * Copyright (c) 2012 Jeremy Long. All Rights Reserved.
 17  
  */
 18  
 package org.owasp.dependencycheck.dependency;
 19  
 
 20  
 import java.net.MalformedURLException;
 21  
 import java.util.HashSet;
 22  
 import java.util.Iterator;
 23  
 import java.util.List;
 24  
 import java.util.Set;
 25  
 import java.util.TreeSet;
 26  
 import java.util.logging.Level;
 27  
 import java.util.logging.Logger;
 28  
 import org.apache.commons.lang.StringUtils;
 29  
 import org.owasp.dependencycheck.utils.DependencyVersion;
 30  
 import org.owasp.dependencycheck.utils.DependencyVersionUtil;
 31  
 import org.owasp.dependencycheck.utils.Filter;
 32  
 import org.owasp.dependencycheck.utils.UrlStringUtils;
 33  
 
 34  
 /**
 35  
  * Used to maintain a collection of Evidence.
 36  
  *
 37  
  * @author Jeremy Long <jeremy.long@owasp.org>
 38  
  */
 39  
 public class EvidenceCollection implements Iterable<Evidence> {
 40  
 
 41  
     /**
 42  
      * The logger.
 43  
      */
 44  1
     private static final Logger LOGGER = Logger.getLogger(EvidenceCollection.class.getName());
 45  
     /**
 46  
      * Used to iterate over highest confidence evidence contained in the collection.
 47  
      */
 48  73
     private static final Filter<Evidence> HIGHEST_CONFIDENCE = new Filter<Evidence>() {
 49  
         public boolean passes(Evidence evidence) {
 50  73
             return evidence.getConfidence() == Confidence.HIGHEST;
 51  
         }
 52  
     };
 53  
     /**
 54  
      * Used to iterate over high confidence evidence contained in the collection.
 55  
      */
 56  63
     private static final Filter<Evidence> HIGH_CONFIDENCE = new Filter<Evidence>() {
 57  
         public boolean passes(Evidence evidence) {
 58  63
             return evidence.getConfidence() == Confidence.HIGH;
 59  
         }
 60  
     };
 61  
     /**
 62  
      * Used to iterate over medium confidence evidence contained in the collection.
 63  
      */
 64  1
     private static final Filter<Evidence> MEDIUM_CONFIDENCE = new Filter<Evidence>() {
 65  
         public boolean passes(Evidence evidence) {
 66  73
             return evidence.getConfidence() == Confidence.MEDIUM;
 67  
         }
 68  
     };
 69  
     /**
 70  
      * Used to iterate over low confidence evidence contained in the collection.
 71  
      */
 72  1
     private static final Filter<Evidence> LOW_CONFIDENCE = new Filter<Evidence>() {
 73  
         public boolean passes(Evidence evidence) {
 74  122
             return evidence.getConfidence() == Confidence.LOW;
 75  
         }
 76  
     };
 77  
     /**
 78  
      * Used to iterate over evidence that was used (aka read) from the collection.
 79  
      */
 80  1
     private static final Filter<Evidence> EVIDENCE_USED = new Filter<Evidence>() {
 81  
         public boolean passes(Evidence evidence) {
 82  995
             return evidence.isUsed();
 83  
         }
 84  
     };
 85  
 
 86  
     /**
 87  
      * Used to iterate over evidence of the specified confidence.
 88  
      *
 89  
      * @param confidence the confidence level for the evidence to be iterated over.
 90  
      * @return {@code Iterable<Evidence>} an iterable collection of evidence
 91  
      */
 92  
     public final Iterable<Evidence> iterator(Confidence confidence) {
 93  44
         if (confidence == Confidence.HIGHEST) {
 94  10
             return EvidenceCollection.HIGHEST_CONFIDENCE.filter(this.list);
 95  34
         } else if (confidence == Confidence.HIGH) {
 96  10
             return EvidenceCollection.HIGH_CONFIDENCE.filter(this.list);
 97  24
         } else if (confidence == Confidence.MEDIUM) {
 98  10
             return EvidenceCollection.MEDIUM_CONFIDENCE.filter(this.list);
 99  
         } else {
 100  14
             return EvidenceCollection.LOW_CONFIDENCE.filter(this.list);
 101  
         }
 102  
     }
 103  
     /**
 104  
      * A collection of evidence.
 105  
      */
 106  
     private final Set<Evidence> list;
 107  
     /**
 108  
      * A collection of strings used to adjust Lucene's term weighting.
 109  
      */
 110  
     private final Set<String> weightedStrings;
 111  
 
 112  
     /**
 113  
      * Creates a new EvidenceCollection.
 114  
      */
 115  115
     public EvidenceCollection() {
 116  115
         list = new TreeSet<Evidence>();
 117  115
         weightedStrings = new HashSet<String>();
 118  115
     }
 119  
 
 120  
     /**
 121  
      * Adds evidence to the collection.
 122  
      *
 123  
      * @param e Evidence.
 124  
      */
 125  
     public void addEvidence(Evidence e) {
 126  13904
         list.add(e);
 127  13904
     }
 128  
 
 129  
     /**
 130  
      * Creates an Evidence object from the parameters and adds the resulting object to the collection.
 131  
      *
 132  
      * @param source the source of the Evidence.
 133  
      * @param name the name of the Evidence.
 134  
      * @param value the value of the Evidence.
 135  
      * @param confidence the confidence of the Evidence.
 136  
      */
 137  
     public void addEvidence(String source, String name, String value, Confidence confidence) {
 138  13897
         final Evidence e = new Evidence(source, name, value, confidence);
 139  13897
         addEvidence(e);
 140  13897
     }
 141  
 
 142  
     /**
 143  
      * Adds a term to the weighting collection. The terms added here are used later to boost the score of other terms.
 144  
      * This is a way of combining evidence from multiple sources to boost the confidence of the given evidence.
 145  
      *
 146  
      * Example: The term 'Apache' is found in the manifest of a JAR and is added to the Collection. When we parse the
 147  
      * package names within the JAR file we may add these package names to the "weighted" strings collection to boost
 148  
      * the score in the Lucene query. That way when we construct the Lucene query we find the term Apache in the
 149  
      * collection AND in the weighted strings; as such, we will boost the confidence of the term Apache.
 150  
      *
 151  
      * @param str to add to the weighting collection.
 152  
      */
 153  
     public void addWeighting(String str) {
 154  15
         weightedStrings.add(str);
 155  15
     }
 156  
 
 157  
     /**
 158  
      * Returns a set of Weightings - a list of terms that are believed to be of higher confidence when also found in
 159  
      * another location.
 160  
      *
 161  
      * @return a {@code Set<String>} containing the weighted terms
 162  
      */
 163  
     public Set<String> getWeighting() {
 164  
         return weightedStrings;
 165  
     }
 166  
 
 167  
     /**
 168  
      * Returns the set of evidence.
 169  
      *
 170  
      * @return the set of evidence.
 171  
      */
 172  
     public Set<Evidence> getEvidence() {
 173  
         return list;
 174  
     }
 175  
 
 176  
     /**
 177  
      * Returns the set of evidence from a given source.
 178  
      *
 179  
      * @param source the source of the evidence
 180  
      * @return the set of evidence, or null if the source is null.
 181  
      */
 182  
     public Set<Evidence> getEvidence(String source) {
 183  0
         if (source == null) {
 184  0
             return null;
 185  
         }
 186  0
         final Set<Evidence> ret = new HashSet<Evidence>();
 187  0
         for (Evidence e : list) {
 188  0
             if (source.equals(e.getSource())) {
 189  0
                 ret.add(e);
 190  
             }
 191  0
         }
 192  0
         return ret;
 193  
     }
 194  
 
 195  
     /**
 196  
      * Returns the set of evidence from a given source and name.
 197  
      *
 198  
      * @param source the source of the evidence
 199  
      * @param name the name of the evidence to return
 200  
      * @return the set of evidence, or null if either the source or the name is null.
 201  
      */
 202  
     public Set<Evidence> getEvidence(String source, String name) {
 203  2
         if (source == null || name == null) {
 204  0
             return null;
 205  
         }
 206  2
         final Set<Evidence> ret = new HashSet<Evidence>();
 207  2
         for (Evidence e : list) {
 208  2
             if (source.equals(e.getSource()) && name.equals(e.getName())) {
 209  2
                 ret.add(e);
 210  
             }
 211  2
         }
 212  2
         return ret;
 213  
     }
 214  
 
 215  
     /**
 216  
      * Implements the iterator interface for the Evidence Collection.
 217  
      *
 218  
      * @return an {@code Iterator<Evidence>}.
 219  
      */
 220  
     public Iterator<Evidence> iterator() {
 221  117
         return list.iterator();
 222  
     }
 223  
 
 224  
     /**
 225  
      * Used to determine if a given string was used (aka read).
 226  
      *
 227  
      * @param text the string to search for.
 228  
      * @return whether or not the string was used.
 229  
      */
 230  
     public boolean containsUsedString(String text) {
 231  112
         if (text == null) {
 232  0
             return false;
 233  
         }
 234  112
         final String textToTest = text.toLowerCase();
 235  
 
 236  112
         for (Evidence e : EvidenceCollection.EVIDENCE_USED.filter(this)) {
 237  
             //TODO consider changing the regex to only compare alpha-numeric (i.e. strip everything else)
 238  754
             final String value = urlCorrection(e.getValue().toLowerCase()).replaceAll("[\\s_-]", "");
 239  754
             if (value.contains(textToTest)) {
 240  44
                 return true;
 241  
             }
 242  710
         }
 243  68
         return false;
 244  
     }
 245  
 
 246  
     /**
 247  
      * Used to determine if a given version was used (aka read) from the EvidenceCollection.
 248  
      *
 249  
      * @param version the version to search for within the collected evidence.
 250  
      * @return whether or not the version was used.
 251  
      */
 252  
     public boolean containsUsedVersion(DependencyVersion version) {
 253  0
         if (version == null) {
 254  0
             return false;
 255  
         }
 256  
 
 257  0
         for (Evidence e : EvidenceCollection.EVIDENCE_USED.filter(this)) {
 258  0
             final DependencyVersion value = DependencyVersionUtil.parseVersion(e.getValue());
 259  0
             if (value != null && value.matchesAtLeastThreeLevels(version)) {
 260  0
                 return true;
 261  
             }
 262  0
         }
 263  0
         return false;
 264  
     }
 265  
 
 266  
     /**
 267  
      * Returns whether or not the collection contains evidence of a specified Confidence.
 268  
      *
 269  
      * @param confidence A Confidence value.
 270  
      * @return true if any evidence in the collection has the specified confidence; otherwise false.
 271  
      */
 272  
     public boolean contains(Confidence confidence) {
 273  24
         for (Evidence e : list) {
 274  114
             if (e.getConfidence().equals(confidence)) {
 275  21
                 return true;
 276  
             }
 277  93
         }
 278  3
         return false;
 279  
     }
 280  
 
 281  
     /**
 282  
      * Merges multiple EvidenceCollections together, only merging evidence that was used, into a new EvidenceCollection.
 283  
      *
 284  
      * @param ec One or more EvidenceCollections.
 285  
      * @return a new EvidenceCollection containing the used evidence.
 286  
      */
 287  
     public static EvidenceCollection mergeUsed(EvidenceCollection... ec) {
 288  1
         final EvidenceCollection ret = new EvidenceCollection();
 289  4
         for (EvidenceCollection col : ec) {
 290  3
             for (Evidence e : col.list) {
 291  2
                 if (e.isUsed()) {
 292  1
                     ret.addEvidence(e);
 293  
                 }
 294  2
             }
 295  
         }
 296  1
         return ret;
 297  
     }
 298  
 
 299  
     /**
 300  
      * Merges multiple EvidenceCollections together.
 301  
      *
 302  
      * @param ec One or more EvidenceCollections.
 303  
      * @return a new EvidenceCollection.
 304  
      */
 305  
     public static EvidenceCollection merge(EvidenceCollection... ec) {
 306  3
         final EvidenceCollection ret = new EvidenceCollection();
 307  12
         for (EvidenceCollection col : ec) {
 308  9
             ret.list.addAll(col.list);
 309  9
             ret.weightedStrings.addAll(col.weightedStrings);
 310  
         }
 311  3
         return ret;
 312  
     }
 313  
 
 314  
     /**
 315  
      * Returns a string of evidence 'values'.
 316  
      *
 317  
      * @return a string containing the evidence.
 318  
      */
 319  
     @Override
 320  
     public String toString() {
 321  3
         final StringBuilder sb = new StringBuilder();
 322  3
         for (Evidence e : this.list) {
 323  11
             sb.append(e.getValue()).append(' ');
 324  11
         }
 325  3
         return sb.toString();
 326  
     }
 327  
 
 328  
     /**
 329  
      * Returns the number of elements in the EvidenceCollection.
 330  
      *
 331  
      * @return the number of elements in the collection.
 332  
      */
 333  
     public int size() {
 334  29
         return list.size();
 335  
     }
 336  
 
 337  
     /**
 338  
      * <p>
 339  
      * Takes a string that may contain a fully qualified domain and it will return the string having removed the query
 340  
      * string, the protocol, the sub-domain of 'www', and the file extension of the path.</p>
 341  
      * <p>
 342  
      * This is useful for checking if the evidence contains a specific string. The presence of the protocol, file
 343  
      * extension, etc. may produce false positives.</p>
 344  
      *
 345  
      * <p>
 346  
      * Example, given the following input:</p>
 347  
      * <code>'Please visit https://www.somedomain.com/path1/path2/file.php?id=439'</code>
 348  
      * <p>
 349  
      * The function would return:</p>
 350  
      * <code>'Please visit somedomain path1 path2 file'</code>
 351  
      *
 352  
      * @param value the value that may contain a url
 353  
      * @return the modified string
 354  
      */
 355  
     private String urlCorrection(String value) {
 356  754
         if (value == null || !UrlStringUtils.containsUrl(value)) {
 357  749
             return value;
 358  
         }
 359  5
         final StringBuilder sb = new StringBuilder(value.length());
 360  5
         final String[] parts = value.split("\\s");
 361  10
         for (String part : parts) {
 362  5
             if (UrlStringUtils.isUrl(part)) {
 363  
                 try {
 364  5
                     final List<String> data = UrlStringUtils.extractImportantUrlData(part);
 365  5
                     sb.append(' ').append(StringUtils.join(data, ' '));
 366  0
                 } catch (MalformedURLException ex) {
 367  0
                     LOGGER.log(Level.FINE, "error parsing " + part, ex);
 368  0
                     sb.append(' ').append(part);
 369  5
                 }
 370  
             } else {
 371  0
                 sb.append(' ').append(part);
 372  
             }
 373  
         }
 374  5
         return sb.toString().trim();
 375  
     }
 376  
 }