Coverage Report - org.owasp.dependencycheck.dependency.EvidenceCollection
 
Classes in this File Line Coverage Branch Coverage Complexity
EvidenceCollection
69%
67/96
53%
31/58
2.783
EvidenceCollection$1
100%
2/2
100%
2/2
2.783
EvidenceCollection$2
100%
2/2
100%
2/2
2.783
EvidenceCollection$3
100%
2/2
100%
2/2
2.783
EvidenceCollection$4
100%
2/2
100%
2/2
2.783
EvidenceCollection$5
100%
2/2
N/A
2.783
 
 1  
 /*
 2  
  * This file is part of dependency-check-core.
 3  
  *
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  *
 8  
  *     http://www.apache.org/licenses/LICENSE-2.0
 9  
  *
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  *
 16  
  * Copyright (c) 2012 Jeremy Long. All Rights Reserved.
 17  
  */
 18  
 package org.owasp.dependencycheck.dependency;
 19  
 
 20  
 import java.net.MalformedURLException;
 21  
 import java.util.HashSet;
 22  
 import java.util.Iterator;
 23  
 import java.util.List;
 24  
 import java.util.Set;
 25  
 import java.util.TreeSet;
 26  
 import java.util.logging.Level;
 27  
 import java.util.logging.Logger;
 28  
 import org.apache.commons.lang.StringUtils;
 29  
 import org.owasp.dependencycheck.utils.DependencyVersion;
 30  
 import org.owasp.dependencycheck.utils.DependencyVersionUtil;
 31  
 import org.owasp.dependencycheck.utils.Filter;
 32  
 import org.owasp.dependencycheck.utils.UrlStringUtils;
 33  
 
 34  
 /**
 35  
  * Used to maintain a collection of Evidence.
 36  
  *
 37  
  * @author Jeremy Long <jeremy.long@owasp.org>
 38  
  */
 39  
 public class EvidenceCollection implements Iterable<Evidence> {
 40  
 
 41  
     /**
 42  
      * Used to iterate over highest confidence evidence contained in the collection.
 43  
      */
 44  489
     private static final Filter<Evidence> HIGHEST_CONFIDENCE = new Filter<Evidence>() {
 45  
         public boolean passes(Evidence evidence) {
 46  488
             return evidence.getConfidence() == Confidence.HIGHEST;
 47  
         }
 48  
     };
 49  
     /**
 50  
      * Used to iterate over high confidence evidence contained in the collection.
 51  
      */
 52  483
     private static final Filter<Evidence> HIGH_CONFIDENCE = new Filter<Evidence>() {
 53  
         public boolean passes(Evidence evidence) {
 54  482
             return evidence.getConfidence() == Confidence.HIGH;
 55  
         }
 56  
     };
 57  
     /**
 58  
      * Used to iterate over medium confidence evidence contained in the collection.
 59  
      */
 60  1
     private static final Filter<Evidence> MEDIUM_CONFIDENCE = new Filter<Evidence>() {
 61  
         public boolean passes(Evidence evidence) {
 62  452
             return evidence.getConfidence() == Confidence.MEDIUM;
 63  
         }
 64  
     };
 65  
     /**
 66  
      * Used to iterate over low confidence evidence contained in the collection.
 67  
      */
 68  1
     private static final Filter<Evidence> LOW_CONFIDENCE = new Filter<Evidence>() {
 69  
         public boolean passes(Evidence evidence) {
 70  748
             return evidence.getConfidence() == Confidence.LOW;
 71  
         }
 72  
     };
 73  
     /**
 74  
      * Used to iterate over evidence that was used (aka read) from the collection.
 75  
      */
 76  1
     private static final Filter<Evidence> EVIDENCE_USED = new Filter<Evidence>() {
 77  
         public boolean passes(Evidence evidence) {
 78  9254
             return evidence.isUsed();
 79  
         }
 80  
     };
 81  
 
 82  
     /**
 83  
      * Used to iterate over evidence of the specified confidence.
 84  
      *
 85  
      * @param confidence the confidence level for the evidence to be iterated over.
 86  
      * @return {@code Iterable<Evidence>} an iterable collection of evidence
 87  
      */
 88  
     public final Iterable<Evidence> iterator(Confidence confidence) {
 89  504
         if (confidence == Confidence.HIGHEST) {
 90  128
             return EvidenceCollection.HIGHEST_CONFIDENCE.filter(this.list);
 91  376
         } else if (confidence == Confidence.HIGH) {
 92  123
             return EvidenceCollection.HIGH_CONFIDENCE.filter(this.list);
 93  253
         } else if (confidence == Confidence.MEDIUM) {
 94  108
             return EvidenceCollection.MEDIUM_CONFIDENCE.filter(this.list);
 95  
         } else {
 96  145
             return EvidenceCollection.LOW_CONFIDENCE.filter(this.list);
 97  
         }
 98  
     }
 99  
     /**
 100  
      * A collection of evidence.
 101  
      */
 102  
     private final Set<Evidence> list;
 103  
     /**
 104  
      * A collection of strings used to adjust Lucene's term weighting.
 105  
      */
 106  
     private final Set<String> weightedStrings;
 107  
 
 108  
     /**
 109  
      * Creates a new EvidenceCollection.
 110  
      */
 111  212
     public EvidenceCollection() {
 112  212
         list = new TreeSet<Evidence>();
 113  212
         weightedStrings = new HashSet<String>();
 114  212
     }
 115  
 
 116  
     /**
 117  
      * Adds evidence to the collection.
 118  
      *
 119  
      * @param e Evidence.
 120  
      */
 121  
     public void addEvidence(Evidence e) {
 122  70895
         list.add(e);
 123  70895
     }
 124  
 
 125  
     /**
 126  
      * Creates an Evidence object from the parameters and adds the resulting object to the collection.
 127  
      *
 128  
      * @param source the source of the Evidence.
 129  
      * @param name the name of the Evidence.
 130  
      * @param value the value of the Evidence.
 131  
      * @param confidence the confidence of the Evidence.
 132  
      */
 133  
     public void addEvidence(String source, String name, String value, Confidence confidence) {
 134  70835
         final Evidence e = new Evidence(source, name, value, confidence);
 135  70835
         addEvidence(e);
 136  70835
     }
 137  
 
 138  
     /**
 139  
      * Adds term to the weighting collection. The terms added here are used later to boost the score of other terms.
 140  
      * This is a way of combining evidence from multiple sources to boost the confidence of the given evidence.
 141  
      *
 142  
      * Example: The term 'Apache' is found in the manifest of a JAR and is added to the Collection. When we parse the
 143  
      * package names within the JAR file we may add these package names to the "weighted" strings collection to boost
 144  
      * the score in the Lucene query. That way when we construct the Lucene query we find the term Apache in the
 145  
      * collection AND in the weighted strings; as such, we will boost the confidence of the term Apache.
 146  
      *
 147  
      * @param str to add to the weighting collection.
 148  
      */
 149  
     public void addWeighting(String str) {
 150  57
         weightedStrings.add(str);
 151  57
     }
 152  
 
 153  
     /**
 154  
      * Returns a set of Weightings - a list of terms that are believed to be of higher confidence when also found in
 155  
      * another location.
 156  
      *
 157  
      * @return the {@code Set<String>} of weighted terms
 158  
      */
 159  
     public Set<String> getWeighting() {
 160  145
         return weightedStrings;
 161  
     }
 162  
 
 163  
     /**
 164  
      * Returns the set of evidence.
 165  
      *
 166  
      * @return the set of evidence.
 167  
      */
 168  
     public Set<Evidence> getEvidence() {
 169  30
         return list;
 170  
     }
 171  
 
 172  
     /**
 173  
      * Returns the set of evidence from a given source.
 174  
      *
 175  
      * @param source the source of the evidence
 176  
      * @return the set of evidence.
 177  
      */
 178  
     public Set<Evidence> getEvidence(String source) {
 179  0
         if (source == null) {
 180  0
             return null;
 181  
         }
 182  0
         final Set<Evidence> ret = new HashSet<Evidence>();
 183  0
         for (Evidence e : list) {
 184  0
             if (source.equals(e.getSource())) {
 185  0
                 ret.add(e);
 186  
             }
 187  0
         }
 188  0
         return ret;
 189  
     }
 190  
 
 191  
     /**
 192  
      * Returns the set of evidence from a given source and name.
 193  
      *
 194  
      * @param source the source of the evidence
 195  
      * @param name the name of the evidence to return
 196  
      * @return the set of evidence.
 197  
      */
 198  
     public Set<Evidence> getEvidence(String source, String name) {
 199  0
         if (source == null || name == null) {
 200  0
             return null;
 201  
         }
 202  0
         final Set<Evidence> ret = new HashSet<Evidence>();
 203  0
         for (Evidence e : list) {
 204  0
             if (source.equals(e.getSource()) && name.equals(e.getName())) {
 205  0
                 ret.add(e);
 206  
             }
 207  0
         }
 208  0
         return ret;
 209  
     }
 210  
 
 211  
     /**
 212  
      * Implements the iterator interface for the Evidence Collection.
 213  
      *
 214  
      * @return an {@code Iterator<Evidence>}.
 215  
      */
 216  
     public Iterator<Evidence> iterator() {
 217  1465
         return list.iterator();
 218  
     }
 219  
 
 220  
     /**
 221  
      * Used to determine if a given string was used (aka read).
 222  
      *
 223  
      * @param text the string to search for.
 224  
      * @return whether or not the string was used.
 225  
      */
 226  
     public boolean containsUsedString(String text) {
 227  1444
         if (text == null) {
 228  0
             return false;
 229  
         }
 230  1444
         final String textToTest = text.toLowerCase();
 231  
 
 232  1444
         for (Evidence e : EvidenceCollection.EVIDENCE_USED.filter(this)) {
 233  
             //TODO consider changing the regex to only compare alpha-numeric (i.e. strip everything else)
 234  6183
             final String value = urlCorrection(e.getValue().toLowerCase()).replaceAll("[\\s_-]", "");
 235  6183
             if (value.contains(textToTest)) {
 236  510
                 return true;
 237  
             }
 238  5673
         }
 239  934
         return false;
 240  
     }
 241  
 
 242  
     /**
 243  
      * Used to determine if a given version was used (aka read) from the EvidenceCollection.
 244  
      *
 245  
      * @param version the version to search for within the collected evidence.
 246  
      * @return whether or not the version was used.
 247  
      */
 248  
     public boolean containsUsedVersion(DependencyVersion version) {
 249  0
         if (version == null) {
 250  0
             return false;
 251  
         }
 252  
 
 253  0
         for (Evidence e : EvidenceCollection.EVIDENCE_USED.filter(this)) {
 254  0
             final DependencyVersion value = DependencyVersionUtil.parseVersion(e.getValue());
 255  0
             if (value != null && value.matchesAtLeastThreeLevels(version)) {
 256  0
                 return true;
 257  
             }
 258  0
         }
 259  0
         return false;
 260  
     }
 261  
 
 262  
     /**
 263  
      * Returns whether or not the collection contains evidence of a specified Confidence.
 264  
      *
 265  
      * @param confidence A Confidence value.
 266  
      * @return boolean.
 267  
      */
 268  
     public boolean contains(Confidence confidence) {
 269  228
         for (Evidence e : list) {
 270  701
             if (e.getConfidence().equals(confidence)) {
 271  162
                 return true;
 272  
             }
 273  539
         }
 274  66
         return false;
 275  
     }
 276  
 
 277  
     /**
 278  
      * Merges multiple EvidenceCollections together, only merging evidence that was used, into a new EvidenceCollection.
 279  
      *
 280  
      * @param ec One or more EvidenceCollections.
 281  
      * @return a new EvidenceCollection containing the used evidence.
 282  
      */
 283  
     public static EvidenceCollection mergeUsed(EvidenceCollection... ec) {
 284  4
         final EvidenceCollection ret = new EvidenceCollection();
 285  16
         for (EvidenceCollection col : ec) {
 286  12
             for (Evidence e : col.list) {
 287  59
                 if (e.isUsed()) {
 288  58
                     ret.addEvidence(e);
 289  
                 }
 290  59
             }
 291  
         }
 292  4
         return ret;
 293  
     }
 294  
 
 295  
     /**
 296  
      * Merges multiple EvidenceCollections together.
 297  
      *
 298  
      * @param ec One or more EvidenceCollections.
 299  
      * @return a new EvidenceCollection.
 300  
      */
 301  
     public static EvidenceCollection merge(EvidenceCollection... ec) {
 302  1
         final EvidenceCollection ret = new EvidenceCollection();
 303  4
         for (EvidenceCollection col : ec) {
 304  3
             ret.list.addAll(col.list);
 305  3
             ret.weightedStrings.addAll(col.weightedStrings);
 306  
         }
 307  1
         return ret;
 308  
     }
 309  
 
 310  
     /**
 311  
      * Returns a string of evidence 'values'.
 312  
      *
 313  
      * @return a string containing the evidence.
 314  
      */
 315  
     @Override
 316  
     public String toString() {
 317  3
         final StringBuilder sb = new StringBuilder();
 318  3
         for (Evidence e : this.list) {
 319  11
             sb.append(e.getValue()).append(' ');
 320  11
         }
 321  3
         return sb.toString();
 322  
     }
 323  
 
 324  
     /**
 325  
      * Returns the number of elements in the EvidenceCollection.
 326  
      *
 327  
      * @return the number of elements in the collection.
 328  
      */
 329  
     public int size() {
 330  221
         return list.size();
 331  
     }
 332  
 
 333  
     /**
 334  
      * <p>
 335  
      * Takes a string that may contain a fully qualified domain and it will return the string having removed the query
 336  
      * string, the protocol, the sub-domain of 'www', and the file extension of the path.</p>
 337  
      * <p>
 338  
      * This is useful for checking if the evidence contains a specific string. The presence of the protocol, file
 339  
      * extension, etc. may produce false positives.</p>
 340  
      *
 341  
      * <p>
 342  
      * Example, given the following input:</p>
 343  
      * <code>'Please visit https://www.somedomain.com/path1/path2/file.php?id=439'</code>
 344  
      * <p>
 345  
      * The function would return:</p>
 346  
      * <code>'Please visit somedomain path1 path2 file'</code>
 347  
      *
 348  
      * @param value the value that may contain a url
 349  
      * @return the modified string
 350  
      */
 351  
     private String urlCorrection(String value) {
 352  6183
         if (value == null || !UrlStringUtils.containsUrl(value)) {
 353  6050
             return value;
 354  
         }
 355  133
         final StringBuilder sb = new StringBuilder(value.length());
 356  133
         final String[] parts = value.split("\\s");
 357  266
         for (String part : parts) {
 358  133
             if (UrlStringUtils.isUrl(part)) {
 359  
                 try {
 360  133
                     final List<String> data = UrlStringUtils.extractImportantUrlData(part);
 361  133
                     sb.append(' ').append(StringUtils.join(data, ' '));
 362  0
                 } catch (MalformedURLException ex) {
 363  0
                     Logger.getLogger(EvidenceCollection.class.getName()).log(Level.INFO, "error parsing " + part, ex);
 364  0
                     sb.append(' ').append(part);
 365  133
                 }
 366  
             } else {
 367  0
                 sb.append(' ').append(part);
 368  
             }
 369  
         }
 370  133
         return sb.toString().trim();
 371  
     }
 372  
 }