Coverage Report - org.owasp.dependencycheck.analyzer.CPEAnalyzer
 
Classes in this File Line Coverage Branch Coverage Complexity
CPEAnalyzer
91%
177/194
84%
107/126
4.2
CPEAnalyzer$IdentifierConfidence
100%
3/3
N/A
4.2
CPEAnalyzer$IdentifierMatch
40%
12/30
16%
4/24
4.2
 
 1  
 /*
 2  
  * This file is part of dependency-check-core.
 3  
  *
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  *
 8  
  *     http://www.apache.org/licenses/LICENSE-2.0
 9  
  *
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  *
 16  
  * Copyright (c) 2012 Jeremy Long. All Rights Reserved.
 17  
  */
 18  
 package org.owasp.dependencycheck.analyzer;
 19  
 
 20  
 import java.io.IOException;
 21  
 import java.io.UnsupportedEncodingException;
 22  
 import java.net.URLEncoder;
 23  
 import java.util.ArrayList;
 24  
 import java.util.Collections;
 25  
 import java.util.List;
 26  
 import java.util.Set;
 27  
 import java.util.StringTokenizer;
 28  
 import java.util.logging.Level;
 29  
 import java.util.logging.Logger;
 30  
 import org.apache.lucene.document.Document;
 31  
 import org.apache.lucene.index.CorruptIndexException;
 32  
 import org.apache.lucene.queryparser.classic.ParseException;
 33  
 import org.apache.lucene.search.ScoreDoc;
 34  
 import org.apache.lucene.search.TopDocs;
 35  
 import org.owasp.dependencycheck.Engine;
 36  
 import org.owasp.dependencycheck.analyzer.exception.AnalysisException;
 37  
 import org.owasp.dependencycheck.data.cpe.CpeMemoryIndex;
 38  
 import org.owasp.dependencycheck.data.cpe.Fields;
 39  
 import org.owasp.dependencycheck.data.cpe.IndexEntry;
 40  
 import org.owasp.dependencycheck.data.cpe.IndexException;
 41  
 import org.owasp.dependencycheck.data.lucene.LuceneUtils;
 42  
 import org.owasp.dependencycheck.data.nvdcve.CveDB;
 43  
 import org.owasp.dependencycheck.data.nvdcve.DatabaseException;
 44  
 import org.owasp.dependencycheck.dependency.Confidence;
 45  
 import org.owasp.dependencycheck.dependency.Dependency;
 46  
 import org.owasp.dependencycheck.dependency.Evidence;
 47  
 import org.owasp.dependencycheck.dependency.EvidenceCollection;
 48  
 import org.owasp.dependencycheck.dependency.Identifier;
 49  
 import org.owasp.dependencycheck.dependency.VulnerableSoftware;
 50  
 import org.owasp.dependencycheck.utils.DependencyVersion;
 51  
 import org.owasp.dependencycheck.utils.DependencyVersionUtil;
 52  
 
 53  
 /**
 54  
  * CPEAnalyzer is a utility class that takes a project dependency and attempts to discern if there is an associated CPE.
 55  
  * It uses the evidence contained within the dependency to search the Lucene index.
 56  
  *
 57  
  * @author Jeremy Long <jeremy.long@owasp.org>
 58  
  */
 59  
 public class CPEAnalyzer implements Analyzer {
 60  
 
 61  
     /**
 62  
      * The maximum number of query results to return.
 63  
      */
 64  
     static final int MAX_QUERY_RESULTS = 25;
 65  
     /**
 66  
      * The weighting boost to give terms when constructing the Lucene query.
 67  
      */
 68  
     static final String WEIGHTING_BOOST = "^5";
 69  
     /**
 70  
      * A string representation of a regular expression matching every character that is not utilized within the CPE Names.
 71  
      */
 72  
     static final String CLEANSE_CHARACTER_RX = "[^A-Za-z0-9 ._-]";
 73  
     /**
 74  
      * A string representation of a regular expression used to remove all but alpha characters.
 75  
      */
 76  
     static final String CLEANSE_NONALPHA_RX = "[^A-Za-z]*";
 77  
     /**
 78  
      * The additional size to add to a new StringBuilder to account for extra data that will be written into the string.
 79  
      */
 80  
     static final int STRING_BUILDER_BUFFER = 20;
 81  
     /**
 82  
      * The CPE in memory index.
 83  
      */
 84  
     private CpeMemoryIndex cpe;
 85  
     /**
 86  
      * The CVE Database.
 87  
      */
 88  
     private CveDB cve;
 89  
 
 90  
     /**
 91  
      * Opens the data source.
 92  
      *
 93  
      * @throws IOException when the Lucene directory to be queried does not exist or is corrupt.
 94  
      * @throws DatabaseException when the database throws an exception. This usually occurs when the database is in use
 95  
      * by another process.
 96  
      */
 97  
     public void open() throws IOException, DatabaseException {
 98  11
         Logger.getLogger(CPEAnalyzer.class.getName()).log(Level.FINE, "Opening the CVE Database");
 99  11
         cve = new CveDB();
 100  11
         cve.open();
 101  11
         Logger.getLogger(CPEAnalyzer.class.getName()).log(Level.FINE, "Creating the Lucene CPE Index");
 102  11
         cpe = CpeMemoryIndex.getInstance();
 103  
         try {
 104  11
             cpe.open(cve);
 105  0
         } catch (IndexException ex) {
 106  0
             Logger.getLogger(CPEAnalyzer.class.getName()).log(Level.FINE, "IndexException", ex);
 107  0
             throw new DatabaseException(ex);
 108  11
         }
 109  11
     }
 110  
 
 111  
     /**
 112  
      * Closes the data sources.
 113  
      */
 114  
     @Override
 115  
     public void close() {
 116  11
         if (cpe != null) {
 117  11
             cpe.close();
 118  
         }
 119  11
         if (cve != null) {
 120  11
             cve.close();
 121  
         }
 122  11
     }
 123  
 
 124  
     /**
 125  
      * Searches the data store of CPE entries, trying to identify the CPE for the given dependency based on the evidence
 126  
      * contained within. The dependency passed in is updated with any identified CPE values.
 127  
      *
 128  
      * @param dependency the dependency to search for CPE entries on.
 129  
      * @throws CorruptIndexException is thrown when the Lucene index is corrupt.
 130  
      * @throws IOException is thrown when an IOException occurs.
 131  
      * @throws ParseException is thrown when the Lucene query cannot be parsed.
 132  
      */
 133  
     protected void determineCPE(Dependency dependency) throws CorruptIndexException, IOException, ParseException {
 134  19
         Confidence confidence = Confidence.HIGHEST;
 135  
 
 136  19
         String vendors = addEvidenceWithoutDuplicateTerms("", dependency.getVendorEvidence(), confidence);
 137  19
         String products = addEvidenceWithoutDuplicateTerms("", dependency.getProductEvidence(), confidence);
 138  
         /* bug fix for #40 - version evidence is not showing up as "used" in the reports if there is no
 139  
          * CPE identified. As such, we are "using" the evidence and ignoring the results. */
 140  19
         addEvidenceWithoutDuplicateTerms("", dependency.getVersionEvidence(), confidence);
 141  
 
 142  19
         int ctr = 0;
 143  
         do {
 144  76
             if (!vendors.isEmpty() && !products.isEmpty()) {
 145  72
                 final List<IndexEntry> entries = searchCPE(vendors, products, dependency.getProductEvidence().getWeighting(),
 146  
                         dependency.getVendorEvidence().getWeighting());
 147  
 
 148  72
                 for (IndexEntry e : entries) {
 149  729
                     if (verifyEntry(e, dependency)) {
 150  71
                         final String vendor = e.getVendor();
 151  71
                         final String product = e.getProduct();
 152  71
                         determineIdentifiers(dependency, vendor, product);
 153  
                     }
 154  729
                 }
 155  
             }
 156  76
             confidence = reduceConfidence(confidence);
 157  76
             if (dependency.getVendorEvidence().contains(confidence)) {
 158  61
                 vendors = addEvidenceWithoutDuplicateTerms(vendors, dependency.getVendorEvidence(), confidence);
 159  
             }
 160  76
             if (dependency.getProductEvidence().contains(confidence)) {
 161  64
                 products = addEvidenceWithoutDuplicateTerms(products, dependency.getProductEvidence(), confidence);
 162  
             }
 163  
             /* bug fix for #40 - version evidence is not showing up as "used" in the reports if there is no
 164  
              * CPE identified. As such, we are "using" the evidence and ignoring the results. */
 165  76
             if (dependency.getVersionEvidence().contains(confidence)) {
 166  37
                 addEvidenceWithoutDuplicateTerms("", dependency.getVersionEvidence(), confidence);
 167  
             }
 168  76
         } while ((++ctr) < 4);
 169  19
     }
 170  
 
 171  
     /**
 172  
      * Returns the text created by concatenating the text and the values from the EvidenceCollection (filtered for a
 173  
      * specific confidence). This attempts to prevent duplicate terms from being added.<br/><br/> Note, if the evidence
 174  
      * is longer than 200 characters it will be truncated (NOTE: this method does not itself perform any truncation).
 175  
      *
 176  
      * @param text the base text.
 177  
      * @param ec an EvidenceCollection
 178  
      * @param confidenceFilter a Confidence level to filter the evidence by.
 179  
      * @return the new evidence text
 180  
      */
 181  
     private String addEvidenceWithoutDuplicateTerms(final String text, final EvidenceCollection ec, Confidence confidenceFilter) {
 182  219
         final String txt = (text == null) ? "" : text;
 183  219
         final StringBuilder sb = new StringBuilder(txt.length() + (20 * ec.size()));
 184  219
         sb.append(' ').append(txt).append(' ');
 185  219
         for (Evidence e : ec.iterator(confidenceFilter)) {
 186  409
             String value = e.getValue();
 187  
 
 188  
             //hack to get around the fact that lucene does a really good job of recognizing domains and not
 189  
             // splitting them. TODO - put together a better lucene analyzer specific to the domain.
 190  409
             if (value.startsWith("http://")) {
 191  32
                 value = value.substring(7).replaceAll("\\.", " ");
 192  
             }
 193  409
             if (value.startsWith("https://")) {
 194  0
                 value = value.substring(8).replaceAll("\\.", " ");
 195  
             }
 196  409
             if (sb.indexOf(" " + value + " ") < 0) {
 197  270
                 sb.append(value).append(' ');
 198  
             }
 199  409
         }
 200  219
         return sb.toString().trim();
 201  
     }
 202  
 
 203  
     /**
 204  
      * Reduces the given confidence by one level: HIGHEST becomes HIGH, HIGH becomes MEDIUM, and anything else becomes LOW.
 205  
      *
 206  
      * @param c the confidence to reduce.
 207  
      * @return One less than the confidence passed in.
 208  
      */
 209  
     private Confidence reduceConfidence(final Confidence c) {
 210  76
         if (c == Confidence.HIGHEST) {
 211  19
             return Confidence.HIGH;
 212  57
         } else if (c == Confidence.HIGH) {
 213  19
             return Confidence.MEDIUM;
 214  
         } else {
 215  38
             return Confidence.LOW;
 216  
         }
 217  
     }
 218  
 
 219  
     /**
 220  
      * <p>
 221  
      * Searches the Lucene CPE index to identify possible CPE entries associated with the supplied vendor, product, and
 222  
      * version.</p>
 223  
      *
 224  
      * <p>
 225  
      * If either the vendorWeightings or productWeightings lists have been populated this data is used to add weighting
 226  
      * factors to the search.</p>
 227  
      *
 228  
      * @param vendor the text used to search the vendor field
 229  
      * @param product the text used to search the product field
 230  
      * @param vendorWeightings a list of strings to use to add weighting factors to the vendor field
 231  
      * @param productWeightings Adds a list of strings that will be used to add weighting factors to the product search
 232  
      * @return a list of possible CPE values
 233  
      * @throws CorruptIndexException when the Lucene index is corrupt
 234  
      * @throws IOException when the Lucene index is not found
 235  
      * @throws ParseException when the generated query is not valid
 236  
      */
 237  
     protected List<IndexEntry> searchCPE(String vendor, String product,
 238  
             Set<String> vendorWeightings, Set<String> productWeightings)
 239  
             throws CorruptIndexException, IOException, ParseException {
 240  73
         final ArrayList<IndexEntry> ret = new ArrayList<IndexEntry>(MAX_QUERY_RESULTS);
 241  
 
 242  73
         final String searchString = buildSearch(vendor, product, vendorWeightings, productWeightings);
 243  73
         if (searchString == null) {
 244  0
             return ret;
 245  
         }
 246  
 
 247  73
         final TopDocs docs = cpe.search(searchString, MAX_QUERY_RESULTS);
 248  1222
         for (ScoreDoc d : docs.scoreDocs) {
 249  1149
             if (d.score >= 0.08) {
 250  750
                 final Document doc = cpe.getDocument(d.doc);
 251  750
                 final IndexEntry entry = new IndexEntry();
 252  750
                 entry.setVendor(doc.get(Fields.VENDOR));
 253  750
                 entry.setProduct(doc.get(Fields.PRODUCT));
 254  
 //                if (d.score < 0.08) {
 255  
 //                    System.out.print(entry.getVendor());
 256  
 //                    System.out.print(":");
 257  
 //                    System.out.print(entry.getProduct());
 258  
 //                    System.out.print(":");
 259  
 //                    System.out.println(d.score);
 260  
 //                }
 261  750
                 entry.setSearchScore(d.score);
 262  750
                 if (!ret.contains(entry)) {
 263  750
                     ret.add(entry);
 264  
                 }
 265  
             }
 266  
         }
 267  73
         return ret;
 268  
     }
 269  
 
 270  
     /**
 271  
      * <p>
 272  
      * Builds a Lucene search string by properly escaping data and constructing a valid search query.</p>
 273  
      *
 274  
      * <p>
 275  
      * If either the vendorWeighting or productWeightings sets have been populated this data is used to add weighting
 276  
      * factors to the search string generated.</p>
 277  
      *
 278  
      * @param vendor text to search the vendor field
 279  
      * @param product text to search the product field
 280  
      * @param vendorWeighting a list of strings to apply to the vendor to boost the terms weight
 281  
      * @param productWeightings a list of strings to apply to the product to boost the terms weight
 282  
      * @return the Lucene query
 283  
      */
 284  
     protected String buildSearch(String vendor, String product,
 285  
             Set<String> vendorWeighting, Set<String> productWeightings) {
 286  77
         final String v = vendor; //.replaceAll("[^\\w\\d]", " ");
 287  77
         final String p = product; //.replaceAll("[^\\w\\d]", " ");
 288  77
         final StringBuilder sb = new StringBuilder(v.length() + p.length()
 289  
                 + Fields.PRODUCT.length() + Fields.VENDOR.length() + STRING_BUILDER_BUFFER);
 290  
 
 291  77
         if (!appendWeightedSearch(sb, Fields.PRODUCT, p, productWeightings)) {
 292  0
             return null;
 293  
         }
 294  77
         sb.append(" AND ");
 295  77
         if (!appendWeightedSearch(sb, Fields.VENDOR, v, vendorWeighting)) {
 296  0
             return null;
 297  
         }
 298  77
         return sb.toString();
 299  
     }
 300  
 
 301  
     /**
 302  
      * This method constructs a Lucene query for a given field. The searchText is split into separate words and if the
 303  
      * word is within the list of weighted words then an additional weighting is applied to the term as it is appended
 304  
      * into the query.
 305  
      *
 306  
      * @param sb a StringBuilder that the query text will be appended to.
 307  
      * @param field the field within the Lucene index that the query is searching.
 308  
      * @param searchText text used to construct the query.
 309  
      * @param weightedText a list of terms that will be considered higher importance when searching.
 310  
      * @return if the append was successful.
 311  
      */
 312  
     private boolean appendWeightedSearch(StringBuilder sb, String field, String searchText, Set<String> weightedText) {
 313  154
         sb.append(" ").append(field).append(":( ");
 314  
 
 315  154
         final String cleanText = cleanseText(searchText);
 316  
 
 317  154
         if ("".equals(cleanText)) {
 318  0
             return false;
 319  
         }
 320  
 
 321  154
         if (weightedText == null || weightedText.isEmpty()) {
 322  40
             LuceneUtils.appendEscapedLuceneQuery(sb, cleanText);
 323  
         } else {
 324  114
             final StringTokenizer tokens = new StringTokenizer(cleanText);
 325  1211
             while (tokens.hasMoreElements()) {
 326  1097
                 final String word = tokens.nextToken();
 327  1097
                 String temp = null;
 328  1097
                 for (String weighted : weightedText) {
 329  2202
                     final String weightedStr = cleanseText(weighted);
 330  2202
                     if (equalsIgnoreCaseAndNonAlpha(word, weightedStr)) {
 331  251
                         temp = LuceneUtils.escapeLuceneQuery(word) + WEIGHTING_BOOST;
 332  251
                         if (!word.equalsIgnoreCase(weightedStr)) {
 333  18
                             temp += " " + LuceneUtils.escapeLuceneQuery(weightedStr) + WEIGHTING_BOOST;
 334  
                         }
 335  
                     }
 336  2202
                 }
 337  1097
                 if (temp == null) {
 338  846
                     temp = LuceneUtils.escapeLuceneQuery(word);
 339  
                 }
 340  1097
                 sb.append(" ").append(temp);
 341  1097
             }
 342  
         }
 343  154
         sb.append(" ) ");
 344  154
         return true;
 345  
     }
 346  
 
 347  
     /**
 348  
      * Removes characters from the input text that are not used within the CPE index.
 349  
      *
 350  
      * @param text is the text to remove the characters from.
 351  
      * @return the text having removed some characters.
 352  
      */
 353  
     private String cleanseText(String text) {
 354  2356
         return text.replaceAll(CLEANSE_CHARACTER_RX, " ");
 355  
     }
 356  
 
 357  
     /**
 358  
      * Compares two strings after lower casing them and removing the non-alpha characters.
 359  
      *
 360  
      * @param l string one to compare.
 361  
      * @param r string two to compare.
 362  
      * @return whether or not the two strings are similar.
 363  
      */
 364  
     private boolean equalsIgnoreCaseAndNonAlpha(String l, String r) {
 365  2202
         if (l == null || r == null) {
 366  0
             return false;
 367  
         }
 368  
 
 369  2202
         final String left = l.replaceAll(CLEANSE_NONALPHA_RX, "");
 370  2202
         final String right = r.replaceAll(CLEANSE_NONALPHA_RX, "");
 371  2202
         return left.equalsIgnoreCase(right);
 372  
     }
 373  
 
 374  
     /**
 375  
      * Ensures that the CPE Identified matches the dependency. This validates that the product, vendor, and version
 376  
      * information for the CPE are contained within the dependencies evidence.
 377  
      *
 378  
      * @param entry a CPE entry.
 379  
      * @param dependency the dependency that the CPE entries could be for.
 380  
      * @return whether or not the entry is valid.
 381  
      */
 382  
     private boolean verifyEntry(final IndexEntry entry, final Dependency dependency) {
 383  729
         boolean isValid = false;
 384  
 
 385  729
         if (collectionContainsString(dependency.getProductEvidence(), entry.getProduct())
 386  
                 && collectionContainsString(dependency.getVendorEvidence(), entry.getVendor())) {
 387  
             //&& collectionContainsVersion(dependency.getVersionEvidence(), entry.getVersion())
 388  71
             isValid = true;
 389  
         }
 390  729
         return isValid;
 391  
     }
 392  
 
 393  
     /**
 394  
      * Used to determine if the EvidenceCollection contains a specific string.
 395  
      *
 396  
      * @param ec an EvidenceCollection
 397  
      * @param text the text to search for
 398  
      * @return whether or not the EvidenceCollection contains the string
 399  
      */
 400  
     private boolean collectionContainsString(EvidenceCollection ec, String text) {
 401  
 
 402  
         //<editor-fold defaultstate="collapsed" desc="This code fold contains an old version of the code, delete once more testing is done">
 403  
         //        String[] splitText = text.split("[\\s_-]");
 404  
         //
 405  
         //        for (String search : splitText) {
 406  
         //            //final String search = text.replaceAll("[\\s_-]", "").toLowerCase();
 407  
         //            if (ec.containsUsedString(search)) {
 408  
         //                return true;
 409  
         //            }
 410  
         //        }
 411  
         //</editor-fold>
 412  
         //TODO - likely need to change the split... not sure if this will work for CPE with special chars
 413  806
         if (text == null) {
 414  0
             return false;
 415  
         }
 416  806
         final String[] words = text.split("[\\s_-]");
 417  806
         final List<String> list = new ArrayList<String>();
 418  806
         String tempWord = null;
 419  2270
         for (String word : words) {
 420  
             /*
 421  
              single letter words should be concatenated with the next word.
 422  
              so { "m", "core", "sample" } -> { "mcore", "sample" }
 423  
              */
 424  1464
             if (tempWord != null) {
 425  17
                 list.add(tempWord + word);
 426  17
                 tempWord = null;
 427  1447
             } else if (word.length() <= 2) {
 428  42
                 tempWord = word;
 429  
             } else {
 430  1405
                 list.add(word);
 431  
             }
 432  
         }
 433  806
         if (tempWord != null && !list.isEmpty()) {
 434  21
             final String tmp = list.get(list.size() - 1) + tempWord;
 435  21
             list.add(tmp);
 436  
         }
 437  806
         boolean contains = true;
 438  806
         for (String word : list) {
 439  1443
             contains &= ec.containsUsedString(word);
 440  1443
         }
 441  806
         return contains;
 442  
     }
 443  
 
 444  
     /**
 445  
      * Analyzes a dependency and attempts to determine if there are any CPE identifiers for this dependency.
 446  
      *
 447  
      * @param dependency The Dependency to analyze.
 448  
      * @param engine The analysis engine
 449  
      * @throws AnalysisException is thrown if there is an issue analyzing the dependency.
 450  
      */
 451  
     @Override
 452  
     public void analyze(Dependency dependency, Engine engine) throws AnalysisException {
 453  
         try {
 454  15
             determineCPE(dependency);
 455  0
         } catch (CorruptIndexException ex) {
 456  0
             throw new AnalysisException("CPE Index is corrupt.", ex);
 457  0
         } catch (IOException ex) {
 458  0
             throw new AnalysisException("Failure opening the CPE Index.", ex);
 459  0
         } catch (ParseException ex) {
 460  0
             throw new AnalysisException("Unable to parse the generated Lucene query for this dependency.", ex);
 461  15
         }
 462  15
     }
 463  
 
 464  
     /**
 465  
      * Returns <code>null</code>; no specific set of extensions is listed because this analyzer supports all dependency types.
 466  
      *
 467  
      * @return <code>null</code>.
 468  
      */
 469  
     @Override
 470  
     public Set<String> getSupportedExtensions() {
 471  
         return null;
 472  
     }
 473  
 
 474  
     /**
 475  
      * Returns the name of this analyzer.
 476  
      *
 477  
      * @return the name of this analyzer.
 478  
      */
 479  
     @Override
 480  
     public String getName() {
 481  9
         return "CPE Analyzer";
 482  
     }
 483  
 
 484  
     /**
 485  
      * Returns true because this analyzer supports all dependency types.
 486  
      *
 487  
      * @param extension the file extension of the dependency being analyzed.
 488  
      * @return true.
 489  
      */
 490  
     @Override
 491  
     public boolean supportsExtension(String extension) {
 492  9
         return true;
 493  
     }
 494  
 
 495  
     /**
 496  
      * Returns the analysis phase that this analyzer should run in.
 497  
      *
 498  
      * @return the analysis phase that this analyzer should run in.
 499  
      */
 500  
     @Override
 501  
     public AnalysisPhase getAnalysisPhase() {
 502  6
         return AnalysisPhase.IDENTIFIER_ANALYSIS;
 503  
     }
 504  
 
 505  
     /**
 506  
      * Opens the CPE Lucene Index.
 507  
      *
 508  
      * @throws Exception is thrown if there is an issue opening the index.
 509  
      */
 510  
     @Override
 511  
     public void initialize() throws Exception {
 512  3
         this.open();
 513  3
     }
 514  
 
 515  
     /**
 516  
      * Retrieves a list of CPE values from the CveDB based on the vendor and product passed in. The list is then
 517  
      * validated to find only CPEs that are valid for the given dependency. It is possible that the CPE identified is a
 518  
      * best effort "guess" based on the vendor, product, and version information.
 519  
      *
 520  
      * @param dependency the Dependency being analyzed
 521  
      * @param vendor the vendor for the CPE being analyzed
 522  
      * @param product the product for the CPE being analyzed
 523  
      * @throws UnsupportedEncodingException is thrown if UTF-8 is not supported
 524  
      */
 525  
     private void determineIdentifiers(Dependency dependency, String vendor, String product) throws UnsupportedEncodingException {
 526  71
         final Set<VulnerableSoftware> cpes = cve.getCPEs(vendor, product);
 527  71
         DependencyVersion bestGuess = new DependencyVersion("-");
 528  71
         Confidence bestGuessConf = null;
 529  71
         final List<IdentifierMatch> collected = new ArrayList<IdentifierMatch>();
 530  355
         for (Confidence conf : Confidence.values()) {
 531  284
             for (Evidence evidence : dependency.getVersionEvidence().iterator(conf)) {
 532  160
                 final DependencyVersion evVer = DependencyVersionUtil.parseVersion(evidence.getValue());
 533  160
                 if (evVer == null) {
 534  0
                     continue;
 535  
                 }
 536  160
                 for (VulnerableSoftware vs : cpes) {
 537  
                     DependencyVersion dbVer;
 538  9104
                     if (vs.getRevision() != null && !vs.getRevision().isEmpty()) {
 539  2855
                         dbVer = DependencyVersionUtil.parseVersion(vs.getVersion() + "." + vs.getRevision());
 540  
                     } else {
 541  6249
                         dbVer = DependencyVersionUtil.parseVersion(vs.getVersion());
 542  
                     }
 543  9104
                     if (dbVer == null //special case, no version specified - everything is vulnerable
 544  
                             || evVer.equals(dbVer)) { //yeah! exact match
 545  169
                         final String url = String.format("http://web.nvd.nist.gov/view/vuln/search?cpe=%s", URLEncoder.encode(vs.getName(), "UTF-8"));
 546  169
                         final IdentifierMatch match = new IdentifierMatch("cpe", vs.getName(), url, IdentifierConfidence.EXACT_MATCH, conf);
 547  169
                         collected.add(match);
 548  169
                     } else {
 549  
                         //TODO the following isn't quite right is it? need to think about this guessing game a bit more.
 550  8935
                         if (evVer.getVersionParts().size() <= dbVer.getVersionParts().size()
 551  
                                 && evVer.matchesAtLeastThreeLevels(dbVer)) {
 552  412
                             if (bestGuessConf == null || bestGuessConf.compareTo(conf) > 0) {
 553  10
                                 if (bestGuess.getVersionParts().size() < dbVer.getVersionParts().size()) {
 554  10
                                     bestGuess = dbVer;
 555  10
                                     bestGuessConf = conf;
 556  
                                 }
 557  
                             }
 558  
                         }
 559  
                     }
 560  9104
                 }
 561  160
                 if (bestGuessConf == null || bestGuessConf.compareTo(conf) > 0) {
 562  49
                     if (bestGuess.getVersionParts().size() < evVer.getVersionParts().size()) {
 563  49
                         bestGuess = evVer;
 564  49
                         bestGuessConf = conf;
 565  
                     }
 566  
                 }
 567  160
             }
 568  
         }
 569  71
         final String cpeName = String.format("cpe:/a:%s:%s:%s", vendor, product, bestGuess.toString());
 570  71
         final String url = null; //String.format("http://web.nvd.nist.gov/view/vuln/search?cpe=%s", URLEncoder.encode(cpeName, "UTF-8"));
 571  71
         if (bestGuessConf == null) {
 572  12
             bestGuessConf = Confidence.LOW;
 573  
         }
 574  71
         final IdentifierMatch match = new IdentifierMatch("cpe", cpeName, url, IdentifierConfidence.BEST_GUESS, bestGuessConf);
 575  71
         collected.add(match);
 576  
 
 577  71
         Collections.sort(collected);
 578  71
         final IdentifierConfidence bestIdentifierQuality = collected.get(0).getConfidence();
 579  71
         final Confidence bestEvidenceQuality = collected.get(0).getEvidenceConfidence();
 580  71
         for (IdentifierMatch m : collected) {
 581  240
             if (bestIdentifierQuality.equals(m.getConfidence())
 582  
                     && bestEvidenceQuality.equals(m.getEvidenceConfidence())) {
 583  88
                 final Identifier i = m.getIdentifier();
 584  88
                 if (bestIdentifierQuality == IdentifierConfidence.BEST_GUESS) {
 585  23
                     i.setConfidence(Confidence.LOW);
 586  
                 } else {
 587  65
                     i.setConfidence(bestEvidenceQuality);
 588  
                 }
 589  88
                 dependency.addIdentifier(i);
 590  
             }
 591  240
         }
 592  71
     }
 593  
 
 594  
     /**
 595  
      * The confidence whether the identifier is an exact match, or a best guess.
 596  
      */
 597  1
     private enum IdentifierConfidence {
 598  
 
 599  
         /**
 600  
          * An exact match for the CPE.
 601  
          */
 602  1
         EXACT_MATCH,
 603  
         /**
 604  
          * A best guess for the CPE.
 605  
          */
 606  1
         BEST_GUESS
 607  
     }
 608  
 
 609  
     /**
 610  
      * A simple object to hold an identifier and carry information about the confidence in the identifier.
      * <p>
      * Instances are Comparable: they are ordered by identifier confidence first, then by evidence confidence,
      * and finally by the identifier itself (see compareTo).
 611  
      */
 612  265
     private static class IdentifierMatch implements Comparable<IdentifierMatch> {
 613  
 
 614  
         /**
 615  
          * Constructs an IdentifierMatch, creating a new Identifier from the given type, value, and URL.
 616  
          *
 617  
          * @param type the type of identifier (such as CPE)
 618  
          * @param value the value of the identifier
 619  
          * @param url the URL of the identifier
 620  
          * @param identifierConfidence the confidence in the identifier: best guess or exact match
 621  
          * @param evidenceConfidence the confidence of the evidence used to find the identifier
 622  
          */
 623  240
         IdentifierMatch(String type, String value, String url, IdentifierConfidence identifierConfidence, Confidence evidenceConfidence) {
 624  240
             this.identifier = new Identifier(type, value, url);
 625  240
             this.confidence = identifierConfidence;
 626  240
             this.evidenceConfidence = evidenceConfidence;
 627  240
         }
 628  
         //<editor-fold defaultstate="collapsed" desc="Property implementations: evidenceConfidence, confidence, identifier">
 629  
         /**
 630  
          * The confidence in the evidence used to identify this match.
 631  
          */
 632  
         private Confidence evidenceConfidence;
 633  
 
 634  
         /**
 635  
          * Gets the confidence in the evidence used to identify this match.
 636  
          *
 637  
          * @return the value of evidenceConfidence
 638  
          */
 639  
         public Confidence getEvidenceConfidence() {
 640  
             return evidenceConfidence;
 641  
         }
 642  
 
 643  
         /**
 644  
          * Sets the confidence in the evidence used to identify this match.
 645  
          *
 646  
          * @param evidenceConfidence new value of evidenceConfidence
 647  
          */
 648  
         public void setEvidenceConfidence(Confidence evidenceConfidence) {
 649  
             this.evidenceConfidence = evidenceConfidence;
 650  
         }
 651  
         /**
 652  
          * Whether this match is an exact match or a best guess.
 653  
          */
 654  
         private IdentifierConfidence confidence;
 655  
 
 656  
         /**
 657  
          * Gets the identifier confidence (exact match or best guess).
 658  
          *
 659  
          * @return the value of confidence
 660  
          */
 661  
         public IdentifierConfidence getConfidence() {
 662  
             return confidence;
 663  
         }
 664  
 
 665  
         /**
 666  
          * Sets the identifier confidence (exact match or best guess).
 667  
          *
 668  
          * @param confidence new value of confidence
 669  
          */
 670  
         public void setConfidence(IdentifierConfidence confidence) {
 671  
             this.confidence = confidence;
 672  
         }
 673  
         /**
 674  
          * The CPE identifier.
 675  
          */
 676  
         private Identifier identifier;
 677  
 
 678  
         /**
 679  
          * Gets the identifier held by this match.
 680  
          *
 681  
          * @return the value of identifier
 682  
          */
 683  
         public Identifier getIdentifier() {
 684  
             return identifier;
 685  
         }
 686  
 
 687  
         /**
 688  
          * Sets the identifier held by this match.
 689  
          *
 690  
          * @param identifier new value of identifier
 691  
          */
 692  
         public void setIdentifier(Identifier identifier) {
 693  
             this.identifier = identifier;
 694  
         }
 695  
         //</editor-fold>
 696  
         //<editor-fold defaultstate="collapsed" desc="Standard implementations of toString, hashCode, and equals">
 697  
 
 698  
         /**
 699  
          * Standard toString() implementation; includes evidenceConfidence, confidence, and identifier.
 700  
          *
 701  
          * @return the string representation of the object
 702  
          */
 703  
         @Override
 704  
         public String toString() {
 705  0
             return "IdentifierMatch{" + "evidenceConfidence=" + evidenceConfidence
 706  
                     + ", confidence=" + confidence + ", identifier=" + identifier + '}';
 707  
         }
 708  
 
 709  
         /**
 710  
          * Standard hashCode() implementation. Combines the hash codes of evidenceConfidence, confidence, and
          * identifier (the same fields compared by equals); a null field contributes 0.
 711  
          *
 712  
          * @return the hashCode
 713  
          */
 714  
         @Override
 715  
         public int hashCode() {
 716  0
             int hash = 5;
 717  0
             hash = 97 * hash + (this.evidenceConfidence != null ? this.evidenceConfidence.hashCode() : 0);
 718  0
             hash = 97 * hash + (this.confidence != null ? this.confidence.hashCode() : 0);
 719  0
             hash = 97 * hash + (this.identifier != null ? this.identifier.hashCode() : 0);
 720  0
             return hash;
 721  
         }
 722  
 
 723  
         /**
 724  
          * Standard equals implementation. Two matches are equal when they are of the same class, their
          * evidenceConfidence and confidence are the same reference, and their identifiers are equal (or both null).
          * <p>
          * NOTE(review): the reference comparison of evidenceConfidence is only value-correct if Confidence is an
          * enum, as IdentifierConfidence is - confirm against the Confidence declaration.
 725  
          *
 726  
          * @param obj the object to compare
 727  
          * @return true if the objects are equal, otherwise false
 728  
          */
 729  
         @Override
 730  
         public boolean equals(Object obj) {
 731  0
             if (obj == null) {
 732  0
                 return false;
 733  
             }
 734  0
             if (getClass() != obj.getClass()) {
 735  0
                 return false;
 736  
             }
 737  0
             final IdentifierMatch other = (IdentifierMatch) obj;
 738  0
             if (this.evidenceConfidence != other.evidenceConfidence) {
 739  0
                 return false;
 740  
             }
 741  0
             if (this.confidence != other.confidence) {
 742  0
                 return false;
 743  
             }
 744  0
             if (this.identifier != other.identifier && (this.identifier == null || !this.identifier.equals(other.identifier))) {
 745  0
                 return false;
 746  
             }
 747  0
             return true;
 748  
         }
 749  
         //</editor-fold>
 750  
 
 751  
         /**
 752  
          * Standard implementation of compareTo that compares identifier confidence, evidence confidence, and then the
 753  
          * identifier.
          * <p>
          * Each later key is only consulted when all earlier keys compare as equal. The fields are dereferenced
          * without null checks, so a null confidence (or a null evidenceConfidence or identifier once the earlier
          * comparisons tie) on this instance causes a NullPointerException.
 754  
          *
 755  
          * @param o the IdentifierMatch to compare to
 756  
          * @return the natural ordering of IdentifierMatch
 757  
          */
 758  
         @Override
 759  
         public int compareTo(IdentifierMatch o) {
 760  265
             int conf = this.confidence.compareTo(o.confidence);
 761  265
             if (conf == 0) {
 762  193
                 conf = this.evidenceConfidence.compareTo(o.evidenceConfidence);
 763  193
                 if (conf == 0) {
 764  83
                     conf = identifier.compareTo(o.identifier);
 765  
                 }
 766  
             }
 767  265
             return conf;
 768  
         }
 769  
     }
 770  
 }