From 4fed1b45896787fe1ace4bfc7666bfb546cfff71 Mon Sep 17 00:00:00 2001 From: Jeremy Long Date: Sun, 23 Jun 2013 22:40:58 -0400 Subject: [PATCH] added correction to deal with http urls causing false positives Former-commit-id: 086de9647c8488ca388310937c6d12360fb04a35 --- .../dependency/EvidenceCollection.java | 99 +++++++++++++------ 1 file changed, 69 insertions(+), 30 deletions(-) diff --git a/src/main/java/org/owasp/dependencycheck/dependency/EvidenceCollection.java b/src/main/java/org/owasp/dependencycheck/dependency/EvidenceCollection.java index 758d09804..4d49484c1 100644 --- a/src/main/java/org/owasp/dependencycheck/dependency/EvidenceCollection.java +++ b/src/main/java/org/owasp/dependencycheck/dependency/EvidenceCollection.java @@ -18,11 +18,17 @@ */ package org.owasp.dependencycheck.dependency; +import java.net.MalformedURLException; import java.util.HashSet; import java.util.Iterator; +import java.util.List; import java.util.Set; import java.util.TreeSet; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.apache.commons.lang.StringUtils; import org.owasp.dependencycheck.utils.Filter; +import org.owasp.dependencycheck.utils.UrlStringUtils; /** * Used to maintain a collection of Evidence. @@ -35,45 +41,37 @@ public class EvidenceCollection implements Iterable { * Used to iterate over high confidence evidence contained in the * collection. */ - private static final Filter HIGH_CONFIDENCE = - new Filter() { - - public boolean passes(Evidence evidence) { - return evidence.getConfidence() == Evidence.Confidence.HIGH; - } - }; + private static final Filter HIGH_CONFIDENCE = new Filter() { + public boolean passes(Evidence evidence) { + return evidence.getConfidence() == Evidence.Confidence.HIGH; + } + }; /** * Used to iterate over medium confidence evidence contained in the * collection. */ - private static final Filter MEDIUM_CONFIDENCE = - new Filter() { - - public boolean passes(Evidence evidence) { - return evidence.getConfidence() == Evidence.Confidence.MEDIUM; - } - }; + private static final Filter MEDIUM_CONFIDENCE = new Filter() { + public boolean passes(Evidence evidence) { + return evidence.getConfidence() == Evidence.Confidence.MEDIUM; + } + }; /** * Used to iterate over low confidence evidence contained in the collection. */ - private static final Filter LOW_CONFIDENCE = - new Filter() { - - public boolean passes(Evidence evidence) { - return evidence.getConfidence() == Evidence.Confidence.LOW; - } - }; + private static final Filter LOW_CONFIDENCE = new Filter() { + public boolean passes(Evidence evidence) { + return evidence.getConfidence() == Evidence.Confidence.LOW; + } + }; /** * Used to iterate over evidence that has was used (aka read) from the * collection. */ - private static final Filter EVIDENCE_USED = - new Filter() { - - public boolean passes(Evidence evidence) { - return evidence.isUsed(); - } - }; + private static final Filter EVIDENCE_USED = new Filter() { + public boolean passes(Evidence evidence) { + return evidence.isUsed(); + } + }; /** * Used to iterate over evidence of the specified confidence. @@ -191,8 +189,11 @@ public class EvidenceCollection implements Iterable { final String textToTest = text.toLowerCase(); for (Evidence e : this.list) { - if (e.isUsed() && e.getValue().toLowerCase().contains(textToTest)) { - return true; + if (e.isUsed()) { + final String value = urlCorrection(e.getValue().toLowerCase()); + if (value.contains(textToTest)) { + return true; + } } } return false; @@ -270,4 +271,42 @@ public class EvidenceCollection implements Iterable { public int size() { return list.size(); } + + /** + *

Takes a string that may contain a fully qualified domain and it will + * return the string having removed the query string, the protocol, the + * sub-domain of 'www', and the file extension of the path.

+ *

This is useful for checking if the evidence contains a specific + * string. The presence of the protocol, file extension, etc. may produce + * false positives. + * + *

Example, given the following input:

+ * 'Please visit https://www.somedomain.com/path1/path2/file.php?id=439' + *

The function would return:

+ * 'Please visit somedomain path1 path2 file' + * + * @param value the value that may contain a url + * @return the modified string + */ + private String urlCorrection(String value) { + if (value == null || !UrlStringUtils.containsUrl(value)) { + return value; + } + final StringBuilder sb = new StringBuilder(value.length()); + final String[] parts = value.split("\\s"); + for (String part : parts) { + if (UrlStringUtils.isUrl(part)) { + try { + final List data = UrlStringUtils.extractImportantUrlData(part); + sb.append(' ').append(StringUtils.join(data, ' ')); + } catch (MalformedURLException ex) { + Logger.getLogger(EvidenceCollection.class.getName()).log(Level.INFO, "error parsing " + part, ex); + sb.append(' ').append(part); + } + } else { + sb.append(' ').append(part); + } + } + return sb.toString().trim(); + } }