From 5eebea7b7b2f1b4848eacddfcc8f1c249cc6762d Mon Sep 17 00:00:00 2001 From: Jeremy Long Date: Fri, 16 Aug 2013 12:48:22 -0400 Subject: [PATCH] increased the ignore list for parts of the domain that will not be used as evidence Former-commit-id: 11b68fc6097ae96735208f4384353d7615c4572a --- .../org/owasp/dependencycheck/utils/UrlStringUtils.java | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dependency-check-core/src/main/java/org/owasp/dependencycheck/utils/UrlStringUtils.java b/dependency-check-core/src/main/java/org/owasp/dependencycheck/utils/UrlStringUtils.java index 338d5bdfe..501f2c65e 100644 --- a/dependency-check-core/src/main/java/org/owasp/dependencycheck/utils/UrlStringUtils.java +++ b/dependency-check-core/src/main/java/org/owasp/dependencycheck/utils/UrlStringUtils.java @@ -21,6 +21,8 @@ package org.owasp.dependencycheck.utils; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; import java.util.List; import java.util.regex.Pattern; @@ -64,6 +66,11 @@ public final class UrlStringUtils { public static boolean isUrl(String text) { return IS_URL_TEST.matcher(text).matches(); } + /** + * A listing of domain parts that should not be used as evidence. Yes, this + * is an incomplete list. + */ + private static final HashSet IGNORE_LIST = new HashSet(Arrays.asList("www", "com", "org", "gov", "info", "name", "net", "pro", "tel", "mobi", "xxx")); /** *

Takes a URL, in String format, and adds the important parts of the URL @@ -84,7 +91,7 @@ public final class UrlStringUtils { //add the domain except www and the tld. for (int i = 0; i < domain.length - 1; i++) { final String sub = domain[i]; - if (!"www".equalsIgnoreCase(sub)) { + if (!IGNORE_LIST.contains(sub.toLowerCase())) { importantParts.add(sub); } }