Coverage Report

Coverage Report - org.owasp.dependencycheck.utils.UrlStringUtils

Classes in this File

Line Coverage

Branch Coverage

Complexity

UrlStringUtils

87%

21/24

83%

10/12

2.5

 /*
  * This file is part of dependency-check-core.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  *
  * Copyright (c) 2013 Jeremy Long. All Rights Reserved.
  */
 package org.owasp.dependencycheck.utils;
 
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Locale;
 import java.util.Set;
 import java.util.regex.Pattern;
 
 /**
  *
  * @author Jeremy Long
  */
 public final class UrlStringUtils {
 
     /**
      * Private constructor for a utility class.
      */
     private UrlStringUtils() {
     }
     /**
      * A regular expression to test if a string contains a URL.
      */
     private static final Pattern CONTAINS_URL_TEST = Pattern.compile("^.*(ht|f)tps?://.*$", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);
     /**
      * A regular expression to test if a string is a URL.
      */
     private static final Pattern IS_URL_TEST = Pattern.compile("^(ht|f)tps?://.*", Pattern.CASE_INSENSITIVE);
 
     /**
      * Tests if the text provided contains a URL. This is somewhat limited search in that it only looks for
      * (ftp|http|https)://
      *
      * @param text the text to search
      * @return true if the text contains a url, otherwise false
      */
     public static boolean containsUrl(String text) {
         return CONTAINS_URL_TEST.matcher(text).matches();
     }
 
     /**
      * Tests if the given text is url.
      *
      * @param text the string to test
      * @return returns true if the text is a url, otherwise false
      */
     public static boolean isUrl(String text) {
         return IS_URL_TEST.matcher(text).matches();
     }
     /**
      * A listing of domain parts that should not be used as evidence. Yes, this is an incomplete list.
      */
     private static final Set<String> IGNORE_LIST = new HashSet<String>(
             Arrays.asList("www", "com", "org", "gov", "info", "name", "net", "pro", "tel", "mobi", "xxx"));
 
     /**
      * <p>
      * Takes a URL, in String format, and adds the important parts of the URL to a list of strings.</p>
      * <p>
      * Example, given the following input:</p>
      * <code>"https://www.somedomain.com/path1/path2/file.php?id=439"</code>
      * <p>
      * The function would return:</p>
      * <code>{"some.domain", "path1", "path2", "file"}</code>
      *
      * @param text a URL
      * @return importantParts a list of the important parts of the URL
      * @throws MalformedURLException thrown if the URL is malformed
      */
     public static List<String> extractImportantUrlData(String text) throws MalformedURLException {
         final List<String> importantParts = new ArrayList<String>();
         final URL url = new URL(text);
         final String[] domain = url.getHost().split("\\.");
         //add the domain except www and the tld.
         for (int i = 0; i < domain.length - 1; i++) {
             final String sub = domain[i];
             if (!IGNORE_LIST.contains(sub.toLowerCase())) {
                 importantParts.add(sub);
             }
         }
         final String document = url.getPath();
         final String[] pathParts = document.split("[\\//]");
         for (int i = 0; i < pathParts.length - 2; i++) {
             if (!pathParts[i].isEmpty()) {
                 importantParts.add(pathParts[i]);
             }
         }
         if (pathParts.length > 0 && !pathParts[pathParts.length - 1].isEmpty()) {
             final String fileNameNoExt = pathParts[pathParts.length - 1].replaceAll("\\..*{0,5}$", "");
             importantParts.add(fileNameNoExt);
         }
         return importantParts;
     }
 }

1		/*
2		* This file is part of dependency-check-core.
3		*
4		* Licensed under the Apache License, Version 2.0 (the "License");
5		* you may not use this file except in compliance with the License.
6		* You may obtain a copy of the License at
7		*
8		* http://www.apache.org/licenses/LICENSE-2.0
9		*
10		* Unless required by applicable law or agreed to in writing, software
11		* distributed under the License is distributed on an "AS IS" BASIS,
12		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13		* See the License for the specific language governing permissions and
14		* limitations under the License.
15		*
16		* Copyright (c) 2013 Jeremy Long. All Rights Reserved.
17		*/
18		package org.owasp.dependencycheck.utils;
19
20		import java.net.MalformedURLException;
21		import java.net.URL;
22		import java.util.ArrayList;
23		import java.util.Arrays;
24		import java.util.HashSet;
25		import java.util.List;
26		import java.util.Set;
27		import java.util.regex.Pattern;
28
29		/**
30		*
31		* @author Jeremy Long
32		*/
33		public final class UrlStringUtils {
34
35		/**
36		* Private constructor for a utility class.
37		*/
38	0	private UrlStringUtils() {
39	0	}
40		/**
41		* A regular expression to test if a string contains a URL.
42		*/
43	1	private static final Pattern CONTAINS_URL_TEST = Pattern.compile("^.*(ht|f)tps?://.*$", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);
44		/**
45		* A regular expression to test if a string is a URL.
46		*/
47	1	private static final Pattern IS_URL_TEST = Pattern.compile("^(ht|f)tps?://.*", Pattern.CASE_INSENSITIVE);
48
49		/**
50		* Tests if the text provided contains a URL. This is somewhat limited search in that it only looks for
51		* (ftp|http|https)://
52		*
53		* @param text the text to search
54		* @return true if the text contains a url, otherwise false
55		*/
56		public static boolean containsUrl(String text) {
57	22176	return CONTAINS_URL_TEST.matcher(text).matches();
58		}
59
60		/**
61		* Tests if the given text is url.
62		*
63		* @param text the string to test
64		* @return returns true if the text is a url, otherwise false
65		*/
66		public static boolean isUrl(String text) {
67	33	return IS_URL_TEST.matcher(text).matches();
68		}
69		/**
70		* A listing of domain parts that should not be used as evidence. Yes, this is an incomplete list.
71		*/
72	2	private static final Set<String> IGNORE_LIST = new HashSet<String>(
73	1	Arrays.asList("www", "com", "org", "gov", "info", "name", "net", "pro", "tel", "mobi", "xxx"));
74
75		/**
76		* <p>
77		* Takes a URL, in String format, and adds the important parts of the URL to a list of strings.</p>
78		* <p>
79		* Example, given the following input:</p>
80		* <code>"https://www.somedomain.com/path1/path2/file.php?id=439"</code>
81		* <p>
82		* The function would return:</p>
83		* <code>{"some.domain", "path1", "path2", "file"}</code>
84		*
85		* @param text a URL
86		* @return importantParts a list of the important parts of the URL
87		* @throws MalformedURLException thrown if the URL is malformed
88		*/
89		public static List<String> extractImportantUrlData(String text) throws MalformedURLException {
90	25	final List<String> importantParts = new ArrayList<String>();
91	25	final URL url = new URL(text);
92	25	final String[] domain = url.getHost().split("\\.");
93		//add the domain except www and the tld.
94	72	for (int i = 0; i < domain.length - 1; i++) {
95	47	final String sub = domain[i];
96	47	if (!IGNORE_LIST.contains(sub.toLowerCase())) {
97	44	importantParts.add(sub);
98		}
99		}
100	25	final String document = url.getPath();
101	25	final String[] pathParts = document.split("[\\//]");
102	44	for (int i = 0; i < pathParts.length - 2; i++) {
103	19	if (!pathParts[i].isEmpty()) {
104	0	importantParts.add(pathParts[i]);
105		}
106		}
107	25	if (pathParts.length > 0 && !pathParts[pathParts.length - 1].isEmpty()) {
108	22	final String fileNameNoExt = pathParts[pathParts.length - 1].replaceAll("\\..*{0,5}$", "");
109	22	importantParts.add(fileNameNoExt);
110		}
111	25	return importantParts;
112		}
113		}