| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
| UrlStringUtils |
|
| 2.5;2.5 |
| 1 | /* | |
| 2 | * This file is part of dependency-check-core. | |
| 3 | * | |
| 4 | * Dependency-check-core is free software: you can redistribute it and/or modify it | |
| 5 | * under the terms of the GNU General Public License as published by the Free | |
| 6 | * Software Foundation, either version 3 of the License, or (at your option) any | |
| 7 | * later version. | |
| 8 | * | |
| 9 | * Dependency-check-core is distributed in the hope that it will be useful, but | |
| 10 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | |
| 12 | * details. | |
| 13 | * | |
| 14 | * You should have received a copy of the GNU General Public License along with | |
| 15 | * dependency-check-core. If not, see http://www.gnu.org/licenses/. | |
| 16 | * | |
| 17 | * Copyright (c) 2013 Jeremy Long. All Rights Reserved. | |
| 18 | */ | |
| 19 | package org.owasp.dependencycheck.utils; | |
| 20 | ||
| 21 | import java.net.MalformedURLException; | |
| 22 | import java.net.URL; | |
| 23 | import java.util.ArrayList; | |
| 24 | import java.util.Arrays; | |
| 25 | import java.util.HashSet; | |
| 26 | import java.util.List; | |
| 27 | import java.util.regex.Pattern; | |
| 28 | ||
/**
 * Static helpers for recognising URLs inside strings and for pulling the
 * meaningful tokens (host labels, path segments, file name) out of a URL.
 *
 * @author Jeremy Long (jeremy.long@owasp.org)
 */
public final class UrlStringUtils {

    /**
     * Matches the scheme prefix of a URL (ftp, ftps, http or https followed by
     * "://") anywhere in the input. Used with {@code find()} so that text
     * spanning several lines is handled correctly.
     */
    private static final Pattern CONTAINS_URL_TEST = Pattern.compile("(?:ht|f)tps?://", Pattern.CASE_INSENSITIVE);
    /**
     * A regular expression to test if a string is a URL, i.e. starts with one
     * of the supported schemes.
     */
    private static final Pattern IS_URL_TEST = Pattern.compile("^(ht|f)tps?://.*", Pattern.CASE_INSENSITIVE);
    /**
     * A listing of domain parts that should not be used as evidence. Yes, this
     * is an incomplete list.
     */
    private static final HashSet<String> IGNORE_LIST = new HashSet<String>(
            Arrays.asList("www", "com", "org", "gov", "info", "name", "net", "pro", "tel", "mobi", "xxx"));

    /**
     * Private constructor for a utility class.
     */
    private UrlStringUtils() {
    }

    /**
     * Tests if the text provided contains a URL. This is a somewhat limited
     * search in that it only looks for (ftp|ftps|http|https)://
     *
     * @param text the text to search; may span multiple lines
     * @return true if the text contains a URL, otherwise false (including when
     * <code>text</code> is null)
     */
    public static boolean containsUrl(String text) {
        if (text == null) {
            return false;
        }
        // find() rather than matches(): the URL may sit on any line of the text.
        return CONTAINS_URL_TEST.matcher(text).find();
    }

    /**
     * Tests if the given text is a URL, i.e. begins with ftp://, ftps://,
     * http:// or https:// (case-insensitive).
     *
     * @param text the string to test
     * @return true if the text is a URL, otherwise false (including when
     * <code>text</code> is null)
     */
    public static boolean isUrl(String text) {
        if (text == null) {
            return false;
        }
        return IS_URL_TEST.matcher(text).matches();
    }

    /**
     * <p>Takes a URL, in String format, and adds the important parts of the URL
     * to a list of strings.</p>
     * <p>Example, given the following input:</p>
     * <code>"https://www.somedomain.com/path1/path2/file.php?id=439"</code>
     * <p>The function would return:</p>
     * <code>{"somedomain", "path1", "path2", "file"}</code>
     * <p>The last host label (the TLD) and any label found in the ignore list
     * are skipped. The final path segment is treated as a file name and is cut
     * at its first '.'.</p>
     *
     * @param text a URL
     * @return importantParts a list of the important parts of the URL
     * @throws MalformedURLException thrown if the URL is malformed
     */
    public static List<String> extractImportantUrlData(String text) throws MalformedURLException {
        final List<String> importantParts = new ArrayList<String>();
        final URL url = new URL(text);

        //add the domain except www and the tld.
        final String[] domain = url.getHost().split("\\.");
        for (int i = 0; i < domain.length - 1; i++) {
            final String sub = domain[i];
            // Locale.ROOT keeps the lookup stable regardless of the default locale.
            if (!IGNORE_LIST.contains(sub.toLowerCase(java.util.Locale.ROOT))) {
                importantParts.add(sub);
            }
        }

        final String[] pathParts = url.getPath().split("/");
        final int last = pathParts.length - 1;
        // every segment before the last one is a directory name
        for (int i = 0; i < last; i++) {
            if (!pathParts[i].isEmpty()) {
                importantParts.add(pathParts[i]);
            }
        }
        if (last >= 0 && !pathParts[last].isEmpty()) {
            final String fileName = pathParts[last];
            final int dot = fileName.indexOf('.');
            final String fileNameNoExt = dot >= 0 ? fileName.substring(0, dot) : fileName;
            // a name such as ".htaccess" leaves nothing useful once cut at the dot
            if (!fileNameNoExt.isEmpty()) {
                importantParts.add(fileNameNoExt);
            }
        }
        return importantParts;
    }
}