| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
| UrlStringUtils |
|
| 2.5;2.5 |
| 1 | /* | |
| 2 | * This file is part of dependency-check-core. | |
| 3 | * | |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
| 5 | * you may not use this file except in compliance with the License. | |
| 6 | * You may obtain a copy of the License at | |
| 7 | * | |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
| 9 | * | |
| 10 | * Unless required by applicable law or agreed to in writing, software | |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 13 | * See the License for the specific language governing permissions and | |
| 14 | * limitations under the License. | |
| 15 | * | |
| 16 | * Copyright (c) 2013 Jeremy Long. All Rights Reserved. | |
| 17 | */ | |
| 18 | package org.owasp.dependencycheck.utils; | |
| 19 | ||
| 20 | import java.net.MalformedURLException; | |
| 21 | import java.net.URL; | |
| 22 | import java.util.ArrayList; | |
| 23 | import java.util.Arrays; | |
| 24 | import java.util.HashSet; | |
| 25 | import java.util.List; | |
| 26 | import java.util.Set; | |
| 27 | import java.util.regex.Pattern; | |
| 28 | ||
/**
 * Utility methods for spotting URLs inside free-form text and for breaking a URL into the parts that are
 * useful as identifying evidence (significant host labels, path segments and the file name).
 *
 * @author Jeremy Long
 */
public final class UrlStringUtils {

    /**
     * A regular expression that locates a URL scheme (ftp, http or https followed by "://") anywhere in a
     * string. It is used with {@code find()} so that text spanning several lines is searched as well.
     */
    private static final Pattern CONTAINS_URL_TEST = Pattern.compile("(ht|f)tps?://", Pattern.CASE_INSENSITIVE);
    /**
     * A regular expression to test if a string is a URL.
     */
    private static final Pattern IS_URL_TEST = Pattern.compile("^(ht|f)tps?://.*", Pattern.CASE_INSENSITIVE);
    /**
     * A regular expression matching everything from the first dot of a file name to its end.
     */
    private static final Pattern FILE_EXTENSION = Pattern.compile("\\..*$");
    /**
     * A listing of domain parts that should not be used as evidence. Yes, this is an incomplete list.
     */
    private static final Set<String> IGNORE_LIST = new HashSet<String>(
            Arrays.asList("www", "com", "org", "gov", "info", "name", "net", "pro", "tel", "mobi", "xxx"));

    /**
     * Private constructor for a utility class.
     */
    private UrlStringUtils() {
    }

    /**
     * Tests if the text provided contains a URL. This is somewhat limited search in that it only looks for
     * (ftp|http|https)://
     *
     * @param text the text to search; may span multiple lines and may be <code>null</code>
     * @return true if the text contains a url, otherwise false (including when the text is <code>null</code>)
     */
    public static boolean containsUrl(String text) {
        if (text == null) {
            return false;
        }
        // find() rather than matches(): the URL may sit on any line of a multi-line string.
        return CONTAINS_URL_TEST.matcher(text).find();
    }

    /**
     * Tests if the given text is url, i.e. it starts with (ftp|http|https):// and contains no line breaks.
     *
     * @param text the string to test; may be <code>null</code>
     * @return returns true if the text is a url, otherwise false (including when the text is
     * <code>null</code>)
     */
    public static boolean isUrl(String text) {
        return text != null && IS_URL_TEST.matcher(text).matches();
    }

    /**
     * <p>
     * Takes a URL, in String format, and adds the important parts of the URL to a list of strings.</p>
     * <p>
     * Example, given the following input:</p>
     * <code>"https://www.somedomain.com/path1/path2/file.php?id=439"</code>
     * <p>
     * The function would return:</p>
     * <code>{"somedomain", "path1", "path2", "file"}</code>
     * <p>
     * The last label of the host (the TLD) and any label found in the ignore list are skipped. The final
     * path segment is treated as a file name and is cut at its first dot, so <code>"lib-1.2.3.js"</code>
     * contributes <code>"lib-1"</code>. Empty segments are never added.</p>
     *
     * @param text a URL
     * @return importantParts a list of the important parts of the URL
     * @throws MalformedURLException thrown if the URL is malformed
     */
    public static List<String> extractImportantUrlData(String text) throws MalformedURLException {
        final List<String> importantParts = new ArrayList<String>();
        final URL url = new URL(text);
        final String[] domain = url.getHost().split("\\.");
        //add the domain except www and the tld.
        for (int i = 0; i < domain.length - 1; i++) {
            final String sub = domain[i];
            // Locale.ROOT keeps the lookup independent of the JVM default locale (e.g. Turkish dotless i).
            if (!IGNORE_LIST.contains(sub.toLowerCase(java.util.Locale.ROOT))) {
                importantParts.add(sub);
            }
        }
        final String document = url.getPath();
        final String[] pathParts = document.split("[\\//]");
        //add every directory segment; the last element is the file name and is handled below.
        for (int i = 0; i < pathParts.length - 1; i++) {
            if (!pathParts[i].isEmpty()) {
                importantParts.add(pathParts[i]);
            }
        }
        if (pathParts.length > 0) {
            final String fileNameNoExt = FILE_EXTENSION.matcher(pathParts[pathParts.length - 1]).replaceAll("");
            // a name such as ".htaccess" is reduced to nothing; do not record an empty entry.
            if (!fileNameNoExt.isEmpty()) {
                importantParts.add(fileNameNoExt);
            }
        }
        return importantParts;
    }
}