Coverage Report - org.owasp.dependencycheck.utils.UrlStringUtils
 
Classes in this File Line Coverage Branch Coverage Complexity
UrlStringUtils
95%
20/21
83%
10/12
2.5
 
 1  
 /*
 2  
  * This file is part of dependency-check-core.
 3  
  *
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  *
 8  
  *     http://www.apache.org/licenses/LICENSE-2.0
 9  
  *
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  *
 16  
  * Copyright (c) 2013 Jeremy Long. All Rights Reserved.
 17  
  */
 18  
 package org.owasp.dependencycheck.utils;
 19  
 
 20  
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
 27  
 
 28  
 /**
 29  
  *
 30  
  * @author Jeremy Long <jeremy.long@owasp.org>
 31  
  */
 32  
 public final class UrlStringUtils {
 33  
 
 34  
     /**
 35  
      * Private constructor for a utility class.
 36  
      */
 37  
     private UrlStringUtils() {
 38  
     }
 39  
     /**
 40  
      * A regular expression to test if a string contains a URL.
 41  
      */
 42  1
     private static final Pattern CONTAINS_URL_TEST = Pattern.compile("^.*(ht|f)tps?://.*$", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);
 43  
     /**
 44  
      * A regular expression to test if a string is a URL.
 45  
      */
 46  1
     private static final Pattern IS_URL_TEST = Pattern.compile("^(ht|f)tps?://.*", Pattern.CASE_INSENSITIVE);
 47  
 
 48  
     /**
 49  
      * Tests if the text provided contains a URL. This is somewhat limited search in that it only looks for
 50  
      * (ftp|http|https)://
 51  
      *
 52  
      * @param text the text to search
 53  
      * @return true if the text contains a url, otherwise false
 54  
      */
 55  
     public static boolean containsUrl(String text) {
 56  13495
         return CONTAINS_URL_TEST.matcher(text).matches();
 57  
     }
 58  
 
 59  
     /**
 60  
      * Tests if the given text is url.
 61  
      *
 62  
      * @param text the string to test
 63  
      * @return returns true if the text is a url, otherwise false
 64  
      */
 65  
     public static boolean isUrl(String text) {
 66  7
         return IS_URL_TEST.matcher(text).matches();
 67  
     }
 68  
     /**
 69  
      * A listing of domain parts that should not be used as evidence. Yes, this is an incomplete list.
 70  
      */
 71  1
     private static final HashSet<String> IGNORE_LIST = new HashSet<String>(
 72  
             Arrays.asList("www", "com", "org", "gov", "info", "name", "net", "pro", "tel", "mobi", "xxx"));
 73  
 
 74  
     /**
 75  
      * <p>
 76  
      * Takes a URL, in String format, and adds the important parts of the URL to a list of strings.</p>
 77  
      * <p>
 78  
      * Example, given the following input:</p>
 79  
      * <code>"https://www.somedomain.com/path1/path2/file.php?id=439"</code>
 80  
      * <p>
 81  
      * The function would return:</p>
 82  
      * <code>{"some.domain", "path1", "path2", "file"}</code>
 83  
      *
 84  
      * @param text a URL
 85  
      * @return importantParts a list of the important parts of the URL
 86  
      * @throws MalformedURLException thrown if the URL is malformed
 87  
      */
 88  
     public static List<String> extractImportantUrlData(String text) throws MalformedURLException {
 89  7
         final ArrayList<String> importantParts = new ArrayList<String>();
 90  7
         final URL url = new URL(text);
 91  7
         final String[] domain = url.getHost().split("\\.");
 92  
         //add the domain except www and the tld.
 93  20
         for (int i = 0; i < domain.length - 1; i++) {
 94  13
             final String sub = domain[i];
 95  13
             if (!IGNORE_LIST.contains(sub.toLowerCase())) {
 96  12
                 importantParts.add(sub);
 97  
             }
 98  
         }
 99  7
         final String document = url.getPath();
 100  7
         final String[] pathParts = document.split("[\\//]");
 101  12
         for (int i = 0; i < pathParts.length - 2; i++) {
 102  5
             if (!pathParts[i].isEmpty()) {
 103  0
                 importantParts.add(pathParts[i]);
 104  
             }
 105  
         }
 106  7
         if (pathParts.length > 0 && !pathParts[pathParts.length - 1].isEmpty()) {
 107  6
             final String fileNameNoExt = pathParts[pathParts.length - 1].replaceAll("\\..*{0,5}$", "");
 108  6
             importantParts.add(fileNameNoExt);
 109  
         }
 110  7
         return importantParts;
 111  
     }
 112  
 }