Coverage Report - org.owasp.dependencycheck.utils.UrlStringUtils
 
Classes in this File Line Coverage Branch Coverage Complexity
UrlStringUtils
87%
21/24
83%
10/12
2.5
 
 1  
 /*
 2  
  * This file is part of dependency-check-core.
 3  
  *
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  *
 8  
  *     http://www.apache.org/licenses/LICENSE-2.0
 9  
  *
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  *
 16  
  * Copyright (c) 2013 Jeremy Long. All Rights Reserved.
 17  
  */
 18  
 package org.owasp.dependencycheck.utils;
 19  
 
 20  
 import java.net.MalformedURLException;
 21  
 import java.net.URL;
 22  
 import java.util.ArrayList;
 23  
 import java.util.Arrays;
 24  
 import java.util.HashSet;
 25  
 import java.util.List;
 26  
 import java.util.Set;
 27  
 import java.util.regex.Pattern;
 28  
 
 29  
 /**
 30  
  *
 31  
  * @author Jeremy Long
 32  
  */
 33  
 public final class UrlStringUtils {
 34  
 
 35  
     /**
 36  
      * Private constructor for a utility class.
 37  
      */
 38  0
     private UrlStringUtils() {
 39  0
     }
 40  
     /**
 41  
      * A regular expression to test if a string contains a URL.
 42  
      */
 43  1
     private static final Pattern CONTAINS_URL_TEST = Pattern.compile("^.*(ht|f)tps?://.*$", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);
 44  
     /**
 45  
      * A regular expression to test if a string is a URL.
 46  
      */
 47  1
     private static final Pattern IS_URL_TEST = Pattern.compile("^(ht|f)tps?://.*", Pattern.CASE_INSENSITIVE);
 48  
 
 49  
     /**
 50  
      * Tests if the text provided contains a URL. This is somewhat limited search in that it only looks for
 51  
      * (ftp|http|https)://
 52  
      *
 53  
      * @param text the text to search
 54  
      * @return true if the text contains a url, otherwise false
 55  
      */
 56  
     public static boolean containsUrl(String text) {
 57  22176
         return CONTAINS_URL_TEST.matcher(text).matches();
 58  
     }
 59  
 
 60  
     /**
 61  
      * Tests if the given text is url.
 62  
      *
 63  
      * @param text the string to test
 64  
      * @return returns true if the text is a url, otherwise false
 65  
      */
 66  
     public static boolean isUrl(String text) {
 67  33
         return IS_URL_TEST.matcher(text).matches();
 68  
     }
 69  
     /**
 70  
      * A listing of domain parts that should not be used as evidence. Yes, this is an incomplete list.
 71  
      */
 72  2
     private static final Set<String> IGNORE_LIST = new HashSet<String>(
 73  1
             Arrays.asList("www", "com", "org", "gov", "info", "name", "net", "pro", "tel", "mobi", "xxx"));
 74  
 
 75  
     /**
 76  
      * <p>
 77  
      * Takes a URL, in String format, and adds the important parts of the URL to a list of strings.</p>
 78  
      * <p>
 79  
      * Example, given the following input:</p>
 80  
      * <code>"https://www.somedomain.com/path1/path2/file.php?id=439"</code>
 81  
      * <p>
 82  
      * The function would return:</p>
 83  
      * <code>{"some.domain", "path1", "path2", "file"}</code>
 84  
      *
 85  
      * @param text a URL
 86  
      * @return importantParts a list of the important parts of the URL
 87  
      * @throws MalformedURLException thrown if the URL is malformed
 88  
      */
 89  
     public static List<String> extractImportantUrlData(String text) throws MalformedURLException {
 90  25
         final List<String> importantParts = new ArrayList<String>();
 91  25
         final URL url = new URL(text);
 92  25
         final String[] domain = url.getHost().split("\\.");
 93  
         //add the domain except www and the tld.
 94  72
         for (int i = 0; i < domain.length - 1; i++) {
 95  47
             final String sub = domain[i];
 96  47
             if (!IGNORE_LIST.contains(sub.toLowerCase())) {
 97  44
                 importantParts.add(sub);
 98  
             }
 99  
         }
 100  25
         final String document = url.getPath();
 101  25
         final String[] pathParts = document.split("[\\//]");
 102  44
         for (int i = 0; i < pathParts.length - 2; i++) {
 103  19
             if (!pathParts[i].isEmpty()) {
 104  0
                 importantParts.add(pathParts[i]);
 105  
             }
 106  
         }
 107  25
         if (pathParts.length > 0 && !pathParts[pathParts.length - 1].isEmpty()) {
 108  22
             final String fileNameNoExt = pathParts[pathParts.length - 1].replaceAll("\\..*{0,5}$", "");
 109  22
             importantParts.add(fileNameNoExt);
 110  
         }
 111  25
         return importantParts;
 112  
     }
 113  
 }