Coverage Report - org.owasp.dependencycheck.utils.UrlStringUtils
 
Classes in this File Line Coverage Branch Coverage Complexity
UrlStringUtils
82%
19/23
75%
9/12
2.5
 
 1  
 /*
 2  
  * This file is part of dependency-check-core.
 3  
  *
 4  
  * Dependency-check-core is free software: you can redistribute it and/or modify it
 5  
  * under the terms of the GNU General Public License as published by the Free
 6  
  * Software Foundation, either version 3 of the License, or (at your option) any
 7  
  * later version.
 8  
  *
 9  
  * Dependency-check-core is distributed in the hope that it will be useful, but
 10  
  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  
  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 12  
  * details.
 13  
  *
 14  
  * You should have received a copy of the GNU General Public License along with
 15  
  * dependency-check-core. If not, see http://www.gnu.org/licenses/.
 16  
  *
 17  
  * Copyright (c) 2013 Jeremy Long. All Rights Reserved.
 18  
  */
 19  
 package org.owasp.dependencycheck.utils;
 20  
 
 21  
 import java.net.MalformedURLException;
 22  
 import java.net.URL;
 23  
 import java.util.ArrayList;
 24  
 import java.util.Arrays;
 25  
 import java.util.HashSet;
 26  
 import java.util.List;
 27  
 import java.util.regex.Pattern;
 28  
 
 29  
/**
 * Utility methods for testing URL strings and extracting searchable evidence
 * from them. All methods are static; the class cannot be instantiated.
 *
 * @author Jeremy Long (jeremy.long@owasp.org)
 */
public final class UrlStringUtils {

    /**
     * Private constructor for a utility class.
     */
    private UrlStringUtils() {
    }
    /**
     * A regular expression to test if a string contains a URL.
     */
    private static final Pattern CONTAINS_URL_TEST = Pattern.compile("^.*(ht|f)tps?://.*$", Pattern.MULTILINE | Pattern.CASE_INSENSITIVE);
    /**
     * A regular expression to test if a string is a URL.
     */
    private static final Pattern IS_URL_TEST = Pattern.compile("^(ht|f)tps?://.*", Pattern.CASE_INSENSITIVE);

    /**
     * Tests if the text provided contains a URL. This is somewhat limited
     * search in that it only looks for (ftp|http|https)://
     *
     * @param text the text to search
     * @return true if the text contains a url, otherwise false
     */
    public static boolean containsUrl(String text) {
        return CONTAINS_URL_TEST.matcher(text).matches();
    }

    /**
     * Tests if the given text is url.
     *
     * @param text the string to test
     * @return returns true if the text is a url, otherwise false
     */
    public static boolean isUrl(String text) {
        return IS_URL_TEST.matcher(text).matches();
    }
    /**
     * A listing of domain parts that should not be used as evidence. Yes, this
     * is an incomplete list.
     */
    private static final HashSet<String> IGNORE_LIST = new HashSet<String>(
            Arrays.asList("www", "com", "org", "gov", "info", "name", "net", "pro", "tel", "mobi", "xxx"));

    /**
     * <p>Takes a URL, in String format, and adds the important parts of the URL
     * to a list of strings.</p>
     * <p>Example, given the following input:</p>
     * <code>"https://www.somedomain.com/path1/path2/file.php?id=439"</code>
     * <p>The function would return:</p>
     * <code>{"somedomain", "path1", "path2", "file"}</code>
     *
     * @param text a URL
     * @return importantParts a list of the important parts of the URL
     * @throws MalformedURLException thrown if the URL is malformed
     */
    public static List<String> extractImportantUrlData(String text) throws MalformedURLException {
        final ArrayList<String> importantParts = new ArrayList<String>();
        final URL url = new URL(text);
        final String[] domain = url.getHost().split("\\.");
        //add the domain parts except entries in the ignore list (www, etc.)
        //and the final element, which is the TLD.
        for (int i = 0; i < domain.length - 1; i++) {
            final String sub = domain[i];
            if (!IGNORE_LIST.contains(sub.toLowerCase())) {
                importantParts.add(sub);
            }
        }
        final String document = url.getPath();
        final String[] pathParts = document.split("[\\//]");
        //add every intermediate path segment; the final segment (the file
        //name) is handled separately below so its extension can be removed.
        //BUGFIX: the loop previously stopped at length - 2, silently dropping
        //the last directory ("path2" in the Javadoc example above).
        for (int i = 0; i < pathParts.length - 1; i++) {
            if (!pathParts[i].isEmpty()) {
                importantParts.add(pathParts[i]);
            }
        }
        if (pathParts.length > 0 && !pathParts[pathParts.length - 1].isEmpty()) {
            //strip a trailing extension of up to five characters; [^.] anchors
            //the match to the LAST dot so "my.file.php" becomes "my.file".
            //BUGFIX: the previous pattern "\\..*{0,5}$" removed everything
            //from the first dot onward.
            final String fileNameNoExt = pathParts[pathParts.length - 1].replaceAll("\\.[^.]{0,5}$", "");
            importantParts.add(fileNameNoExt);
        }
        return importantParts;
    }
}