Coverage Report - org.owasp.dependencycheck.data.lucene.FieldAnalyzer
 
Classes in this File Line Coverage Branch Coverage Complexity
FieldAnalyzer
100%
9/9
N/A
1
 
 1  
 /*
 2  
  * This file is part of dependency-check-core.
 3  
  *
 4  
  * Dependency-check-core is free software: you can redistribute it and/or modify it
 5  
  * under the terms of the GNU General Public License as published by the Free
 6  
  * Software Foundation, either version 3 of the License, or (at your option) any
 7  
  * later version.
 8  
  *
 9  
  * Dependency-check-core is distributed in the hope that it will be useful, but
 10  
  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  
  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 12  
  * details.
 13  
  *
 14  
  * You should have received a copy of the GNU General Public License along with
 15  
  * dependency-check-core. If not, see http://www.gnu.org/licenses/.
 16  
  *
 17  
  * Copyright (c) 2012 Jeremy Long. All Rights Reserved.
 18  
  */
 19  
 package org.owasp.dependencycheck.data.lucene;
 20  
 
 21  
 import java.io.Reader;
 22  
 import org.apache.lucene.analysis.Analyzer;
 23  
 import org.apache.lucene.analysis.TokenStream;
 24  
 import org.apache.lucene.analysis.Tokenizer;
 25  
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 26  
 import org.apache.lucene.analysis.core.StopAnalyzer;
 27  
 import org.apache.lucene.analysis.core.StopFilter;
 28  
 import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
 29  
 import org.apache.lucene.util.Version;
 30  
 
 31  
 /**
 32  
  * <p>A Lucene Analyzer that utilizes the AlphaNumericTokenizer,
 33  
  * WordDelimiterFilter, LowerCaseFilter, and StopFilter. The intended purpose of
 34  
  * this Analyzer is to index the CPE fields vendor and product.</p>
 35  
  *
 36  
  * @author Jeremy Long (jeremy.long@owasp.org)
 37  
  */
 38  
 public class FieldAnalyzer extends Analyzer {
 39  
 
 40  
     /**
 41  
      * The Lucene Version handed to the tokenizer, the LowerCaseFilter, and the StopFilter.
 42  
      */
 43  
     private final Version version;
 44  
 
 45  
     /**
 46  
      * Creates a new FieldAnalyzer that builds its components for the given Lucene version.
 47  
      *
 48  
      * @param version the Lucene version
 49  
      */
 50  23
     public FieldAnalyzer(Version version) {
 51  23
         this.version = version;
 52  23
     }
 53  
 
 54  
     /**
 55  
      * Creates the TokenStreamComponents: an AlphaNumericTokenizer followed by a WordDelimiterFilter, a LowerCaseFilter, and a StopFilter.
 56  
      *
 57  
      * @param fieldName the field name being analyzed (not consulted; every field gets the same chain)
 58  
      * @param reader the reader containing the input
 59  
      * @return the TokenStreamComponents pairing the tokenizer with the fully filtered stream
 60  
      */
 61  
     @Override
 62  
     protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
 63  23
         final Tokenizer source = new AlphaNumericTokenizer(version, reader);
 64  
 
 65  23
         TokenStream stream = source;
 66  
         // The flags below are OR-ed into one configuration bitmask; the trailing null is presumably the protected-words set (none) - confirm against the Lucene API.
 67  23
         stream = new WordDelimiterFilter(stream,
 68  
                 WordDelimiterFilter.CATENATE_WORDS
 69  
                 | WordDelimiterFilter.GENERATE_WORD_PARTS
 70  
                 | WordDelimiterFilter.GENERATE_NUMBER_PARTS
 71  
                 | WordDelimiterFilter.PRESERVE_ORIGINAL
 72  
                 | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE
 73  
                 | WordDelimiterFilter.SPLIT_ON_NUMERICS
 74  
                 | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
 75  
         // Lower-casing runs before the stop filter, so the stop set is applied to already lower-cased tokens.
 76  23
         stream = new LowerCaseFilter(version, stream);
 77  23
         stream = new StopFilter(version, stream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
 78  
 
 79  23
         return new TokenStreamComponents(source, stream);
 80  
     }
 81  
 }