Coverage Report - org.owasp.dependencycheck.data.lucene.FieldAnalyzer
 
Classes in this File Line Coverage Branch Coverage Complexity
FieldAnalyzer
100%
6/6
N/A
1
 
 1  
 /*
 2  
  * This file is part of dependency-check-core.
 3  
  *
 4  
  * Licensed under the Apache License, Version 2.0 (the "License");
 5  
  * you may not use this file except in compliance with the License.
 6  
  * You may obtain a copy of the License at
 7  
  *
 8  
  *     http://www.apache.org/licenses/LICENSE-2.0
 9  
  *
 10  
  * Unless required by applicable law or agreed to in writing, software
 11  
  * distributed under the License is distributed on an "AS IS" BASIS,
 12  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13  
  * See the License for the specific language governing permissions and
 14  
  * limitations under the License.
 15  
  *
 16  
  * Copyright (c) 2012 Jeremy Long. All Rights Reserved.
 17  
  */
 18  
 package org.owasp.dependencycheck.data.lucene;
 19  
 
 20  
 import java.io.Reader;
 21  
 import org.apache.lucene.analysis.Analyzer;
 22  
 import org.apache.lucene.analysis.TokenStream;
 23  
 import org.apache.lucene.analysis.Tokenizer;
 24  
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 25  
 import org.apache.lucene.analysis.core.StopAnalyzer;
 26  
 import org.apache.lucene.analysis.core.StopFilter;
 27  
 import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
 28  
 import org.apache.lucene.util.Version;
 29  
 
 30  
 /**
 31  
  * <p>
 32  
  * A Lucene Analyzer that utilizes the AlphaNumericTokenizer, WordDelimiterFilter, LowerCaseFilter, and StopFilter. The
 33  
  * intended purpose of this Analyzer is to index the CPE fields vendor and product.</p>
 34  
  *
 35  
  * @author Jeremy Long <jeremy.long@owasp.org>
 36  
  */
 37  
 public class FieldAnalyzer extends Analyzer {
 38  
 
 39  
     /**
 40  
      * The Lucene Version used.
 41  
      */
 42  
     private final Version version;
 43  
 
 44  
     /**
 45  
      * Creates a new FieldAnalyzer.
 46  
      *
 47  
      * @param version the Lucene version
 48  
      */
 49  
     public FieldAnalyzer(Version version) {
 50  
         this.version = version;
 51  
     }
 52  
 
 53  
     /**
 54  
      * Creates the TokenStreamComponents
 55  
      *
 56  
      * @param fieldName the field name being analyzed
 57  
      * @param reader the reader containing the input
 58  
      * @return the TokenStreamComponents
 59  
      */
 60  
     @Override
 61  
     protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
 62  3
         final Tokenizer source = new AlphaNumericTokenizer(version, reader);
 63  
 
 64  3
         TokenStream stream = source;
 65  
 
 66  3
         stream = new WordDelimiterFilter(stream,
 67  
                 WordDelimiterFilter.CATENATE_WORDS
 68  
                 | WordDelimiterFilter.GENERATE_WORD_PARTS
 69  
                 | WordDelimiterFilter.GENERATE_NUMBER_PARTS
 70  
                 | WordDelimiterFilter.PRESERVE_ORIGINAL
 71  
                 | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE
 72  
                 | WordDelimiterFilter.SPLIT_ON_NUMERICS
 73  
                 | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
 74  
 
 75  3
         stream = new LowerCaseFilter(version, stream);
 76  3
         stream = new StopFilter(version, stream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
 77  
 
 78  3
         return new TokenStreamComponents(source, stream);
 79  
     }
 80  
 }