View Javadoc
1   /*
2    * This file is part of dependency-check-core.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13   * See the License for the specific language governing permissions and
14   * limitations under the License.
15   *
16   * Copyright (c) 2012 Jeremy Long. All Rights Reserved.
17   */
18  package org.owasp.dependencycheck.data.lucene;
19  
20  import java.io.Reader;
21  import org.apache.lucene.analysis.Analyzer;
22  import org.apache.lucene.analysis.TokenStream;
23  import org.apache.lucene.analysis.Tokenizer;
24  import org.apache.lucene.analysis.core.LowerCaseFilter;
25  import org.apache.lucene.analysis.core.StopAnalyzer;
26  import org.apache.lucene.analysis.core.StopFilter;
27  import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
28  import org.apache.lucene.util.Version;
29  
30  /**
31   * <p>
32   * A Lucene Analyzer that utilizes the WhitespaceTokenizer, WordDelimiterFilter,
33   * LowerCaseFilter, and StopFilter. The intended purpose of this Analyzer is to
34   * index the CPE fields vendor and product.</p>
35   *
36   * @author Jeremy Long
37   * @deprecated the field analyzer should not be used, instead use the
38   * SearchFieldAnalyzer so that the token analyzing filter is used.
39   */
40  @Deprecated
41  public class FieldAnalyzer extends Analyzer {
42  
43      /**
44       * The Lucene Version used.
45       */
46      private final Version version;
47  
48      /**
49       * Creates a new FieldAnalyzer.
50       *
51       * @param version the Lucene version
52       */
53      public FieldAnalyzer(Version version) {
54          this.version = version;
55      }
56  
57      /**
58       * Creates the TokenStreamComponents
59       *
60       * @param fieldName the field name being analyzed
61       * @param reader the reader containing the input
62       * @return the TokenStreamComponents
63       */
64      @Override
65      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
66          final Tokenizer source = new AlphaNumericTokenizer(version, reader);
67  
68          TokenStream stream = source;
69  
70          stream = new WordDelimiterFilter(stream,
71                  WordDelimiterFilter.CATENATE_WORDS
72                  | WordDelimiterFilter.GENERATE_WORD_PARTS
73                  | WordDelimiterFilter.GENERATE_NUMBER_PARTS
74                  | WordDelimiterFilter.PRESERVE_ORIGINAL
75                  | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE
76                  | WordDelimiterFilter.SPLIT_ON_NUMERICS
77                  | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
78  
79          stream = new LowerCaseFilter(version, stream);
80          stream = new StopFilter(version, stream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
81  
82          return new TokenStreamComponents(source, stream);
83      }
84  }