1 /*
2 * This file is part of dependency-check-core.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 * Copyright (c) 2012 Jeremy Long. All Rights Reserved.
17 */
18 package org.owasp.dependencycheck.data.lucene;
19
20 import java.io.Reader;
21 import org.apache.lucene.analysis.Analyzer;
22 import org.apache.lucene.analysis.TokenStream;
23 import org.apache.lucene.analysis.Tokenizer;
24 import org.apache.lucene.analysis.core.LowerCaseFilter;
25 import org.apache.lucene.analysis.core.StopAnalyzer;
26 import org.apache.lucene.analysis.core.StopFilter;
27 import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
28 import org.apache.lucene.util.Version;
29
30 /**
31 * <p>
32 * A Lucene Analyzer that utilizes the WhitespaceTokenizer, WordDelimiterFilter,
33 * LowerCaseFilter, and StopFilter. The intended purpose of this Analyzer is to
34 * index the CPE fields vendor and product.</p>
35 *
36 * @author Jeremy Long
37 * @deprecated the field analyzer should not be used, instead use the
38 * SearchFieldAnalyzer so that the token analyzing filter is used.
39 */
40 @Deprecated
41 public class FieldAnalyzer extends Analyzer {
42
43 /**
44 * The Lucene Version used.
45 */
46 private final Version version;
47
48 /**
49 * Creates a new FieldAnalyzer.
50 *
51 * @param version the Lucene version
52 */
53 public FieldAnalyzer(Version version) {
54 this.version = version;
55 }
56
57 /**
58 * Creates the TokenStreamComponents
59 *
60 * @param fieldName the field name being analyzed
61 * @param reader the reader containing the input
62 * @return the TokenStreamComponents
63 */
64 @Override
65 protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
66 final Tokenizer source = new AlphaNumericTokenizer(version, reader);
67
68 TokenStream stream = source;
69
70 stream = new WordDelimiterFilter(stream,
71 WordDelimiterFilter.CATENATE_WORDS
72 | WordDelimiterFilter.GENERATE_WORD_PARTS
73 | WordDelimiterFilter.GENERATE_NUMBER_PARTS
74 | WordDelimiterFilter.PRESERVE_ORIGINAL
75 | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE
76 | WordDelimiterFilter.SPLIT_ON_NUMERICS
77 | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
78
79 stream = new LowerCaseFilter(version, stream);
80 stream = new StopFilter(version, stream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
81
82 return new TokenStreamComponents(source, stream);
83 }
84 }