| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
| FieldAnalyzer |
|
| 1.0;1 |
| 1 | /* | |
| 2 | * This file is part of dependency-check-core. | |
| 3 | * | |
| 4 | * Dependency-check-core is free software: you can redistribute it and/or modify it | |
| 5 | * under the terms of the GNU General Public License as published by the Free | |
| 6 | * Software Foundation, either version 3 of the License, or (at your option) any | |
| 7 | * later version. | |
| 8 | * | |
| 9 | * Dependency-check-core is distributed in the hope that it will be useful, but | |
| 10 | * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | |
| 12 | * details. | |
| 13 | * | |
| 14 | * You should have received a copy of the GNU General Public License along with | |
| 15 | * dependency-check-core. If not, see http://www.gnu.org/licenses/. | |
| 16 | * | |
| 17 | * Copyright (c) 2012 Jeremy Long. All Rights Reserved. | |
| 18 | */ | |
| 19 | package org.owasp.dependencycheck.data.lucene; | |
| 20 | ||
| 21 | import java.io.Reader; | |
| 22 | import org.apache.lucene.analysis.Analyzer; | |
| 23 | import org.apache.lucene.analysis.TokenStream; | |
| 24 | import org.apache.lucene.analysis.Tokenizer; | |
| 25 | import org.apache.lucene.analysis.core.LowerCaseFilter; | |
| 26 | import org.apache.lucene.analysis.core.StopAnalyzer; | |
| 27 | import org.apache.lucene.analysis.core.StopFilter; | |
| 28 | import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter; | |
| 29 | import org.apache.lucene.util.Version; | |
| 30 | ||
| 31 | /** | |
| 32 | * <p>A Lucene Analyzer that utilizes the AlphaNumericTokenizer, | |
| 33 | * WordDelimiterFilter, LowerCaseFilter, and StopFilter. The intended purpose of | |
| 34 | * this Analyzer is to index the CPE fields vendor and product.</p> | |
| 35 | * | |
| 36 | * @author Jeremy Long (jeremy.long@owasp.org) | |
| 37 | */ | |
| 38 | public class FieldAnalyzer extends Analyzer { | |
| 39 | ||
| 40 | /** | |
| 41 | * The Lucene Version used; it is handed to the tokenizer, the LowerCaseFilter, and the StopFilter. | |
| 42 | */ | |
| 43 | private final Version version; | |
| 44 | ||
| 45 | /** | |
| 46 | * Creates a new FieldAnalyzer that builds its token stream components for the given Lucene version. | |
| 47 | * | |
| 48 | * @param version the Lucene version passed to the tokenizer and filters | |
| 49 | */ | |
| 50 | 23 | public FieldAnalyzer(Version version) { |
| 51 | 23 | this.version = version; |
| 52 | 23 | } |
| 53 | ||
| 54 | /** | |
| 55 | * Creates the TokenStreamComponents: an AlphaNumericTokenizer over the reader, wrapped in turn by a | |
| 56 | * WordDelimiterFilter, a LowerCaseFilter, and a StopFilter using StopAnalyzer.ENGLISH_STOP_WORDS_SET. | |
| 57 | * @param fieldName the field name being analyzed (not used; the same chain is built for every field) | |
| 58 | * @param reader the reader containing the input | |
| 59 | * @return the TokenStreamComponents pairing the tokenizer with the final filter in the chain | |
| 60 | */ | |
| 61 | @Override | |
| 62 | protected TokenStreamComponents createComponents(String fieldName, Reader reader) { | |
| 63 | 23 | final Tokenizer source = new AlphaNumericTokenizer(version, reader); |
| 64 | ||
| 65 | 23 | TokenStream stream = source; |
| 66 | ||
| 67 | 23 | stream = new WordDelimiterFilter(stream, |
| 68 | WordDelimiterFilter.CATENATE_WORDS | |
| 69 | | WordDelimiterFilter.GENERATE_WORD_PARTS | |
| 70 | | WordDelimiterFilter.GENERATE_NUMBER_PARTS | |
| 71 | | WordDelimiterFilter.PRESERVE_ORIGINAL | |
| 72 | | WordDelimiterFilter.SPLIT_ON_CASE_CHANGE | |
| 73 | | WordDelimiterFilter.SPLIT_ON_NUMERICS | |
| 74 | | WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null); | |
| 75 | ||
| 76 | 23 | stream = new LowerCaseFilter(version, stream); |
| 77 | 23 | stream = new StopFilter(version, stream, StopAnalyzer.ENGLISH_STOP_WORDS_SET); |
| 78 | ||
| 79 | 23 | return new TokenStreamComponents(source, stream); |
| 80 | } | |
| 81 | } |