added a max length to limit query parse issues

Author: Jeremy Long
Date:   2017-12-20 06:26:03 -05:00
parent b91d086340
commit 16a6a2d2d8


@@ -251,20 +251,20 @@ public class CPEAnalyzer extends AbstractAnalyzer {
      * @param evidence an iterable set of evidence to concatenate
      * @return the new evidence text
      */
+    @SuppressWarnings("null")
     private String addEvidenceWithoutDuplicateTerms(final String text, final Iterable<Evidence> evidence) {
         final String txt = (text == null) ? "" : text;
-        final StringBuilder sb = new StringBuilder();
+        final StringBuilder sb = new StringBuilder(text.length() * 2);
         sb.append(' ').append(txt).append(' ');
         for (Evidence e : evidence) {
-            final String value = e.getValue();
-            //removed as the URLTokenizingFilter was created
-            //hack to get around the fact that lucene does a really good job of recognizing domains and not splitting them.
-            // if (value.startsWith("http://")) {
-            //     value = value.substring(7).replaceAll("\\.", " ");
-            // }
-            // if (value.startsWith("https://")) {
-            //     value = value.substring(8).replaceAll("\\.", " ");
-            // }
+            String value = e.getValue();
+            if (value.length() > 1000) {
+                value = value.substring(0, 1000);
+                final int pos = value.lastIndexOf(" ");
+                if (pos > 0) {
+                    value = value.substring(0, pos);
+                }
+            }
             if (sb.indexOf(" " + value + " ") < 0) {
                 sb.append(value).append(' ');
             }
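
For reference, the truncation this commit introduces is self-contained enough to exercise on its own. The sketch below is a minimal re-implementation, not code from the repository: the class and method names (TruncationDemo, truncateAtWordBoundary) and the configurable maxLength parameter are hypothetical stand-ins for the hard-coded 1000-character limit in the diff above.

public class TruncationDemo {

    // Hypothetical helper mirroring the logic added above: cap the value
    // at maxLength, then back off to the last space so no token is cut in
    // half before the text is handed to the Lucene query parser.
    static String truncateAtWordBoundary(String value, final int maxLength) {
        if (value.length() > maxLength) {
            value = value.substring(0, maxLength);
            final int pos = value.lastIndexOf(" ");
            if (pos > 0) {
                value = value.substring(0, pos);
            }
        }
        return value;
    }

    public static void main(String[] args) {
        // Short values pass through untouched.
        System.out.println(truncateAtWordBoundary("apache commons", 1000)); // "apache commons"

        // Oversized values are cut back to the last space before the limit
        // (a limit of 10 is used here only to make the effect visible).
        System.out.println(truncateAtWordBoundary("apache commons lang", 10)); // "apache"
    }
}

Backing off to the last space matters because a hard cut at the length limit could leave a half token in the query string, which is presumably the kind of parse problem the commit message refers to. Note that if the value contains no space before the limit, the pos > 0 guard leaves the plain hard cut in place.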