Added a maximum evidence-value length to limit Lucene query parse issues

This commit is contained in:
Jeremy Long
2017-12-20 06:26:03 -05:00
parent b91d086340
commit 16a6a2d2d8

View File

@@ -251,20 +251,20 @@ public class CPEAnalyzer extends AbstractAnalyzer {
* @param evidence an iterable set of evidence to concatenate * @param evidence an iterable set of evidence to concatenate
* @return the new evidence text * @return the new evidence text
*/ */
@SuppressWarnings("null")
private String addEvidenceWithoutDuplicateTerms(final String text, final Iterable<Evidence> evidence) { private String addEvidenceWithoutDuplicateTerms(final String text, final Iterable<Evidence> evidence) {
final String txt = (text == null) ? "" : text; final String txt = (text == null) ? "" : text;
final StringBuilder sb = new StringBuilder(); final StringBuilder sb = new StringBuilder(text.length() * 2);
sb.append(' ').append(txt).append(' '); sb.append(' ').append(txt).append(' ');
for (Evidence e : evidence) { for (Evidence e : evidence) {
final String value = e.getValue(); String value = e.getValue();
//removed as the URLTokenizingFilter was created if (value.length() > 1000) {
//hack to get around the fact that lucene does a really good job of recognizing domains and not splitting them. value = value.substring(0, 1000);
// if (value.startsWith("http://")) { final int pos = value.lastIndexOf(" ");
// value = value.substring(7).replaceAll("\\.", " "); if (pos > 0) {
// } value = value.substring(0, pos);
// if (value.startsWith("https://")) { }
// value = value.substring(8).replaceAll("\\.", " "); }
// }
if (sb.indexOf(" " + value + " ") < 0) { if (sb.indexOf(" " + value + " ") < 0) {
sb.append(value).append(' '); sb.append(value).append(' ');
} }