Fixed a bug where short words at the end of a CPE were being ignored in verifyEntry. Also added a minimum score of 0.08 for documents retrieved from Lucene in order to prune bad matches earlier.

Former-commit-id: b1720b2fba627507c601dc1e5b08a5b540095271
This commit is contained in:
Jeremy Long
2013-08-16 07:15:10 -04:00
parent 33249fad21
commit c4cde366e8

View File

@@ -275,13 +275,22 @@ public class CPEAnalyzer implements Analyzer {
final TopDocs docs = cpe.search(searchString, MAX_QUERY_RESULTS);
for (ScoreDoc d : docs.scoreDocs) {
final Document doc = cpe.getDocument(d.doc);
final IndexEntry entry = new IndexEntry();
entry.setVendor(doc.get(Fields.VENDOR));
entry.setProduct(doc.get(Fields.PRODUCT));
entry.setSearchScore(d.score);
if (!ret.contains(entry)) {
ret.add(entry);
if (d.score >= 0.08) {
final Document doc = cpe.getDocument(d.doc);
final IndexEntry entry = new IndexEntry();
entry.setVendor(doc.get(Fields.VENDOR));
entry.setProduct(doc.get(Fields.PRODUCT));
// if (d.score < 0.08) {
// System.out.print(entry.getVendor());
// System.out.print(":");
// System.out.print(entry.getProduct());
// System.out.print(":");
// System.out.println(d.score);
// }
entry.setSearchScore(d.score);
if (!ret.contains(entry)) {
ret.add(entry);
}
}
}
return ret;
@@ -454,9 +463,10 @@ public class CPEAnalyzer implements Analyzer {
list.add(word);
}
}
// if (tempWord != null) {
// //for now ignore any last single letter words...
// }
if (tempWord != null) {
String tmp = list.get(list.size() - 1) + tempWord;
list.add(tmp);
}
boolean contains = true;
for (String word : list) {
contains &= ec.containsUsedString(word);