Fixed a bug where short words at the end of a CPE were being ignored in verifyEntry. Also added a minimum score of 0.08 for documents retrieved from Lucene in order to prune bad matches earlier.

Former-commit-id: 5f6b87fa09b0acf851e1bbef5b1b53ec667ee562
This commit is contained in:
Jeremy Long
2013-08-16 07:15:10 -04:00
parent 05c05552da
commit f3cac80b2b

View File

@@ -275,13 +275,22 @@ public class CPEAnalyzer implements Analyzer {
final TopDocs docs = cpe.search(searchString, MAX_QUERY_RESULTS); final TopDocs docs = cpe.search(searchString, MAX_QUERY_RESULTS);
for (ScoreDoc d : docs.scoreDocs) { for (ScoreDoc d : docs.scoreDocs) {
final Document doc = cpe.getDocument(d.doc); if (d.score >= 0.08) {
final IndexEntry entry = new IndexEntry(); final Document doc = cpe.getDocument(d.doc);
entry.setVendor(doc.get(Fields.VENDOR)); final IndexEntry entry = new IndexEntry();
entry.setProduct(doc.get(Fields.PRODUCT)); entry.setVendor(doc.get(Fields.VENDOR));
entry.setSearchScore(d.score); entry.setProduct(doc.get(Fields.PRODUCT));
if (!ret.contains(entry)) { // if (d.score < 0.08) {
ret.add(entry); // System.out.print(entry.getVendor());
// System.out.print(":");
// System.out.print(entry.getProduct());
// System.out.print(":");
// System.out.println(d.score);
// }
entry.setSearchScore(d.score);
if (!ret.contains(entry)) {
ret.add(entry);
}
} }
} }
return ret; return ret;
@@ -454,9 +463,10 @@ public class CPEAnalyzer implements Analyzer {
list.add(word); list.add(word);
} }
} }
// if (tempWord != null) { if (tempWord != null) {
// //for now ignore any last single letter words... String tmp = list.get(list.size() - 1) + tempWord;
// } list.add(tmp);
}
boolean contains = true; boolean contains = true;
for (String word : list) { for (String word : list) {
contains &= ec.containsUsedString(word); contains &= ec.containsUsedString(word);