/// <summary>
/// Searches with a <c>DuplicateFilter</c> constructed from the key field alone
/// and checks that no two returned hits carry the same key-field value.
/// </summary>
public void TestDefaultFilter()
{
    DuplicateFilter filter = new DuplicateFilter(KEY_FIELD);
    Hits hits = searcher.Search(tq, filter);

    // Key values seen so far; meeting one a second time means a duplicate got through.
    Hashtable seenUrls = new Hashtable();
    int index = 0;
    while (index < hits.Length())
    {
        String key = hits.Doc(index).Get(KEY_FIELD);
        Assert.IsFalse(seenUrls.Contains(key), "No duplicate urls should be returned");
        seenUrls.Add(key, key);
        index++;
    }
}
/// <summary>
/// Compares this filter with another object. The two are equal when they are the
/// same instance, or when they have the same runtime type and matching
/// <c>keepMode</c>, <c>processingMode</c> and <c>fieldName</c>.
/// </summary>
/// <param name="obj">The object to compare against; may be null.</param>
/// <returns>true when <paramref name="obj"/> is equal to this filter; otherwise false.</returns>
public override bool Equals(Object obj)
{
    // Same instance: nothing further to compare.
    if (Object.ReferenceEquals(this, obj))
    {
        return true;
    }

    // Null, or a different runtime type, can never be equal.
    if (obj == null || obj.GetType() != this.GetType())
    {
        return false;
    }

    DuplicateFilter that = (DuplicateFilter)obj;

    if (keepMode != that.keepMode)
    {
        return false;
    }

    if (processingMode != that.processingMode)
    {
        return false;
    }

    // Field names match when both are null/identical, or when this one is
    // non-null and reports equality with the other.
    if (fieldName == that.fieldName)
    {
        return true;
    }

    return fieldName != null && fieldName.Equals(that.fieldName);
}
/// <summary>
/// Searches with a <c>DuplicateFilter</c> switched to
/// <c>PM_FAST_INVALIDATION</c> processing and checks that at least one hit is
/// returned, that no key-field value appears twice, and that exactly two
/// distinct key values are found.
/// </summary>
public void TestFastFilter()
{
    DuplicateFilter filter = new DuplicateFilter(KEY_FIELD);
    filter.SetProcessingMode(DuplicateFilter.PM_FAST_INVALIDATION);

    Hits hits = searcher.Search(tq, filter);
    Assert.IsTrue(hits.Length() > 0, "Filtered searching should have found some matches");

    // Key values seen so far; meeting one a second time means a duplicate got through.
    Hashtable seenUrls = new Hashtable();
    int index = 0;
    while (index < hits.Length())
    {
        String key = hits.Doc(index).Get(KEY_FIELD);
        Assert.IsFalse(seenUrls.Contains(key), "No duplicate urls should be returned");
        seenUrls.Add(key, key);
        index++;
    }

    Assert.AreEqual(2, seenUrls.Count, "Two urls found");
}
/// <summary>
/// Searches with a <c>DuplicateFilter</c> set to
/// <c>KM_USE_FIRST_OCCURRENCE</c> and checks, for every hit, that the hit's
/// document id equals the first document listed for that hit's key term.
/// </summary>
public void TestKeepsFirstFilter()
{
    DuplicateFilter df = new DuplicateFilter(KEY_FIELD);
    df.SetKeepMode(DuplicateFilter.KM_USE_FIRST_OCCURRENCE);

    Hits h = searcher.Search(tq, df);
    Assert.IsTrue(h.Length() > 0, "Filtered searching should have found some matches");

    for (int i = 0; i < h.Length(); i++)
    {
        Document d = h.Doc(i);
        String url = d.Get(KEY_FIELD);

        TermDocs td = reader.TermDocs(new Term(KEY_FIELD, url));
        try
        {
            // Doc() is only meaningful after a successful Next(); fail loudly
            // instead of comparing against an unpositioned enumerator.
            Assert.IsTrue(td.Next(), "Key term should match at least one document");

            // The first posting for the key term is the expected survivor.
            int firstDoc = td.Doc();
            Assert.AreEqual(firstDoc, h.Id(i), "Duplicate urls should return first doc");
        }
        finally
        {
            // Release the enumerator even when an assertion above throws.
            td.Close();
        }
    }
}
/// <summary>
/// Searches (top 1000 results) with a <c>DuplicateFilter</c> whose
/// <c>KeepMode</c> is <c>KM_USE_FIRST_OCCURRENCE</c> and checks, for every hit,
/// that the hit's document id equals the first document listed for that hit's
/// key term.
/// </summary>
public void TestKeepsFirstFilter()
{
    DuplicateFilter df = new DuplicateFilter(KEY_FIELD);
    df.KeepMode = DuplicateFilter.KM_USE_FIRST_OCCURRENCE;

    ScoreDoc[] h = searcher.Search(tq, df, 1000).ScoreDocs;
    Assert.IsTrue(h.Length > 0, "Filtered searching should have found some matches");

    foreach (ScoreDoc hit in h)
    {
        Document d = searcher.Doc(hit.Doc);
        String url = d.Get(KEY_FIELD);

        // Dispose the enumerator even when an assertion inside the block throws.
        using (TermDocs td = reader.TermDocs(new Term(KEY_FIELD, url)))
        {
            // Doc is only meaningful after a successful Next(); fail loudly
            // instead of comparing against an unpositioned enumerator.
            Assert.IsTrue(td.Next(), "Key term should match at least one document");

            // The first posting for the key term is the expected survivor.
            int firstDoc = td.Doc;
            Assert.AreEqual(firstDoc, hit.Doc, "Duplicate urls should return first doc");
        }
    }
}