/// <summary>
/// Checks that a <see cref="DuplicateFilter"/> configured with
/// <c>KeepMode.KM_USE_LAST_OCCURRENCE</c> returns, for every hit, the last document
/// enumerated for that hit's KEY_FIELD term.
/// </summary>
public void TestKeepsLastFilter()
{
    DuplicateFilter filter = new DuplicateFilter(KEY_FIELD)
    {
        KeepMode = KeepMode.KM_USE_LAST_OCCURRENCE
    };

    ScoreDoc[] hits = searcher.Search(tq, filter, 1000).ScoreDocs;
    assertTrue("Filtered searching should have found some matches", hits.Length > 0);

    foreach (ScoreDoc hit in hits)
    {
        // Look up the key value stored on the document that was returned.
        string url = searcher.Doc(hit.Doc).Get(KEY_FIELD);

        // Enumerate the live documents carrying that key value.
        DocsEnum docs = TestUtil.Docs(
            Random(),
            reader,
            KEY_FIELD,
            new BytesRef(url),
            MultiFields.GetLiveDocs(reader),
            null,
            0);

        // Walk the enumeration to its end, remembering the most recent doc id seen.
        // Stays at 0 if the enumeration yields nothing.
        int lastDoc = 0;
        while (true)
        {
            if (docs.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
            {
                break;
            }

            lastDoc = docs.DocID;
        }

        assertEquals("Duplicate urls should return last doc", lastDoc, hit.Doc);
    }
}
/// <summary>
/// Checks that a <see cref="DuplicateFilter"/> left in its default configuration never
/// returns two hits that share the same KEY_FIELD value.
/// </summary>
public void TestDefaultFilter()
{
    DuplicateFilter df = new DuplicateFilter(KEY_FIELD);
    HashSet<string> results = new HashSet<string>();
    ScoreDoc[] hits = searcher.Search(tq, df, 1000).ScoreDocs;

    // Without this guard the loop below never runs on an empty result and the test
    // would pass without checking anything; the sibling filter tests assert the same.
    assertTrue("Filtered searching should have found some matches", hits.Length > 0);

    foreach (ScoreDoc hit in hits)
    {
        Document d = searcher.Doc(hit.Doc);
        string url = d.Get(KEY_FIELD);

        // HashSet<T>.Add returns false when the value is already present, so one call
        // both records the url and detects a duplicate.
        assertTrue("No duplicate urls should be returned", results.Add(url));
    }
}
/// <summary>
/// Compares this filter with <paramref name="obj"/> for equality.
/// </summary>
/// <remarks>
/// Two distinct instances are equal only when they have exactly the same runtime type,
/// the same keep mode, the same processing mode, and this instance's field name is
/// non-null and ordinally equal to the other's. A null field name therefore never
/// matches another instance, even one whose field name is also null.
/// </remarks>
/// <param name="obj">The object to compare against; may be <c>null</c>.</param>
/// <returns><c>true</c> if the objects are considered equal; otherwise <c>false</c>.</returns>
public override bool Equals(object obj)
{
    // Same reference: equal without inspecting any field.
    if (ReferenceEquals(this, obj))
    {
        return true;
    }

    // Null or a different runtime type (including subclasses) is never equal.
    if (obj == null || obj.GetType() != GetType())
    {
        return false;
    }

    DuplicateFilter that = (DuplicateFilter)obj;

    bool sameModes = keepMode == that.keepMode && processingMode == that.processingMode;
    if (!sameModes)
    {
        return false;
    }

    if (fieldName == null)
    {
        return false;
    }

    return fieldName.Equals(that.fieldName, StringComparison.Ordinal);
}
/// <summary>
/// Checks that a <see cref="DuplicateFilter"/> using
/// <c>ProcessingMode.PM_FAST_INVALIDATION</c> returns at least one hit, returns no
/// KEY_FIELD value more than once, and yields exactly two distinct values.
/// </summary>
public void TestFastFilter()
{
    DuplicateFilter filter = new DuplicateFilter(KEY_FIELD)
    {
        ProcessingMode = ProcessingMode.PM_FAST_INVALIDATION
    };

    ScoreDoc[] hits = searcher.Search(tq, filter, 1000).ScoreDocs;
    assertTrue("Filtered searching should have found some matches", hits.Length > 0);

    // Key values seen so far, used to detect a repeated url among the hits.
    HashSet<string> seenUrls = new HashSet<string>();
    for (int i = 0; i < hits.Length; i++)
    {
        string url = searcher.Doc(hits[i].Doc).Get(KEY_FIELD);

        bool alreadySeen = seenUrls.Contains(url);
        assertFalse("No duplicate urls should be returned", alreadySeen);
        seenUrls.Add(url);
    }

    assertEquals("Two urls found", 2, seenUrls.Count);
}