/// <summary>
/// Searches with a <c>DuplicateFilter</c> built on <c>KEY_FIELD</c> (no keep or
/// processing mode is set explicitly) and verifies that no <c>KEY_FIELD</c> value
/// is returned by more than one hit.
/// </summary>
public void TestDefaultFilter()
{
    DuplicateFilter df = new DuplicateFilter(KEY_FIELD);
    Hashtable seenUrls = new Hashtable();
    Hits h = searcher.Search(tq, df);

    // Guard against a vacuous pass: with zero hits the loop below would never
    // execute and the duplicate check would not be exercised at all.
    Assert.IsTrue(h.Length() > 0, "Filtered searching should have found some matches");

    for (int i = 0; i < h.Length(); i++)
    {
        Document d = h.Doc(i);
        String url = d.Get(KEY_FIELD);

        // Each url may be seen only once across the whole result set.
        Assert.IsFalse(seenUrls.Contains(url), "No duplicate urls should be returned");
        seenUrls.Add(url, url);
    }
}
예제 #2
0
 /// <summary>
 /// Searches with a <c>DuplicateFilter</c> built on <c>KEY_FIELD</c> (no keep or
 /// processing mode is set explicitly) and verifies that no <c>KEY_FIELD</c> value
 /// is returned by more than one hit.
 /// </summary>
 public void TestDefaultFilter()
 {
     DuplicateFilter df = new DuplicateFilter(KEY_FIELD);
     Hashtable seenUrls = new Hashtable();
     Hits h = searcher.Search(tq, df);

     // Guard against a vacuous pass: with zero hits the loop below would never
     // execute and the duplicate check would not be exercised at all.
     Assert.IsTrue(h.Length() > 0, "Filtered searching should have found some matches");

     for (int i = 0; i < h.Length(); i++)
     {
         Document d = h.Doc(i);
         String url = d.Get(KEY_FIELD);

         // Each url may be seen only once across the whole result set.
         Assert.IsFalse(seenUrls.Contains(url), "No duplicate urls should be returned");
         seenUrls.Add(url, url);
     }
 }
예제 #3
0
        /// <summary>
        /// Compares this filter with another object. They are equal when the other
        /// object is this same instance, or when it has exactly the same runtime type
        /// and matching <c>keepMode</c>, <c>processingMode</c> and <c>fieldName</c>.
        /// </summary>
        /// <param name="obj">The object to compare against; may be null.</param>
        /// <returns>True when the two objects are considered equal, otherwise false.</returns>
        public override bool Equals(Object obj)
        {
            // Same instance: nothing else to compare.
            if (ReferenceEquals(this, obj))
            {
                return true;
            }

            // Null or a different runtime type can never be equal.
            if (obj == null || GetType() != obj.GetType())
            {
                return false;
            }

            DuplicateFilter that = (DuplicateFilter)obj;

            if (keepMode != that.keepMode)
            {
                return false;
            }

            if (processingMode != that.processingMode)
            {
                return false;
            }

            // Null-safe comparison: two null field names count as equal.
            return object.Equals(fieldName, that.fieldName);
        }
예제 #4
0
 /// <summary>
 /// Searches with a <c>DuplicateFilter</c> switched to
 /// <c>PM_FAST_INVALIDATION</c> processing mode and verifies that at least one
 /// hit comes back, that no <c>KEY_FIELD</c> value repeats, and that exactly two
 /// distinct values are returned.
 /// </summary>
 public void TestFastFilter()
 {
     DuplicateFilter filter = new DuplicateFilter(KEY_FIELD);
     filter.SetProcessingMode(DuplicateFilter.PM_FAST_INVALIDATION);

     Hits hits = searcher.Search(tq, filter);
     Assert.IsTrue(hits.Length() > 0, "Filtered searching should have found some matches");

     // Tracks every url already encountered so repeats can be detected.
     Hashtable seenUrls = new Hashtable();
     int index = 0;
     while (index < hits.Length())
     {
         String url = hits.Doc(index).Get(KEY_FIELD);
         Assert.IsFalse(seenUrls.Contains(url), "No duplicate urls should be returned");
         seenUrls.Add(url, url);
         index++;
     }

     Assert.AreEqual(2, seenUrls.Count, "Two urls found");
 }
예제 #5
0
 /// <summary>
 /// Searches with a <c>DuplicateFilter</c> in <c>KM_USE_FIRST_OCCURRENCE</c> keep
 /// mode and verifies, for every hit, that the hit's document id equals the first
 /// document enumerated by <c>reader.TermDocs</c> for that hit's <c>KEY_FIELD</c> value.
 /// </summary>
 public void TestKeepsFirstFilter()
 {
     DuplicateFilter df = new DuplicateFilter(KEY_FIELD);
     df.SetKeepMode(DuplicateFilter.KM_USE_FIRST_OCCURRENCE);
     Hits h = searcher.Search(tq, df);
     Assert.IsTrue(h.Length() > 0, "Filtered searching should have found some matches");
     for (int i = 0; i < h.Length(); i++)
     {
         Document d = h.Doc(i);
         String url = d.Get(KEY_FIELD);
         TermDocs td = reader.TermDocs(new Term(KEY_FIELD, url));
         try
         {
             // Advance to the first document listed for this url. Fail loudly if
             // there is none instead of reading a doc id from an unpositioned enumerator.
             Assert.IsTrue(td.Next(), "Term docs should contain at least one doc for the url");
             int firstDoc = td.Doc();
             Assert.AreEqual(firstDoc, h.Id(i), "Duplicate urls should return first doc");
         }
         finally
         {
             // Release the enumerator even when an assertion above throws.
             td.Close();
         }
     }
 }
        /// <summary>
        /// Searches with a <c>DuplicateFilter</c> in <c>KM_USE_FIRST_OCCURRENCE</c> keep
        /// mode and verifies, for every hit, that the hit's document id equals the first
        /// document enumerated by <c>reader.TermDocs</c> for that hit's <c>KEY_FIELD</c> value.
        /// </summary>
        public void TestKeepsFirstFilter()
        {
            DuplicateFilter df = new DuplicateFilter(KEY_FIELD);

            df.KeepMode = DuplicateFilter.KM_USE_FIRST_OCCURRENCE;
            ScoreDoc[] h = searcher.Search(tq, df, 1000).ScoreDocs;
            Assert.IsTrue(h.Length > 0, "Filtered searching should have found some matches");
            for (int i = 0; i < h.Length; i++)
            {
                Document d   = searcher.Doc(h[i].Doc);
                String   url = d.Get(KEY_FIELD);

                // The using statement releases the enumerator even when an assertion throws.
                using (TermDocs td = reader.TermDocs(new Term(KEY_FIELD, url)))
                {
                    // Advance to the first document listed for this url. Fail loudly if
                    // there is none instead of reading a doc id from an unpositioned enumerator.
                    Assert.IsTrue(td.Next(), "Term docs should contain at least one doc for the url");
                    int firstDoc = td.Doc;
                    Assert.AreEqual(firstDoc, h[i].Doc, "Duplicate urls should return first doc");
                }
            }
        }
 /// <summary>
 /// Searches with a <c>DuplicateFilter</c> in <c>KM_USE_FIRST_OCCURRENCE</c> keep
 /// mode and verifies, for every hit, that the hit's document id equals the first
 /// document enumerated by <c>reader.TermDocs</c> for that hit's <c>KEY_FIELD</c> value.
 /// </summary>
 public void TestKeepsFirstFilter()
 {
     DuplicateFilter df = new DuplicateFilter(KEY_FIELD);
     df.SetKeepMode(DuplicateFilter.KM_USE_FIRST_OCCURRENCE);
     Hits h = searcher.Search(tq, df);
     Assert.IsTrue(h.Length() > 0, "Filtered searching should have found some matches");
     for (int i = 0; i < h.Length(); i++)
     {
         Document d = h.Doc(i);
         String url = d.Get(KEY_FIELD);
         TermDocs td = reader.TermDocs(new Term(KEY_FIELD, url));
         try
         {
             // Advance to the first document listed for this url. Fail loudly if
             // there is none instead of reading a doc id from an unpositioned enumerator.
             Assert.IsTrue(td.Next(), "Term docs should contain at least one doc for the url");
             int firstDoc = td.Doc();
             Assert.AreEqual(firstDoc, h.Id(i), "Duplicate urls should return first doc");
         }
         finally
         {
             // Release the enumerator even when an assertion above throws.
             td.Close();
         }
     }
 }
 /// <summary>
 /// Searches with a <c>DuplicateFilter</c> switched to
 /// <c>PM_FAST_INVALIDATION</c> processing mode and verifies that at least one
 /// hit comes back, that no <c>KEY_FIELD</c> value repeats, and that exactly two
 /// distinct values are returned.
 /// </summary>
 public void TestFastFilter()
 {
     DuplicateFilter filter = new DuplicateFilter(KEY_FIELD);
     filter.SetProcessingMode(DuplicateFilter.PM_FAST_INVALIDATION);

     Hits hits = searcher.Search(tq, filter);
     Assert.IsTrue(hits.Length() > 0, "Filtered searching should have found some matches");

     // Tracks every url already encountered so repeats can be detected.
     Hashtable seenUrls = new Hashtable();
     int index = 0;
     while (index < hits.Length())
     {
         String url = hits.Doc(index).Get(KEY_FIELD);
         Assert.IsFalse(seenUrls.Contains(url), "No duplicate urls should be returned");
         seenUrls.Add(url, url);
         index++;
     }

     Assert.AreEqual(2, seenUrls.Count, "Two urls found");
 }
예제 #9
0
 /// <summary>
 /// Searches with a <c>DuplicateFilter</c> in <c>KM_USE_FIRST_OCCURRENCE</c> keep
 /// mode and verifies, for every hit, that the hit's document id equals the first
 /// document enumerated by <c>reader.TermDocs</c> for that hit's <c>KEY_FIELD</c> value.
 /// </summary>
 public void TestKeepsFirstFilter()
 {
     DuplicateFilter df = new DuplicateFilter(KEY_FIELD);
     df.KeepMode = DuplicateFilter.KM_USE_FIRST_OCCURRENCE;
     ScoreDoc[] h = searcher.Search(tq, df, 1000).ScoreDocs;
     Assert.IsTrue(h.Length > 0, "Filtered searching should have found some matches");
     for (int i = 0; i < h.Length; i++)
     {
         Document d = searcher.Doc(h[i].Doc);
         String url = d.Get(KEY_FIELD);

         // The using statement releases the enumerator even when an assertion throws.
         using (TermDocs td = reader.TermDocs(new Term(KEY_FIELD, url)))
         {
             // Advance to the first document listed for this url. Fail loudly if
             // there is none instead of reading a doc id from an unpositioned enumerator.
             Assert.IsTrue(td.Next(), "Term docs should contain at least one doc for the url");
             int firstDoc = td.Doc;
             Assert.AreEqual(firstDoc, h[i].Doc, "Duplicate urls should return first doc");
         }
     }
 }