/// <summary>
/// Indexes 500 documents into a <c>RAMDirectory</c>, runs a <c>SpanQueryFilter</c>
/// for the term <c>English.IntToEnglish(10).Trim()</c> over field "field", and
/// verifies that the resulting doc id set and the per-document position list
/// are consistent with each other.
/// </summary>
public virtual void TestFilterWorks()
{
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    for (int i = 0; i < 500; i++)
    {
        // The same IntToEnglish(i) text is written on both sides of "equals",
        // which is why two positions per matching document are expected below.
        Document document = new Document();
        document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(document);
    }
    writer.Close();

    IndexReader reader = IndexReader.Open(dir, true);
    try
    {
        SpanTermQuery query = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim()));
        SpanQueryFilter filter = new SpanQueryFilter(query);
        SpanFilterResult result = filter.BitSpans(reader);

        DocIdSet docIdSet = result.DocIdSet;
        Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be");
        AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10);

        var spans = result.Positions;
        Assert.IsTrue(spans != null, "spans is null and it shouldn't be");

        // One PositionInfo entry is expected per document in the doc id set.
        int size = GetDocIdSetSize(docIdSet);
        Assert.AreEqual(size, spans.Count, "spans Size: " + spans.Count + " is not: " + size);

        foreach (SpanFilterResult.PositionInfo info in spans)
        {
            Assert.IsTrue(info != null, "info is null and it shouldn't be");
            // The doc should indicate the bit is on
            AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc);
            // There should be two positions in each
            Assert.AreEqual(2, info.Positions.Count, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2);
        }
    }
    finally
    {
        // Close the reader even when one of the assertions above throws.
        reader.Close();
    }
}
/// <summary>
/// Indexes 500 documents into a <c>RAMDirectory</c>, runs a <c>SpanQueryFilter</c>
/// for the term <c>English.IntToEnglish(10).Trim()</c> over field "field", and
/// verifies that the resulting bit set and the per-document position list
/// are consistent with each other.
/// </summary>
public virtual void TestFilterWorks()
{
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
    for (int i = 0; i < 500; i++)
    {
        // The same IntToEnglish(i) text is written on both sides of "equals",
        // which is why two positions per matching document are expected below.
        Document document = new Document();
        document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.TOKENIZED));
        writer.AddDocument(document);
    }
    writer.Close();

    IndexReader reader = IndexReader.Open(dir);
    try
    {
        SpanTermQuery query = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim()));
        SpanQueryFilter filter = new SpanQueryFilter(query);
        SpanFilterResult result = filter.BitSpans(reader);

        System.Collections.BitArray bits = result.GetBits();
        Assert.IsTrue(bits != null, "bits is null and it shouldn't be");
        Assert.IsTrue(bits.Get(10), "tenth bit is not on");

        System.Collections.IList spans = result.GetPositions();
        Assert.IsTrue(spans != null, "spans is null and it shouldn't be");

        // Count the set bits by hand; one PositionInfo entry is expected per set bit.
        int cardinality = 0;
        for (int i = 0; i < bits.Count; i++)
        {
            if (bits.Get(i))
            {
                cardinality++;
            }
        }
        Assert.AreEqual(cardinality, spans.Count, "spans Size: " + spans.Count + " is not: " + cardinality);

        foreach (SpanFilterResult.PositionInfo info in spans)
        {
            Assert.IsTrue(info != null, "info is null and it shouldn't be");
            // The doc should indicate the bit is on
            Assert.IsTrue(bits.Get(info.GetDoc()), "Bit is not on and it should be");
            // There should be two positions in each
            Assert.AreEqual(2, info.GetPositions().Count, "info.getPositions() Size: " + info.GetPositions().Count + " is not: " + 2);
        }
    }
    finally
    {
        // Close the reader even when one of the assertions above throws.
        reader.Close();
    }
}
/// <summary>
/// Indexes 500 documents into a <c>RAMDirectory</c>, runs a <c>SpanQueryFilter</c>
/// for the term <c>English.IntToEnglish(10).Trim()</c> over field "field", and
/// verifies that the resulting doc id set and the per-document position list
/// are consistent with each other. This variant passes <c>null</c> as the
/// trailing argument of the writer, reader and filter calls.
/// </summary>
public virtual void TestFilterWorks()
{
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED, null);
    for (int i = 0; i < 500; i++)
    {
        // The same IntToEnglish(i) text is written on both sides of "equals",
        // which is why two positions per matching document are expected below.
        Document document = new Document();
        document.Add(new Field("field", English.IntToEnglish(i) + " equals " + English.IntToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(document, null);
    }
    writer.Close();

    IndexReader reader = IndexReader.Open(dir, true, null);
    try
    {
        SpanTermQuery query = new SpanTermQuery(new Term("field", English.IntToEnglish(10).Trim()));
        SpanQueryFilter filter = new SpanQueryFilter(query);
        SpanFilterResult result = filter.BitSpans(reader, null);

        DocIdSet docIdSet = result.DocIdSet;
        Assert.IsTrue(docIdSet != null, "docIdSet is null and it shouldn't be");
        AssertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10);

        var spans = result.Positions;
        Assert.IsTrue(spans != null, "spans is null and it shouldn't be");

        // One PositionInfo entry is expected per document in the doc id set.
        int size = GetDocIdSetSize(docIdSet);
        Assert.AreEqual(size, spans.Count, "spans Size: " + spans.Count + " is not: " + size);

        foreach (SpanFilterResult.PositionInfo info in spans)
        {
            Assert.IsTrue(info != null, "info is null and it shouldn't be");
            // The doc should indicate the bit is on
            AssertContainsDocId("docIdSet doesn't contain docId " + info.Doc, docIdSet, info.Doc);
            // There should be two positions in each
            Assert.AreEqual(2, info.Positions.Count, "info.getPositions() Size: " + info.Positions.Count + " is not: " + 2);
        }
    }
    finally
    {
        // Close the reader even when one of the assertions above throws.
        reader.Close();
    }
}
/// <summary>
/// Exercises <c>CachingSpanFilter</c> against a near-real-time reader while a
/// document with id "1" is added, deleted, re-added and deleted again, first
/// with <c>DeletesMode.IGNORE</c> and then with <c>DeletesMode.RECACHE</c>.
/// </summary>
public void TestEnforceDeletions()
{
    Directory directory = new MockRAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    IndexReader reader = writer.GetReader();
    IndexSearcher searcher = new IndexSearcher(reader);

    // The single term that identifies the test document; used both to build
    // the span filter and to delete the document.
    Term idTerm = new Term("id", "1");

    // --- Phase 1: index one document and confirm it is visible after a refresh.
    Document idDoc = new Document();
    idDoc.Add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
    writer.AddDocument(idDoc);

    reader = RefreshReader(reader);
    searcher = new IndexSearcher(reader);

    TopDocs hits = searcher.Search(new MatchAllDocsQuery(), 1);
    Assert.AreEqual(1, hits.TotalHits, "Should find a hit...");

    SpanTermQuery idQuery = new SpanTermQuery(idTerm);
    SpanFilter spanFilter = new SpanQueryFilter(idQuery);

    // --- Phase 2: caching filter that ignores deletions.
    CachingSpanFilter cachingFilter = new CachingSpanFilter(spanFilter, CachingWrapperFilter.DeletesMode.IGNORE);

    hits = searcher.Search(new MatchAllDocsQuery(), cachingFilter, 1);
    Assert.AreEqual(1, hits.TotalHits, "[query + filter] Should find a hit...");

    ConstantScoreQuery filterOnlyQuery = new ConstantScoreQuery(cachingFilter);
    hits = searcher.Search(filterOnlyQuery, 1);
    Assert.AreEqual(1, hits.TotalHits, "[just filter] Should find a hit...");

    // Delete the document and refresh. The filtered MatchAllDocsQuery is
    // expected to return nothing, while the filter-only query is still
    // expected to report one hit in IGNORE mode.
    writer.DeleteDocuments(idTerm);

    reader = RefreshReader(reader);
    searcher = new IndexSearcher(reader);

    hits = searcher.Search(new MatchAllDocsQuery(), cachingFilter, 1);
    Assert.AreEqual(0, hits.TotalHits, "[query + filter] Should *not* find a hit...");

    hits = searcher.Search(filterOnlyQuery, 1);
    Assert.AreEqual(1, hits.TotalHits, "[just filter] Should find a hit...");

    // --- Phase 3: switch to a RECACHE-mode filter and index the document again.
    cachingFilter = new CachingSpanFilter(spanFilter, CachingWrapperFilter.DeletesMode.RECACHE);

    writer.AddDocument(idDoc);

    reader = RefreshReader(reader);
    searcher = new IndexSearcher(reader);

    hits = searcher.Search(new MatchAllDocsQuery(), cachingFilter, 1);
    Assert.AreEqual(1, hits.TotalHits, "[query + filter] Should find a hit...");

    filterOnlyQuery = new ConstantScoreQuery(cachingFilter);
    hits = searcher.Search(filterOnlyQuery, 1);
    Assert.AreEqual(1, hits.TotalHits, "[just filter] Should find a hit...");

    // --- Phase 4: reopening a reader that saw no new deletions must not
    // increase the filter's miss count.
    IndexReader reopened = RefreshReader(reader);
    Assert.IsTrue(reader != reopened);
    reader = reopened;
    searcher = new IndexSearcher(reader);

    int missesBefore = cachingFilter.missCount;
    hits = searcher.Search(filterOnlyQuery, 1);
    Assert.AreEqual(1, hits.TotalHits, "[just filter] Should find a hit...");
    Assert.AreEqual(missesBefore, cachingFilter.missCount);

    // --- Phase 5: delete again; in RECACHE mode neither query should match.
    writer.DeleteDocuments(idTerm);

    reader = RefreshReader(reader);
    searcher = new IndexSearcher(reader);

    hits = searcher.Search(new MatchAllDocsQuery(), cachingFilter, 1);
    Assert.AreEqual(0, hits.TotalHits, "[query + filter] Should *not* find a hit...");

    hits = searcher.Search(filterOnlyQuery, 1);
    Assert.AreEqual(0, hits.TotalHits, "[just filter] Should *not* find a hit...");
}