/// <summary>
/// Tests <c>Terms.Intersect</c> with a non-null startTerm: the enum must seek
/// past the start term, rewind correctly when the ceiling label is missing on
/// a later arc, and return null immediately when no matching term follows the
/// start term.
/// </summary>
public virtual void TestIntersectStartTerm()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    // Single merged segment with insertion-order docIDs so the NextDoc()
    // expectations below are stable.
    iwc.SetMergePolicy(new LogDocMergePolicy());
    RandomIndexWriter w = new RandomIndexWriter(Random, dir, iwc);
    Document doc = new Document();
    doc.Add(NewStringField("field", "abc", Field.Store.NO));
    w.AddDocument(doc);
    doc = new Document();
    doc.Add(NewStringField("field", "abd", Field.Store.NO));
    w.AddDocument(doc);
    doc = new Document();
    doc.Add(NewStringField("field", "acd", Field.Store.NO));
    w.AddDocument(doc);
    doc = new Document();
    doc.Add(NewStringField("field", "bcd", Field.Store.NO));
    w.AddDocument(doc);
    w.ForceMerge(1);
    DirectoryReader r = w.GetReader();
    w.Dispose();
    AtomicReader sub = GetOnlySegmentReader(r);
    Terms terms = sub.Fields.GetTerms("field");

    // Accepts every term ending in 'd': abd, acd, bcd.
    Automaton automaton = (new RegExp(".*d", RegExpSyntax.NONE)).ToAutomaton();
    CompiledAutomaton ca = new CompiledAutomaton(automaton, false, false);
    TermsEnum te;

    // should seek to startTerm
    te = terms.Intersect(ca, new BytesRef("aad"));
    Assert.AreEqual("abd", te.Next().Utf8ToString());
    Assert.AreEqual(1, te.Docs(null, null, DocsFlags.NONE).NextDoc());
    Assert.AreEqual("acd", te.Next().Utf8ToString());
    Assert.AreEqual(2, te.Docs(null, null, DocsFlags.NONE).NextDoc());
    Assert.AreEqual("bcd", te.Next().Utf8ToString());
    Assert.AreEqual(3, te.Docs(null, null, DocsFlags.NONE).NextDoc());
    Assert.IsNull(te.Next());

    // should fail to find ceil label on second arc, rewind
    te = terms.Intersect(ca, new BytesRef("add"));
    Assert.AreEqual("bcd", te.Next().Utf8ToString());
    Assert.AreEqual(3, te.Docs(null, null, DocsFlags.NONE).NextDoc());
    Assert.IsNull(te.Next());

    // should reach end
    te = terms.Intersect(ca, new BytesRef("bcd"));
    Assert.IsNull(te.Next());
    te = terms.Intersect(ca, new BytesRef("ddd"));
    Assert.IsNull(te.Next());

    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Verifies that <c>Terms.Intersect</c> handles the empty-string term:
/// enumerating with a null start term must include "", and passing "" as the
/// start term must skip it and resume at the term that follows.
/// </summary>
public virtual void TestIntersectEmptyString()
{
    Directory dir = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    config.SetMergePolicy(new LogDocMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, config);

    Document document = new Document();
    document.Add(NewStringField("field", "", Field.Store.NO));
    document.Add(NewStringField("field", "abc", Field.Store.NO));
    writer.AddDocument(document);

    document = new Document();
    // add empty string to both documents, so that singletonDocID == -1.
    // For a FST-based term dict, we'll expect to see the first arc is
    // flagged with HAS_FINAL_OUTPUT
    document.Add(NewStringField("field", "abc", Field.Store.NO));
    document.Add(NewStringField("field", "", Field.Store.NO));
    writer.AddDocument(document);

    writer.ForceMerge(1);
    DirectoryReader reader = writer.GetReader();
    writer.Dispose();
    AtomicReader segment = GetOnlySegmentReader(reader);
    Terms terms = segment.Fields.GetTerms("field");

    Automaton automaton = (new RegExp(".*", RegExpSyntax.NONE)).ToAutomaton(); // accept ALL
    CompiledAutomaton compiled = new CompiledAutomaton(automaton, false, false);

    // Null start term: the empty-string term must be enumerated first.
    TermsEnum termsEnum = terms.Intersect(compiled, null);
    DocsEnum docsEnum;
    Assert.AreEqual("", termsEnum.Next().Utf8ToString());
    docsEnum = termsEnum.Docs(null, null, DocsFlags.NONE);
    Assert.AreEqual(0, docsEnum.NextDoc());
    Assert.AreEqual(1, docsEnum.NextDoc());
    Assert.AreEqual("abc", termsEnum.Next().Utf8ToString());
    docsEnum = termsEnum.Docs(null, null, DocsFlags.NONE);
    Assert.AreEqual(0, docsEnum.NextDoc());
    Assert.AreEqual(1, docsEnum.NextDoc());
    Assert.IsNull(termsEnum.Next());

    // pass empty string
    termsEnum = terms.Intersect(compiled, new BytesRef(""));
    Assert.AreEqual("abc", termsEnum.Next().Utf8ToString());
    docsEnum = termsEnum.Docs(null, null, DocsFlags.NONE);
    Assert.AreEqual(0, docsEnum.NextDoc());
    Assert.AreEqual(1, docsEnum.NextDoc());
    Assert.IsNull(termsEnum.Next());

    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Basic <c>Terms.Intersect</c> checks with an accept-all automaton: full
/// enumeration from a null start term, then enumeration resuming after a
/// non-existent start term and after an existing one (which is excluded).
/// </summary>
public virtual void TestIntersectBasic()
{
    Directory dir = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    config.SetMergePolicy(new LogDocMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, config);

    Document document = new Document();
    document.Add(NewTextField("field", "aaa", Field.Store.NO));
    writer.AddDocument(document);
    document = new Document();
    document.Add(NewStringField("field", "bbb", Field.Store.NO));
    writer.AddDocument(document);
    document = new Document();
    document.Add(NewTextField("field", "ccc", Field.Store.NO));
    writer.AddDocument(document);

    writer.ForceMerge(1);
    DirectoryReader reader = writer.GetReader();
    writer.Dispose();
    AtomicReader segment = GetOnlySegmentReader(reader);
    Terms terms = segment.Fields.GetTerms("field");

    Automaton automaton = (new RegExp(".*", RegExpSyntax.NONE)).ToAutomaton();
    CompiledAutomaton compiled = new CompiledAutomaton(automaton, false, false);

    // No start term: every term is enumerated in order.
    TermsEnum termsEnum = terms.Intersect(compiled, null);
    Assert.AreEqual("aaa", termsEnum.Next().Utf8ToString());
    Assert.AreEqual(0, termsEnum.Docs(null, null, DocsFlags.NONE).NextDoc());
    Assert.AreEqual("bbb", termsEnum.Next().Utf8ToString());
    Assert.AreEqual(1, termsEnum.Docs(null, null, DocsFlags.NONE).NextDoc());
    Assert.AreEqual("ccc", termsEnum.Next().Utf8ToString());
    Assert.AreEqual(2, termsEnum.Docs(null, null, DocsFlags.NONE).NextDoc());
    Assert.IsNull(termsEnum.Next());

    // Start term between existing terms: resume at the next real term.
    termsEnum = terms.Intersect(compiled, new BytesRef("abc"));
    Assert.AreEqual("bbb", termsEnum.Next().Utf8ToString());
    Assert.AreEqual(1, termsEnum.Docs(null, null, DocsFlags.NONE).NextDoc());
    Assert.AreEqual("ccc", termsEnum.Next().Utf8ToString());
    Assert.AreEqual(2, termsEnum.Docs(null, null, DocsFlags.NONE).NextDoc());
    Assert.IsNull(termsEnum.Next());

    // Start term equal to an existing term: that term itself is excluded.
    termsEnum = terms.Intersect(compiled, new BytesRef("aaa"));
    Assert.AreEqual("bbb", termsEnum.Next().Utf8ToString());
    Assert.AreEqual(1, termsEnum.Docs(null, null, DocsFlags.NONE).NextDoc());
    Assert.AreEqual("ccc", termsEnum.Next().Utf8ToString());
    Assert.AreEqual(2, termsEnum.Docs(null, null, DocsFlags.NONE).NextDoc());
    Assert.IsNull(termsEnum.Next());

    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Terms api equivalency: asserts that two <c>Terms</c> instances expose the
/// same statistics, index-option flags, term enumeration, and seeking
/// behavior; when <paramref name="deep"/> is true, also duels several random
/// regexp intersections against each other.
/// </summary>
/// <param name="info">Context string included in assertion failure messages.</param>
/// <param name="leftReader">Reader owning <paramref name="leftTerms"/>, forwarded to the enum comparison.</param>
/// <param name="leftTerms">Left-hand terms; may be null only if <paramref name="rightTerms"/> is also null.</param>
/// <param name="rightTerms">Right-hand terms to compare against.</param>
/// <param name="deep">If true, additionally compare random automaton intersections.</param>
public void AssertTermsEquals(string info, IndexReader leftReader, Terms leftTerms, Terms rightTerms, bool deep)
{
    if (leftTerms == null || rightTerms == null)
    {
        // Either both sides are missing, or this is a failure.
        Assert.IsNull(leftTerms, info);
        Assert.IsNull(rightTerms, info);
        return;
    }
    AssertTermsStatisticsEquals(info, leftTerms, rightTerms);

    // Fix: these three assertions previously omitted the `info` context
    // string, unlike every other assertion in this method, losing the
    // diagnostic context on failure.
    Assert.AreEqual(leftTerms.HasOffsets(), rightTerms.HasOffsets(), info);
    Assert.AreEqual(leftTerms.HasPositions(), rightTerms.HasPositions(), info);
    Assert.AreEqual(leftTerms.HasPayloads(), rightTerms.HasPayloads(), info);

    TermsEnum leftTermsEnum = leftTerms.Iterator(null);
    TermsEnum rightTermsEnum = rightTerms.Iterator(null);
    AssertTermsEnumEquals(info, leftReader, leftTermsEnum, rightTermsEnum, true);

    AssertTermsSeekingEquals(info, leftTerms, rightTerms);

    if (deep)
    {
        int numIntersections = AtLeast(3);
        for (int i = 0; i < numIntersections; i++)
        {
            string re = AutomatonTestUtil.RandomRegexp(Random());
            CompiledAutomaton automaton = new CompiledAutomaton((new RegExp(re, RegExp.NONE)).ToAutomaton());
            if (automaton.Type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL)
            {
                // TODO: test start term too
                TermsEnum leftIntersection = leftTerms.Intersect(automaton, null);
                TermsEnum rightIntersection = rightTerms.Intersect(automaton, null);
                AssertTermsEnumEquals(info, leftReader, leftIntersection, rightIntersection, Rarely());
            }
        }
    }
}
// following code is almost an exact dup of code from TestDuelingCodecs: sorry!
/// <summary>
/// Asserts that two <c>Terms</c> instances expose equivalent statistics and
/// term enumerations; when <paramref name="deep"/> is true, also duels
/// several random regexp intersections against each other.
/// </summary>
public virtual void AssertTerms(Terms leftTerms, Terms rightTerms, bool deep)
{
    // Either both sides are missing, or both must be present.
    if (leftTerms == null || rightTerms == null)
    {
        Assert.IsNull(leftTerms);
        Assert.IsNull(rightTerms);
        return;
    }

    AssertTermsStatistics(leftTerms, rightTerms);

    // NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be different
    TermsEnum leftEnum = leftTerms.Iterator(null);
    TermsEnum rightEnum = rightTerms.Iterator(null);
    AssertTermsEnum(leftEnum, rightEnum, true);
    AssertTermsSeeking(leftTerms, rightTerms);

    if (!deep)
    {
        return;
    }

    int passes = AtLeast(3);
    for (int pass = 0; pass < passes; pass++)
    {
        string regexp = AutomatonTestUtil.RandomRegexp(Random());
        CompiledAutomaton ca = new CompiledAutomaton((new RegExp(regexp, RegExp.NONE)).ToAutomaton());
        if (ca.Type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL)
        {
            // TODO: test start term too
            TermsEnum leftIntersection = leftTerms.Intersect(ca, null);
            TermsEnum rightIntersection = rightTerms.Intersect(ca, null);
            AssertTermsEnum(leftIntersection, rightIntersection, Rarely());
        }
    }
}