/// <summary>
/// Indexes 100 copies of a document whose "foo" field contains the term "test"
/// twice, indexed with docs+freqs only, then verifies that no positions enum is
/// available and that every document reports a frequency of exactly 2.
/// </summary>
public virtual void TestBasic()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

    FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
    fieldType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;

    Document doc = new Document();
    Field field = NewField("foo", "this is a test test", fieldType);
    doc.Add(field);

    for (int i = 0; i < 100; i++)
    {
        writer.AddDocument(doc);
    }

    IndexReader reader = writer.Reader;
    writer.Dispose();

    // Positions were not indexed, so asking for a positions enum must yield null.
    Assert.IsNull(MultiFields.GetTermPositionsEnum(reader, null, "foo", new BytesRef("test")));

    // "test" occurs exactly twice in every indexed document.
    DocsEnum docsEnum = TestUtil.Docs(Random(), reader, "foo", new BytesRef("test"), null, null, DocsEnum.FLAG_FREQS);
    while (docsEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
    {
        Assert.AreEqual(2, docsEnum.Freq());
    }

    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Tests that when a field is indexed with DOCS_ONLY, the codec still
/// returns 1 from DocsEnum.Freq() for every matching document.
/// </summary>
public virtual void TestDocsOnlyFreq()
{
    Directory dir = NewDirectory();
    Random random = Random();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));

    // A handful of documents is enough to assert this, but avoid the
    // single-document degenerate case.
    int numDocs = AtLeast(random, 50);
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        doc.Add(new StringField("f", "doc", Store.NO));
        writer.AddDocument(doc);
    }
    writer.Dispose();

    Term term = new Term("f", new BytesRef("doc"));
    DirectoryReader reader = DirectoryReader.Open(dir);
    foreach (AtomicReaderContext ctx in reader.Leaves)
    {
        // StringField is DOCS_ONLY, so the freq must always come back as 1.
        DocsEnum docs = ((AtomicReader)ctx.Reader).TermDocsEnum(term);
        while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
        {
            Assert.AreEqual(1, docs.Freq(), "wrong freq for doc " + docs.DocID());
        }
    }

    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// After adding the document, we should be able to read it back in:
/// the term "field" in TEXT_FIELD_2 must be found in doc 0 with freq 3.
/// </summary>
/// <param name="indexDivisor">term-infos index divisor used to open the segment</param>
public virtual void TestTermDocs(int indexDivisor)
{
    SegmentReader reader = new SegmentReader(Info, indexDivisor, NewIOContext(Random()));
    Assert.IsNotNull(reader);
    Assert.AreEqual(indexDivisor, reader.TermInfosIndexDivisor);

    TermsEnum terms = reader.Fields.Terms(DocHelper.TEXT_FIELD_2_KEY).Iterator(null);
    terms.SeekCeil(new BytesRef("field"));
    DocsEnum termDocs = TestUtil.Docs(Random(), terms, reader.LiveDocs, null, DocsEnum.FLAG_FREQS);

    // Previously this was guarded by an 'if', which let the test pass
    // vacuously when the term had no postings; assert instead so a missing
    // posting is reported as a failure rather than silently skipped.
    Assert.IsTrue(termDocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(0, termDocs.DocID());
    Assert.AreEqual(3, termDocs.Freq());

    reader.Dispose();
}
/// <summary>
/// Checks advancing docs: randomly interleaves NextDoc() and Advance() on the
/// two enums and asserts they stay in lockstep (and agree on freqs when
/// <paramref name="hasFreqs"/> is true).
/// </summary>
/// <param name="info">context string appended to every assertion message</param>
/// <param name="leftReader">reader the left enum came from; used to size the skip gap</param>
/// <param name="docFreq">doc freq of the term; used to estimate the average gap between hits</param>
/// <param name="leftDocs">reference enum (may be null, then right must be null too)</param>
/// <param name="rightDocs">enum under test</param>
/// <param name="hasFreqs">whether Freq() values should also be compared</param>
public void AssertDocsSkippingEquals(string info, IndexReader leftReader, int docFreq, DocsEnum leftDocs, DocsEnum rightDocs, bool hasFreqs)
{
    if (leftDocs == null)
    {
        Assert.IsNull(rightDocs);
        return;
    }
    // Fail with a clean assertion (carrying 'info') instead of a
    // NullReferenceException further down when only the right enum is null.
    Assert.IsNotNull(rightDocs, info);

    int docid = -1;
    int averageGap = leftReader.MaxDoc / (1 + docFreq); // expected distance between hits
    int skipInterval = 16;

    while (true)
    {
        if (Random().NextBoolean())
        {
            // nextDoc()
            docid = leftDocs.NextDoc();
            Assert.AreEqual(docid, rightDocs.NextDoc(), info);
        }
        else
        {
            // advance(): jump past the default skip interval plus a random
            // fraction of the average gap so skip lists actually get exercised
            int skip = docid + (int)Math.Ceiling(Math.Abs(skipInterval + Random().NextDouble() * averageGap));
            docid = leftDocs.Advance(skip);
            Assert.AreEqual(docid, rightDocs.Advance(skip), info);
        }

        if (docid == DocIdSetIterator.NO_MORE_DOCS)
        {
            return;
        }
        if (hasFreqs)
        {
            Assert.AreEqual(leftDocs.Freq(), rightDocs.Freq(), info);
        }
    }
}
/// <summary>
/// Checks docs + freqs, sequentially: both enums must start unpositioned,
/// visit the same documents in the same order, optionally agree on freqs,
/// and be exhausted at exactly the same point.
/// </summary>
public void AssertDocsEnumEquals(string info, DocsEnum leftDocs, DocsEnum rightDocs, bool hasFreqs)
{
    if (leftDocs == null)
    {
        Assert.IsNull(rightDocs);
        return;
    }

    // Both enums must be unpositioned (-1) before the first NextDoc().
    Assert.AreEqual(-1, leftDocs.DocID(), info);
    Assert.AreEqual(-1, rightDocs.DocID(), info);

    for (int docid = leftDocs.NextDoc(); docid != DocIdSetIterator.NO_MORE_DOCS; docid = leftDocs.NextDoc())
    {
        Assert.AreEqual(docid, rightDocs.NextDoc(), info);
        if (hasFreqs)
        {
            Assert.AreEqual(leftDocs.Freq(), rightDocs.Freq(), info);
        }
    }

    // The right enum must run out at the same time as the left one.
    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, rightDocs.NextDoc(), info);
}
/// <summary>
/// Builds random canned token streams (terms a-d with random position/offset
/// increments), records the expected tokens per term per doc, then verifies
/// docs, docs+positions, and docs+positions+offsets enums against that record.
/// The absolute position of each token is smuggled through Token.Type.
/// </summary>
public virtual void TestRandom()
{
    // token -> docID -> tokens
    IDictionary<string, IDictionary<int?, IList<Token>>> actualTokens = new Dictionary<string, IDictionary<int?, IList<Token>>>();
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc);
    int numDocs = AtLeast(20);
    //final int numDocs = AtLeast(5);
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    // TODO: randomize what IndexOptions we use; also test
    // changing this up in one IW buffered segment...:
    ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    if (Random().NextBoolean())
    {
        // randomly also store term vectors, with random offset/position flags
        ft.StoreTermVectors = true;
        ft.StoreTermVectorOffsets = Random().NextBoolean();
        ft.StoreTermVectorPositions = Random().NextBoolean();
    }
    for (int docCount = 0; docCount < numDocs; docCount++)
    {
        Document doc = new Document();
        doc.Add(new IntField("id", docCount, Field.Store.NO));
        IList<Token> tokens = new List<Token>();
        int numTokens = AtLeast(100);
        //final int numTokens = AtLeast(20);
        int pos = -1;
        int offset = 0;
        //System.out.println("doc id=" + docCount);
        for (int tokenCount = 0; tokenCount < numTokens; tokenCount++)
        {
            // biased random pick among four terms: a is most frequent, d least
            string text;
            if (Random().NextBoolean())
            {
                text = "a";
            }
            else if (Random().NextBoolean())
            {
                text = "b";
            }
            else if (Random().NextBoolean())
            {
                text = "c";
            }
            else
            {
                text = "d";
            }
            int posIncr = Random().NextBoolean() ? 1 : Random().Next(5);
            // first token of a stream may not have a 0 position increment
            if (tokenCount == 0 && posIncr == 0)
            {
                posIncr = 1;
            }
            int offIncr = Random().NextBoolean() ? 0 : Random().Next(5);
            int tokenOffset = Random().Next(5);
            Token token = MakeToken(text, posIncr, offset + offIncr, offset + offIncr + tokenOffset);
            if (!actualTokens.ContainsKey(text))
            {
                actualTokens[text] = new Dictionary<int?, IList<Token>>();
            }
            IDictionary<int?, IList<Token>> postingsByDoc = actualTokens[text];
            if (!postingsByDoc.ContainsKey(docCount))
            {
                postingsByDoc[docCount] = new List<Token>();
            }
            postingsByDoc[docCount].Add(token);
            tokens.Add(token);
            pos += posIncr;
            // stuff abs position into type:
            token.Type = "" + pos;
            offset += offIncr + tokenOffset;
            //System.out.println("  " + token + " posIncr=" + token.getPositionIncrement() + " pos=" + pos + " off=" + token.StartOffset() + "/" + token.EndOffset() + " (freq=" + postingsByDoc.Get(docCount).Size() + ")");
        }
        doc.Add(new Field("content", new CannedTokenStream(tokens.ToArray()), ft));
        w.AddDocument(doc);
    }
    DirectoryReader r = w.Reader;
    w.Dispose();
    string[] terms = new string[] { "a", "b", "c", "d" };
    foreach (AtomicReaderContext ctx in r.Leaves)
    {
        // TODO: improve this
        AtomicReader sub = (AtomicReader)ctx.Reader;
        //System.out.println("\nsub=" + sub);
        TermsEnum termsEnum = sub.Fields.Terms("content").Iterator(null);
        DocsEnum docs = null;
        DocsAndPositionsEnum docsAndPositions = null;
        DocsAndPositionsEnum docsAndPositionsAndOffsets = null;
        // map segment-local docIDs back to the "id" field values
        FieldCache.Ints docIDToID = FieldCache.DEFAULT.GetInts(sub, "id", false);
        foreach (string term in terms)
        {
            //System.out.println("  term=" + term);
            if (termsEnum.SeekExact(new BytesRef(term)))
            {
                // 1) docs enum: freq must equal the number of recorded tokens
                docs = termsEnum.Docs(null, docs);
                Assert.IsNotNull(docs);
                int doc;
                //System.out.println("    doc/freq");
                while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    IList<Token> expected = actualTokens[term][docIDToID.Get(doc)];
                    //System.out.println("      doc=" + docIDToID.Get(doc) + " docID=" + doc + " " + expected.Size() + " freq");
                    Assert.IsNotNull(expected);
                    Assert.AreEqual(expected.Count, docs.Freq());
                }
                // 2) positions enum (explicitly exclude offsets here)
                docsAndPositions = termsEnum.DocsAndPositions(null, docsAndPositions, DocsAndPositionsEnum.FLAG_PAYLOADS);
                Assert.IsNotNull(docsAndPositions);
                //System.out.println("    doc/freq/pos");
                while ((doc = docsAndPositions.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    IList<Token> expected = actualTokens[term][docIDToID.Get(doc)];
                    //System.out.println("      doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq");
                    Assert.IsNotNull(expected);
                    Assert.AreEqual(expected.Count, docsAndPositions.Freq());
                    foreach (Token token in expected)
                    {
                        // absolute position was stored in the token type above
                        int pos = Convert.ToInt32(token.Type);
                        //System.out.println("        pos=" + pos);
                        Assert.AreEqual(pos, docsAndPositions.NextPosition());
                    }
                }
                // 3) positions enum with offsets
                docsAndPositionsAndOffsets = termsEnum.DocsAndPositions(null, docsAndPositions);
                Assert.IsNotNull(docsAndPositionsAndOffsets);
                //System.out.println("    doc/freq/pos/offs");
                while ((doc = docsAndPositionsAndOffsets.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    IList<Token> expected = actualTokens[term][docIDToID.Get(doc)];
                    //System.out.println("      doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq");
                    Assert.IsNotNull(expected);
                    Assert.AreEqual(expected.Count, docsAndPositionsAndOffsets.Freq());
                    foreach (Token token in expected)
                    {
                        int pos = Convert.ToInt32(token.Type);
                        //System.out.println("        pos=" + pos);
                        Assert.AreEqual(pos, docsAndPositionsAndOffsets.NextPosition());
                        Assert.AreEqual(token.StartOffset(), docsAndPositionsAndOffsets.StartOffset());
                        Assert.AreEqual(token.EndOffset(), docsAndPositionsAndOffsets.EndOffset());
                    }
                }
            }
        }
        // TODO: test advance:
    }
    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes documents of 199 random integers rendered as text, counting how
/// often a randomly chosen "term" integer lands in each doc, then repeatedly
/// verifies per-leaf that DocsEnum NextDoc()/Advance() and Freq() agree with
/// those counts and that the enum ends exhausted.
/// </summary>
public virtual void TestRandomDocs()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
    int numDocs = AtLeast(49);
    int max = 15678;
    int term = Random().Next(max);
    // freqInDoc[i] = how many times 'term' was written into doc i
    int[] freqInDoc = new int[numDocs];
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.OmitNorms = true;
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        StringBuilder builder = new StringBuilder();
        for (int j = 0; j < 199; j++)
        {
            int nextInt = Random().Next(max);
            builder.Append(nextInt).Append(' ');
            if (nextInt == term)
            {
                freqInDoc[i]++;
            }
        }
        doc.Add(NewField(FieldName, builder.ToString(), customType));
        writer.AddDocument(doc);
    }
    IndexReader reader = writer.Reader;
    writer.Dispose();
    int num = AtLeast(13);
    for (int i = 0; i < num; i++)
    {
        BytesRef bytes = new BytesRef("" + term);
        IndexReaderContext topReaderContext = reader.Context;
        foreach (AtomicReaderContext context in topReaderContext.Leaves)
        {
            int maxDoc = context.AtomicReader.MaxDoc;
            DocsEnum docsEnum = TestUtil.Docs(Random(), context.Reader, FieldName, bytes, null, null, DocsEnum.FLAG_FREQS);
            // if the term never landed in this leaf, there must be no enum at all
            if (FindNext(freqInDoc, context.DocBase, context.DocBase + maxDoc) == int.MaxValue)
            {
                Assert.IsNull(docsEnum);
                continue;
            }
            Assert.IsNotNull(docsEnum);
            docsEnum.NextDoc();
            for (int j = 0; j < maxDoc; j++)
            {
                if (freqInDoc[context.DocBase + j] != 0)
                {
                    // enum must currently be positioned on this doc
                    Assert.AreEqual(j, docsEnum.DocID());
                    Assert.AreEqual(docsEnum.Freq(), freqInDoc[context.DocBase + j]);
                    if (i % 2 == 0 && Random().Next(10) == 0)
                    {
                        // occasionally Advance() straight to the next hit
                        // instead of stepping; Advance past the leaf must
                        // exhaust the enum
                        int next = FindNext(freqInDoc, context.DocBase + j + 1, context.DocBase + maxDoc) - context.DocBase;
                        int advancedTo = docsEnum.Advance(next);
                        if (next >= maxDoc)
                        {
                            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, advancedTo);
                        }
                        else
                        {
                            Assert.IsTrue(next >= advancedTo, "advanced to: " + advancedTo + " but should be <= " + next);
                        }
                    }
                    else
                    {
                        docsEnum.NextDoc();
                    }
                }
            }
            // after visiting every hit the enum must be exhausted
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.DocID(), "DocBase: " + context.DocBase + " maxDoc: " + maxDoc + " " + docsEnum.GetType());
        }
    }
    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Default merge impl: append documents, mapping around deletes.
/// Consumes the incoming postings enum, forwarding each doc (and, depending
/// on <paramref name="indexOptions"/>, freq/position/payload/offset data) to
/// StartDoc/AddPosition/FinishDoc, marking every visited doc in
/// <paramref name="visitedDocs"/>.
/// </summary>
/// <returns>
/// TermStats with the merged doc freq and the total term freq
/// (-1 when freqs are not indexed, i.e. DOCS_ONLY).
/// </returns>
public virtual TermStats Merge(MergeState mergeState, FieldInfo.IndexOptions? indexOptions, DocsEnum postings, FixedBitSet visitedDocs)
{
    int df = 0;     // merged document frequency
    long totTF = 0; // accumulated total term frequency

    if (indexOptions == FieldInfo.IndexOptions.DOCS_ONLY)
    {
        int doc;
        while ((doc = postings.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            visitedDocs.Set(doc);
            this.StartDoc(doc, -1); // -1: no freq recorded for DOCS_ONLY
            this.FinishDoc();
            df++;
        }
        totTF = -1;
    }
    else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS)
    {
        int doc;
        while ((doc = postings.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            visitedDocs.Set(doc);
            int freq = postings.Freq();
            this.StartDoc(doc, freq);
            this.FinishDoc();
            df++;
            totTF += freq;
        }
    }
    else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
    {
        var postingsEnum = (DocsAndPositionsEnum)postings;
        int doc;
        while ((doc = postingsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            visitedDocs.Set(doc);
            int freq = postingsEnum.Freq();
            this.StartDoc(doc, freq);
            totTF += freq;
            for (int i = 0; i < freq; i++)
            {
                // position must be read before payload; offsets not indexed
                int position = postingsEnum.NextPosition();
                BytesRef payload = postingsEnum.Payload;
                this.AddPosition(position, payload, -1, -1);
            }
            this.FinishDoc();
            df++;
        }
    }
    else
    {
        Debug.Assert(indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        var postingsEnum = (DocsAndPositionsEnum)postings;
        int doc;
        while ((doc = postingsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            visitedDocs.Set(doc);
            int freq = postingsEnum.Freq();
            this.StartDoc(doc, freq);
            totTF += freq;
            for (int i = 0; i < freq; i++)
            {
                int position = postingsEnum.NextPosition();
                BytesRef payload = postingsEnum.Payload;
                this.AddPosition(position, payload, postingsEnum.StartOffset(), postingsEnum.EndOffset());
            }
            this.FinishDoc();
            df++;
        }
    }

    return new TermStats(df, indexOptions == FieldInfo.IndexOptions.DOCS_ONLY ? -1 : totTF);
}
/// <summary>
/// Builds a single segment with docs 0-9 ("aaa" x4), 10-25 ("bbb" x4) and
/// 26-75 ("ccc" x4), then exercises Advance() on each term's postings:
/// below the skip interval (aaa), at exactly the skip interval (bbb, 16 docs)
/// and well above it (ccc, 50 docs), both after NextDoc() and cold.
/// </summary>
public virtual void TestSkipTo(int indexDivisor)
{
    Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
    Term ta = new Term("content", "aaa");
    for (int i = 0; i < 10; i++)
    {
        AddDoc(writer, "aaa aaa aaa aaa");
    }
    Term tb = new Term("content", "bbb");
    for (int i = 0; i < 16; i++)
    {
        AddDoc(writer, "bbb bbb bbb bbb");
    }
    Term tc = new Term("content", "ccc");
    for (int i = 0; i < 50; i++)
    {
        AddDoc(writer, "ccc ccc ccc ccc");
    }
    // assure that we deal with a single segment
    writer.ForceMerge(1);
    writer.Dispose();
    IndexReader reader = DirectoryReader.Open(dir, indexDivisor);
    DocsEnum tdocs = TestUtil.Docs(Random(), reader, ta.Field(), new BytesRef(ta.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);
    // without optimization (assumption skipInterval == 16)
    // with next
    Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(0, tdocs.DocID());
    Assert.AreEqual(4, tdocs.Freq());
    Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(1, tdocs.DocID());
    Assert.AreEqual(4, tdocs.Freq());
    Assert.IsTrue(tdocs.Advance(2) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(2, tdocs.DocID());
    Assert.IsTrue(tdocs.Advance(4) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(4, tdocs.DocID());
    Assert.IsTrue(tdocs.Advance(9) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(9, tdocs.DocID());
    // doc 9 is the last "aaa" doc; advancing past it must exhaust the enum
    Assert.IsFalse(tdocs.Advance(10) != DocIdSetIterator.NO_MORE_DOCS);
    // without next
    tdocs = TestUtil.Docs(Random(), reader, ta.Field(), new BytesRef(ta.Text()), MultiFields.GetLiveDocs(reader), null, 0);
    Assert.IsTrue(tdocs.Advance(0) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(0, tdocs.DocID());
    Assert.IsTrue(tdocs.Advance(4) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(4, tdocs.DocID());
    Assert.IsTrue(tdocs.Advance(9) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(9, tdocs.DocID());
    Assert.IsFalse(tdocs.Advance(10) != DocIdSetIterator.NO_MORE_DOCS);
    // exactly skipInterval documents and therefore with optimization
    // with next
    tdocs = TestUtil.Docs(Random(), reader, tb.Field(), new BytesRef(tb.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);
    Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(10, tdocs.DocID());
    Assert.AreEqual(4, tdocs.Freq());
    Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(11, tdocs.DocID());
    Assert.AreEqual(4, tdocs.Freq());
    Assert.IsTrue(tdocs.Advance(12) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(12, tdocs.DocID());
    Assert.IsTrue(tdocs.Advance(15) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(15, tdocs.DocID());
    Assert.IsTrue(tdocs.Advance(24) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(24, tdocs.DocID());
    Assert.IsTrue(tdocs.Advance(25) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(25, tdocs.DocID());
    // doc 25 is the last "bbb" doc
    Assert.IsFalse(tdocs.Advance(26) != DocIdSetIterator.NO_MORE_DOCS);
    // without next
    tdocs = TestUtil.Docs(Random(), reader, tb.Field(), new BytesRef(tb.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);
    // advancing below the first "bbb" doc must land on it
    Assert.IsTrue(tdocs.Advance(5) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(10, tdocs.DocID());
    Assert.IsTrue(tdocs.Advance(15) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(15, tdocs.DocID());
    Assert.IsTrue(tdocs.Advance(24) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(24, tdocs.DocID());
    Assert.IsTrue(tdocs.Advance(25) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(25, tdocs.DocID());
    Assert.IsFalse(tdocs.Advance(26) != DocIdSetIterator.NO_MORE_DOCS);
    // much more than skipInterval documents and therefore with optimization
    // with next
    tdocs = TestUtil.Docs(Random(), reader, tc.Field(), new BytesRef(tc.Text()), MultiFields.GetLiveDocs(reader), null, DocsEnum.FLAG_FREQS);
    Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(26, tdocs.DocID());
    Assert.AreEqual(4, tdocs.Freq());
    Assert.IsTrue(tdocs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(27, tdocs.DocID());
    Assert.AreEqual(4, tdocs.Freq());
    Assert.IsTrue(tdocs.Advance(28) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(28, tdocs.DocID());
    Assert.IsTrue(tdocs.Advance(40) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(40, tdocs.DocID());
    Assert.IsTrue(tdocs.Advance(57) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(57, tdocs.DocID());
    Assert.IsTrue(tdocs.Advance(74) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(74, tdocs.DocID());
    Assert.IsTrue(tdocs.Advance(75) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(75, tdocs.DocID());
    // doc 75 is the last "ccc" doc
    Assert.IsFalse(tdocs.Advance(76) != DocIdSetIterator.NO_MORE_DOCS);
    //without next
    tdocs = TestUtil.Docs(Random(), reader, tc.Field(), new BytesRef(tc.Text()), MultiFields.GetLiveDocs(reader), null, 0);
    Assert.IsTrue(tdocs.Advance(5) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(26, tdocs.DocID());
    Assert.IsTrue(tdocs.Advance(40) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(40, tdocs.DocID());
    Assert.IsTrue(tdocs.Advance(57) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(57, tdocs.DocID());
    Assert.IsTrue(tdocs.Advance(74) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(74, tdocs.DocID());
    Assert.IsTrue(tdocs.Advance(75) != DocIdSetIterator.NO_MORE_DOCS);
    Assert.AreEqual(75, tdocs.DocID());
    Assert.IsFalse(tdocs.Advance(76) != DocIdSetIterator.NO_MORE_DOCS);
    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Asserts two Fields instances (e.g. term vectors for the same document from
/// two readers) enumerate the same fields, terms, total term freqs, freqs,
/// positions and (when both expose an offset attribute) offsets. Note: docIDs
/// themselves are deliberately NOT compared, only the per-term data.
/// </summary>
public static void VerifyEquals(Fields d1, Fields d2)
{
    if (d1 == null)
    {
        // a null Fields is equivalent to an empty one
        Assert.IsTrue(d2 == null || d2.Size == 0);
        return;
    }
    Assert.IsTrue(d2 != null);
    IEnumerator<string> fieldsEnum2 = d2.GetEnumerator();
    foreach (string field1 in d1)
    {
        // walk both field enumerations in lockstep
        fieldsEnum2.MoveNext();
        string field2 = fieldsEnum2.Current;
        Assert.AreEqual(field1, field2);
        Terms terms1 = d1.Terms(field1);
        Assert.IsNotNull(terms1);
        TermsEnum termsEnum1 = terms1.Iterator(null);
        Terms terms2 = d2.Terms(field2);
        Assert.IsNotNull(terms2);
        TermsEnum termsEnum2 = terms2.Iterator(null);
        DocsAndPositionsEnum dpEnum1 = null;
        DocsAndPositionsEnum dpEnum2 = null;
        DocsEnum dEnum1 = null;
        DocsEnum dEnum2 = null;
        BytesRef term1;
        while ((term1 = termsEnum1.Next()) != null)
        {
            BytesRef term2 = termsEnum2.Next();
            Assert.AreEqual(term1, term2);
            Assert.AreEqual(termsEnum1.TotalTermFreq(), termsEnum2.TotalTermFreq());
            dpEnum1 = termsEnum1.DocsAndPositions(null, dpEnum1);
            dpEnum2 = termsEnum2.DocsAndPositions(null, dpEnum2);
            if (dpEnum1 != null)
            {
                // positions are available: compare freqs, positions, offsets
                Assert.IsNotNull(dpEnum2);
                int docID1 = dpEnum1.NextDoc();
                dpEnum2.NextDoc();
                // docIDs are not supposed to be equal
                //int docID2 = dpEnum2.NextDoc();
                //Assert.AreEqual(docID1, docID2);
                Assert.IsTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS);
                int freq1 = dpEnum1.Freq();
                int freq2 = dpEnum2.Freq();
                Assert.AreEqual(freq1, freq2);
                // offsets are only comparable when both enums expose the attribute
                IOffsetAttribute offsetAtt1 = dpEnum1.Attributes().HasAttribute<IOffsetAttribute>() ? dpEnum1.Attributes().GetAttribute<IOffsetAttribute>() : null;
                IOffsetAttribute offsetAtt2 = dpEnum2.Attributes().HasAttribute<IOffsetAttribute>() ? dpEnum2.Attributes().GetAttribute<IOffsetAttribute>() : null;
                if (offsetAtt1 != null)
                {
                    Assert.IsNotNull(offsetAtt2);
                }
                else
                {
                    Assert.IsNull(offsetAtt2);
                }
                for (int posUpto = 0; posUpto < freq1; posUpto++)
                {
                    int pos1 = dpEnum1.NextPosition();
                    int pos2 = dpEnum2.NextPosition();
                    Assert.AreEqual(pos1, pos2);
                    if (offsetAtt1 != null)
                    {
                        Assert.AreEqual(offsetAtt1.StartOffset(), offsetAtt2.StartOffset());
                        Assert.AreEqual(offsetAtt1.EndOffset(), offsetAtt2.EndOffset());
                    }
                }
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum1.NextDoc());
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum2.NextDoc());
            }
            else
            {
                // no positions indexed: fall back to docs+freqs comparison
                dEnum1 = TestUtil.Docs(Random(), termsEnum1, null, dEnum1, DocsEnum.FLAG_FREQS);
                dEnum2 = TestUtil.Docs(Random(), termsEnum2, null, dEnum2, DocsEnum.FLAG_FREQS);
                Assert.IsNotNull(dEnum1);
                Assert.IsNotNull(dEnum2);
                int docID1 = dEnum1.NextDoc();
                dEnum2.NextDoc();
                // docIDs are not supposed to be equal
                //int docID2 = dEnum2.NextDoc();
                //Assert.AreEqual(docID1, docID2);
                Assert.IsTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS);
                int freq1 = dEnum1.Freq();
                int freq2 = dEnum2.Freq();
                Assert.AreEqual(freq1, freq2);
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dEnum1.NextDoc());
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dEnum2.NextDoc());
            }
        }
        // the second terms enum must be exhausted at the same point
        Assert.IsNull(termsEnum2.Next());
    }
    // and the second fields enum must have no extra fields
    Assert.IsFalse(fieldsEnum2.MoveNext());
}
/// <summary>
/// Verifies two readers hold equivalent indexes: first maps documents between
/// r1 and r2 via the unique idField (comparing stored fields and term vectors
/// per matched document, dumping diagnostics and rethrowing on mismatch), then
/// compares all postings field-by-field using packed (docID, freq) longs,
/// remapping r2's docIDs into r1's space via the id mapping.
/// </summary>
public virtual void VerifyEquals(DirectoryReader r1, DirectoryReader r2, string idField)
{
    if (VERBOSE)
    {
        Console.WriteLine("\nr1 docs:");
        PrintDocs(r1);
        Console.WriteLine("\nr2 docs:");
        PrintDocs(r2);
    }
    if (r1.NumDocs != r2.NumDocs)
    {
        // NOTE(review): uses Debug.Assert rather than Assert.Fail, so this
        // check is inert in release builds — confirm that is intended
        Debug.Assert(false, "r1.NumDocs=" + r1.NumDocs + " vs r2.NumDocs=" + r2.NumDocs);
    }
    bool hasDeletes = !(r1.MaxDoc == r2.MaxDoc && r1.NumDocs == r1.MaxDoc);
    int[] r2r1 = new int[r2.MaxDoc]; // r2 id to r1 id mapping
    // create mapping from id2 space to id2 based on idField
    Fields f1 = MultiFields.GetFields(r1);
    if (f1 == null)
    {
        // make sure r2 is empty
        Assert.IsNull(MultiFields.GetFields(r2));
        return;
    }
    Terms terms1 = f1.Terms(idField);
    if (terms1 == null)
    {
        Assert.IsTrue(MultiFields.GetFields(r2) == null || MultiFields.GetFields(r2).Terms(idField) == null);
        return;
    }
    TermsEnum termsEnum = terms1.Iterator(null);
    Bits liveDocs1 = MultiFields.GetLiveDocs(r1);
    Bits liveDocs2 = MultiFields.GetLiveDocs(r2);
    Fields fields = MultiFields.GetFields(r2);
    if (fields == null)
    {
        // make sure r1 is in fact empty (eg has only all
        // deleted docs):
        Bits liveDocs = MultiFields.GetLiveDocs(r1);
        DocsEnum docs = null;
        while (termsEnum.Next() != null)
        {
            docs = TestUtil.Docs(Random(), termsEnum, liveDocs, docs, DocsEnum.FLAG_NONE);
            while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                Assert.Fail("r1 is not empty but r2 is");
            }
        }
        return;
    }
    Terms terms2 = fields.Terms(idField);
    TermsEnum termsEnum2 = terms2.Iterator(null);
    DocsEnum termDocs1 = null;
    DocsEnum termDocs2 = null;
    // phase 1: match documents by unique id term and compare them
    while (true)
    {
        BytesRef term = termsEnum.Next();
        //System.out.println("TEST: match id term=" + term);
        if (term == null)
        {
            break;
        }
        termDocs1 = TestUtil.Docs(Random(), termsEnum, liveDocs1, termDocs1, DocsEnum.FLAG_NONE);
        if (termsEnum2.SeekExact(term))
        {
            termDocs2 = TestUtil.Docs(Random(), termsEnum2, liveDocs2, termDocs2, DocsEnum.FLAG_NONE);
        }
        else
        {
            termDocs2 = null;
        }
        if (termDocs1.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
        {
            // this doc is deleted and wasn't replaced
            Assert.IsTrue(termDocs2 == null || termDocs2.NextDoc() == DocIdSetIterator.NO_MORE_DOCS);
            continue;
        }
        // an id term must match exactly one live doc in each reader
        int id1 = termDocs1.DocID();
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs1.NextDoc());
        Assert.IsTrue(termDocs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        int id2 = termDocs2.DocID();
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs2.NextDoc());
        r2r1[id2] = id1;
        // verify stored fields are equivalent
        try
        {
            VerifyEquals(r1.Document(id1), r2.Document(id2));
        }
        catch (Exception t)
        {
            Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
            Console.WriteLine(" d1=" + r1.Document(id1));
            Console.WriteLine(" d2=" + r2.Document(id2));
            throw t;
        }
        try
        {
            // verify term vectors are equivalent
            VerifyEquals(r1.GetTermVectors(id1), r2.GetTermVectors(id2));
        }
        catch (Exception e)
        {
            // dump both docs' term vectors for diagnosis, then rethrow
            Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
            Fields tv1 = r1.GetTermVectors(id1);
            Console.WriteLine(" d1=" + tv1);
            if (tv1 != null)
            {
                DocsAndPositionsEnum dpEnum = null;
                DocsEnum dEnum = null;
                foreach (string field in tv1)
                {
                    Console.WriteLine(" " + field + ":");
                    Terms terms3 = tv1.Terms(field);
                    Assert.IsNotNull(terms3);
                    TermsEnum termsEnum3 = terms3.Iterator(null);
                    BytesRef term2;
                    while ((term2 = termsEnum3.Next()) != null)
                    {
                        Console.WriteLine(" " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq());
                        dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                        if (dpEnum != null)
                        {
                            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                            int freq = dpEnum.Freq();
                            Console.WriteLine(" doc=" + dpEnum.DocID() + " freq=" + freq);
                            for (int posUpto = 0; posUpto < freq; posUpto++)
                            {
                                Console.WriteLine(" pos=" + dpEnum.NextPosition());
                            }
                        }
                        else
                        {
                            dEnum = TestUtil.Docs(Random(), termsEnum3, null, dEnum, DocsEnum.FLAG_FREQS);
                            Assert.IsNotNull(dEnum);
                            Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                            int freq = dEnum.Freq();
                            Console.WriteLine(" doc=" + dEnum.DocID() + " freq=" + freq);
                        }
                    }
                }
            }
            Fields tv2 = r2.GetTermVectors(id2);
            Console.WriteLine(" d2=" + tv2);
            if (tv2 != null)
            {
                DocsAndPositionsEnum dpEnum = null;
                DocsEnum dEnum = null;
                foreach (string field in tv2)
                {
                    Console.WriteLine(" " + field + ":");
                    Terms terms3 = tv2.Terms(field);
                    Assert.IsNotNull(terms3);
                    TermsEnum termsEnum3 = terms3.Iterator(null);
                    BytesRef term2;
                    while ((term2 = termsEnum3.Next()) != null)
                    {
                        Console.WriteLine(" " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq());
                        dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                        if (dpEnum != null)
                        {
                            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                            int freq = dpEnum.Freq();
                            Console.WriteLine(" doc=" + dpEnum.DocID() + " freq=" + freq);
                            for (int posUpto = 0; posUpto < freq; posUpto++)
                            {
                                Console.WriteLine(" pos=" + dpEnum.NextPosition());
                            }
                        }
                        else
                        {
                            dEnum = TestUtil.Docs(Random(), termsEnum3, null, dEnum, DocsEnum.FLAG_FREQS);
                            Assert.IsNotNull(dEnum);
                            Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                            int freq = dEnum.Freq();
                            Console.WriteLine(" doc=" + dEnum.DocID() + " freq=" + freq);
                        }
                    }
                }
            }
            throw e;
        }
    }
    //System.out.println("TEST: done match id");
    // Verify postings
    //System.out.println("TEST: create te1");
    Fields fields1 = MultiFields.GetFields(r1);
    IEnumerator<string> fields1Enum = fields1.GetEnumerator();
    Fields fields2 = MultiFields.GetFields(r2);
    IEnumerator<string> fields2Enum = fields2.GetEnumerator();
    string field1 = null, field2 = null;
    TermsEnum termsEnum1 = null;
    termsEnum2 = null;
    DocsEnum docs1 = null, docs2 = null;
    // pack both doc and freq into single element for easy sorting
    long[] info1 = new long[r1.NumDocs];
    long[] info2 = new long[r2.NumDocs];
    // phase 2: walk all fields/terms of both readers in parallel, comparing
    // the packed (doc, freq) lists term by term
    for (; ;)
    {
        BytesRef term1 = null, term2 = null;
        // iterate until we get some docs
        int len1;
        for (; ;)
        {
            len1 = 0;
            if (termsEnum1 == null)
            {
                if (!fields1Enum.MoveNext())
                {
                    break;
                }
                field1 = fields1Enum.Current;
                Terms terms = fields1.Terms(field1);
                if (terms == null)
                {
                    continue;
                }
                termsEnum1 = terms.Iterator(null);
            }
            term1 = termsEnum1.Next();
            if (term1 == null)
            {
                // no more terms in this field
                termsEnum1 = null;
                continue;
            }
            //System.out.println("TEST: term1=" + term1);
            docs1 = TestUtil.Docs(Random(), termsEnum1, liveDocs1, docs1, DocsEnum.FLAG_FREQS);
            while (docs1.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                int d = docs1.DocID();
                int f = docs1.Freq();
                info1[len1] = (((long)d) << 32) | f;
                len1++;
            }
            if (len1 > 0)
            {
                break;
            }
        }
        // iterate until we get some docs
        int len2;
        for (; ;)
        {
            len2 = 0;
            if (termsEnum2 == null)
            {
                if (!fields2Enum.MoveNext())
                {
                    break;
                }
                field2 = fields2Enum.Current;
                Terms terms = fields2.Terms(field2);
                if (terms == null)
                {
                    continue;
                }
                termsEnum2 = terms.Iterator(null);
            }
            term2 = termsEnum2.Next();
            if (term2 == null)
            {
                // no more terms in this field
                termsEnum2 = null;
                continue;
            }
            //System.out.println("TEST: term1=" + term1);
            docs2 = TestUtil.Docs(Random(), termsEnum2, liveDocs2, docs2, DocsEnum.FLAG_FREQS);
            while (docs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                // remap r2's docID into r1's space before packing
                int d = r2r1[docs2.DocID()];
                int f = docs2.Freq();
                info2[len2] = (((long)d) << 32) | f;
                len2++;
            }
            if (len2 > 0)
            {
                break;
            }
        }
        Assert.AreEqual(len1, len2);
        if (len1 == 0) // no more terms
        {
            break;
        }
        Assert.AreEqual(field1, field2);
        Assert.IsTrue(term1.BytesEquals(term2));
        if (!hasDeletes)
        {
            Assert.AreEqual(termsEnum1.DocFreq(), termsEnum2.DocFreq());
        }
        Assert.AreEqual(term1, term2, "len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes);
        // sort info2 to get it into ascending docid
        Array.Sort(info2, 0, len2);
        // now compare
        for (int i = 0; i < len1; i++)
        {
            Assert.AreEqual(info1[i], info2[i], "i=" + i + " len=" + len1 + " d1=" + ((long)((ulong)info1[i] >> 32)) + " f1=" + (info1[i] & int.MaxValue) + " d2=" + ((long)((ulong)info2[i] >> 32)) + " f2=" + (info2[i] & int.MaxValue) + " field=" + field1 + " term=" + term1.Utf8ToString());
        }
    }
}
public override int Freq()
{
    // Forward to the currently selected sub-enum.
    return Current.Freq();
}
public override int Freq()
{
    // Delegate to the wrapped enum.
    return @in.Freq();
}