public virtual void TestPreAnalyzedField() { IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); Document doc = new Document(); doc.Add(new TextField("preanalyzed", new TokenStreamAnonymousInnerClassHelper(this))); writer.AddDocument(doc); writer.Commit(); SegmentCommitInfo info = writer.NewestSegment(); writer.Dispose(); SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random())); DocsAndPositionsEnum termPositions = reader.TermPositionsEnum(new Term("preanalyzed", "term1")); Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(1, termPositions.Freq()); Assert.AreEqual(0, termPositions.NextPosition()); termPositions = reader.TermPositionsEnum(new Term("preanalyzed", "term2")); Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(2, termPositions.Freq()); Assert.AreEqual(1, termPositions.NextPosition()); Assert.AreEqual(3, termPositions.NextPosition()); termPositions = reader.TermPositionsEnum(new Term("preanalyzed", "term3")); Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(1, termPositions.Freq()); Assert.AreEqual(2, termPositions.NextPosition()); reader.Dispose(); }
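The TokenStreamAnonymousInnerClassHelper used above is not included in this excerpt. As a rough sketch only (not the original helper), a pre-analyzed stream that satisfies the assertions (term1 at position 0, term2 at positions 1 and 3, term3 at position 2) could be built with the test framework's CannedTokenStream; the offsets below are arbitrary assumptions, since the test never asserts them.

// Hypothetical stand-in for the anonymous TokenStream helper (an assumption, not the original code).
// Requires Lucene.Net.Analysis (Token, TokenStream) and the test framework's CannedTokenStream.
// Each Token keeps the default position increment of 1, giving positions 0, 1, 2, 3.
private static TokenStream MakePreAnalyzedStream()
{
    return new CannedTokenStream(
        new Token("term1", 0, 5),    // position 0
        new Token("term2", 6, 11),   // position 1
        new Token("term3", 12, 17),  // position 2
        new Token("term2", 18, 23)); // position 3 (second occurrence of "term2")
}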
public virtual void TestBasic() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc); Document doc = new Document(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; if (Random().NextBoolean()) { ft.StoreTermVectors = true; ft.StoreTermVectorPositions = Random().NextBoolean(); ft.StoreTermVectorOffsets = Random().NextBoolean(); } Token[] tokens = new Token[] { MakeToken("a", 1, 0, 6), MakeToken("b", 1, 8, 9), MakeToken("a", 1, 9, 17), MakeToken("c", 1, 19, 50) }; doc.Add(new Field("content", new CannedTokenStream(tokens), ft)); w.AddDocument(doc); IndexReader r = w.Reader; w.Dispose(); DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(r, null, "content", new BytesRef("a")); Assert.IsNotNull(dp); Assert.AreEqual(0, dp.NextDoc()); Assert.AreEqual(2, dp.Freq()); Assert.AreEqual(0, dp.NextPosition()); Assert.AreEqual(0, dp.StartOffset()); Assert.AreEqual(6, dp.EndOffset()); Assert.AreEqual(2, dp.NextPosition()); Assert.AreEqual(9, dp.StartOffset()); Assert.AreEqual(17, dp.EndOffset()); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc()); dp = MultiFields.GetTermPositionsEnum(r, null, "content", new BytesRef("b")); Assert.IsNotNull(dp); Assert.AreEqual(0, dp.NextDoc()); Assert.AreEqual(1, dp.Freq()); Assert.AreEqual(1, dp.NextPosition()); Assert.AreEqual(8, dp.StartOffset()); Assert.AreEqual(9, dp.EndOffset()); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc()); dp = MultiFields.GetTermPositionsEnum(r, null, "content", new BytesRef("c")); Assert.IsNotNull(dp); Assert.AreEqual(0, dp.NextDoc()); Assert.AreEqual(1, dp.Freq()); Assert.AreEqual(3, dp.NextPosition()); Assert.AreEqual(19, dp.StartOffset()); Assert.AreEqual(50, dp.EndOffset()); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc()); r.Dispose(); dir.Dispose(); }
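MakeToken is called above (and again in TestRandom below) but is not defined in this excerpt. A minimal version consistent with its call sites (text, position increment, start offset, end offset) might look like the following; the exact Token constructor and property names are assumptions about this port.

// Hypothetical helper matching how MakeToken is invoked above (an assumption, not the original).
private static Token MakeToken(string text, int posIncr, int startOffset, int endOffset)
{
    Token t = new Token(text, startOffset, endOffset); // sets the term text and offsets
    t.PositionIncrement = posIncr;                     // position relative to the previous token
    return t;
}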
public virtual void TestPositionsSimple() { Directory directory = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); for (int i = 0; i < 39; i++) { Document doc = new Document(); FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.OmitNorms = true; doc.Add(NewField(FieldName, "1 2 3 4 5 6 7 8 9 10 " + "1 2 3 4 5 6 7 8 9 10 " + "1 2 3 4 5 6 7 8 9 10 " + "1 2 3 4 5 6 7 8 9 10", customType)); writer.AddDocument(doc); } IndexReader reader = writer.Reader; writer.Dispose(); int num = AtLeast(13); for (int i = 0; i < num; i++) { BytesRef bytes = new BytesRef("1"); IndexReaderContext topReaderContext = reader.Context; foreach (AtomicReaderContext atomicReaderContext in topReaderContext.Leaves) { DocsAndPositionsEnum docsAndPosEnum = GetDocsAndPositions((AtomicReader)atomicReaderContext.Reader, bytes, null); Assert.IsNotNull(docsAndPosEnum); if (atomicReaderContext.Reader.MaxDoc == 0) { continue; } int advance = docsAndPosEnum.Advance(Random().Next(atomicReaderContext.Reader.MaxDoc)); do { string msg = "Advanced to: " + advance + " current doc: " + docsAndPosEnum.DocID(); // TODO: + " usePayloads: " + usePayload; Assert.AreEqual(4, docsAndPosEnum.Freq(), msg); Assert.AreEqual(0, docsAndPosEnum.NextPosition(), msg); Assert.AreEqual(4, docsAndPosEnum.Freq(), msg); Assert.AreEqual(10, docsAndPosEnum.NextPosition(), msg); Assert.AreEqual(4, docsAndPosEnum.Freq(), msg); Assert.AreEqual(20, docsAndPosEnum.NextPosition(), msg); Assert.AreEqual(4, docsAndPosEnum.Freq(), msg); Assert.AreEqual(30, docsAndPosEnum.NextPosition(), msg); } while (docsAndPosEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); } } reader.Dispose(); directory.Dispose(); }
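GetDocsAndPositions is used here and in the later position tests but is not shown. A sketch consistent with how it is called, assuming the tests' FieldName constant and this port's TermsEnum API, could be:

// Hypothetical sketch of the missing helper: seek the term on the per-segment reader and
// return its positions enum, or null if the term does not exist in that segment.
private DocsAndPositionsEnum GetDocsAndPositions(AtomicReader reader, BytesRef bytes, Bits liveDocs)
{
    Terms terms = reader.Terms(FieldName);
    if (terms != null)
    {
        TermsEnum te = terms.Iterator(null);
        if (te.SeekExact(bytes))
        {
            return te.DocsAndPositions(liveDocs, null);
        }
    }
    return null;
}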
public virtual void CheckSkipTo(DocsAndPositionsEnum tp, int target, int maxCounter) { tp.Advance(target); if (maxCounter < Counter) { Assert.Fail("Too many bytes read: " + Counter + " vs " + maxCounter); } Assert.AreEqual(target, tp.DocID(), "Wrong document " + tp.DocID() + " after skipTo target " + target); Assert.AreEqual(1, tp.Freq(), "Frequency is not 1: " + tp.Freq()); tp.NextPosition(); BytesRef b = tp.Payload; Assert.AreEqual(1, b.Length); Assert.AreEqual((sbyte)target, (sbyte)b.Bytes[b.Offset], "Wrong payload for the target " + target + ": " + (sbyte)b.Bytes[b.Offset]); }
public virtual void TestPositionIncrementGap() { Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this); IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); doc.Add(NewTextField("repeated", "repeated one", Field.Store.YES)); doc.Add(NewTextField("repeated", "repeated two", Field.Store.YES)); writer.AddDocument(doc); writer.Commit(); SegmentCommitInfo info = writer.NewestSegment(); writer.Dispose(); SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random())); DocsAndPositionsEnum termPositions = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), "repeated", new BytesRef("repeated")); Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int freq = termPositions.Freq(); Assert.AreEqual(2, freq); Assert.AreEqual(0, termPositions.NextPosition()); Assert.AreEqual(502, termPositions.NextPosition()); reader.Dispose(); }
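The AnalyzerAnonymousInnerClassHelper above is not shown; the asserted positions (0 and 502) imply a position increment gap of 500 between the two "repeated" field values: "repeated" = 0 and "one" = 1 in the first value, then 1 + 500 + 1 = 502 for the second "repeated". A hedged sketch of such an analyzer follows; the override names and visibility are assumptions about this port.

// Hypothetical equivalent of the anonymous analyzer: whitespace tokenization plus a fixed gap
// of 500 positions between multiple values of the same field. TextReader comes from System.IO.
private sealed class GapAnalyzer : Analyzer
{
    public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
    {
        return new TokenStreamComponents(new MockTokenizer(reader, MockTokenizer.WHITESPACE, false));
    }

    public override int GetPositionIncrementGap(string fieldName)
    {
        return 500; // last position of the first value (1) + 500 + 1 = 502
    }
}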
public virtual void TestTokenReuse() { Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this); IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); doc.Add(NewTextField("f1", "a 5 a a", Field.Store.YES)); writer.AddDocument(doc); writer.Commit(); SegmentCommitInfo info = writer.NewestSegment(); writer.Dispose(); SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random())); DocsAndPositionsEnum termPositions = MultiFields.GetTermPositionsEnum(reader, reader.LiveDocs, "f1", new BytesRef("a")); Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int freq = termPositions.Freq(); Assert.AreEqual(3, freq); Assert.AreEqual(0, termPositions.NextPosition()); Assert.IsNotNull(termPositions.Payload); Assert.AreEqual(6, termPositions.NextPosition()); Assert.IsNull(termPositions.Payload); Assert.AreEqual(7, termPositions.NextPosition()); Assert.IsNull(termPositions.Payload); reader.Dispose(); }
public virtual void TestOffsetReader() { TermVectorsReader reader = Codec.Default.TermVectorsFormat().VectorsReader(Dir, Seg.Info, FieldInfos, NewIOContext(Random())); Terms vector = reader.Get(0).Terms(TestFields[0]); Assert.IsNotNull(vector); TermsEnum termsEnum = vector.Iterator(null); Assert.IsNotNull(termsEnum); Assert.AreEqual(TestTerms.Length, vector.Size()); DocsAndPositionsEnum dpEnum = null; for (int i = 0; i < TestTerms.Length; i++) { BytesRef text = termsEnum.Next(); Assert.IsNotNull(text); string term = text.Utf8ToString(); Assert.AreEqual(TestTerms[i], term); dpEnum = termsEnum.DocsAndPositions(null, dpEnum); Assert.IsNotNull(dpEnum); Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(dpEnum.Freq(), Positions[i].Length); for (int j = 0; j < Positions[i].Length; j++) { Assert.AreEqual(Positions[i][j], dpEnum.NextPosition()); } Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc()); dpEnum = termsEnum.DocsAndPositions(null, dpEnum); Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.IsNotNull(dpEnum); Assert.AreEqual(dpEnum.Freq(), Positions[i].Length); for (int j = 0; j < Positions[i].Length; j++) { Assert.AreEqual(Positions[i][j], dpEnum.NextPosition()); Assert.AreEqual(j * 10, dpEnum.StartOffset()); Assert.AreEqual(j * 10 + TestTerms[i].Length, dpEnum.EndOffset()); } Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum.NextDoc()); } reader.Dispose(); }
private void PrintSegment(StreamWriter @out, SegmentCommitInfo si) { SegmentReader reader = new SegmentReader(si, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random())); for (int i = 0; i < reader.NumDocs; i++) { @out.WriteLine(reader.Document(i)); } Fields fields = reader.Fields; foreach (string field in fields) { Terms terms = fields.Terms(field); Assert.IsNotNull(terms); TermsEnum tis = terms.Iterator(null); while (tis.Next() != null) { @out.Write(" term=" + field + ":" + tis.Term()); @out.WriteLine(" DF=" + tis.DocFreq()); DocsAndPositionsEnum positions = tis.DocsAndPositions(reader.LiveDocs, null); while (positions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { @out.Write(" doc=" + positions.DocID()); @out.Write(" TF=" + positions.Freq()); @out.Write(" pos="); @out.Write(positions.NextPosition()); for (int j = 1; j < positions.Freq(); j++) { @out.Write("," + positions.NextPosition()); } @out.WriteLine(""); } } } reader.Dispose(); }
public virtual void TestThreadSafety() { const int numThreads = 5; int numDocs = AtLeast(50); ByteArrayPool pool = new ByteArrayPool(numThreads, 5); Directory dir = NewDirectory(); IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); const string field = "test"; ThreadClass[] ingesters = new ThreadClass[numThreads]; for (int i = 0; i < numThreads; i++) { ingesters[i] = new ThreadAnonymousInnerClassHelper(this, numDocs, pool, writer, field); ingesters[i].Start(); } for (int i = 0; i < numThreads; i++) { ingesters[i].Join(); } writer.Dispose(); IndexReader reader = DirectoryReader.Open(dir); TermsEnum terms = MultiFields.GetFields(reader).Terms(field).Iterator(null); Bits liveDocs = MultiFields.GetLiveDocs(reader); DocsAndPositionsEnum tp = null; while (terms.Next() != null) { string termText = terms.Term().Utf8ToString(); tp = terms.DocsAndPositions(liveDocs, tp); while (tp.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { int freq = tp.Freq(); for (int i = 0; i < freq; i++) { tp.NextPosition(); BytesRef payload = tp.Payload; Assert.AreEqual(termText, payload.Utf8ToString()); } } } reader.Dispose(); dir.Dispose(); Assert.AreEqual(pool.Size(), numThreads); }
public virtual void TestRandom() { // token -> docID -> tokens IDictionary <string, IDictionary <int?, IList <Token> > > actualTokens = new Dictionary <string, IDictionary <int?, IList <Token> > >(); Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc); int numDocs = AtLeast(20); //final int numDocs = AtLeast(5); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); // TODO: randomize what IndexOptions we use; also test // changing this up in one IW buffered segment...: ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; if (Random().NextBoolean()) { ft.StoreTermVectors = true; ft.StoreTermVectorOffsets = Random().NextBoolean(); ft.StoreTermVectorPositions = Random().NextBoolean(); } for (int docCount = 0; docCount < numDocs; docCount++) { Document doc = new Document(); doc.Add(new IntField("id", docCount, Field.Store.NO)); IList <Token> tokens = new List <Token>(); int numTokens = AtLeast(100); //final int numTokens = AtLeast(20); int pos = -1; int offset = 0; //System.out.println("doc id=" + docCount); for (int tokenCount = 0; tokenCount < numTokens; tokenCount++) { string text; if (Random().NextBoolean()) { text = "a"; } else if (Random().NextBoolean()) { text = "b"; } else if (Random().NextBoolean()) { text = "c"; } else { text = "d"; } int posIncr = Random().NextBoolean() ? 1 : Random().Next(5); if (tokenCount == 0 && posIncr == 0) { posIncr = 1; } int offIncr = Random().NextBoolean() ? 0 : Random().Next(5); int tokenOffset = Random().Next(5); Token token = MakeToken(text, posIncr, offset + offIncr, offset + offIncr + tokenOffset); if (!actualTokens.ContainsKey(text)) { actualTokens[text] = new Dictionary <int?, IList <Token> >(); } IDictionary <int?, IList <Token> > postingsByDoc = actualTokens[text]; if (!postingsByDoc.ContainsKey(docCount)) { postingsByDoc[docCount] = new List <Token>(); } postingsByDoc[docCount].Add(token); tokens.Add(token); pos += posIncr; // stuff abs position into type: token.Type = "" + pos; offset += offIncr + tokenOffset; //System.out.println(" " + token + " posIncr=" + token.getPositionIncrement() + " pos=" + pos + " off=" + token.StartOffset() + "/" + token.EndOffset() + " (freq=" + postingsByDoc.Get(docCount).Size() + ")"); } doc.Add(new Field("content", new CannedTokenStream(tokens.ToArray()), ft)); w.AddDocument(doc); } DirectoryReader r = w.Reader; w.Dispose(); string[] terms = new string[] { "a", "b", "c", "d" }; foreach (AtomicReaderContext ctx in r.Leaves) { // TODO: improve this AtomicReader sub = (AtomicReader)ctx.Reader; //System.out.println("\nsub=" + sub); TermsEnum termsEnum = sub.Fields.Terms("content").Iterator(null); DocsEnum docs = null; DocsAndPositionsEnum docsAndPositions = null; DocsAndPositionsEnum docsAndPositionsAndOffsets = null; FieldCache.Ints docIDToID = FieldCache.DEFAULT.GetInts(sub, "id", false); foreach (string term in terms) { //System.out.println(" term=" + term); if (termsEnum.SeekExact(new BytesRef(term))) { docs = termsEnum.Docs(null, docs); Assert.IsNotNull(docs); int doc; //System.out.println(" doc/freq"); while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { IList <Token> expected = actualTokens[term][docIDToID.Get(doc)]; //System.out.println(" doc=" + docIDToID.Get(doc) + " docID=" + doc + " " + expected.Size() + " freq"); Assert.IsNotNull(expected); Assert.AreEqual(expected.Count, docs.Freq()); } // explicitly exclude offsets here docsAndPositions = termsEnum.DocsAndPositions(null, docsAndPositions, 
DocsAndPositionsEnum.FLAG_PAYLOADS); Assert.IsNotNull(docsAndPositions); //System.out.println(" doc/freq/pos"); while ((doc = docsAndPositions.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { IList <Token> expected = actualTokens[term][docIDToID.Get(doc)]; //System.out.println(" doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq"); Assert.IsNotNull(expected); Assert.AreEqual(expected.Count, docsAndPositions.Freq()); foreach (Token token in expected) { int pos = Convert.ToInt32(token.Type); //System.out.println(" pos=" + pos); Assert.AreEqual(pos, docsAndPositions.NextPosition()); } } docsAndPositionsAndOffsets = termsEnum.DocsAndPositions(null, docsAndPositions); Assert.IsNotNull(docsAndPositionsAndOffsets); //System.out.println(" doc/freq/pos/offs"); while ((doc = docsAndPositionsAndOffsets.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { IList <Token> expected = actualTokens[term][docIDToID.Get(doc)]; //System.out.println(" doc=" + docIDToID.Get(doc) + " " + expected.Size() + " freq"); Assert.IsNotNull(expected); Assert.AreEqual(expected.Count, docsAndPositionsAndOffsets.Freq()); foreach (Token token in expected) { int pos = Convert.ToInt32(token.Type); //System.out.println(" pos=" + pos); Assert.AreEqual(pos, docsAndPositionsAndOffsets.NextPosition()); Assert.AreEqual(token.StartOffset(), docsAndPositionsAndOffsets.StartOffset()); Assert.AreEqual(token.EndOffset(), docsAndPositionsAndOffsets.EndOffset()); } } } } // TODO: test advance: } r.Dispose(); dir.Dispose(); }
public virtual void DoTestNumbers(bool withPayloads) { Directory dir = NewDirectory(); Analyzer analyzer = withPayloads ? (Analyzer) new MockPayloadAnalyzer() : new MockAnalyzer(Random()); Iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); Iwc.SetMergePolicy(NewLogMergePolicy()); // will rely on docids a bit for skipping RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Iwc); FieldType ft = new FieldType(TextField.TYPE_STORED); ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; if (Random().NextBoolean()) { ft.StoreTermVectors = true; ft.StoreTermVectorOffsets = Random().NextBoolean(); ft.StoreTermVectorPositions = Random().NextBoolean(); } int numDocs = AtLeast(500); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); doc.Add(new Field("numbers", English.IntToEnglish(i), ft)); doc.Add(new Field("oddeven", (i % 2) == 0 ? "even" : "odd", ft)); doc.Add(new StringField("id", "" + i, Field.Store.NO)); w.AddDocument(doc); } IndexReader reader = w.Reader; w.Dispose(); string[] terms = new string[] { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" }; foreach (string term in terms) { DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef(term)); int doc; while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { string storedNumbers = reader.Document(doc).Get("numbers"); int freq = dp.Freq(); for (int i = 0; i < freq; i++) { dp.NextPosition(); int start = dp.StartOffset(); Debug.Assert(start >= 0); int end = dp.EndOffset(); Debug.Assert(end >= 0 && end >= start); // check that the offsets correspond to the term in the src text Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals(term)); if (withPayloads) { // check that we have a payload and it starts with "pos" Assert.IsNotNull(dp.Payload); BytesRef payload = dp.Payload; Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:")); } // note: withPayloads=false doesn't necessarily mean we don't have them from MockAnalyzer! } } } // check we can skip correctly int numSkippingTests = AtLeast(50); for (int j = 0; j < numSkippingTests; j++) { int num = TestUtil.NextInt(Random(), 100, Math.Min(numDocs - 1, 999)); DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred")); int doc = dp.Advance(num); Assert.AreEqual(num, doc); int freq = dp.Freq(); for (int i = 0; i < freq; i++) { string storedNumbers = reader.Document(doc).Get("numbers"); dp.NextPosition(); int start = dp.StartOffset(); Debug.Assert(start >= 0); int end = dp.EndOffset(); Debug.Assert(end >= 0 && end >= start); // check that the offsets correspond to the term in the src text Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals("hundred")); if (withPayloads) { // check that we have a payload and it starts with "pos" Assert.IsNotNull(dp.Payload); BytesRef payload = dp.Payload; Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:")); } // note: withPayloads=false doesn't necessarily mean we don't have them from MockAnalyzer! } } // check that other fields (without offsets) work correctly for (int i = 0; i < numDocs; i++) { DocsEnum dp = MultiFields.GetTermDocsEnum(reader, null, "id", new BytesRef("" + i), 0); Assert.AreEqual(i, dp.NextDoc()); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc()); } reader.Dispose(); dir.Dispose(); }
public virtual void TestLongPostings_Mem() { // Don't use TestUtil.getTempDir so that we own the // randomness (ie same seed will point to same dir): Directory dir = NewFSDirectory(CreateTempDir("longpostings" + "." + Random().NextLong())); int NUM_DOCS = AtLeast(2000); if (VERBOSE) { Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS); } string s1 = GetRandomTerm(null); string s2 = GetRandomTerm(s1); if (VERBOSE) { Console.WriteLine("\nTEST: s1=" + s1 + " s2=" + s2); /* * for(int idx=0;idx<s1.Length();idx++) { * System.out.println(" s1 ch=0x" + Integer.toHexString(s1.charAt(idx))); * } * for(int idx=0;idx<s2.Length();idx++) { * System.out.println(" s2 ch=0x" + Integer.toHexString(s2.charAt(idx))); * } */ } FixedBitSet isS1 = new FixedBitSet(NUM_DOCS); for (int idx = 0; idx < NUM_DOCS; idx++) { if (Random().NextBoolean()) { isS1.Set(idx); } } IndexReader r; IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE).SetMergePolicy(NewLogMergePolicy()); iwc.SetRAMBufferSizeMB(16.0 + 16.0 * Random().NextDouble()); iwc.SetMaxBufferedDocs(-1); RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, iwc); for (int idx = 0; idx < NUM_DOCS; idx++) { Document doc = new Document(); string s = isS1.Get(idx) ? s1 : s2; Field f = NewTextField("field", s, Field.Store.NO); int count = TestUtil.NextInt(Random(), 1, 4); for (int ct = 0; ct < count; ct++) { doc.Add(f); } riw.AddDocument(doc); } r = riw.Reader; riw.Dispose(); /* * if (VERBOSE) { * System.out.println("TEST: terms"); * TermEnum termEnum = r.Terms(); * while(termEnum.Next()) { * System.out.println(" term=" + termEnum.Term() + " len=" + termEnum.Term().Text().Length()); * Assert.IsTrue(termEnum.DocFreq() > 0); * System.out.println(" s1?=" + (termEnum.Term().Text().equals(s1)) + " s1len=" + s1.Length()); * System.out.println(" s2?=" + (termEnum.Term().Text().equals(s2)) + " s2len=" + s2.Length()); * final String s = termEnum.Term().Text(); * for(int idx=0;idx<s.Length();idx++) { * System.out.println(" ch=0x" + Integer.toHexString(s.charAt(idx))); * } * } * } */ Assert.AreEqual(NUM_DOCS, r.NumDocs); Assert.IsTrue(r.DocFreq(new Term("field", s1)) > 0); Assert.IsTrue(r.DocFreq(new Term("field", s2)) > 0); int num = AtLeast(1000); for (int iter = 0; iter < num; iter++) { string term; bool doS1; if (Random().NextBoolean()) { term = s1; doS1 = true; } else { term = s2; doS1 = false; } if (VERBOSE) { Console.WriteLine("\nTEST: iter=" + iter + " doS1=" + doS1); } DocsAndPositionsEnum postings = MultiFields.GetTermPositionsEnum(r, null, "field", new BytesRef(term)); int docID = -1; while (docID < DocIdSetIterator.NO_MORE_DOCS) { int what = Random().Next(3); if (what == 0) { if (VERBOSE) { Console.WriteLine("TEST: docID=" + docID + "; do next()"); } // nextDoc int expected = docID + 1; while (true) { if (expected == NUM_DOCS) { expected = int.MaxValue; break; } else if (isS1.Get(expected) == doS1) { break; } else { expected++; } } docID = postings.NextDoc(); if (VERBOSE) { Console.WriteLine(" got docID=" + docID); } Assert.AreEqual(expected, docID); if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; } if (Random().Next(6) == 3) { int freq = postings.Freq(); Assert.IsTrue(freq >= 1 && freq <= 4); for (int pos = 0; pos < freq; pos++) { Assert.AreEqual(pos, postings.NextPosition()); if (Random().NextBoolean()) { var dummy = postings.Payload; if (Random().NextBoolean()) { dummy = postings.Payload; // get it again } } } } } else { // advance int targetDocID; if (docID == -1) 
{ targetDocID = Random().Next(NUM_DOCS + 1); } else { targetDocID = docID + TestUtil.NextInt(Random(), 1, NUM_DOCS - docID); } if (VERBOSE) { Console.WriteLine("TEST: docID=" + docID + "; do advance(" + targetDocID + ")"); } int expected = targetDocID; while (true) { if (expected == NUM_DOCS) { expected = int.MaxValue; break; } else if (isS1.Get(expected) == doS1) { break; } else { expected++; } } docID = postings.Advance(targetDocID); if (VERBOSE) { Console.WriteLine(" got docID=" + docID); } Assert.AreEqual(expected, docID); if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; } if (Random().Next(6) == 3) { int freq = postings.Freq(); Assert.IsTrue(freq >= 1 && freq <= 4); for (int pos = 0; pos < freq; pos++) { Assert.AreEqual(pos, postings.NextPosition()); if (Random().NextBoolean()) { var dummy = postings.Payload; if (Random().NextBoolean()) { dummy = postings.Payload; // get it again } } } } } } } r.Dispose(); dir.Dispose(); }
public override int Freq() { Debug.Assert(Current != null); return Current.Freq(); }
public virtual void VerifyEquals(DirectoryReader r1, DirectoryReader r2, string idField) { if (VERBOSE) { Console.WriteLine("\nr1 docs:"); PrintDocs(r1); Console.WriteLine("\nr2 docs:"); PrintDocs(r2); } if (r1.NumDocs != r2.NumDocs) { Debug.Assert(false, "r1.NumDocs=" + r1.NumDocs + " vs r2.NumDocs=" + r2.NumDocs); } bool hasDeletes = !(r1.MaxDoc == r2.MaxDoc && r1.NumDocs == r1.MaxDoc); int[] r2r1 = new int[r2.MaxDoc]; // r2 id to r1 id mapping // create mapping from id2 space to id2 based on idField Fields f1 = MultiFields.GetFields(r1); if (f1 == null) { // make sure r2 is empty Assert.IsNull(MultiFields.GetFields(r2)); return; } Terms terms1 = f1.Terms(idField); if (terms1 == null) { Assert.IsTrue(MultiFields.GetFields(r2) == null || MultiFields.GetFields(r2).Terms(idField) == null); return; } TermsEnum termsEnum = terms1.Iterator(null); Bits liveDocs1 = MultiFields.GetLiveDocs(r1); Bits liveDocs2 = MultiFields.GetLiveDocs(r2); Fields fields = MultiFields.GetFields(r2); if (fields == null) { // make sure r1 is in fact empty (eg has only all // deleted docs): Bits liveDocs = MultiFields.GetLiveDocs(r1); DocsEnum docs = null; while (termsEnum.Next() != null) { docs = TestUtil.Docs(Random(), termsEnum, liveDocs, docs, DocsEnum.FLAG_NONE); while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { Assert.Fail("r1 is not empty but r2 is"); } } return; } Terms terms2 = fields.Terms(idField); TermsEnum termsEnum2 = terms2.Iterator(null); DocsEnum termDocs1 = null; DocsEnum termDocs2 = null; while (true) { BytesRef term = termsEnum.Next(); //System.out.println("TEST: match id term=" + term); if (term == null) { break; } termDocs1 = TestUtil.Docs(Random(), termsEnum, liveDocs1, termDocs1, DocsEnum.FLAG_NONE); if (termsEnum2.SeekExact(term)) { termDocs2 = TestUtil.Docs(Random(), termsEnum2, liveDocs2, termDocs2, DocsEnum.FLAG_NONE); } else { termDocs2 = null; } if (termDocs1.NextDoc() == DocIdSetIterator.NO_MORE_DOCS) { // this doc is deleted and wasn't replaced Assert.IsTrue(termDocs2 == null || termDocs2.NextDoc() == DocIdSetIterator.NO_MORE_DOCS); continue; } int id1 = termDocs1.DocID(); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs1.NextDoc()); Assert.IsTrue(termDocs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int id2 = termDocs2.DocID(); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs2.NextDoc()); r2r1[id2] = id1; // verify stored fields are equivalent try { VerifyEquals(r1.Document(id1), r2.Document(id2)); } catch (Exception t) { Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term); Console.WriteLine(" d1=" + r1.Document(id1)); Console.WriteLine(" d2=" + r2.Document(id2)); throw t; } try { // verify term vectors are equivalent VerifyEquals(r1.GetTermVectors(id1), r2.GetTermVectors(id2)); } catch (Exception e) { Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2); Fields tv1 = r1.GetTermVectors(id1); Console.WriteLine(" d1=" + tv1); if (tv1 != null) { DocsAndPositionsEnum dpEnum = null; DocsEnum dEnum = null; foreach (string field in tv1) { Console.WriteLine(" " + field + ":"); Terms terms3 = tv1.Terms(field); Assert.IsNotNull(terms3); TermsEnum termsEnum3 = terms3.Iterator(null); BytesRef term2; while ((term2 = termsEnum3.Next()) != null) { Console.WriteLine(" " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq()); dpEnum = termsEnum3.DocsAndPositions(null, dpEnum); if (dpEnum != null) { Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int freq = dpEnum.Freq(); Console.WriteLine(" doc=" 
+ dpEnum.DocID() + " freq=" + freq); for (int posUpto = 0; posUpto < freq; posUpto++) { Console.WriteLine(" pos=" + dpEnum.NextPosition()); } } else { dEnum = TestUtil.Docs(Random(), termsEnum3, null, dEnum, DocsEnum.FLAG_FREQS); Assert.IsNotNull(dEnum); Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int freq = dEnum.Freq(); Console.WriteLine(" doc=" + dEnum.DocID() + " freq=" + freq); } } } } Fields tv2 = r2.GetTermVectors(id2); Console.WriteLine(" d2=" + tv2); if (tv2 != null) { DocsAndPositionsEnum dpEnum = null; DocsEnum dEnum = null; foreach (string field in tv2) { Console.WriteLine(" " + field + ":"); Terms terms3 = tv2.Terms(field); Assert.IsNotNull(terms3); TermsEnum termsEnum3 = terms3.Iterator(null); BytesRef term2; while ((term2 = termsEnum3.Next()) != null) { Console.WriteLine(" " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq()); dpEnum = termsEnum3.DocsAndPositions(null, dpEnum); if (dpEnum != null) { Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int freq = dpEnum.Freq(); Console.WriteLine(" doc=" + dpEnum.DocID() + " freq=" + freq); for (int posUpto = 0; posUpto < freq; posUpto++) { Console.WriteLine(" pos=" + dpEnum.NextPosition()); } } else { dEnum = TestUtil.Docs(Random(), termsEnum3, null, dEnum, DocsEnum.FLAG_FREQS); Assert.IsNotNull(dEnum); Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); int freq = dEnum.Freq(); Console.WriteLine(" doc=" + dEnum.DocID() + " freq=" + freq); } } } } throw e; } } //System.out.println("TEST: done match id"); // Verify postings //System.out.println("TEST: create te1"); Fields fields1 = MultiFields.GetFields(r1); IEnumerator <string> fields1Enum = fields1.GetEnumerator(); Fields fields2 = MultiFields.GetFields(r2); IEnumerator <string> fields2Enum = fields2.GetEnumerator(); string field1 = null, field2 = null; TermsEnum termsEnum1 = null; termsEnum2 = null; DocsEnum docs1 = null, docs2 = null; // pack both doc and freq into single element for easy sorting long[] info1 = new long[r1.NumDocs]; long[] info2 = new long[r2.NumDocs]; for (; ;) { BytesRef term1 = null, term2 = null; // iterate until we get some docs int len1; for (; ;) { len1 = 0; if (termsEnum1 == null) { if (!fields1Enum.MoveNext()) { break; } field1 = fields1Enum.Current; Terms terms = fields1.Terms(field1); if (terms == null) { continue; } termsEnum1 = terms.Iterator(null); } term1 = termsEnum1.Next(); if (term1 == null) { // no more terms in this field termsEnum1 = null; continue; } //System.out.println("TEST: term1=" + term1); docs1 = TestUtil.Docs(Random(), termsEnum1, liveDocs1, docs1, DocsEnum.FLAG_FREQS); while (docs1.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { int d = docs1.DocID(); int f = docs1.Freq(); info1[len1] = (((long)d) << 32) | f; len1++; } if (len1 > 0) { break; } } // iterate until we get some docs int len2; for (; ;) { len2 = 0; if (termsEnum2 == null) { if (!fields2Enum.MoveNext()) { break; } field2 = fields2Enum.Current; Terms terms = fields2.Terms(field2); if (terms == null) { continue; } termsEnum2 = terms.Iterator(null); } term2 = termsEnum2.Next(); if (term2 == null) { // no more terms in this field termsEnum2 = null; continue; } //System.out.println("TEST: term1=" + term1); docs2 = TestUtil.Docs(Random(), termsEnum2, liveDocs2, docs2, DocsEnum.FLAG_FREQS); while (docs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) { int d = r2r1[docs2.DocID()]; int f = docs2.Freq(); info2[len2] = (((long)d) << 32) | f; len2++; } if (len2 > 0) { break; } } Assert.AreEqual(len1, len2); if (len1 == 0) 
// no more terms { break; } Assert.AreEqual(field1, field2); Assert.IsTrue(term1.BytesEquals(term2)); if (!hasDeletes) { Assert.AreEqual(termsEnum1.DocFreq(), termsEnum2.DocFreq()); } Assert.AreEqual(term1, term2, "len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes); // sort info2 to get it into ascending docid Array.Sort(info2, 0, len2); // now compare for (int i = 0; i < len1; i++) { Assert.AreEqual(info1[i], info2[i], "i=" + i + " len=" + len1 + " d1=" + ((long)((ulong)info1[i] >> 32)) + " f1=" + (info1[i] & int.MaxValue) + " d2=" + ((long)((ulong)info2[i] >> 32)) + " f2=" + (info2[i] & int.MaxValue) + " field=" + field1 + " term=" + term1.Utf8ToString()); } } }
public static void VerifyEquals(Fields d1, Fields d2) { if (d1 == null) { Assert.IsTrue(d2 == null || d2.Size == 0); return; } Assert.IsTrue(d2 != null); IEnumerator <string> fieldsEnum2 = d2.GetEnumerator(); foreach (string field1 in d1) { fieldsEnum2.MoveNext(); string field2 = fieldsEnum2.Current; Assert.AreEqual(field1, field2); Terms terms1 = d1.Terms(field1); Assert.IsNotNull(terms1); TermsEnum termsEnum1 = terms1.Iterator(null); Terms terms2 = d2.Terms(field2); Assert.IsNotNull(terms2); TermsEnum termsEnum2 = terms2.Iterator(null); DocsAndPositionsEnum dpEnum1 = null; DocsAndPositionsEnum dpEnum2 = null; DocsEnum dEnum1 = null; DocsEnum dEnum2 = null; BytesRef term1; while ((term1 = termsEnum1.Next()) != null) { BytesRef term2 = termsEnum2.Next(); Assert.AreEqual(term1, term2); Assert.AreEqual(termsEnum1.TotalTermFreq(), termsEnum2.TotalTermFreq()); dpEnum1 = termsEnum1.DocsAndPositions(null, dpEnum1); dpEnum2 = termsEnum2.DocsAndPositions(null, dpEnum2); if (dpEnum1 != null) { Assert.IsNotNull(dpEnum2); int docID1 = dpEnum1.NextDoc(); dpEnum2.NextDoc(); // docIDs are not supposed to be equal //int docID2 = dpEnum2.NextDoc(); //Assert.AreEqual(docID1, docID2); Assert.IsTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS); int freq1 = dpEnum1.Freq(); int freq2 = dpEnum2.Freq(); Assert.AreEqual(freq1, freq2); IOffsetAttribute offsetAtt1 = dpEnum1.Attributes().HasAttribute <IOffsetAttribute>() ? dpEnum1.Attributes().GetAttribute <IOffsetAttribute>() : null; IOffsetAttribute offsetAtt2 = dpEnum2.Attributes().HasAttribute <IOffsetAttribute>() ? dpEnum2.Attributes().GetAttribute <IOffsetAttribute>() : null; if (offsetAtt1 != null) { Assert.IsNotNull(offsetAtt2); } else { Assert.IsNull(offsetAtt2); } for (int posUpto = 0; posUpto < freq1; posUpto++) { int pos1 = dpEnum1.NextPosition(); int pos2 = dpEnum2.NextPosition(); Assert.AreEqual(pos1, pos2); if (offsetAtt1 != null) { Assert.AreEqual(offsetAtt1.StartOffset(), offsetAtt2.StartOffset()); Assert.AreEqual(offsetAtt1.EndOffset(), offsetAtt2.EndOffset()); } } Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum1.NextDoc()); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum2.NextDoc()); } else { dEnum1 = TestUtil.Docs(Random(), termsEnum1, null, dEnum1, DocsEnum.FLAG_FREQS); dEnum2 = TestUtil.Docs(Random(), termsEnum2, null, dEnum2, DocsEnum.FLAG_FREQS); Assert.IsNotNull(dEnum1); Assert.IsNotNull(dEnum2); int docID1 = dEnum1.NextDoc(); dEnum2.NextDoc(); // docIDs are not supposed to be equal //int docID2 = dEnum2.NextDoc(); //Assert.AreEqual(docID1, docID2); Assert.IsTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS); int freq1 = dEnum1.Freq(); int freq2 = dEnum2.Freq(); Assert.AreEqual(freq1, freq2); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dEnum1.NextDoc()); Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dEnum2.NextDoc()); } } Assert.IsNull(termsEnum2.Next()); } Assert.IsFalse(fieldsEnum2.MoveNext()); }
public override int Freq() { return @in.Freq(); }
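The two Freq() overrides above (one delegating to Current, one to @in) are fragments of enum wrappers whose enclosing classes are not shown here. As context only, a minimal forwarding wrapper over DocsAndPositionsEnum might look like the sketch below; the class name is invented, and whether Cost() is exposed as a method in this port is an assumption.

// Hypothetical forwarding wrapper (an assumption, not the original class): every member
// simply delegates to the wrapped enum, which is the pattern the fragments above follow.
internal class ForwardingDocsAndPositionsEnum : DocsAndPositionsEnum
{
    private readonly DocsAndPositionsEnum @in;

    public ForwardingDocsAndPositionsEnum(DocsAndPositionsEnum @in)
    {
        this.@in = @in;
    }

    public override int Freq() { return @in.Freq(); }

    public override int DocID() { return @in.DocID(); }

    public override int NextDoc() { return @in.NextDoc(); }

    public override int Advance(int target) { return @in.Advance(target); }

    public override int NextPosition() { return @in.NextPosition(); }

    public override int StartOffset() { return @in.StartOffset(); }

    public override int EndOffset() { return @in.EndOffset(); }

    public override BytesRef Payload
    {
        get { return @in.Payload; }
    }

    public override long Cost() { return @in.Cost(); } // assumed to be a method in this port
}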
/// <summary> /// checks docs + freqs + positions + payloads, sequentially /// </summary> public void AssertDocsAndPositionsEnumEquals(string info, DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum rightDocs) { if (leftDocs == null || rightDocs == null) { Assert.IsNull(leftDocs); Assert.IsNull(rightDocs); return; } Assert.AreEqual(-1, leftDocs.DocID(), info); Assert.AreEqual(-1, rightDocs.DocID(), info); int docid; while ((docid = leftDocs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { Assert.AreEqual(docid, rightDocs.NextDoc(), info); int freq = leftDocs.Freq(); Assert.AreEqual(freq, rightDocs.Freq(), info); for (int i = 0; i < freq; i++) { Assert.AreEqual(leftDocs.NextPosition(), rightDocs.NextPosition(), info); Assert.AreEqual(leftDocs.Payload, rightDocs.Payload, info); Assert.AreEqual(leftDocs.StartOffset(), rightDocs.StartOffset(), info); Assert.AreEqual(leftDocs.EndOffset(), rightDocs.EndOffset(), info); } } Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, rightDocs.NextDoc(), info); }
/// <summary> /// checks docs + freqs + positions, sequentially (offsets/payloads are allowed to differ and are not compared) /// </summary> public virtual void AssertDocsAndPositionsEnum(DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum rightDocs) { if (leftDocs == null || rightDocs == null) { Assert.IsNull(leftDocs); Assert.IsNull(rightDocs); return; } Assert.AreEqual(-1, leftDocs.DocID()); Assert.AreEqual(-1, rightDocs.DocID()); int docid; while ((docid = leftDocs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { Assert.AreEqual(docid, rightDocs.NextDoc()); int freq = leftDocs.Freq(); Assert.AreEqual(freq, rightDocs.Freq()); for (int i = 0; i < freq; i++) { Assert.AreEqual(leftDocs.NextPosition(), rightDocs.NextPosition()); // we don't assert offsets/payloads, they are allowed to be different } } Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, rightDocs.NextDoc()); }
public virtual void TestArbitraryFields() { Directory dir = NewDirectory(); RandomIndexWriter w = new RandomIndexWriter(Random(), dir); int NUM_DOCS = AtLeast(27); if (VERBOSE) { Console.WriteLine("TEST: " + NUM_DOCS + " docs"); } int[] fieldsPerDoc = new int[NUM_DOCS]; int baseCount = 0; for (int docCount = 0; docCount < NUM_DOCS; docCount++) { int fieldCount = TestUtil.NextInt(Random(), 1, 17); fieldsPerDoc[docCount] = fieldCount - 1; int finalDocCount = docCount; if (VERBOSE) { Console.WriteLine("TEST: " + fieldCount + " fields in doc " + docCount); } int finalBaseCount = baseCount; baseCount += fieldCount - 1; w.AddDocument(new IterableAnonymousInnerClassHelper(this, fieldCount, finalDocCount, finalBaseCount)); } IndexReader r = w.Reader; w.Dispose(); IndexSearcher s = NewSearcher(r); int counter = 0; for (int id = 0; id < NUM_DOCS; id++) { if (VERBOSE) { Console.WriteLine("TEST: verify doc id=" + id + " (" + fieldsPerDoc[id] + " fields) counter=" + counter); } TopDocs hits = s.Search(new TermQuery(new Term("id", "" + id)), 1); Assert.AreEqual(1, hits.TotalHits); int docID = hits.ScoreDocs[0].Doc; Document doc = s.Doc(docID); int endCounter = counter + fieldsPerDoc[id]; while (counter < endCounter) { string name = "f" + counter; int fieldID = counter % 10; bool stored = (counter & 1) == 0 || fieldID == 3; bool binary = fieldID == 3; bool indexed = fieldID != 3; string stringValue; if (fieldID != 3 && fieldID != 9) { stringValue = "text " + counter; } else { stringValue = null; } // stored: if (stored) { IndexableField f = doc.GetField(name); Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter); if (binary) { Assert.IsNotNull(f, "doc " + id + " doesn't have field f" + counter); BytesRef b = f.BinaryValue(); Assert.IsNotNull(b); Assert.AreEqual(10, b.Length); for (int idx = 0; idx < 10; idx++) { Assert.AreEqual((byte)(idx + counter), b.Bytes[b.Offset + idx]); } } else { Debug.Assert(stringValue != null); Assert.AreEqual(stringValue, f.StringValue); } } if (indexed) { bool tv = counter % 2 == 1 && fieldID != 9; if (tv) { Terms tfv = r.GetTermVectors(docID).Terms(name); Assert.IsNotNull(tfv); TermsEnum termsEnum = tfv.Iterator(null); Assert.AreEqual(new BytesRef("" + counter), termsEnum.Next()); Assert.AreEqual(1, termsEnum.TotalTermFreq()); DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null); Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(1, dpEnum.Freq()); Assert.AreEqual(1, dpEnum.NextPosition()); Assert.AreEqual(new BytesRef("text"), termsEnum.Next()); Assert.AreEqual(1, termsEnum.TotalTermFreq()); dpEnum = termsEnum.DocsAndPositions(null, dpEnum); Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); Assert.AreEqual(1, dpEnum.Freq()); Assert.AreEqual(0, dpEnum.NextPosition()); Assert.IsNull(termsEnum.Next()); // TODO: offsets } else { Fields vectors = r.GetTermVectors(docID); Assert.IsTrue(vectors == null || vectors.Terms(name) == null); } BooleanQuery bq = new BooleanQuery(); bq.Add(new TermQuery(new Term("id", "" + id)), BooleanClause.Occur.MUST); bq.Add(new TermQuery(new Term(name, "text")), BooleanClause.Occur.MUST); TopDocs hits2 = s.Search(bq, 1); Assert.AreEqual(1, hits2.TotalHits); Assert.AreEqual(docID, hits2.ScoreDocs[0].Doc); bq = new BooleanQuery(); bq.Add(new TermQuery(new Term("id", "" + id)), BooleanClause.Occur.MUST); bq.Add(new TermQuery(new Term(name, "" + counter)), BooleanClause.Occur.MUST); TopDocs hits3 = s.Search(bq, 1); Assert.AreEqual(1, hits3.TotalHits); Assert.AreEqual(docID, 
hits3.ScoreDocs[0].Doc); } counter++; } } r.Dispose(); dir.Dispose(); }
/// <summary> /// checks advancing docs + positions /// </summary> public virtual void AssertPositionsSkipping(int docFreq, DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum rightDocs) { if (leftDocs == null || rightDocs == null) { Assert.IsNull(leftDocs); Assert.IsNull(rightDocs); return; } int docid = -1; int averageGap = MAXDOC / (1 + docFreq); int skipInterval = 16; while (true) { if (Random().NextBoolean()) { // nextDoc() docid = leftDocs.NextDoc(); Assert.AreEqual(docid, rightDocs.NextDoc()); } else { // advance() int skip = docid + (int)Math.Ceiling(Math.Abs(skipInterval + Random().NextDouble() * averageGap)); docid = leftDocs.Advance(skip); Assert.AreEqual(docid, rightDocs.Advance(skip)); } if (docid == DocIdSetIterator.NO_MORE_DOCS) { return; } int freq = leftDocs.Freq(); Assert.AreEqual(freq, rightDocs.Freq()); for (int i = 0; i < freq; i++) { Assert.AreEqual(leftDocs.NextPosition(), rightDocs.NextPosition()); // we don't compare the payloads, it's allowed that one is empty, etc. } } }
public virtual void TestRandomPositions() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy())); int numDocs = AtLeast(47); int max = 1051; int term = Random().Next(max); int?[][] positionsInDoc = new int?[numDocs][]; FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.OmitNorms = true; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); List <int?> positions = new List <int?>(); StringBuilder builder = new StringBuilder(); int num = AtLeast(131); for (int j = 0; j < num; j++) { int nextInt = Random().Next(max); builder.Append(nextInt).Append(" "); if (nextInt == term) { positions.Add(Convert.ToInt32(j)); } } if (positions.Count == 0) { builder.Append(term); positions.Add(num); } doc.Add(NewField(FieldName, builder.ToString(), customType)); positionsInDoc[i] = positions.ToArray(); writer.AddDocument(doc); } IndexReader reader = writer.Reader; writer.Dispose(); int num_ = AtLeast(13); for (int i = 0; i < num_; i++) { BytesRef bytes = new BytesRef("" + term); IndexReaderContext topReaderContext = reader.Context; foreach (AtomicReaderContext atomicReaderContext in topReaderContext.Leaves) { DocsAndPositionsEnum docsAndPosEnum = GetDocsAndPositions((AtomicReader)atomicReaderContext.Reader, bytes, null); Assert.IsNotNull(docsAndPosEnum); int initDoc = 0; int maxDoc = atomicReaderContext.Reader.MaxDoc; // initially advance or do next doc if (Random().NextBoolean()) { initDoc = docsAndPosEnum.NextDoc(); } else { initDoc = docsAndPosEnum.Advance(Random().Next(maxDoc)); } // now run through the scorer and check if all positions are there... do { int docID = docsAndPosEnum.DocID(); if (docID == DocIdSetIterator.NO_MORE_DOCS) { break; } int?[] pos = positionsInDoc[atomicReaderContext.DocBase + docID]; Assert.AreEqual(pos.Length, docsAndPosEnum.Freq()); // number of positions read should be random - don't always read all of them int howMany = Random().Next(20) == 0 ? pos.Length - Random().Next(pos.Length) : pos.Length; for (int j = 0; j < howMany; j++) { Assert.AreEqual(pos[j], docsAndPosEnum.NextPosition(), "iteration: " + i + " initDoc: " + initDoc + " doc: " + docID + " base: " + atomicReaderContext.DocBase + " positions: " + pos); /* TODO: + " usePayloads: " + usePayload*/ } if (Random().Next(10) == 0) // once in a while, advance { if (docsAndPosEnum.Advance(docID + 1 + Random().Next((maxDoc - docID))) == DocIdSetIterator.NO_MORE_DOCS) { break; } } } while (docsAndPosEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); } } reader.Dispose(); dir.Dispose(); }
/// <summary> /// checks advancing docs + positions /// </summary> public void AssertPositionsSkippingEquals(string info, IndexReader leftReader, int docFreq, DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum rightDocs) { if (leftDocs == null || rightDocs == null) { Assert.IsNull(leftDocs); Assert.IsNull(rightDocs); return; } int docid = -1; int averageGap = leftReader.MaxDoc / (1 + docFreq); int skipInterval = 16; while (true) { if (Random().NextBoolean()) { // nextDoc() docid = leftDocs.NextDoc(); Assert.AreEqual(docid, rightDocs.NextDoc(), info); } else { // advance() int skip = docid + (int)Math.Ceiling(Math.Abs(skipInterval + Random().NextDouble() * averageGap)); docid = leftDocs.Advance(skip); Assert.AreEqual(docid, rightDocs.Advance(skip), info); } if (docid == DocIdSetIterator.NO_MORE_DOCS) { return; } int freq = leftDocs.Freq(); Assert.AreEqual(freq, rightDocs.Freq(), info); for (int i = 0; i < freq; i++) { Assert.AreEqual(leftDocs.NextPosition(), rightDocs.NextPosition(), info); Assert.AreEqual(leftDocs.Payload, rightDocs.Payload, info); } } }
public virtual void TestLargeNumberOfPositions() { Directory dir = NewDirectory(); RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); int howMany = 1000; FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); customType.OmitNorms = true; for (int i = 0; i < 39; i++) { Document doc = new Document(); StringBuilder builder = new StringBuilder(); for (int j = 0; j < howMany; j++) { if (j % 2 == 0) { builder.Append("even "); } else { builder.Append("odd "); } } doc.Add(NewField(FieldName, builder.ToString(), customType)); writer.AddDocument(doc); } // now do searches IndexReader reader = writer.Reader; writer.Dispose(); int num = AtLeast(13); for (int i = 0; i < num; i++) { BytesRef bytes = new BytesRef("even"); IndexReaderContext topReaderContext = reader.Context; foreach (AtomicReaderContext atomicReaderContext in topReaderContext.Leaves) { DocsAndPositionsEnum docsAndPosEnum = GetDocsAndPositions((AtomicReader)atomicReaderContext.Reader, bytes, null); Assert.IsNotNull(docsAndPosEnum); int initDoc = 0; int maxDoc = atomicReaderContext.Reader.MaxDoc; // initially advance or do next doc if (Random().NextBoolean()) { initDoc = docsAndPosEnum.NextDoc(); } else { initDoc = docsAndPosEnum.Advance(Random().Next(maxDoc)); } string msg = "Iteration: " + i + " initDoc: " + initDoc; // TODO: + " payloads: " + usePayload; Assert.AreEqual(howMany / 2, docsAndPosEnum.Freq()); for (int j = 0; j < howMany; j += 2) { Assert.AreEqual(j, docsAndPosEnum.NextPosition(), "position mismatch index: " + j + " with freq: " + docsAndPosEnum.Freq() + " -- " + msg); } } } reader.Dispose(); dir.Dispose(); }