public virtual void TestSimpleSkip()
{
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new PayloadAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    Term term = new Term("test", "a");
    for (int i = 0; i < 5000; i++)
    {
        Document d1 = new Document();
        d1.Add(new Field(term.Field(), term.Text(), Field.Store.NO, Field.Index.ANALYZED));
        writer.AddDocument(d1);
    }
    writer.Flush();
    writer.Optimize();
    writer.Close();

    IndexReader reader = SegmentReader.GetOnlySegmentReader(dir);
    SegmentTermPositions tp = (SegmentTermPositions) reader.TermPositions();
    tp.freqStream_ForNUnit = new CountingStream(this, tp.freqStream_ForNUnit);

    for (int i = 0; i < 2; i++)
    {
        counter = 0;
        tp.Seek(term);

        CheckSkipTo(tp, 14, 185);   // no skips
        CheckSkipTo(tp, 17, 190);   // one skip on level 0
        CheckSkipTo(tp, 287, 200);  // one skip on level 1, two on level 0

        // this test would fail if we had only one skip level,
        // because then more bytes would be read from the freqStream
        CheckSkipTo(tp, 4800, 250); // one skip on level 2
    }
}
public virtual void TestSeek()
{
    Directory directory = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    for (int i = 0; i < 10; i++)
    {
        Document doc = new Document();
        doc.Add(new Field(this.field, "a b", Field.Store.YES, Field.Index.ANALYZED));
        writer.AddDocument(doc);
    }
    writer.Close();

    IndexReader reader = IndexReader.Open(directory);
    TermPositions tp = reader.TermPositions();
    tp.Seek(new Term(this.field, "b"));
    for (int i = 0; i < 10; i++)
    {
        tp.Next();
        Assert.AreEqual(tp.Doc(), i);
        Assert.AreEqual(tp.NextPosition(), 1);
    }
    tp.Seek(new Term(this.field, "a"));
    for (int i = 0; i < 10; i++)
    {
        tp.Next();
        Assert.AreEqual(tp.Doc(), i);
        Assert.AreEqual(tp.NextPosition(), 0);
    }
}
internal TermPositions GetPositions()
{
    if (postings == null)
    {
        postings = reader.TermPositions();
    }
    return postings;
}
internal TermPositions GetPositions(IState state)
{
    if (postings == null)
    {
        postings = reader.TermPositions(state);
    }
    return postings;
}
/// <summary> Creates a new <c>MultipleTermPositions</c> instance.
/// </summary>
/// <exception cref="System.IO.IOException"></exception>
public MultipleTermPositions(IndexReader indexReader, Term[] terms)
{
    System.Collections.IList termPositions = new System.Collections.ArrayList();
    for (int i = 0; i < terms.Length; i++)
        termPositions.Add(indexReader.TermPositions(terms[i]));
    _termPositionsQueue = new TermPositionsQueue(termPositions);
    _posList = new IntQueue();
}
/// <summary> Creates a new <c>MultipleTermPositions</c> instance.
/// </summary>
/// <exception cref="System.IO.IOException"></exception>
public MultipleTermPositions(IndexReader indexReader, Term[] terms)
{
    var termPositions = new System.Collections.Generic.LinkedList<TermPositions>();
    for (int i = 0; i < terms.Length; i++)
    {
        termPositions.AddLast(indexReader.TermPositions(terms[i]));
    }
    _termPositionsQueue = new TermPositionsQueue(termPositions);
    _posList = new IntQueue();
}
/// <summary> Creates a new <code>MultipleTermPositions</code> instance.
/// </summary>
/// <exception cref="IOException"></exception>
public MultipleTermPositions(IndexReader indexReader, Term[] terms)
{
    System.Collections.IList termPositions = new System.Collections.ArrayList();
    for (int i = 0; i < terms.Length; i++)
    {
        termPositions.Add(indexReader.TermPositions(terms[i]));
    }
    _termPositionsQueue = new TermPositionsQueue(termPositions);
    _posList = new IntQueue();
}
/// <summary> Creates a new <code>MultipleTermPositions</code> instance.
/// </summary>
/// <exception cref="IOException"></exception>
public MultipleTermPositions(IndexReader indexReader, Term[] terms)
{
    IList<TermPositions> termPositions = new List<TermPositions>();
    for (int i = 0; i < terms.Length; i++)
    {
        termPositions.Add(indexReader.TermPositions(terms[i]));
    }
    _termPositionsQueue = new TermPositionsQueue(termPositions);
    _posList = new IntQueue();
}
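The constructors above only build the merge queue; MultipleTermPositions then behaves like a single-term TermPositions over the union of all posting lists. A minimal consumption sketch, assuming the property-style Doc/Freq API used in the later snippets and a reader and terms supplied by the caller (the field and term values are hypothetical):

// Illustrative only: iterate the merged position list of several terms.
Term[] terms = { new Term("body", "lucene"), new Term("body", "search") };
TermPositions mtp = new MultipleTermPositions(reader, terms);
while (mtp.Next())
{
    // Doc and Freq reflect the union of the wrapped enumerators
    for (int i = 0; i < mtp.Freq; i++)
    {
        int position = mtp.NextPosition();
        Console.WriteLine("doc {0}, pos {1}", mtp.Doc, position);
    }
}
mtp.Close();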
public virtual void TestThreadSafety()
{
    rnd = NewRandom();
    int numThreads = 5;
    int numDocs = 50;
    ByteArrayPool pool = new ByteArrayPool(numThreads, 5);

    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null);
    System.String field = "test";

    ThreadClass[] ingesters = new ThreadClass[numThreads];
    for (int i = 0; i < numThreads; i++)
    {
        ingesters[i] = new AnonymousClassThread(numDocs, field, pool, writer, this);
        ingesters[i].Start();
    }
    for (int i = 0; i < numThreads; i++)
    {
        ingesters[i].Join();
    }
    writer.Close();

    IndexReader reader = IndexReader.Open(dir, true, null);
    TermEnum terms = reader.Terms(null);
    while (terms.Next(null))
    {
        TermPositions tp = reader.TermPositions(terms.Term, null);
        while (tp.Next(null))
        {
            int freq = tp.Freq;
            for (int i = 0; i < freq; i++)
            {
                tp.NextPosition(null);
                Assert.AreEqual(pool.BytesToString(tp.GetPayload(new byte[5], 0, null)), terms.Term.Text);
            }
        }
        tp.Close();
    }
    terms.Close();
    reader.Close();

    Assert.AreEqual(pool.Size(), numThreads);
}
public virtual void TestCaching()
{
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.LIMITED, null);
    Document doc = new Document();
    TokenStream stream = new AnonymousClassTokenStream(this);

    stream = new CachingTokenFilter(stream);

    doc.Add(new Field("preanalyzed", stream, TermVector.NO));

    // 1) we consume all tokens twice before we add the doc to the index
    checkTokens(stream);
    stream.Reset();
    checkTokens(stream);

    // 2) now add the document to the index and verify if all tokens are indexed
    //    don't reset the stream here, the DocumentWriter should do that implicitly
    writer.AddDocument(doc, null);
    writer.Close();

    IndexReader reader = IndexReader.Open(dir, true, null);
    TermPositions termPositions = reader.TermPositions(new Term("preanalyzed", "term1"), null);
    Assert.IsTrue(termPositions.Next(null));
    Assert.AreEqual(1, termPositions.Freq);
    Assert.AreEqual(0, termPositions.NextPosition(null));

    termPositions.Seek(new Term("preanalyzed", "term2"), null);
    Assert.IsTrue(termPositions.Next(null));
    Assert.AreEqual(2, termPositions.Freq);
    Assert.AreEqual(1, termPositions.NextPosition(null));
    Assert.AreEqual(3, termPositions.NextPosition(null));

    termPositions.Seek(new Term("preanalyzed", "term3"), null);
    Assert.IsTrue(termPositions.Next(null));
    Assert.AreEqual(1, termPositions.Freq);
    Assert.AreEqual(2, termPositions.NextPosition(null));
    reader.Close();

    // 3) reset stream and consume tokens again
    stream.Reset();
    checkTokens(stream);
}
public override void Seek(Term term)
{
    IndexReader reader = ((IndexReader) Enclosing_Instance.fieldToReader[term.Field()]);
    termDocs = reader != null ? reader.TermPositions(term) : null;
}
public override TermPositions TermPositions()
{
    EnsureOpen();
    return in_Renamed.TermPositions();
}
// builds an index with payloads in the given Directory and performs
// different tests to verify the payload encoding
private void PerformTest(Directory dir)
{
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);

    // should be in sync with value in TermInfosWriter
    int skipInterval = 16;

    int numTerms = 5;
    System.String fieldName = "f1";

    int numDocs = skipInterval + 1;
    // create content for the test documents with just a few terms
    Term[] terms = GenerateTerms(fieldName, numTerms);
    System.Text.StringBuilder sb = new System.Text.StringBuilder();
    for (int i = 0; i < terms.Length; i++)
    {
        sb.Append(terms[i].Text);
        sb.Append(" ");
    }
    System.String content = sb.ToString();

    int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
    byte[] payloadData = GenerateRandomData(payloadDataLength);

    Document d = new Document();
    d.Add(new Field(fieldName, content, Field.Store.NO, Field.Index.ANALYZED));
    // add the same document multiple times to have the same payload lengths for all
    // occurrences within two consecutive skip intervals
    int offset = 0;
    for (int i = 0; i < 2 * numDocs; i++)
    {
        analyzer.SetPayloadData(fieldName, payloadData, offset, 1);
        offset += numTerms;
        writer.AddDocument(d, null);
    }

    // make sure we create more than one segment to test merging
    writer.Commit(null);

    // now we make sure to have different payload lengths at the next skip point
    for (int i = 0; i < numDocs; i++)
    {
        analyzer.SetPayloadData(fieldName, payloadData, offset, i);
        offset += i * numTerms;
        writer.AddDocument(d, null);
    }

    writer.Optimize(null);
    // flush
    writer.Close();

    /*
     * Verify the index
     * first we test if all payloads are stored correctly
     */
    IndexReader reader = IndexReader.Open(dir, true, null);

    byte[] verifyPayloadData = new byte[payloadDataLength];
    offset = 0;
    TermPositions[] tps = new TermPositions[numTerms];
    for (int i = 0; i < numTerms; i++)
    {
        tps[i] = reader.TermPositions(terms[i], null);
    }

    while (tps[0].Next(null))
    {
        for (int i = 1; i < numTerms; i++)
        {
            tps[i].Next(null);
        }
        int freq = tps[0].Freq;

        for (int i = 0; i < freq; i++)
        {
            for (int j = 0; j < numTerms; j++)
            {
                tps[j].NextPosition(null);
                tps[j].GetPayload(verifyPayloadData, offset, null);
                offset += tps[j].PayloadLength;
            }
        }
    }

    for (int i = 0; i < numTerms; i++)
    {
        tps[i].Close();
    }

    AssertByteArrayEquals(payloadData, verifyPayloadData);

    /*
     * test lazy skipping
     */
    TermPositions tp = reader.TermPositions(terms[0], null);
    tp.Next(null);
    tp.NextPosition(null);
    // now we don't read this payload
    tp.NextPosition(null);
    Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
    byte[] payload = tp.GetPayload(null, 0, null);
    Assert.AreEqual(payload[0], payloadData[numTerms]);
    tp.NextPosition(null);

    // we don't read this payload and skip to a different document
    tp.SkipTo(5, null);
    tp.NextPosition(null);
    Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
    payload = tp.GetPayload(null, 0, null);
    Assert.AreEqual(payload[0], payloadData[5 * numTerms]);

    /*
     * Test different lengths at skip points
     */
    tp.Seek(terms[1], null);
    tp.Next(null);
    tp.NextPosition(null);
    Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
    tp.SkipTo(skipInterval - 1, null);
    tp.NextPosition(null);
    Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
    tp.SkipTo(2 * skipInterval - 1, null);
    tp.NextPosition(null);
    Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
    tp.SkipTo(3 * skipInterval - 1, null);
    tp.NextPosition(null);
    Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.PayloadLength, "Wrong payload length.");

    /*
     * Test multiple call of getPayload()
     */
    tp.GetPayload(null, 0, null);
    // it is forbidden to call getPayload() more than once
    // without calling nextPosition()
    Assert.Throws<IOException>(() => tp.GetPayload(null, 0, null), "Expected exception not thrown");

    reader.Close();

    // test long payload
    analyzer = new PayloadAnalyzer();
    writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);
    System.String singleTerm = "lucene";

    d = new Document();
    d.Add(new Field(fieldName, singleTerm, Field.Store.NO, Field.Index.ANALYZED));
    // add a payload whose length is greater than the buffer size of BufferedIndexOutput
    payloadData = GenerateRandomData(2000);
    analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
    writer.AddDocument(d, null);

    writer.Optimize(null);
    // flush
    writer.Close();

    reader = IndexReader.Open(dir, true, null);
    tp = reader.TermPositions(new Term(fieldName, singleTerm), null);
    tp.Next(null);
    tp.NextPosition(null);

    verifyPayloadData = new byte[tp.PayloadLength];
    tp.GetPayload(verifyPayloadData, 0, null);
    byte[] portion = new byte[1500];
    Array.Copy(payloadData, 100, portion, 0, 1500);

    AssertByteArrayEquals(portion, verifyPayloadData);
    reader.Close();
}
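The test above exercises many edge cases at once; the core payload-reading pattern it relies on can be distilled into a short sketch. This is a minimal illustration, assuming the state-threaded API used above (state passed as null) and a reader supplied by the caller; the field and term names are hypothetical:

// Minimal sketch: read every payload of one term.
TermPositions tp = reader.TermPositions(new Term("f1", "a"), null);
while (tp.Next(null))                 // advance to the next matching document
{
    for (int i = 0; i < tp.Freq; i++) // visit every occurrence in that doc
    {
        tp.NextPosition(null);        // the payload is attached to the position
        // GetPayload may only be called once per position (see the assert above)
        byte[] data = tp.GetPayload(new byte[tp.PayloadLength], 0, null);
    }
}
tp.Close();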
public AbstractTerminalNode(Term term, IndexReader reader)
{
    _tp = reader.TermPositions();
    _tp.Seek(term);
    _posLeft = 0;
}
protected internal override TermDocs TermDocs(IndexReader reader)
{
    return (TermDocs) reader.TermPositions();
}
public override void Seek(Term term, IState state)
{
    IndexReader reader = Enclosing_Instance.fieldToReader[term.Field];
    termDocs = reader != null ? reader.TermPositions(term, state) : null;
}
public static void AssertIndexEquals(IndexReader index1, IndexReader index2)
{
    Assert.AreEqual(index1.NumDocs(), index2.NumDocs(), "IndexReaders have different values for numDocs.");
    Assert.AreEqual(index1.MaxDoc, index2.MaxDoc, "IndexReaders have different values for maxDoc.");
    Assert.AreEqual(index1.HasDeletions, index2.HasDeletions, "Only one IndexReader has deletions.");
    Assert.AreEqual(index1.IsOptimized(), index2.IsOptimized(), "Only one index is optimized.");

    // check field names (was index1.GetFieldNames for both readers, which
    // compared index1 with itself; both readers must be consulted)
    System.Collections.Generic.ICollection<string> fieldsNames1 = index1.GetFieldNames(FieldOption.ALL);
    System.Collections.Generic.ICollection<string> fieldsNames2 = index2.GetFieldNames(FieldOption.ALL);
    System.Collections.Generic.ICollection<IFieldable> fields1 = null;
    System.Collections.Generic.ICollection<IFieldable> fields2 = null;
    Assert.AreEqual(fieldsNames1.Count, fieldsNames2.Count, "IndexReaders have different numbers of fields.");
    System.Collections.IEnumerator it1 = fieldsNames1.GetEnumerator();
    System.Collections.IEnumerator it2 = fieldsNames2.GetEnumerator();
    while (it1.MoveNext() && it2.MoveNext())
    {
        Assert.AreEqual((System.String) it1.Current, (System.String) it2.Current, "Different field names.");
    }

    // check norms
    it1 = fieldsNames1.GetEnumerator();
    while (it1.MoveNext())
    {
        System.String curField = (System.String) it1.Current;
        byte[] norms1 = index1.Norms(curField);
        byte[] norms2 = index2.Norms(curField);
        if (norms1 != null && norms2 != null)
        {
            Assert.AreEqual(norms1.Length, norms2.Length);
            for (int i = 0; i < norms1.Length; i++)
            {
                Assert.AreEqual(norms1[i], norms2[i], "Norm different for doc " + i + " and field '" + curField + "'.");
            }
        }
        else
        {
            Assert.AreSame(norms1, norms2);
        }
    }

    // check deletions
    for (int i = 0; i < index1.MaxDoc; i++)
    {
        Assert.AreEqual(index1.IsDeleted(i), index2.IsDeleted(i), "Doc " + i + " only deleted in one index.");
    }

    // check stored fields
    for (int i = 0; i < index1.MaxDoc; i++)
    {
        if (!index1.IsDeleted(i))
        {
            Document doc1 = index1.Document(i);
            Document doc2 = index2.Document(i);
            fields1 = doc1.GetFields();
            fields2 = doc2.GetFields();
            Assert.AreEqual(fields1.Count, fields2.Count, "Different numbers of fields for doc " + i + ".");
            it1 = fields1.GetEnumerator();
            it2 = fields2.GetEnumerator();
            while (it1.MoveNext() && it2.MoveNext())
            {
                Field curField1 = (Field) it1.Current;
                Field curField2 = (Field) it2.Current;
                Assert.AreEqual(curField1.Name, curField2.Name, "Different field names for doc " + i + ".");
                Assert.AreEqual(curField1.StringValue, curField2.StringValue, "Different field values for doc " + i + ".");
            }
        }
    }

    // check dictionary and posting lists
    TermEnum enum1 = index1.Terms();
    TermEnum enum2 = index2.Terms();
    TermPositions tp1 = index1.TermPositions();
    TermPositions tp2 = index2.TermPositions();
    while (enum1.Next())
    {
        Assert.IsTrue(enum2.Next());
        Assert.AreEqual(enum1.Term, enum2.Term, "Different term in dictionary.");
        tp1.Seek(enum1.Term);
        tp2.Seek(enum1.Term);
        while (tp1.Next())
        {
            Assert.IsTrue(tp2.Next());
            Assert.AreEqual(tp1.Doc, tp2.Doc, "Different doc id in postinglist of term " + enum1.Term + ".");
            Assert.AreEqual(tp1.Freq, tp2.Freq, "Different term frequency in postinglist of term " + enum1.Term + ".");
            for (int i = 0; i < tp1.Freq; i++)
            {
                Assert.AreEqual(tp1.NextPosition(), tp2.NextPosition(), "Different positions in postinglist of term " + enum1.Term + ".");
            }
        }
    }
}
public override TermPositions TermPositions()
{
    return in_Renamed.TermPositions();
}
public override TermPositions TermPositions(IState state)
{
    EnsureOpen();
    return in_Renamed.TermPositions(state);
}
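The delegating overrides above follow the FilterIndexReader pattern: every postings request is forwarded to a wrapped reader. A minimal sketch of such a wrapper, assuming Lucene.Net's FilterIndexReader base class with its protected in_Renamed field and EnsureOpen(); the class name here is hypothetical:

// Illustrative wrapper: forwards TermPositions to an inner reader.
public class PassThroughReader : FilterIndexReader
{
    public PassThroughReader(IndexReader inner) : base(inner)
    {
    }

    public override TermPositions TermPositions()
    {
        EnsureOpen();                      // fail fast if the reader was closed
        return in_Renamed.TermPositions(); // delegate to the wrapped reader
    }
}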