public virtual void TestTokenReuse()
{
    Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);

    IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

    Document doc = new Document();
    doc.Add(NewTextField("f1", "a 5 a a", Field.Store.YES));

    writer.AddDocument(doc);
    writer.Commit();
    SegmentCommitInfo info = writer.NewestSegment();
    writer.Dispose();
    SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random));

    DocsAndPositionsEnum termPositions = MultiFields.GetTermPositionsEnum(reader, reader.LiveDocs, "f1", new BytesRef("a"));

    Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    int freq = termPositions.Freq;
    Assert.AreEqual(3, freq);
    Assert.AreEqual(0, termPositions.NextPosition());
    Assert.IsNotNull(termPositions.GetPayload());
    Assert.AreEqual(6, termPositions.NextPosition());
    Assert.IsNull(termPositions.GetPayload());
    Assert.AreEqual(7, termPositions.NextPosition());
    Assert.IsNull(termPositions.GetPayload());
    reader.Dispose();
}
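// The assertions above rely on the general DocsAndPositionsEnum contract. The
// following is a minimal illustrative sketch (not part of the original tests;
// the helper name is hypothetical): NextPosition() must be called once per
// Freq before GetPayload(), and GetPayload() may return null for positions
// that carry no payload.
private static int CountPayloads(DocsAndPositionsEnum dpe)
{
    int count = 0;
    while (dpe.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
    {
        int freq = dpe.Freq;
        for (int i = 0; i < freq; i++)
        {
            dpe.NextPosition(); // advance to the next position before reading its payload
            if (dpe.GetPayload() != null)
            {
                count++;
            }
        }
    }
    return count;
}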
public virtual void TestMixupMultiValued()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.StoreTermVectors = true;
    customType.StoreTermVectorPositions = true;
    customType.StoreTermVectorPayloads = true;
    customType.StoreTermVectorOffsets = Random.NextBoolean();
    Field field = new Field("field", "", customType);
    TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    doc.Add(field);
    Field field2 = new Field("field", "", customType);
    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.Payload = new BytesRef("test");
    ts = new CannedTokenStream(withPayload);
    Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
    field2.SetTokenStream(ts);
    doc.Add(field2);
    Field field3 = new Field("field", "", customType);
    ts = new MockTokenizer(new StringReader("nopayload"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field3.SetTokenStream(ts);
    doc.Add(field3);
    writer.AddDocument(doc);
    DirectoryReader reader = writer.GetReader();
    Terms terms = reader.GetTermVector(0, "field");
    if (Debugging.AssertsEnabled) Debugging.Assert(terms != null);
    TermsEnum termsEnum = terms.GetIterator(null);
    Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload")));
    DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null);
    Assert.AreEqual(0, de.NextDoc());
    Assert.AreEqual(3, de.NextPosition());
    Assert.AreEqual(new BytesRef("test"), de.GetPayload());
    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
public virtual void TestMixupDocs()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);
    Document doc = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.StoreTermVectors = true;
    customType.StoreTermVectorPositions = true;
    customType.StoreTermVectorPayloads = true;
    customType.StoreTermVectorOffsets = Random.NextBoolean();
    Field field = new Field("field", "", customType);
    TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    doc.Add(field);
    writer.AddDocument(doc);

    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.Payload = new BytesRef("test");
    ts = new CannedTokenStream(withPayload);
    Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    writer.AddDocument(doc);

    ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    writer.AddDocument(doc);

    DirectoryReader reader = writer.GetReader();
    Terms terms = reader.GetTermVector(1, "field");
    if (Debugging.AssertsEnabled) Debugging.Assert(terms != null);
    TermsEnum termsEnum = terms.GetIterator(null);
    Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload")));
    DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null);
    Assert.AreEqual(0, de.NextDoc());
    Assert.AreEqual(0, de.NextPosition());
    Assert.AreEqual(new BytesRef("test"), de.GetPayload());
    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
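// Both term-vector tests above follow the same retrieval pattern. As a hedged
// sketch (hypothetical helper; it assumes only the GetIterator/DocsAndPositions
// APIs already used above), this is how a single term's payload can be pulled
// out of a document's term vector:
private static BytesRef GetTermVectorPayload(IndexReader reader, int docId, string field, string term)
{
    Terms terms = reader.GetTermVector(docId, field);
    if (terms == null)
    {
        return null; // no term vector stored for this field
    }
    TermsEnum termsEnum = terms.GetIterator(null);
    if (!termsEnum.SeekExact(new BytesRef(term)))
    {
        return null; // term not present in the vector
    }
    DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null);
    de.NextDoc();      // a term vector behaves like a single-document index
    de.NextPosition(); // required before GetPayload()
    return de.GetPayload();
}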
internal virtual void VerifyPositions(PositionData[] positions, DocsAndPositionsEnum posEnum)
{
    for (int i = 0; i < positions.Length; i++)
    {
        int pos = posEnum.NextPosition();
        Assert.AreEqual(positions[i].pos, pos);
        if (positions[i].payload != null)
        {
            Assert.IsNotNull(posEnum.GetPayload());
            if (Random.Next(3) < 2)
            {
                // Verify the payload bytes
                BytesRef otherPayload = posEnum.GetPayload();
                Assert.IsTrue(positions[i].payload.Equals(otherPayload),
                    "expected=" + positions[i].payload.ToString() + " got=" + otherPayload.ToString());
            }
        }
        else
        {
            Assert.IsNull(posEnum.GetPayload());
        }
    }
}
public virtual void CheckSkipTo(DocsAndPositionsEnum tp, int target, int maxCounter)
{
    tp.Advance(target);
    if (maxCounter < counter)
    {
        Assert.Fail("Too many bytes read: " + counter + " vs " + maxCounter);
    }

    Assert.AreEqual(target, tp.DocID, "Wrong document " + tp.DocID + " after skipTo target " + target);
    Assert.AreEqual(1, tp.Freq, "Frequency is not 1: " + tp.Freq);
    tp.NextPosition();
    BytesRef b = tp.GetPayload();
    Assert.AreEqual(1, b.Length);
    Assert.AreEqual((sbyte)target, (sbyte)b.Bytes[b.Offset], "Wrong payload for the target " + target + ": " + (sbyte)b.Bytes[b.Offset]);
}
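// For context, a hedged sketch of the Advance() contract that CheckSkipTo
// exercises (helper name is hypothetical): Advance(target) moves to the first
// document whose id is >= target, or to NO_MORE_DOCS if none remains.
private static BytesRef FirstPayloadAtOrAfter(DocsAndPositionsEnum tp, int target)
{
    if (tp.Advance(target) == DocIdSetIterator.NO_MORE_DOCS)
    {
        return null; // no document at or after the target
    }
    tp.NextPosition(); // must be called before GetPayload()
    return tp.GetPayload();
}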
public virtual void TestThreadSafety()
{
    const int numThreads = 5;
    int numDocs = AtLeast(50);
    ByteArrayPool pool = new ByteArrayPool(numThreads, 5);

    Directory dir = NewDirectory();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
    const string field = "test";

    ThreadJob[] ingesters = new ThreadJob[numThreads];
    for (int i = 0; i < numThreads; i++)
    {
        ingesters[i] = new ThreadAnonymousInnerClassHelper(this, numDocs, pool, writer, field);
        ingesters[i].Start();
    }

    for (int i = 0; i < numThreads; i++)
    {
        ingesters[i].Join();
    }

    writer.Dispose();
    IndexReader reader = DirectoryReader.Open(dir);
    TermsEnum terms = MultiFields.GetFields(reader).GetTerms(field).GetEnumerator();
    IBits liveDocs = MultiFields.GetLiveDocs(reader);
    DocsAndPositionsEnum tp = null;
    while (terms.MoveNext())
    {
        string termText = terms.Term.Utf8ToString();
        tp = terms.DocsAndPositions(liveDocs, tp);
        while (tp.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
        {
            int freq = tp.Freq;
            for (int i = 0; i < freq; i++)
            {
                tp.NextPosition();
                BytesRef payload = tp.GetPayload();
                Assert.AreEqual(termText, payload.Utf8ToString());
            }
        }
    }
    reader.Dispose();
    dir.Dispose();
    Assert.AreEqual(pool.Count, numThreads);
}
public virtual void TestMixupMultiValued()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir);
    Document doc = new Document();
    Field field = new TextField("field", "", Field.Store.NO);
    TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    doc.Add(field);
    Field field2 = new TextField("field", "", Field.Store.NO);
    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.Payload = new BytesRef("test");
    ts = new CannedTokenStream(withPayload);
    Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
    field2.SetTokenStream(ts);
    doc.Add(field2);
    Field field3 = new TextField("field", "", Field.Store.NO);
    ts = new MockTokenizer(new StringReader("nopayload"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field3.SetTokenStream(ts);
    doc.Add(field3);
    writer.AddDocument(doc);
    DirectoryReader reader = writer.GetReader();
    SegmentReader sr = GetOnlySegmentReader(reader);
    DocsAndPositionsEnum de = sr.GetTermPositionsEnum(new Term("field", "withPayload"));
    de.NextDoc();
    de.NextPosition();
    Assert.AreEqual(new BytesRef("test"), de.GetPayload());
    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
public virtual void TestMixupDocs()
{
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);
    Document doc = new Document();
    Field field = new TextField("field", "", Field.Store.NO);
    TokenStream ts = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    doc.Add(field);
    writer.AddDocument(doc);

    Token withPayload = new Token("withPayload", 0, 11);
    withPayload.Payload = new BytesRef("test");
    ts = new CannedTokenStream(withPayload);
    Assert.IsTrue(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    writer.AddDocument(doc);

    ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);
    Assert.IsFalse(ts.HasAttribute<IPayloadAttribute>());
    field.SetTokenStream(ts);
    writer.AddDocument(doc);

    DirectoryReader reader = writer.GetReader();
    AtomicReader sr = SlowCompositeReaderWrapper.Wrap(reader);
    DocsAndPositionsEnum de = sr.GetTermPositionsEnum(new Term("field", "withPayload"));
    de.NextDoc();
    de.NextPosition();
    Assert.AreEqual(new BytesRef("test"), de.GetPayload());
    writer.Dispose();
    reader.Dispose();
    dir.Dispose();
}
public virtual void DoTestNumbers(bool withPayloads)
{
    Directory dir = NewDirectory();
    Analyzer analyzer = withPayloads ? (Analyzer)new MockPayloadAnalyzer() : new MockAnalyzer(Random);
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    iwc.SetMergePolicy(NewLogMergePolicy()); // will rely on docids a bit for skipping
    RandomIndexWriter w = new RandomIndexWriter(Random, dir, iwc);

    FieldType ft = new FieldType(TextField.TYPE_STORED);
    ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    if (Random.NextBoolean())
    {
        ft.StoreTermVectors = true;
        ft.StoreTermVectorOffsets = Random.NextBoolean();
        ft.StoreTermVectorPositions = Random.NextBoolean();
    }

    int numDocs = AtLeast(500);
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        doc.Add(new Field("numbers", English.Int32ToEnglish(i), ft));
        doc.Add(new Field("oddeven", (i % 2) == 0 ? "even" : "odd", ft));
        doc.Add(new StringField("id", "" + i, Field.Store.NO));
        w.AddDocument(doc);
    }

    IndexReader reader = w.GetReader();
    w.Dispose();

    string[] terms = new string[] { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" };

    foreach (string term in terms)
    {
        DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef(term));
        int doc;
        while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            string storedNumbers = reader.Document(doc).Get("numbers");
            int freq = dp.Freq;
            for (int i = 0; i < freq; i++)
            {
                dp.NextPosition();
                int start = dp.StartOffset;
                if (Debugging.AssertsEnabled) Debugging.Assert(start >= 0);
                int end = dp.EndOffset;
                if (Debugging.AssertsEnabled) Debugging.Assert(end >= 0 && end >= start);
                // check that the offsets correspond to the term in the src text
                Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals(term, StringComparison.Ordinal));
                if (withPayloads)
                {
                    // check that we have a payload and it starts with "pos"
                    Assert.IsNotNull(dp.GetPayload());
                    BytesRef payload = dp.GetPayload();
                    Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal));
                }
                // note: withPayloads=false doesn't necessarily mean we don't have them from MockAnalyzer!
            }
        }
    }

    // check we can skip correctly
    int numSkippingTests = AtLeast(50);
    for (int j = 0; j < numSkippingTests; j++)
    {
        int num = TestUtil.NextInt32(Random, 100, Math.Min(numDocs - 1, 999));
        DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred"));
        int doc = dp.Advance(num);
        Assert.AreEqual(num, doc);
        int freq = dp.Freq;
        for (int i = 0; i < freq; i++)
        {
            string storedNumbers = reader.Document(doc).Get("numbers");
            dp.NextPosition();
            int start = dp.StartOffset;
            if (Debugging.AssertsEnabled) Debugging.Assert(start >= 0);
            int end = dp.EndOffset;
            if (Debugging.AssertsEnabled) Debugging.Assert(end >= 0 && end >= start);
            // check that the offsets correspond to the term in the src text
            Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals("hundred", StringComparison.Ordinal));
            if (withPayloads)
            {
                // check that we have a payload and it starts with "pos"
                Assert.IsNotNull(dp.GetPayload());
                BytesRef payload = dp.GetPayload();
                Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal));
            }
            // note: withPayloads=false doesn't necessarily mean we don't have them from MockAnalyzer!
        }
    }

    // check that other fields (without offsets) work correctly
    for (int i = 0; i < numDocs; i++)
    {
        DocsEnum dp = MultiFields.GetTermDocsEnum(reader, null, "id", new BytesRef("" + i), 0);
        Assert.AreEqual(i, dp.NextDoc());
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());
    }

    reader.Dispose();
    dir.Dispose();
}
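// The offset checks in DoTestNumbers generalize to a small extraction routine.
// A minimal sketch (hypothetical helper; requires System.Collections.Generic and
// assumes offsets were indexed, as in the test, via
// IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS):
private static IList<string> ExtractOccurrences(string storedText, DocsAndPositionsEnum dp)
{
    var occurrences = new List<string>();
    int freq = dp.Freq;
    for (int i = 0; i < freq; i++)
    {
        dp.NextPosition();
        int start = dp.StartOffset; // -1 when offsets were not indexed
        int end = dp.EndOffset;
        if (start >= 0 && end >= start)
        {
            // slice the original text using the indexed offsets
            occurrences.Add(storedText.Substring(start, end - start));
        }
    }
    return occurrences;
}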
public override BytesRef GetPayload()
{
    return current.GetPayload();
}
public virtual void TestLongPostings_Mem()
{
    // Don't use TestUtil.getTempDir so that we own the
    // randomness (ie same seed will point to same dir):
    Directory dir = NewFSDirectory(CreateTempDir("longpostings" + "." + Random.NextInt64()));

    int NUM_DOCS = AtLeast(2000);

    if (VERBOSE)
    {
        Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS);
    }

    string s1 = GetRandomTerm(null);
    string s2 = GetRandomTerm(s1);

    if (VERBOSE)
    {
        Console.WriteLine("\nTEST: s1=" + s1 + " s2=" + s2);

        /*
         * for(int idx=0;idx<s1.Length();idx++) {
         *   System.out.println("  s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
         * }
         * for(int idx=0;idx<s2.Length();idx++) {
         *   System.out.println("  s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
         * }
         */
    }

    FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);
    for (int idx = 0; idx < NUM_DOCS; idx++)
    {
        if (Random.NextBoolean())
        {
            isS1.Set(idx);
        }
    }

    IndexReader r;
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
        .SetOpenMode(OpenMode.CREATE)
        .SetMergePolicy(NewLogMergePolicy());
    iwc.SetRAMBufferSizeMB(16.0 + 16.0 * Random.NextDouble());
    iwc.SetMaxBufferedDocs(-1);
    RandomIndexWriter riw = new RandomIndexWriter(Random, dir, iwc);

    for (int idx = 0; idx < NUM_DOCS; idx++)
    {
        Document doc = new Document();
        string s = isS1.Get(idx) ? s1 : s2;
        Field f = NewTextField("field", s, Field.Store.NO);
        int count = TestUtil.NextInt32(Random, 1, 4);
        for (int ct = 0; ct < count; ct++)
        {
            doc.Add(f);
        }
        riw.AddDocument(doc);
    }

    r = riw.GetReader();
    riw.Dispose();

    /*
     * if (VERBOSE) {
     *   System.out.println("TEST: terms");
     *   TermEnum termEnum = r.Terms();
     *   while(termEnum.Next()) {
     *     System.out.println("  term=" + termEnum.Term() + " len=" + termEnum.Term().Text().Length());
     *     Assert.IsTrue(termEnum.DocFreq() > 0);
     *     System.out.println("    s1?=" + (termEnum.Term().Text().equals(s1)) + " s1len=" + s1.Length());
     *     System.out.println("    s2?=" + (termEnum.Term().Text().equals(s2)) + " s2len=" + s2.Length());
     *     final String s = termEnum.Term().Text();
     *     for(int idx=0;idx<s.Length();idx++) {
     *       System.out.println("      ch=0x" + Integer.toHexString(s.charAt(idx)));
     *     }
     *   }
     * }
     */

    Assert.AreEqual(NUM_DOCS, r.NumDocs);
    Assert.IsTrue(r.DocFreq(new Term("field", s1)) > 0);
    Assert.IsTrue(r.DocFreq(new Term("field", s2)) > 0);

    int num = AtLeast(1000);
    for (int iter = 0; iter < num; iter++)
    {
        string term;
        bool doS1;
        if (Random.NextBoolean())
        {
            term = s1;
            doS1 = true;
        }
        else
        {
            term = s2;
            doS1 = false;
        }

        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: iter=" + iter + " doS1=" + doS1);
        }

        DocsAndPositionsEnum postings = MultiFields.GetTermPositionsEnum(r, null, "field", new BytesRef(term));

        int docID = -1;
        while (docID < DocIdSetIterator.NO_MORE_DOCS)
        {
            int what = Random.Next(3);
            if (what == 0)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: docID=" + docID + "; do next()");
                }
                // nextDoc
                int expected = docID + 1;
                while (true)
                {
                    if (expected == NUM_DOCS)
                    {
                        expected = int.MaxValue;
                        break;
                    }
                    else if (isS1.Get(expected) == doS1)
                    {
                        break;
                    }
                    else
                    {
                        expected++;
                    }
                }
                docID = postings.NextDoc();
                if (VERBOSE)
                {
                    Console.WriteLine("  got docID=" + docID);
                }
                Assert.AreEqual(expected, docID);
                if (docID == DocIdSetIterator.NO_MORE_DOCS)
                {
                    break;
                }

                if (Random.Next(6) == 3)
                {
                    int freq = postings.Freq;
                    Assert.IsTrue(freq >= 1 && freq <= 4);
                    for (int pos = 0; pos < freq; pos++)
                    {
                        Assert.AreEqual(pos, postings.NextPosition());
                        if (Random.NextBoolean())
                        {
                            var dummy = postings.GetPayload();
                            if (Random.NextBoolean())
                            {
                                dummy = postings.GetPayload(); // get it again
                            }
                        }
                    }
                }
            }
            else
            {
                // advance
                int targetDocID;
                if (docID == -1)
                {
                    targetDocID = Random.Next(NUM_DOCS + 1);
                }
                else
                {
                    targetDocID = docID + TestUtil.NextInt32(Random, 1, NUM_DOCS - docID);
                }
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
                }
                int expected = targetDocID;
                while (true)
                {
                    if (expected == NUM_DOCS)
                    {
                        expected = int.MaxValue;
                        break;
                    }
                    else if (isS1.Get(expected) == doS1)
                    {
                        break;
                    }
                    else
                    {
                        expected++;
                    }
                }

                docID = postings.Advance(targetDocID);
                if (VERBOSE)
                {
                    Console.WriteLine("  got docID=" + docID);
                }
                Assert.AreEqual(expected, docID);
                if (docID == DocIdSetIterator.NO_MORE_DOCS)
                {
                    break;
                }

                if (Random.Next(6) == 3)
                {
                    int freq = postings.Freq;
                    Assert.IsTrue(freq >= 1 && freq <= 4);
                    for (int pos = 0; pos < freq; pos++)
                    {
                        Assert.AreEqual(pos, postings.NextPosition());
                        if (Random.NextBoolean())
                        {
                            var dummy = postings.GetPayload();
                            if (Random.NextBoolean())
                            {
                                dummy = postings.GetPayload(); // get it again
                            }
                        }
                    }
                }
            }
        }
    }
    r.Dispose();
    dir.Dispose();
}
// builds an index with payloads in the given Directory and performs
// different tests to verify the payload encoding
private void PerformTest(Directory dir)
{
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer)
        .SetOpenMode(OpenMode.CREATE)
        .SetMergePolicy(NewLogMergePolicy()));

    // should be in sync with value in TermInfosWriter
    const int skipInterval = 16;

    const int numTerms = 5;
    const string fieldName = "f1";

    int numDocs = skipInterval + 1;
    // create content for the test documents with just a few terms
    Term[] terms = GenerateTerms(fieldName, numTerms);
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < terms.Length; i++)
    {
        sb.Append(terms[i].Text());
        sb.Append(" ");
    }
    string content = sb.ToString();

    int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
    var payloadData = GenerateRandomData(payloadDataLength);

    Document d = new Document();
    d.Add(NewTextField(fieldName, content, Field.Store.NO));
    // add the same document multiple times to have the same payload lengths for all
    // occurrences within two consecutive skip intervals
    int offset = 0;
    for (int i = 0; i < 2 * numDocs; i++)
    {
        analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, 1);
        offset += numTerms;
        writer.AddDocument(d, analyzer);
    }

    // make sure we create more than one segment to test merging
    writer.Commit();

    // now we make sure to have different payload lengths at the next skip point
    for (int i = 0; i < numDocs; i++)
    {
        analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, i);
        offset += i * numTerms;
        writer.AddDocument(d, analyzer);
    }

    writer.ForceMerge(1);
    // flush
    writer.Dispose();

    /*
     * Verify the index
     * first we test if all payloads are stored correctly
     */
    IndexReader reader = DirectoryReader.Open(dir);

    var verifyPayloadData = new byte[payloadDataLength];
    offset = 0;
    var tps = new DocsAndPositionsEnum[numTerms];
    for (int i = 0; i < numTerms; i++)
    {
        tps[i] = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[i].Field, new BytesRef(terms[i].Text()));
    }

    while (tps[0].NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
    {
        for (int i = 1; i < numTerms; i++)
        {
            tps[i].NextDoc();
        }
        int freq = tps[0].Freq;

        for (int i = 0; i < freq; i++)
        {
            for (int j = 0; j < numTerms; j++)
            {
                tps[j].NextPosition();
                BytesRef br = tps[j].GetPayload();
                if (br != null)
                {
                    Array.Copy(br.Bytes, br.Offset, verifyPayloadData, offset, br.Length);
                    offset += br.Length;
                }
            }
        }
    }

    AssertByteArrayEquals(payloadData, verifyPayloadData);

    /*
     * test lazy skipping
     */
    DocsAndPositionsEnum tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[0].Field, new BytesRef(terms[0].Text()));
    tp.NextDoc();
    tp.NextPosition();

    // NOTE: prior rev of this test was failing to first
    // call next here:
    tp.NextDoc();

    // now we don't read this payload
    tp.NextPosition();
    BytesRef payload = tp.GetPayload();
    Assert.AreEqual(1, payload.Length, "Wrong payload length.");
    Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[numTerms]);

    tp.NextDoc();
    tp.NextPosition();

    // we don't read this payload and skip to a different document
    tp.Advance(5);
    tp.NextPosition();
    payload = tp.GetPayload();
    Assert.AreEqual(1, payload.Length, "Wrong payload length.");
    Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[5 * numTerms]);

    /*
     * Test different lengths at skip points
     */
    tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[1].Field, new BytesRef(terms[1].Text()));
    tp.NextDoc();
    tp.NextPosition();
    Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length.");
    tp.Advance(skipInterval - 1);
    tp.NextPosition();
    Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length.");
    tp.Advance(2 * skipInterval - 1);
    tp.NextPosition();
    Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length.");
    tp.Advance(3 * skipInterval - 1);
    tp.NextPosition();
    Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.GetPayload().Length, "Wrong payload length.");

    reader.Dispose();

    // test long payload
    analyzer = new PayloadAnalyzer();
    writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE));
    string singleTerm = "lucene";

    d = new Document();
    d.Add(NewTextField(fieldName, singleTerm, Field.Store.NO));
    // add a payload whose length is greater than the buffer size of BufferedIndexOutput
    payloadData = GenerateRandomData(2000);
    analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
    writer.AddDocument(d);

    writer.ForceMerge(1);
    // flush
    writer.Dispose();

    reader = DirectoryReader.Open(dir);
    tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), fieldName, new BytesRef(singleTerm));
    tp.NextDoc();
    tp.NextPosition();

    BytesRef bref = tp.GetPayload();
    verifyPayloadData = new byte[bref.Length];
    var portion = new byte[1500];
    Array.Copy(payloadData, 100, portion, 0, 1500);

    AssertByteArrayEquals(portion, bref.Bytes, bref.Offset, bref.Length);
    reader.Dispose();
}
public override BytesRef GetPayload()
{
    return m_input.GetPayload();
}