public virtual void TestPayloadFieldBit()
{
    Directory ram = NewDirectory();
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
    Document d = new Document();
    // this field won't have any payloads
    d.Add(NewTextField("f1", "this field has no payloads", Field.Store.NO));
    // this field will have payloads in all docs, however not for all term positions,
    // so this field is used to check if the DocumentWriter correctly enables the payloads bit
    // even if only some term positions have payloads
    d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO));
    d.Add(NewTextField("f2", "this field has payloads in all docs NO PAYLOAD", Field.Store.NO));
    // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
    // enabled in only some documents
    d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO));
    // only add payload data for field f2
    analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1);
    writer.AddDocument(d);
    // flush
    writer.Dispose();

    SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
    FieldInfos fi = reader.FieldInfos;
    Assert.IsFalse(fi.FieldInfo("f1").HasPayloads(), "Payload field bit should not be set.");
    Assert.IsTrue(fi.FieldInfo("f2").HasPayloads(), "Payload field bit should be set.");
    Assert.IsFalse(fi.FieldInfo("f3").HasPayloads(), "Payload field bit should not be set.");
    reader.Dispose();

    // now we add another document which has payloads for field f3 and verify if the SegmentMerger
    // enabled payloads for that field
    analyzer = new PayloadAnalyzer(); // Clear payload state for each field
    writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode_e.CREATE));
    d = new Document();
    d.Add(NewTextField("f1", "this field has no payloads", Field.Store.NO));
    d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO));
    d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO));
    d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO));
    // add payload data for field f2 and f3
    analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1);
    analyzer.SetPayloadData("f3", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 3);
    writer.AddDocument(d);
    // force merge
    writer.ForceMerge(1);
    // flush
    writer.Dispose();

    reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
    fi = reader.FieldInfos;
    Assert.IsFalse(fi.FieldInfo("f1").HasPayloads(), "Payload field bit should not be set.");
    Assert.IsTrue(fi.FieldInfo("f2").HasPayloads(), "Payload field bit should be set.");
    Assert.IsTrue(fi.FieldInfo("f3").HasPayloads(), "Payload field bit should be set.");
    reader.Dispose();
    ram.Dispose();
}
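// ---------------------------------------------------------------------------
// The variants in this file drive payloads through a PayloadAnalyzer helper
// that is not reproduced here. Below is a minimal sketch of what such a helper
// could look like against the Lucene.NET 4.x analysis API; the PayloadData
// holder, the MockTokenizer choice, and the PayloadFilter wiring are
// assumptions inferred from the call sites in this file, not the actual
// implementation from TestPayloads.
// requires: System.Collections.Generic, System.IO, Lucene.Net.Analysis,
//           Lucene.Net.Analysis.TokenAttributes, Lucene.Net.Util
// ---------------------------------------------------------------------------
internal class PayloadAnalyzer : Analyzer
{
    private readonly IDictionary<string, PayloadData> fieldToData = new Dictionary<string, PayloadData>();

    public PayloadAnalyzer()
        : base(PER_FIELD_REUSE_STRATEGY) // give each field its own component chain
    {
    }

    public PayloadAnalyzer(string field, byte[] data, int offset, int length)
        : this()
    {
        SetPayloadData(field, data, offset, length);
    }

    public void SetPayloadData(string field, byte[] data, int offset, int length)
    {
        fieldToData[field] = new PayloadData(data, offset, length);
    }

    protected override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
    {
        Tokenizer ts = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
        PayloadData payload;
        fieldToData.TryGetValue(fieldName, out payload);
        // only fields registered via SetPayloadData get the payload filter
        TokenStream stream = payload == null
            ? (TokenStream)ts
            : new PayloadFilter(ts, payload.Data, payload.Offset, payload.Length);
        return new TokenStreamComponents(ts, stream);
    }

    // simple holder for the byte range to emit as payloads
    private sealed class PayloadData
    {
        internal byte[] Data { get; private set; }
        internal int Offset { get; private set; }
        internal int Length { get; private set; }

        internal PayloadData(byte[] data, int offset, int length)
        {
            Data = data;
            Offset = offset;
            Length = length;
        }
    }
}

// Companion filter for the sketch above: attaches consecutive slices of a
// shared byte array as per-token payloads (4.x attribute API). Once the data
// is exhausted, remaining positions get no payload, which matches the
// "payloads in all docs, however not for all term positions" comment above.
internal class PayloadFilter : TokenFilter
{
    private readonly TokenStream inner;
    private readonly byte[] data;
    private int offset;
    private readonly int length;
    private readonly IPayloadAttribute payloadAtt;

    public PayloadFilter(TokenStream input, byte[] data, int offset, int length)
        : base(input)
    {
        this.inner = input;
        this.data = data;
        this.offset = offset;
        this.length = length;
        payloadAtt = AddAttribute<IPayloadAttribute>();
    }

    public override bool IncrementToken()
    {
        if (!inner.IncrementToken())
        {
            return false;
        }
        if (offset + length <= data.Length)
        {
            payloadAtt.Payload = new BytesRef(data, offset, length);
            offset += length;
        }
        return true;
    }
}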
public virtual void TestPayloadFieldBit()
{
    rnd = NewRandom();
    Directory ram = new RAMDirectory();
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);
    Document d = new Document();
    // this field won't have any payloads
    d.Add(new Field("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED));
    // this field will have payloads in all docs, however not for all term positions,
    // so this field is used to check if the DocumentWriter correctly enables the payloads bit
    // even if only some term positions have payloads
    d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
    d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
    // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
    // enabled in only some documents
    d.Add(new Field("f3", "This field has payloads in some docs", Field.Store.NO, Field.Index.ANALYZED));
    // only add payload data for field f2
    analyzer.SetPayloadData("f2", 1, System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 1);
    writer.AddDocument(d, null);
    // flush
    writer.Close();

    SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram, null);
    FieldInfos fi = reader.FieldInfos();
    Assert.IsFalse(fi.FieldInfo("f1").storePayloads_ForNUnit, "Payload field bit should not be set.");
    Assert.IsTrue(fi.FieldInfo("f2").storePayloads_ForNUnit, "Payload field bit should be set.");
    Assert.IsFalse(fi.FieldInfo("f3").storePayloads_ForNUnit, "Payload field bit should not be set.");
    reader.Close();

    // now we add another document which has payloads for field f3 and verify if the SegmentMerger
    // enabled payloads for that field
    writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);
    d = new Document();
    d.Add(new Field("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED));
    d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
    d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
    d.Add(new Field("f3", "This field has payloads in some docs", Field.Store.NO, Field.Index.ANALYZED));
    // add payload data for field f2 and f3
    analyzer.SetPayloadData("f2", System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 1);
    analyzer.SetPayloadData("f3", System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 3);
    writer.AddDocument(d, null);
    // force merge
    writer.Optimize(null);
    // flush
    writer.Close();

    reader = SegmentReader.GetOnlySegmentReader(ram, null);
    fi = reader.FieldInfos();
    Assert.IsFalse(fi.FieldInfo("f1").storePayloads_ForNUnit, "Payload field bit should not be set.");
    Assert.IsTrue(fi.FieldInfo("f2").storePayloads_ForNUnit, "Payload field bit should be set.");
    Assert.IsFalse(fi.FieldInfo("f3").storePayloads_ForNUnit, "Payload field bit should not be set.");
    reader.Close();
}
// builds an index with payloads in the given Directory and performs
// different tests to verify the payload encoding
private void PerformTest(Directory dir)
{
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);

    // should be in sync with value in TermInfosWriter
    int skipInterval = 16;
    int numTerms = 5;
    System.String fieldName = "f1";
    int numDocs = skipInterval + 1;
    // create content for the test documents with just a few terms
    Term[] terms = GenerateTerms(fieldName, numTerms);
    System.Text.StringBuilder sb = new System.Text.StringBuilder();
    for (int i = 0; i < terms.Length; i++)
    {
        sb.Append(terms[i].Text);
        sb.Append(" ");
    }
    System.String content = sb.ToString();

    int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
    byte[] payloadData = GenerateRandomData(payloadDataLength);
    Document d = new Document();
    d.Add(new Field(fieldName, content, Field.Store.NO, Field.Index.ANALYZED));
    // add the same document multiple times to have the same payload lengths for all
    // occurrences within two consecutive skip intervals
    int offset = 0;
    for (int i = 0; i < 2 * numDocs; i++)
    {
        analyzer.SetPayloadData(fieldName, payloadData, offset, 1);
        offset += numTerms;
        writer.AddDocument(d, null);
    }

    // make sure we create more than one segment to test merging
    writer.Commit(null);

    // now we make sure to have different payload lengths at the next skip point
    for (int i = 0; i < numDocs; i++)
    {
        analyzer.SetPayloadData(fieldName, payloadData, offset, i);
        offset += i * numTerms;
        writer.AddDocument(d, null);
    }

    writer.Optimize(null);
    // flush
    writer.Close();

    /*
     * Verify the index
     * first we test if all payloads are stored correctly
     */
    IndexReader reader = IndexReader.Open(dir, true, null);
    byte[] verifyPayloadData = new byte[payloadDataLength];
    offset = 0;
    TermPositions[] tps = new TermPositions[numTerms];
    for (int i = 0; i < numTerms; i++)
    {
        tps[i] = reader.TermPositions(terms[i], null);
    }

    while (tps[0].Next(null))
    {
        for (int i = 1; i < numTerms; i++)
        {
            tps[i].Next(null);
        }
        int freq = tps[0].Freq;

        for (int i = 0; i < freq; i++)
        {
            for (int j = 0; j < numTerms; j++)
            {
                tps[j].NextPosition(null);
                tps[j].GetPayload(verifyPayloadData, offset, null);
                offset += tps[j].PayloadLength;
            }
        }
    }

    for (int i = 0; i < numTerms; i++)
    {
        tps[i].Close();
    }

    AssertByteArrayEquals(payloadData, verifyPayloadData);

    /*
     * test lazy skipping
     */
    TermPositions tp = reader.TermPositions(terms[0], null);
    tp.Next(null);
    tp.NextPosition(null);
    // now we don't read this payload
    tp.NextPosition(null);
    Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
    byte[] payload = tp.GetPayload(null, 0, null);
    Assert.AreEqual(payload[0], payloadData[numTerms]);
    tp.NextPosition(null);

    // we don't read this payload and skip to a different document
    tp.SkipTo(5, null);
    tp.NextPosition(null);
    Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
    payload = tp.GetPayload(null, 0, null);
    Assert.AreEqual(payload[0], payloadData[5 * numTerms]);

    /*
     * Test different lengths at skip points
     */
    tp.Seek(terms[1], null);
    tp.Next(null);
    tp.NextPosition(null);
    Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
    tp.SkipTo(skipInterval - 1, null);
    tp.NextPosition(null);
    Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
    tp.SkipTo(2 * skipInterval - 1, null);
    tp.NextPosition(null);
    Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
    tp.SkipTo(3 * skipInterval - 1, null);
    tp.NextPosition(null);
    Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.PayloadLength, "Wrong payload length.");

    /*
     * Test multiple call of getPayload()
     */
    tp.GetPayload(null, 0, null);
    // it is forbidden to call getPayload() more than once
    // without calling nextPosition()
    Assert.Throws<IOException>(() => tp.GetPayload(null, 0, null), "Expected exception not thrown");

    reader.Close();

    // test long payload
    analyzer = new PayloadAnalyzer();
    writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);
    System.String singleTerm = "lucene";

    d = new Document();
    d.Add(new Field(fieldName, singleTerm, Field.Store.NO, Field.Index.ANALYZED));
    // add a payload whose length is greater than the buffer size of BufferedIndexOutput
    payloadData = GenerateRandomData(2000);
    analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
    writer.AddDocument(d, null);

    writer.Optimize(null);
    // flush
    writer.Close();

    reader = IndexReader.Open(dir, true, null);
    tp = reader.TermPositions(new Term(fieldName, singleTerm), null);
    tp.Next(null);
    tp.NextPosition(null);

    verifyPayloadData = new byte[tp.PayloadLength];
    tp.GetPayload(verifyPayloadData, 0, null);
    byte[] portion = new byte[1500];
    Array.Copy(payloadData, 100, portion, 0, 1500);

    AssertByteArrayEquals(portion, verifyPayloadData);
    reader.Close();
}
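// ---------------------------------------------------------------------------
// The PerformTest variants call GenerateTerms and GenerateRandomData, which
// are not shown in this excerpt. These are hedged sketches inferred from the
// call sites (terms must concatenate into predictable field content; the data
// is just random bytes); the bodies are assumptions, not the file's code.
// ---------------------------------------------------------------------------
private static Term[] GenerateTerms(string fieldName, int n)
{
    int maxDigits = (int)(Math.Log(n) / Math.Log(10));
    Term[] terms = new Term[n];
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < n; i++)
    {
        sb.Length = 0;
        sb.Append("t");
        // zero-pad so every term has the same length and a stable sort order
        // (Math.Max guards against Math.Log(0) returning -Infinity)
        int zeros = maxDigits - (int)(Math.Log(Math.Max(i, 1)) / Math.Log(10));
        for (int j = 0; j < zeros; j++)
        {
            sb.Append("0");
        }
        sb.Append(i);
        terms[i] = new Term(fieldName, sb.ToString());
    }
    return terms;
}

private byte[] GenerateRandomData(int n)
{
    // rnd is the test's random source (set via NewRandom() in the older
    // variants); the 4.x variants would use the test framework's Random
    byte[] data = new byte[n];
    rnd.NextBytes(data);
    return data;
}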
// builds an index with payloads in the given Directory and performs
// different tests to verify the payload encoding
private void PerformTest(Directory dir)
{
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE).SetMergePolicy(NewLogMergePolicy()));

    // should be in sync with value in TermInfosWriter
    const int skipInterval = 16;
    const int numTerms = 5;
    const string fieldName = "f1";
    int numDocs = skipInterval + 1;
    // create content for the test documents with just a few terms
    Term[] terms = GenerateTerms(fieldName, numTerms);
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < terms.Length; i++)
    {
        sb.Append(terms[i].Text());
        sb.Append(" ");
    }
    string content = sb.ToString();

    int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
    var payloadData = GenerateRandomData(payloadDataLength);
    Document d = new Document();
    d.Add(NewTextField(fieldName, content, Field.Store.NO));
    // add the same document multiple times to have the same payload lengths for all
    // occurrences within two consecutive skip intervals
    int offset = 0;
    for (int i = 0; i < 2 * numDocs; i++)
    {
        analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, 1);
        offset += numTerms;
        writer.AddDocument(d, analyzer);
    }

    // make sure we create more than one segment to test merging
    writer.Commit();

    // now we make sure to have different payload lengths at the next skip point
    for (int i = 0; i < numDocs; i++)
    {
        analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, i);
        offset += i * numTerms;
        writer.AddDocument(d, analyzer);
    }

    writer.ForceMerge(1);
    // flush
    writer.Dispose();

    /*
     * Verify the index
     * first we test if all payloads are stored correctly
     */
    IndexReader reader = DirectoryReader.Open(dir);
    var verifyPayloadData = new byte[payloadDataLength];
    offset = 0;
    var tps = new DocsAndPositionsEnum[numTerms];
    for (int i = 0; i < numTerms; i++)
    {
        tps[i] = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[i].Field, new BytesRef(terms[i].Text()));
    }

    while (tps[0].NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
    {
        for (int i = 1; i < numTerms; i++)
        {
            tps[i].NextDoc();
        }
        int freq = tps[0].Freq;

        for (int i = 0; i < freq; i++)
        {
            for (int j = 0; j < numTerms; j++)
            {
                tps[j].NextPosition();
                BytesRef br = tps[j].GetPayload();
                if (br != null)
                {
                    Array.Copy(br.Bytes, br.Offset, verifyPayloadData, offset, br.Length);
                    offset += br.Length;
                }
            }
        }
    }

    AssertByteArrayEquals(payloadData, verifyPayloadData);

    /*
     * test lazy skipping
     */
    DocsAndPositionsEnum tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[0].Field, new BytesRef(terms[0].Text()));
    tp.NextDoc();
    tp.NextPosition();
    // NOTE: prior rev of this test was failing to first
    // call next here:
    tp.NextDoc();
    // now we don't read this payload
    tp.NextPosition();
    BytesRef payload = tp.GetPayload();
    Assert.AreEqual(1, payload.Length, "Wrong payload length.");
    Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[numTerms]);
    tp.NextDoc();
    tp.NextPosition();

    // we don't read this payload and skip to a different document
    tp.Advance(5);
    tp.NextPosition();
    payload = tp.GetPayload();
    Assert.AreEqual(1, payload.Length, "Wrong payload length.");
    Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[5 * numTerms]);

    /*
     * Test different lengths at skip points
     */
    tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[1].Field, new BytesRef(terms[1].Text()));
    tp.NextDoc();
    tp.NextPosition();
    Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length.");
    tp.Advance(skipInterval - 1);
    tp.NextPosition();
    Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length.");
    tp.Advance(2 * skipInterval - 1);
    tp.NextPosition();
    Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length.");
    tp.Advance(3 * skipInterval - 1);
    tp.NextPosition();
    Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.GetPayload().Length, "Wrong payload length.");

    reader.Dispose();

    // test long payload
    analyzer = new PayloadAnalyzer();
    writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE));
    string singleTerm = "lucene";

    d = new Document();
    d.Add(NewTextField(fieldName, singleTerm, Field.Store.NO));
    // add a payload whose length is greater than the buffer size of BufferedIndexOutput
    payloadData = GenerateRandomData(2000);
    analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
    writer.AddDocument(d);

    writer.ForceMerge(1);
    // flush
    writer.Dispose();

    reader = DirectoryReader.Open(dir);
    tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), fieldName, new BytesRef(singleTerm));
    tp.NextDoc();
    tp.NextPosition();

    BytesRef bref = tp.GetPayload();
    verifyPayloadData = new byte[bref.Length];
    var portion = new byte[1500];
    Array.Copy(payloadData, 100, portion, 0, 1500);

    AssertByteArrayEquals(portion, bref.Bytes, bref.Offset, bref.Length);
    reader.Dispose();
}
// builds an index with payloads in the given Directory and performs
// different tests to verify the payload encoding
private void PerformTest(Directory dir)
{
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);

    // should be in sync with value in TermInfosWriter
    int skipInterval = 16;
    int numTerms = 5;
    System.String fieldName = "f1";
    int numDocs = skipInterval + 1;
    // create content for the test documents with just a few terms
    Term[] terms = GenerateTerms(fieldName, numTerms);
    System.Text.StringBuilder sb = new System.Text.StringBuilder();
    for (int i = 0; i < terms.Length; i++)
    {
        sb.Append(terms[i].text_ForNUnit);
        sb.Append(" ");
    }
    System.String content = sb.ToString();

    int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
    byte[] payloadData = GenerateRandomData(payloadDataLength);
    Document d = new Document();
    d.Add(new Field(fieldName, content, Field.Store.NO, Field.Index.ANALYZED));
    // add the same document multiple times to have the same payload lengths for all
    // occurrences within two consecutive skip intervals
    int offset = 0;
    for (int i = 0; i < 2 * numDocs; i++)
    {
        analyzer.SetPayloadData(fieldName, payloadData, offset, 1);
        offset += numTerms;
        writer.AddDocument(d);
    }

    // make sure we create more than one segment to test merging
    writer.Flush();

    // now we make sure to have different payload lengths at the next skip point
    for (int i = 0; i < numDocs; i++)
    {
        analyzer.SetPayloadData(fieldName, payloadData, offset, i);
        offset += i * numTerms;
        writer.AddDocument(d);
    }

    writer.Optimize();
    // flush
    writer.Close();

    /*
     * Verify the index
     * first we test if all payloads are stored correctly
     */
    IndexReader reader = IndexReader.Open(dir);
    byte[] verifyPayloadData = new byte[payloadDataLength];
    offset = 0;
    TermPositions[] tps = new TermPositions[numTerms];
    for (int i = 0; i < numTerms; i++)
    {
        tps[i] = reader.TermPositions(terms[i]);
    }

    while (tps[0].Next())
    {
        for (int i = 1; i < numTerms; i++)
        {
            tps[i].Next();
        }
        int freq = tps[0].Freq();

        for (int i = 0; i < freq; i++)
        {
            for (int j = 0; j < numTerms; j++)
            {
                tps[j].NextPosition();
                tps[j].GetPayload(verifyPayloadData, offset);
                offset += tps[j].GetPayloadLength();
            }
        }
    }

    for (int i = 0; i < numTerms; i++)
    {
        tps[i].Close();
    }

    AssertByteArrayEquals(payloadData, verifyPayloadData);

    /*
     * test lazy skipping
     */
    TermPositions tp = reader.TermPositions(terms[0]);
    tp.Next();
    tp.NextPosition();
    // now we don't read this payload
    tp.NextPosition();
    Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
    byte[] payload = tp.GetPayload(null, 0);
    Assert.AreEqual(payload[0], payloadData[numTerms]);
    tp.NextPosition();

    // we don't read this payload and skip to a different document
    tp.SkipTo(5);
    tp.NextPosition();
    Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
    payload = tp.GetPayload(null, 0);
    Assert.AreEqual(payload[0], payloadData[5 * numTerms]);

    /*
     * Test different lengths at skip points
     */
    tp.Seek(terms[1]);
    tp.Next();
    tp.NextPosition();
    Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
    tp.SkipTo(skipInterval - 1);
    tp.NextPosition();
    Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
    tp.SkipTo(2 * skipInterval - 1);
    tp.NextPosition();
    Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
    tp.SkipTo(3 * skipInterval - 1);
    tp.NextPosition();
    Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.GetPayloadLength(), "Wrong payload length.");

    /*
     * Test multiple call of getPayload()
     */
    tp.GetPayload(null, 0);
    try
    {
        // it is forbidden to call getPayload() more than once
        // without calling nextPosition()
        tp.GetPayload(null, 0);
        Assert.Fail("Expected exception not thrown");
    }
    catch (System.Exception)
    {
        // expected exception
    }

    reader.Close();

    // test long payload
    analyzer = new PayloadAnalyzer();
    writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    System.String singleTerm = "lucene";

    d = new Document();
    d.Add(new Field(fieldName, singleTerm, Field.Store.NO, Field.Index.ANALYZED));
    // add a payload whose length is greater than the buffer size of BufferedIndexOutput
    payloadData = GenerateRandomData(2000);
    analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
    writer.AddDocument(d);

    writer.Optimize();
    // flush
    writer.Close();

    reader = IndexReader.Open(dir);
    tp = reader.TermPositions(new Term(fieldName, singleTerm));
    tp.Next();
    tp.NextPosition();

    verifyPayloadData = new byte[tp.GetPayloadLength()];
    tp.GetPayload(verifyPayloadData, 0);
    byte[] portion = new byte[1500];
    Array.Copy(payloadData, 100, portion, 0, 1500);

    AssertByteArrayEquals(portion, verifyPayloadData);
    reader.Close();
}
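// ---------------------------------------------------------------------------
// The pre-4.0 variants above rely on a stateful PayloadAnalyzer whose filter
// consumes the byte range registered via SetPayloadData. A hedged sketch of
// such a filter against the Lucene.Net 3.0-era attribute API (Payload class,
// IPayloadAttribute); the real helper, and the 2.9.4g variant with its extra
// state arguments, may differ in detail.
// ---------------------------------------------------------------------------
internal class PayloadFilter : TokenFilter
{
    private readonly byte[] data;
    private int offset;
    private readonly int length;
    private readonly IPayloadAttribute payloadAtt;

    public PayloadFilter(TokenStream input, byte[] data, int offset, int length)
        : base(input)
    {
        this.data = data;
        this.offset = offset;
        this.length = length;
        payloadAtt = AddAttribute<IPayloadAttribute>();
    }

    public override bool IncrementToken()
    {
        if (!input.IncrementToken())
        {
            return false;
        }
        if (offset + length <= data.Length)
        {
            // hand the next slice of the shared array to the current token;
            // once the data runs out, later positions get no payload
            payloadAtt.Payload = new Payload(data, offset, length);
            offset += length;
        }
        return true;
    }
}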
public virtual void TestPayloadFieldBit()
{
    rnd = NewRandom();
    Directory ram = new RAMDirectory();
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    Document d = new Document();
    // this field won't have any payloads
    d.Add(new Field("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED));
    // this field will have payloads in all docs, however not for all term positions,
    // so this field is used to check if the DocumentWriter correctly enables the payloads bit
    // even if only some term positions have payloads
    d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
    d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
    // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
    // enabled in only some documents
    d.Add(new Field("f3", "This field has payloads in some docs", Field.Store.NO, Field.Index.ANALYZED));
    // only add payload data for field f2
    analyzer.SetPayloadData("f2", 1, System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 1);
    writer.AddDocument(d);
    // flush
    writer.Close();

    SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram);
    FieldInfos fi = reader.FieldInfos();
    Assert.IsFalse(fi.FieldInfo("f1").storePayloads_ForNUnit, "Payload field bit should not be set.");
    Assert.IsTrue(fi.FieldInfo("f2").storePayloads_ForNUnit, "Payload field bit should be set.");
    Assert.IsFalse(fi.FieldInfo("f3").storePayloads_ForNUnit, "Payload field bit should not be set.");
    reader.Close();

    // now we add another document which has payloads for field f3 and verify if the SegmentMerger
    // enabled payloads for that field
    writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
    d = new Document();
    d.Add(new Field("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED));
    d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
    d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
    d.Add(new Field("f3", "This field has payloads in some docs", Field.Store.NO, Field.Index.ANALYZED));
    // add payload data for field f2 and f3
    analyzer.SetPayloadData("f2", System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 1);
    analyzer.SetPayloadData("f3", System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 3);
    writer.AddDocument(d);
    // force merge
    writer.Optimize();
    // flush
    writer.Close();

    reader = SegmentReader.GetOnlySegmentReader(ram);
    fi = reader.FieldInfos();
    Assert.IsFalse(fi.FieldInfo("f1").storePayloads_ForNUnit, "Payload field bit should not be set.");
    Assert.IsTrue(fi.FieldInfo("f2").storePayloads_ForNUnit, "Payload field bit should be set.");
    Assert.IsTrue(fi.FieldInfo("f3").storePayloads_ForNUnit, "Payload field bit should be set.");
    reader.Close();
}
// builds an index with payloads in the given Directory and performs
// different tests to verify the payload encoding
private void PerformTest(Directory dir)
{
    PayloadAnalyzer analyzer = new PayloadAnalyzer();
    IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode_e.CREATE).SetMergePolicy(NewLogMergePolicy()));

    // should be in sync with value in TermInfosWriter
    const int skipInterval = 16;
    const int numTerms = 5;
    const string fieldName = "f1";
    int numDocs = skipInterval + 1;
    // create content for the test documents with just a few terms
    Term[] terms = GenerateTerms(fieldName, numTerms);
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < terms.Length; i++)
    {
        sb.Append(terms[i].Text());
        sb.Append(" ");
    }
    string content = sb.ToString();

    int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
    sbyte[] payloadData = GenerateRandomData(payloadDataLength);
    Document d = new Document();
    d.Add(NewTextField(fieldName, content, Field.Store.NO));
    // add the same document multiple times to have the same payload lengths for all
    // occurrences within two consecutive skip intervals
    int offset = 0;
    for (int i = 0; i < 2 * numDocs; i++)
    {
        analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, 1);
        offset += numTerms;
        writer.AddDocument(d, analyzer);
    }

    // make sure we create more than one segment to test merging
    writer.Commit();

    // now we make sure to have different payload lengths at the next skip point
    for (int i = 0; i < numDocs; i++)
    {
        analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, i);
        offset += i * numTerms;
        writer.AddDocument(d, analyzer);
    }

    writer.ForceMerge(1);
    // flush
    writer.Dispose();

    /*
     * Verify the index
     * first we test if all payloads are stored correctly
     */
    IndexReader reader = DirectoryReader.Open(dir);
    sbyte[] verifyPayloadData = new sbyte[payloadDataLength];
    offset = 0;
    DocsAndPositionsEnum[] tps = new DocsAndPositionsEnum[numTerms];
    for (int i = 0; i < numTerms; i++)
    {
        tps[i] = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[i].Field(), new BytesRef(terms[i].Text()));
    }

    while (tps[0].NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
    {
        for (int i = 1; i < numTerms; i++)
        {
            tps[i].NextDoc();
        }
        int freq = tps[0].Freq();

        for (int i = 0; i < freq; i++)
        {
            for (int j = 0; j < numTerms; j++)
            {
                tps[j].NextPosition();
                BytesRef br = tps[j].Payload;
                if (br != null)
                {
                    Array.Copy(br.Bytes, br.Offset, verifyPayloadData, offset, br.Length);
                    offset += br.Length;
                }
            }
        }
    }

    AssertByteArrayEquals(payloadData, verifyPayloadData);

    /*
     * test lazy skipping
     */
    DocsAndPositionsEnum tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[0].Field(), new BytesRef(terms[0].Text()));
    tp.NextDoc();
    tp.NextPosition();
    // NOTE: prior rev of this test was failing to first
    // call next here:
    tp.NextDoc();
    // now we don't read this payload
    tp.NextPosition();
    BytesRef payload = tp.Payload;
    Assert.AreEqual(1, payload.Length, "Wrong payload length.");
    Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[numTerms]);
    tp.NextDoc();
    tp.NextPosition();

    // we don't read this payload and skip to a different document
    tp.Advance(5);
    tp.NextPosition();
    payload = tp.Payload;
    Assert.AreEqual(1, payload.Length, "Wrong payload length.");
    Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[5 * numTerms]);

    /*
     * Test different lengths at skip points
     */
    tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[1].Field(), new BytesRef(terms[1].Text()));
    tp.NextDoc();
    tp.NextPosition();
    Assert.AreEqual(1, tp.Payload.Length, "Wrong payload length.");
    tp.Advance(skipInterval - 1);
    tp.NextPosition();
    Assert.AreEqual(1, tp.Payload.Length, "Wrong payload length.");
    tp.Advance(2 * skipInterval - 1);
    tp.NextPosition();
    Assert.AreEqual(1, tp.Payload.Length, "Wrong payload length.");
    tp.Advance(3 * skipInterval - 1);
    tp.NextPosition();
    Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.Payload.Length, "Wrong payload length.");

    reader.Dispose();

    // test long payload
    analyzer = new PayloadAnalyzer();
    writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode_e.CREATE));
    string singleTerm = "lucene";

    d = new Document();
    d.Add(NewTextField(fieldName, singleTerm, Field.Store.NO));
    // add a payload whose length is greater than the buffer size of BufferedIndexOutput
    payloadData = GenerateRandomData(2000);
    analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
    writer.AddDocument(d);

    writer.ForceMerge(1);
    // flush
    writer.Dispose();

    reader = DirectoryReader.Open(dir);
    tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), fieldName, new BytesRef(singleTerm));
    tp.NextDoc();
    tp.NextPosition();

    BytesRef bref = tp.Payload;
    verifyPayloadData = new sbyte[bref.Length];
    sbyte[] portion = new sbyte[1500];
    Array.Copy(payloadData, 100, portion, 0, 1500);

    AssertByteArrayEquals(portion, bref.Bytes, bref.Offset, bref.Length);
    reader.Dispose();
}