Esempio n. 1
0
        public virtual void TestPayloadFieldBit()
        {
            Directory       ram      = NewDirectory();
            PayloadAnalyzer analyzer = new PayloadAnalyzer();
            IndexWriter     writer   = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            Document        d        = new Document();

            // this field won't have any payloads
            d.Add(NewTextField("f1", "this field has no payloads", Field.Store.NO));
            // this field will have payloads in all docs, however not for all term positions,
            // so this field is used to check if the DocumentWriter correctly enables the payloads bit
            // even if only some term positions have payloads
            d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO));
            d.Add(NewTextField("f2", "this field has payloads in all docs NO PAYLOAD", Field.Store.NO));
            // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
            // enabled in only some documents
            d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO));
            // only add payload data for field f2
            analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1);
            writer.AddDocument(d);
            // flush
            writer.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            FieldInfos    fi     = reader.FieldInfos;

            Assert.IsFalse(fi.FieldInfo("f1").HasPayloads(), "Payload field bit should not be set.");
            Assert.IsTrue(fi.FieldInfo("f2").HasPayloads(), "Payload field bit should be set.");
            Assert.IsFalse(fi.FieldInfo("f3").HasPayloads(), "Payload field bit should not be set.");
            reader.Dispose();

            // now we add another document which has payloads for field f3 and verify if the SegmentMerger
            // enabled payloads for that field
            analyzer = new PayloadAnalyzer(); // Clear payload state for each field
            writer   = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode_e.CREATE));
            d        = new Document();
            d.Add(NewTextField("f1", "this field has no payloads", Field.Store.NO));
            d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO));
            d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO));
            d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO));
            // add payload data for field f2 and f3
            analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1);
            analyzer.SetPayloadData("f3", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 3);
            writer.AddDocument(d);

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            fi     = reader.FieldInfos;
            Assert.IsFalse(fi.FieldInfo("f1").HasPayloads(), "Payload field bit should not be set.");
            Assert.IsTrue(fi.FieldInfo("f2").HasPayloads(), "Payload field bit should be set.");
            Assert.IsTrue(fi.FieldInfo("f3").HasPayloads(), "Payload field bit should be set.");
            reader.Dispose();
            ram.Dispose();
        }
Esempio n. 2
0
        public virtual void  TestPayloadFieldBit()
        {
            rnd = NewRandom();
            Directory       ram      = new RAMDirectory();
            PayloadAnalyzer analyzer = new PayloadAnalyzer();
            IndexWriter     writer   = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);
            Document        d        = new Document();

            // this field won't have any payloads
            d.Add(new Field("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED));
            // this field will have payloads in all docs, however not for all term positions,
            // so this field is used to check if the DocumentWriter correctly enables the payloads bit
            // even if only some term positions have payloads
            d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
            d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
            // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
            // enabled in only some documents
            d.Add(new Field("f3", "This field has payloads in some docs", Field.Store.NO, Field.Index.ANALYZED));
            // only add payload data for field f2
            analyzer.SetPayloadData("f2", 1, System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 1);
            writer.AddDocument(d, null);
            // flush
            writer.Close();

            SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram, null);
            FieldInfos    fi     = reader.FieldInfos();

            Assert.IsFalse(fi.FieldInfo("f1").storePayloads_ForNUnit, "Payload field bit should not be set.");
            Assert.IsTrue(fi.FieldInfo("f2").storePayloads_ForNUnit, "Payload field bit should be set.");
            Assert.IsFalse(fi.FieldInfo("f3").storePayloads_ForNUnit, "Payload field bit should not be set.");
            reader.Close();

            // now we add another document which has payloads for field f3 and verify if the SegmentMerger
            // enabled payloads for that field
            writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);
            d      = new Document();
            d.Add(new Field("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED));
            d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
            d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
            d.Add(new Field("f3", "This field has payloads in some docs", Field.Store.NO, Field.Index.ANALYZED));
            // add payload data for field f2 and f3
            analyzer.SetPayloadData("f2", System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 1);
            analyzer.SetPayloadData("f3", System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 3);
            writer.AddDocument(d, null);
            // force merge
            writer.Optimize(null);
            // flush
            writer.Close();

            reader = SegmentReader.GetOnlySegmentReader(ram, null);
            fi     = reader.FieldInfos();
            Assert.IsFalse(fi.FieldInfo("f1").storePayloads_ForNUnit, "Payload field bit should not be set.");
            Assert.IsTrue(fi.FieldInfo("f2").storePayloads_ForNUnit, "Payload field bit should be set.");
            Assert.IsTrue(fi.FieldInfo("f3").storePayloads_ForNUnit, "Payload field bit should be set.");
            reader.Close();
        }
Esempio n. 3
0
        public virtual void TestPayloadFieldBit()
        {
            Directory ram = NewDirectory();
            PayloadAnalyzer analyzer = new PayloadAnalyzer();
            IndexWriter writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            Document d = new Document();
            // this field won't have any payloads
            d.Add(NewTextField("f1", "this field has no payloads", Field.Store.NO));
            // this field will have payloads in all docs, however not for all term positions,
            // so this field is used to check if the DocumentWriter correctly enables the payloads bit
            // even if only some term positions have payloads
            d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO));
            d.Add(NewTextField("f2", "this field has payloads in all docs NO PAYLOAD", Field.Store.NO));
            // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads
            // enabled in only some documents
            d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO));
            // only add payload data for field f2
            analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1);
            writer.AddDocument(d);
            // flush
            writer.Dispose();

            SegmentReader reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            FieldInfos fi = reader.FieldInfos;
            Assert.IsFalse(fi.FieldInfo("f1").HasPayloads(), "Payload field bit should not be set.");
            Assert.IsTrue(fi.FieldInfo("f2").HasPayloads(), "Payload field bit should be set.");
            Assert.IsFalse(fi.FieldInfo("f3").HasPayloads(), "Payload field bit should not be set.");
            reader.Dispose();

            // now we add another document which has payloads for field f3 and verify if the SegmentMerger
            // enabled payloads for that field
            analyzer = new PayloadAnalyzer(); // Clear payload state for each field
            writer = new IndexWriter(ram, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode_e.CREATE));
            d = new Document();
            d.Add(NewTextField("f1", "this field has no payloads", Field.Store.NO));
            d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO));
            d.Add(NewTextField("f2", "this field has payloads in all docs", Field.Store.NO));
            d.Add(NewTextField("f3", "this field has payloads in some docs", Field.Store.NO));
            // add payload data for field f2 and f3
            analyzer.SetPayloadData("f2", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1);
            analyzer.SetPayloadData("f3", "somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 3);
            writer.AddDocument(d);

            // force merge
            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            reader = GetOnlySegmentReader(DirectoryReader.Open(ram));
            fi = reader.FieldInfos;
            Assert.IsFalse(fi.FieldInfo("f1").HasPayloads(), "Payload field bit should not be set.");
            Assert.IsTrue(fi.FieldInfo("f2").HasPayloads(), "Payload field bit should be set.");
            Assert.IsTrue(fi.FieldInfo("f3").HasPayloads(), "Payload field bit should be set.");
            reader.Dispose();
            ram.Dispose();
        }
Esempio n. 4
0
        // builds an index with payloads in the given Directory and performs
        // different tests to verify the payload encoding
        private void  PerformTest(Directory dir)
        {
            PayloadAnalyzer analyzer = new PayloadAnalyzer();
            IndexWriter     writer   = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);

            // should be in sync with value in TermInfosWriter
            int skipInterval = 16;

            int numTerms = 5;

            System.String fieldName = "f1";

            int numDocs = skipInterval + 1;

            // create content for the test documents with just a few terms
            Term[] terms = GenerateTerms(fieldName, numTerms);
            System.Text.StringBuilder sb = new System.Text.StringBuilder();
            for (int i = 0; i < terms.Length; i++)
            {
                sb.Append(terms[i].Text);
                sb.Append(" ");
            }
            System.String content = sb.ToString();


            int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;

            byte[] payloadData = GenerateRandomData(payloadDataLength);

            Document d = new Document();

            d.Add(new Field(fieldName, content, Field.Store.NO, Field.Index.ANALYZED));
            // add the same document multiple times to have the same payload lengths for all
            // occurrences within two consecutive skip intervals
            int offset = 0;

            for (int i = 0; i < 2 * numDocs; i++)
            {
                analyzer.SetPayloadData(fieldName, payloadData, offset, 1);
                offset += numTerms;
                writer.AddDocument(d, null);
            }

            // make sure we create more than one segment to test merging
            writer.Commit(null);

            // now we make sure to have different payload lengths next at the next skip point
            for (int i = 0; i < numDocs; i++)
            {
                analyzer.SetPayloadData(fieldName, payloadData, offset, i);
                offset += i * numTerms;
                writer.AddDocument(d, null);
            }

            writer.Optimize(null);
            // flush
            writer.Close();


            /*
             * Verify the index
             * first we test if all payloads are stored correctly
             */
            IndexReader reader = IndexReader.Open(dir, true, null);

            byte[] verifyPayloadData = new byte[payloadDataLength];
            offset = 0;
            TermPositions[] tps = new TermPositions[numTerms];
            for (int i = 0; i < numTerms; i++)
            {
                tps[i] = reader.TermPositions(terms[i], null);
            }

            while (tps[0].Next(null))
            {
                for (int i = 1; i < numTerms; i++)
                {
                    tps[i].Next(null);
                }
                int freq = tps[0].Freq;

                for (int i = 0; i < freq; i++)
                {
                    for (int j = 0; j < numTerms; j++)
                    {
                        tps[j].NextPosition(null);
                        tps[j].GetPayload(verifyPayloadData, offset, null);
                        offset += tps[j].PayloadLength;
                    }
                }
            }

            for (int i = 0; i < numTerms; i++)
            {
                tps[i].Close();
            }

            AssertByteArrayEquals(payloadData, verifyPayloadData);

            /*
             *  test lazy skipping
             */
            TermPositions tp = reader.TermPositions(terms[0], null);

            tp.Next(null);
            tp.NextPosition(null);
            // now we don't read this payload
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            byte[] payload = tp.GetPayload(null, 0, null);
            Assert.AreEqual(payload[0], payloadData[numTerms]);
            tp.NextPosition(null);

            // we don't read this payload and skip to a different document
            tp.SkipTo(5, null);
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            payload = tp.GetPayload(null, 0, null);
            Assert.AreEqual(payload[0], payloadData[5 * numTerms]);


            /*
             * Test different lengths at skip points
             */
            tp.Seek(terms[1], null);
            tp.Next(null);
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            tp.SkipTo(skipInterval - 1, null);
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            tp.SkipTo(2 * skipInterval - 1, null);
            tp.NextPosition(null);
            Assert.AreEqual(1, tp.PayloadLength, "Wrong payload length.");
            tp.SkipTo(3 * skipInterval - 1, null);
            tp.NextPosition(null);
            Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.PayloadLength, "Wrong payload length.");

            /*
             * Test multiple call of getPayload()
             */
            tp.GetPayload(null, 0, null);

            // it is forbidden to call getPayload() more than once
            // without calling nextPosition()
            Assert.Throws <IOException>(() => tp.GetPayload(null, 0, null), "Expected exception not thrown");

            reader.Close();

            // test long payload
            analyzer = new PayloadAnalyzer();
            writer   = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED, null);
            System.String singleTerm = "lucene";

            d = new Document();
            d.Add(new Field(fieldName, singleTerm, Field.Store.NO, Field.Index.ANALYZED));
            // add a payload whose length is greater than the buffer size of BufferedIndexOutput
            payloadData = GenerateRandomData(2000);
            analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
            writer.AddDocument(d, null);


            writer.Optimize(null);
            // flush
            writer.Close();

            reader = IndexReader.Open(dir, true, null);
            tp     = reader.TermPositions(new Term(fieldName, singleTerm), null);
            tp.Next(null);
            tp.NextPosition(null);

            verifyPayloadData = new byte[tp.PayloadLength];
            tp.GetPayload(verifyPayloadData, 0, null);
            byte[] portion = new byte[1500];
            Array.Copy(payloadData, 100, portion, 0, 1500);

            AssertByteArrayEquals(portion, verifyPayloadData);
            reader.Close();
        }
Esempio n. 5
0
        // builds an index with payloads in the given Directory and performs
        // different tests to verify the payload encoding
        private void PerformTest(Directory dir)
        {
            PayloadAnalyzer analyzer = new PayloadAnalyzer();
            IndexWriter     writer   = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE).SetMergePolicy(NewLogMergePolicy()));

            // should be in sync with value in TermInfosWriter
            const int skipInterval = 16;

            const int    numTerms  = 5;
            const string fieldName = "f1";

            int numDocs = skipInterval + 1;

            // create content for the test documents with just a few terms
            Term[]        terms = GenerateTerms(fieldName, numTerms);
            StringBuilder sb    = new StringBuilder();

            for (int i = 0; i < terms.Length; i++)
            {
                sb.Append(terms[i].Text());
                sb.Append(" ");
            }
            string content = sb.ToString();

            int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
            var payloadData       = GenerateRandomData(payloadDataLength);

            Document d = new Document();

            d.Add(NewTextField(fieldName, content, Field.Store.NO));
            // add the same document multiple times to have the same payload lengths for all
            // occurrences within two consecutive skip intervals
            int offset = 0;

            for (int i = 0; i < 2 * numDocs; i++)
            {
                analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, 1);
                offset  += numTerms;
                writer.AddDocument(d, analyzer);
            }

            // make sure we create more than one segment to test merging
            writer.Commit();

            // now we make sure to have different payload lengths next at the next skip point
            for (int i = 0; i < numDocs; i++)
            {
                analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, i);
                offset  += i * numTerms;
                writer.AddDocument(d, analyzer);
            }

            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            /*
             * Verify the index
             * first we test if all payloads are stored correctly
             */
            IndexReader reader = DirectoryReader.Open(dir);

            var verifyPayloadData = new byte[payloadDataLength];

            offset = 0;
            var tps = new DocsAndPositionsEnum[numTerms];

            for (int i = 0; i < numTerms; i++)
            {
                tps[i] = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[i].Field, new BytesRef(terms[i].Text()));
            }

            while (tps[0].NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                for (int i = 1; i < numTerms; i++)
                {
                    tps[i].NextDoc();
                }
                int freq = tps[0].Freq;

                for (int i = 0; i < freq; i++)
                {
                    for (int j = 0; j < numTerms; j++)
                    {
                        tps[j].NextPosition();
                        BytesRef br = tps[j].GetPayload();
                        if (br != null)
                        {
                            Array.Copy(br.Bytes, br.Offset, verifyPayloadData, offset, br.Length);
                            offset += br.Length;
                        }
                    }
                }
            }

            AssertByteArrayEquals(payloadData, verifyPayloadData);

            /*
             *  test lazy skipping
             */
            DocsAndPositionsEnum tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[0].Field, new BytesRef(terms[0].Text()));

            tp.NextDoc();
            tp.NextPosition();
            // NOTE: prior rev of this test was failing to first
            // call next here:
            tp.NextDoc();
            // now we don't read this payload
            tp.NextPosition();
            BytesRef payload = tp.GetPayload();

            Assert.AreEqual(1, payload.Length, "Wrong payload length.");
            Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[numTerms]);
            tp.NextDoc();
            tp.NextPosition();

            // we don't read this payload and skip to a different document
            tp.Advance(5);
            tp.NextPosition();
            payload = tp.GetPayload();
            Assert.AreEqual(1, payload.Length, "Wrong payload length.");
            Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[5 * numTerms]);

            /*
             * Test different lengths at skip points
             */
            tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[1].Field, new BytesRef(terms[1].Text()));
            tp.NextDoc();
            tp.NextPosition();
            Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length.");
            tp.Advance(skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length.");
            tp.Advance(2 * skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length.");
            tp.Advance(3 * skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.GetPayload().Length, "Wrong payload length.");

            reader.Dispose();

            // test long payload
            analyzer = new PayloadAnalyzer();
            writer   = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE));
            string singleTerm = "lucene";

            d = new Document();
            d.Add(NewTextField(fieldName, singleTerm, Field.Store.NO));
            // add a payload whose length is greater than the buffer size of BufferedIndexOutput
            payloadData = GenerateRandomData(2000);
            analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
            writer.AddDocument(d);

            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            reader = DirectoryReader.Open(dir);
            tp     = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), fieldName, new BytesRef(singleTerm));
            tp.NextDoc();
            tp.NextPosition();

            BytesRef bref = tp.GetPayload();

            verifyPayloadData = new byte[bref.Length];
            var portion = new byte[1500];

            Array.Copy(payloadData, 100, portion, 0, 1500);

            AssertByteArrayEquals(portion, bref.Bytes, bref.Offset, bref.Length);
            reader.Dispose();
        }
Esempio n. 6
0
		// builds an index with payloads in the given Directory and performs
		// different tests to verify the payload encoding
		private void  PerformTest(Directory dir)
		{
			PayloadAnalyzer analyzer = new PayloadAnalyzer();
			IndexWriter writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			
			// should be in sync with value in TermInfosWriter
			int skipInterval = 16;
			
			int numTerms = 5;
			System.String fieldName = "f1";
			
			int numDocs = skipInterval + 1;
			// create content for the test documents with just a few terms
			Term[] terms = GenerateTerms(fieldName, numTerms);
			System.Text.StringBuilder sb = new System.Text.StringBuilder();
			for (int i = 0; i < terms.Length; i++)
			{
				sb.Append(terms[i].text_ForNUnit);
				sb.Append(" ");
			}
			System.String content = sb.ToString();
			
			
			int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
			byte[] payloadData = GenerateRandomData(payloadDataLength);
			
			Document d = new Document();
			d.Add(new Field(fieldName, content, Field.Store.NO, Field.Index.ANALYZED));
			// add the same document multiple times to have the same payload lengths for all
			// occurrences within two consecutive skip intervals
			int offset = 0;
			for (int i = 0; i < 2 * numDocs; i++)
			{
				analyzer.SetPayloadData(fieldName, payloadData, offset, 1);
				offset += numTerms;
				writer.AddDocument(d);
			}
			
			// make sure we create more than one segment to test merging
			writer.Flush();
			
			// now we make sure to have different payload lengths next at the next skip point        
			for (int i = 0; i < numDocs; i++)
			{
				analyzer.SetPayloadData(fieldName, payloadData, offset, i);
				offset += i * numTerms;
				writer.AddDocument(d);
			}
			
			writer.Optimize();
			// flush
			writer.Close();
			
			
			/*
			* Verify the index
			* first we test if all payloads are stored correctly
			*/
			IndexReader reader = IndexReader.Open(dir);
			
			byte[] verifyPayloadData = new byte[payloadDataLength];
			offset = 0;
			TermPositions[] tps = new TermPositions[numTerms];
			for (int i = 0; i < numTerms; i++)
			{
				tps[i] = reader.TermPositions(terms[i]);
			}
			
			while (tps[0].Next())
			{
				for (int i = 1; i < numTerms; i++)
				{
					tps[i].Next();
				}
				int freq = tps[0].Freq();
				
				for (int i = 0; i < freq; i++)
				{
					for (int j = 0; j < numTerms; j++)
					{
						tps[j].NextPosition();
						tps[j].GetPayload(verifyPayloadData, offset);
						offset += tps[j].GetPayloadLength();
					}
				}
			}
			
			for (int i = 0; i < numTerms; i++)
			{
				tps[i].Close();
			}
			
			AssertByteArrayEquals(payloadData, verifyPayloadData);
			
			/*
			*  test lazy skipping
			*/
			TermPositions tp = reader.TermPositions(terms[0]);
			tp.Next();
			tp.NextPosition();
			// now we don't read this payload
			tp.NextPosition();
			Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
			byte[] payload = tp.GetPayload(null, 0);
			Assert.AreEqual(payload[0], payloadData[numTerms]);
			tp.NextPosition();
			
			// we don't read this payload and skip to a different document
			tp.SkipTo(5);
			tp.NextPosition();
			Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
			payload = tp.GetPayload(null, 0);
			Assert.AreEqual(payload[0], payloadData[5 * numTerms]);
			
			
			/*
			* Test different lengths at skip points
			*/
			tp.Seek(terms[1]);
			tp.Next();
			tp.NextPosition();
			Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
			tp.SkipTo(skipInterval - 1);
			tp.NextPosition();
			Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
			tp.SkipTo(2 * skipInterval - 1);
			tp.NextPosition();
			Assert.AreEqual(1, tp.GetPayloadLength(), "Wrong payload length.");
			tp.SkipTo(3 * skipInterval - 1);
			tp.NextPosition();
			Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.GetPayloadLength(), "Wrong payload length.");
			
			/*
			* Test multiple call of getPayload()
			*/
			tp.GetPayload(null, 0);
			try
			{
				// it is forbidden to call getPayload() more than once
				// without calling nextPosition()
				tp.GetPayload(null, 0);
				Assert.Fail("Expected exception not thrown");
			}
			catch (System.Exception expected)
			{
				// expected exception
			}
			
			reader.Close();
			
			// test long payload
			analyzer = new PayloadAnalyzer();
			writer = new IndexWriter(dir, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			System.String singleTerm = "lucene";
			
			d = new Document();
			d.Add(new Field(fieldName, singleTerm, Field.Store.NO, Field.Index.ANALYZED));
			// add a payload whose length is greater than the buffer size of BufferedIndexOutput
			payloadData = GenerateRandomData(2000);
			analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
			writer.AddDocument(d);
			
			
			writer.Optimize();
			// flush
			writer.Close();
			
			reader = IndexReader.Open(dir);
			tp = reader.TermPositions(new Term(fieldName, singleTerm));
			tp.Next();
			tp.NextPosition();
			
			verifyPayloadData = new byte[tp.GetPayloadLength()];
			tp.GetPayload(verifyPayloadData, 0);
			byte[] portion = new byte[1500];
			Array.Copy(payloadData, 100, portion, 0, 1500);
			
			AssertByteArrayEquals(portion, verifyPayloadData);
			reader.Close();
		}
Esempio n. 7
0
		public virtual void  TestPayloadFieldBit()
		{
			rnd = NewRandom();
			Directory ram = new RAMDirectory();
			PayloadAnalyzer analyzer = new PayloadAnalyzer();
			IndexWriter writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			Document d = new Document();
			// this field won't have any payloads
			d.Add(new Field("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED));
			// this field will have payloads in all docs, however not for all term positions,
			// so this field is used to check if the DocumentWriter correctly enables the payloads bit
			// even if only some term positions have payloads
			d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
			d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
			// this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads 
			// enabled in only some documents
			d.Add(new Field("f3", "This field has payloads in some docs", Field.Store.NO, Field.Index.ANALYZED));
			// only add payload data for field f2
			analyzer.SetPayloadData("f2", 1, System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 1);
			writer.AddDocument(d);
			// flush
			writer.Close();
			
			SegmentReader reader = SegmentReader.GetOnlySegmentReader(ram);
			FieldInfos fi = reader.FieldInfos();
			Assert.IsFalse(fi.FieldInfo("f1").storePayloads_ForNUnit, "Payload field bit should not be set.");
			Assert.IsTrue(fi.FieldInfo("f2").storePayloads_ForNUnit, "Payload field bit should be set.");
			Assert.IsFalse(fi.FieldInfo("f3").storePayloads_ForNUnit, "Payload field bit should not be set.");
			reader.Close();
			
			// now we add another document which has payloads for field f3 and verify if the SegmentMerger
			// enabled payloads for that field
			writer = new IndexWriter(ram, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
			d = new Document();
			d.Add(new Field("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED));
			d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
			d.Add(new Field("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED));
			d.Add(new Field("f3", "This field has payloads in some docs", Field.Store.NO, Field.Index.ANALYZED));
			// add payload data for field f2 and f3
			analyzer.SetPayloadData("f2", System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 1);
			analyzer.SetPayloadData("f3", System.Text.UTF8Encoding.UTF8.GetBytes("somedata"), 0, 3);
			writer.AddDocument(d);
			// force merge
			writer.Optimize();
			// flush
			writer.Close();
			
			reader = SegmentReader.GetOnlySegmentReader(ram);
			fi = reader.FieldInfos();
			Assert.IsFalse(fi.FieldInfo("f1").storePayloads_ForNUnit, "Payload field bit should not be set.");
			Assert.IsTrue(fi.FieldInfo("f2").storePayloads_ForNUnit, "Payload field bit should be set.");
			Assert.IsTrue(fi.FieldInfo("f3").storePayloads_ForNUnit, "Payload field bit should be set.");
			reader.Close();
		}
Esempio n. 8
0
        // builds an index with payloads in the given Directory and performs
        // different tests to verify the payload encoding
        private void PerformTest(Directory dir)
        {
            PayloadAnalyzer analyzer = new PayloadAnalyzer();
            IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode_e.CREATE).SetMergePolicy(NewLogMergePolicy()));

            // should be in sync with value in TermInfosWriter
            const int skipInterval = 16;

            const int numTerms = 5;
            const string fieldName = "f1";

            int numDocs = skipInterval + 1;
            // create content for the test documents with just a few terms
            Term[] terms = GenerateTerms(fieldName, numTerms);
            StringBuilder sb = new StringBuilder();
            for (int i = 0; i < terms.Length; i++)
            {
                sb.Append(terms[i].Text());
                sb.Append(" ");
            }
            string content = sb.ToString();

            int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
            sbyte[] payloadData = GenerateRandomData(payloadDataLength);

            Document d = new Document();
            d.Add(NewTextField(fieldName, content, Field.Store.NO));
            // add the same document multiple times to have the same payload lengths for all
            // occurrences within two consecutive skip intervals
            int offset = 0;
            for (int i = 0; i < 2 * numDocs; i++)
            {
                analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, 1);
                offset += numTerms;
                writer.AddDocument(d, analyzer);
            }

            // make sure we create more than one segment to test merging
            writer.Commit();

            // now we make sure to have different payload lengths next at the next skip point
            for (int i = 0; i < numDocs; i++)
            {
                analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, i);
                offset += i * numTerms;
                writer.AddDocument(d, analyzer);
            }

            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            /*
             * Verify the index
             * first we test if all payloads are stored correctly
             */
            IndexReader reader = DirectoryReader.Open(dir);

            sbyte[] verifyPayloadData = new sbyte[payloadDataLength];
            offset = 0;
            DocsAndPositionsEnum[] tps = new DocsAndPositionsEnum[numTerms];
            for (int i = 0; i < numTerms; i++)
            {
                tps[i] = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[i].Field(), new BytesRef(terms[i].Text()));
            }

            while (tps[0].NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                for (int i = 1; i < numTerms; i++)
                {
                    tps[i].NextDoc();
                }
                int freq = tps[0].Freq();

                for (int i = 0; i < freq; i++)
                {
                    for (int j = 0; j < numTerms; j++)
                    {
                        tps[j].NextPosition();
                        BytesRef br = tps[j].Payload;
                        if (br != null)
                        {
                            Array.Copy(br.Bytes, br.Offset, verifyPayloadData, offset, br.Length);
                            offset += br.Length;
                        }
                    }
                }
            }

            AssertByteArrayEquals(payloadData, verifyPayloadData);

            /*
             *  test lazy skipping
             */
            DocsAndPositionsEnum tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[0].Field(), new BytesRef(terms[0].Text()));
            tp.NextDoc();
            tp.NextPosition();
            // NOTE: prior rev of this test was failing to first
            // call next here:
            tp.NextDoc();
            // now we don't read this payload
            tp.NextPosition();
            BytesRef payload = tp.Payload;
            Assert.AreEqual(1, payload.Length, "Wrong payload length.");
            Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[numTerms]);
            tp.NextDoc();
            tp.NextPosition();

            // we don't read this payload and skip to a different document
            tp.Advance(5);
            tp.NextPosition();
            payload = tp.Payload;
            Assert.AreEqual(1, payload.Length, "Wrong payload length.");
            Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[5 * numTerms]);

            /*
             * Test different lengths at skip points
             */
            tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[1].Field(), new BytesRef(terms[1].Text()));
            tp.NextDoc();
            tp.NextPosition();
            Assert.AreEqual(1, tp.Payload.Length, "Wrong payload length.");
            tp.Advance(skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(1, tp.Payload.Length, "Wrong payload length.");
            tp.Advance(2 * skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(1, tp.Payload.Length, "Wrong payload length.");
            tp.Advance(3 * skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.Payload.Length, "Wrong payload length.");

            reader.Dispose();

            // test long payload
            analyzer = new PayloadAnalyzer();
            writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode_e.CREATE));
            string singleTerm = "lucene";

            d = new Document();
            d.Add(NewTextField(fieldName, singleTerm, Field.Store.NO));
            // add a payload whose length is greater than the buffer size of BufferedIndexOutput
            payloadData = GenerateRandomData(2000);
            analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
            writer.AddDocument(d);

            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            reader = DirectoryReader.Open(dir);
            tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), fieldName, new BytesRef(singleTerm));
            tp.NextDoc();
            tp.NextPosition();

            BytesRef bref = tp.Payload;
            verifyPayloadData = new sbyte[bref.Length];
            sbyte[] portion = new sbyte[1500];
            Array.Copy(payloadData, 100, portion, 0, 1500);

            AssertByteArrayEquals(portion, bref.Bytes, bref.Offset, bref.Length);
            reader.Dispose();
        }