示例#1
0
        public virtual void TestTokenReuse()
        {
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper2(this);

            IndexWriter writer = new IndexWriter(Dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));

            Document doc = new Document();

            doc.Add(NewTextField("f1", "a 5 a a", Field.Store.YES));

            writer.AddDocument(doc);
            writer.Commit();
            SegmentCommitInfo info = writer.NewestSegment();

            writer.Dispose();
            SegmentReader reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, NewIOContext(Random()));

            DocsAndPositionsEnum termPositions = MultiFields.GetTermPositionsEnum(reader, reader.LiveDocs, "f1", new BytesRef("a"));

            Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            int freq = termPositions.Freq;

            Assert.AreEqual(3, freq);
            Assert.AreEqual(0, termPositions.NextPosition());
            Assert.IsNotNull(termPositions.GetPayload());
            Assert.AreEqual(6, termPositions.NextPosition());
            Assert.IsNull(termPositions.GetPayload());
            Assert.AreEqual(7, termPositions.NextPosition());
            Assert.IsNull(termPositions.GetPayload());
            reader.Dispose();
        }
        public virtual void TestMixupMultiValued()
        {
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            Document  doc        = new Document();
            FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);

            customType.StoreTermVectors         = true;
            customType.StoreTermVectorPositions = true;
            customType.StoreTermVectorPayloads  = true;
            customType.StoreTermVectorOffsets   = Random.NextBoolean();
            Field       field = new Field("field", "", customType);
            TokenStream ts    = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);

            Assert.IsFalse(ts.HasAttribute <IPayloadAttribute>());
            field.SetTokenStream(ts);
            doc.Add(field);
            Field field2      = new Field("field", "", customType);
            Token withPayload = new Token("withPayload", 0, 11);

            withPayload.Payload = new BytesRef("test");
            ts = new CannedTokenStream(withPayload);
            Assert.IsTrue(ts.HasAttribute <IPayloadAttribute>());
            field2.SetTokenStream(ts);
            doc.Add(field2);
            Field field3 = new Field("field", "", customType);

            ts = new MockTokenizer(new StringReader("nopayload"), MockTokenizer.WHITESPACE, true);
            Assert.IsFalse(ts.HasAttribute <IPayloadAttribute>());
            field3.SetTokenStream(ts);
            doc.Add(field3);
            writer.AddDocument(doc);
            DirectoryReader reader = writer.GetReader();
            Terms           terms  = reader.GetTermVector(0, "field");

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(terms != null);
            }
            TermsEnum termsEnum = terms.GetIterator(null);

            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload")));
            DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null);

            Assert.AreEqual(0, de.NextDoc());
            Assert.AreEqual(3, de.NextPosition());
            Assert.AreEqual(new BytesRef("test"), de.GetPayload());
            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
        public virtual void TestMixupDocs()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer     = new RandomIndexWriter(Random, dir, iwc);
            Document          doc        = new Document();
            FieldType         customType = new FieldType(TextField.TYPE_NOT_STORED);

            customType.StoreTermVectors         = true;
            customType.StoreTermVectorPositions = true;
            customType.StoreTermVectorPayloads  = true;
            customType.StoreTermVectorOffsets   = Random.NextBoolean();
            Field       field = new Field("field", "", customType);
            TokenStream ts    = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);

            Assert.IsFalse(ts.HasAttribute <IPayloadAttribute>());
            field.SetTokenStream(ts);
            doc.Add(field);
            writer.AddDocument(doc);

            Token withPayload = new Token("withPayload", 0, 11);

            withPayload.Payload = new BytesRef("test");
            ts = new CannedTokenStream(withPayload);
            Assert.IsTrue(ts.HasAttribute <IPayloadAttribute>());
            field.SetTokenStream(ts);
            writer.AddDocument(doc);

            ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);
            Assert.IsFalse(ts.HasAttribute <IPayloadAttribute>());
            field.SetTokenStream(ts);
            writer.AddDocument(doc);

            DirectoryReader reader = writer.GetReader();
            Terms           terms  = reader.GetTermVector(1, "field");

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(terms != null);
            }
            TermsEnum termsEnum = terms.GetIterator(null);

            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("withPayload")));
            DocsAndPositionsEnum de = termsEnum.DocsAndPositions(null, null);

            Assert.AreEqual(0, de.NextDoc());
            Assert.AreEqual(0, de.NextPosition());
            Assert.AreEqual(new BytesRef("test"), de.GetPayload());
            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
示例#4
0
 internal virtual void VerifyPositions(PositionData[] positions, DocsAndPositionsEnum posEnum)
 {
     for (int i = 0; i < positions.Length; i++)
     {
         int pos = posEnum.NextPosition();
         Assert.AreEqual(positions[i].pos, pos);
         if (positions[i].payload != null)
         {
             Assert.IsNotNull(posEnum.GetPayload());
             if (Random.Next(3) < 2)
             {
                 // Verify the payload bytes
                 BytesRef otherPayload = posEnum.GetPayload();
                 Assert.IsTrue(positions[i].payload.Equals(otherPayload), "expected=" + positions[i].payload.ToString() + " got=" + otherPayload.ToString());
             }
         }
         else
         {
             Assert.IsNull(posEnum.GetPayload());
         }
     }
 }
        public virtual void CheckSkipTo(DocsAndPositionsEnum tp, int target, int maxCounter)
        {
            tp.Advance(target);
            if (maxCounter < counter)
            {
                Assert.Fail("Too many bytes read: " + counter + " vs " + maxCounter);
            }

            Assert.AreEqual(target, tp.DocID, "Wrong document " + tp.DocID + " after skipTo target " + target);
            Assert.AreEqual(1, tp.Freq, "Frequency is not 1: " + tp.Freq);
            tp.NextPosition();
            BytesRef b = tp.GetPayload();

            Assert.AreEqual(1, b.Length);
            Assert.AreEqual((sbyte)target, (sbyte)b.Bytes[b.Offset], "Wrong payload for the target " + target + ": " + (sbyte)b.Bytes[b.Offset]);
        }
示例#6
0
        public virtual void TestThreadSafety()
        {
            const int     numThreads = 5;
            int           numDocs    = AtLeast(50);
            ByteArrayPool pool       = new ByteArrayPool(numThreads, 5);

            Directory    dir    = NewDirectory();
            IndexWriter  writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
            const string field  = "test";

            ThreadJob[] ingesters = new ThreadJob[numThreads];
            for (int i = 0; i < numThreads; i++)
            {
                ingesters[i] = new ThreadAnonymousInnerClassHelper(this, numDocs, pool, writer, field);
                ingesters[i].Start();
            }

            for (int i = 0; i < numThreads; i++)
            {
                ingesters[i].Join();
            }
            writer.Dispose();
            IndexReader          reader   = DirectoryReader.Open(dir);
            TermsEnum            terms    = MultiFields.GetFields(reader).GetTerms(field).GetEnumerator();
            IBits                liveDocs = MultiFields.GetLiveDocs(reader);
            DocsAndPositionsEnum tp       = null;

            while (terms.MoveNext())
            {
                string termText = terms.Term.Utf8ToString();
                tp = terms.DocsAndPositions(liveDocs, tp);
                while (tp.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                {
                    int freq = tp.Freq;
                    for (int i = 0; i < freq; i++)
                    {
                        tp.NextPosition();
                        BytesRef payload = tp.GetPayload();
                        Assert.AreEqual(termText, payload.Utf8ToString());
                    }
                }
            }
            reader.Dispose();
            dir.Dispose();
            Assert.AreEqual(pool.Count, numThreads);
        }
示例#7
0
        public virtual void TestMixupMultiValued()
        {
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            Document    doc   = new Document();
            Field       field = new TextField("field", "", Field.Store.NO);
            TokenStream ts    = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);

            Assert.IsFalse(ts.HasAttribute <PayloadAttribute>());
            field.SetTokenStream(ts);
            doc.Add(field);
            Field field2      = new TextField("field", "", Field.Store.NO);
            Token withPayload = new Token("withPayload", 0, 11);

            withPayload.Payload = new BytesRef("test");
            ts = new CannedTokenStream(withPayload);
            Assert.IsTrue(ts.HasAttribute <IPayloadAttribute>());
            field2.SetTokenStream(ts);
            doc.Add(field2);
            Field field3 = new TextField("field", "", Field.Store.NO);

            ts = new MockTokenizer(new StringReader("nopayload"), MockTokenizer.WHITESPACE, true);
            Assert.IsFalse(ts.HasAttribute <PayloadAttribute>());
            field3.SetTokenStream(ts);
            doc.Add(field3);
            writer.AddDocument(doc);
            DirectoryReader      reader = writer.GetReader();
            SegmentReader        sr     = GetOnlySegmentReader(reader);
            DocsAndPositionsEnum de     = sr.GetTermPositionsEnum(new Term("field", "withPayload"));

            de.NextDoc();
            de.NextPosition();
            Assert.AreEqual(new BytesRef("test"), de.GetPayload());
            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
示例#8
0
        public virtual void TestMixupDocs()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, iwc);
            Document          doc    = new Document();
            Field             field  = new TextField("field", "", Field.Store.NO);
            TokenStream       ts     = new MockTokenizer(new StringReader("here we go"), MockTokenizer.WHITESPACE, true);

            Assert.IsFalse(ts.HasAttribute <PayloadAttribute>());
            field.SetTokenStream(ts);
            doc.Add(field);
            writer.AddDocument(doc);
            Token withPayload = new Token("withPayload", 0, 11);

            withPayload.Payload = new BytesRef("test");
            ts = new CannedTokenStream(withPayload);
            Assert.IsTrue(ts.HasAttribute <IPayloadAttribute>());
            field.SetTokenStream(ts);
            writer.AddDocument(doc);
            ts = new MockTokenizer(new StringReader("another"), MockTokenizer.WHITESPACE, true);
            Assert.IsFalse(ts.HasAttribute <PayloadAttribute>());
            field.SetTokenStream(ts);
            writer.AddDocument(doc);
            DirectoryReader      reader = writer.GetReader();
            AtomicReader         sr     = SlowCompositeReaderWrapper.Wrap(reader);
            DocsAndPositionsEnum de     = sr.GetTermPositionsEnum(new Term("field", "withPayload"));

            de.NextDoc();
            de.NextPosition();
            Assert.AreEqual(new BytesRef("test"), de.GetPayload());
            writer.Dispose();
            reader.Dispose();
            dir.Dispose();
        }
        public virtual void DoTestNumbers(bool withPayloads)
        {
            Directory dir      = NewDirectory();
            Analyzer  analyzer = withPayloads ? (Analyzer) new MockPayloadAnalyzer() : new MockAnalyzer(Random);

            iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwc.SetMergePolicy(NewLogMergePolicy()); // will rely on docids a bit for skipping
            RandomIndexWriter w = new RandomIndexWriter(Random, dir, iwc);

            FieldType ft = new FieldType(TextField.TYPE_STORED);

            ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            if (Random.NextBoolean())
            {
                ft.StoreTermVectors         = true;
                ft.StoreTermVectorOffsets   = Random.NextBoolean();
                ft.StoreTermVectorPositions = Random.NextBoolean();
            }

            int numDocs = AtLeast(500);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("numbers", English.Int32ToEnglish(i), ft));
                doc.Add(new Field("oddeven", (i % 2) == 0 ? "even" : "odd", ft));
                doc.Add(new StringField("id", "" + i, Field.Store.NO));
                w.AddDocument(doc);
            }

            IndexReader reader = w.GetReader();

            w.Dispose();

            string[] terms = new string[] { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" };

            foreach (string term in terms)
            {
                DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef(term));
                int doc;
                while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    string storedNumbers = reader.Document(doc).Get("numbers");
                    int    freq          = dp.Freq;
                    for (int i = 0; i < freq; i++)
                    {
                        dp.NextPosition();
                        int start = dp.StartOffset;
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(start >= 0);
                        }
                        int end = dp.EndOffset;
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(end >= 0 && end >= start);
                        }
                        // check that the offsets correspond to the term in the src text
                        Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals(term, StringComparison.Ordinal));
                        if (withPayloads)
                        {
                            // check that we have a payload and it starts with "pos"
                            Assert.IsNotNull(dp.GetPayload());
                            BytesRef payload = dp.GetPayload();
                            Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal));
                        } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer!
                    }
                }
            }

            // check we can skip correctly
            int numSkippingTests = AtLeast(50);

            for (int j = 0; j < numSkippingTests; j++)
            {
                int num = TestUtil.NextInt32(Random, 100, Math.Min(numDocs - 1, 999));
                DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred"));
                int doc = dp.Advance(num);
                Assert.AreEqual(num, doc);
                int freq = dp.Freq;
                for (int i = 0; i < freq; i++)
                {
                    string storedNumbers = reader.Document(doc).Get("numbers");
                    dp.NextPosition();
                    int start = dp.StartOffset;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(start >= 0);
                    }
                    int end = dp.EndOffset;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(end >= 0 && end >= start);
                    }
                    // check that the offsets correspond to the term in the src text
                    Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals("hundred", StringComparison.Ordinal));
                    if (withPayloads)
                    {
                        // check that we have a payload and it starts with "pos"
                        Assert.IsNotNull(dp.GetPayload());
                        BytesRef payload = dp.GetPayload();
                        Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal));
                    } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer!
                }
            }

            // check that other fields (without offsets) work correctly

            for (int i = 0; i < numDocs; i++)
            {
                DocsEnum dp = MultiFields.GetTermDocsEnum(reader, null, "id", new BytesRef("" + i), 0);
                Assert.AreEqual(i, dp.NextDoc());
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());
            }

            reader.Dispose();
            dir.Dispose();
        }
示例#10
0
 public override BytesRef GetPayload()
 {
     return(current.GetPayload());
 }
示例#11
0
        public virtual void TestLongPostings_Mem()
        {
            // Don't use TestUtil.getTempDir so that we own the
            // randomness (ie same seed will point to same dir):
            Directory dir = NewFSDirectory(CreateTempDir("longpostings" + "." + Random.NextInt64()));

            int NUM_DOCS = AtLeast(2000);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS);
            }

            string s1 = GetRandomTerm(null);
            string s2 = GetRandomTerm(s1);

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: s1=" + s1 + " s2=" + s2);

                /*
                 * for(int idx=0;idx<s1.Length();idx++) {
                 * System.out.println("  s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
                 * }
                 * for(int idx=0;idx<s2.Length();idx++) {
                 * System.out.println("  s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
                 * }
                 */
            }

            FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);

            for (int idx = 0; idx < NUM_DOCS; idx++)
            {
                if (Random.NextBoolean())
                {
                    isS1.Set(idx);
                }
            }

            IndexReader       r;
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetMergePolicy(NewLogMergePolicy());

            iwc.SetRAMBufferSizeMB(16.0 + 16.0 * Random.NextDouble());
            iwc.SetMaxBufferedDocs(-1);
            RandomIndexWriter riw = new RandomIndexWriter(Random, dir, iwc);

            for (int idx = 0; idx < NUM_DOCS; idx++)
            {
                Document doc   = new Document();
                string   s     = isS1.Get(idx) ? s1 : s2;
                Field    f     = NewTextField("field", s, Field.Store.NO);
                int      count = TestUtil.NextInt32(Random, 1, 4);
                for (int ct = 0; ct < count; ct++)
                {
                    doc.Add(f);
                }
                riw.AddDocument(doc);
            }

            r = riw.GetReader();
            riw.Dispose();

            /*
             * if (VERBOSE) {
             * System.out.println("TEST: terms");
             * TermEnum termEnum = r.Terms();
             * while(termEnum.Next()) {
             *  System.out.println("  term=" + termEnum.Term() + " len=" + termEnum.Term().Text().Length());
             *  Assert.IsTrue(termEnum.DocFreq() > 0);
             *  System.out.println("    s1?=" + (termEnum.Term().Text().equals(s1)) + " s1len=" + s1.Length());
             *  System.out.println("    s2?=" + (termEnum.Term().Text().equals(s2)) + " s2len=" + s2.Length());
             *  final String s = termEnum.Term().Text();
             *  for(int idx=0;idx<s.Length();idx++) {
             *    System.out.println("      ch=0x" + Integer.toHexString(s.charAt(idx)));
             *  }
             * }
             * }
             */

            Assert.AreEqual(NUM_DOCS, r.NumDocs);
            Assert.IsTrue(r.DocFreq(new Term("field", s1)) > 0);
            Assert.IsTrue(r.DocFreq(new Term("field", s2)) > 0);

            int num = AtLeast(1000);

            for (int iter = 0; iter < num; iter++)
            {
                string term;
                bool   doS1;
                if (Random.NextBoolean())
                {
                    term = s1;
                    doS1 = true;
                }
                else
                {
                    term = s2;
                    doS1 = false;
                }

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + iter + " doS1=" + doS1);
                }

                DocsAndPositionsEnum postings = MultiFields.GetTermPositionsEnum(r, null, "field", new BytesRef(term));

                int docID = -1;
                while (docID < DocIdSetIterator.NO_MORE_DOCS)
                {
                    int what = Random.Next(3);
                    if (what == 0)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: docID=" + docID + "; do next()");
                        }
                        // nextDoc
                        int expected = docID + 1;
                        while (true)
                        {
                            if (expected == NUM_DOCS)
                            {
                                expected = int.MaxValue;
                                break;
                            }
                            else if (isS1.Get(expected) == doS1)
                            {
                                break;
                            }
                            else
                            {
                                expected++;
                            }
                        }
                        docID = postings.NextDoc();
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got docID=" + docID);
                        }
                        Assert.AreEqual(expected, docID);
                        if (docID == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }

                        if (Random.Next(6) == 3)
                        {
                            int freq = postings.Freq;
                            Assert.IsTrue(freq >= 1 && freq <= 4);
                            for (int pos = 0; pos < freq; pos++)
                            {
                                Assert.AreEqual(pos, postings.NextPosition());
                                if (Random.NextBoolean())
                                {
                                    var dummy = postings.GetPayload();
                                    if (Random.NextBoolean())
                                    {
                                        dummy = postings.GetPayload(); // get it again
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        // advance
                        int targetDocID;
                        if (docID == -1)
                        {
                            targetDocID = Random.Next(NUM_DOCS + 1);
                        }
                        else
                        {
                            targetDocID = docID + TestUtil.NextInt32(Random, 1, NUM_DOCS - docID);
                        }
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
                        }
                        int expected = targetDocID;
                        while (true)
                        {
                            if (expected == NUM_DOCS)
                            {
                                expected = int.MaxValue;
                                break;
                            }
                            else if (isS1.Get(expected) == doS1)
                            {
                                break;
                            }
                            else
                            {
                                expected++;
                            }
                        }

                        docID = postings.Advance(targetDocID);
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got docID=" + docID);
                        }
                        Assert.AreEqual(expected, docID);
                        if (docID == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }

                        if (Random.Next(6) == 3)
                        {
                            int freq = postings.Freq;
                            Assert.IsTrue(freq >= 1 && freq <= 4);
                            for (int pos = 0; pos < freq; pos++)
                            {
                                Assert.AreEqual(pos, postings.NextPosition());
                                if (Random.NextBoolean())
                                {
                                    var dummy = postings.GetPayload();
                                    if (Random.NextBoolean())
                                    {
                                        dummy = postings.GetPayload(); // get it again
                                    }
                                }
                            }
                        }
                    }
                }
            }
            r.Dispose();
            dir.Dispose();
        }
示例#12
0
        // builds an index with payloads in the given Directory and performs
        // different tests to verify the payload encoding
        private void PerformTest(Directory dir)
        {
            PayloadAnalyzer analyzer = new PayloadAnalyzer();
            IndexWriter     writer   = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE).SetMergePolicy(NewLogMergePolicy()));

            // should be in sync with value in TermInfosWriter
            const int skipInterval = 16;

            const int    numTerms  = 5;
            const string fieldName = "f1";

            int numDocs = skipInterval + 1;

            // create content for the test documents with just a few terms
            Term[]        terms = GenerateTerms(fieldName, numTerms);
            StringBuilder sb    = new StringBuilder();

            for (int i = 0; i < terms.Length; i++)
            {
                sb.Append(terms[i].Text());
                sb.Append(" ");
            }
            string content = sb.ToString();

            int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
            var payloadData       = GenerateRandomData(payloadDataLength);

            Document d = new Document();

            d.Add(NewTextField(fieldName, content, Field.Store.NO));
            // add the same document multiple times to have the same payload lengths for all
            // occurrences within two consecutive skip intervals
            int offset = 0;

            for (int i = 0; i < 2 * numDocs; i++)
            {
                analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, 1);
                offset  += numTerms;
                writer.AddDocument(d, analyzer);
            }

            // make sure we create more than one segment to test merging
            writer.Commit();

            // now we make sure to have different payload lengths next at the next skip point
            for (int i = 0; i < numDocs; i++)
            {
                analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, i);
                offset  += i * numTerms;
                writer.AddDocument(d, analyzer);
            }

            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            /*
             * Verify the index
             * first we test if all payloads are stored correctly
             */
            IndexReader reader = DirectoryReader.Open(dir);

            var verifyPayloadData = new byte[payloadDataLength];

            offset = 0;
            var tps = new DocsAndPositionsEnum[numTerms];

            for (int i = 0; i < numTerms; i++)
            {
                tps[i] = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[i].Field, new BytesRef(terms[i].Text()));
            }

            while (tps[0].NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                for (int i = 1; i < numTerms; i++)
                {
                    tps[i].NextDoc();
                }
                int freq = tps[0].Freq;

                for (int i = 0; i < freq; i++)
                {
                    for (int j = 0; j < numTerms; j++)
                    {
                        tps[j].NextPosition();
                        BytesRef br = tps[j].GetPayload();
                        if (br != null)
                        {
                            Array.Copy(br.Bytes, br.Offset, verifyPayloadData, offset, br.Length);
                            offset += br.Length;
                        }
                    }
                }
            }

            AssertByteArrayEquals(payloadData, verifyPayloadData);

            /*
             *  test lazy skipping
             */
            DocsAndPositionsEnum tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[0].Field, new BytesRef(terms[0].Text()));

            tp.NextDoc();
            tp.NextPosition();
            // NOTE: prior rev of this test was failing to first
            // call next here:
            tp.NextDoc();
            // now we don't read this payload
            tp.NextPosition();
            BytesRef payload = tp.GetPayload();

            Assert.AreEqual(1, payload.Length, "Wrong payload length.");
            Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[numTerms]);
            tp.NextDoc();
            tp.NextPosition();

            // we don't read this payload and skip to a different document
            tp.Advance(5);
            tp.NextPosition();
            payload = tp.GetPayload();
            Assert.AreEqual(1, payload.Length, "Wrong payload length.");
            Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[5 * numTerms]);

            /*
             * Test different lengths at skip points
             */
            tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[1].Field, new BytesRef(terms[1].Text()));
            tp.NextDoc();
            tp.NextPosition();
            Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length.");
            tp.Advance(skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length.");
            tp.Advance(2 * skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length.");
            tp.Advance(3 * skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.GetPayload().Length, "Wrong payload length.");

            reader.Dispose();

            // test long payload
            analyzer = new PayloadAnalyzer();
            writer   = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE));
            string singleTerm = "lucene";

            d = new Document();
            d.Add(NewTextField(fieldName, singleTerm, Field.Store.NO));
            // add a payload whose length is greater than the buffer size of BufferedIndexOutput
            payloadData = GenerateRandomData(2000);
            analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
            writer.AddDocument(d);

            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            reader = DirectoryReader.Open(dir);
            tp     = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), fieldName, new BytesRef(singleTerm));
            tp.NextDoc();
            tp.NextPosition();

            BytesRef bref = tp.GetPayload();

            verifyPayloadData = new byte[bref.Length];
            var portion = new byte[1500];

            Array.Copy(payloadData, 100, portion, 0, 1500);

            AssertByteArrayEquals(portion, bref.Bytes, bref.Offset, bref.Length);
            reader.Dispose();
        }
示例#13
0
 public override BytesRef GetPayload()
 {
     return(m_input.GetPayload());
 }