예제 #1
0
        public virtual void TestPositionsSimple()
        {
            Directory         directory = NewDirectory();
            RandomIndexWriter writer    = new RandomIndexWriter(Random, directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));

            for (int i = 0; i < 39; i++)
            {
                Document  doc        = new Document();
                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.OmitNorms = true;
                doc.Add(NewField(FieldName, "1 2 3 4 5 6 7 8 9 10 " + "1 2 3 4 5 6 7 8 9 10 " + "1 2 3 4 5 6 7 8 9 10 " + "1 2 3 4 5 6 7 8 9 10", customType));
                writer.AddDocument(doc);
            }
            IndexReader reader = writer.GetReader();

            writer.Dispose();

            int num = AtLeast(13);

            for (int i = 0; i < num; i++)
            {
                BytesRef           bytes            = new BytesRef("1");
                IndexReaderContext topReaderContext = reader.Context;
                foreach (AtomicReaderContext atomicReaderContext in topReaderContext.Leaves)
                {
                    DocsAndPositionsEnum docsAndPosEnum = GetDocsAndPositions((AtomicReader)atomicReaderContext.Reader, bytes, null);
                    Assert.IsNotNull(docsAndPosEnum);
                    if (atomicReaderContext.Reader.MaxDoc == 0)
                    {
                        continue;
                    }
                    int advance = docsAndPosEnum.Advance(Random.Next(atomicReaderContext.Reader.MaxDoc));
                    do
                    {
                        string msg = "Advanced to: " + advance + " current doc: " + docsAndPosEnum.DocID; // TODO: + " usePayloads: " + usePayload;
                        Assert.AreEqual(4, docsAndPosEnum.Freq, msg);
                        Assert.AreEqual(0, docsAndPosEnum.NextPosition(), msg);
                        Assert.AreEqual(4, docsAndPosEnum.Freq, msg);
                        Assert.AreEqual(10, docsAndPosEnum.NextPosition(), msg);
                        Assert.AreEqual(4, docsAndPosEnum.Freq, msg);
                        Assert.AreEqual(20, docsAndPosEnum.NextPosition(), msg);
                        Assert.AreEqual(4, docsAndPosEnum.Freq, msg);
                        Assert.AreEqual(30, docsAndPosEnum.NextPosition(), msg);
                    } while (docsAndPosEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                }
            }
            reader.Dispose();
            directory.Dispose();
        }
        public virtual void CheckSkipTo(DocsAndPositionsEnum tp, int target, int maxCounter)
        {
            tp.Advance(target);
            if (maxCounter < Counter)
            {
                Assert.Fail("Too many bytes read: " + Counter + " vs " + maxCounter);
            }

            Assert.AreEqual(target, tp.DocID(), "Wrong document " + tp.DocID() + " after skipTo target " + target);
            Assert.AreEqual(1, tp.Freq(), "Frequency is not 1: " + tp.Freq());
            tp.NextPosition();
            BytesRef b = tp.Payload;
            Assert.AreEqual(1, b.Length);
            Assert.AreEqual((sbyte)target, (sbyte)b.Bytes[b.Offset], "Wrong payload for the target " + target + ": " + (sbyte)b.Bytes[b.Offset]);
        }
예제 #3
0
        public virtual void CheckSkipTo(DocsAndPositionsEnum tp, int target, int maxCounter)
        {
            tp.Advance(target);
            if (maxCounter < counter)
            {
                Assert.Fail("Too many bytes read: " + counter + " vs " + maxCounter);
            }

            Assert.AreEqual(target, tp.DocID, "Wrong document " + tp.DocID + " after skipTo target " + target);
            Assert.AreEqual(1, tp.Freq, "Frequency is not 1: " + tp.Freq);
            tp.NextPosition();
            BytesRef b = tp.GetPayload();

            Assert.AreEqual(1, b.Length);
            Assert.AreEqual((sbyte)target, (sbyte)b.Bytes[b.Offset], "Wrong payload for the target " + target + ": " + (sbyte)b.Bytes[b.Offset]);
        }
예제 #4
0
 public override int Advance(int target)
 {
     if (Debugging.AssertsEnabled)
     {
         Debugging.Assert(target > doc);
     }
     while (true)
     {
         if (current != null)
         {
             int doc;
             if (target < currentBase)
             {
                 // target was in the previous slice but there was no matching doc after it
                 doc = current.NextDoc();
             }
             else
             {
                 doc = current.Advance(target - currentBase);
             }
             if (doc == NO_MORE_DOCS)
             {
                 current = null;
             }
             else
             {
                 return(this.doc = doc + currentBase);
             }
         }
         else if (upto == numSubs - 1)
         {
             return(this.doc = NO_MORE_DOCS);
         }
         else
         {
             upto++;
             current     = subs[upto].DocsAndPositionsEnum;
             currentBase = subs[upto].Slice.Start;
         }
     }
 }
예제 #5
0
 public override int Advance(int target)
 {
     Debug.Assert(target > Doc);
     while (true)
     {
         if (Current != null)
         {
             int doc;
             if (target < CurrentBase)
             {
                 // target was in the previous slice but there was no matching doc after it
                 doc = Current.NextDoc();
             }
             else
             {
                 doc = Current.Advance(target - CurrentBase);
             }
             if (doc == NO_MORE_DOCS)
             {
                 Current = null;
             }
             else
             {
                 return(this.Doc = doc + CurrentBase);
             }
         }
         else if (Upto == NumSubs_Renamed - 1)
         {
             return(this.Doc = NO_MORE_DOCS);
         }
         else
         {
             Upto++;
             Current     = Subs_Renamed[Upto].DocsAndPositionsEnum;
             CurrentBase = Subs_Renamed[Upto].Slice.Start;
         }
     }
 }
예제 #6
0
        public virtual void DoTestNumbers(bool withPayloads)
        {
            Directory dir      = NewDirectory();
            Analyzer  analyzer = withPayloads ? (Analyzer) new MockPayloadAnalyzer() : new MockAnalyzer(Random);

            iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwc.SetMergePolicy(NewLogMergePolicy()); // will rely on docids a bit for skipping
            RandomIndexWriter w = new RandomIndexWriter(Random, dir, iwc);

            FieldType ft = new FieldType(TextField.TYPE_STORED);

            ft.IndexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            if (Random.NextBoolean())
            {
                ft.StoreTermVectors         = true;
                ft.StoreTermVectorOffsets   = Random.NextBoolean();
                ft.StoreTermVectorPositions = Random.NextBoolean();
            }

            int numDocs = AtLeast(500);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new Field("numbers", English.Int32ToEnglish(i), ft));
                doc.Add(new Field("oddeven", (i % 2) == 0 ? "even" : "odd", ft));
                doc.Add(new StringField("id", "" + i, Field.Store.NO));
                w.AddDocument(doc);
            }

            IndexReader reader = w.GetReader();

            w.Dispose();

            string[] terms = new string[] { "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten", "hundred" };

            foreach (string term in terms)
            {
                DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef(term));
                int doc;
                while ((doc = dp.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    string storedNumbers = reader.Document(doc).Get("numbers");
                    int    freq          = dp.Freq;
                    for (int i = 0; i < freq; i++)
                    {
                        dp.NextPosition();
                        int start = dp.StartOffset;
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(start >= 0);
                        }
                        int end = dp.EndOffset;
                        if (Debugging.AssertsEnabled)
                        {
                            Debugging.Assert(end >= 0 && end >= start);
                        }
                        // check that the offsets correspond to the term in the src text
                        Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals(term, StringComparison.Ordinal));
                        if (withPayloads)
                        {
                            // check that we have a payload and it starts with "pos"
                            Assert.IsNotNull(dp.GetPayload());
                            BytesRef payload = dp.GetPayload();
                            Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal));
                        } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer!
                    }
                }
            }

            // check we can skip correctly
            int numSkippingTests = AtLeast(50);

            for (int j = 0; j < numSkippingTests; j++)
            {
                int num = TestUtil.NextInt32(Random, 100, Math.Min(numDocs - 1, 999));
                DocsAndPositionsEnum dp = MultiFields.GetTermPositionsEnum(reader, null, "numbers", new BytesRef("hundred"));
                int doc = dp.Advance(num);
                Assert.AreEqual(num, doc);
                int freq = dp.Freq;
                for (int i = 0; i < freq; i++)
                {
                    string storedNumbers = reader.Document(doc).Get("numbers");
                    dp.NextPosition();
                    int start = dp.StartOffset;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(start >= 0);
                    }
                    int end = dp.EndOffset;
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(end >= 0 && end >= start);
                    }
                    // check that the offsets correspond to the term in the src text
                    Assert.IsTrue(storedNumbers.Substring(start, end - start).Equals("hundred", StringComparison.Ordinal));
                    if (withPayloads)
                    {
                        // check that we have a payload and it starts with "pos"
                        Assert.IsNotNull(dp.GetPayload());
                        BytesRef payload = dp.GetPayload();
                        Assert.IsTrue(payload.Utf8ToString().StartsWith("pos:", StringComparison.Ordinal));
                    } // note: withPayloads=false doesnt necessarily mean we dont have them from MockAnalyzer!
                }
            }

            // check that other fields (without offsets) work correctly

            for (int i = 0; i < numDocs; i++)
            {
                DocsEnum dp = MultiFields.GetTermDocsEnum(reader, null, "id", new BytesRef("" + i), 0);
                Assert.AreEqual(i, dp.NextDoc());
                Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dp.NextDoc());
            }

            reader.Dispose();
            dir.Dispose();
        }
예제 #7
0
        /// <summary>
        /// checks advancing docs + positions
        /// </summary>
        public void AssertPositionsSkippingEquals(string info, IndexReader leftReader, int docFreq, DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum rightDocs)
        {
            if (leftDocs == null || rightDocs == null)
            {
                Assert.IsNull(leftDocs);
                Assert.IsNull(rightDocs);
                return;
            }

            int docid = -1;
            int averageGap = leftReader.MaxDoc / (1 + docFreq);
            int skipInterval = 16;

            while (true)
            {
                if (Random().NextBoolean())
                {
                    // nextDoc()
                    docid = leftDocs.NextDoc();
                    Assert.AreEqual(docid, rightDocs.NextDoc(), info);
                }
                else
                {
                    // advance()
                    int skip = docid + (int)Math.Ceiling(Math.Abs(skipInterval + Random().NextDouble() * averageGap));
                    docid = leftDocs.Advance(skip);
                    Assert.AreEqual(docid, rightDocs.Advance(skip), info);
                }

                if (docid == DocIdSetIterator.NO_MORE_DOCS)
                {
                    return;
                }
                int freq = leftDocs.Freq();
                Assert.AreEqual(freq, rightDocs.Freq(), info);
                for (int i = 0; i < freq; i++)
                {
                    Assert.AreEqual(leftDocs.NextPosition(), rightDocs.NextPosition(), info);
                    Assert.AreEqual(leftDocs.Payload, rightDocs.Payload, info);
                }
            }
        }
예제 #8
0
        public virtual void TestLargeNumberOfPositions()
        {
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));
            int       howMany        = 1000;
            FieldType customType     = new FieldType(TextField.TYPE_NOT_STORED);

            customType.OmitNorms = true;
            for (int i = 0; i < 39; i++)
            {
                Document      doc     = new Document();
                StringBuilder builder = new StringBuilder();
                for (int j = 0; j < howMany; j++)
                {
                    if (j % 2 == 0)
                    {
                        builder.Append("even ");
                    }
                    else
                    {
                        builder.Append("odd ");
                    }
                }
                doc.Add(NewField(FieldName, builder.ToString(), customType));
                writer.AddDocument(doc);
            }

            // now do searches
            IndexReader reader = writer.GetReader();

            writer.Dispose();

            int num = AtLeast(13);

            for (int i = 0; i < num; i++)
            {
                BytesRef bytes = new BytesRef("even");

                IndexReaderContext topReaderContext = reader.Context;
                foreach (AtomicReaderContext atomicReaderContext in topReaderContext.Leaves)
                {
                    DocsAndPositionsEnum docsAndPosEnum = GetDocsAndPositions((AtomicReader)atomicReaderContext.Reader, bytes, null);
                    Assert.IsNotNull(docsAndPosEnum);

                    int initDoc = 0;
                    int maxDoc  = atomicReaderContext.Reader.MaxDoc;
                    // initially advance or do next doc
                    if (Random.NextBoolean())
                    {
                        initDoc = docsAndPosEnum.NextDoc();
                    }
                    else
                    {
                        initDoc = docsAndPosEnum.Advance(Random.Next(maxDoc));
                    }
                    string msg = "Iteration: " + i + " initDoc: " + initDoc; // TODO: + " payloads: " + usePayload;
                    Assert.AreEqual(howMany / 2, docsAndPosEnum.Freq);
                    for (int j = 0; j < howMany; j += 2)
                    {
                        Assert.AreEqual(j, docsAndPosEnum.NextPosition(), "position missmatch index: " + j + " with freq: " + docsAndPosEnum.Freq + " -- " + msg);
                    }
                }
            }
            reader.Dispose();
            dir.Dispose();
        }
예제 #9
0
        public virtual void TestRandomPositions()
        {
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));
            int numDocs = AtLeast(47);
            int max     = 1051;
            int term    = Random.Next(max);

            int?[][]  positionsInDoc = new int?[numDocs][];
            FieldType customType     = new FieldType(TextField.TYPE_NOT_STORED);

            customType.OmitNorms = true;
            for (int i = 0; i < numDocs; i++)
            {
                Document      doc       = new Document();
                List <int?>   positions = new List <int?>();
                StringBuilder builder   = new StringBuilder();
                int           num       = AtLeast(131);
                for (int j = 0; j < num; j++)
                {
                    int nextInt = Random.Next(max);
                    builder.Append(nextInt).Append(" ");
                    if (nextInt == term)
                    {
                        positions.Add(Convert.ToInt32(j));
                    }
                }
                if (positions.Count == 0)
                {
                    builder.Append(term);
                    positions.Add(num);
                }
                doc.Add(NewField(FieldName, builder.ToString(), customType));
                positionsInDoc[i] = positions.ToArray();
                writer.AddDocument(doc);
            }

            IndexReader reader = writer.GetReader();

            writer.Dispose();

            int num_ = AtLeast(13);

            for (int i = 0; i < num_; i++)
            {
                BytesRef           bytes            = new BytesRef("" + term);
                IndexReaderContext topReaderContext = reader.Context;
                foreach (AtomicReaderContext atomicReaderContext in topReaderContext.Leaves)
                {
                    DocsAndPositionsEnum docsAndPosEnum = GetDocsAndPositions((AtomicReader)atomicReaderContext.Reader, bytes, null);
                    Assert.IsNotNull(docsAndPosEnum);
                    int initDoc = 0;
                    int maxDoc  = atomicReaderContext.Reader.MaxDoc;
                    // initially advance or do next doc
                    if (Random.NextBoolean())
                    {
                        initDoc = docsAndPosEnum.NextDoc();
                    }
                    else
                    {
                        initDoc = docsAndPosEnum.Advance(Random.Next(maxDoc));
                    }
                    // now run through the scorer and check if all positions are there...
                    do
                    {
                        int docID = docsAndPosEnum.DocID;
                        if (docID == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }
                        int?[] pos = positionsInDoc[atomicReaderContext.DocBase + docID];
                        Assert.AreEqual(pos.Length, docsAndPosEnum.Freq);
                        // number of positions read should be random - don't read all of them
                        // allways
                        int howMany = Random.Next(20) == 0 ? pos.Length - Random.Next(pos.Length) : pos.Length;
                        for (int j = 0; j < howMany; j++)
                        {
                            Assert.AreEqual(pos[j], docsAndPosEnum.NextPosition(), "iteration: " + i + " initDoc: " + initDoc + " doc: " + docID + " base: " + atomicReaderContext.DocBase + " positions: " + pos); /* TODO: + " usePayloads: "
                                                                                                                                                                                                                     + usePayload*/
                        }

                        if (Random.Next(10) == 0) // once is a while advance
                        {
                            if (docsAndPosEnum.Advance(docID + 1 + Random.Next((maxDoc - docID))) == DocIdSetIterator.NO_MORE_DOCS)
                            {
                                break;
                            }
                        }
                    } while (docsAndPosEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                }
            }
            reader.Dispose();
            dir.Dispose();
        }
예제 #10
0
        public virtual void TestLongPostings_Mem()
        {
            // Don't use TestUtil.getTempDir so that we own the
            // randomness (ie same seed will point to same dir):
            Directory dir = NewFSDirectory(CreateTempDir("longpostings" + "." + Random.NextInt64()));

            int NUM_DOCS = AtLeast(2000);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS);
            }

            string s1 = GetRandomTerm(null);
            string s2 = GetRandomTerm(s1);

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: s1=" + s1 + " s2=" + s2);

                /*
                 * for(int idx=0;idx<s1.Length();idx++) {
                 * System.out.println("  s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
                 * }
                 * for(int idx=0;idx<s2.Length();idx++) {
                 * System.out.println("  s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
                 * }
                 */
            }

            FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);

            for (int idx = 0; idx < NUM_DOCS; idx++)
            {
                if (Random.NextBoolean())
                {
                    isS1.Set(idx);
                }
            }

            IndexReader       r;
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetOpenMode(OpenMode.CREATE).SetMergePolicy(NewLogMergePolicy());

            iwc.SetRAMBufferSizeMB(16.0 + 16.0 * Random.NextDouble());
            iwc.SetMaxBufferedDocs(-1);
            RandomIndexWriter riw = new RandomIndexWriter(Random, dir, iwc);

            for (int idx = 0; idx < NUM_DOCS; idx++)
            {
                Document doc   = new Document();
                string   s     = isS1.Get(idx) ? s1 : s2;
                Field    f     = NewTextField("field", s, Field.Store.NO);
                int      count = TestUtil.NextInt32(Random, 1, 4);
                for (int ct = 0; ct < count; ct++)
                {
                    doc.Add(f);
                }
                riw.AddDocument(doc);
            }

            r = riw.GetReader();
            riw.Dispose();

            /*
             * if (VERBOSE) {
             * System.out.println("TEST: terms");
             * TermEnum termEnum = r.Terms();
             * while(termEnum.Next()) {
             *  System.out.println("  term=" + termEnum.Term() + " len=" + termEnum.Term().Text().Length());
             *  Assert.IsTrue(termEnum.DocFreq() > 0);
             *  System.out.println("    s1?=" + (termEnum.Term().Text().equals(s1)) + " s1len=" + s1.Length());
             *  System.out.println("    s2?=" + (termEnum.Term().Text().equals(s2)) + " s2len=" + s2.Length());
             *  final String s = termEnum.Term().Text();
             *  for(int idx=0;idx<s.Length();idx++) {
             *    System.out.println("      ch=0x" + Integer.toHexString(s.charAt(idx)));
             *  }
             * }
             * }
             */

            Assert.AreEqual(NUM_DOCS, r.NumDocs);
            Assert.IsTrue(r.DocFreq(new Term("field", s1)) > 0);
            Assert.IsTrue(r.DocFreq(new Term("field", s2)) > 0);

            int num = AtLeast(1000);

            for (int iter = 0; iter < num; iter++)
            {
                string term;
                bool   doS1;
                if (Random.NextBoolean())
                {
                    term = s1;
                    doS1 = true;
                }
                else
                {
                    term = s2;
                    doS1 = false;
                }

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + iter + " doS1=" + doS1);
                }

                DocsAndPositionsEnum postings = MultiFields.GetTermPositionsEnum(r, null, "field", new BytesRef(term));

                int docID = -1;
                while (docID < DocIdSetIterator.NO_MORE_DOCS)
                {
                    int what = Random.Next(3);
                    if (what == 0)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: docID=" + docID + "; do next()");
                        }
                        // nextDoc
                        int expected = docID + 1;
                        while (true)
                        {
                            if (expected == NUM_DOCS)
                            {
                                expected = int.MaxValue;
                                break;
                            }
                            else if (isS1.Get(expected) == doS1)
                            {
                                break;
                            }
                            else
                            {
                                expected++;
                            }
                        }
                        docID = postings.NextDoc();
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got docID=" + docID);
                        }
                        Assert.AreEqual(expected, docID);
                        if (docID == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }

                        if (Random.Next(6) == 3)
                        {
                            int freq = postings.Freq;
                            Assert.IsTrue(freq >= 1 && freq <= 4);
                            for (int pos = 0; pos < freq; pos++)
                            {
                                Assert.AreEqual(pos, postings.NextPosition());
                                if (Random.NextBoolean())
                                {
                                    var dummy = postings.GetPayload();
                                    if (Random.NextBoolean())
                                    {
                                        dummy = postings.GetPayload(); // get it again
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        // advance
                        int targetDocID;
                        if (docID == -1)
                        {
                            targetDocID = Random.Next(NUM_DOCS + 1);
                        }
                        else
                        {
                            targetDocID = docID + TestUtil.NextInt32(Random, 1, NUM_DOCS - docID);
                        }
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
                        }
                        int expected = targetDocID;
                        while (true)
                        {
                            if (expected == NUM_DOCS)
                            {
                                expected = int.MaxValue;
                                break;
                            }
                            else if (isS1.Get(expected) == doS1)
                            {
                                break;
                            }
                            else
                            {
                                expected++;
                            }
                        }

                        docID = postings.Advance(targetDocID);
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got docID=" + docID);
                        }
                        Assert.AreEqual(expected, docID);
                        if (docID == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }

                        if (Random.Next(6) == 3)
                        {
                            int freq = postings.Freq;
                            Assert.IsTrue(freq >= 1 && freq <= 4);
                            for (int pos = 0; pos < freq; pos++)
                            {
                                Assert.AreEqual(pos, postings.NextPosition());
                                if (Random.NextBoolean())
                                {
                                    var dummy = postings.GetPayload();
                                    if (Random.NextBoolean())
                                    {
                                        dummy = postings.GetPayload(); // get it again
                                    }
                                }
                            }
                        }
                    }
                }
            }
            r.Dispose();
            dir.Dispose();
        }
        /// <summary>
        /// checks advancing docs + positions
        /// </summary>
        public virtual void AssertPositionsSkipping(int docFreq, DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum rightDocs)
        {
            if (leftDocs == null || rightDocs == null)
            {
                Assert.IsNull(leftDocs);
                Assert.IsNull(rightDocs);
                return;
            }

            int docid = -1;
            int averageGap = MAXDOC / (1 + docFreq);
            int skipInterval = 16;

            while (true)
            {
                if (Random().NextBoolean())
                {
                    // nextDoc()
                    docid = leftDocs.NextDoc();
                    Assert.AreEqual(docid, rightDocs.NextDoc());
                }
                else
                {
                    // advance()
                    int skip = docid + (int)Math.Ceiling(Math.Abs(skipInterval + Random().NextDouble() * averageGap));
                    docid = leftDocs.Advance(skip);
                    Assert.AreEqual(docid, rightDocs.Advance(skip));
                }

                if (docid == DocIdSetIterator.NO_MORE_DOCS)
                {
                    return;
                }
                int freq = leftDocs.Freq();
                Assert.AreEqual(freq, rightDocs.Freq());
                for (int i = 0; i < freq; i++)
                {
                    Assert.AreEqual(leftDocs.NextPosition(), rightDocs.NextPosition());
                    // we don't compare the payloads, its allowed that one is empty etc
                }
            }
        }
예제 #12
0
        // builds an index with payloads in the given Directory and performs
        // different tests to verify the payload encoding
        private void PerformTest(Directory dir)
        {
            PayloadAnalyzer analyzer = new PayloadAnalyzer();
            IndexWriter     writer   = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE).SetMergePolicy(NewLogMergePolicy()));

            // should be in sync with value in TermInfosWriter
            const int skipInterval = 16;

            const int    numTerms  = 5;
            const string fieldName = "f1";

            int numDocs = skipInterval + 1;

            // create content for the test documents with just a few terms
            Term[]        terms = GenerateTerms(fieldName, numTerms);
            StringBuilder sb    = new StringBuilder();

            for (int i = 0; i < terms.Length; i++)
            {
                sb.Append(terms[i].Text());
                sb.Append(" ");
            }
            string content = sb.ToString();

            int payloadDataLength = numTerms * numDocs * 2 + numTerms * numDocs * (numDocs - 1) / 2;
            var payloadData       = GenerateRandomData(payloadDataLength);

            Document d = new Document();

            d.Add(NewTextField(fieldName, content, Field.Store.NO));
            // add the same document multiple times to have the same payload lengths for all
            // occurrences within two consecutive skip intervals
            int offset = 0;

            for (int i = 0; i < 2 * numDocs; i++)
            {
                analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, 1);
                offset  += numTerms;
                writer.AddDocument(d, analyzer);
            }

            // make sure we create more than one segment to test merging
            writer.Commit();

            // now we make sure to have different payload lengths next at the next skip point
            for (int i = 0; i < numDocs; i++)
            {
                analyzer = new PayloadAnalyzer(fieldName, payloadData, offset, i);
                offset  += i * numTerms;
                writer.AddDocument(d, analyzer);
            }

            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            /*
             * Verify the index
             * first we test if all payloads are stored correctly
             */
            IndexReader reader = DirectoryReader.Open(dir);

            var verifyPayloadData = new byte[payloadDataLength];

            offset = 0;
            var tps = new DocsAndPositionsEnum[numTerms];

            for (int i = 0; i < numTerms; i++)
            {
                tps[i] = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[i].Field, new BytesRef(terms[i].Text()));
            }

            while (tps[0].NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                for (int i = 1; i < numTerms; i++)
                {
                    tps[i].NextDoc();
                }
                int freq = tps[0].Freq;

                for (int i = 0; i < freq; i++)
                {
                    for (int j = 0; j < numTerms; j++)
                    {
                        tps[j].NextPosition();
                        BytesRef br = tps[j].GetPayload();
                        if (br != null)
                        {
                            Array.Copy(br.Bytes, br.Offset, verifyPayloadData, offset, br.Length);
                            offset += br.Length;
                        }
                    }
                }
            }

            AssertByteArrayEquals(payloadData, verifyPayloadData);

            /*
             *  test lazy skipping
             */
            DocsAndPositionsEnum tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[0].Field, new BytesRef(terms[0].Text()));

            tp.NextDoc();
            tp.NextPosition();
            // NOTE: prior rev of this test was failing to first
            // call next here:
            tp.NextDoc();
            // now we don't read this payload
            tp.NextPosition();
            BytesRef payload = tp.GetPayload();

            Assert.AreEqual(1, payload.Length, "Wrong payload length.");
            Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[numTerms]);
            tp.NextDoc();
            tp.NextPosition();

            // we don't read this payload and skip to a different document
            tp.Advance(5);
            tp.NextPosition();
            payload = tp.GetPayload();
            Assert.AreEqual(1, payload.Length, "Wrong payload length.");
            Assert.AreEqual(payload.Bytes[payload.Offset], payloadData[5 * numTerms]);

            /*
             * Test different lengths at skip points
             */
            tp = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), terms[1].Field, new BytesRef(terms[1].Text()));
            tp.NextDoc();
            tp.NextPosition();
            Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length.");
            tp.Advance(skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length.");
            tp.Advance(2 * skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(1, tp.GetPayload().Length, "Wrong payload length.");
            tp.Advance(3 * skipInterval - 1);
            tp.NextPosition();
            Assert.AreEqual(3 * skipInterval - 2 * numDocs - 1, tp.GetPayload().Length, "Wrong payload length.");

            reader.Dispose();

            // test long payload
            analyzer = new PayloadAnalyzer();
            writer   = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetOpenMode(OpenMode.CREATE));
            string singleTerm = "lucene";

            d = new Document();
            d.Add(NewTextField(fieldName, singleTerm, Field.Store.NO));
            // add a payload whose length is greater than the buffer size of BufferedIndexOutput
            payloadData = GenerateRandomData(2000);
            analyzer.SetPayloadData(fieldName, payloadData, 100, 1500);
            writer.AddDocument(d);

            writer.ForceMerge(1);
            // flush
            writer.Dispose();

            reader = DirectoryReader.Open(dir);
            tp     = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), fieldName, new BytesRef(singleTerm));
            tp.NextDoc();
            tp.NextPosition();

            BytesRef bref = tp.GetPayload();

            verifyPayloadData = new byte[bref.Length];
            var portion = new byte[1500];

            Array.Copy(payloadData, 100, portion, 0, 1500);

            AssertByteArrayEquals(portion, bref.Bytes, bref.Offset, bref.Length);
            reader.Dispose();
        }
예제 #13
0
 public override int Advance(int target)
 {
     return(m_input.Advance(target));
 }
예제 #14
0
 public override int Advance(int target)
 {
     return(@in.Advance(target));
 }