Ejemplo n.º 1
0
        public virtual void TestDocsAndPositionsEnum()
        {
            TermsEnum termsEnum = reader.Terms(DOC_POSITIONS_FIELD).Iterator(null);

            assertEquals(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef(DOC_POSITIONS_TERM)));
            DocsAndPositionsEnum sortedPositions = termsEnum.DocsAndPositions(null, null);
            int doc;

            // test nextDoc()
            while ((doc = sortedPositions.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
            {
                int freq = sortedPositions.Freq();
                assertEquals("incorrect freq for doc=" + doc, sortedValues[doc] / 10 + 1, freq);
                for (int i = 0; i < freq; i++)
                {
                    assertEquals("incorrect position for doc=" + doc, i, sortedPositions.NextPosition());
                    if (!DoesntSupportOffsets.contains(TestUtil.GetPostingsFormat(DOC_POSITIONS_FIELD)))
                    {
                        assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.StartOffset());
                        assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.EndOffset());
                    }
                    assertEquals("incorrect payload for doc=" + doc, freq - i, int.Parse(sortedPositions.Payload.Utf8ToString(), CultureInfo.InvariantCulture));
                }
            }

            // test advance()
            DocsAndPositionsEnum reuse = sortedPositions;

            sortedPositions = termsEnum.DocsAndPositions(null, reuse);
            if (sortedPositions is SortingAtomicReader.SortingDocsAndPositionsEnum)
            {
                assertTrue(((SortingAtomicReader.SortingDocsAndPositionsEnum)sortedPositions).Reused(reuse)); // make sure reuse worked
            }
            doc = 0;
            while ((doc = sortedPositions.Advance(doc + TestUtil.NextInt(Random(), 1, 5))) != DocIdSetIterator.NO_MORE_DOCS)
            {
                int freq = sortedPositions.Freq();
                assertEquals("incorrect freq for doc=" + doc, sortedValues[doc] / 10 + 1, freq);
                for (int i = 0; i < freq; i++)
                {
                    assertEquals("incorrect position for doc=" + doc, i, sortedPositions.NextPosition());
                    if (!DoesntSupportOffsets.contains(TestUtil.GetPostingsFormat(DOC_POSITIONS_FIELD)))
                    {
                        assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.StartOffset());
                        assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.EndOffset());
                    }
                    assertEquals("incorrect payload for doc=" + doc, freq - i, int.Parse(sortedPositions.Payload.Utf8ToString(), CultureInfo.InvariantCulture));
                }
            }
        }
Ejemplo n.º 2
0
            internal virtual void AddPositions(DocsAndPositionsEnum @in, IndexOutput @out)
            {
                int freq = @in.Freq();

                @out.WriteVInt(freq);
                int previousPosition  = 0;
                int previousEndOffset = 0;

                for (int i = 0; i < freq; i++)
                {
                    int      pos     = @in.NextPosition();
                    BytesRef payload = @in.Payload;
                    // The low-order bit of token is set only if there is a payload, the
                    // previous bits are the delta-encoded position.
                    int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1);
                    @out.WriteVInt(token);
                    previousPosition = pos;
                    if (storeOffsets) // don't encode offsets if they are not stored
                    {
                        int startOffset = @in.StartOffset();
                        int endOffset   = @in.EndOffset();
                        @out.WriteVInt(startOffset - previousEndOffset);
                        @out.WriteVInt(endOffset - startOffset);
                        previousEndOffset = endOffset;
                    }
                    if (payload != null)
                    {
                        @out.WriteVInt(payload.Length);
                        @out.WriteBytes(payload.Bytes, payload.Offset, payload.Length);
                    }
                }
            }
Ejemplo n.º 3
0
        /// <summary>
        /// checks docs + freqs + positions + payloads, sequentially
        /// </summary>
        public virtual void AssertDocsAndPositionsEnum(DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum rightDocs)
        {
            if (leftDocs == null || rightDocs == null)
            {
                Assert.IsNull(leftDocs);
                Assert.IsNull(rightDocs);
                return;
            }
            Assert.AreEqual(-1, leftDocs.DocID());
            Assert.AreEqual(-1, rightDocs.DocID());
            int docid;

            while ((docid = leftDocs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
            {
                Assert.AreEqual(docid, rightDocs.NextDoc());
                int freq = leftDocs.Freq();
                Assert.AreEqual(freq, rightDocs.Freq());
                for (int i = 0; i < freq; i++)
                {
                    Assert.AreEqual(leftDocs.NextPosition(), rightDocs.NextPosition());
                    // we don't assert offsets/payloads, they are allowed to be different
                }
            }
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, rightDocs.NextDoc());
        }
Ejemplo n.º 4
0
        public virtual void TestWickedLongTerm()
        {
            using (RAMDirectory dir = new RAMDirectory())
            {
                char[] chars = new char[IndexWriter.MAX_TERM_LENGTH];
                Arrays.Fill(chars, 'x');

                string   bigTerm = new string(chars);
                Document doc     = new Document();

                using (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new ClassicAnalyzer(TEST_VERSION_CURRENT))))
                {
                    // This produces a too-long term:
                    string contents = "abc xyz x" + bigTerm + " another term";
                    doc.Add(new TextField("content", contents, Field.Store.NO));
                    writer.AddDocument(doc);

                    // Make sure we can add another normal document
                    doc = new Document();
                    doc.Add(new TextField("content", "abc bbb ccc", Field.Store.NO));
                    writer.AddDocument(doc);
                }

                using (IndexReader reader = IndexReader.Open(dir))
                {
                    // Make sure all terms < max size were indexed
                    assertEquals(2, reader.DocFreq(new Term("content", "abc")));
                    assertEquals(1, reader.DocFreq(new Term("content", "bbb")));
                    assertEquals(1, reader.DocFreq(new Term("content", "term")));
                    assertEquals(1, reader.DocFreq(new Term("content", "another")));

                    // Make sure position is still incremented when
                    // massive term is skipped:
                    DocsAndPositionsEnum tps = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), "content", new BytesRef("another"));
                    assertTrue(tps.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                    assertEquals(1, tps.Freq());
                    assertEquals(3, tps.NextPosition());

                    // Make sure the doc that has the massive term is in
                    // the index:
                    assertEquals("document with wicked long term should is not in the index!", 2, reader.NumDocs);
                }

                // Make sure we can add a document with exactly the
                // maximum length term, and search on that term:
                doc = new Document();
                doc.Add(new TextField("content", bigTerm, Field.Store.NO));
                ClassicAnalyzer sa = new ClassicAnalyzer(TEST_VERSION_CURRENT);
                sa.MaxTokenLength = 100000;
                using (var writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa)))
                {
                    writer.AddDocument(doc);
                }
                using (var reader = IndexReader.Open(dir))
                {
                    assertEquals(1, reader.DocFreq(new Term("content", bigTerm)));
                }
            }
        }
Ejemplo n.º 5
0
        public virtual void TestCaching()
        {
            Directory         dir    = new RAMDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document          doc    = new Document();
            TokenStream       stream = new TokenStreamAnonymousInnerClassHelper(this);

            stream = new CachingTokenFilter(stream);

            doc.Add(new TextField("preanalyzed", stream));

            // 1) we consume all tokens twice before we add the doc to the index
            CheckTokens(stream);
            stream.Reset();
            CheckTokens(stream);

            // 2) now add the document to the index and verify if all tokens are indexed
            //    don't reset the stream here, the DocumentWriter should do that implicitly
            writer.AddDocument(doc);

            IndexReader          reader        = writer.Reader;
            DocsAndPositionsEnum termPositions = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), "preanalyzed", new BytesRef("term1"));

            Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(1, termPositions.Freq());
            Assert.AreEqual(0, termPositions.NextPosition());

            termPositions = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), "preanalyzed", new BytesRef("term2"));
            Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(2, termPositions.Freq());
            Assert.AreEqual(1, termPositions.NextPosition());
            Assert.AreEqual(3, termPositions.NextPosition());

            termPositions = MultiFields.GetTermPositionsEnum(reader, MultiFields.GetLiveDocs(reader), "preanalyzed", new BytesRef("term3"));
            Assert.IsTrue(termPositions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(1, termPositions.Freq());
            Assert.AreEqual(2, termPositions.NextPosition());
            reader.Dispose();
            writer.Dispose();
            // 3) reset stream and consume tokens again
            stream.Reset();
            CheckTokens(stream);
            dir.Dispose();
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Read the parents of the new categories
        /// </summary>
        private void InitParents(IndexReader reader, int first)
        {
            if (reader.MaxDoc == first)
            {
                return;
            }

            // it's ok to use MultiFields because we only iterate on one posting list.
            // breaking it to loop over the leaves() only complicates code for no
            // apparent gain.
            DocsAndPositionsEnum positions = MultiFields.GetTermPositionsEnum(reader, null, Consts.FIELD_PAYLOADS, Consts.PAYLOAD_PARENT_BYTES_REF, DocsAndPositionsEnum.FLAG_PAYLOADS);

            // shouldn't really happen, if it does, something's wrong
            if (positions == null || positions.Advance(first) == DocIdSetIterator.NO_MORE_DOCS)
            {
                throw new CorruptIndexException("Missing parent data for category " + first);
            }

            int num = reader.MaxDoc;

            for (int i = first; i < num; i++)
            {
                if (positions.DocID() == i)
                {
                    if (positions.Freq() == 0) // shouldn't happen
                    {
                        throw new CorruptIndexException("Missing parent data for category " + i);
                    }

                    parents[i] = positions.NextPosition();

                    if (positions.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
                    {
                        if (i + 1 < num)
                        {
                            throw new CorruptIndexException("Missing parent data for category " + (i + 1));
                        }
                        break;
                    }
                } // this shouldn't happen
                else
                {
                    throw new CorruptIndexException("Missing parent data for category " + i);
                }
            }
        }
Ejemplo n.º 7
0
        public virtual void TestEndOffsetPositionWithTeeSinkTokenFilter()
        {
            Store.Directory    dir         = NewDirectory();
            Analyzer           analyzer    = new MockAnalyzer(Random(), MockTokenizer.WHITESPACE, false);
            IndexWriter        w           = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
            Document           doc         = new Document();
            TokenStream        tokenStream = analyzer.TokenStream("field", "abcd   ");
            TeeSinkTokenFilter tee         = new TeeSinkTokenFilter(tokenStream);
            TokenStream        sink        = tee.NewSinkTokenStream();
            FieldType          ft          = new FieldType(TextField.TYPE_NOT_STORED);

            ft.StoreTermVectors         = true;
            ft.StoreTermVectorOffsets   = true;
            ft.StoreTermVectorPositions = true;
            Field f1 = new Field("field", tee, ft);
            Field f2 = new Field("field", sink, ft);

            doc.Add(f1);
            doc.Add(f2);
            w.AddDocument(doc);
            w.Dispose();

            IndexReader r      = DirectoryReader.Open(dir);
            Terms       vector = r.GetTermVectors(0).Terms("field");

            assertEquals(1, vector.Size());
            TermsEnum termsEnum = vector.Iterator(null);

            termsEnum.Next();
            assertEquals(2, termsEnum.TotalTermFreq());
            DocsAndPositionsEnum positions = termsEnum.DocsAndPositions(null, null);

            assertTrue(positions.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            assertEquals(2, positions.Freq());
            positions.NextPosition();
            assertEquals(0, positions.StartOffset());
            assertEquals(4, positions.EndOffset());
            positions.NextPosition();
            assertEquals(8, positions.StartOffset());
            assertEquals(12, positions.EndOffset());
            assertEquals(DocIdSetIterator.NO_MORE_DOCS, positions.NextDoc());
            r.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 8
0
        public virtual void TestChangeGaps()
        {
            // LUCENE-5324: check that it is possible to change the wrapper's gaps
            int      positionGap = Random().Next(1000);
            int      offsetGap   = Random().Next(1000);
            Analyzer @delegate   = new MockAnalyzer(Random());
            Analyzer a           = new AnalyzerWrapperAnonymousInnerClassHelper2(this, @delegate.Strategy, positionGap, offsetGap, @delegate);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), NewDirectory());
            Document          doc    = new Document();
            FieldType         ft     = new FieldType();

            ft.Indexed                  = true;
            ft.IndexOptions             = FieldInfo.IndexOptions.DOCS_ONLY;
            ft.Tokenized                = true;
            ft.StoreTermVectors         = true;
            ft.StoreTermVectorPositions = true;
            ft.StoreTermVectorOffsets   = true;
            doc.Add(new Field("f", "a", ft));
            doc.Add(new Field("f", "a", ft));
            writer.AddDocument(doc, a);
            AtomicReader reader = GetOnlySegmentReader(writer.Reader);
            Fields       fields = reader.GetTermVectors(0);
            Terms        terms  = fields.Terms("f");
            TermsEnum    te     = terms.Iterator(null);

            Assert.AreEqual(new BytesRef("a"), te.Next());
            DocsAndPositionsEnum dpe = te.DocsAndPositions(null, null);

            Assert.AreEqual(0, dpe.NextDoc());
            Assert.AreEqual(2, dpe.Freq());
            Assert.AreEqual(0, dpe.NextPosition());
            Assert.AreEqual(0, dpe.StartOffset());
            int endOffset = dpe.EndOffset();

            Assert.AreEqual(1 + positionGap, dpe.NextPosition());
            Assert.AreEqual(1 + endOffset + offsetGap, dpe.EndOffset());
            Assert.AreEqual(null, te.Next());
            reader.Dispose();
            writer.Dispose();
            writer.w.Directory.Dispose();
        }
Ejemplo n.º 9
0
        /// <summary>
        /// checks advancing docs + positions
        /// </summary>
        public virtual void AssertPositionsSkipping(int docFreq, DocsAndPositionsEnum leftDocs, DocsAndPositionsEnum rightDocs)
        {
            if (leftDocs == null || rightDocs == null)
            {
                Assert.IsNull(leftDocs);
                Assert.IsNull(rightDocs);
                return;
            }

            int docid        = -1;
            int averageGap   = MAXDOC / (1 + docFreq);
            int skipInterval = 16;

            while (true)
            {
                if (Random().NextBoolean())
                {
                    // nextDoc()
                    docid = leftDocs.NextDoc();
                    Assert.AreEqual(docid, rightDocs.NextDoc());
                }
                else
                {
                    // advance()
                    int skip = docid + (int)Math.Ceiling(Math.Abs(skipInterval + Random().NextDouble() * averageGap));
                    docid = leftDocs.Advance(skip);
                    Assert.AreEqual(docid, rightDocs.Advance(skip));
                }

                if (docid == DocIdSetIterator.NO_MORE_DOCS)
                {
                    return;
                }
                int freq = leftDocs.Freq();
                Assert.AreEqual(freq, rightDocs.Freq());
                for (int i = 0; i < freq; i++)
                {
                    Assert.AreEqual(leftDocs.NextPosition(), rightDocs.NextPosition());
                    // we don't compare the payloads, its allowed that one is empty etc
                }
            }
        }
Ejemplo n.º 10
0
        protected void CompareTermVectors(Terms terms, Terms memTerms, string field_name)
        {
            TermsEnum termEnum    = terms.Iterator(null);
            TermsEnum memTermEnum = memTerms.Iterator(null);

            while (termEnum.Next() != null)
            {
                assertNotNull(memTermEnum.Next());

                assertEquals(termEnum.TotalTermFreq(), memTermEnum.TotalTermFreq());

                DocsAndPositionsEnum docsPosEnum    = termEnum.DocsAndPositions(null, null, 0);
                DocsAndPositionsEnum memDocsPosEnum = memTermEnum.DocsAndPositions(null, null, 0);
                String currentTerm = termEnum.Term().Utf8ToString();


                assertEquals("Token mismatch for field: " + field_name, currentTerm, memTermEnum.Term().Utf8ToString());

                docsPosEnum.NextDoc();
                memDocsPosEnum.NextDoc();

                int freq = docsPosEnum.Freq();
                assertEquals(freq, memDocsPosEnum.Freq());
                for (int i = 0; i < freq; i++)
                {
                    string failDesc = " (field:" + field_name + " term:" + currentTerm + ")";
                    int    memPos   = memDocsPosEnum.NextPosition();
                    int    pos      = docsPosEnum.NextPosition();
                    assertEquals("Position test failed" + failDesc, memPos, pos);
                    assertEquals("Start offset test failed" + failDesc, memDocsPosEnum.StartOffset(), docsPosEnum.StartOffset());
                    assertEquals("End offset test failed" + failDesc, memDocsPosEnum.EndOffset(), docsPosEnum.EndOffset());
                    assertEquals("Missing payload test failed" + failDesc, docsPosEnum.Payload, null);
                }
            }
            assertNull("Still some tokens not processed", memTermEnum.Next());
        }
Ejemplo n.º 11
0
        public virtual void TestMixedVectrosVectors()
        {
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true)).SetOpenMode(OpenMode.CREATE));
            Document          doc    = new Document();

            FieldType ft2 = new FieldType(TextField.TYPE_STORED);

            ft2.StoreTermVectors = true;

            FieldType ft3 = new FieldType(TextField.TYPE_STORED);

            ft3.StoreTermVectors         = true;
            ft3.StoreTermVectorPositions = true;

            FieldType ft4 = new FieldType(TextField.TYPE_STORED);

            ft4.StoreTermVectors       = true;
            ft4.StoreTermVectorOffsets = true;

            FieldType ft5 = new FieldType(TextField.TYPE_STORED);

            ft5.StoreTermVectors         = true;
            ft5.StoreTermVectorOffsets   = true;
            ft5.StoreTermVectorPositions = true;

            doc.Add(NewTextField("field", "one", Field.Store.YES));
            doc.Add(NewField("field", "one", ft2));
            doc.Add(NewField("field", "one", ft3));
            doc.Add(NewField("field", "one", ft4));
            doc.Add(NewField("field", "one", ft5));
            writer.AddDocument(doc);
            IndexReader reader = writer.Reader;

            writer.Dispose();

            IndexSearcher searcher = NewSearcher(reader);

            Query query = new TermQuery(new Term("field", "one"));

            ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
            Assert.AreEqual(1, hits.Length);

            Fields vectors = searcher.IndexReader.GetTermVectors(hits[0].Doc);

            Assert.IsNotNull(vectors);
            Assert.AreEqual(1, vectors.Size);
            Terms vector = vectors.Terms("field");

            Assert.IsNotNull(vector);
            Assert.AreEqual(1, vector.Size());
            TermsEnum termsEnum = vector.Iterator(null);

            Assert.IsNotNull(termsEnum.Next());
            Assert.AreEqual("one", termsEnum.Term().Utf8ToString());
            Assert.AreEqual(5, termsEnum.TotalTermFreq());
            DocsAndPositionsEnum dpEnum = termsEnum.DocsAndPositions(null, null);

            Assert.IsNotNull(dpEnum);
            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(5, dpEnum.Freq());
            for (int i = 0; i < 5; i++)
            {
                Assert.AreEqual(i, dpEnum.NextPosition());
            }

            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
            Assert.IsNotNull(dpEnum);
            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            Assert.AreEqual(5, dpEnum.Freq());
            for (int i = 0; i < 5; i++)
            {
                dpEnum.NextPosition();
                Assert.AreEqual(4 * i, dpEnum.StartOffset());
                Assert.AreEqual(4 * i + 3, dpEnum.EndOffset());
            }
            reader.Dispose();
        }
Ejemplo n.º 12
0
        public virtual void TestPayloadsPos0()
        {
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, new MockPayloadAnalyzer());
            Document          doc    = new Document();

            doc.Add(new TextField("content", new StringReader("a a b c d e a f g h i j a b k k")));
            writer.AddDocument(doc);

            IndexReader  readerFromWriter = writer.Reader;
            AtomicReader r = SlowCompositeReaderWrapper.Wrap(readerFromWriter);

            DocsAndPositionsEnum tp = r.TermPositionsEnum(new Term("content", "a"));

            int count = 0;

            Assert.IsTrue(tp.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            // "a" occurs 4 times
            Assert.AreEqual(4, tp.Freq());
            Assert.AreEqual(0, tp.NextPosition());
            Assert.AreEqual(1, tp.NextPosition());
            Assert.AreEqual(3, tp.NextPosition());
            Assert.AreEqual(6, tp.NextPosition());

            // only one doc has "a"
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, tp.NextDoc());

            IndexSearcher @is = NewSearcher(readerFromWriter);

            SpanTermQuery stq1 = new SpanTermQuery(new Term("content", "a"));
            SpanTermQuery stq2 = new SpanTermQuery(new Term("content", "k"));

            SpanQuery[]   sqs = new SpanQuery[] { stq1, stq2 };
            SpanNearQuery snq = new SpanNearQuery(sqs, 30, false);

            count = 0;
            bool sawZero = false;

            if (VERBOSE)
            {
                Console.WriteLine("\ngetPayloadSpans test");
            }
            Search.Spans.Spans pspans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq);
            while (pspans.Next())
            {
                if (VERBOSE)
                {
                    Console.WriteLine("doc " + pspans.Doc() + ": span " + pspans.Start() + " to " + pspans.End());
                }
                var payloads = pspans.Payload;
                sawZero |= pspans.Start() == 0;
                foreach (var bytes in payloads)
                {
                    count++;
                    if (VERBOSE)
                    {
                        Console.WriteLine("  payload: " + Encoding.UTF8.GetString((byte[])(Array)bytes));
                    }
                }
            }
            Assert.IsTrue(sawZero);
            Assert.AreEqual(5, count);

            // System.out.println("\ngetSpans test");
            Search.Spans.Spans spans = MultiSpansWrapper.Wrap(@is.TopReaderContext, snq);
            count   = 0;
            sawZero = false;
            while (spans.Next())
            {
                count++;
                sawZero |= spans.Start() == 0;
                // System.out.println(spans.Doc() + " - " + spans.Start() + " - " +
                // spans.End());
            }
            Assert.AreEqual(4, count);
            Assert.IsTrue(sawZero);

            // System.out.println("\nPayloadSpanUtil test");

            sawZero = false;
            PayloadSpanUtil psu = new PayloadSpanUtil(@is.TopReaderContext);
            var             pls = psu.GetPayloadsForQuery(snq);

            count = pls.Count;
            foreach (var bytes in pls)
            {
                string s = Encoding.UTF8.GetString(bytes);
                //System.out.println(s);
                sawZero |= s.Equals("pos: 0");
            }
            Assert.AreEqual(5, count);
            Assert.IsTrue(sawZero);
            writer.Dispose();
            @is.IndexReader.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 13
0
        private void DuellReaders(CompositeReader other, AtomicReader memIndexReader)
        {
            AtomicReader competitor = SlowCompositeReaderWrapper.Wrap(other);
            Fields       memFields  = memIndexReader.Fields;

            foreach (string field in competitor.Fields)
            {
                Terms memTerms = memFields.Terms(field);
                Terms iwTerms  = memIndexReader.Terms(field);
                if (iwTerms == null)
                {
                    assertNull(memTerms);
                }
                else
                {
                    NumericDocValues normValues    = competitor.GetNormValues(field);
                    NumericDocValues memNormValues = memIndexReader.GetNormValues(field);
                    if (normValues != null)
                    {
                        // mem idx always computes norms on the fly
                        assertNotNull(memNormValues);
                        assertEquals(normValues.Get(0), memNormValues.Get(0));
                    }

                    assertNotNull(memTerms);
                    assertEquals(iwTerms.DocCount, memTerms.DocCount);
                    assertEquals(iwTerms.SumDocFreq, memTerms.SumDocFreq);
                    assertEquals(iwTerms.SumTotalTermFreq, memTerms.SumTotalTermFreq);
                    TermsEnum iwTermsIter  = iwTerms.Iterator(null);
                    TermsEnum memTermsIter = memTerms.Iterator(null);
                    if (iwTerms.HasPositions())
                    {
                        bool offsets = iwTerms.HasOffsets() && memTerms.HasOffsets();

                        while (iwTermsIter.Next() != null)
                        {
                            assertNotNull(memTermsIter.Next());
                            assertEquals(iwTermsIter.Term(), memTermsIter.Term());
                            DocsAndPositionsEnum iwDocsAndPos  = iwTermsIter.DocsAndPositions(null, null);
                            DocsAndPositionsEnum memDocsAndPos = memTermsIter.DocsAndPositions(null, null);
                            while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS)
                            {
                                assertEquals(iwDocsAndPos.DocID(), memDocsAndPos.NextDoc());
                                assertEquals(iwDocsAndPos.Freq(), memDocsAndPos.Freq());
                                for (int i = 0; i < iwDocsAndPos.Freq(); i++)
                                {
                                    assertEquals("term: " + iwTermsIter.Term().Utf8ToString(), iwDocsAndPos.NextPosition(), memDocsAndPos.NextPosition());
                                    if (offsets)
                                    {
                                        assertEquals(iwDocsAndPos.StartOffset(), memDocsAndPos.StartOffset());
                                        assertEquals(iwDocsAndPos.EndOffset(), memDocsAndPos.EndOffset());
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        while (iwTermsIter.Next() != null)
                        {
                            assertEquals(iwTermsIter.Term(), memTermsIter.Term());
                            DocsEnum iwDocsAndPos  = iwTermsIter.Docs(null, null);
                            DocsEnum memDocsAndPos = memTermsIter.Docs(null, null);
                            while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS)
                            {
                                assertEquals(iwDocsAndPos.DocID(), memDocsAndPos.NextDoc());
                                assertEquals(iwDocsAndPos.Freq(), memDocsAndPos.Freq());
                            }
                        }
                    }
                }
            }
        }
Ejemplo n.º 14
0
 internal void FirstPosition()
 {
     Count = Postings.Freq(); // read first pos
     NextPosition();
 }
Ejemplo n.º 15
0
        /// <summary>
        /// Safe (but, slowish) default method to write every
        ///  vector field in the document.
        /// </summary>
        protected internal void AddAllDocVectors(Fields vectors, MergeState mergeState)
        {
            if (vectors == null)
            {
                StartDocument(0);
                FinishDocument();
                return;
            }

            int numFields = vectors.Size;

            if (numFields == -1)
            {
                // count manually! TODO: Maybe enforce that Fields.size() returns something valid?
                numFields = 0;
                //for (IEnumerator<string> it = vectors.Iterator(); it.hasNext();)
                foreach (string it in vectors)
                {
                    numFields++;
                }
            }
            StartDocument(numFields);

            string lastFieldName = null;

            TermsEnum            termsEnum            = null;
            DocsAndPositionsEnum docsAndPositionsEnum = null;

            int fieldCount = 0;

            foreach (string fieldName in vectors)
            {
                fieldCount++;
                FieldInfo fieldInfo = mergeState.FieldInfos.FieldInfo(fieldName);

                Debug.Assert(lastFieldName == null || fieldName.CompareTo(lastFieldName) > 0, "lastFieldName=" + lastFieldName + " fieldName=" + fieldName);
                lastFieldName = fieldName;

                Terms terms = vectors.Terms(fieldName);
                if (terms == null)
                {
                    // FieldsEnum shouldn't lie...
                    continue;
                }

                bool hasPositions = terms.HasPositions();
                bool hasOffsets   = terms.HasOffsets();
                bool hasPayloads  = terms.HasPayloads();
                Debug.Assert(!hasPayloads || hasPositions);

                int numTerms = (int)terms.Size();
                if (numTerms == -1)
                {
                    // count manually. It is stupid, but needed, as Terms.size() is not a mandatory statistics function
                    numTerms  = 0;
                    termsEnum = terms.Iterator(termsEnum);
                    while (termsEnum.Next() != null)
                    {
                        numTerms++;
                    }
                }

                StartField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads);
                termsEnum = terms.Iterator(termsEnum);

                int termCount = 0;
                while (termsEnum.Next() != null)
                {
                    termCount++;

                    int freq = (int)termsEnum.TotalTermFreq();

                    StartTerm(termsEnum.Term(), freq);

                    if (hasPositions || hasOffsets)
                    {
                        docsAndPositionsEnum = termsEnum.DocsAndPositions(null, docsAndPositionsEnum);
                        Debug.Assert(docsAndPositionsEnum != null);

                        int docID = docsAndPositionsEnum.NextDoc();
                        Debug.Assert(docID != DocIdSetIterator.NO_MORE_DOCS);
                        Debug.Assert(docsAndPositionsEnum.Freq() == freq);

                        for (int posUpto = 0; posUpto < freq; posUpto++)
                        {
                            int pos         = docsAndPositionsEnum.NextPosition();
                            int startOffset = docsAndPositionsEnum.StartOffset();
                            int endOffset   = docsAndPositionsEnum.EndOffset();

                            BytesRef payload = docsAndPositionsEnum.Payload;

                            Debug.Assert(!hasPositions || pos >= 0);
                            AddPosition(pos, startOffset, endOffset, payload);
                        }
                    }
                    FinishTerm();
                }
                Debug.Assert(termCount == numTerms);
                FinishField();
            }
            Debug.Assert(fieldCount == numFields);
            FinishDocument();
        }
 internal virtual void AddPositions(DocsAndPositionsEnum @in, IndexOutput @out)
 {
     int freq = @in.Freq();
     @out.WriteVInt(freq);
     int previousPosition = 0;
     int previousEndOffset = 0;
     for (int i = 0; i < freq; i++)
     {
         int pos = @in.NextPosition();
         BytesRef payload = @in.Payload;
         // The low-order bit of token is set only if there is a payload, the
         // previous bits are the delta-encoded position. 
         int token = (pos - previousPosition) << 1 | (payload == null ? 0 : 1);
         @out.WriteVInt(token);
         previousPosition = pos;
         if (storeOffsets) // don't encode offsets if they are not stored
         {
             int startOffset = @in.StartOffset();
             int endOffset = @in.EndOffset();
             @out.WriteVInt(startOffset - previousEndOffset);
             @out.WriteVInt(endOffset - startOffset);
             previousEndOffset = endOffset;
         }
         if (payload != null)
         {
             @out.WriteVInt(payload.Length);
             @out.WriteBytes(payload.Bytes, payload.Offset, payload.Length);
         }
     }
 }