        private int CopyVectorsNoDeletions(MergeState mergeState, Lucene40TermVectorsReader matchingVectorsReader, AtomicReader reader, int[] rawDocLengths, int[] rawDocLengths2)
        {
            int maxDoc = reader.MaxDoc;

            if (matchingVectorsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                int docCount = 0;
                while (docCount < maxDoc)
                {
                    int len = Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
                    matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, docCount, len);
                    AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
                    docCount += len;
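                    // record ~300 units of work per copied doc; CheckAbort throws if the merge has been aborted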
                    mergeState.CheckAbort.Work(300 * len);
                }
            }
            else
            {
                for (int docNum = 0; docNum < maxDoc; docNum++)
                {
                    // NOTE: it's very important to first assign to vectors then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    Fields vectors = reader.GetTermVectors(docNum);
                    AddAllDocVectors(vectors, mergeState);
                    mergeState.CheckAbort.Work(300);
                }
            }
            return maxDoc;
        }
        public virtual int Merge(MergeState mergeState)
        {
            int docCount = 0;

            for (int i = 0; i < mergeState.Readers.Count; i++)
            {
                AtomicReader reader   = mergeState.Readers[i];
                int          maxDoc   = reader.MaxDoc;
                IBits        liveDocs = reader.LiveDocs;

                for (int docID = 0; docID < maxDoc; docID++)
                {
                    if (liveDocs != null && !liveDocs.Get(docID))
                    {
                        // skip deleted docs
                        continue;
                    }
                    // NOTE: it's very important to first assign to vectors then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    Fields vectors = reader.GetTermVectors(docID);
                    AddAllDocVectors(vectors, mergeState);
                    docCount++;
                    mergeState.CheckAbort.Work(300);
                }
            }
            Finish(mergeState.FieldInfos, docCount);
            return docCount;
        }
        public virtual void TestChangeGaps()
        {
            // LUCENE-5324: check that it is possible to change the wrapper's gaps
            int      positionGap = Random.Next(1000);
            int      offsetGap   = Random.Next(1000);
            Analyzer @delegate   = new MockAnalyzer(Random);
            Analyzer a           = new AnalyzerWrapperAnonymousClass2(this, @delegate.Strategy, positionGap, offsetGap, @delegate);

            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, NewDirectory());
            Document  doc = new Document();
            FieldType ft  = new FieldType();

            ft.IsIndexed                = true;
            ft.IndexOptions             = IndexOptions.DOCS_ONLY;
            ft.IsTokenized              = true;
            ft.StoreTermVectors         = true;
            ft.StoreTermVectorPositions = true;
            ft.StoreTermVectorOffsets   = true;
            doc.Add(new Field("f", "a", ft));
            doc.Add(new Field("f", "a", ft));
            writer.AddDocument(doc, a);
            AtomicReader reader = GetOnlySegmentReader(writer.GetReader());
            Fields       fields = reader.GetTermVectors(0);
            Terms        terms  = fields.GetTerms("f");
            TermsEnum    te     = terms.GetEnumerator();

            Assert.IsTrue(te.MoveNext());
            Assert.AreEqual(new BytesRef("a"), te.Term);
            DocsAndPositionsEnum dpe = te.DocsAndPositions(null, null);

            Assert.AreEqual(0, dpe.NextDoc());
            Assert.AreEqual(2, dpe.Freq);
            Assert.AreEqual(0, dpe.NextPosition());
            Assert.AreEqual(0, dpe.StartOffset);
            int endOffset = dpe.EndOffset;

            Assert.AreEqual(1 + positionGap, dpe.NextPosition());
            Assert.AreEqual(1 + endOffset + offsetGap, dpe.EndOffset);
            Assert.IsFalse(te.MoveNext());
            reader.Dispose();
            writer.Dispose();
            writer.IndexWriter.Directory.Dispose();
        }
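
        // The AnalyzerWrapperAnonymousClass2 referenced above is defined elsewhere in
        // the test class. As a hedged sketch (not the actual implementation), such a
        // gap-changing wrapper would likely look as follows, assuming the Lucene.NET
        // 4.8 AnalyzerWrapper/ReuseStrategy API from Lucene.Net.Analysis; the class
        // and field names here are illustrative only.
        internal sealed class GapChangingAnalyzerWrapper : AnalyzerWrapper
        {
            private readonly Analyzer @delegate;
            private readonly int positionGap;
            private readonly int offsetGap;

            public GapChangingAnalyzerWrapper(ReuseStrategy strategy, int positionGap, int offsetGap, Analyzer @delegate)
                : base(strategy) // reuse the wrapped analyzer's strategy, as the test does
            {
                this.@delegate = @delegate;
                this.positionGap = positionGap;
                this.offsetGap = offsetGap;
            }

            // Delegate all tokenization to the wrapped analyzer, for every field.
            protected override Analyzer GetWrappedAnalyzer(string fieldName)
            {
                return @delegate;
            }

            // Position increment inserted between the two "f" field instances.
            public override int GetPositionIncrementGap(string fieldName)
            {
                return positionGap;
            }

            // Extra character offset inserted between the two "f" field instances.
            public override int GetOffsetGap(string fieldName)
            {
                return offsetGap;
            }
        }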
        public virtual void TestChangeGaps()
        {
            // LUCENE-5324: check that it is possible to change the wrapper's gaps
            int      positionGap = Random().Next(1000);
            int      offsetGap   = Random().Next(1000);
            Analyzer @delegate   = new MockAnalyzer(Random());
            Analyzer a           = new AnalyzerWrapperAnonymousInnerClassHelper2(this, @delegate.Strategy, positionGap, offsetGap, @delegate);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), NewDirectory());
            Document          doc    = new Document();
            FieldType         ft     = new FieldType();

            ft.Indexed                  = true;
            ft.IndexOptions             = FieldInfo.IndexOptions.DOCS_ONLY;
            ft.Tokenized                = true;
            ft.StoreTermVectors         = true;
            ft.StoreTermVectorPositions = true;
            ft.StoreTermVectorOffsets   = true;
            doc.Add(new Field("f", "a", ft));
            doc.Add(new Field("f", "a", ft));
            writer.AddDocument(doc, a);
            AtomicReader reader = GetOnlySegmentReader(writer.Reader);
            Fields       fields = reader.GetTermVectors(0);
            Terms        terms  = fields.Terms("f");
            TermsEnum    te     = terms.Iterator(null);

            Assert.AreEqual(new BytesRef("a"), te.Next());
            DocsAndPositionsEnum dpe = te.DocsAndPositions(null, null);

            Assert.AreEqual(0, dpe.NextDoc());
            Assert.AreEqual(2, dpe.Freq());
            Assert.AreEqual(0, dpe.NextPosition());
            Assert.AreEqual(0, dpe.StartOffset());
            int endOffset = dpe.EndOffset();

            Assert.AreEqual(1 + positionGap, dpe.NextPosition());
            Assert.AreEqual(1 + endOffset + offsetGap, dpe.EndOffset());
            Assert.AreEqual(null, te.Next());
            reader.Dispose();
            writer.Dispose();
            writer.w.Directory.Dispose();
        }
        private int CopyVectorsWithDeletions(MergeState mergeState, Lucene40TermVectorsReader matchingVectorsReader, AtomicReader reader, int[] rawDocLengths, int[] rawDocLengths2)
        {
            int   maxDoc       = reader.MaxDoc;
            IBits liveDocs     = reader.LiveDocs;
            int   totalNumDocs = 0;

            if (matchingVectorsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                for (int docNum = 0; docNum < maxDoc;)
                {
                    if (!liveDocs.Get(docNum))
                    {
                        // skip deleted docs
                        ++docNum;
                        continue;
                    }
                    // We can optimize this case (doing a bulk byte copy) since the field
                    // numbers are identical
                    int start = docNum, numDocs = 0;
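                    // Extend the run of consecutive live docs: stop at a deleted doc,
                    // the end of the segment, or the MAX_RAW_MERGE_DOCS batch limit.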
                    do
                    {
                        docNum++;
                        numDocs++;
                        if (docNum >= maxDoc)
                        {
                            break;
                        }
                        if (!liveDocs.Get(docNum))
                        {
                            docNum++;
                            break;
                        }
                    } while (numDocs < MAX_RAW_MERGE_DOCS);

                    matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
                    AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
                    totalNumDocs += numDocs;
                    mergeState.CheckAbort.Work(300 * numDocs);
                }
            }
            else
            {
                for (int docNum = 0; docNum < maxDoc; docNum++)
                {
                    if (!liveDocs.Get(docNum))
                    {
                        // skip deleted docs
                        continue;
                    }

                    // NOTE: it's very important to first assign to vectors then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    Fields vectors = reader.GetTermVectors(docNum);
                    AddAllDocVectors(vectors, mergeState);
                    totalNumDocs++;
                    mergeState.CheckAbort.Work(300);
                }
            }
            return totalNumDocs;
        }
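
        // For context: the two copy helpers above are normally called from a Merge
        // override on the Lucene40 term-vectors writer, which picks the bulk path per
        // segment. This is only a hedged sketch, not the shipped implementation; the
        // MergeState/SegmentReader members used here (Readers, MatchingSegmentReaders,
        // TermVectorsReader, FieldInfos) are assumed from the Lucene.NET 4.8 API.
        public override int Merge(MergeState mergeState)
        {
            // scratch buffers reused by the RawDocs/AddRawDocuments bulk copies
            int[] rawDocLengths = new int[MAX_RAW_MERGE_DOCS];
            int[] rawDocLengths2 = new int[MAX_RAW_MERGE_DOCS];
            int numDocs = 0;

            for (int i = 0; i < mergeState.Readers.Count; i++)
            {
                AtomicReader reader = mergeState.Readers[i];

                // mergeState is expected to expose a matching SegmentReader only when
                // its field infos are "congruent" with the merged segment, which is
                // what makes the raw byte-copy path safe.
                Lucene40TermVectorsReader matchingVectorsReader = null;
                SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[i];
                if (matchingSegmentReader != null &&
                    matchingSegmentReader.TermVectorsReader is Lucene40TermVectorsReader candidate)
                {
                    matchingVectorsReader = candidate;
                }

                // Segments with deletions need the live-docs aware path.
                numDocs += reader.LiveDocs != null
                    ? CopyVectorsWithDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2)
                    : CopyVectorsNoDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
            }
            Finish(mergeState.FieldInfos, numDocs);
            return numDocs;
        }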