/// <summary>
        /// Copies the term vectors of every document of <paramref name="reader"/>,
        /// from document 0 up to <c>reader.MaxDoc</c>, without consulting live docs.
        /// </summary>
        /// <param name="mergeState">Merge state; its <c>CheckAbort</c> is notified after each copy step.</param>
        /// <param name="matchingVectorsReader">When non-null, documents are bulk-copied as raw bytes from this reader; when null, each document's vectors are re-added one at a time.</param>
        /// <param name="reader">Source of the documents.</param>
        /// <param name="rawDocLengths">Scratch buffer filled by <c>RawDocs</c> and handed on to <c>AddRawDocuments</c>.</param>
        /// <param name="rawDocLengths2">Second scratch buffer, used the same way.</param>
        /// <returns><c>reader.MaxDoc</c>.</returns>
        private int CopyVectorsNoDeletions(MergeState mergeState, Lucene40TermVectorsReader matchingVectorsReader, AtomicReader reader, int[] rawDocLengths, int[] rawDocLengths2)
        {
            int docTotal = reader.MaxDoc;

            if (matchingVectorsReader == null)
            {
                // No matching vectors reader: re-add the vectors document by document.
                for (int doc = 0; doc < docTotal; doc++)
                {
                    // NOTE: the vectors must first be assigned to a local and only then
                    // passed to AddAllDocVectors; see LUCENE-1282
                    Fields docVectors = reader.GetTermVectors(doc);
                    AddAllDocVectors(docVectors, mergeState);
                    mergeState.CheckAbort.Work(300);
                }
                return docTotal;
            }

            // We can bulk-copy because the fieldInfos are "congruent".
            // Documents are moved in chunks of at most MAX_RAW_MERGE_DOCS.
            for (int copied = 0; copied < docTotal;)
            {
                int chunk = Math.Min(MAX_RAW_MERGE_DOCS, docTotal - copied);
                matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, copied, chunk);
                AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, chunk);
                copied += chunk;
                mergeState.CheckAbort.Work(300 * chunk);
            }
            return docTotal;
        }
Example #2
0
            /// <summary>
            /// Seeks the outer reader's <c>tvf</c> input to <paramref name="tvfFP"/> and reads
            /// the term count and the flags byte stored there.
            /// </summary>
            /// <param name="outerInstance">Reader whose <c>tvf</c> input is read.</param>
            /// <param name="tvfFP">Position in <c>tvf</c> to seek to before reading.</param>
            public TVTerms(Lucene40TermVectorsReader outerInstance, long tvfFP)
            {
                this.outerInstance = outerInstance;

                var input = outerInstance.tvf;
                input.Seek(tvfFP);

                // Read order: term count first, then a single flags byte.
                numTerms = input.ReadVInt32();
                byte flags = input.ReadByte();

                // Each flag is one bit of the byte just read.
                storePositions = 0 != (flags & STORE_POSITIONS_WITH_TERMVECTOR);
                storeOffsets = 0 != (flags & STORE_OFFSET_WITH_TERMVECTOR);
                storePayloads = 0 != (flags & STORE_PAYLOAD_WITH_TERMVECTOR);

                // Remember the position immediately after the header.
                // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                tvfFPStart = input.Position;
            }
            /// <summary>
            /// Seeks the outer reader's <c>Tvf</c> input to <paramref name="tvfFP"/> and reads
            /// the term count and the flags byte stored there.
            /// </summary>
            /// <param name="outerInstance">Reader whose <c>Tvf</c> input is read.</param>
            /// <param name="tvfFP">Position in <c>Tvf</c> to seek to before reading.</param>
            public TVTerms(Lucene40TermVectorsReader outerInstance, long tvfFP)
            {
                OuterInstance = outerInstance;

                var input = outerInstance.Tvf;
                input.Seek(tvfFP);

                // Read order: term count first, then a single flags byte.
                NumTerms = input.ReadVInt();
                byte flags = input.ReadByte();

                // Each flag is one bit of the byte just read.
                StorePositions = 0 != (flags & STORE_POSITIONS_WITH_TERMVECTOR);
                StoreOffsets = 0 != (flags & STORE_OFFSET_WITH_TERMVECTOR);
                StorePayloads = 0 != (flags & STORE_PAYLOAD_WITH_TERMVECTOR);

                // Remember the file pointer immediately after the header.
                TvfFPStart = input.FilePointer;
            }
Example #4
0
        /// <summary>
        /// Bulk-copies <paramref name="numDocs"/> documents from <paramref name="reader"/>
        /// into this writer's streams. Used to speed up merging when the field
        /// numbers are congruent.
        /// </summary>
        /// <param name="reader">Reader whose <c>TvdStream</c> and <c>TvfStream</c> supply the raw bytes.</param>
        /// <param name="tvdLengths">Per-document byte lengths in the tvd stream; the first <paramref name="numDocs"/> entries are read.</param>
        /// <param name="tvfLengths">Per-document byte lengths in the tvf stream; the first <paramref name="numDocs"/> entries are read.</param>
        /// <param name="numDocs">Number of documents to copy.</param>
        private void AddRawDocuments(Lucene40TermVectorsReader reader, int[] tvdLengths, int[] tvfLengths, int numDocs)
        {
            long tvdStart = tvd.GetFilePointer();
            long tvfStart = tvf.GetFilePointer();

            // Running byte totals of the documents handled so far.
            long tvdBytes = 0;
            long tvfBytes = 0;

            // For each document, record in tvx where its data will start in tvd and in tvf.
            for (int doc = 0; doc < numDocs; doc++)
            {
                tvx.WriteInt64(tvdStart + tvdBytes);
                tvdBytes += tvdLengths[doc];
                tvx.WriteInt64(tvfStart + tvfBytes);
                tvfBytes += tvfLengths[doc];
            }

            // Then move the data itself, one copy per stream.
            tvd.CopyBytes(reader.TvdStream, tvdBytes);
            tvf.CopyBytes(reader.TvfStream, tvfBytes);

            // Both outputs must now sit exactly at the computed end positions.
            Debug.Assert(tvd.GetFilePointer() == tvdStart + tvdBytes);
            Debug.Assert(tvf.GetFilePointer() == tvfStart + tvfBytes);
        }
Example #5
0
            /// <summary>
            /// Reads the field numbers and per-field file pointers of document
            /// <paramref name="docID"/> from the outer reader's <c>tvx</c> and <c>tvd</c> inputs.
            /// </summary>
            /// <param name="outerInstance">Reader whose inputs are read.</param>
            /// <param name="docID">Document passed to <c>SeekTvx</c>.</param>
            public TVFields(Lucene40TermVectorsReader outerInstance, int docID)
            {
                this.outerInstance = outerInstance;
                outerInstance.SeekTvx(docID);

                var docInput = outerInstance.tvd;
                var indexInput = outerInstance.tvx;

                // The first long in tvx says where to seek in tvd.
                docInput.Seek(indexInput.ReadInt64());

                int fieldCount = docInput.ReadVInt32();

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(fieldCount >= 0);
                }

                if (fieldCount == 0)
                {
                    // TODO: we can improve writer here, eg write 0 into
                    // tvx file, so we know on first read from tvx that
                    // this doc has no TVs
                    fieldNumbers = null;
                    fieldFPs = null;
                    return;
                }

                fieldNumbers = new int[fieldCount];
                fieldFPs = new long[fieldCount];

                // Field numbers come first; also remember the slot of each number.
                for (int slot = 0; slot < fieldCount; slot++)
                {
                    int number = docInput.ReadVInt32();
                    fieldNumbers[slot] = number;
                    fieldNumberToIndex[number] = slot;
                }

                // The first pointer is read from tvx; every later one is the
                // previous pointer plus a delta read from tvd.
                long pointer = indexInput.ReadInt64();
                fieldFPs[0] = pointer;
                for (int slot = 1; slot < fieldCount; slot++)
                {
                    pointer += docInput.ReadVInt64();
                    fieldFPs[slot] = pointer;
                }
            }
        /// <summary>
        /// Bulk-copies <paramref name="numDocs"/> documents from <paramref name="reader"/>
        /// into this writer's streams. Used to speed up merging when the field
        /// numbers are congruent.
        /// </summary>
        /// <param name="reader">Reader whose <c>TvdStream</c> and <c>TvfStream</c> supply the raw bytes.</param>
        /// <param name="tvdLengths">Per-document byte lengths in the tvd stream; the first <paramref name="numDocs"/> entries are read.</param>
        /// <param name="tvfLengths">Per-document byte lengths in the tvf stream; the first <paramref name="numDocs"/> entries are read.</param>
        /// <param name="numDocs">Number of documents to copy.</param>
        private void AddRawDocuments(Lucene40TermVectorsReader reader, int[] tvdLengths, int[] tvfLengths, int numDocs)
        {
            // LUCENENET specific: Position was renamed from getFilePointer() to match FileStream
            long tvdStart = tvd.Position;
            long tvfStart = tvf.Position;

            // Running byte totals of the documents handled so far.
            long tvdBytes = 0;
            long tvfBytes = 0;

            // For each document, record in tvx where its data will start in tvd and in tvf.
            for (int doc = 0; doc < numDocs; doc++)
            {
                tvx.WriteInt64(tvdStart + tvdBytes);
                tvdBytes += tvdLengths[doc];
                tvx.WriteInt64(tvfStart + tvfBytes);
                tvfBytes += tvfLengths[doc];
            }

            // Then move the data itself, one copy per stream.
            tvd.CopyBytes(reader.TvdStream, tvdBytes);
            tvf.CopyBytes(reader.TvfStream, tvfBytes);

            if (Debugging.AssertsEnabled)
            {
                // Both outputs must now sit exactly at the computed end positions.
                Debugging.Assert(tvd.Position == tvdStart + tvdBytes);
                Debugging.Assert(tvf.Position == tvfStart + tvfBytes);
            }
        }
        /// <summary>
        /// Copies the term vectors of every reader in <paramref name="mergeState"/>
        /// and then calls <c>Finish</c> with the total number of documents copied.
        /// </summary>
        /// <param name="mergeState">Supplies the readers, their matching segment readers and the field infos.</param>
        /// <returns>The total number of documents copied across all readers.</returns>
        public override int Merge(MergeState mergeState)
        {
            // Scratch buffers shared by every bulk copy of raw term-vector bytes.
            int[] tvdLengths = new int[MAX_RAW_MERGE_DOCS];
            int[] tvfLengths = new int[MAX_RAW_MERGE_DOCS];

            int mergedDocs = 0;

            for (int readerIndex = 0; readerIndex < mergeState.Readers.Count; readerIndex++)
            {
                AtomicReader reader = mergeState.Readers[readerIndex];
                SegmentReader segmentReader = mergeState.MatchingSegmentReaders[readerIndex];

                // Stays null unless there is a matching segment reader whose
                // term vectors reader is a Lucene40TermVectorsReader.
                Lucene40TermVectorsReader matchingVectorsReader =
                    segmentReader?.TermVectorsReader as Lucene40TermVectorsReader;

                // Readers that expose live docs take the deletion-aware path.
                mergedDocs += reader.LiveDocs != null
                    ? CopyVectorsWithDeletions(mergeState, matchingVectorsReader, reader, tvdLengths, tvfLengths)
                    : CopyVectorsNoDeletions(mergeState, matchingVectorsReader, reader, tvdLengths, tvfLengths);
            }

            Finish(mergeState.FieldInfos, mergedDocs);
            return mergedDocs;
        }
 /// <summary>
 /// Keeps a reference to the outer reader's <c>Tvf</c> input and works on a clone of it.
 /// NOTE: tvf is pre-positioned by caller.
 /// </summary>
 /// <param name="outerInstance">Reader whose <c>Tvf</c> input is cloned.</param>
 public TVTermsEnum(Lucene40TermVectorsReader outerInstance)
 {
     var sharedInput = outerInstance.Tvf;

     OuterInstance = outerInstance;
     OrigTVF = sharedInput;

     // Reads go through a private clone, not through the shared input.
     Tvf = (IndexInput)sharedInput.Clone();
 }
 /// <summary>
 /// Seeks the outer reader's <c>Tvf</c> input to <paramref name="tvfFP"/> and reads
 /// the term count and the flags byte stored there.
 /// </summary>
 /// <param name="outerInstance">Reader whose <c>Tvf</c> input is read.</param>
 /// <param name="tvfFP">Position in <c>Tvf</c> to seek to before reading.</param>
 public TVTerms(Lucene40TermVectorsReader outerInstance, long tvfFP)
 {
     OuterInstance = outerInstance;

     var input = outerInstance.Tvf;
     input.Seek(tvfFP);

     // Read order: term count first, then a single flags byte.
     NumTerms = input.ReadVInt();
     byte flags = input.ReadByte();

     // Each flag is one bit of the byte just read.
     StorePositions = 0 != (flags & STORE_POSITIONS_WITH_TERMVECTOR);
     StoreOffsets = 0 != (flags & STORE_OFFSET_WITH_TERMVECTOR);
     StorePayloads = 0 != (flags & STORE_PAYLOAD_WITH_TERMVECTOR);

     // Remember the file pointer immediately after the header.
     TvfFPStart = input.FilePointer;
 }
            /// <summary>
            /// Reads the field numbers and per-field file pointers of document
            /// <paramref name="docID"/> from the outer reader's <c>Tvx</c> and <c>Tvd</c> inputs.
            /// </summary>
            /// <param name="outerInstance">Reader whose inputs are read.</param>
            /// <param name="docID">Document passed to <c>SeekTvx</c>.</param>
            public TVFields(Lucene40TermVectorsReader outerInstance, int docID)
            {
                OuterInstance = outerInstance;
                outerInstance.SeekTvx(docID);

                var docInput = outerInstance.Tvd;
                var indexInput = outerInstance.Tvx;

                // The first long in Tvx says where to seek in Tvd.
                docInput.Seek(indexInput.ReadLong());

                int fieldCount = docInput.ReadVInt();
                Debug.Assert(fieldCount >= 0);

                if (fieldCount == 0)
                {
                    // TODO: we can improve writer here, eg write 0 into
                    // tvx file, so we know on first read from tvx that
                    // this doc has no TVs
                    FieldNumbers = null;
                    FieldFPs = null;
                    return;
                }

                FieldNumbers = new int[fieldCount];
                FieldFPs = new long[fieldCount];

                // Field numbers come first; also remember the slot of each number.
                for (int slot = 0; slot < fieldCount; slot++)
                {
                    int number = docInput.ReadVInt();
                    FieldNumbers[slot] = number;
                    FieldNumberToIndex[number] = slot;
                }

                // The first pointer is read from Tvx; every later one is the
                // previous pointer plus a delta read from Tvd.
                long pointer = indexInput.ReadLong();
                FieldFPs[0] = pointer;
                for (int slot = 1; slot < fieldCount; slot++)
                {
                    pointer += docInput.ReadVLong();
                    FieldFPs[slot] = pointer;
                }
            }
        /// <summary>
        /// Copies the term vectors of the live documents of <paramref name="reader"/>,
        /// skipping every document for which <c>reader.LiveDocs.Get</c> returns false.
        /// </summary>
        /// <param name="mergeState">Merge state; its <c>CheckAbort</c> is notified after each copy step.</param>
        /// <param name="matchingVectorsReader">When non-null, runs of consecutive live documents are bulk-copied as raw bytes from this reader; when null, each live document's vectors are re-added one at a time.</param>
        /// <param name="reader">Source of the documents and of the live-docs bits.</param>
        /// <param name="rawDocLengths">Scratch buffer filled by <c>RawDocs</c> and handed on to <c>AddRawDocuments</c>.</param>
        /// <param name="rawDocLengths2">Second scratch buffer, used the same way.</param>
        /// <returns>The number of documents copied.</returns>
        private int CopyVectorsWithDeletions(MergeState mergeState, Lucene40TermVectorsReader matchingVectorsReader, AtomicReader reader, int[] rawDocLengths, int[] rawDocLengths2)
        {
            int docLimit = reader.MaxDoc;
            IBits live = reader.LiveDocs;
            int copiedDocs = 0;

            if (matchingVectorsReader == null)
            {
                // No matching vectors reader: re-add the vectors of each live document.
                for (int doc = 0; doc < docLimit; doc++)
                {
                    if (live.Get(doc))
                    {
                        // NOTE: the vectors must first be assigned to a local and only then
                        // passed to AddAllDocVectors; see LUCENE-1282
                        Fields docVectors = reader.GetTermVectors(doc);
                        AddAllDocVectors(docVectors, mergeState);
                        copiedDocs++;
                        mergeState.CheckAbort.Work(300);
                    }
                }
                return copiedDocs;
            }

            // We can bulk-copy because the fieldInfos are "congruent"
            int cursor = 0;
            while (cursor < docLimit)
            {
                if (!live.Get(cursor))
                {
                    // skip deleted docs
                    cursor++;
                    continue;
                }

                // cursor is on a live doc: extend the run over the following live docs,
                // up to MAX_RAW_MERGE_DOCS, so the whole run is copied as raw bytes.
                int runStart = cursor;
                int runLength = 0;
                while (true)
                {
                    cursor++;
                    runLength++;
                    if (cursor >= docLimit)
                    {
                        break;
                    }
                    if (!live.Get(cursor))
                    {
                        // The run ends at a deleted doc; step over it right away.
                        cursor++;
                        break;
                    }
                    if (runLength >= MAX_RAW_MERGE_DOCS)
                    {
                        break;
                    }
                }

                matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, runStart, runLength);
                AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, runLength);
                copiedDocs += runLength;
                mergeState.CheckAbort.Work(300 * runLength);
            }
            return copiedDocs;
        }
Example #12
0
 /// <summary>
 /// Keeps a reference to the outer reader's <c>tvf</c> input and works on a clone of it.
 /// NOTE: tvf is pre-positioned by caller.
 /// </summary>
 /// <param name="outerInstance">Reader whose <c>tvf</c> input is cloned.</param>
 public TVTermsEnum(Lucene40TermVectorsReader outerInstance)
 {
     var sharedInput = outerInstance.tvf;

     origTVF = sharedInput;

     // Reads go through a private clone, not through the shared input.
     tvf = (IndexInput)sharedInput.Clone();
 }
 /// <summary>
 /// Keeps a reference to the outer reader's <c>Tvf</c> input and works on a clone of it.
 /// NOTE: tvf is pre-positioned by caller.
 /// </summary>
 /// <param name="outerInstance">Reader whose <c>Tvf</c> input is cloned.</param>
 public TVTermsEnum(Lucene40TermVectorsReader outerInstance)
 {
     var sharedInput = outerInstance.Tvf;

     OuterInstance = outerInstance;
     OrigTVF = sharedInput;

     // Reads go through a private clone, not through the shared input.
     Tvf = (IndexInput)sharedInput.Clone();
 }
        /// <summary>
        /// Copies the term vectors of the live documents of <paramref name="reader"/>,
        /// skipping every document for which <c>reader.LiveDocs.Get</c> returns false.
        /// </summary>
        /// <param name="mergeState">Merge state; its <c>checkAbort</c> is notified after each copy step.</param>
        /// <param name="matchingVectorsReader">When non-null, runs of consecutive live documents are bulk-copied as raw bytes from this reader; when null, each live document's vectors are re-added one at a time.</param>
        /// <param name="reader">Source of the documents and of the live-docs bits.</param>
        /// <param name="rawDocLengths">Scratch buffer filled by <c>RawDocs</c> and handed on to <c>AddRawDocuments</c>.</param>
        /// <param name="rawDocLengths2">Second scratch buffer, used the same way.</param>
        /// <returns>The number of documents copied.</returns>
        private int CopyVectorsWithDeletions(MergeState mergeState, Lucene40TermVectorsReader matchingVectorsReader, AtomicReader reader, int[] rawDocLengths, int[] rawDocLengths2)
        {
            int docLimit = reader.MaxDoc;
            Bits live = reader.LiveDocs;
            int copiedDocs = 0;

            if (matchingVectorsReader == null)
            {
                // No matching vectors reader: re-add the vectors of each live document.
                for (int doc = 0; doc < docLimit; doc++)
                {
                    if (live.Get(doc))
                    {
                        // NOTE: the vectors must first be assigned to a local and only then
                        // passed to AddAllDocVectors; see LUCENE-1282
                        Fields docVectors = reader.GetTermVectors(doc);
                        AddAllDocVectors(docVectors, mergeState);
                        copiedDocs++;
                        mergeState.checkAbort.Work(300);
                    }
                }
            }
            else
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                int cursor = 0;
                while (cursor < docLimit)
                {
                    if (!live.Get(cursor))
                    {
                        // skip deleted docs
                        cursor++;
                        continue;
                    }

                    // cursor is on a live doc: extend the run over the following live docs,
                    // up to MAX_RAW_MERGE_DOCS, so the whole run is copied as raw bytes.
                    int runStart = cursor;
                    int runLength = 0;
                    while (true)
                    {
                        cursor++;
                        runLength++;
                        if (cursor >= docLimit)
                        {
                            break;
                        }
                        if (!live.Get(cursor))
                        {
                            // The run ends at a deleted doc; step over it right away.
                            cursor++;
                            break;
                        }
                        if (runLength >= MAX_RAW_MERGE_DOCS)
                        {
                            break;
                        }
                    }

                    matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, runStart, runLength);
                    AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, runLength);
                    copiedDocs += runLength;
                    mergeState.checkAbort.Work(300 * runLength);
                }
            }
            return copiedDocs;
        }
 /// <summary>
 /// Copies the term vectors of every document of <paramref name="reader"/>,
 /// from document 0 up to <c>reader.MaxDoc</c>, without consulting live docs.
 /// </summary>
 /// <param name="mergeState">Merge state; its <c>checkAbort</c> is notified after each copy step.</param>
 /// <param name="matchingVectorsReader">When non-null, documents are bulk-copied as raw bytes from this reader; when null, each document's vectors are re-added one at a time.</param>
 /// <param name="reader">Source of the documents.</param>
 /// <param name="rawDocLengths">Scratch buffer filled by <c>RawDocs</c> and handed on to <c>AddRawDocuments</c>.</param>
 /// <param name="rawDocLengths2">Second scratch buffer, used the same way.</param>
 /// <returns><c>reader.MaxDoc</c>.</returns>
 private int CopyVectorsNoDeletions(MergeState mergeState, Lucene40TermVectorsReader matchingVectorsReader, AtomicReader reader, int[] rawDocLengths, int[] rawDocLengths2)
 {
     int docTotal = reader.MaxDoc;

     if (matchingVectorsReader == null)
     {
         // No matching vectors reader: re-add the vectors document by document.
         for (int doc = 0; doc < docTotal; doc++)
         {
             // NOTE: the vectors must first be assigned to a local and only then
             // passed to AddAllDocVectors; see LUCENE-1282
             Fields docVectors = reader.GetTermVectors(doc);
             AddAllDocVectors(docVectors, mergeState);
             mergeState.checkAbort.Work(300);
         }
         return docTotal;
     }

     // We can bulk-copy because the fieldInfos are "congruent".
     // Documents are moved in chunks of at most MAX_RAW_MERGE_DOCS.
     for (int copied = 0; copied < docTotal;)
     {
         int chunk = Math.Min(MAX_RAW_MERGE_DOCS, docTotal - copied);
         matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, copied, chunk);
         AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, chunk);
         copied += chunk;
         mergeState.checkAbort.Work(300 * chunk);
     }
     return docTotal;
 }
 /// <summary>
 /// Bulk-copies <paramref name="numDocs"/> documents from <paramref name="reader"/>
 /// into this writer's streams. Used to speed up merging when the field
 /// numbers are congruent.
 /// </summary>
 /// <param name="reader">Reader whose <c>TvdStream</c> and <c>TvfStream</c> supply the raw bytes.</param>
 /// <param name="tvdLengths">Per-document byte lengths in the tvd stream; the first <paramref name="numDocs"/> entries are read.</param>
 /// <param name="tvfLengths">Per-document byte lengths in the tvf stream; the first <paramref name="numDocs"/> entries are read.</param>
 /// <param name="numDocs">Number of documents to copy.</param>
 private void AddRawDocuments(Lucene40TermVectorsReader reader, int[] tvdLengths, int[] tvfLengths, int numDocs)
 {
     long tvdStart = Tvd.FilePointer;
     long tvfStart = Tvf.FilePointer;

     // Running byte totals of the documents handled so far.
     long tvdBytes = 0;
     long tvfBytes = 0;

     // For each document, record in Tvx where its data will start in Tvd and in Tvf.
     for (int doc = 0; doc < numDocs; doc++)
     {
         Tvx.WriteLong(tvdStart + tvdBytes);
         tvdBytes += tvdLengths[doc];
         Tvx.WriteLong(tvfStart + tvfBytes);
         tvfBytes += tvfLengths[doc];
     }

     // Then move the data itself, one copy per stream.
     Tvd.CopyBytes(reader.TvdStream, tvdBytes);
     Tvf.CopyBytes(reader.TvfStream, tvfBytes);

     // Both outputs must now sit exactly at the computed end positions.
     Debug.Assert(Tvd.FilePointer == tvdStart + tvdBytes);
     Debug.Assert(Tvf.FilePointer == tvfStart + tvfBytes);
 }