Beispiel #1
0
        private int CopyVectorsNoDeletions(MergeState mergeState, Lucene40TermVectorsReader matchingVectorsReader, AtomicReader reader, int[] rawDocLengths, int[] rawDocLengths2)
        {
            int maxDoc = reader.MaxDoc;

            if (matchingVectorsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                int docCount = 0;
                while (docCount < maxDoc)
                {
                    int len = Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
                    matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, docCount, len);
                    AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
                    docCount += len;
                    mergeState.CheckAbort.Work(300 * len);
                }
            }
            else
            {
                for (int docNum = 0; docNum < maxDoc; docNum++)
                {
                    // NOTE: it's very important to first assign to vectors then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    Fields vectors = reader.GetTermVectors(docNum);
                    AddAllDocVectors(vectors, mergeState);
                    mergeState.CheckAbort.Work(300);
                }
            }
            return(maxDoc);
        }
Beispiel #2
0
 public TVTerms(Lucene40TermVectorsReader outerInstance, long tvfFP)
 {
     this.outerInstance = outerInstance;
     outerInstance.tvf.Seek(tvfFP);
     numTerms = outerInstance.tvf.ReadVInt32();
     byte bits = outerInstance.tvf.ReadByte();
     storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
     storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
     storePayloads = (bits & STORE_PAYLOAD_WITH_TERMVECTOR) != 0;
     tvfFPStart = outerInstance.tvf.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
 }
Beispiel #3
0
            public TVTerms(Lucene40TermVectorsReader outerInstance, long tvfFP)
            {
                this.outerInstance = outerInstance;
                outerInstance.tvf.Seek(tvfFP);
                numTerms = outerInstance.tvf.ReadVInt32();
                byte bits = outerInstance.tvf.ReadByte();

                storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
                storeOffsets   = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
                storePayloads  = (bits & STORE_PAYLOAD_WITH_TERMVECTOR) != 0;
                tvfFPStart     = outerInstance.tvf.GetFilePointer();
            }
Beispiel #4
0
        /// <summary>
        /// Do a bulk copy of numDocs documents from reader to our
        /// streams.  This is used to expedite merging, if the
        /// field numbers are congruent.
        /// </summary>
        private void AddRawDocuments(Lucene40TermVectorsReader reader, int[] tvdLengths, int[] tvfLengths, int numDocs)
        {
            long tvdPosition = tvd.GetFilePointer();
            long tvfPosition = tvf.GetFilePointer();
            long tvdStart    = tvdPosition;
            long tvfStart    = tvfPosition;

            for (int i = 0; i < numDocs; i++)
            {
                tvx.WriteInt64(tvdPosition);
                tvdPosition += tvdLengths[i];
                tvx.WriteInt64(tvfPosition);
                tvfPosition += tvfLengths[i];
            }
            tvd.CopyBytes(reader.TvdStream, tvdPosition - tvdStart);
            tvf.CopyBytes(reader.TvfStream, tvfPosition - tvfStart);
            Debug.Assert(tvd.GetFilePointer() == tvdPosition);
            Debug.Assert(tvf.GetFilePointer() == tvfPosition);
        }
            public TVFields(Lucene40TermVectorsReader outerInstance, int docID)
            {
                this.outerInstance = outerInstance;
                outerInstance.SeekTvx(docID);
                outerInstance.tvd.Seek(outerInstance.tvx.ReadInt64());

                int fieldCount = outerInstance.tvd.ReadVInt32();

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(fieldCount >= 0);
                }
                if (fieldCount != 0)
                {
                    fieldNumbers = new int[fieldCount];
                    fieldFPs     = new long[fieldCount];
                    for (int fieldUpto = 0; fieldUpto < fieldCount; fieldUpto++)
                    {
                        int fieldNumber = outerInstance.tvd.ReadVInt32();
                        fieldNumbers[fieldUpto]         = fieldNumber;
                        fieldNumberToIndex[fieldNumber] = fieldUpto;
                    }

                    long position = outerInstance.tvx.ReadInt64();
                    fieldFPs[0] = position;
                    for (int fieldUpto = 1; fieldUpto < fieldCount; fieldUpto++)
                    {
                        position           += outerInstance.tvd.ReadVInt64();
                        fieldFPs[fieldUpto] = position;
                    }
                }
                else
                {
                    // TODO: we can improve writer here, eg write 0 into
                    // tvx file, so we know on first read from tvx that
                    // this doc has no TVs
                    fieldNumbers = null;
                    fieldFPs     = null;
                }
            }
        /// <summary>
        /// Do a bulk copy of numDocs documents from reader to our
        /// streams.  This is used to expedite merging, if the
        /// field numbers are congruent.
        /// </summary>
        private void AddRawDocuments(Lucene40TermVectorsReader reader, int[] tvdLengths, int[] tvfLengths, int numDocs)
        {
            long tvdPosition = tvd.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
            long tvfPosition = tvf.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
            long tvdStart    = tvdPosition;
            long tvfStart    = tvfPosition;

            for (int i = 0; i < numDocs; i++)
            {
                tvx.WriteInt64(tvdPosition);
                tvdPosition += tvdLengths[i];
                tvx.WriteInt64(tvfPosition);
                tvfPosition += tvfLengths[i];
            }
            tvd.CopyBytes(reader.TvdStream, tvdPosition - tvdStart);
            tvf.CopyBytes(reader.TvfStream, tvfPosition - tvfStart);
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(tvd.Position == tvdPosition); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
                Debugging.Assert(tvf.Position == tvfPosition); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
            }
        }
Beispiel #7
0
        public override int Merge(MergeState mergeState)
        {
            // Used for bulk-reading raw bytes for term vectors
            int[] rawDocLengths  = new int[MAX_RAW_MERGE_DOCS];
            int[] rawDocLengths2 = new int[MAX_RAW_MERGE_DOCS];

            int idx     = 0;
            int numDocs = 0;

            for (int i = 0; i < mergeState.Readers.Count; i++)
            {
                AtomicReader reader = mergeState.Readers[i];

                SegmentReader             matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++];
                Lucene40TermVectorsReader matchingVectorsReader = null;
                if (matchingSegmentReader != null)
                {
                    TermVectorsReader vectorsReader = matchingSegmentReader.TermVectorsReader;

                    if (vectorsReader != null && vectorsReader is Lucene40TermVectorsReader)
                    {
                        matchingVectorsReader = (Lucene40TermVectorsReader)vectorsReader;
                    }
                }
                if (reader.LiveDocs != null)
                {
                    numDocs += CopyVectorsWithDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
                }
                else
                {
                    numDocs += CopyVectorsNoDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
                }
            }
            Finish(mergeState.FieldInfos, numDocs);
            return(numDocs);
        }
Beispiel #8
0
 // NOTE: tvf is pre-positioned by caller
 public TVTermsEnum(Lucene40TermVectorsReader outerInstance)
 {
     this.outerInstance = outerInstance;
     this.origTVF       = outerInstance.tvf;
     tvf = (IndexInput)origTVF.Clone();
 }
Beispiel #9
0
        private int CopyVectorsWithDeletions(MergeState mergeState, Lucene40TermVectorsReader matchingVectorsReader, AtomicReader reader, int[] rawDocLengths, int[] rawDocLengths2)
        {
            int   maxDoc       = reader.MaxDoc;
            IBits liveDocs     = reader.LiveDocs;
            int   totalNumDocs = 0;

            if (matchingVectorsReader != null)
            {
                // We can bulk-copy because the fieldInfos are "congruent"
                for (int docNum = 0; docNum < maxDoc;)
                {
                    if (!liveDocs.Get(docNum))
                    {
                        // skip deleted docs
                        ++docNum;
                        continue;
                    }
                    // We can optimize this case (doing a bulk byte copy) since the field
                    // numbers are identical
                    int start = docNum, numDocs = 0;
                    do
                    {
                        docNum++;
                        numDocs++;
                        if (docNum >= maxDoc)
                        {
                            break;
                        }
                        if (!liveDocs.Get(docNum))
                        {
                            docNum++;
                            break;
                        }
                    } while (numDocs < MAX_RAW_MERGE_DOCS);

                    matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
                    AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
                    totalNumDocs += numDocs;
                    mergeState.CheckAbort.Work(300 * numDocs);
                }
            }
            else
            {
                for (int docNum = 0; docNum < maxDoc; docNum++)
                {
                    if (!liveDocs.Get(docNum))
                    {
                        // skip deleted docs
                        continue;
                    }

                    // NOTE: it's very important to first assign to vectors then pass it to
                    // termVectorsWriter.addAllDocVectors; see LUCENE-1282
                    Fields vectors = reader.GetTermVectors(docNum);
                    AddAllDocVectors(vectors, mergeState);
                    totalNumDocs++;
                    mergeState.CheckAbort.Work(300);
                }
            }
            return(totalNumDocs);
        }