Exemple #1
0
        /// <summary>
        /// Asks each underlying reader/producer (stored fields, term vectors,
        /// terms/postings, norms and doc values) to check its own integrity.
        /// Components that are <c>null</c> are skipped; the stored fields
        /// reader is always checked.
        /// </summary>
        public override void CheckIntegrity()
        {
            EnsureOpen();

            // Stored fields: checked unconditionally.
            FieldsReader.CheckIntegrity();

            // Term vectors: read the property once, then check only if present.
            TermVectorsReader vectors = TermVectorsReader;
            vectors?.CheckIntegrity();

            // Terms/postings and norms are both optional.
            core.fields?.CheckIntegrity();
            core.normsProducer?.CheckIntegrity();

            // Doc values: nothing left to do when there are no producers.
            if (dvProducers == null)
            {
                return;
            }

            foreach (DocValuesProducer dvProducer in dvProducers)
            {
                dvProducer.CheckIntegrity();
            }
        }
        /// <summary>
        /// Copies the term vectors of every reader in <paramref name="mergeState"/>
        /// into this writer and then calls <c>Finish</c> with the total.
        /// </summary>
        /// <param name="mergeState">Supplies the readers to merge and, per reader, the matching segment reader (possibly <c>null</c>).</param>
        /// <returns>The sum of the document counts reported by the copy helpers.</returns>
        public override int Merge(MergeState mergeState)
        {
            // Scratch buffers passed to the copy helpers for bulk-reading raw term vector bytes.
            int[] rawDocLengths  = new int[MAX_RAW_MERGE_DOCS];
            int[] rawDocLengths2 = new int[MAX_RAW_MERGE_DOCS];

            int numDocs = 0;

            for (int i = 0; i < mergeState.Readers.Count; i++)
            {
                AtomicReader reader = mergeState.Readers[i];

                // MatchingSegmentReaders is indexed in step with Readers.
                SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[i];

                // Stays null unless there is a matching segment reader whose
                // term vectors reader is a Lucene40TermVectorsReader.
                Lucene40TermVectorsReader matchingVectorsReader =
                    matchingSegmentReader?.TermVectorsReader as Lucene40TermVectorsReader;

                // Readers that expose live docs go through the deletion-aware helper.
                numDocs += reader.LiveDocs != null
                    ? CopyVectorsWithDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2)
                    : CopyVectorsNoDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
            }

            Finish(mergeState.FieldInfos, numDocs);
            return numDocs;
        }
 /// <summary>
 /// Creates the reader and stores <paramref name="in"/> in the <c>in</c> field.
 /// </summary>
 /// <param name="in">The term vectors reader to keep a reference to.</param>
 internal AssertingTermVectorsReader(TermVectorsReader @in) => this.@in = @in;
Exemple #4
0
        /// <summary>
        /// Merges the term vectors of every reader in <paramref name="mergeState"/>
        /// into this writer. When a reader has a matching
        /// <c>CompressingTermVectorsReader</c> whose version, compression mode,
        /// chunk size and packed-ints version equal this writer's, eligible chunks
        /// are copied as raw bytes; otherwise each live document is re-added one by one.
        /// </summary>
        /// <param name="mergeState">Supplies the readers, their matching segment readers, the field infos and the abort checker.</param>
        /// <returns>The number of documents written, which is also passed to <c>Finish</c>.</returns>
        public override int Merge(MergeState mergeState)
        {
            int docCount = 0;
            int idx      = 0; // walks MatchingSegmentReaders in step with Readers

            foreach (AtomicReader reader in mergeState.Readers)
            {
                SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++];
                CompressingTermVectorsReader matchingVectorsReader = null;
                if (matchingSegmentReader != null)
                {
                    TermVectorsReader vectorsReader = matchingSegmentReader.TermVectorsReader;
                    // we can only bulk-copy if the matching reader is also a CompressingTermVectorsReader
                    if (vectorsReader != null && vectorsReader is CompressingTermVectorsReader)
                    {
                        matchingVectorsReader = (CompressingTermVectorsReader)vectorsReader;
                    }
                }

                int  maxDoc   = reader.MaxDoc;
                Bits liveDocs = reader.LiveDocs;

                // Fall back to the per-document path when there is no matching reader or
                // any of its format parameters differs from this writer's.
                if (matchingVectorsReader == null || matchingVectorsReader.Version != VERSION_CURRENT || matchingVectorsReader.CompressionMode != CompressionMode || matchingVectorsReader.ChunkSize != ChunkSize || matchingVectorsReader.PackedIntsVersion != PackedInts.VERSION_CURRENT)
                {
                    // naive merge...
                    // Visit each document returned by NextLiveDoc and re-add its vectors.
                    for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
                    {
                        Fields vectors = reader.GetTermVectors(i);
                        AddAllDocVectors(vectors, mergeState);
                        ++docCount;
                        mergeState.checkAbort.Work(300);
                    }
                }
                else
                {
                    CompressingStoredFieldsIndexReader index = matchingVectorsReader.Index;
                    IndexInput vectorsStreamOrig             = matchingVectorsReader.VectorsStream;
                    // Rewind the source stream and read through a checksumming wrapper around
                    // a clone of it, so the footer can be checked once this reader is done.
                    vectorsStreamOrig.Seek(0);
                    ChecksumIndexInput vectorsStream = new BufferedChecksumIndexInput((IndexInput)vectorsStreamOrig.Clone());

                    // No increment clause: every branch below advances i itself.
                    for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc;)
                    {
                        // We make sure to move the checksum input in any case, otherwise the final
                        // integrity check might need to read the whole file a second time
                        long startPointer = index.GetStartPointer(i);
                        if (startPointer > vectorsStream.FilePointer)
                        {
                            vectorsStream.Seek(startPointer);
                        }
                        // A chunk starts at i when i is the first document or the previous document
                        // has a smaller start pointer; the header is only read when PendingDocs is empty.
                        if ((PendingDocs.Count == 0) && (i == 0 || index.GetStartPointer(i - 1) < startPointer)) // start of a chunk
                        {
                            // Chunk header: first document of the chunk and number of documents in it.
                            int docBase   = vectorsStream.ReadVInt();
                            int chunkDocs = vectorsStream.ReadVInt();
                            Debug.Assert(docBase + chunkDocs <= matchingSegmentReader.MaxDoc);
                            // Raw copy requires that the chunk does not reach MaxDoc and that
                            // NextDeletedDoc returns the end bound for [docBase, docBase + chunkDocs)
                            // (presumably meaning the chunk has no deleted documents -- verify against NextDeletedDoc).
                            if (docBase + chunkDocs < matchingSegmentReader.MaxDoc && NextDeletedDoc(docBase, liveDocs, docBase + chunkDocs) == docBase + chunkDocs)
                            {
                                // The chunk's remaining bytes run from the current position up to
                                // the start pointer of the first document after the chunk.
                                long chunkEnd    = index.GetStartPointer(docBase + chunkDocs);
                                long chunkLength = chunkEnd - vectorsStream.FilePointer;
                                // Record the chunk in this writer's index, write a fresh header that uses
                                // docCount as the doc base, then copy the rest of the chunk byte for byte.
                                IndexWriter.WriteIndex(chunkDocs, this.VectorsStream.FilePointer);
                                this.VectorsStream.WriteVInt(docCount);
                                this.VectorsStream.WriteVInt(chunkDocs);
                                this.VectorsStream.CopyBytes(vectorsStream, chunkLength);
                                docCount     += chunkDocs;
                                this.NumDocs += chunkDocs;
                                mergeState.checkAbort.Work(300 * chunkDocs);
                                i = NextLiveDoc(docBase + chunkDocs, liveDocs, maxDoc);
                            }
                            else
                            {
                                // Chunk cannot be copied whole: re-add the documents NextLiveDoc
                                // yields within it one at a time.
                                for (; i < docBase + chunkDocs; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
                                {
                                    Fields vectors = reader.GetTermVectors(i);
                                    AddAllDocVectors(vectors, mergeState);
                                    ++docCount;
                                    mergeState.checkAbort.Work(300);
                                }
                            }
                        }
                        else
                        {
                            // Not at a chunk start (or documents are still pending): add this
                            // single document through the regular path.
                            Fields vectors = reader.GetTermVectors(i);
                            AddAllDocVectors(vectors, mergeState);
                            ++docCount;
                            mergeState.checkAbort.Work(300);
                            i = NextLiveDoc(i + 1, liveDocs, maxDoc);
                        }
                    }

                    // Jump to the footer and let CodecUtil check it against the checksumming stream.
                    vectorsStream.Seek(vectorsStream.Length() - CodecUtil.FooterLength());
                    CodecUtil.CheckFooter(vectorsStream);
                }
            }
            Finish(mergeState.FieldInfos, docCount);
            return(docCount);
        }
Exemple #5
0
        /// <summary>
        /// Opens the core readers of one segment: the postings fields producer,
        /// the norms producer (only when the field infos report norms), the stored
        /// fields reader and the term vectors reader (only when the field infos
        /// report vectors). If anything fails after the argument check,
        /// <c>DecRef()</c> is called before the exception propagates.
        /// </summary>
        /// <param name="owner">Segment reader whose <c>FieldInfos</c> are used.</param>
        /// <param name="dir">Directory of the segment; also the parent of the compound file when one is used.</param>
        /// <param name="si">Commit info naming the segment, its codec and whether it uses a compound file.</param>
        /// <param name="context">I/O context passed to every reader that is opened.</param>
        /// <param name="termsIndexDivisor">Terms index divisor handed to the segment read state; must not be 0.</param>
        /// <exception cref="ArgumentException"><paramref name="termsIndexDivisor"/> is 0.</exception>
        internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
        {
            fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
            termVectorsLocal  = new AnonymousTermVectorsLocal(this);

            if (termsIndexDivisor == 0)
            {
                throw new ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
            }

            Codec codec = si.Info.Codec;

            // Despite the name, this is the compound-file directory only when the segment
            // uses a compound file; otherwise it is the segment's own directory.
            Directory cfsDir;

            bool opened = false;

            try
            {
                if (si.Info.UseCompoundFile)
                {
                    var compoundFileName = IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
                    cfsReader = new CompoundFileDirectory(dir, compoundFileName, context, false);
                    cfsDir    = cfsReader;
                }
                else
                {
                    cfsReader = null;
                    cfsDir    = dir;
                }

                FieldInfos fieldInfos = owner.FieldInfos;

                this.termsIndexDivisor = termsIndexDivisor;
                PostingsFormat postingsFormat = codec.PostingsFormat;
                SegmentReadState readState    = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);

                // Postings: the codec must hand back a producer.
                fields = postingsFormat.FieldsProducer(readState);
                Debug.Assert(fields != null);

                // Norms: no norms file is written when there are no norms, so the codec is
                // only asked for a producer when the field infos say norms exist.
                // TODO: relying on the codec to cope gracefully with a missing norms file is fragile.
                if (fieldInfos.HasNorms)
                {
                    normsProducer = codec.NormsFormat.NormsProducer(readState);
                    Debug.Assert(normsProducer != null);
                }
                else
                {
                    normsProducer = null;
                }

                // Stored fields are always opened.
                fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);

                // Term vector files are opened only when the segment has vectors.
                termVectorsReaderOrig = fieldInfos.HasVectors
                    ? si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context)
                    : null;

                opened = true;
            }
            finally
            {
                if (!opened)
                {
                    DecRef();
                }
            }
        }
 /// <summary>
 /// Creates the reader and stores <paramref name="in"/> in the <c>in</c> field.
 /// </summary>
 /// <param name="in">The term vectors reader to keep a reference to.</param>
 internal AssertingTermVectorsReader(TermVectorsReader @in) => this.@in = @in;
		/// <summary> Copies the term vectors of every document of <paramref name="reader"/>
		/// (document numbers 0 up to, but excluding, its MaxDoc) to
		/// <paramref name="termVectorsWriter"/>, without checking for deletions.
		/// </summary>
		/// <param name="termVectorsWriter">Destination writer.</param>
		/// <param name="matchingVectorsReader">Reader to bulk-copy raw documents from, or null to copy document by document.</param>
		/// <param name="reader">Source of MaxDoc and of the per-document term vectors.</param>
		private void  CopyVectorsNoDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
		{
			int maxDoc = reader.MaxDoc;

			if (matchingVectorsReader == null)
			{
				// No matching reader: go through the regular per-document API.
				for (int doc = 0; doc < maxDoc; doc++)
				{
					// NOTE: the vectors must be assigned to a local before being passed to
					// termVectorsWriter.AddAllDocVectors; see LUCENE-1282
					ITermFreqVector[] docVectors = reader.GetTermFreqVectors(doc);
					termVectorsWriter.AddAllDocVectors(docVectors);
					checkAbort.Work(300);
				}
				return;
			}

			// The fieldInfos are "congruent", so raw documents can be copied in
			// batches of at most MAX_RAW_MERGE_DOCS.
			for (int copied = 0; copied < maxDoc; )
			{
				int batch = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - copied);
				matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, copied, batch);
				termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, batch);
				copied += batch;
				checkAbort.Work(300 * batch);
			}
		}
		/// <summary> Copies the term vectors of the documents of <paramref name="reader"/>
		/// that are not deleted to <paramref name="termVectorsWriter"/>.
		/// </summary>
		/// <param name="termVectorsWriter">Destination writer.</param>
		/// <param name="matchingVectorsReader">Reader to bulk-copy raw documents from, or null to copy document by document.</param>
		/// <param name="reader">Source of MaxDoc, the deletion status and the per-document term vectors.</param>
		private void  CopyVectorsWithDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
		{
			int maxDoc = reader.MaxDoc;

			if (matchingVectorsReader == null)
			{
				// No matching reader: copy each non-deleted document through the regular API.
				for (int doc = 0; doc < maxDoc; doc++)
				{
					if (!reader.IsDeleted(doc))
					{
						// NOTE: the vectors must be assigned to a local before being passed to
						// termVectorsWriter.AddAllDocVectors; see LUCENE-1282
						ITermFreqVector[] docVectors = reader.GetTermFreqVectors(doc);
						termVectorsWriter.AddAllDocVectors(docVectors);
						checkAbort.Work(300);
					}
				}
				return;
			}

			// The fieldInfos are "congruent", so runs of consecutive non-deleted
			// documents can be copied as raw bytes.
			int docNum = 0;
			while (docNum < maxDoc)
			{
				if (reader.IsDeleted(docNum))
				{
					// Deleted document: step over it.
					docNum++;
					continue;
				}

				// docNum is not deleted: grow the run until the end of the segment, a
				// deleted document or the MAX_RAW_MERGE_DOCS limit is reached.
				int runStart = docNum;
				int runLength = 0;
				while (true)
				{
					docNum++;
					runLength++;
					if (docNum >= maxDoc)
					{
						break;
					}
					if (reader.IsDeleted(docNum))
					{
						// The deleted document ends the run and is itself skipped.
						docNum++;
						break;
					}
					if (runLength >= MAX_RAW_MERGE_DOCS)
					{
						break;
					}
				}

				matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, runStart, runLength);
				termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, runLength);
				checkAbort.Work(300 * runLength);
			}
		}
		/// <summary> Bulk-copies <paramref name="numDocs"/> documents from
		/// <paramref name="reader"/> into this writer's streams. For each document the
		/// current tvd and tvf positions are written to tvx; afterwards the summed
		/// byte lengths are copied from the reader's tvd and tvf streams. Used to
		/// speed up merging when the field numbers are congruent.
		/// </summary>
		/// <param name="reader">Reader whose tvd/tvf streams supply the bytes.</param>
		/// <param name="tvdLengths">Per-document byte lengths in the tvd stream.</param>
		/// <param name="tvfLengths">Per-document byte lengths in the tvf stream.</param>
		/// <param name="numDocs">Number of leading entries of the length arrays to use.</param>
		internal void  AddRawDocuments(TermVectorsReader reader, int[] tvdLengths, int[] tvfLengths, int numDocs)
		{
			long tvdStart = tvd.FilePointer;
			long tvfStart = tvf.FilePointer;

			// Running positions each stream will have after the documents seen so far.
			long tvdEnd = tvdStart;
			long tvfEnd = tvfStart;

			for (int doc = 0; doc < numDocs; doc++)
			{
				tvx.WriteLong(tvdEnd);
				tvdEnd += tvdLengths[doc];
				tvx.WriteLong(tvfEnd);
				tvfEnd += tvfLengths[doc];
			}

			long tvdBytes = tvdEnd - tvdStart;
			long tvfBytes = tvfEnd - tvfStart;
			tvd.CopyBytes(reader.GetTvdStream(), tvdBytes);
			tvf.CopyBytes(reader.GetTvfStream(), tvfBytes);

			// After the copy both streams must sit exactly at the predicted end positions.
			System.Diagnostics.Debug.Assert(tvd.FilePointer == tvdEnd);
			System.Diagnostics.Debug.Assert(tvf.FilePointer == tvfEnd);
		}