public override void CheckIntegrity()
{
    EnsureOpen();

    // stored fields
    FieldsReader.CheckIntegrity();

    // term vectors
    TermVectorsReader termVectorsReader = TermVectorsReader;
    if (termVectorsReader != null)
    {
        termVectorsReader.CheckIntegrity();
    }

    // terms/postings
    if (core.fields != null)
    {
        core.fields.CheckIntegrity();
    }

    // norms
    if (core.normsProducer != null)
    {
        core.normsProducer.CheckIntegrity();
    }

    // docvalues
    if (dvProducers != null)
    {
        foreach (DocValuesProducer producer in dvProducers)
        {
            producer.CheckIntegrity();
        }
    }
}
public override int Merge(MergeState mergeState)
{
    // Used for bulk-reading raw bytes for term vectors
    int[] rawDocLengths = new int[MAX_RAW_MERGE_DOCS];
    int[] rawDocLengths2 = new int[MAX_RAW_MERGE_DOCS];

    int idx = 0;
    int numDocs = 0;
    for (int i = 0; i < mergeState.Readers.Count; i++)
    {
        AtomicReader reader = mergeState.Readers[i];

        SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++];
        Lucene40TermVectorsReader matchingVectorsReader = null;
        if (matchingSegmentReader != null)
        {
            TermVectorsReader vectorsReader = matchingSegmentReader.TermVectorsReader;
            if (vectorsReader != null && vectorsReader is Lucene40TermVectorsReader lucene40TermVectorsReader)
            {
                matchingVectorsReader = lucene40TermVectorsReader;
            }
        }
        if (reader.LiveDocs != null)
        {
            numDocs += CopyVectorsWithDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
        }
        else
        {
            numDocs += CopyVectorsNoDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
        }
    }
    Finish(mergeState.FieldInfos, numDocs);
    return numDocs;
}
internal AssertingTermVectorsReader(TermVectorsReader @in)
{
    this.@in = @in;
}
public override int Merge(MergeState mergeState)
{
    int docCount = 0;
    int idx = 0;

    foreach (AtomicReader reader in mergeState.Readers)
    {
        SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++];
        CompressingTermVectorsReader matchingVectorsReader = null;
        if (matchingSegmentReader != null)
        {
            TermVectorsReader vectorsReader = matchingSegmentReader.TermVectorsReader;
            // we can only bulk-copy if the matching reader is also a CompressingTermVectorsReader
            if (vectorsReader != null && vectorsReader is CompressingTermVectorsReader)
            {
                matchingVectorsReader = (CompressingTermVectorsReader)vectorsReader;
            }
        }

        int maxDoc = reader.MaxDoc;
        Bits liveDocs = reader.LiveDocs;

        if (matchingVectorsReader == null
            || matchingVectorsReader.Version != VERSION_CURRENT
            || matchingVectorsReader.CompressionMode != CompressionMode
            || matchingVectorsReader.ChunkSize != ChunkSize
            || matchingVectorsReader.PackedIntsVersion != PackedInts.VERSION_CURRENT)
        {
            // naive merge...
            for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
            {
                Fields vectors = reader.GetTermVectors(i);
                AddAllDocVectors(vectors, mergeState);
                ++docCount;
                mergeState.checkAbort.Work(300);
            }
        }
        else
        {
            CompressingStoredFieldsIndexReader index = matchingVectorsReader.Index;
            IndexInput vectorsStreamOrig = matchingVectorsReader.VectorsStream;
            vectorsStreamOrig.Seek(0);
            ChecksumIndexInput vectorsStream = new BufferedChecksumIndexInput((IndexInput)vectorsStreamOrig.Clone());

            for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; )
            {
                // We make sure to move the checksum input in any case, otherwise the final
                // integrity check might need to read the whole file a second time
                long startPointer = index.GetStartPointer(i);
                if (startPointer > vectorsStream.FilePointer)
                {
                    vectorsStream.Seek(startPointer);
                }
                if ((PendingDocs.Count == 0) && (i == 0 || index.GetStartPointer(i - 1) < startPointer)) // start of a chunk
                {
                    int docBase = vectorsStream.ReadVInt();
                    int chunkDocs = vectorsStream.ReadVInt();
                    Debug.Assert(docBase + chunkDocs <= matchingSegmentReader.MaxDoc);
                    if (docBase + chunkDocs < matchingSegmentReader.MaxDoc
                        && NextDeletedDoc(docBase, liveDocs, docBase + chunkDocs) == docBase + chunkDocs)
                    {
                        long chunkEnd = index.GetStartPointer(docBase + chunkDocs);
                        long chunkLength = chunkEnd - vectorsStream.FilePointer;
                        IndexWriter.WriteIndex(chunkDocs, this.VectorsStream.FilePointer);
                        this.VectorsStream.WriteVInt(docCount);
                        this.VectorsStream.WriteVInt(chunkDocs);
                        this.VectorsStream.CopyBytes(vectorsStream, chunkLength);
                        docCount += chunkDocs;
                        this.NumDocs += chunkDocs;
                        mergeState.checkAbort.Work(300 * chunkDocs);
                        i = NextLiveDoc(docBase + chunkDocs, liveDocs, maxDoc);
                    }
                    else
                    {
                        for (; i < docBase + chunkDocs; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
                        {
                            Fields vectors = reader.GetTermVectors(i);
                            AddAllDocVectors(vectors, mergeState);
                            ++docCount;
                            mergeState.checkAbort.Work(300);
                        }
                    }
                }
                else
                {
                    Fields vectors = reader.GetTermVectors(i);
                    AddAllDocVectors(vectors, mergeState);
                    ++docCount;
                    mergeState.checkAbort.Work(300);
                    i = NextLiveDoc(i + 1, liveDocs, maxDoc);
                }
            }

            vectorsStream.Seek(vectorsStream.Length() - CodecUtil.FooterLength());
            CodecUtil.CheckFooter(vectorsStream);
        }
    }
    Finish(mergeState.FieldInfos, docCount);
    return docCount;
}
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
{
    fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
    termVectorsLocal = new AnonymousTermVectorsLocal(this);

    if (termsIndexDivisor == 0)
    {
        throw new ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
    }

    Codec codec = si.Info.Codec;
    Directory cfsDir; // confusing name: if (cfs) it's the cfsDir, otherwise it's the segment's directory.

    bool success = false;

    try
    {
        if (si.Info.UseCompoundFile)
        {
            cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
        }
        else
        {
            cfsReader = null;
            cfsDir = dir;
        }

        FieldInfos fieldInfos = owner.FieldInfos;

        this.termsIndexDivisor = termsIndexDivisor;
        PostingsFormat format = codec.PostingsFormat;
        SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);

        // Ask codec for its Fields
        fields = format.FieldsProducer(segmentReadState);
        Debug.Assert(fields != null);

        // ask codec for its Norms:
        // TODO: since we don't write any norms file if there are no norms,
        // kinda janky to assume the codec handles the case of no norms file at all gracefully?!
        if (fieldInfos.HasNorms)
        {
            normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
            Debug.Assert(normsProducer != null);
        }
        else
        {
            normsProducer = null;
        }

        fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);

        if (fieldInfos.HasVectors) // open term vector files only as needed
        {
            termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
        }
        else
        {
            termVectorsReaderOrig = null;
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            DecRef();
        }
    }
}
private void CopyVectorsNoDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
{
    int maxDoc = reader.MaxDoc;
    if (matchingVectorsReader != null)
    {
        // We can bulk-copy because the fieldInfos are "congruent"
        int docCount = 0;
        while (docCount < maxDoc)
        {
            int len = System.Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
            matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, docCount, len);
            termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len);
            docCount += len;
            checkAbort.Work(300 * len);
        }
    }
    else
    {
        for (int docNum = 0; docNum < maxDoc; docNum++)
        {
            // NOTE: it's very important to first assign to vectors then pass it to
            // termVectorsWriter.AddAllDocVectors; see LUCENE-1282
            ITermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
            termVectorsWriter.AddAllDocVectors(vectors);
            checkAbort.Work(300);
        }
    }
}
private void CopyVectorsWithDeletions(TermVectorsWriter termVectorsWriter, TermVectorsReader matchingVectorsReader, IndexReader reader)
{
    int maxDoc = reader.MaxDoc;
    if (matchingVectorsReader != null)
    {
        // We can bulk-copy because the fieldInfos are "congruent"
        for (int docNum = 0; docNum < maxDoc; )
        {
            if (reader.IsDeleted(docNum))
            {
                // skip deleted docs
                ++docNum;
                continue;
            }
            // We can optimize this case (doing a bulk byte copy) since the field
            // numbers are identical
            int start = docNum, numDocs = 0;
            do
            {
                docNum++;
                numDocs++;
                if (docNum >= maxDoc)
                    break;
                if (reader.IsDeleted(docNum))
                {
                    docNum++;
                    break;
                }
            } while (numDocs < MAX_RAW_MERGE_DOCS);

            matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
            termVectorsWriter.AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
            checkAbort.Work(300 * numDocs);
        }
    }
    else
    {
        for (int docNum = 0; docNum < maxDoc; docNum++)
        {
            if (reader.IsDeleted(docNum))
            {
                // skip deleted docs
                continue;
            }
            // NOTE: it's very important to first assign to vectors then pass it to
            // termVectorsWriter.AddAllDocVectors; see LUCENE-1282
            ITermFreqVector[] vectors = reader.GetTermFreqVectors(docNum);
            termVectorsWriter.AddAllDocVectors(vectors);
            checkAbort.Work(300);
        }
    }
}
/// <summary>
/// Do a bulk copy of numDocs documents from reader to our
/// streams. This is used to expedite merging, if the
/// field numbers are congruent.
/// </summary>
internal void AddRawDocuments(TermVectorsReader reader, int[] tvdLengths, int[] tvfLengths, int numDocs)
{
    long tvdPosition = tvd.FilePointer;
    long tvfPosition = tvf.FilePointer;
    long tvdStart = tvdPosition;
    long tvfStart = tvfPosition;
    for (int i = 0; i < numDocs; i++)
    {
        tvx.WriteLong(tvdPosition);
        tvdPosition += tvdLengths[i];
        tvx.WriteLong(tvfPosition);
        tvfPosition += tvfLengths[i];
    }
    tvd.CopyBytes(reader.GetTvdStream(), tvdPosition - tvdStart);
    tvf.CopyBytes(reader.GetTvfStream(), tvfPosition - tvfStart);
    System.Diagnostics.Debug.Assert(tvd.FilePointer == tvdPosition);
    System.Diagnostics.Debug.Assert(tvf.FilePointer == tvfPosition);
}