/// <summary>
/// Copies every document's term vectors from <paramref name="reader"/> (which
/// contains no deletions) into this writer's streams.
/// </summary>
/// <param name="mergeState">Merge context; its <c>CheckAbort</c> is credited with work units.</param>
/// <param name="matchingVectorsReader">Non-null when the source segment's field numbering is
/// congruent with ours, enabling a raw byte-level bulk copy; otherwise null.</param>
/// <param name="reader">Source reader to copy term vectors from.</param>
/// <param name="rawDocLengths">Scratch array for per-doc tvd byte lengths.</param>
/// <param name="rawDocLengths2">Scratch array for per-doc tvf byte lengths.</param>
/// <returns>The number of documents copied (the reader's <c>MaxDoc</c>).</returns>
private int CopyVectorsNoDeletions(MergeState mergeState, Lucene40TermVectorsReader matchingVectorsReader, AtomicReader reader, int[] rawDocLengths, int[] rawDocLengths2)
{
    int maxDoc = reader.MaxDoc;
    if (matchingVectorsReader != null)
    {
        // Field numbering is congruent, so raw bytes can be copied in chunks
        // of up to MAX_RAW_MERGE_DOCS documents at a time.
        for (int copied = 0; copied < maxDoc; )
        {
            int chunk = Math.Min(MAX_RAW_MERGE_DOCS, maxDoc - copied);
            matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, copied, chunk);
            AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, chunk);
            copied += chunk;
            mergeState.CheckAbort.Work(300 * chunk);
        }
    }
    else
    {
        // Field numbers differ: fall back to re-writing vectors doc by doc.
        for (int docNum = 0; docNum < maxDoc; docNum++)
        {
            // NOTE: it's very important to first assign to vectors then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            Fields vectors = reader.GetTermVectors(docNum);
            AddAllDocVectors(vectors, mergeState);
            mergeState.CheckAbort.Work(300);
        }
    }
    return maxDoc;
}
/// <summary>
/// Positions the tvf stream at <paramref name="tvfFP"/> and reads the
/// per-field term vector header: the term count and the flag byte that
/// records whether positions, offsets and payloads were stored.
/// </summary>
/// <param name="outerInstance">Enclosing reader whose tvf stream is consumed.</param>
/// <param name="tvfFP">Absolute file pointer of this field's data in the tvf file.</param>
public TVTerms(Lucene40TermVectorsReader outerInstance, long tvfFP)
{
    this.outerInstance = outerInstance;
    var input = outerInstance.tvf;
    input.Seek(tvfFP);
    numTerms = input.ReadVInt32();
    byte flags = input.ReadByte();
    storePositions = (flags & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
    storeOffsets = (flags & STORE_OFFSET_WITH_TERMVECTOR) != 0;
    storePayloads = (flags & STORE_PAYLOAD_WITH_TERMVECTOR) != 0;
    // Remember where the term data begins so enumerators can re-seek here.
    tvfFPStart = input.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
}
/// <summary>
/// Positions the tvf stream at <paramref name="tvfFP"/> and reads the
/// per-field term vector header: the term count and the flag byte that
/// records whether positions, offsets and payloads were stored.
/// </summary>
/// <param name="outerInstance">Enclosing reader whose tvf stream is consumed.</param>
/// <param name="tvfFP">Absolute file pointer of this field's data in the tvf file.</param>
public TVTerms(Lucene40TermVectorsReader outerInstance, long tvfFP)
{
    this.outerInstance = outerInstance;
    outerInstance.tvf.Seek(tvfFP);
    numTerms = outerInstance.tvf.ReadVInt32();
    byte bits = outerInstance.tvf.ReadByte();
    storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
    storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
    storePayloads = (bits & STORE_PAYLOAD_WITH_TERMVECTOR) != 0;
    // Remember where the term data begins so enumerators can re-seek here.
    // Fixed for consistency: the rest of this codebase uses the renamed
    // Position property rather than getFilePointer().
    tvfFPStart = outerInstance.tvf.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
}
/// <summary>
/// Do a bulk copy of numDocs documents from reader to our
/// streams. This is used to expedite merging, if the
/// field numbers are congruent.
/// </summary>
/// <param name="reader">Source reader whose raw tvd/tvf bytes are copied verbatim.</param>
/// <param name="tvdLengths">Per-document byte lengths within the source tvd stream.</param>
/// <param name="tvfLengths">Per-document byte lengths within the source tvf stream.</param>
/// <param name="numDocs">Number of documents to copy.</param>
private void AddRawDocuments(Lucene40TermVectorsReader reader, int[] tvdLengths, int[] tvfLengths, int numDocs)
{
    // Fixed for consistency with the rest of this codebase: use the renamed
    // Position property (not GetFilePointer()) and the guarded
    // Debugging.AssertsEnabled / Debugging.Assert pattern.
    long tvdPosition = tvd.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
    long tvfPosition = tvf.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
    long tvdStart = tvdPosition;
    long tvfStart = tvfPosition;
    // Write one (tvd, tvf) pointer pair per document into the index stream,
    // advancing by each document's byte length.
    for (int i = 0; i < numDocs; i++)
    {
        tvx.WriteInt64(tvdPosition);
        tvdPosition += tvdLengths[i];
        tvx.WriteInt64(tvfPosition);
        tvfPosition += tvfLengths[i];
    }
    // Copy the raw document and field bytes in one shot each.
    tvd.CopyBytes(reader.TvdStream, tvdPosition - tvdStart);
    tvf.CopyBytes(reader.TvfStream, tvfPosition - tvfStart);
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(tvd.Position == tvdPosition); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        Debugging.Assert(tvf.Position == tvfPosition); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
    }
}
/// <summary>
/// Loads the field table for one document's term vectors: seeks the tvx
/// index to <paramref name="docID"/>, follows it into tvd, and records each
/// field's number plus its absolute file pointer into the tvf stream.
/// </summary>
/// <param name="outerInstance">Enclosing reader whose tvx/tvd streams are consumed.</param>
/// <param name="docID">Document whose term vector fields are being loaded.</param>
public TVFields(Lucene40TermVectorsReader outerInstance, int docID)
{
    this.outerInstance = outerInstance;
    outerInstance.SeekTvx(docID);
    // First tvx entry is the tvd pointer for this document.
    outerInstance.tvd.Seek(outerInstance.tvx.ReadInt64());
    int fieldCount = outerInstance.tvd.ReadVInt32();
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(fieldCount >= 0);
    }
    if (fieldCount != 0)
    {
        fieldNumbers = new int[fieldCount];
        fieldFPs = new long[fieldCount];
        // Read each field's number and remember its slot for later lookup.
        for (int fieldUpto = 0; fieldUpto < fieldCount; fieldUpto++)
        {
            int fieldNumber = outerInstance.tvd.ReadVInt32();
            fieldNumbers[fieldUpto] = fieldNumber;
            fieldNumberToIndex[fieldNumber] = fieldUpto;
        }
        // Second tvx entry is the absolute tvf pointer of the FIRST field;
        // remaining fields are stored in tvd as deltas from the previous one.
        long position = outerInstance.tvx.ReadInt64();
        fieldFPs[0] = position;
        for (int fieldUpto = 1; fieldUpto < fieldCount; fieldUpto++)
        {
            position += outerInstance.tvd.ReadVInt64();
            fieldFPs[fieldUpto] = position;
        }
    }
    else
    {
        // TODO: we can improve writer here, eg write 0 into
        // tvx file, so we know on first read from tvx that
        // this doc has no TVs
        fieldNumbers = null;
        fieldFPs = null;
    }
}
/// <summary>
/// Do a bulk copy of numDocs documents from reader to our
/// streams. This is used to expedite merging, if the
/// field numbers are congruent.
/// </summary>
/// <param name="reader">Source reader whose raw tvd/tvf bytes are copied verbatim.</param>
/// <param name="tvdLengths">Per-document byte lengths within the source tvd stream.</param>
/// <param name="tvfLengths">Per-document byte lengths within the source tvf stream.</param>
/// <param name="numDocs">Number of documents to copy.</param>
private void AddRawDocuments(Lucene40TermVectorsReader reader, int[] tvdLengths, int[] tvfLengths, int numDocs)
{
    long tvdStart = tvd.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
    long tvfStart = tvf.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
    long tvdEnd = tvdStart;
    long tvfEnd = tvfStart;
    // Emit one (tvd, tvf) pointer pair per document into the index stream,
    // advancing each pointer by that document's byte length.
    for (int doc = 0; doc < numDocs; doc++)
    {
        tvx.WriteInt64(tvdEnd);
        tvx.WriteInt64(tvfEnd);
        tvdEnd += tvdLengths[doc];
        tvfEnd += tvfLengths[doc];
    }
    // Copy the raw document and field bytes in one shot each.
    tvd.CopyBytes(reader.TvdStream, tvdEnd - tvdStart);
    tvf.CopyBytes(reader.TvfStream, tvfEnd - tvfStart);
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(tvd.Position == tvdEnd); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        Debugging.Assert(tvf.Position == tvfEnd); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
    }
}
/// <summary>
/// Merges term vectors from all readers in <paramref name="mergeState"/>,
/// using the raw bulk-copy path whenever the matching segment reader also
/// uses the Lucene40 term vectors format.
/// </summary>
/// <param name="mergeState">Merge context supplying the readers and field infos.</param>
/// <returns>Total number of documents whose vectors were merged.</returns>
public override int Merge(MergeState mergeState)
{
    // Scratch arrays for bulk-reading raw term-vector byte lengths.
    int[] tvdLengths = new int[MAX_RAW_MERGE_DOCS];
    int[] tvfLengths = new int[MAX_RAW_MERGE_DOCS];
    int totalDocs = 0;
    for (int readerIndex = 0; readerIndex < mergeState.Readers.Count; readerIndex++)
    {
        AtomicReader reader = mergeState.Readers[readerIndex];
        SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[readerIndex];
        Lucene40TermVectorsReader matchingVectorsReader = null;
        if (matchingSegmentReader != null)
        {
            // Only a same-format reader allows the raw byte-copy fast path;
            // 'as' yields null for both a null reader and a different format.
            matchingVectorsReader = matchingSegmentReader.TermVectorsReader as Lucene40TermVectorsReader;
        }
        totalDocs += reader.LiveDocs != null
            ? CopyVectorsWithDeletions(mergeState, matchingVectorsReader, reader, tvdLengths, tvfLengths)
            : CopyVectorsNoDeletions(mergeState, matchingVectorsReader, reader, tvdLengths, tvfLengths);
    }
    Finish(mergeState.FieldInfos, totalDocs);
    return totalDocs;
}
/// <summary>
/// Creates a terms enumerator over the outer reader's tvf stream, working on
/// a private clone so enumeration does not disturb the shared stream.
/// </summary>
/// <param name="outerInstance">Enclosing reader supplying the tvf stream.</param>
// NOTE: tvf is pre-positioned by caller
public TVTermsEnum(Lucene40TermVectorsReader outerInstance)
{
    this.outerInstance = outerInstance;
    IndexInput shared = outerInstance.tvf;
    this.origTVF = shared;
    this.tvf = (IndexInput)shared.Clone();
}
/// <summary>
/// Copies term vectors for all live (non-deleted) documents from
/// <paramref name="reader"/> into this writer's streams, bulk-copying
/// contiguous runs of live docs when a matching reader with congruent
/// field numbering is available.
/// </summary>
/// <param name="mergeState">Merge context; its <c>CheckAbort</c> is credited with work units.</param>
/// <param name="matchingVectorsReader">Non-null when raw byte-level bulk copy is possible; otherwise null.</param>
/// <param name="reader">Source reader (has deletions, per the caller's dispatch).</param>
/// <param name="rawDocLengths">Scratch array for per-doc tvd byte lengths.</param>
/// <param name="rawDocLengths2">Scratch array for per-doc tvf byte lengths.</param>
/// <returns>The number of live documents actually copied.</returns>
private int CopyVectorsWithDeletions(MergeState mergeState, Lucene40TermVectorsReader matchingVectorsReader, AtomicReader reader, int[] rawDocLengths, int[] rawDocLengths2)
{
    int maxDoc = reader.MaxDoc;
    IBits liveDocs = reader.LiveDocs;
    int totalNumDocs = 0;
    if (matchingVectorsReader != null)
    {
        // We can bulk-copy because the fieldInfos are "congruent".
        // Outer loop advances docNum; the inner do/while accumulates a run
        // of consecutive live docs (capped at MAX_RAW_MERGE_DOCS) to copy raw.
        for (int docNum = 0; docNum < maxDoc;)
        {
            if (!liveDocs.Get(docNum))
            {
                // skip deleted docs
                ++docNum;
                continue;
            }
            // We can optimize this case (doing a bulk byte copy) since the field
            // numbers are identical
            int start = docNum, numDocs = 0;
            do
            {
                docNum++;
                numDocs++;
                if (docNum >= maxDoc)
                {
                    break;
                }
                if (!liveDocs.Get(docNum))
                {
                    // Run ends at a deleted doc; skip past it before breaking.
                    docNum++;
                    break;
                }
            } while (numDocs < MAX_RAW_MERGE_DOCS);
            matchingVectorsReader.RawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
            AddRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
            totalNumDocs += numDocs;
            mergeState.CheckAbort.Work(300 * numDocs);
        }
    }
    else
    {
        // Field numbers differ: re-write vectors doc by doc, skipping deletions.
        for (int docNum = 0; docNum < maxDoc; docNum++)
        {
            if (!liveDocs.Get(docNum))
            {
                // skip deleted docs
                continue;
            }
            // NOTE: it's very important to first assign to vectors then pass it to
            // termVectorsWriter.addAllDocVectors; see LUCENE-1282
            Fields vectors = reader.GetTermVectors(docNum);
            AddAllDocVectors(vectors, mergeState);
            totalNumDocs++;
            mergeState.CheckAbort.Work(300);
        }
    }
    return totalNumDocs;
}