/// <summary>
/// Scans the fields file line-by-line, recording for each FIELD header the
/// file pointer immediately after that line, until the END marker is seen,
/// at which point the footer checksum is verified and the map returned.
/// </summary>
private IDictionary<string, long?> ReadFields(IndexInput @in)
{
    ChecksumIndexInput input = new BufferedChecksumIndexInput(@in);
    var lineScratch = new BytesRef(10);
    // LUCENENET specific: Use StringComparer.Ordinal to get the same ordering as Java
    var result = new JCG.SortedDictionary<string, long?>(StringComparer.Ordinal);
    for (; ; )
    {
        SimpleTextUtil.ReadLine(input, lineScratch);
        if (lineScratch.Equals(SimpleTextFieldsWriter.END))
        {
            SimpleTextUtil.CheckFooter(input);
            return result;
        }
        if (StringHelper.StartsWith(lineScratch, SimpleTextFieldsWriter.FIELD))
        {
            int prefixLen = SimpleTextFieldsWriter.FIELD.Length;
            string fieldName = Encoding.UTF8.GetString(
                lineScratch.Bytes,
                lineScratch.Offset + prefixLen,
                lineScratch.Length - prefixLen);
            // Pointer now sits at the start of this field's terms data.
            result[fieldName] = input.GetFilePointer();
        }
    }
}
/// <summary>
/// Clones <paramref name="input"/>, reads all bytes of the file through a
/// checksumming wrapper, and calls <see cref="CheckFooter(ChecksumIndexInput)"/>.
/// <para/>
/// May be slow, since the entire file must be consumed to compute the checksum.
/// To extract just the stored checksum value, use <see cref="RetrieveChecksum(IndexInput)"/>.
/// </summary>
public static long ChecksumEntireFile(IndexInput input)
{
    var clone = (IndexInput)input.Clone();
    clone.Seek(0);
    ChecksumIndexInput checksumInput = new BufferedChecksumIndexInput(clone);
    Debug.Assert(checksumInput.GetFilePointer() == 0);
    // Seeking to the footer forces the checksum wrapper to read (and hash)
    // every byte that precedes it.
    checksumInput.Seek(checksumInput.Length - FooterLength());
    return CheckFooter(checksumInput);
}
/// <summary>
/// Clones <paramref name="input"/>, reads all bytes of the file through a
/// checksumming wrapper, and calls <see cref="CheckFooter(ChecksumIndexInput)"/>.
/// <para/>
/// May be slow, since the entire file must be consumed to compute the checksum.
/// To extract just the stored checksum value, use <see cref="RetrieveChecksum(IndexInput)"/>.
/// </summary>
public static long ChecksumEntireFile(IndexInput input)
{
    var clone = (IndexInput)input.Clone();
    clone.Seek(0);
    ChecksumIndexInput checksumInput = new BufferedChecksumIndexInput(clone);
    if (Debugging.AssertsEnabled)
    {
        // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        Debugging.Assert(checksumInput.Position == 0);
    }
    // Seeking to the footer forces the checksum wrapper to read (and hash)
    // every byte that precedes it.
    checksumInput.Seek(checksumInput.Length - FooterLength());
    return CheckFooter(checksumInput);
}
/// <summary>
/// Verifies the integrity of the underlying data file: reads the whole file
/// line-by-line through a checksumming input until the END marker, then
/// validates the trailing footer checksum.
/// </summary>
/// <exception cref="System.IO.IOException">if the checksum does not match or the file is truncated</exception>
public override void CheckIntegrity()
{
    BytesRef scratch = new BytesRef();
    // Clone so the reader's own stream position is not disturbed.
    // (Fixed Java-converter artifacts: clone()/seek() casing did not match
    // the .NET API; made consistent with the other CheckIntegrity override.)
    IndexInput clone = (IndexInput)data.Clone();
    clone.Seek(0);
    ChecksumIndexInput input = new BufferedChecksumIndexInput(clone);
    while (true)
    {
        SimpleTextUtil.ReadLine(input, scratch);
        if (scratch.Equals(END))
        {
            SimpleTextUtil.CheckFooter(input);
            break;
        }
    }
}
/// <remarks>
/// We never write a .fdx-like index; instead the stored fields file is read
/// in its entirety up front and the offset of each DOC entry saved, so that
/// documents can be seeked to directly later.
/// </remarks>
private void ReadIndex(int size)
{
    ChecksumIndexInput input = new BufferedChecksumIndexInput(_input);
    _offsets = new long[size];
    int docUpto = 0;
    while (!_scratch.Equals(SimpleTextStoredFieldsWriter.END))
    {
        SimpleTextUtil.ReadLine(input, _scratch);
        if (StringHelper.StartsWith(_scratch, SimpleTextStoredFieldsWriter.DOC))
        {
            // Pointer now sits just past the DOC line, at the document body.
            _offsets[docUpto++] = input.GetFilePointer();
        }
    }
    SimpleTextUtil.CheckFooter(input);
    Debug.Assert(docUpto == _offsets.Length);
}
// We never write a .fdx-like index; instead the stored fields file is read
// in its entirety up front and the offset of each DOC entry saved, so that
// documents can be seeked to directly later.
private void readIndex(int size)
{
    ChecksumIndexInput input = new BufferedChecksumIndexInput(@in);
    offsets = new long[size];
    int docUpto = 0;
    while (!scratch.Equals(END))
    {
        SimpleTextUtil.ReadLine(input, scratch);
        if (StringHelper.StartsWith(scratch, DOC))
        {
            // Pointer now sits just past the DOC line, at the document body.
            offsets[docUpto++] = input.FilePointer;
        }
    }
    SimpleTextUtil.CheckFooter(input);
    Debug.Assert(docUpto == offsets.Length);
}
// We never write a .tvx-like index; instead the vectors file is read in its
// entirety up front and the offset of each DOC entry saved, so that the data
// can be seeked to directly later.
private void ReadIndex(int maxDoc)
{
    ChecksumIndexInput input = new BufferedChecksumIndexInput(_input);
    _offsets = new long[maxDoc];
    int docUpto = 0;
    while (!_scratch.Equals(SimpleTextTermVectorsWriter.END))
    {
        SimpleTextUtil.ReadLine(input, _scratch);
        if (StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.DOC))
        {
            // Pointer now sits just past the DOC line, at the vector data.
            _offsets[docUpto++] = input.FilePointer;
        }
    }
    SimpleTextUtil.CheckFooter(input);
    Debug.Assert(docUpto == _offsets.Length);
}
/// <summary>
/// Verifies the integrity of the underlying data file: reads the whole file
/// line-by-line through a checksumming input until the END marker, then
/// validates the trailing footer checksum.
/// </summary>
public override void CheckIntegrity()
{
    var scratch = new BytesRef();
    // Clone so the reader's own stream position is not disturbed.
    var clone = (IndexInput)data.Clone();
    clone.Seek(0);
    ChecksumIndexInput input = new BufferedChecksumIndexInput(clone);
    bool footerChecked = false;
    while (!footerChecked)
    {
        SimpleTextUtil.ReadLine(input, scratch);
        if (scratch.Equals(SimpleTextDocValuesWriter.END))
        {
            SimpleTextUtil.CheckFooter(input);
            footerChecked = true;
        }
    }
}
// We never write a .tvx-like index; instead the vectors file is read in its
// entirety up front and the offset of each DOC entry saved, so that the data
// can be seeked to directly later.
private void ReadIndex(int maxDoc)
{
    ChecksumIndexInput input = new BufferedChecksumIndexInput(_input);
    _offsets = new long[maxDoc];
    int docUpto = 0;
    while (!_scratch.Equals(SimpleTextTermVectorsWriter.END))
    {
        SimpleTextUtil.ReadLine(input, _scratch);
        if (StringHelper.StartsWith(_scratch, SimpleTextTermVectorsWriter.DOC))
        {
            // LUCENENET specific: Renamed from getFilePointer() to match FileStream
            _offsets[docUpto++] = input.Position;
        }
    }
    SimpleTextUtil.CheckFooter(input);
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(docUpto == _offsets.Length);
    }
}
/// <summary>
/// Scans the fields file line-by-line, recording for each FIELD header the
/// file pointer immediately after that line, until the END marker is seen,
/// at which point the footer checksum is verified and the map returned.
/// </summary>
private SortedDictionary<string, long?> ReadFields(IndexInput @in)
{
    ChecksumIndexInput input = new BufferedChecksumIndexInput(@in);
    BytesRef scratch = new BytesRef(10);
    // Use StringComparer.Ordinal to get the same ordering as Java
    // (the default string comparer is culture-sensitive).
    SortedDictionary<string, long?> fields = new SortedDictionary<string, long?>(StringComparer.Ordinal);
    while (true)
    {
        SimpleTextUtil.ReadLine(input, scratch);
        if (scratch.Equals(END))
        {
            SimpleTextUtil.CheckFooter(input);
            return fields;
        }
        else if (StringHelper.StartsWith(scratch, FIELD))
        {
            // Fixed Java-ism: C# has no string(byte[], int, int, charset)
            // constructor; decode the UTF-8 bytes after the FIELD prefix
            // explicitly. Also FIELD.length -> FIELD.Length.
            string fieldName = System.Text.Encoding.UTF8.GetString(
                scratch.Bytes,
                scratch.Offset + FIELD.Length,
                scratch.Length - FIELD.Length);
            // Pointer now sits at the start of this field's terms data.
            fields[fieldName] = input.FilePointer;
        }
    }
}
/// <summary>
/// Merges term vectors from all readers in <paramref name="mergeState"/> into this writer,
/// returning the number of documents merged. When the matching reader is a
/// <c>CompressingTermVectorsReader</c> with identical version/compression/chunk settings,
/// whole chunks whose documents are all live are bulk-copied as raw compressed bytes;
/// otherwise documents are re-added one at a time via <c>AddAllDocVectors</c>.
/// </summary>
public override int Merge(MergeState mergeState)
{
    int docCount = 0;
    int idx = 0;
    foreach (AtomicReader reader in mergeState.Readers)
    {
        SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++];
        CompressingTermVectorsReader matchingVectorsReader = null;
        if (matchingSegmentReader != null)
        {
            TermVectorsReader vectorsReader = matchingSegmentReader.TermVectorsReader;
            // we can only bulk-copy if the matching reader is also a CompressingTermVectorsReader
            if (vectorsReader != null && vectorsReader is CompressingTermVectorsReader)
            {
                matchingVectorsReader = (CompressingTermVectorsReader)vectorsReader;
            }
        }
        int maxDoc = reader.MaxDoc;
        Bits liveDocs = reader.LiveDocs;
        // Bulk copy is only safe when the source was written with the exact same
        // format version, compression mode, chunk size and packed-ints version.
        if (matchingVectorsReader == null || matchingVectorsReader.Version != VERSION_CURRENT || matchingVectorsReader.CompressionMode != CompressionMode || matchingVectorsReader.ChunkSize != ChunkSize || matchingVectorsReader.PackedIntsVersion != PackedInts.VERSION_CURRENT)
        {
            // naive merge...
            for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
            {
                Fields vectors = reader.GetTermVectors(i);
                AddAllDocVectors(vectors, mergeState);
                ++docCount;
                mergeState.checkAbort.Work(300);
            }
        }
        else
        {
            CompressingStoredFieldsIndexReader index = matchingVectorsReader.Index;
            IndexInput vectorsStreamOrig = matchingVectorsReader.VectorsStream;
            vectorsStreamOrig.Seek(0);
            // Wrap a clone in a checksumming input so the footer can be verified
            // at the end without re-reading the whole file.
            ChecksumIndexInput vectorsStream = new BufferedChecksumIndexInput((IndexInput)vectorsStreamOrig.Clone());
            for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc;)
            {
                // We make sure to move the checksum input in any case, otherwise the final
                // integrity check might need to read the whole file a second time
                long startPointer = index.GetStartPointer(i);
                if (startPointer > vectorsStream.FilePointer)
                {
                    vectorsStream.Seek(startPointer);
                }
                if ((PendingDocs.Count == 0) && (i == 0 || index.GetStartPointer(i - 1) < startPointer)) // start of a chunk
                {
                    int docBase = vectorsStream.ReadVInt();
                    int chunkDocs = vectorsStream.ReadVInt();
                    Debug.Assert(docBase + chunkDocs <= matchingSegmentReader.MaxDoc);
                    // Bulk-copy the chunk only if it is not the last chunk of the
                    // segment and contains no deleted documents.
                    if (docBase + chunkDocs < matchingSegmentReader.MaxDoc && NextDeletedDoc(docBase, liveDocs, docBase + chunkDocs) == docBase + chunkDocs)
                    {
                        long chunkEnd = index.GetStartPointer(docBase + chunkDocs);
                        long chunkLength = chunkEnd - vectorsStream.FilePointer;
                        IndexWriter.WriteIndex(chunkDocs, this.VectorsStream.FilePointer);
                        this.VectorsStream.WriteVInt(docCount);
                        this.VectorsStream.WriteVInt(chunkDocs);
                        this.VectorsStream.CopyBytes(vectorsStream, chunkLength);
                        docCount += chunkDocs;
                        this.NumDocs += chunkDocs;
                        mergeState.checkAbort.Work(300 * chunkDocs);
                        i = NextLiveDoc(docBase + chunkDocs, liveDocs, maxDoc);
                    }
                    else
                    {
                        // Chunk contains deletions (or is the last chunk):
                        // fall back to per-document merging for this chunk.
                        for (; i < docBase + chunkDocs; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
                        {
                            Fields vectors = reader.GetTermVectors(i);
                            AddAllDocVectors(vectors, mergeState);
                            ++docCount;
                            mergeState.checkAbort.Work(300);
                        }
                    }
                }
                else
                {
                    // Mid-chunk (or pending docs buffered): merge this one document naively.
                    Fields vectors = reader.GetTermVectors(i);
                    AddAllDocVectors(vectors, mergeState);
                    ++docCount;
                    mergeState.checkAbort.Work(300);
                    i = NextLiveDoc(i + 1, liveDocs, maxDoc);
                }
            }
            // Seek through any remaining bytes so the checksum covers the whole
            // file, then validate the footer.
            vectorsStream.Seek(vectorsStream.Length() - CodecUtil.FooterLength());
            CodecUtil.CheckFooter(vectorsStream);
        }
    }
    Finish(mergeState.FieldInfos, docCount);
    return(docCount);
}
/// <summary>
/// Merges term vectors from all readers in <paramref name="mergeState"/> into this writer,
/// returning the number of documents merged. When the matching reader is a
/// <c>CompressingTermVectorsReader</c> with identical version/compression/chunk settings,
/// whole chunks whose documents are all live are bulk-copied as raw compressed bytes;
/// otherwise documents are re-added one at a time via <c>AddAllDocVectors</c>.
/// </summary>
public override int Merge(MergeState mergeState)
{
    int docCount = 0;
    int idx = 0;
    foreach (AtomicReader reader in mergeState.Readers)
    {
        SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++];
        CompressingTermVectorsReader matchingVectorsReader = null;
        if (matchingSegmentReader != null)
        {
            TermVectorsReader vectorsReader = matchingSegmentReader.TermVectorsReader;
            // we can only bulk-copy if the matching reader is also a CompressingTermVectorsReader
            if (vectorsReader != null && vectorsReader is CompressingTermVectorsReader)
            {
                matchingVectorsReader = (CompressingTermVectorsReader)vectorsReader;
            }
        }
        int maxDoc = reader.MaxDoc;
        Bits liveDocs = reader.LiveDocs;
        // Bulk copy is only safe when the source was written with the exact same
        // format version, compression mode, chunk size and packed-ints version.
        if (matchingVectorsReader == null || matchingVectorsReader.Version != VERSION_CURRENT || matchingVectorsReader.CompressionMode != CompressionMode || matchingVectorsReader.ChunkSize != ChunkSize || matchingVectorsReader.PackedIntsVersion != PackedInts.VERSION_CURRENT)
        {
            // naive merge...
            for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
            {
                Fields vectors = reader.GetTermVectors(i);
                AddAllDocVectors(vectors, mergeState);
                ++docCount;
                mergeState.checkAbort.Work(300);
            }
        }
        else
        {
            CompressingStoredFieldsIndexReader index = matchingVectorsReader.Index;
            IndexInput vectorsStreamOrig = matchingVectorsReader.VectorsStream;
            vectorsStreamOrig.Seek(0);
            // Wrap a clone in a checksumming input so the footer can be verified
            // at the end without re-reading the whole file.
            ChecksumIndexInput vectorsStream = new BufferedChecksumIndexInput((IndexInput)vectorsStreamOrig.Clone());
            for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; )
            {
                // We make sure to move the checksum input in any case, otherwise the final
                // integrity check might need to read the whole file a second time
                long startPointer = index.GetStartPointer(i);
                if (startPointer > vectorsStream.FilePointer)
                {
                    vectorsStream.Seek(startPointer);
                }
                if ((PendingDocs.Count == 0) && (i == 0 || index.GetStartPointer(i - 1) < startPointer)) // start of a chunk
                {
                    int docBase = vectorsStream.ReadVInt();
                    int chunkDocs = vectorsStream.ReadVInt();
                    Debug.Assert(docBase + chunkDocs <= matchingSegmentReader.MaxDoc);
                    // Bulk-copy the chunk only if it is not the last chunk of the
                    // segment and contains no deleted documents.
                    if (docBase + chunkDocs < matchingSegmentReader.MaxDoc && NextDeletedDoc(docBase, liveDocs, docBase + chunkDocs) == docBase + chunkDocs)
                    {
                        long chunkEnd = index.GetStartPointer(docBase + chunkDocs);
                        long chunkLength = chunkEnd - vectorsStream.FilePointer;
                        IndexWriter.WriteIndex(chunkDocs, this.VectorsStream.FilePointer);
                        this.VectorsStream.WriteVInt(docCount);
                        this.VectorsStream.WriteVInt(chunkDocs);
                        this.VectorsStream.CopyBytes(vectorsStream, chunkLength);
                        docCount += chunkDocs;
                        this.NumDocs += chunkDocs;
                        mergeState.checkAbort.Work(300 * chunkDocs);
                        i = NextLiveDoc(docBase + chunkDocs, liveDocs, maxDoc);
                    }
                    else
                    {
                        // Chunk contains deletions (or is the last chunk):
                        // fall back to per-document merging for this chunk.
                        for (; i < docBase + chunkDocs; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
                        {
                            Fields vectors = reader.GetTermVectors(i);
                            AddAllDocVectors(vectors, mergeState);
                            ++docCount;
                            mergeState.checkAbort.Work(300);
                        }
                    }
                }
                else
                {
                    // Mid-chunk (or pending docs buffered): merge this one document naively.
                    Fields vectors = reader.GetTermVectors(i);
                    AddAllDocVectors(vectors, mergeState);
                    ++docCount;
                    mergeState.checkAbort.Work(300);
                    i = NextLiveDoc(i + 1, liveDocs, maxDoc);
                }
            }
            // Seek through any remaining bytes so the checksum covers the whole
            // file, then validate the footer.
            vectorsStream.Seek(vectorsStream.Length() - CodecUtil.FooterLength());
            CodecUtil.CheckFooter(vectorsStream);
        }
    }
    Finish(mergeState.FieldInfos, docCount);
    return docCount;
}