private void AssertSameSeekBehavior(System.String msg, IndexInput expected, IndexInput actual) { // seek to 0 long point = 0; AssertSameStreams(msg + ", seek(0)", expected, actual, point); // seek to middle point = expected.Length() / 2L; AssertSameStreams(msg + ", seek(mid)", expected, actual, point); // seek to end - 2 point = expected.Length() - 2; AssertSameStreams(msg + ", seek(end-2)", expected, actual, point); // seek to end - 1 point = expected.Length() - 1; AssertSameStreams(msg + ", seek(end-1)", expected, actual, point); // seek to the end point = expected.Length(); AssertSameStreams(msg + ", seek(end)", expected, actual, point); // seek past end point = expected.Length() + 1; AssertSameStreams(msg + ", seek(end+1)", expected, actual, point); }
/// <summary>Returns the length in bytes of each raw document in a /// contiguous range of length numDocs starting with /// startDocID. Returns the IndexInput (the fieldStream), /// already seeked to the starting point for startDocID. /// </summary> internal IndexInput RawDocs(int[] lengths, int startDocID, int numDocs) { SeekIndex(startDocID); long startOffset = indexStream.ReadLong(); long lastOffset = startOffset; int count = 0; while (count < numDocs) { long offset; int docID = docStoreOffset + startDocID + count + 1; System.Diagnostics.Debug.Assert(docID <= numTotalDocs); if (docID < numTotalDocs) { offset = indexStream.ReadLong(); } else { offset = fieldsStream.Length(); } lengths[count++] = (int)(offset - lastOffset); lastOffset = offset; } fieldsStream.Seek(startOffset); return(fieldsStream); }
/// <summary>Retrieve the length (in bytes) of the tvd and tvf /// entries for the next numDocs starting with /// startDocID. This is used for bulk copying when /// merging segments, if the field numbers are /// congruent. Once this returns, the tvf & tvd streams /// are seeked to the startDocID. /// </summary> internal void RawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs) { if (tvx == null) { for (int i = 0; i < tvdLengths.Length; i++) { tvdLengths[i] = 0; } for (int i = 0; i < tvfLengths.Length; i++) { tvfLengths[i] = 0; } return; } // SegmentMerger calls canReadRawDocs() first and should // not call us if that returns false. if (format < FORMAT_VERSION2) { throw new System.SystemException("cannot read raw docs with older term vector formats"); } SeekTvx(startDocID); long tvdPosition = tvx.ReadLong(); tvd.Seek(tvdPosition); long tvfPosition = tvx.ReadLong(); tvf.Seek(tvfPosition); long lastTvdPosition = tvdPosition; long lastTvfPosition = tvfPosition; int count = 0; while (count < numDocs) { int docID = docStoreOffset + startDocID + count + 1; System.Diagnostics.Debug.Assert(docID <= numTotalDocs); if (docID < numTotalDocs) { tvdPosition = tvx.ReadLong(); tvfPosition = tvx.ReadLong(); } else { tvdPosition = tvd.Length(); tvfPosition = tvf.Length(); System.Diagnostics.Debug.Assert(count == numDocs - 1); } tvdLengths[count] = (int)(tvdPosition - lastTvdPosition); tvfLengths[count] = (int)(tvfPosition - lastTvfPosition); count++; lastTvdPosition = tvdPosition; lastTvfPosition = tvfPosition; } }
internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size) { bool success = false; try { if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION)) { tvx = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize); CheckValidFormat(tvx); tvd = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize); tvdFormat = CheckValidFormat(tvd); tvf = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize); tvfFormat = CheckValidFormat(tvf); if (-1 == docStoreOffset) { this.docStoreOffset = 0; this.size = (int)(tvx.Length() >> 3); } else { this.docStoreOffset = docStoreOffset; this.size = size; // Verify the file is long enough to hold all of our // docs System.Diagnostics.Debug.Assert(((int)(tvx.Length() / 8)) >= size + docStoreOffset); } } this.fieldInfos = fieldInfos; success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { Close(); } } }
internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size) { bool success = false; try { if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION)) { tvx = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize); CheckValidFormat(tvx); tvd = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize); tvdFormat = CheckValidFormat(tvd); tvf = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize); tvfFormat = CheckValidFormat(tvf); if (- 1 == docStoreOffset) { this.docStoreOffset = 0; this.size = (int) (tvx.Length() >> 3); } else { this.docStoreOffset = docStoreOffset; this.size = size; // Verify the file is long enough to hold all of our // docs System.Diagnostics.Debug.Assert(((int) (tvx.Length() / 8)) >= size + docStoreOffset); } } this.fieldInfos = fieldInfos; success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { Close(); } } }
/*internal*/ public FieldsReader(Directory d, System.String segment, FieldInfos fn) { fieldInfos = fn; fieldsStream = d.OpenInput(segment + ".fdt"); indexStream = d.OpenInput(segment + ".fdx"); size = (int) (indexStream.Length() / 8); }
public virtual void TestReadPastEOF() { SetUp_2(); CompoundFileReader cr = new CompoundFileReader(dir, "f.comp", null); IndexInput is_Renamed = cr.OpenInput("f2", null); is_Renamed.Seek(is_Renamed.Length(null) - 10, null); byte[] b = new byte[100]; is_Renamed.ReadBytes(b, 0, 10, null); Assert.Throws <System.IO.IOException>(() => is_Renamed.ReadByte(null), "Single byte read past end of file"); is_Renamed.Seek(is_Renamed.Length(null) - 10, null); Assert.Throws <System.IO.IOException>(() => is_Renamed.ReadBytes(b, 0, 50, null), "Block read past end of file"); is_Renamed.Close(); cr.Close(); }
public /*internal*/ FieldsReader(Directory d, System.String segment, FieldInfos fn) { fieldInfos = fn; fieldsStream = d.OpenInput(segment + ".fdt"); indexStream = d.OpenInput(segment + ".fdx"); size = (int)(indexStream.Length() / 8); }
public FieldsReader(Directory d, System.String segment, FieldInfos fn) { fieldInfos = fn; cloneableFieldsStream = d.OpenInput(segment + ".fdt"); fieldsStream = (IndexInput)cloneableFieldsStream.Clone(); indexStream = d.OpenInput(segment + ".fdx"); size = (int)(indexStream.Length() / 8); }
private void AssertSameStreams(System.String msg, IndexInput expected, IndexInput actual, long seekTo) { if (seekTo >= 0 && seekTo < expected.Length()) { expected.Seek(seekTo); actual.Seek(seekTo); AssertSameStreams(msg + ", seek(mid)", expected, actual); } }
internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) { bool success = false; try { fieldInfos = fn; cloneableFieldsStream = d.OpenInput(segment + ".fdt", readBufferSize); fieldsStream = (IndexInput)cloneableFieldsStream.Clone(); indexStream = d.OpenInput(segment + ".fdx", readBufferSize); if (docStoreOffset != -1) { // We read only a slice out of this shared fields file this.docStoreOffset = docStoreOffset; this.size = size; // Verify the file is long enough to hold all of our // docs System.Diagnostics.Debug.Assert(((int)(indexStream.Length() / 8)) >= size + this.docStoreOffset); } else { this.docStoreOffset = 0; this.size = (int)(indexStream.Length() >> 3); } numTotalDocs = (int)(indexStream.Length() >> 3); success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { Close(); } } }
public CompoundFileReader(Directory dir, string name, int readBufferSize) { directory = dir; fileName = name; this.readBufferSize = readBufferSize; bool success = false; try { stream = dir.OpenInput(name, readBufferSize); // read the directory and init files int count = stream.ReadVInt(); FileEntry entry = null; for (int i = 0; i < count; i++) { long offset = stream.ReadLong(); string id = stream.ReadString(); if (entry != null) { // set length of the previous entry entry.length = offset - entry.offset; } entry = new FileEntry { offset = offset }; entries[id] = entry; } // set the length of the final entry if (entry != null) { entry.length = stream.Length() - entry.offset; } success = true; } finally { if (!success && (stream != null)) { try { stream.Close(); } catch (System.IO.IOException) { } } } }
private void AssertSameStreams(System.String msg, IndexInput expected, IndexInput test) { Assert.IsNotNull(expected, msg + " null expected"); Assert.IsNotNull(test, msg + " null test"); Assert.AreEqual(expected.Length(), test.Length(), msg + " length"); Assert.AreEqual(expected.GetFilePointer(), test.GetFilePointer(), msg + " position"); byte[] expectedBuffer = new byte[512]; byte[] testBuffer = new byte[expectedBuffer.Length]; long remainder = expected.Length() - expected.GetFilePointer(); while (remainder > 0) { int readLen = (int)System.Math.Min(remainder, expectedBuffer.Length); expected.ReadBytes(expectedBuffer, 0, readLen); test.ReadBytes(testBuffer, 0, readLen); AssertEqualArrays(msg + ", remainder " + remainder, expectedBuffer, testBuffer, 0, readLen); remainder -= readLen; } }
private void Read(IndexInput input, System.String fileName) { int firstInt = input.ReadVInt(); if (firstInt < 0) { // This is a real format format = firstInt; } else { format = FORMAT_PRE; } if (format != FORMAT_PRE & format != FORMAT_START) { throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\""); } int size; if (format == FORMAT_PRE) { size = firstInt; } else { size = input.ReadVInt(); //read in the size } for (int i = 0; i < size; i++) { System.String name = StringHelper.Intern(input.ReadString()); byte bits = input.ReadByte(); bool isIndexed = (bits & IS_INDEXED) != 0; bool storeTermVector = (bits & STORE_TERMVECTOR) != 0; bool storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0; bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0; bool omitNorms = (bits & OMIT_NORMS) != 0; bool storePayloads = (bits & STORE_PAYLOADS) != 0; bool omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0; AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); } if (input.GetFilePointer() != input.Length()) { throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.GetFilePointer() + " vs size " + input.Length()); } }
public void Read(Directory directory) { IndexInput input = directory.OpenInput(IndexFileNames.SEGMENTS); try { int format = input.ReadInt(); if (format < 0) { // file contains explicit format info // check that it is a format we can understand if (format < FORMAT) { throw new System.IO.IOException("Unknown format version: " + format); } version = input.ReadLong(); // read version counter = input.ReadInt(); // read counter } else { // file is in old format without explicit format info counter = format; } for (int i = input.ReadInt(); i > 0; i--) { // read segmentInfos SegmentInfo si = new SegmentInfo(input.ReadString(), input.ReadInt(), directory); Add(si); } if (format >= 0) { // in old format the version number may be at the end of the file if (input.GetFilePointer() >= input.Length()) { version = (System.DateTime.Now.Ticks - 621355968000000000) / 10000; } // old file format without version number else { version = input.ReadLong(); // read version } } } finally { input.Close(); } }
/// <summary>Copy the contents of the file with specified extension into the /// provided output stream. Use the provided buffer for moving data /// to reduce memory allocation. /// </summary> private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer) { IndexInput is_Renamed = null; try { long startPtr = os.GetFilePointer(); is_Renamed = directory.OpenInput(source.file); long length = is_Renamed.Length(); long remainder = length; int chunk = buffer.Length; while (remainder > 0) { int len = (int)System.Math.Min(chunk, remainder); is_Renamed.ReadBytes(buffer, 0, len, false); os.WriteBytes(buffer, len); remainder -= len; if (checkAbort != null) { // Roughly every 2 MB we will check if // it's time to abort checkAbort.Work(80); } } // Verify that remainder is 0 if (remainder != 0) { throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")"); } // Verify that the output length diff is equal to original file long endPtr = os.GetFilePointer(); long diff = endPtr - startPtr; if (diff != length) { throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length); } } finally { if (is_Renamed != null) { is_Renamed.Close(); } } }
public virtual void TestReadPastEOF() { SetUp_2(); CompoundFileReader cr = new CompoundFileReader(dir, "f.comp"); IndexInput is_Renamed = cr.OpenInput("f2"); is_Renamed.Seek(is_Renamed.Length() - 10); byte[] b = new byte[100]; is_Renamed.ReadBytes(b, 0, 10); try { byte test = is_Renamed.ReadByte(); Assert.Fail("Single byte read past end of file"); } catch (System.IO.IOException e) { /* success */ //System.out.println("SUCCESS: single byte read past end of file: " + e); } is_Renamed.Seek(is_Renamed.Length() - 10); try { is_Renamed.ReadBytes(b, 0, 50); Assert.Fail("Block read past end of file"); } catch (System.IO.IOException e) { /* success */ //System.out.println("SUCCESS: block read past end of file: " + e); } is_Renamed.Close(); cr.Close(); }
public /*internal*/ TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos) { if (d.FileExists(segment + TermVectorsWriter.TVX_EXTENSION)) { tvx = d.OpenInput(segment + TermVectorsWriter.TVX_EXTENSION); CheckValidFormat(tvx); tvd = d.OpenInput(segment + TermVectorsWriter.TVD_EXTENSION); tvdFormat = CheckValidFormat(tvd); tvf = d.OpenInput(segment + TermVectorsWriter.TVF_EXTENSION); tvfFormat = CheckValidFormat(tvf); size = (int)tvx.Length() / 8; } this.fieldInfos = fieldInfos; }
/*internal*/ public TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos) { if (d.FileExists(segment + TermVectorsWriter.TVX_EXTENSION)) { tvx = d.OpenInput(segment + TermVectorsWriter.TVX_EXTENSION); CheckValidFormat(tvx); tvd = d.OpenInput(segment + TermVectorsWriter.TVD_EXTENSION); tvdFormat = CheckValidFormat(tvd); tvf = d.OpenInput(segment + TermVectorsWriter.TVF_EXTENSION); tvfFormat = CheckValidFormat(tvf); size = (int) tvx.Length() / 8; } this.fieldInfos = fieldInfos; }
public virtual void CopyFile(Directory dir, System.String src, System.String dest) { IndexInput in_Renamed = dir.OpenInput(src); IndexOutput out_Renamed = dir.CreateOutput(dest); byte[] b = new byte[1024]; long remainder = in_Renamed.Length(); while (remainder > 0) { int len = (int)System.Math.Min(b.Length, remainder); in_Renamed.ReadBytes(b, 0, len); out_Renamed.WriteBytes(b, len); remainder -= len; } in_Renamed.Close(); out_Renamed.Close(); }
/// <summary> /// Sole constructor. </summary> public Lucene40StoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) { string segment = si.Name; bool success = false; FieldInfos = fn; try { FieldsStream = d.OpenInput(IndexFileNames.SegmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION), context); string indexStreamFN = IndexFileNames.SegmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION); IndexStream = d.OpenInput(indexStreamFN, context); CodecUtil.CheckHeader(IndexStream, Lucene40StoredFieldsWriter.CODEC_NAME_IDX, Lucene40StoredFieldsWriter.VERSION_START, Lucene40StoredFieldsWriter.VERSION_CURRENT); CodecUtil.CheckHeader(FieldsStream, Lucene40StoredFieldsWriter.CODEC_NAME_DAT, Lucene40StoredFieldsWriter.VERSION_START, Lucene40StoredFieldsWriter.VERSION_CURRENT); Debug.Assert(Lucene40StoredFieldsWriter.HEADER_LENGTH_DAT == FieldsStream.FilePointer); Debug.Assert(Lucene40StoredFieldsWriter.HEADER_LENGTH_IDX == IndexStream.FilePointer); long indexSize = IndexStream.Length() - Lucene40StoredFieldsWriter.HEADER_LENGTH_IDX; this.Size_Renamed = (int)(indexSize >> 3); // Verify two sources of "maxDoc" agree: if (this.Size_Renamed != si.DocCount) { throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.Size_Renamed + " but segmentInfo shows " + si.DocCount); } NumTotalDocs = (int)(indexSize >> 3); success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { try { Dispose(); } // ensure we throw our original exception catch (Exception) { } } } }
internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) { bool success = false; isOriginal = true; try { fieldInfos = fn; cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize); cloneableIndexStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize); // First version of fdx did not include a format // header, but, the first int will always be 0 in that // case int firstInt = cloneableIndexStream.ReadInt(); format = firstInt == 0 ? 0 : firstInt; if (format > FieldsWriter.FORMAT_CURRENT) throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower"); formatSize = format > FieldsWriter.FORMAT ? 4 : 0; if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) cloneableFieldsStream.SetModifiedUTF8StringsMode(); fieldsStream = (IndexInput) cloneableFieldsStream.Clone(); long indexSize = cloneableIndexStream.Length() - formatSize; if (docStoreOffset != - 1) { // We read only a slice out of this shared fields file this.docStoreOffset = docStoreOffset; this.size = size; // Verify the file is long enough to hold all of our // docs System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset); } else { this.docStoreOffset = 0; this.size = (int) (indexSize >> 3); } indexStream = (IndexInput) cloneableIndexStream.Clone(); numTotalDocs = (int) (indexSize >> 3); success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { Dispose(); } } }
internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) { bool success = false; try { fieldInfos = fn; cloneableFieldsStream = d.OpenInput(segment + ".fdt", readBufferSize); fieldsStream = (IndexInput) cloneableFieldsStream.Clone(); indexStream = d.OpenInput(segment + ".fdx", readBufferSize); if (docStoreOffset != - 1) { // We read only a slice out of this shared fields file this.docStoreOffset = docStoreOffset; this.size = size; // Verify the file is long enough to hold all of our // docs System.Diagnostics.Debug.Assert(((int)(indexStream.Length() / 8)) >= size + this.docStoreOffset); } else { this.docStoreOffset = 0; this.size = (int) (indexStream.Length() >> 3); } numTotalDocs = (int) (indexStream.Length() >> 3); success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { Close(); } } }
/// <summary> Read a particular segmentFileName. Note that this may /// throw an IOException if a commit is in process. /// /// </summary> /// <param name="directory">-- directory containing the segments file /// </param> /// <param name="segmentFileName">-- segment file to load /// </param> public void Read(Directory directory, System.String segmentFileName) { bool success = false; IndexInput input = directory.OpenInput(segmentFileName); if (segmentFileName.Equals(IndexFileNames.SEGMENTS)) { generation = 0; } else { #if !PRE_LUCENE_NET_2_0_0_COMPATIBLE generation = Lucene.Net.Documents.NumberTools.ToLong(segmentFileName.Substring(1 + IndexFileNames.SEGMENTS.Length)); #else generation = System.Convert.ToInt64(segmentFileName.Substring(1 + IndexFileNames.SEGMENTS.Length), 16); #endif } lastGeneration = generation; try { int format = input.ReadInt(); if (format < 0) { // file contains explicit format info // check that it is a format we can understand if (format < FORMAT_SINGLE_NORM_FILE) { throw new System.IO.IOException("Unknown format version: " + format); } version = input.ReadLong(); // read version counter = input.ReadInt(); // read counter } else { // file is in old format without explicit format info counter = format; } for (int i = input.ReadInt(); i > 0; i--) { // read segmentInfos Add(new SegmentInfo(directory, format, input)); } if (format >= 0) { // in old format the version number may be at the end of the file if (input.GetFilePointer() >= input.Length()) { version = System.DateTime.Now.Millisecond; } // old file format without version number else { version = input.ReadLong(); // read version } } success = true; } finally { input.Close(); if (!success) { // Clear any segment infos we had loaded so we // have a clean slate on retry: Clear(); } } }
private void AssertSameStreams(System.String msg, IndexInput expected, IndexInput test) { Assert.IsNotNull(expected, msg + " null expected"); Assert.IsNotNull(test, msg + " null test"); Assert.AreEqual(expected.Length(), test.Length(), msg + " length"); Assert.AreEqual(expected.FilePointer, test.FilePointer, msg + " position"); byte[] expectedBuffer = new byte[512]; byte[] testBuffer = new byte[expectedBuffer.Length]; long remainder = expected.Length() - expected.FilePointer; while (remainder > 0) { int readLen = (int) System.Math.Min(remainder, expectedBuffer.Length); expected.ReadBytes(expectedBuffer, 0, readLen); test.ReadBytes(testBuffer, 0, readLen); AssertEqualArrays(msg + ", remainder " + remainder, expectedBuffer, testBuffer, 0, readLen); remainder -= readLen; } }
internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size) { bool success = false; try { if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION)) { tvx = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize); format = CheckValidFormat(tvx); tvd = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize); int tvdFormat = CheckValidFormat(tvd); tvf = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize); int tvfFormat = CheckValidFormat(tvf); System.Diagnostics.Debug.Assert(format == tvdFormat); System.Diagnostics.Debug.Assert(format == tvfFormat); if (format >= FORMAT_VERSION2) { System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 16 == 0); numTotalDocs = (int)(tvx.Length() >> 4); } else { System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 8 == 0); numTotalDocs = (int)(tvx.Length() >> 3); } if (-1 == docStoreOffset) { this.docStoreOffset = 0; this.size = numTotalDocs; System.Diagnostics.Debug.Assert(size == 0 || numTotalDocs == size); } else { this.docStoreOffset = docStoreOffset; this.size = size; // Verify the file is long enough to hold all of our // docs System.Diagnostics.Debug.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset); } } else { // If all documents flushed in a segment had hit // non-aborting exceptions, it's possible that // FieldInfos.hasVectors returns true yet the term // vector files don't exist. format = 0; } this.fieldInfos = fieldInfos; success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { Close(); } } }
public override long Length() { return(delegate_Renamed.Length()); }
public CompoundFileReader(Directory dir, System.String name) { directory = dir; fileName = name; bool success = false; try { stream = dir.OpenInput(name); // read the directory and init files int count = stream.ReadVInt(); FileEntry entry = null; for (int i = 0; i < count; i++) { long offset = stream.ReadLong(); System.String id = stream.ReadString(); if (entry != null) { // set length of the previous entry entry.length = offset - entry.offset; } entry = new FileEntry(); entry.offset = offset; entries[id] = entry; } // set the length of the final entry if (entry != null) { entry.length = stream.Length() - entry.offset; } success = true; } finally { if (!success && (stream != null)) { try { stream.Close(); } catch (System.IO.IOException) { } } } }
internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size) { bool success = false; isOriginal = true; try { fieldInfos = fn; cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize); cloneableIndexStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize); // First version of fdx did not include a format // header, but, the first int will always be 0 in that // case int firstInt = cloneableIndexStream.ReadInt(); format = firstInt == 0 ? 0 : firstInt; if (format > FieldsWriter.FORMAT_CURRENT) { throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower"); } formatSize = format > FieldsWriter.FORMAT ? 4 : 0; if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) { cloneableFieldsStream.SetModifiedUTF8StringsMode(); } fieldsStream = (IndexInput)cloneableFieldsStream.Clone(); long indexSize = cloneableIndexStream.Length() - formatSize; if (docStoreOffset != -1) { // We read only a slice out of this shared fields file this.docStoreOffset = docStoreOffset; this.size = size; // Verify the file is long enough to hold all of our // docs System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset); } else { this.docStoreOffset = 0; this.size = (int)(indexSize >> 3); } indexStream = (IndexInput)cloneableIndexStream.Clone(); numTotalDocs = (int)(indexSize >> 3); success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { Dispose(); } } }
/// <summary> Read a particular segmentFileName. Note that this may /// throw an IOException if a commit is in process. /// /// </summary> /// <param name="directory">-- directory containing the segments file /// </param> /// <param name="segmentFileName">-- segment file to load /// </param> /// <throws> CorruptIndexException if the index is corrupt </throws> /// <throws> IOException if there is a low-level IO error </throws> public void Read(Directory directory, System.String segmentFileName) { bool success = false; // Clear any previous segments: Clear(); IndexInput input = directory.OpenInput(segmentFileName); generation = GenerationFromSegmentsFileName(segmentFileName); lastGeneration = generation; try { int format = input.ReadInt(); if (format < 0) { // file contains explicit format info // check that it is a format we can understand if (format < CURRENT_FORMAT) { throw new CorruptIndexException("Unknown format version: " + format); } version = input.ReadLong(); // read version counter = input.ReadInt(); // read counter } else { // file is in old format without explicit format info counter = format; } for (int i = input.ReadInt(); i > 0; i--) { // read segmentInfos Add(new SegmentInfo(directory, format, input)); } if (format >= 0) { // in old format the version number may be at the end of the file if (input.GetFilePointer() >= input.Length()) { version = (System.DateTime.Now.Ticks - 621355968000000000) / 10000; } // old file format without version number else { version = input.ReadLong(); // read version } } success = true; } finally { input.Close(); if (!success) { // Clear any segment infos we had loaded so we // have a clean slate on retry: Clear(); } } }
public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext) { string fileName = IndexFileNames.SegmentFileName(segmentName, "", PreFlexRWFieldInfosWriter.FIELD_INFOS_EXTENSION); IndexInput input = directory.OpenInput(fileName, iocontext); try { int format = input.ReadVInt(); if (format > FORMAT_MINIMUM) { throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT); } if (format < PreFlexRWFieldInfosWriter.FORMAT_CURRENT && format != PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW) { throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, PreFlexRWFieldInfosWriter.FORMAT_CURRENT); } int size = input.ReadVInt(); //read in the size FieldInfo[] infos = new FieldInfo[size]; for (int i = 0; i < size; i++) { string name = input.ReadString(); int fieldNumber = format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW ? input.ReadInt() : i; byte bits = input.ReadByte(); bool isIndexed = (bits & PreFlexRWFieldInfosWriter.IS_INDEXED) != 0; bool storeTermVector = (bits & PreFlexRWFieldInfosWriter.STORE_TERMVECTOR) != 0; bool omitNorms = (bits & PreFlexRWFieldInfosWriter.OMIT_NORMS) != 0; bool storePayloads = (bits & PreFlexRWFieldInfosWriter.STORE_PAYLOADS) != 0; FieldInfo.IndexOptions?indexOptions; if (!isIndexed) { indexOptions = null; } else if ((bits & PreFlexRWFieldInfosWriter.OMIT_TERM_FREQ_AND_POSITIONS) != 0) { indexOptions = FieldInfo.IndexOptions.DOCS_ONLY; } else if ((bits & PreFlexRWFieldInfosWriter.OMIT_POSITIONS) != 0) { if (format <= PreFlexRWFieldInfosWriter.FORMAT_OMIT_POSITIONS) { indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS; } else { throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")"); } } else { indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; } // LUCENE-3027: past indices were able to write // storePayloads=true when omitTFAP is also true, // which is invalid. We correct that, here: if (indexOptions != FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { storePayloads = false; } DocValuesType_e?normType = isIndexed && !omitNorms ? (DocValuesType_e?)DocValuesType_e.NUMERIC : null; if (format == PreFlexRWFieldInfosWriter.FORMAT_PREFLEX_RW && normType != null) { // RW can have norms but doesn't write them normType = input.ReadByte() != 0 ? (DocValuesType_e?)DocValuesType_e.NUMERIC : null; } infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, null, normType, null); } if (input.FilePointer != input.Length()) { throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer + " vs size " + input.Length() + " (resource: " + input + ")"); } return(new FieldInfos(infos)); } finally { input.Dispose(); } }