/// <summary>
/// Checks that the stream is positioned at the end, and throws an
/// exception if it is not.
/// </summary>
/// @deprecated Use <seealso cref="CheckFooter"/> instead; this should only be used for files without checksums.
public static void CheckEOF(IndexInput @in)
{
    if (@in.FilePointer != @in.Length())
    {
        throw new System.IO.IOException("did not read all bytes from file: read " + @in.FilePointer +
                                        " vs size " + @in.Length() + " (resource: " + @in + ")");
    }
}
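// Hedged usage sketch (not from the source; the file name and the reads are
// illustrative only): after fully consuming a file that has no checksum
// footer, CheckEOF verifies that no trailing bytes were left unread.
IndexInput input = dir.OpenInput("segments.gen", IOContext.DEFAULT);
try
{
    int version = input.ReadInt();
    long gen = input.ReadLong();
    CheckEOF(input); // throws IOException if any bytes remain unread
}
finally
{
    input.Close();
}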
/// <summary>
/// Returns the length in bytes of each raw document in a contiguous
/// range of length numDocs starting with startDocID. Returns the
/// IndexInput (the fieldStream), already seeked to the starting point
/// for startDocID.
/// </summary>
internal IndexInput RawDocs(int[] lengths, int startDocID, int numDocs, IState state)
{
    SeekIndex(startDocID, state);
    long startOffset = indexStream.ReadLong(state);
    long lastOffset = startOffset;
    int count = 0;
    while (count < numDocs)
    {
        long offset;
        int docID = docStoreOffset + startDocID + count + 1;
        System.Diagnostics.Debug.Assert(docID <= numTotalDocs);
        if (docID < numTotalDocs)
        {
            offset = indexStream.ReadLong(state);
        }
        else
        {
            offset = fieldsStream.Length(state);
        }
        lengths[count++] = (int)(offset - lastOffset);
        lastOffset = offset;
    }
    fieldsStream.Seek(startOffset, state);
    return fieldsStream;
}
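// Hedged usage sketch (assumed caller and names, mirroring how a segment
// merger might bulk-copy stored fields): each entry of "lengths" is the byte
// size of one document's record, derived from consecutive index offsets, so
// summing them gives the total span to copy verbatim without decoding.
int[] lengths = new int[numDocs];
IndexInput src = fieldsReader.RawDocs(lengths, startDocID, numDocs, state);
long totalBytes = 0;
for (int i = 0; i < numDocs; i++)
{
    totalBytes += lengths[i];
}
fieldsOut.CopyBytes(src, totalBytes); // fieldsOut: an assumed IndexOutput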
/// <summary>
/// Retrieve the length (in bytes) of the tvd and tvf entries for the
/// next numDocs starting with startDocID. This is used for bulk copying
/// when merging segments, if the field numbers are congruent. Once this
/// returns, the tvf and tvd streams are seeked to the startDocID.
/// </summary>
internal void RawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs, IState state)
{
    if (tvx == null)
    {
        for (int i = 0; i < tvdLengths.Length; i++)
        {
            tvdLengths[i] = 0;
        }
        for (int i = 0; i < tvfLengths.Length; i++)
        {
            tvfLengths[i] = 0;
        }
        return;
    }

    // SegmentMerger calls canReadRawDocs() first and should
    // not call us if that returns false.
    if (format < FORMAT_VERSION2)
    {
        throw new System.SystemException("cannot read raw docs with older term vector formats");
    }

    SeekTvx(startDocID, state);

    long tvdPosition = tvx.ReadLong(state);
    tvd.Seek(tvdPosition, state);

    long tvfPosition = tvx.ReadLong(state);
    tvf.Seek(tvfPosition, state);

    long lastTvdPosition = tvdPosition;
    long lastTvfPosition = tvfPosition;

    int count = 0;
    while (count < numDocs)
    {
        int docID = docStoreOffset + startDocID + count + 1;
        System.Diagnostics.Debug.Assert(docID <= numTotalDocs);
        if (docID < numTotalDocs)
        {
            tvdPosition = tvx.ReadLong(state);
            tvfPosition = tvx.ReadLong(state);
        }
        else
        {
            tvdPosition = tvd.Length(state);
            tvfPosition = tvf.Length(state);
            System.Diagnostics.Debug.Assert(count == numDocs - 1);
        }
        tvdLengths[count] = (int)(tvdPosition - lastTvdPosition);
        tvfLengths[count] = (int)(tvfPosition - lastTvfPosition);
        count++;
        lastTvdPosition = tvdPosition;
        lastTvfPosition = tvfPosition;
    }
}
public CompoundFileReader(Directory dir, System.String name, int readBufferSize, IState state)
{
    directory = dir;
    fileName = name;
    this.readBufferSize = readBufferSize;

    bool success = false;
    try
    {
        stream = dir.OpenInput(name, readBufferSize, state);

        // read the directory and init files
        int count = stream.ReadVInt(state);
        FileEntry entry = null;
        for (int i = 0; i < count; i++)
        {
            long offset = stream.ReadLong(state);
            System.String id = stream.ReadString(state);

            if (entry != null)
            {
                // set length of the previous entry
                entry.length = offset - entry.offset;
            }

            entry = new FileEntry { offset = offset };
            entries[id] = entry;
        }

        // set the length of the final entry
        if (entry != null)
        {
            entry.length = stream.Length(state) - entry.offset;
        }

        success = true;
    }
    finally
    {
        if (!success && (stream != null))
        {
            try
            {
                stream.Close();
            }
            catch (System.IO.IOException)
            {
            }
        }
    }
}
private void Read(IndexInput input, String fileName, IState state)
{
    int firstInt = input.ReadVInt(state);

    if (firstInt < 0)
    {
        // This is a real format
        format = firstInt;
    }
    else
    {
        format = FORMAT_PRE;
    }

    if (format != FORMAT_PRE && format != FORMAT_START)
    {
        throw new CorruptIndexException("unrecognized format " + format + " in file \"" + fileName + "\"");
    }

    int size;
    if (format == FORMAT_PRE)
    {
        size = firstInt;
    }
    else
    {
        size = input.ReadVInt(state); // read in the size
    }

    for (int i = 0; i < size; i++)
    {
        String name = StringHelper.Intern(input.ReadString(state));
        byte bits = input.ReadByte(state);
        bool isIndexed = (bits & IS_INDEXED) != 0;
        bool storeTermVector = (bits & STORE_TERMVECTOR) != 0;
        bool storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
        bool storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
        bool omitNorms = (bits & OMIT_NORMS) != 0;
        bool storePayloads = (bits & STORE_PAYLOADS) != 0;
        bool omitTermFreqAndPositions = (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0;

        AddInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions);
    }

    if (input.FilePointer(state) != input.Length(state))
    {
        throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer(state) + " vs size " + input.Length(state));
    }
}
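// Hedged illustration of the write side (assumed, not the actual writer
// code): the flags byte decoded above is simply the bitwise OR of the same
// masks, so all of a field's options fit in a single byte on disk. "fi" and
// "output" are assumed names.
byte bits = 0;
if (fi.isIndexed) bits |= IS_INDEXED;
if (fi.storeTermVector) bits |= STORE_TERMVECTOR;
if (fi.storePositionsWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR;
if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR;
if (fi.omitNorms) bits |= OMIT_NORMS;
if (fi.storePayloads) bits |= STORE_PAYLOADS;
if (fi.omitTermFreqAndPositions) bits |= OMIT_TERM_FREQ_AND_POSITIONS;
output.WriteByte(bits); // output: an assumed IndexOutput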
/// <summary>
/// Copy the contents of the file with the specified extension into the
/// provided output stream. Use the provided buffer for moving data
/// to reduce memory allocation.
/// </summary>
private void CopyFile(FileEntry source, IndexOutput os, byte[] buffer, IState state)
{
    IndexInput isRenamed = null;
    try
    {
        long startPtr = os.FilePointer;

        isRenamed = directory.OpenInput(source.file, state);
        long length = isRenamed.Length(state);
        long remainder = length;
        int chunk = buffer.Length;

        while (remainder > 0)
        {
            var len = (int)Math.Min(chunk, remainder);
            isRenamed.ReadBytes(buffer, 0, len, false, state);
            os.WriteBytes(buffer, len);
            remainder -= len;
            if (checkAbort != null)
            {
                // Roughly every 2 MB we will check if
                // it's time to abort
                checkAbort.Work(80, state);
            }
        }

        // Verify that remainder is 0
        if (remainder != 0)
        {
            throw new System.IO.IOException("Non-zero remainder length after copying: " + remainder + " (id: " + source.file + ", length: " + length + ", buffer size: " + chunk + ")");
        }

        // Verify that the output length diff is equal to original file
        long endPtr = os.FilePointer;
        long diff = endPtr - startPtr;
        if (diff != length)
        {
            throw new System.IO.IOException("Difference in the output file offsets " + diff + " does not match the original file length " + length);
        }
    }
    finally
    {
        if (isRenamed != null)
        {
            isRenamed.Close();
        }
    }
}
/// <summary>
/// Copies the file <i>src</i> to <seealso cref="Directory"/> <i>to</i> under the new
/// file name <i>dest</i>.
/// <p>
/// If you want to copy the entire source directory to the destination one, you
/// can do so like this:
///
/// <pre class="prettyprint">
/// Directory to; // the directory to copy to
/// foreach (string file in dir.ListAll())
/// {
///     dir.Copy(to, file, newFile, IOContext.DEFAULT); // newFile can be either file, or a new name
/// }
/// </pre>
/// <p>
/// <b>NOTE:</b> this method does not check whether <i>dest</i> exists and will
/// overwrite it if it does.
/// </summary>
public virtual void Copy(Directory to, string src, string dest, IOContext context)
{
    IndexOutput os = null;
    IndexInput @is = null;
    System.IO.IOException priorException = null;
    try
    {
        os = to.CreateOutput(dest, context);
        @is = OpenInput(src, context);
        os.CopyBytes(@is, @is.Length());
    }
    catch (System.IO.IOException ioe)
    {
        priorException = ioe;
    }
    finally
    {
        bool success = false;
        try
        {
            IOUtils.CloseWhileHandlingException(priorException, os, @is);
            success = true;
        }
        finally
        {
            if (!success)
            {
                try
                {
                    to.DeleteFile(dest);
                }
                catch (Exception)
                {
                }
            }
        }
    }
}
private void UnCache(string fileName)
{
    // Only let one thread uncache at a time; this only
    // happens during commit() or close():
    lock (UncacheLock)
    {
        if (VERBOSE)
        {
            Console.WriteLine("nrtdir.unCache name=" + fileName);
        }
        if (!Cache.FileExists(fileName))
        {
            // Another thread beat us...
            return;
        }
        IOContext context = IOContext.DEFAULT;
        IndexOutput @out = @delegate.CreateOutput(fileName, context);
        IndexInput @in = null;
        try
        {
            @in = Cache.OpenInput(fileName, context);
            @out.CopyBytes(@in, @in.Length());
        }
        finally
        {
            IOUtils.Close(@in, @out);
        }

        // Lock order: uncacheLock -> this
        lock (this)
        {
            // Must sync here because other sync methods have
            // if (cache.fileExists(name)) { ... } else { ... }:
            Cache.DeleteFile(fileName);
        }
    }
}
public override long Length()
{
    return main.Length();
}
protected override void SeekDir(IndexInput input, long dirOffset)
{
    // C#'s sizeof(long) is already a byte count (8), unlike Java's
    // Long.SIZE (64 bits), so no division by 8: the directory pointer
    // occupies the last 8 bytes of the file.
    input.Seek(input.Length() - sizeof(long));
    long offset = input.ReadLong();
    input.Seek(offset);
}
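// Hedged sketch of the matching write side (assumed helper names): the
// writer remembers where the directory starts and appends that position as
// the final 8 bytes of the file, which is exactly the long SeekDir reads
// back from Length() - sizeof(long).
long dirStart = output.FilePointer; // directory begins here
WriteDirectory(output);             // assumed helper that writes the entries
output.WriteLong(dirStart);         // trailing pointer consumed by SeekDir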
public override long Length()
{
    EnsureOpen();
    return @delegate.Length();
}
private static IDictionary<string, FileEntry> ReadLegacyEntries(IndexInput stream, int firstInt)
{
    IDictionary<string, FileEntry> entries = new Dictionary<string, FileEntry>();
    int count;
    bool stripSegmentName;
    if (firstInt < CompoundFileWriter.FORMAT_PRE_VERSION)
    {
        if (firstInt < CompoundFileWriter.FORMAT_NO_SEGMENT_PREFIX)
        {
            throw new CorruptIndexException("Incompatible format version: " + firstInt + " expected >= " + CompoundFileWriter.FORMAT_NO_SEGMENT_PREFIX + " (resource: " + stream + ")");
        }
        // It's a post-3.1 index, read the count.
        count = stream.ReadVInt();
        stripSegmentName = false;
    }
    else
    {
        count = firstInt;
        stripSegmentName = true;
    }

    // read the directory and init files
    long streamLength = stream.Length();
    FileEntry entry = null;
    for (int i = 0; i < count; i++)
    {
        long offset = stream.ReadLong();
        if (offset < 0 || offset > streamLength)
        {
            throw new CorruptIndexException("Invalid CFS entry offset: " + offset + " (resource: " + stream + ")");
        }
        string id = stream.ReadString();

        if (stripSegmentName)
        {
            // Fix the id to not include the segment names. this is relevant for
            // pre-3.1 indexes.
            id = IndexFileNames.StripSegmentName(id);
        }

        if (entry != null)
        {
            // set length of the previous entry
            entry.Length = offset - entry.Offset;
        }

        entry = new FileEntry();
        entry.Offset = offset;

        // Unlike Java's Map.put, the C# indexer does not return the previous
        // value, so detect duplicates explicitly before inserting.
        if (entries.ContainsKey(id))
        {
            throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS: " + stream);
        }
        entries[id] = entry;
    }

    // set the length of the final entry
    if (entry != null)
    {
        entry.Length = streamLength - entry.Offset;
    }

    return entries;
}
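// Worked example (assumed numbers): for a 100-byte CFS stream whose table
// lists "_1.fnm" at offset 10 and "_1.fdt" at offset 40, the loop above
// yields:
//   entries["_1.fnm"].Length = 40 - 10 = 30   (runs up to the next offset)
//   entries["_1.fdt"].Length = 100 - 40 = 60  (runs to the end of the stream)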
internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size, IState state)
{
    bool success = false;

    try
    {
        if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, state))
        {
            tvx = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize, state);
            format = CheckValidFormat(tvx, state);
            tvd = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize, state);
            int tvdFormat = CheckValidFormat(tvd, state);
            tvf = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize, state);
            int tvfFormat = CheckValidFormat(tvf, state);

            System.Diagnostics.Debug.Assert(format == tvdFormat);
            System.Diagnostics.Debug.Assert(format == tvfFormat);

            if (format >= FORMAT_VERSION2)
            {
                System.Diagnostics.Debug.Assert((tvx.Length(state) - FORMAT_SIZE) % 16 == 0);
                numTotalDocs = (int)(tvx.Length(state) >> 4);
            }
            else
            {
                System.Diagnostics.Debug.Assert((tvx.Length(state) - FORMAT_SIZE) % 8 == 0);
                numTotalDocs = (int)(tvx.Length(state) >> 3);
            }

            if (-1 == docStoreOffset)
            {
                this.docStoreOffset = 0;
                this.size = numTotalDocs;
                System.Diagnostics.Debug.Assert(size == 0 || numTotalDocs == size);
            }
            else
            {
                this.docStoreOffset = docStoreOffset;
                this.size = size;
                // Verify the file is long enough to hold all of our
                // docs
                System.Diagnostics.Debug.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset);
            }
        }
        else
        {
            // If all documents flushed in a segment had hit
            // non-aborting exceptions, it's possible that
            // FieldInfos.hasVectors returns true yet the term
            // vector files don't exist.
            format = 0;
        }

        this.fieldInfos = fieldInfos;
        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            Dispose();
        }
    }
}
private void AssertSameSeekBehavior(string msg, IndexInput expected, IndexInput actual)
{
    // seek to 0
    long point = 0;
    AssertSameStreams(msg + ", seek(0)", expected, actual, point);

    // seek to middle
    point = expected.Length() / 2L;
    AssertSameStreams(msg + ", seek(mid)", expected, actual, point);

    // seek to end - 2
    point = expected.Length() - 2;
    AssertSameStreams(msg + ", seek(end-2)", expected, actual, point);

    // seek to end - 1
    point = expected.Length() - 1;
    AssertSameStreams(msg + ", seek(end-1)", expected, actual, point);

    // seek to the end
    point = expected.Length();
    AssertSameStreams(msg + ", seek(end)", expected, actual, point);

    // seek past end
    point = expected.Length() + 1;
    AssertSameStreams(msg + ", seek(end+1)", expected, actual, point);
}
/// <summary>
/// Returns (but does not validate) the checksum previously written by <seealso cref="WriteFooter"/>.
/// </summary>
/// <returns> actual checksum value </returns>
/// <exception cref="IOException"> if the footer is invalid </exception>
public static long RetrieveChecksum(IndexInput @in)
{
    @in.Seek(@in.Length() - FooterLength());
    ValidateFooter(@in);
    return @in.ReadLong();
}
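// Hedged usage sketch (assumed caller, illustrative names): read the stored
// footer checksum without re-hashing the whole file, e.g. to compare a
// replicated copy against the original.
IndexInput input = dir.OpenInput(fileName, IOContext.DEFAULT);
try
{
    long storedChecksum = RetrieveChecksum(input);
    Console.WriteLine(fileName + " footer checksum: " + storedChecksum);
}
finally
{
    input.Close();
}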
internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size, IState state)
{
    bool success = false;
    isOriginal = true;
    try
    {
        fieldInfos = fn;

        cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize, state);
        cloneableIndexStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize, state);

        // First version of fdx did not include a format
        // header, but, the first int will always be 0 in that
        // case
        int firstInt = cloneableIndexStream.ReadInt(state);
        format = firstInt == 0 ? 0 : firstInt;

        if (format > FieldsWriter.FORMAT_CURRENT)
        {
            throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower");
        }

        formatSize = format > FieldsWriter.FORMAT ? 4 : 0;

        if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
        {
            cloneableFieldsStream.SetModifiedUTF8StringsMode();
        }

        fieldsStream = (IndexInput)cloneableFieldsStream.Clone(state);

        long indexSize = cloneableIndexStream.Length(state) - formatSize;

        if (docStoreOffset != -1)
        {
            // We read only a slice out of this shared fields file
            this.docStoreOffset = docStoreOffset;
            this.size = size;

            // Verify the file is long enough to hold all of our
            // docs
            System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
        }
        else
        {
            this.docStoreOffset = 0;
            this.size = (int)(indexSize >> 3);
        }

        indexStream = (IndexInput)cloneableIndexStream.Clone(state);

        numTotalDocs = (int)(indexSize >> 3);

        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            Dispose();
        }
    }
}
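// Worked illustration (assumed numbers): each fdx entry is one 8-byte
// pointer into the fdt file, so for a current-format index file of 804
// bytes, formatSize = 4, indexSize = 800, and numTotalDocs = 800 >> 3 = 100
// documents.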
public override long Length(IState state)
{
    return main.Length(state);
}
private void AssertSameStreams(string msg, IndexInput expected, IndexInput actual, long seekTo)
{
    if (seekTo >= 0 && seekTo < expected.Length())
    {
        expected.Seek(seekTo);
        actual.Seek(seekTo);
        AssertSameStreams(msg + ", seek(mid)", expected, actual);
    }
}
public override long Length(IState state)
{
    // Forward the caller's state rather than null, matching the other
    // state-aware Length overrides.
    return delegate_Renamed.Length(state);
}
private void AssertSameStreams(string msg, IndexInput expected, IndexInput test)
{
    Assert.IsNotNull(expected, msg + " null expected");
    Assert.IsNotNull(test, msg + " null test");
    Assert.AreEqual(expected.Length(), test.Length(), msg + " length");
    Assert.AreEqual(expected.FilePointer, test.FilePointer, msg + " position");

    var expectedBuffer = new byte[512];
    var testBuffer = new byte[expectedBuffer.Length];

    long remainder = expected.Length() - expected.FilePointer;
    while (remainder > 0)
    {
        int readLen = (int)Math.Min(remainder, expectedBuffer.Length);
        expected.ReadBytes(expectedBuffer, 0, readLen);
        test.ReadBytes(testBuffer, 0, readLen);
        AssertEqualArrays(msg + ", remainder " + remainder, expectedBuffer, testBuffer, 0, readLen);
        remainder -= readLen;
    }
}