/// <summary>
/// Retrieve the length (in bytes) of the tvd and tvf
/// entries for the next numDocs starting with
/// startDocID. This is used for bulk copying when
/// merging segments, if the field numbers are
/// congruent. Once this returns, the tvf and tvd streams
/// are seeked to the startDocID.
/// </summary>
internal void RawDocs(int[] tvdLengths, int[] tvfLengths, int startDocID, int numDocs)
{
    if (Tvx == null)
    {
        CollectionsHelper.Fill(tvdLengths, 0);
        CollectionsHelper.Fill(tvfLengths, 0);
        return;
    }

    SeekTvx(startDocID);

    long tvdPosition = Tvx.ReadLong();
    Tvd.Seek(tvdPosition);

    long tvfPosition = Tvx.ReadLong();
    Tvf.Seek(tvfPosition);

    long lastTvdPosition = tvdPosition;
    long lastTvfPosition = tvfPosition;

    int count = 0;
    while (count < numDocs)
    {
        int docID = startDocID + count + 1;
        Debug.Assert(docID <= NumTotalDocs);
        if (docID < NumTotalDocs)
        {
            tvdPosition = Tvx.ReadLong();
            tvfPosition = Tvx.ReadLong();
        }
        else
        {
            tvdPosition = Tvd.Length();
            tvfPosition = Tvf.Length();
            Debug.Assert(count == numDocs - 1);
        }
        tvdLengths[count] = (int)(tvdPosition - lastTvdPosition);
        tvfLengths[count] = (int)(tvfPosition - lastTvfPosition);
        count++;
        lastTvdPosition = tvdPosition;
        lastTvfPosition = tvfPosition;
    }
}
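// For intuition about the differencing above: each tvx entry stores an absolute file
// position into .tvd (and .tvf), and a document's byte length is the gap to the next
// entry, or to the end of the file for the last document. A tiny illustration with a
// hypothetical helper and made-up positions (not taken from a real index):
private static int[] LengthsFromPositions(long[] positions)
{
    // positions[i] and positions[i + 1] bracket document i; the last entry stands in
    // for the file length, just like Tvd.Length()/Tvf.Length() in RawDocs above.
    int[] lengths = new int[positions.Length - 1];
    for (int i = 0; i < lengths.Length; i++)
    {
        lengths[i] = (int)(positions[i + 1] - positions[i]);
    }
    return lengths; // e.g. { 100, 140, 190, 230 } -> { 40, 50, 40 }
}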
/// <summary>
/// Sole constructor. </summary>
public Lucene40StoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context)
{
    string segment = si.Name;
    bool success = false;
    FieldInfos = fn;
    try
    {
        FieldsStream = d.OpenInput(IndexFileNames.SegmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION), context);
        string indexStreamFN = IndexFileNames.SegmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
        IndexStream = d.OpenInput(indexStreamFN, context);

        CodecUtil.CheckHeader(IndexStream, Lucene40StoredFieldsWriter.CODEC_NAME_IDX, Lucene40StoredFieldsWriter.VERSION_START, Lucene40StoredFieldsWriter.VERSION_CURRENT);
        CodecUtil.CheckHeader(FieldsStream, Lucene40StoredFieldsWriter.CODEC_NAME_DAT, Lucene40StoredFieldsWriter.VERSION_START, Lucene40StoredFieldsWriter.VERSION_CURRENT);
        Debug.Assert(Lucene40StoredFieldsWriter.HEADER_LENGTH_DAT == FieldsStream.FilePointer);
        Debug.Assert(Lucene40StoredFieldsWriter.HEADER_LENGTH_IDX == IndexStream.FilePointer);

        long indexSize = IndexStream.Length() - Lucene40StoredFieldsWriter.HEADER_LENGTH_IDX;
        this.Size_Renamed = (int)(indexSize >> 3);
        // Verify two sources of "maxDoc" agree:
        if (this.Size_Renamed != si.DocCount)
        {
            throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.Size_Renamed + " but segmentInfo shows " + si.DocCount);
        }
        NumTotalDocs = (int)(indexSize >> 3);
        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            try
            {
                Dispose();
            }
            catch (Exception)
            {
                // ensure we throw our original exception
            }
        }
    }
}
public virtual void CopyFile(Directory dir, string src, string dest)
{
    IndexInput @in = dir.OpenInput(src, NewIOContext(Random()));
    IndexOutput @out = dir.CreateOutput(dest, NewIOContext(Random()));
    sbyte[] b = new sbyte[1024];
    long remainder = @in.Length();
    while (remainder > 0)
    {
        int len = (int)Math.Min(b.Length, remainder);
        @in.ReadBytes(b, 0, len);
        @out.WriteBytes(b, len);
        remainder -= len;
    }
    @in.Dispose();
    @out.Dispose();
}
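// A minimal usage sketch of CopyFile, assuming the LuceneTestCase helpers already used
// in these tests (NewDirectory, NewIOContext, Random); the file names and the value
// written are illustrative only.
Directory dir = NewDirectory();
IndexOutput os = dir.CreateOutput("original.bin", NewIOContext(Random()));
os.WriteInt(42);
os.Dispose();
CopyFile(dir, "original.bin", "copy.bin");
Assert.AreEqual(dir.FileLength("original.bin"), dir.FileLength("copy.bin"));
dir.Dispose();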
public virtual void TestAppend()
{
    Directory newDir = NewDirectory();
    CompoundFileDirectory csw = new CompoundFileDirectory(newDir, "d.cfs", NewIOContext(Random()), true);
    int size = 5 + Random().Next(128);
    for (int j = 0; j < 2; j++)
    {
        IndexOutput os = csw.CreateOutput("seg_" + j + "_foo.txt", NewIOContext(Random()));
        for (int i = 0; i < size; i++)
        {
            os.WriteInt(i * j);
        }
        os.Dispose();
        string[] listAll = newDir.ListAll();
        Assert.AreEqual(1, listAll.Length);
        Assert.AreEqual("d.cfs", listAll[0]);
    }
    CreateSequenceFile(Dir, "d1", (sbyte)0, 15);
    Dir.Copy(csw, "d1", "d1", NewIOContext(Random()));
    string[] listAll_ = newDir.ListAll();
    Assert.AreEqual(1, listAll_.Length);
    Assert.AreEqual("d.cfs", listAll_[0]);
    csw.Dispose();
    CompoundFileDirectory csr = new CompoundFileDirectory(newDir, "d.cfs", NewIOContext(Random()), false);
    for (int j = 0; j < 2; j++)
    {
        IndexInput openInput = csr.OpenInput("seg_" + j + "_foo.txt", NewIOContext(Random()));
        Assert.AreEqual(size * 4, openInput.Length());
        for (int i = 0; i < size; i++)
        {
            Assert.AreEqual(i * j, openInput.ReadInt());
        }
        openInput.Dispose();
    }
    IndexInput expected = Dir.OpenInput("d1", NewIOContext(Random()));
    IndexInput actual = csr.OpenInput("d1", NewIOContext(Random()));
    AssertSameStreams("d1", expected, actual);
    AssertSameSeekBehavior("d1", expected, actual);
    expected.Dispose();
    actual.Dispose();
    csr.Dispose();
    newDir.Dispose();
}
public bool MoveNext()
{
    if (input.FilePointer < input.Length())
    {
        int code = input.ReadVInt();
        if ((code & 1) != 0)
        {
            field = input.ReadString();
        }
        int prefix = Number.URShift(code, 1);
        int suffix = input.ReadVInt();
        bytes.Grow(prefix + suffix);
        input.ReadBytes(bytes.Bytes, prefix, suffix);
        bytes.Length = prefix + suffix;
        term.Set(field, bytes);
        return true;
    }
    return false;
}
private MemoryStream CompressStream(string fileName, long originalLength)
{
    // unfortunately, DeflateStream doesn't allow seeking, and we need a seekable stream
    // to pass to the blob storage API, so we compress into a memory stream
    MemoryStream compressedStream = new MemoryStream();
    IndexInput indexInput = null;
    try
    {
        indexInput = CacheDirectory.OpenInput(fileName);
        using (var compressor = new DeflateStream(compressedStream, CompressionMode.Compress, true))
        {
            // compress the cached file's bytes into compressedStream
            byte[] bytes = new byte[indexInput.Length()];
            indexInput.ReadBytes(bytes, 0, (int)bytes.Length);
            compressor.Write(bytes, 0, (int)bytes.Length);
        }

        // seek back to the beginning of the compressed stream
        compressedStream.Seek(0, SeekOrigin.Begin);

        Debug.WriteLine(string.Format("COMPRESSED {0} -> {1} {2}% to {3}", originalLength, compressedStream.Length, ((float)compressedStream.Length / (float)originalLength) * 100, _name));
    }
    catch
    {
        // release the compressed stream resources if an error occurs
        compressedStream.Dispose();
        throw;
    }
    finally
    {
        if (indexInput != null)
        {
            indexInput.Close();
        }
    }
    return compressedStream;
}
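// A sketch of the read-side counterpart, assuming the blob was produced by
// CompressStream above; it relies only on the BCL DeflateStream/MemoryStream types,
// and the method name is hypothetical.
private static MemoryStream DecompressStream(Stream compressedBlobStream)
{
    var decompressed = new MemoryStream();
    using (var decompressor = new DeflateStream(compressedBlobStream, CompressionMode.Decompress, true))
    {
        decompressor.CopyTo(decompressed); // inflate back to the original file bytes
    }
    decompressed.Seek(0, SeekOrigin.Begin); // rewind so callers can read from the start
    return decompressed;
}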
private void AssertSameStreams(string msg, IndexInput expected, IndexInput test)
{
    Assert.IsNotNull(expected, msg + " null expected");
    Assert.IsNotNull(test, msg + " null test");
    Assert.AreEqual(expected.Length(), test.Length(), msg + " length");
    Assert.AreEqual(expected.FilePointer, test.FilePointer, msg + " position");

    var expectedBuffer = new byte[512];
    var testBuffer = new byte[expectedBuffer.Length];

    long remainder = expected.Length() - expected.FilePointer;
    while (remainder > 0)
    {
        int readLen = (int)Math.Min(remainder, expectedBuffer.Length);
        expected.ReadBytes(expectedBuffer, 0, readLen);
        test.ReadBytes(testBuffer, 0, readLen);
        AssertEqualArrays(msg + ", remainder " + remainder, expectedBuffer, testBuffer, 0, readLen);
        remainder -= readLen;
    }
}
public virtual void TestDataInputOutput()
{
    Random random = Random();
    for (int iter = 0; iter < 5 * RANDOM_MULTIPLIER; iter++)
    {
        BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("testOverflow"));
        if (dir is MockDirectoryWrapper)
        {
            ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
        }
        int blockBits = TestUtil.NextInt(random, 1, 20);
        int blockSize = 1 << blockBits;
        PagedBytes p = new PagedBytes(blockBits);
        IndexOutput @out = dir.CreateOutput("foo", IOContext.DEFAULT);
        int numBytes = TestUtil.NextInt(Random(), 2, 10000000);

        byte[] answer = new byte[numBytes];
        Random().NextBytes(answer);
        int written = 0;
        while (written < numBytes)
        {
            if (Random().Next(10) == 7)
            {
                @out.WriteByte(answer[written++]);
            }
            else
            {
                int chunk = Math.Min(Random().Next(1000), numBytes - written);
                @out.WriteBytes(answer, written, chunk);
                written += chunk;
            }
        }

        @out.Dispose();
        IndexInput input = dir.OpenInput("foo", IOContext.DEFAULT);
        DataInput @in = (DataInput)input.Clone();

        p.Copy(input, input.Length());
        PagedBytes.Reader reader = p.Freeze(random.NextBoolean());

        byte[] verify = new byte[numBytes];
        int read = 0;
        while (read < numBytes)
        {
            if (Random().Next(10) == 7)
            {
                verify[read++] = @in.ReadByte();
            }
            else
            {
                int chunk = Math.Min(Random().Next(1000), numBytes - read);
                @in.ReadBytes(verify, read, chunk);
                read += chunk;
            }
        }
        Assert.IsTrue(Arrays.Equals(answer, verify));

        BytesRef slice = new BytesRef();
        for (int iter2 = 0; iter2 < 100; iter2++)
        {
            int pos = random.Next(numBytes - 1);
            int len = random.Next(Math.Min(blockSize + 1, numBytes - pos));
            reader.FillSlice(slice, pos, len);
            for (int byteUpto = 0; byteUpto < len; byteUpto++)
            {
                Assert.AreEqual(answer[pos + byteUpto], (byte)slice.Bytes[slice.Offset + byteUpto]);
            }
        }
        input.Dispose();
        dir.Dispose();
    }
}
public Lucene3xStoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context)
{
    string segment = Lucene3xSegmentInfoFormat.GetDocStoreSegment(si);
    int docStoreOffset = Lucene3xSegmentInfoFormat.GetDocStoreOffset(si);
    int size = si.DocCount;
    bool success = false;
    FieldInfos = fn;
    try
    {
        if (docStoreOffset != -1 && Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si))
        {
            d = StoreCFSReader = new CompoundFileDirectory(si.Dir, IndexFileNames.SegmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false);
        }
        else
        {
            StoreCFSReader = null;
        }
        FieldsStream = d.OpenInput(IndexFileNames.SegmentFileName(segment, "", FIELDS_EXTENSION), context);
        string indexStreamFN = IndexFileNames.SegmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
        IndexStream = d.OpenInput(indexStreamFN, context);

        Format = IndexStream.ReadInt();

        if (Format < FORMAT_MINIMUM)
        {
            throw new IndexFormatTooOldException(IndexStream, Format, FORMAT_MINIMUM, FORMAT_CURRENT);
        }
        if (Format > FORMAT_CURRENT)
        {
            throw new IndexFormatTooNewException(IndexStream, Format, FORMAT_MINIMUM, FORMAT_CURRENT);
        }

        long indexSize = IndexStream.Length() - FORMAT_SIZE;

        if (docStoreOffset != -1)
        {
            // We read only a slice out of this shared fields file
            this.DocStoreOffset = docStoreOffset;
            this.Size = size;

            // Verify the file is long enough to hold all of our
            // docs
            Debug.Assert(((int)(indexSize / 8)) >= size + this.DocStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
        }
        else
        {
            this.DocStoreOffset = 0;
            this.Size = (int)(indexSize >> 3);
            // Verify two sources of "maxDoc" agree:
            if (this.Size != si.DocCount)
            {
                throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.Size + " but segmentInfo shows " + si.DocCount);
            }
        }
        NumTotalDocs = (int)(indexSize >> 3);
        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            try
            {
                Dispose();
            }
            catch (Exception)
            {
                // keep our original exception
            }
        }
    }
}
public override long Length()
{
    return ii.Length();
}
protected override void Dispose(bool disposing)
{
    _fileMutex.WaitOne();
    try
    {
        string fileName = _name;

        // make sure it's all written out
        _indexOutput.Flush();

        long originalLength = _indexOutput.Length;
        _indexOutput.Dispose();

        Stream blobStream;
#if COMPRESSBLOBS
        // optionally put a compressor around the blob stream
        if (_azureDirectory.ShouldCompressFile(_name))
        {
            // unfortunately, DeflateStream doesn't allow seeking, and we need a seekable stream
            // to pass to the blob storage API, so we compress into a memory stream
            MemoryStream compressedStream = new MemoryStream();

            try
            {
                IndexInput indexInput = CacheDirectory.OpenInput(fileName);
                using (DeflateStream compressor = new DeflateStream(compressedStream, CompressionMode.Compress, true))
                {
                    // compress the cached file's bytes into compressedStream
                    byte[] bytes = new byte[indexInput.Length()];
                    indexInput.ReadBytes(bytes, 0, (int)bytes.Length);
                    compressor.Write(bytes, 0, (int)bytes.Length);
                }
                indexInput.Close();

                // seek back to the beginning of the compressed stream
                compressedStream.Seek(0, SeekOrigin.Begin);

                Debug.WriteLine(string.Format("COMPRESSED {0} -> {1} {2}% to {3}", originalLength, compressedStream.Length, ((float)compressedStream.Length / (float)originalLength) * 100, _name));
            }
            catch
            {
                // release the compressed stream resources if an error occurs
                compressedStream.Dispose();
                throw;
            }

            blobStream = compressedStream;
        }
        else
#endif
        {
            blobStream = new StreamInput(CacheDirectory.OpenInput(fileName));
        }

        try
        {
            // push the blobStream up to the cloud
            _blob.UploadFromStream(blobStream);

            // set the metadata with the original index file properties
            _blob.Metadata["CachedLength"] = originalLength.ToString();
            _blob.Metadata["CachedLastModified"] = CacheDirectory.FileModified(fileName).ToString();
            _blob.SetMetadata();

            Debug.WriteLine(string.Format("PUT {1} bytes to {0} in cloud", _name, blobStream.Length));
        }
        finally
        {
            blobStream.Dispose();
        }

#if FULLDEBUG
        Debug.WriteLine(string.Format("CLOSED WRITESTREAM {0}", _name));
#endif
        // clean up
        _indexOutput = null;
        _blobContainer = null;
        _blob = null;
        GC.SuppressFinalize(this);
    }
    finally
    {
        _fileMutex.ReleaseMutex();
    }
}
public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
{
    string fileName = IndexFileNames.SegmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
    IndexInput input = directory.OpenInput(fileName, iocontext);

    bool success = false;
    try
    {
        int format = input.ReadVInt();

        if (format > FORMAT_MINIMUM)
        {
            throw new IndexFormatTooOldException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT);
        }
        if (format < FORMAT_CURRENT)
        {
            throw new IndexFormatTooNewException(input, format, FORMAT_MINIMUM, FORMAT_CURRENT);
        }

        int size = input.ReadVInt(); // read in the size

        FieldInfo[] infos = new FieldInfo[size];

        for (int i = 0; i < size; i++)
        {
            string name = input.ReadString();
            int fieldNumber = i;
            byte bits = input.ReadByte();
            bool isIndexed = (bits & IS_INDEXED) != 0;
            bool storeTermVector = (bits & STORE_TERMVECTOR) != 0;
            bool omitNorms = (bits & OMIT_NORMS) != 0;
            bool storePayloads = (bits & STORE_PAYLOADS) != 0;
            FieldInfo.IndexOptions indexOptions;
            if (!isIndexed)
            {
                indexOptions = default(FieldInfo.IndexOptions);
            }
            else if ((bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0)
            {
                indexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
            }
            else if ((bits & OMIT_POSITIONS) != 0)
            {
                if (format <= FORMAT_OMIT_POSITIONS)
                {
                    indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
                }
                else
                {
                    throw new CorruptIndexException("Corrupt fieldinfos, OMIT_POSITIONS set but format=" + format + " (resource: " + input + ")");
                }
            }
            else
            {
                indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
            }

            // LUCENE-3027: past indices were able to write
            // storePayloads=true when omitTFAP is also true,
            // which is invalid. We correct that, here:
            if (indexOptions != FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
            {
                storePayloads = false;
            }
            // LUCENE TO-DO
            infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, null, isIndexed && !omitNorms ? FieldInfo.DocValuesType_e.NUMERIC : default(FieldInfo.DocValuesType_e), CollectionsHelper.EmptyMap<string, string>());
        }

        if (input.FilePointer != input.Length())
        {
            throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.FilePointer + " vs size " + input.Length() + " (resource: " + input + ")");
        }
        FieldInfos fieldInfos = new FieldInfos(infos);
        success = true;
        return fieldInfos;
    }
    finally
    {
        if (success)
        {
            input.Dispose();
        }
        else
        {
            IOUtils.CloseWhileHandlingException(input);
        }
    }
}
internal TermVectorsReader(Directory d, System.String segment, FieldInfos fieldInfos, int readBufferSize, int docStoreOffset, int size)
{
    bool success = false;

    try
    {
        if (d.FileExists(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION))
        {
            tvx = d.OpenInput(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION, readBufferSize);
            format = CheckValidFormat(tvx);
            tvd = d.OpenInput(segment + "." + IndexFileNames.VECTORS_DOCUMENTS_EXTENSION, readBufferSize);
            int tvdFormat = CheckValidFormat(tvd);
            tvf = d.OpenInput(segment + "." + IndexFileNames.VECTORS_FIELDS_EXTENSION, readBufferSize);
            int tvfFormat = CheckValidFormat(tvf);

            System.Diagnostics.Debug.Assert(format == tvdFormat);
            System.Diagnostics.Debug.Assert(format == tvfFormat);

            if (format >= FORMAT_VERSION2)
            {
                System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 16 == 0);
                numTotalDocs = (int)(tvx.Length() >> 4);
            }
            else
            {
                System.Diagnostics.Debug.Assert((tvx.Length() - FORMAT_SIZE) % 8 == 0);
                numTotalDocs = (int)(tvx.Length() >> 3);
            }

            if (-1 == docStoreOffset)
            {
                this.docStoreOffset = 0;
                this.size = numTotalDocs;
                System.Diagnostics.Debug.Assert(size == 0 || numTotalDocs == size);
            }
            else
            {
                this.docStoreOffset = docStoreOffset;
                this.size = size;
                // Verify the file is long enough to hold all of our
                // docs
                System.Diagnostics.Debug.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset);
            }
        }
        else
        {
            // If all documents flushed in a segment had hit
            // non-aborting exceptions, it's possible that
            // FieldInfos.hasVectors returns true yet the term
            // vector files don't exist.
            format = 0;
        }

        this.fieldInfos = fieldInfos;
        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            Dispose();
        }
    }
}
/// <summary>
/// Sole constructor. </summary>
public CompressingStoredFieldsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode)
{
    this.CompressionMode_Renamed = compressionMode;
    string segment = si.Name;
    bool success = false;
    FieldInfos = fn;
    NumDocs = si.DocCount;
    ChecksumIndexInput indexStream = null;
    try
    {
        string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
        string fieldsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION);
        // Load the index into memory
        indexStream = d.OpenChecksumInput(indexStreamFN, context);
        string codecNameIdx = formatName + CompressingStoredFieldsWriter.CODEC_SFX_IDX;
        Version_Renamed = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
        Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.FilePointer);
        IndexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

        long maxPointer = -1;

        if (Version_Renamed >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
        {
            maxPointer = indexStream.ReadVLong();
            CodecUtil.CheckFooter(indexStream);
        }
        else
        {
            CodecUtil.CheckEOF(indexStream);
        }

        indexStream.Dispose();
        indexStream = null;

        // Open the data file and read metadata
        FieldsStream = d.OpenInput(fieldsStreamFN, context);
        if (Version_Renamed >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
        {
            if (maxPointer + CodecUtil.FooterLength() != FieldsStream.Length())
            {
                throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer=" + maxPointer + ", length=" + FieldsStream.Length());
            }
        }
        else
        {
            maxPointer = FieldsStream.Length();
        }
        this.MaxPointer = maxPointer;
        string codecNameDat = formatName + CompressingStoredFieldsWriter.CODEC_SFX_DAT;
        int fieldsVersion = CodecUtil.CheckHeader(FieldsStream, codecNameDat, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
        if (Version_Renamed != fieldsVersion)
        {
            throw new CorruptIndexException("Version mismatch between stored fields index and data: " + Version_Renamed + " != " + fieldsVersion);
        }
        Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == FieldsStream.FilePointer);

        if (Version_Renamed >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS)
        {
            ChunkSize_Renamed = FieldsStream.ReadVInt();
        }
        else
        {
            ChunkSize_Renamed = -1;
        }

        PackedIntsVersion = FieldsStream.ReadVInt();
        Decompressor = compressionMode.NewDecompressor();
        this.Bytes = new BytesRef();

        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(this, indexStream);
        }
    }
}
public override long Length()
{
    return main.Length();
}
public override long Length()
{
    return _cacheDirIndexInput.Length();
}
/// <summary>
/// Seek <c>input</c> to the directory offset. </summary>
protected internal virtual void SeekDir(IndexInput input, long dirOffset)
{
    if (Version >= BlockTreeTermsWriter.VERSION_CHECKSUM)
    {
        input.Seek(input.Length() - CodecUtil.FooterLength() - 8);
        dirOffset = input.ReadLong();
    }
    else if (Version >= BlockTreeTermsWriter.VERSION_APPEND_ONLY)
    {
        input.Seek(input.Length() - 8);
        dirOffset = input.ReadLong();
    }
    input.Seek(dirOffset);
}
/// <summary>
/// Returns (but does not validate) the checksum previously written by <seealso cref="CheckFooter"/>. </summary>
/// <returns> actual checksum value </returns>
/// <exception cref="IOException"> if the footer is invalid </exception>
public static long RetrieveChecksum(IndexInput @in)
{
    @in.Seek(@in.Length() - FooterLength());
    ValidateFooter(@in);
    return @in.ReadLong();
}
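// For reference, the layout RetrieveChecksum relies on (as implied by FooterLength()
// and ValidateFooter above): the footer occupies the last 16 bytes of the file as a
// 4-byte magic, a 4-byte checksum-algorithm ID, and the 8-byte checksum. A sketch that
// reads those raw fields without any validation; the method name is illustrative only.
private static long ReadRawFooterChecksum(IndexInput @in)
{
    @in.Seek(@in.Length() - 16); // assumed footer size: 4 (magic) + 4 (algorithm ID) + 8 (checksum)
    @in.ReadInt();               // skip the footer magic (this is what ValidateFooter checks)
    @in.ReadInt();               // skip the checksum algorithm ID (0 = CRC32 in 4.x)
    return @in.ReadLong();       // the same value RetrieveChecksum returns
}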
internal FieldsReader(Directory d, System.String segment, FieldInfos fn, int readBufferSize, int docStoreOffset, int size)
{
    bool success = false;
    isOriginal = true;
    try
    {
        fieldInfos = fn;

        cloneableFieldsStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_EXTENSION, readBufferSize);
        cloneableIndexStream = d.OpenInput(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION, readBufferSize);

        // First version of fdx did not include a format
        // header, but, the first int will always be 0 in that
        // case
        int firstInt = cloneableIndexStream.ReadInt();
        format = firstInt == 0 ? 0 : firstInt;

        if (format > FieldsWriter.FORMAT_CURRENT)
        {
            throw new CorruptIndexException("Incompatible format version: " + format + " expected " + FieldsWriter.FORMAT_CURRENT + " or lower");
        }

        formatSize = format > FieldsWriter.FORMAT ? 4 : 0;

        if (format < FieldsWriter.FORMAT_VERSION_UTF8_LENGTH_IN_BYTES)
        {
            cloneableFieldsStream.SetModifiedUTF8StringsMode();
        }

        fieldsStream = (IndexInput)cloneableFieldsStream.Clone();

        long indexSize = cloneableIndexStream.Length() - formatSize;

        if (docStoreOffset != -1)
        {
            // We read only a slice out of this shared fields file
            this.docStoreOffset = docStoreOffset;
            this.size = size;

            // Verify the file is long enough to hold all of our
            // docs
            System.Diagnostics.Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
        }
        else
        {
            this.docStoreOffset = 0;
            this.size = (int)(indexSize >> 3);
        }

        indexStream = (IndexInput)cloneableIndexStream.Clone();
        numTotalDocs = (int)(indexSize >> 3);
        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            Dispose();
        }
    }
}
protected override void SeekDir(IndexInput input, long dirOffset)
{
    // the directory offset is stored as a single long (8 bytes) at the end of the file;
    // the previous "sizeof(long) / 8" only stepped back 1 byte, so seek a full 8 bytes instead
    input.Seek(input.Length() - sizeof(long));
    long offset = input.ReadLong();
    input.Seek(offset);
}
// note: just like SegmentReader in 3.x, we open up all the files here (including separate norms) up front.
// but we just don't do any seeks or reading yet.
public Lucene3xNormsProducer(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context)
{
    Directory separateNormsDir = info.Dir; // separate norms are never inside CFS
    Maxdoc = info.DocCount;
    string segmentName = info.Name;
    bool success = false;
    try
    {
        long nextNormSeek = NORMS_HEADER.Length; // skip header (header unused for now)
        foreach (FieldInfo fi in fields)
        {
            if (fi.HasNorms())
            {
                string fileName = GetNormFilename(info, fi.Number);
                Directory d = HasSeparateNorms(info, fi.Number) ? separateNormsDir : dir;

                // singleNormFile means multiple norms share this file
                bool singleNormFile = IndexFileNames.MatchesExtension(fileName, NORMS_EXTENSION);
                IndexInput normInput = null;
                long normSeek;

                if (singleNormFile)
                {
                    normSeek = nextNormSeek;
                    if (SingleNormStream == null)
                    {
                        SingleNormStream = d.OpenInput(fileName, context);
                        OpenFiles.Add(SingleNormStream);
                    }
                    // All norms in the .nrm file can share a single IndexInput since
                    // they are only used in a synchronized context.
                    // If this were to change in the future, a clone could be done here.
                    normInput = SingleNormStream;
                }
                else
                {
                    normInput = d.OpenInput(fileName, context);
                    OpenFiles.Add(normInput);
                    // if the segment was created in 3.2 or after, we wrote the header for sure,
                    // and don't need to do the sketchy file size check. otherwise, we check
                    // if the size is exactly equal to maxDoc to detect a headerless file.
                    // NOTE: remove this check in Lucene 5.0!
                    string version = info.Version;
                    bool isUnversioned = (version == null || StringHelper.VersionComparator.Compare(version, "3.2") < 0) && normInput.Length() == Maxdoc;
                    if (isUnversioned)
                    {
                        normSeek = 0;
                    }
                    else
                    {
                        normSeek = NORMS_HEADER.Length;
                    }
                }
                NormsDocValues norm = new NormsDocValues(this, normInput, normSeek);
                Norms[fi.Name] = norm;
                nextNormSeek += Maxdoc; // increment also if some norms are separate
            }
        }
        // TODO: change to a real check? see LUCENE-3619
        Debug.Assert(SingleNormStream == null || nextNormSeek == SingleNormStream.Length(), SingleNormStream != null ? "len: " + SingleNormStream.Length() + " expected: " + nextNormSeek : "null");
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(OpenFiles);
        }
    }
    ramBytesUsed = new AtomicLong();
}
private void SeekDir(IndexInput input, long dirOffset)
{
    if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
    {
        input.Seek(input.Length() - CodecUtil.FooterLength() - 8);
        dirOffset = input.ReadLong();
    }
    else if (_version >= FixedGapTermsIndexWriter.VERSION_APPEND_ONLY)
    {
        input.Seek(input.Length() - 8);
        dirOffset = input.ReadLong();
    }
    input.Seek(dirOffset);
}
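// The writer-side layout these SeekDir variants assume, sketched with a hypothetical
// helper (not the actual FixedGapTermsIndexWriter code): remember where the directory
// section starts, write it, then append that start offset as the final long so readers
// can locate it by seeking 8 bytes back from the end (or 8 bytes before the checksum
// footer, when one is present).
private static void WriteIndexWithDirTrailer(IndexOutput output, byte[] directoryBytes)
{
    long dirStart = output.FilePointer;                        // position where the directory begins
    output.WriteBytes(directoryBytes, directoryBytes.Length);  // the directory section itself
    output.WriteLong(dirStart);                                // trailer consumed by SeekDir
}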
public override long Length()
{
    return _indexInput.Length();
}
public Lucene3xTermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context)
{
    string segment = Lucene3xSegmentInfoFormat.GetDocStoreSegment(si);
    int docStoreOffset = Lucene3xSegmentInfoFormat.GetDocStoreOffset(si);
    int size = si.DocCount;

    bool success = false;

    try
    {
        if (docStoreOffset != -1 && Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si))
        {
            d = StoreCFSReader = new CompoundFileDirectory(si.Dir, IndexFileNames.SegmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false);
        }
        else
        {
            StoreCFSReader = null;
        }
        string idxName = IndexFileNames.SegmentFileName(segment, "", VECTORS_INDEX_EXTENSION);
        Tvx = d.OpenInput(idxName, context);
        Format = CheckValidFormat(Tvx);
        string fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION);
        Tvd = d.OpenInput(fn, context);
        int tvdFormat = CheckValidFormat(Tvd);
        fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_FIELDS_EXTENSION);
        Tvf = d.OpenInput(fn, context);
        int tvfFormat = CheckValidFormat(Tvf);

        Debug.Assert(Format == tvdFormat);
        Debug.Assert(Format == tvfFormat);

        NumTotalDocs = (int)(Tvx.Length() >> 4);

        if (-1 == docStoreOffset)
        {
            this.DocStoreOffset = 0;
            this.Size_Renamed = NumTotalDocs;
            Debug.Assert(size == 0 || NumTotalDocs == size);
        }
        else
        {
            this.DocStoreOffset = docStoreOffset;
            this.Size_Renamed = size;
            // Verify the file is long enough to hold all of our
            // docs
            Debug.Assert(NumTotalDocs >= size + docStoreOffset, "numTotalDocs=" + NumTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset);
        }

        this.FieldInfos = fieldInfos;
        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            try
            {
                Dispose();
            }
            catch (Exception)
            {
                // keep our original exception
            }
        }
    }
}
public override long Length()
{
    return @delegate.Length();
}
/// <summary>
/// Sole constructor. </summary>
public Lucene40TermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context)
{
    string segment = si.Name;
    int size = si.DocCount;

    bool success = false;

    try
    {
        string idxName = IndexFileNames.SegmentFileName(segment, "", VECTORS_INDEX_EXTENSION);
        Tvx = d.OpenInput(idxName, context);
        int tvxVersion = CodecUtil.CheckHeader(Tvx, CODEC_NAME_INDEX, VERSION_START, VERSION_CURRENT);

        string fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION);
        Tvd = d.OpenInput(fn, context);
        int tvdVersion = CodecUtil.CheckHeader(Tvd, CODEC_NAME_DOCS, VERSION_START, VERSION_CURRENT);
        fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_FIELDS_EXTENSION);
        Tvf = d.OpenInput(fn, context);
        int tvfVersion = CodecUtil.CheckHeader(Tvf, CODEC_NAME_FIELDS, VERSION_START, VERSION_CURRENT);

        Debug.Assert(HEADER_LENGTH_INDEX == Tvx.FilePointer);
        Debug.Assert(HEADER_LENGTH_DOCS == Tvd.FilePointer);
        Debug.Assert(HEADER_LENGTH_FIELDS == Tvf.FilePointer);
        Debug.Assert(tvxVersion == tvdVersion);
        Debug.Assert(tvxVersion == tvfVersion);

        NumTotalDocs = (int)(Tvx.Length() - HEADER_LENGTH_INDEX >> 4);

        this.Size_Renamed = NumTotalDocs;
        Debug.Assert(size == 0 || NumTotalDocs == size);

        this.FieldInfos = fieldInfos;
        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            try
            {
                Dispose();
            }
            catch (Exception)
            {
                // ensure we throw our original exception
            }
        }
    }
}