public void Read(IndexInput input, FieldInfos fieldInfos)
{
    this.Term = null; // invalidate cache
    NewSuffixStart = input.ReadVInt();
    int length = input.ReadVInt();
    int totalLength = NewSuffixStart + length;
    Debug.Assert(totalLength <= ByteBlockPool.BYTE_BLOCK_SIZE - 2, "termLength=" + totalLength + ",resource=" + input);
    if (Bytes.Bytes.Length < totalLength)
    {
        Bytes.Grow(totalLength);
    }
    Bytes.Length = totalLength;
    input.ReadBytes(Bytes.Bytes, NewSuffixStart, length);
    int fieldNumber = input.ReadVInt();
    if (fieldNumber != CurrentFieldNumber)
    {
        CurrentFieldNumber = fieldNumber;
        // NOTE: too much sneakiness here, seriously this is a negative vint?!
        if (CurrentFieldNumber == -1)
        {
            Field = "";
        }
        else
        {
            Debug.Assert(fieldInfos.FieldInfo(CurrentFieldNumber) != null, CurrentFieldNumber.ToString());
            Field = String.Intern(fieldInfos.FieldInfo(CurrentFieldNumber).Name);
        }
    }
    else
    {
        Debug.Assert(Field.Equals(fieldInfos.FieldInfo(fieldNumber).Name),
            "currentFieldNumber=" + CurrentFieldNumber + " field=" + Field + " vs " +
            (fieldInfos.FieldInfo(fieldNumber) == null ? "null" : fieldInfos.FieldInfo(fieldNumber).Name));
    }
}
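// Illustrative sketch, not part of the original source: how the prefix-coded term bytes read
// above expand back into full terms. Each entry records how many bytes are shared with the
// previous term (newSuffixStart) plus the new suffix, and the decoder overwrites the tail of
// the previous term in place. The class and method names here are hypothetical.
internal static class PrefixCodedTermExample
{
    // buffer holds the previous term's bytes; the new term reuses its first newSuffixStart bytes.
    public static string DecodeNext(byte[] buffer, int newSuffixStart, byte[] suffix)
    {
        System.Array.Copy(suffix, 0, buffer, newSuffixStart, suffix.Length);
        return System.Text.Encoding.UTF8.GetString(buffer, 0, newSuffixStart + suffix.Length);
    }

    // e.g. starting from "apple": (4, "y") yields "apply", then (2, "ricot") yields "apricot"
}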
public AzureIndexInput(AzureDirectory azuredirectory, ICloudBlob blob)
    : base(blob.Name)
{
    _name = blob.Uri.Segments[blob.Uri.Segments.Length - 1];
    _fileMutex = BlobMutexManager.GrabMutex(_name);
    _fileMutex.WaitOne();
    try
    {
        _azureDirectory = azuredirectory;
        _blobContainer = azuredirectory.BlobContainer;
        _blob = blob;
        string fileName = _name;

        StreamOutput fileStream = _azureDirectory.CreateCachedOutputAsStream(fileName);

        // get the blob
        _blob.DownloadToStream(fileStream);
        fileStream.Flush();
        Debug.WriteLine("GET {0} RETRIEVED {1} bytes", _name, fileStream.Length);
        fileStream.Close();

        // and open it as an input
        _indexInput = CacheDirectory.OpenInput(fileName, IOContext.DEFAULT);
    }
    finally
    {
        _fileMutex.ReleaseMutex();
    }
}
protected override MonotonicBlockPackedReader GetAddressInstance(IndexInput data, FieldInfo field, BinaryEntry bytes) { data.Seek(bytes.AddressesOffset); return new MonotonicBlockPackedReader((IndexInput)data.Clone(), bytes.PackedIntsVersion, bytes.BlockSize, bytes.Count, true); }
protected override MonotonicBlockPackedReader GetOrdIndexInstance(IndexInput data, FieldInfo field, NumericEntry entry) { data.Seek(entry.Offset); return new MonotonicBlockPackedReader((IndexInput)data.Clone(), entry.PackedIntsVersion, entry.BlockSize, entry.Count, true); }
public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, String segmentSuffix, IOContext context) { _input = dir.OpenInput( IndexFileNames.SegmentFileName(segment, segmentSuffix, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION), new IOContext(context, true)); var success = false; Debug.Assert(indexDivisor == -1 || indexDivisor > 0); try { _version = ReadHeader(_input); _indexDivisor = indexDivisor; if (_version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM) CodecUtil.ChecksumEntireFile(_input); SeekDir(_input, _dirOffset); // Read directory var numFields = _input.ReadVInt(); if (numFields < 0) { throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + _input + ")"); } for (var i = 0; i < numFields; i++) { var field = _input.ReadVInt(); var indexStart = _input.ReadVLong(); var fieldInfo = fieldInfos.FieldInfo(field); try { _fields.Add(fieldInfo, new FieldIndexData(indexStart, this)); } catch (ArgumentException) { throw new CorruptIndexException(String.Format("Duplicate Field: {0}, Resource: {1}", fieldInfo.Name, _input)); } } success = true; } finally { if (indexDivisor > 0) { _input.Dispose(); _input = null; if (success) { _indexLoaded = true; } } } }
internal SepSkipListReader(IndexInput skipStream, IntIndexInput freqIn, IntIndexInput docIn, IntIndexInput posIn,
    int maxSkipLevels, int skipInterval)
    : base(skipStream, maxSkipLevels, skipInterval)
{
    if (freqIn != null)
        _freqIndex = new IntIndexInputIndex[maxSkipLevels];

    _docIndex = new IntIndexInputIndex[maxSkipLevels];

    if (posIn != null)
        _posIndex = new IntIndexInputIndex[maxSkipLevels];

    for (var i = 0; i < maxSkipLevels; i++)
    {
        if (freqIn != null)
            _freqIndex[i] = freqIn.Index();

        _docIndex[i] = docIn.Index();

        if (posIn != null)
            _posIndex[i] = posIn.Index();
    }

    _payloadPointer = new long[maxSkipLevels];
    _payloadLength = new int[maxSkipLevels];

    _lastFreqIndex = freqIn != null ? freqIn.Index() : null;
    _lastDocIndex = docIn.Index();
    _lastPosIndex = posIn != null ? posIn.Index() : null;
}
/// <summary> /// Used only by clone. </summary> private Lucene40StoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, IndexInput fieldsStream, IndexInput indexStream) { this.FieldInfos = fieldInfos; this.NumTotalDocs = numTotalDocs; this.Size_Renamed = size; this.FieldsStream = fieldsStream; this.IndexStream = indexStream; }
public IntBlockIndexReader(IndexInput input, int[] pending, IBlockReader blockReader) { _input = input; PENDING = pending; _blockReader = blockReader; _blockSize = pending.Length; _upto = _blockSize; }
public SegmentTermDocs(IndexInput freqStream, TermInfosReader tis, FieldInfos fieldInfos) { this.FreqStream = (IndexInput)freqStream.Clone(); this.Tis = tis; this.FieldInfos = fieldInfos; SkipInterval = tis.SkipInterval; MaxSkipLevels = tis.MaxSkipLevels; }
public static void ReadLegacyInfos(SegmentInfos infos, Directory directory, IndexInput input, int format) { infos.Version = input.ReadLong(); // read version infos.Counter = input.ReadInt(); // read counter Lucene3xSegmentInfoReader reader = new Lucene3xSegmentInfoReader(); for (int i = input.ReadInt(); i > 0; i--) // read segmentInfos { SegmentCommitInfo siPerCommit = reader.ReadLegacySegmentInfo(directory, format, input); SegmentInfo si = siPerCommit.Info; if (si.Version == null) { // Could be a 3.0 - try to open the doc stores - if it fails, it's a // 2.x segment, and an IndexFormatTooOldException will be thrown, // which is what we want. Directory dir = directory; if (Lucene3xSegmentInfoFormat.GetDocStoreOffset(si) != -1) { if (Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si)) { dir = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(Lucene3xSegmentInfoFormat.GetDocStoreSegment(si), "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), IOContext.READONCE, false); } } else if (si.UseCompoundFile) { dir = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), IOContext.READONCE, false); } try { Lucene3xStoredFieldsReader.CheckCodeVersion(dir, Lucene3xSegmentInfoFormat.GetDocStoreSegment(si)); } finally { // If we opened the directory, close it if (dir != directory) { dir.Dispose(); } } // Above call succeeded, so it's a 3.0 segment. Upgrade it so the next // time the segment is read, its version won't be null and we won't // need to open FieldsReader every time for each such segment. si.Version = "3.0"; } else if (si.Version.Equals("2.x")) { // If it's a 3x index touched by 3.1+ code, then segments record their // version, whether they are 2.x ones or not. We detect that and throw // appropriate exception. throw new IndexFormatTooOldException("segment " + si.Name + " in resource " + input, si.Version); } infos.Add(siPerCommit); } infos.UserData = input.ReadStringStringMap(); }
public SepPostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext context, IntStreamFactory intFactory, string segmentSuffix) { var success = false; try { var docFileName = IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, SepPostingsWriter.DOC_EXTENSION); _docIn = intFactory.OpenInput(dir, docFileName, context); _skipIn = dir.OpenInput( IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, SepPostingsWriter.SKIP_EXTENSION), context); if (fieldInfos.HasFreq()) { _freqIn = intFactory.OpenInput(dir, IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, SepPostingsWriter.FREQ_EXTENSION), context); } else { _freqIn = null; } if (fieldInfos.HasProx()) { _posIn = intFactory.OpenInput(dir, IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, SepPostingsWriter.POS_EXTENSION), context); _payloadIn = dir.OpenInput( IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, SepPostingsWriter.PAYLOAD_EXTENSION), context); } else { _posIn = null; _payloadIn = null; } success = true; } finally { if (!success) { Dispose(); } } }
// used by clone private CompressingStoredFieldsReader(CompressingStoredFieldsReader reader) { this.Version_Renamed = reader.Version_Renamed; this.FieldInfos = reader.FieldInfos; this.FieldsStream = (IndexInput)reader.FieldsStream.Clone(); this.IndexReader = (CompressingStoredFieldsIndexReader)reader.IndexReader.Clone(); this.MaxPointer = reader.MaxPointer; this.ChunkSize_Renamed = reader.ChunkSize_Renamed; this.PackedIntsVersion = reader.PackedIntsVersion; this.CompressionMode_Renamed = reader.CompressionMode_Renamed; this.Decompressor = (Decompressor)reader.Decompressor.Clone(); this.NumDocs = reader.NumDocs; this.Bytes = new BytesRef(reader.Bytes.Bytes.Length); this.Closed = false; }
/// <summary> /// Sole constructor. </summary> public Lucene40StoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context) { string segment = si.Name; bool success = false; FieldInfos = fn; try { FieldsStream = d.OpenInput(IndexFileNames.SegmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_EXTENSION), context); string indexStreamFN = IndexFileNames.SegmentFileName(segment, "", Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION); IndexStream = d.OpenInput(indexStreamFN, context); CodecUtil.CheckHeader(IndexStream, Lucene40StoredFieldsWriter.CODEC_NAME_IDX, Lucene40StoredFieldsWriter.VERSION_START, Lucene40StoredFieldsWriter.VERSION_CURRENT); CodecUtil.CheckHeader(FieldsStream, Lucene40StoredFieldsWriter.CODEC_NAME_DAT, Lucene40StoredFieldsWriter.VERSION_START, Lucene40StoredFieldsWriter.VERSION_CURRENT); Debug.Assert(Lucene40StoredFieldsWriter.HEADER_LENGTH_DAT == FieldsStream.FilePointer); Debug.Assert(Lucene40StoredFieldsWriter.HEADER_LENGTH_IDX == IndexStream.FilePointer); long indexSize = IndexStream.Length() - Lucene40StoredFieldsWriter.HEADER_LENGTH_IDX; this.Size_Renamed = (int)(indexSize >> 3); // Verify two sources of "maxDoc" agree: if (this.Size_Renamed != si.DocCount) { throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.Size_Renamed + " but segmentInfo shows " + si.DocCount); } NumTotalDocs = (int)(indexSize >> 3); success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { try { Dispose(); } // ensure we throw our original exception catch (Exception) { } } } }
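// Illustrative sketch for the doc-count computation above (hypothetical helper, not part of the
// reader): the .fdx stored-fields index holds one 8-byte pointer per document after its header,
// which is why the constructor derives the count as indexSize >> 3.
internal static class StoredFieldsDocCountExample
{
    public static int DocCount(long fdxFileLength, long headerLength)
    {
        long indexSize = fdxFileLength - headerLength;
        return (int)(indexSize >> 3); // one 8-byte (long) pointer per document
    }
}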
public override void Init(IndexInput termsIn) { _version = CodecUtil.CheckHeader(termsIn, PulsingPostingsWriter.CODEC, PulsingPostingsWriter.VERSION_START, PulsingPostingsWriter.VERSION_CURRENT); _maxPositions = termsIn.ReadVInt(); _wrappedPostingsReader.Init(termsIn); if (_wrappedPostingsReader is PulsingPostingsReader || _version < PulsingPostingsWriter.VERSION_META_ARRAY) { _fields = null; } else { _fields = new SortedDictionary<int, int>(); var summaryFileName = IndexFileNames.SegmentFileName(_segmentState.SegmentInfo.Name, _segmentState.SegmentSuffix, PulsingPostingsWriter.SUMMARY_EXTENSION); IndexInput input = null; try { input = _segmentState.Directory.OpenInput(summaryFileName, _segmentState.Context); CodecUtil.CheckHeader(input, PulsingPostingsWriter.CODEC, _version, PulsingPostingsWriter.VERSION_CURRENT); var numField = input.ReadVInt(); for (var i = 0; i < numField; i++) { var fieldNum = input.ReadVInt(); var longsSize = input.ReadVInt(); _fields.Add(fieldNum, longsSize); } } finally { IOUtils.CloseWhileHandlingException(input); } } }
public SegmentTermEnum(IndexInput i, FieldInfos fis, bool isi) { Input = i; FieldInfos = fis; IsIndex = isi; MaxSkipLevels = 1; // use single-level skip lists for formats > -3 int firstInt = Input.ReadInt(); if (firstInt >= 0) { // original-format file, without explicit format version number Format = 0; Size = firstInt; // back-compatible settings IndexInterval = 128; SkipInterval = int.MaxValue; // switch off skipTo optimization } else { // we have a format version number Format = firstInt; // check that it is a format we can understand if (Format > FORMAT_MINIMUM) { throw new IndexFormatTooOldException(Input, Format, FORMAT_MINIMUM, FORMAT_CURRENT); } if (Format < FORMAT_CURRENT) { throw new IndexFormatTooNewException(Input, Format, FORMAT_MINIMUM, FORMAT_CURRENT); } Size = Input.ReadLong(); // read the size IndexInterval = Input.ReadInt(); SkipInterval = Input.ReadInt(); MaxSkipLevels = Input.ReadInt(); Debug.Assert(IndexInterval > 0, "indexInterval=" + IndexInterval + " is negative; must be > 0"); Debug.Assert(SkipInterval > 0, "skipInterval=" + SkipInterval + " is negative; must be > 0"); } }
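// Illustrative sketch of the back-compat trick above (hypothetical names): old term dictionaries
// begin directly with a non-negative size, while newer files begin with a negative format number
// followed by the size, so the sign of the first int tells the two layouts apart.
internal static class TermDictHeaderExample
{
    public static (int Format, long Size) ReadFormatOrSize(int firstInt, System.Func<long> readLong)
    {
        if (firstInt >= 0)
            return (0, firstInt);      // pre-versioned file: the first int is the size
        return (firstInt, readLong()); // versioned file: format number, then the size follows
    }
}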
public SimpleTextTermVectorsReader(Directory directory, SegmentInfo si, IOContext context) { bool success = false; try { _input = directory.OpenInput(IndexFileNames.SegmentFileName(si.Name, "", SimpleTextTermVectorsWriter.VECTORS_EXTENSION), context); success = true; } finally { if (!success) { try { Dispose(); } catch (Exception) { // ensure we throw our original exception } } } ReadIndex(si.DocCount); }
public AzureIndexInput(AzureIndexInput cloneInput)
    : base(cloneInput._blob.Name)
{
    _fileMutex = BlobMutexManager.GrabMutex(cloneInput._name);
    _fileMutex.WaitOne();
    try
    {
        _azureDirectory = cloneInput._azureDirectory;
        _blobContainer = cloneInput._blobContainer;
        _blob = cloneInput._blob;
        _indexInput = (IndexInput)cloneInput._indexInput.Clone();
    }
    catch (Exception)
    {
        // sometimes we get access denied on the 2nd stream...but not always. I haven't tracked it down yet
        // but this covers our tail until I do
        Debug.WriteLine(String.Format("Dagnabbit, falling back to memory clone for {0}", cloneInput._name));
    }
    finally
    {
        _fileMutex.ReleaseMutex();
    }
}
internal LiveDocsSegmentDocsEnum(Lucene40PostingsReader outerInstance, IndexInput startFreqIn, Bits liveDocs) : base(outerInstance, startFreqIn, liveDocs) { this.OuterInstance = outerInstance; Debug.Assert(liveDocs != null); }
public virtual void TestDataInputOutput() { Random random = Random(); for (int iter = 0; iter < 5 * RANDOM_MULTIPLIER; iter++) { BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("testOverflow")); if (dir is MockDirectoryWrapper) { ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER; } int blockBits = TestUtil.NextInt(random, 1, 20); int blockSize = 1 << blockBits; PagedBytes p = new PagedBytes(blockBits); IndexOutput @out = dir.CreateOutput("foo", IOContext.DEFAULT); int numBytes = TestUtil.NextInt(Random(), 2, 10000000); byte[] answer = new byte[numBytes]; Random().NextBytes(answer); int written = 0; while (written < numBytes) { if (Random().Next(10) == 7) { @out.WriteByte(answer[written++]); } else { int chunk = Math.Min(Random().Next(1000), numBytes - written); @out.WriteBytes(answer, written, chunk); written += chunk; } } @out.Dispose(); IndexInput input = dir.OpenInput("foo", IOContext.DEFAULT); DataInput @in = (DataInput)input.Clone(); p.Copy(input, input.Length()); PagedBytes.Reader reader = p.Freeze(random.NextBoolean()); byte[] verify = new byte[numBytes]; int read = 0; while (read < numBytes) { if (Random().Next(10) == 7) { verify[read++] = @in.ReadByte(); } else { int chunk = Math.Min(Random().Next(1000), numBytes - read); @in.ReadBytes(verify, read, chunk); read += chunk; } } Assert.IsTrue(Arrays.Equals(answer, verify)); BytesRef slice = new BytesRef(); for (int iter2 = 0; iter2 < 100; iter2++) { int pos = random.Next(numBytes - 1); int len = random.Next(Math.Min(blockSize + 1, numBytes - pos)); reader.FillSlice(slice, pos, len); for (int byteUpto = 0; byteUpto < len; byteUpto++) { Assert.AreEqual(answer[pos + byteUpto], (byte)slice.Bytes[slice.Offset + byteUpto]); } } input.Dispose(); dir.Dispose(); } }
public Lucene3xTermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context) { string segment = Lucene3xSegmentInfoFormat.GetDocStoreSegment(si); int docStoreOffset = Lucene3xSegmentInfoFormat.GetDocStoreOffset(si); int size = si.DocCount; bool success = false; try { if (docStoreOffset != -1 && Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si)) { d = storeCFSReader = new CompoundFileDirectory(si.Dir, IndexFileNames.SegmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false); } else { storeCFSReader = null; } string idxName = IndexFileNames.SegmentFileName(segment, "", VECTORS_INDEX_EXTENSION); tvx = d.OpenInput(idxName, context); format = CheckValidFormat(tvx); string fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION); tvd = d.OpenInput(fn, context); int tvdFormat = CheckValidFormat(tvd); fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_FIELDS_EXTENSION); tvf = d.OpenInput(fn, context); int tvfFormat = CheckValidFormat(tvf); if (Debugging.AssertsEnabled) { Debugging.Assert(format == tvdFormat); Debugging.Assert(format == tvfFormat); } numTotalDocs = (int)(tvx.Length >> 4); if (-1 == docStoreOffset) { this.docStoreOffset = 0; this.size = numTotalDocs; if (Debugging.AssertsEnabled) { Debugging.Assert(size == 0 || numTotalDocs == size); } } else { this.docStoreOffset = docStoreOffset; this.size = size; // Verify the file is long enough to hold all of our // docs if (Debugging.AssertsEnabled) { Debugging.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs={0} size={1} docStoreOffset={2}", numTotalDocs, size, docStoreOffset); } } this.fieldInfos = fieldInfos; success = true; } finally { // With lock-less commits, it's entirely possible (and // fine) to hit a FileNotFound exception above. In // this case, we want to explicitly close any subset // of things that were opened so that we don't have to // wait for a GC to do so. if (!success) { try { Dispose(); } // keep our original exception catch (Exception) { } } } }
public IndexInputStream(IndexInput input) { this.input = input; remaining = input.Length; }
public override NumericDocValues GetNumeric(FieldInfo field) { lock (this) { NumericDocValues instance; if (!numericInstances.TryGetValue(field.Number, out instance)) { string fileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "dat"); IndexInput input = dir.OpenInput(fileName, state.Context); bool success = false; try { var type = field.GetAttribute(legacyKey).ToLegacyDocValuesType(); //switch (Enum.Parse(typeof(LegacyDocValuesType), field.GetAttribute(LegacyKey))) //{ if (type == LegacyDocValuesType.VAR_INTS) { instance = LoadVarInt32sField(field, input); } else if (type == LegacyDocValuesType.FIXED_INTS_8) { instance = LoadByteField(field, input); } else if (type == LegacyDocValuesType.FIXED_INTS_16) { instance = LoadInt16Field(field, input); } else if (type == LegacyDocValuesType.FIXED_INTS_32) { instance = LoadInt32Field(field, input); } else if (type == LegacyDocValuesType.FIXED_INTS_64) { instance = LoadInt64Field(field, input); } else if (type == LegacyDocValuesType.FLOAT_32) { instance = LoadSingleField(field, input); } else if (type == LegacyDocValuesType.FLOAT_64) { instance = LoadDoubleField(field, input); } else { throw new InvalidOperationException(); } CodecUtil.CheckEOF(input); success = true; } finally { if (success) { IOUtils.Dispose(input); } else { IOUtils.DisposeWhileHandlingException(input); } } numericInstances[field.Number] = instance; } return(instance); } }
public NormsDocValues(Lucene3xNormsProducer outerInstance, IndexInput normInput, long normSeek) { this.outerInstance = outerInstance; this.file = normInput; this.offset = normSeek; }
private void ReadFields(IndexInput meta, FieldInfos infos) { int fieldNumber = meta.ReadVInt32(); while (fieldNumber != -1) { // check should be: infos.fieldInfo(fieldNumber) != null, which incorporates negative check // but docvalues updates are currently buggy here (loading extra stuff, etc): LUCENE-5616 if (fieldNumber < 0) { // trickier to validate more: because we re-use for norms, because we use multiple entries // for "composite" types like sortedset, etc. throw new CorruptIndexException("Invalid field number: " + fieldNumber + ", input=" + meta); } int fieldType = meta.ReadByte(); if (fieldType == NUMBER) { var entry = new NumericEntry(); entry.Offset = meta.ReadInt64(); entry.Format = (sbyte)meta.ReadByte(); switch (entry.Format) { case DELTA_COMPRESSED: case TABLE_COMPRESSED: case GCD_COMPRESSED: case UNCOMPRESSED: break; default: throw new CorruptIndexException("Unknown format: " + entry.Format + ", input=" + meta); } if (entry.Format != UNCOMPRESSED) { entry.PackedInt32sVersion = meta.ReadVInt32(); } numerics[fieldNumber] = entry; } else if (fieldType == BYTES) { BinaryEntry entry = new BinaryEntry(); entry.Offset = meta.ReadInt64(); entry.NumBytes = meta.ReadInt64(); entry.MinLength = meta.ReadVInt32(); entry.MaxLength = meta.ReadVInt32(); if (entry.MinLength != entry.MaxLength) { entry.PackedInt32sVersion = meta.ReadVInt32(); entry.BlockSize = meta.ReadVInt32(); } binaries[fieldNumber] = entry; } else if (fieldType == FST) { FSTEntry entry = new FSTEntry(); entry.Offset = meta.ReadInt64(); entry.NumOrds = meta.ReadVInt64(); fsts[fieldNumber] = entry; } else { throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta); } fieldNumber = meta.ReadVInt32(); } }
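// Illustrative sketch, not the actual IndexInput implementation: the variable-length integer
// encoding that the ReadVInt32/ReadVInt64 calls above consume. Each byte carries 7 payload bits
// and the high bit marks that another byte follows; readByte stands in for IndexInput.ReadByte().
internal static class VIntExample
{
    public static int ReadVInt32(System.Func<byte> readByte)
    {
        byte b = readByte();
        int value = b & 0x7F;
        for (int shift = 7; (b & 0x80) != 0; shift += 7)
        {
            b = readByte();
            value |= (b & 0x7F) << shift;
        }
        return value;
    }
}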
private readonly int numTotalDocs; // LUCENENET: marked readonly /// <summary> /// Used by clone. </summary> internal Lucene40TermVectorsReader(FieldInfos fieldInfos, IndexInput tvx, IndexInput tvd, IndexInput tvf, int size, int numTotalDocs) { this.fieldInfos = fieldInfos; this.tvx = tvx; this.tvd = tvd; this.tvf = tvf; this.size = size; this.numTotalDocs = numTotalDocs; }
public virtual bool CanReuse(IndexInput tvf) { return(tvf == origTVF); }
// NOTE: tvf is pre-positioned by caller public TVTermsEnum(Lucene40TermVectorsReader outerInstance) { this.origTVF = outerInstance.tvf; tvf = (IndexInput)origTVF.Clone(); }
/* * SegmentTermPositions(SegmentReader p) { * super(p); * this.proxStream = null; // the proxStream will be cloned lazily when nextPosition() is called for the first time * } */ public SegmentTermPositions(IndexInput freqStream, IndexInput proxStream, TermInfosReader tis, FieldInfos fieldInfos) : base(freqStream, tis, fieldInfos) { this.proxStreamOrig = proxStream; // the proxStream will be cloned lazily when nextPosition() is called for the first time }
internal FaultyIndexInput(IndexInput @delegate) : base("FaultyIndexInput(" + @delegate + ")", BufferedIndexInput.BUFFER_SIZE) { this.@delegate = @delegate; }
public SegmentDocsAndPositionsEnum(Lucene40PostingsReader outerInstance, IndexInput freqIn, IndexInput proxIn) { this.OuterInstance = outerInstance; StartFreqIn = freqIn; this.FreqIn = (IndexInput)freqIn.Clone(); this.ProxIn = (IndexInput)proxIn.Clone(); }
public AzureIndexInput(AzureDirectory azuredirectory, ICloudBlob blob, string resourceDescription)
    : base(resourceDescription)
{
    _name = blob.Uri.Segments[blob.Uri.Segments.Length - 1];
#if FULLDEBUG
    Debug.WriteLine(String.Format("opening {0} ", _name));
#endif
    _fileMutex = BlobMutexManager.GrabMutex(_name);
    _fileMutex.WaitOne();
    try
    {
        _azureDirectory = azuredirectory;
        _blobContainer = azuredirectory.BlobContainer;
        _blob = blob;
        var fileName = _name;

        var fFileNeeded = false;
        if (!CacheDirectory.FileExists(fileName))
        {
            fFileNeeded = true;
        }
        else
        {
            long cachedLength = CacheDirectory.FileLength(fileName);
            string blobLengthMetadata;
            bool hasMetadataValue = blob.Metadata.TryGetValue("CachedLength", out blobLengthMetadata);
            long blobLength = blob.Properties.Length;
            if (hasMetadataValue)
            {
                long.TryParse(blobLengthMetadata, out blobLength);
            }

            string blobLastModifiedMetadata;
            long longLastModified = 0;
            DateTime blobLastModifiedUTC = blob.Properties.LastModified.Value.UtcDateTime;
            if (blob.Metadata.TryGetValue("CachedLastModified", out blobLastModifiedMetadata))
            {
                if (long.TryParse(blobLastModifiedMetadata, out longLastModified))
                {
                    blobLastModifiedUTC = DateTime.FromFileTimeUtc(longLastModified);
                }
                //blobLastModifiedUTC = new DateTime(longLastModified).ToUniversalTime();
            }

            if (cachedLength != blobLength)
            {
                fFileNeeded = true;
            }
            else
            {
                // cachedLastModifiedUTC was not outputting with a date (just time) and the time was always off
                var filePath = Path.Combine(_azureDirectory.CatalogPath, fileName);
                var lastModified = File.GetLastWriteTimeUtc(filePath);
                //long unixDate = CacheDirectory.FileModified(fileName);
                //DateTime start = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);
                //var cachedLastModifiedUTC = start.AddMilliseconds(unixDate).ToUniversalTime();
                var cachedLastModifiedUTC = lastModified;

                if (cachedLastModifiedUTC != blobLastModifiedUTC)
                {
                    var timeSpan = blobLastModifiedUTC.Subtract(cachedLastModifiedUTC);
                    if (timeSpan.TotalSeconds > 1)
                    {
                        fFileNeeded = true;
                    }
                    else
                    {
#if FULLDEBUG
                        Debug.WriteLine(timeSpan.TotalSeconds);
#endif
                        // file not needed
                    }
                }
            }
        }

        // if the file does not exist
        // or if it exists and it is older than the lastmodified time in the blobproperties (which always comes from the blob storage)
        if (fFileNeeded)
        {
            if (_azureDirectory.ShouldCompressFile(_name))
            {
                InflateStream(fileName);
            }
            else
            {
                using (var fileStream = _azureDirectory.CreateCachedOutputAsStream(fileName))
                {
                    // get the blob
                    _blob.DownloadToStream(fileStream);
                    fileStream.Flush();
                    Debug.WriteLine(string.Format("GET {0} RETRIEVED {1} bytes", _name, fileStream.Length));
                }
            }

            // and open it as an input
            _indexInput = CacheDirectory.OpenInput(fileName, IOContext.DEFAULT);
        }
        else
        {
#if FULLDEBUG
            Debug.WriteLine(String.Format("Using cached file for {0}", _name));
#endif
            // open the file in read only mode
            _indexInput = CacheDirectory.OpenInput(fileName, IOContext.DEFAULT);
        }
    }
    finally
    {
        _fileMutex.ReleaseMutex();
    }
}
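// Illustrative sketch, not part of AzureDirectory: the cache-freshness decision made inline above,
// pulled out as a pure function. The one-second tolerance mirrors the timeSpan.TotalSeconds > 1
// check in the constructor; the class and method names are hypothetical.
internal static class BlobCacheCheckExample
{
    public static bool CachedCopyIsStale(long cachedLength, long blobLength,
        System.DateTime cachedLastModifiedUtc, System.DateTime blobLastModifiedUtc)
    {
        if (cachedLength != blobLength)
            return true; // size mismatch: always re-download

        // allow up to one second of clock skew between the local cache and blob storage
        var skew = blobLastModifiedUtc.Subtract(cachedLastModifiedUtc);
        return skew.TotalSeconds > 1;
    }
}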
internal Lucene42DocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension) { maxDoc = state.SegmentInfo.DocCount; string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension); // read in the entries from the metadata file. ChecksumIndexInput @in = state.Directory.OpenChecksumInput(metaName, state.Context); bool success = false; ramBytesUsed = new AtomicInt64(RamUsageEstimator.ShallowSizeOfInstance(this.GetType())); try { version = CodecUtil.CheckHeader(@in, metaCodec, VERSION_START, VERSION_CURRENT); numerics = new Dictionary <int, NumericEntry>(); binaries = new Dictionary <int, BinaryEntry>(); fsts = new Dictionary <int, FSTEntry>(); ReadFields(@in, state.FieldInfos); if (version >= VERSION_CHECKSUM) { CodecUtil.CheckFooter(@in); } else { #pragma warning disable 612, 618 CodecUtil.CheckEOF(@in); #pragma warning restore 612, 618 } success = true; } finally { if (success) { IOUtils.Close(@in); } else { IOUtils.CloseWhileHandlingException(@in); } } success = false; try { string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension); data = state.Directory.OpenInput(dataName, state.Context); int version2 = CodecUtil.CheckHeader(data, dataCodec, VERSION_START, VERSION_CURRENT); if (version != version2) { throw new CorruptIndexException("Format versions mismatch"); } success = true; } finally { if (!success) { IOUtils.CloseWhileHandlingException(this.data); } } }
public CompressedBinaryDocValues(BinaryEntry bytes, MonotonicBlockPackedReader addresses, IndexInput data) { this.bytes = bytes; this.interval = bytes.AddressInterval; this.addresses = addresses; this.data = data; this.numValues = bytes.Count; this.numIndexValues = addresses.Count; this.termsEnum = GetTermsEnum(data); }
public SegmentDocsAndPositionsEnum(Lucene40PostingsReader outerInstance, IndexInput freqIn, IndexInput proxIn) { this.outerInstance = outerInstance; startFreqIn = freqIn; this.freqIn = (IndexInput)freqIn.Clone(); this.proxIn = (IndexInput)proxIn.Clone(); }
// note: just like segmentreader in 3.x, we open up all the files here (including separate norms) up front. // but we just don't do any seeks or reading yet. public Lucene3xNormsProducer(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context) { Directory separateNormsDir = info.Dir; // separate norms are never inside CFS maxdoc = info.DocCount; //string segmentName = info.Name; // LUCENENET: IDE0059: Remove unnecessary value assignment bool success = false; try { long nextNormSeek = NORMS_HEADER.Length; //skip header (header unused for now) foreach (FieldInfo fi in fields) { if (fi.HasNorms) { string fileName = GetNormFilename(info, fi.Number); Directory d = HasSeparateNorms(info, fi.Number) ? separateNormsDir : dir; // singleNormFile means multiple norms share this file bool singleNormFile = IndexFileNames.MatchesExtension(fileName, NORMS_EXTENSION); IndexInput normInput = null; long normSeek; if (singleNormFile) { normSeek = nextNormSeek; if (singleNormStream == null) { singleNormStream = d.OpenInput(fileName, context); openFiles.Add(singleNormStream); } // All norms in the .nrm file can share a single IndexInput since // they are only used in a synchronized context. // If this were to change in the future, a clone could be done here. normInput = singleNormStream; } else { normInput = d.OpenInput(fileName, context); openFiles.Add(normInput); // if the segment was created in 3.2 or after, we wrote the header for sure, // and don't need to do the sketchy file size check. otherwise, we check // if the size is exactly equal to maxDoc to detect a headerless file. // NOTE: remove this check in Lucene 5.0! string version = info.Version; bool isUnversioned = (version == null || StringHelper.VersionComparer.Compare(version, "3.2") < 0) && normInput.Length == maxdoc; if (isUnversioned) { normSeek = 0; } else { normSeek = NORMS_HEADER.Length; } } NormsDocValues norm = new NormsDocValues(this, normInput, normSeek); norms[fi.Name] = norm; nextNormSeek += maxdoc; // increment also if some norms are separate } } // TODO: change to a real check? see LUCENE-3619 if (Debugging.AssertsEnabled) { Debugging.Assert(singleNormStream == null || nextNormSeek == singleNormStream.Length, singleNormStream != null ? "len: {0} expected: {1}" : "null", singleNormStream?.Length ?? 0, nextNormSeek); } success = true; } finally { if (!success) { IOUtils.DisposeWhileHandlingException(openFiles); } } ramBytesUsed = new AtomicInt64(); }
internal SeeksCountingStream(TestLazyProxSkipping outerInstance, IndexInput input) : base("SeekCountingStream(" + input + ")") { this.OuterInstance = outerInstance; this.Input = input; }
protected abstract IBlockReader GetBlockReader(IndexInput @in, int[] buffer);
public Reader(IndexInput input, int[] pending, IBlockReader blockReader) { this.input = input; this.pending = pending; this.blockReader = blockReader; }
// used by clone internal Lucene3xTermVectorsReader(FieldInfos fieldInfos, IndexInput tvx, IndexInput tvd, IndexInput tvf, int size, int numTotalDocs, int docStoreOffset, int format) { this.fieldInfos = fieldInfos; this.tvx = tvx; this.tvd = tvd; this.tvf = tvf; this.size = size; this.numTotalDocs = numTotalDocs; this.docStoreOffset = docStoreOffset; this.format = format; this.storeCFSReader = null; }
internal int ReadFreq(IndexInput freqIn, int code) { if ((code & 1) != 0) // if low bit is set { return 1; // freq is one } else { return freqIn.ReadVInt(); // else read freq } }
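// Illustrative sketch of the encoding that ReadFreq undoes above (hypothetical helper names, not
// the actual writer code): the doc delta is shifted left one bit and the low bit is set when
// freq == 1, so the very common freq==1 case needs no extra byte; otherwise the freq follows as
// a separate VInt.
internal static class DocFreqCodeExample
{
    public static (int Code, bool FreqFollows) Encode(int docDelta, int freq)
    {
        return freq == 1
            ? ((docDelta << 1) | 1, false) // low bit set: freq is implicitly 1
            : (docDelta << 1, true);       // low bit clear: write freq as a VInt afterwards
    }

    public static (int DocDelta, int Freq) Decode(int code, System.Func<int> readVInt)
    {
        int docDelta = (int)((uint)code >> 1);
        int freq = (code & 1) != 0 ? 1 : readVInt();
        return (docDelta, freq);
    }
}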
public override int Merge(MergeState mergeState) { int docCount = 0; int idx = 0; foreach (AtomicReader reader in mergeState.Readers) { SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++]; CompressingTermVectorsReader matchingVectorsReader = null; if (matchingSegmentReader != null) { TermVectorsReader vectorsReader = matchingSegmentReader.TermVectorsReader; // we can only bulk-copy if the matching reader is also a CompressingTermVectorsReader if (vectorsReader != null && vectorsReader is CompressingTermVectorsReader) { matchingVectorsReader = (CompressingTermVectorsReader)vectorsReader; } } int maxDoc = reader.MaxDoc; IBits liveDocs = reader.LiveDocs; if (matchingVectorsReader == null || matchingVectorsReader.Version != VERSION_CURRENT || matchingVectorsReader.CompressionMode != compressionMode || matchingVectorsReader.ChunkSize != chunkSize || matchingVectorsReader.PackedInt32sVersion != PackedInt32s.VERSION_CURRENT) { // naive merge... for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc)) { Fields vectors = reader.GetTermVectors(i); AddAllDocVectors(vectors, mergeState); ++docCount; mergeState.CheckAbort.Work(300); } } else { CompressingStoredFieldsIndexReader index = matchingVectorsReader.Index; IndexInput vectorsStreamOrig = matchingVectorsReader.VectorsStream; vectorsStreamOrig.Seek(0); ChecksumIndexInput vectorsStream = new BufferedChecksumIndexInput((IndexInput)vectorsStreamOrig.Clone()); for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc;) { // We make sure to move the checksum input in any case, otherwise the final // integrity check might need to read the whole file a second time long startPointer = index.GetStartPointer(i); if (startPointer > vectorsStream.GetFilePointer()) { vectorsStream.Seek(startPointer); } if ((pendingDocs.Count == 0) && (i == 0 || index.GetStartPointer(i - 1) < startPointer)) // start of a chunk { int docBase = vectorsStream.ReadVInt32(); int chunkDocs = vectorsStream.ReadVInt32(); Debug.Assert(docBase + chunkDocs <= matchingSegmentReader.MaxDoc); if (docBase + chunkDocs < matchingSegmentReader.MaxDoc && NextDeletedDoc(docBase, liveDocs, docBase + chunkDocs) == docBase + chunkDocs) { long chunkEnd = index.GetStartPointer(docBase + chunkDocs); long chunkLength = chunkEnd - vectorsStream.GetFilePointer(); indexWriter.WriteIndex(chunkDocs, this.vectorsStream.GetFilePointer()); this.vectorsStream.WriteVInt32(docCount); this.vectorsStream.WriteVInt32(chunkDocs); this.vectorsStream.CopyBytes(vectorsStream, chunkLength); docCount += chunkDocs; this.numDocs += chunkDocs; mergeState.CheckAbort.Work(300 * chunkDocs); i = NextLiveDoc(docBase + chunkDocs, liveDocs, maxDoc); } else { for (; i < docBase + chunkDocs; i = NextLiveDoc(i + 1, liveDocs, maxDoc)) { Fields vectors = reader.GetTermVectors(i); AddAllDocVectors(vectors, mergeState); ++docCount; mergeState.CheckAbort.Work(300); } } } else { Fields vectors = reader.GetTermVectors(i); AddAllDocVectors(vectors, mergeState); ++docCount; mergeState.CheckAbort.Work(300); i = NextLiveDoc(i + 1, liveDocs, maxDoc); } } vectorsStream.Seek(vectorsStream.Length - CodecUtil.FooterLength()); CodecUtil.CheckFooter(vectorsStream); } } Finish(mergeState.FieldInfos, docCount); return(docCount); }
// used by clone internal SimpleTextTermVectorsReader(long[] offsets, IndexInput input) { _offsets = offsets; _input = input; }
protected internal VariableInt32BlockIndexInput(IndexInput input) { this.input = input; m_maxBlockSize = input.ReadInt32(); }
private void SeekDir(IndexInput input, long dirOffset) { if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) { input.Seek(input.Length() - CodecUtil.FooterLength() - 8); dirOffset = input.ReadLong(); } else if (_version >= FixedGapTermsIndexWriter.VERSION_APPEND_ONLY) { input.Seek(input.Length() - 8); dirOffset = input.ReadLong(); } input.Seek(dirOffset); }
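// Illustrative sketch using plain file I/O, not the codec API: the "directory offset stored near
// the end of the file" layout that SeekDir handles above. For checksummed files the 8-byte offset
// sits just before the footer; for append-only files it is the last 8 bytes. footerLength stands
// in for CodecUtil.FooterLength(); the sketch assumes a big-endian on-disk long and a single
// complete Read call.
internal static class DirOffsetExample
{
    public static long ReadDirOffset(System.IO.Stream file, int footerLength)
    {
        file.Seek(file.Length - footerLength - 8, System.IO.SeekOrigin.Begin);
        var buf = new byte[8];
        file.Read(buf, 0, 8);
        if (System.BitConverter.IsLittleEndian)
            System.Array.Reverse(buf); // stored big-endian on disk
        return System.BitConverter.ToInt64(buf, 0);
    }
}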
internal SegmentDocsEnumBase(Lucene40PostingsReader outerInstance, IndexInput startFreqIn, Bits liveDocs) { this.OuterInstance = outerInstance; this.StartFreqIn = startFreqIn; this.FreqIn = (IndexInput)startFreqIn.Clone(); this.LiveDocs = liveDocs; }
/// <summary>
/// Sole constructor. </summary>
public Lucene40TermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context)
{
    string segment = si.Name;
    int size = si.DocCount;
    bool success = false;
    try
    {
        string idxName = IndexFileNames.SegmentFileName(segment, "", VECTORS_INDEX_EXTENSION);
        Tvx = d.OpenInput(idxName, context);
        int tvxVersion = CodecUtil.CheckHeader(Tvx, CODEC_NAME_INDEX, VERSION_START, VERSION_CURRENT);

        string fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION);
        Tvd = d.OpenInput(fn, context);
        int tvdVersion = CodecUtil.CheckHeader(Tvd, CODEC_NAME_DOCS, VERSION_START, VERSION_CURRENT);

        fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_FIELDS_EXTENSION);
        Tvf = d.OpenInput(fn, context);
        int tvfVersion = CodecUtil.CheckHeader(Tvf, CODEC_NAME_FIELDS, VERSION_START, VERSION_CURRENT);

        Debug.Assert(HEADER_LENGTH_INDEX == Tvx.FilePointer);
        Debug.Assert(HEADER_LENGTH_DOCS == Tvd.FilePointer);
        Debug.Assert(HEADER_LENGTH_FIELDS == Tvf.FilePointer);
        Debug.Assert(tvxVersion == tvdVersion);
        Debug.Assert(tvxVersion == tvfVersion);

        NumTotalDocs = (int)(Tvx.Length() - HEADER_LENGTH_INDEX >> 4);

        this.Size_Renamed = NumTotalDocs;
        Debug.Assert(size == 0 || NumTotalDocs == size);

        this.FieldInfos = fieldInfos;
        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            try
            {
                Dispose();
            } // ensure we throw our original exception
            catch (Exception)
            {
            }
        }
    }
}
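// Illustrative sketch for the NumTotalDocs computation above (hypothetical helper): the .tvx
// term-vectors index stores two 8-byte pointers per document (one into .tvd, one into .tvf)
// after its header, hence the shift by 4, i.e. division by 16.
internal static class TermVectorsDocCountExample
{
    public static int DocCount(long tvxFileLength, long headerLength)
    {
        return (int)((tvxFileLength - headerLength) >> 4); // 16 bytes = two long pointers per doc
    }
}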
internal AllDocsSegmentDocsEnum(Lucene40PostingsReader outerInstance, IndexInput startFreqIn) : base(outerInstance, startFreqIn, null) { this.OuterInstance = outerInstance; Debug.Assert(LiveDocs == null); }
public virtual bool CanReuse(IndexInput tvf) { return tvf == OrigTVF; }
// private String segment; /// <summary> /// Sole constructor. </summary> public Lucene40PostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext ioContext, string segmentSuffix) { bool success = false; IndexInput freqIn = null; IndexInput proxIn = null; try { freqIn = dir.OpenInput(IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, Lucene40PostingsFormat.FREQ_EXTENSION), ioContext); CodecUtil.CheckHeader(freqIn, FRQ_CODEC, VERSION_START, VERSION_CURRENT); // TODO: hasProx should (somehow!) become codec private, // but it's tricky because 1) FIS.hasProx is global (it // could be all fields that have prox are written by a // different codec), 2) the field may have had prox in // the past but all docs w/ that field were deleted. // Really we'd need to init prxOut lazily on write, and // then somewhere record that we actually wrote it so we // know whether to open on read: if (fieldInfos.HasProx()) { proxIn = dir.OpenInput(IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, Lucene40PostingsFormat.PROX_EXTENSION), ioContext); CodecUtil.CheckHeader(proxIn, PRX_CODEC, VERSION_START, VERSION_CURRENT); } else { proxIn = null; } this.FreqIn = freqIn; this.ProxIn = proxIn; success = true; } finally { if (!success) { IOUtils.CloseWhileHandlingException(freqIn, proxIn); } } }
/// <summary> /// Sole constructor. </summary> public CompressingStoredFieldsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode) { this.compressionMode = compressionMode; string segment = si.Name; bool success = false; fieldInfos = fn; numDocs = si.DocCount; ChecksumIndexInput indexStream = null; try { string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION); string fieldsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION); // Load the index into memory indexStream = d.OpenChecksumInput(indexStreamFN, context); string codecNameIdx = formatName + CompressingStoredFieldsWriter.CODEC_SFX_IDX; version = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT); Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.GetFilePointer()); indexReader = new CompressingStoredFieldsIndexReader(indexStream, si); long maxPointer = -1; if (version >= CompressingStoredFieldsWriter.VERSION_CHECKSUM) { maxPointer = indexStream.ReadVInt64(); CodecUtil.CheckFooter(indexStream); } else { #pragma warning disable 612, 618 CodecUtil.CheckEOF(indexStream); #pragma warning restore 612, 618 } indexStream.Dispose(); indexStream = null; // Open the data file and read metadata fieldsStream = d.OpenInput(fieldsStreamFN, context); if (version >= CompressingStoredFieldsWriter.VERSION_CHECKSUM) { if (maxPointer + CodecUtil.FooterLength() != fieldsStream.Length) { throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer=" + maxPointer + ", length=" + fieldsStream.Length); } } else { maxPointer = fieldsStream.Length; } this.maxPointer = maxPointer; string codecNameDat = formatName + CompressingStoredFieldsWriter.CODEC_SFX_DAT; int fieldsVersion = CodecUtil.CheckHeader(fieldsStream, codecNameDat, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT); if (version != fieldsVersion) { throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version + " != " + fieldsVersion); } Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == fieldsStream.GetFilePointer()); if (version >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS) { chunkSize = fieldsStream.ReadVInt32(); } else { chunkSize = -1; } packedIntsVersion = fieldsStream.ReadVInt32(); decompressor = compressionMode.NewDecompressor(); this.bytes = new BytesRef(); success = true; } finally { if (!success) { IOUtils.DisposeWhileHandlingException(this, indexStream); } } }
private int ReadHeader(IndexInput input) { int version = CodecUtil.CheckHeader(input, VariableGapTermsIndexWriter.CODEC_NAME, VariableGapTermsIndexWriter.VERSION_START, VariableGapTermsIndexWriter.VERSION_CURRENT); if (version < VariableGapTermsIndexWriter.VERSION_APPEND_ONLY) { _dirOffset = input.ReadLong(); } return version; }
/// <summary> /// Reads from legacy 3.x segments_N. </summary> private SegmentCommitInfo ReadLegacySegmentInfo(Directory dir, int format, IndexInput input) { // check that it is a format we can understand if (format > Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS) { throw new IndexFormatTooOldException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1); } if (format < Lucene3xSegmentInfoFormat.FORMAT_3_1) { throw new IndexFormatTooNewException(input, format, Lucene3xSegmentInfoFormat.FORMAT_DIAGNOSTICS, Lucene3xSegmentInfoFormat.FORMAT_3_1); } string version; if (format <= Lucene3xSegmentInfoFormat.FORMAT_3_1) { version = input.ReadString(); } else { version = null; } string name = input.ReadString(); int docCount = input.ReadInt32(); long delGen = input.ReadInt64(); int docStoreOffset = input.ReadInt32(); IDictionary <string, string> attributes = new Dictionary <string, string>(); // parse the docstore stuff and shove it into attributes string docStoreSegment; bool docStoreIsCompoundFile; if (docStoreOffset != -1) { docStoreSegment = input.ReadString(); docStoreIsCompoundFile = input.ReadByte() == SegmentInfo.YES; attributes[Lucene3xSegmentInfoFormat.DS_OFFSET_KEY] = Convert.ToString(docStoreOffset, CultureInfo.InvariantCulture); attributes[Lucene3xSegmentInfoFormat.DS_NAME_KEY] = docStoreSegment; attributes[Lucene3xSegmentInfoFormat.DS_COMPOUND_KEY] = Convert.ToString(docStoreIsCompoundFile, CultureInfo.InvariantCulture); } else { docStoreSegment = name; docStoreIsCompoundFile = false; } // pre-4.0 indexes write a byte if there is a single norms file byte b = input.ReadByte(); //System.out.println("version=" + version + " name=" + name + " docCount=" + docCount + " delGen=" + delGen + " dso=" + docStoreOffset + " dss=" + docStoreSegment + " dssCFs=" + docStoreIsCompoundFile + " b=" + b + " format=" + format); if (Debugging.AssertsEnabled) { Debugging.Assert(1 == b, "expected 1 but was: {0} format: {1}", b, format); } int numNormGen = input.ReadInt32(); IDictionary <int, long> normGen; if (numNormGen == SegmentInfo.NO) { normGen = null; } else { normGen = new Dictionary <int, long>(); for (int j = 0; j < numNormGen; j++) { normGen[j] = input.ReadInt64(); } } bool isCompoundFile = input.ReadByte() == SegmentInfo.YES; int delCount = input.ReadInt32(); if (Debugging.AssertsEnabled) { Debugging.Assert(delCount <= docCount); } //bool hasProx = input.ReadByte() == 1; input.ReadByte(); // LUCENENET: IDE0059: Remove unnecessary value assignment IDictionary <string, string> diagnostics = input.ReadStringStringMap(); if (format <= Lucene3xSegmentInfoFormat.FORMAT_HAS_VECTORS) { // NOTE: unused //int hasVectors = input.ReadByte(); input.ReadByte(); // LUCENENET: IDE0059: Remove unnecessary value assignment } // Replicate logic from 3.x's SegmentInfo.files(): ISet <string> files = new JCG.HashSet <string>(); if (isCompoundFile) { files.Add(IndexFileNames.SegmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); } else { AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xFieldInfosReader.FIELD_INFOS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.FREQ_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.PROX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xPostingsFormat.TERMS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", 
Lucene3xPostingsFormat.TERMS_INDEX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xNormsProducer.NORMS_EXTENSION)); } if (docStoreOffset != -1) { if (docStoreIsCompoundFile) { files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION)); } else { files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION)); files.Add(IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(docStoreSegment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION)); } } else if (!isCompoundFile) { files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION)); files.Add(IndexFileNames.SegmentFileName(name, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION)); AddIfExists(dir, files, IndexFileNames.SegmentFileName(name, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION)); } // parse the normgen stuff and shove it into attributes if (normGen != null) { attributes[Lucene3xSegmentInfoFormat.NORMGEN_KEY] = Convert.ToString(numNormGen, CultureInfo.InvariantCulture); foreach (KeyValuePair <int, long> ent in normGen) { long gen = ent.Value; if (gen >= SegmentInfo.YES) { // Definitely a separate norm file, with generation: files.Add(IndexFileNames.FileNameFromGeneration(name, "s" + ent.Key, gen)); attributes[Lucene3xSegmentInfoFormat.NORMGEN_PREFIX + ent.Key] = Convert.ToString(gen, CultureInfo.InvariantCulture); } else if (gen == SegmentInfo.NO) { // No separate norm } else { // We should have already hit indexformat too old exception if (Debugging.AssertsEnabled) { Debugging.Assert(false); } } } } SegmentInfo info = new SegmentInfo(dir, version, name, docCount, isCompoundFile, null, diagnostics, attributes.AsReadOnly()); info.SetFiles(files); SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, delCount, delGen, -1); return(infoPerCommit); }
/// <summary> /// Reads the snapshots information from the given <see cref="Directory"/>. This /// method can be used if the snapshots information is needed, however you /// cannot instantiate the deletion policy (because e.g., some other process /// keeps a lock on the snapshots directory). /// </summary> private void LoadPriorSnapshots() { lock (this) { long genLoaded = -1; IOException ioe = null; IList <string> snapshotFiles = new List <string>(); foreach (string file in dir.ListAll()) { if (file.StartsWith(SNAPSHOTS_PREFIX, StringComparison.Ordinal)) { long gen = Convert.ToInt64(file.Substring(SNAPSHOTS_PREFIX.Length), CultureInfo.InvariantCulture); if (genLoaded == -1 || gen > genLoaded) { snapshotFiles.Add(file); IDictionary <long, int> m = new Dictionary <long, int>(); IndexInput @in = dir.OpenInput(file, IOContext.DEFAULT); try { CodecUtil.CheckHeader(@in, CODEC_NAME, VERSION_START, VERSION_START); int count = @in.ReadVInt32(); for (int i = 0; i < count; i++) { long commitGen = @in.ReadVInt64(); int refCount = @in.ReadVInt32(); m[commitGen] = refCount; } } catch (IOException ioe2) { // Save first exception & throw in the end if (ioe == null) { ioe = ioe2; } } finally { @in.Dispose(); } genLoaded = gen; m_refCounts.Clear(); m_refCounts.PutAll(m); } } } if (genLoaded == -1) { // Nothing was loaded... if (ioe != null) { // ... not for lack of trying: throw ioe; } } else { if (snapshotFiles.Count > 1) { // Remove any broken / old snapshot files: string curFileName = SNAPSHOTS_PREFIX + genLoaded; foreach (string file in snapshotFiles) { if (!curFileName.Equals(file, StringComparison.Ordinal)) { dir.DeleteFile(file); } } } nextWriteGen = 1 + genLoaded; } } }
private SortedDocValues LoadBytesFixedSorted(FieldInfo field, IndexInput data, IndexInput index) { CodecUtil.CheckHeader(data, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT); CodecUtil.CheckHeader(index, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_START, Lucene40DocValuesFormat.BYTES_FIXED_SORTED_VERSION_CURRENT); int fixedLength = data.ReadInt32(); int valueCount = index.ReadInt32(); PagedBytes bytes = new PagedBytes(16); bytes.Copy(data, fixedLength * (long)valueCount); PagedBytes.Reader bytesReader = bytes.Freeze(true); PackedInt32s.Reader reader = PackedInt32s.GetReader(index); ramBytesUsed.AddAndGet(bytes.RamBytesUsed() + reader.RamBytesUsed()); return(CorrectBuggyOrds(new SortedDocValuesAnonymousInnerClassHelper(fixedLength, valueCount, bytesReader, reader))); }
public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor, IComparer<BytesRef> termComp, String segmentSuffix, IOContext context) { _termComp = termComp; Debug.Assert(indexDivisor == -1 || indexDivisor > 0); _input = dir.OpenInput( IndexFileNames.SegmentFileName(segment, segmentSuffix, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION), context); var success = false; try { _version = ReadHeader(_input); if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM) CodecUtil.ChecksumEntireFile(_input); indexInterval = _input.ReadInt(); if (indexInterval < 1) { throw new CorruptIndexException(String.Format("Invalid indexInterval: {0}, Resource: {1}", indexInterval, _input)); } _indexDivisor = indexDivisor; if (indexDivisor < 0) { _totalIndexInterval = indexInterval; } else { // In case terms index gets loaded, later, on demand _totalIndexInterval = indexInterval*indexDivisor; } Debug.Assert(_totalIndexInterval > 0); SeekDir(_input, _dirOffset); // Read directory int numFields = _input.ReadVInt(); if (numFields < 0) throw new CorruptIndexException(String.Format("Invalid numFields: {0}, Resource: {1}", numFields, _input)); for (int i = 0; i < numFields; i++) { int field = _input.ReadVInt(); int numIndexTerms = _input.ReadVInt(); if (numIndexTerms < 0) throw new CorruptIndexException(String.Format("Invalid numIndexTerms: {0}, Resource: {1}", numIndexTerms, _input)); long termsStart = _input.ReadVLong(); long indexStart = _input.ReadVLong(); long packedIndexStart = _input.ReadVLong(); long packedOffsetsStart = _input.ReadVLong(); if (packedIndexStart < indexStart) throw new CorruptIndexException( String.Format( "Invalid packedIndexStart: {0}, IndexStart: {1}, NumIndexTerms: {2}, Resource: {3}", packedIndexStart, indexStart, numIndexTerms, _input)); FieldInfo fieldInfo = fieldInfos.FieldInfo(field); try { _fields.Add(fieldInfo, new FieldIndexData(numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart, this)); } catch (ArgumentException) { throw new CorruptIndexException(String.Format("Duplicate field: {0}, Resource {1}", fieldInfo.Name, _input)); } } success = true; } finally { if (!success) { IOUtils.CloseWhileHandlingException(_input); } if (indexDivisor > 0) { _input.Dispose(); _input = null; if (success) _indexLoaded = true; _termBytesReader = _termBytes.Freeze(true); } } }
/// <summary> /// Used only by clone. </summary> private Lucene40StoredFieldsReader(FieldInfos fieldInfos, int numTotalDocs, int size, IndexInput fieldsStream, IndexInput indexStream) { this.fieldInfos = fieldInfos; this.numTotalDocs = numTotalDocs; this.size = size; this.fieldsStream = fieldsStream; this.indexStream = indexStream; }
// NOTE: tvf is pre-positioned by caller public TVTermsEnum(Lucene40TermVectorsReader outerInstance) { this.OuterInstance = outerInstance; this.OrigTVF = outerInstance.Tvf; Tvf = (IndexInput)OrigTVF.Clone(); }
private int ReadHeader(IndexInput @in)
{
    return CodecUtil.CheckHeader(@in, FSTTermsWriter.TERMS_CODEC_NAME,
        FSTTermsWriter.TERMS_VERSION_START, FSTTermsWriter.TERMS_VERSION_CURRENT);
}
/// <summary> /// Used by clone. </summary> internal Lucene40TermVectorsReader(FieldInfos fieldInfos, IndexInput tvx, IndexInput tvd, IndexInput tvf, int size, int numTotalDocs) { this.FieldInfos = fieldInfos; this.Tvx = tvx; this.Tvd = tvd; this.Tvf = tvf; this.Size_Renamed = size; this.NumTotalDocs = numTotalDocs; }
private int CopyFieldsWithDeletions(MergeState mergeState, AtomicReader reader, Lucene40StoredFieldsReader matchingFieldsReader, int[] rawDocLengths) { int docCount = 0; int maxDoc = reader.MaxDoc; IBits liveDocs = reader.LiveDocs; if (Debugging.AssertsEnabled) { Debugging.Assert(liveDocs != null); } if (matchingFieldsReader != null) { // We can bulk-copy because the fieldInfos are "congruent" for (int j = 0; j < maxDoc;) { if (!liveDocs.Get(j)) { // skip deleted docs ++j; continue; } // We can optimize this case (doing a bulk byte copy) since the field // numbers are identical int start = j, numDocs = 0; do { j++; numDocs++; if (j >= maxDoc) { break; } if (!liveDocs.Get(j)) { j++; break; } } while (numDocs < MAX_RAW_MERGE_DOCS); IndexInput stream = matchingFieldsReader.RawDocs(rawDocLengths, start, numDocs); AddRawDocuments(stream, rawDocLengths, numDocs); docCount += numDocs; mergeState.CheckAbort.Work(300 * numDocs); } } else { for (int j = 0; j < maxDoc; j++) { if (!liveDocs.Get(j)) { // skip deleted docs continue; } // TODO: this could be more efficient using // FieldVisitor instead of loading/writing entire // doc; ie we just have to renumber the field number // on the fly? // NOTE: it's very important to first assign to doc then pass it to // fieldsWriter.addDocument; see LUCENE-1282 Document doc = reader.Document(j); AddDocument(doc, mergeState.FieldInfos); docCount++; mergeState.CheckAbort.Work(300); } } return(docCount); }
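// Illustrative sketch of the inner do/while above (hypothetical helper, not part of the merger):
// it measures the contiguous run of live documents starting at 'start', capped at maxRawDocs
// (MAX_RAW_MERGE_DOCS in the original), which is the range that can be bulk-copied raw.
internal static class LiveRunExample
{
    public static int LiveRunLength(System.Func<int, bool> isLive, int start, int maxDoc, int maxRawDocs)
    {
        int j = start, numDocs = 0;
        do
        {
            j++;
            numDocs++;
            if (j >= maxDoc)
                break;
            if (!isLive(j))
                break;
        } while (numDocs < maxRawDocs);
        return numDocs;
    }
}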