private void WriteHeader(IndexOutput output)
{
    CodecUtil.WriteHeader(output, CODEC_NAME, VERSION_CURRENT);
}
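// A minimal round-trip sketch (not from the original source) showing how the
// CodecUtil header/footer pairs used throughout these readers and writers fit
// together. The codec name "MyCodec", the version constant, and the file name
// "_sketch.dat" are hypothetical; only the CodecUtil/Directory calls are real
// Lucene.NET 4.8 APIs.
using Lucene.Net.Codecs;
using Lucene.Net.Store;

public static class CodecHeaderSketch
{
    private const string CODEC_NAME = "MyCodec"; // hypothetical codec name
    private const int VERSION_CURRENT = 1;       // hypothetical version

    public static void RoundTrip(Directory dir)
    {
        // Write: header first, then the payload, then a checksumming footer.
        using (IndexOutput output = dir.CreateOutput("_sketch.dat", IOContext.DEFAULT))
        {
            CodecUtil.WriteHeader(output, CODEC_NAME, VERSION_CURRENT);
            output.WriteVInt32(42); // payload
            CodecUtil.WriteFooter(output);
        }

        // Read: CheckHeader validates the magic, codec name, and version range;
        // CheckFooter verifies the checksum accumulated by the checksum input.
        using (ChecksumIndexInput input = dir.OpenChecksumInput("_sketch.dat", IOContext.READ_ONCE))
        {
            int version = CodecUtil.CheckHeader(input, CODEC_NAME, VERSION_CURRENT, VERSION_CURRENT);
            int payload = input.ReadVInt32();
            CodecUtil.CheckFooter(input);
        }
    }
}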
public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor,
    IComparer<BytesRef> termComp, String segmentSuffix, IOContext context)
{
    _termComp = termComp;

    Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

    _input = dir.OpenInput(
        IndexFileNames.SegmentFileName(segment, segmentSuffix, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION),
        context);

    var success = false;
    try
    {
        _version = ReadHeader(_input);

        if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
        {
            CodecUtil.ChecksumEntireFile(_input);
        }

        indexInterval = _input.ReadInt();
        if (indexInterval < 1)
        {
            throw new CorruptIndexException(String.Format("Invalid indexInterval: {0}, Resource: {1}", indexInterval, _input));
        }

        _indexDivisor = indexDivisor;

        if (indexDivisor < 0)
        {
            _totalIndexInterval = indexInterval;
        }
        else
        {
            // In case terms index gets loaded, later, on demand
            _totalIndexInterval = indexInterval * indexDivisor;
        }

        Debug.Assert(_totalIndexInterval > 0);

        SeekDir(_input, _dirOffset);

        // Read directory
        int numFields = _input.ReadVInt();
        if (numFields < 0)
        {
            throw new CorruptIndexException(String.Format("Invalid numFields: {0}, Resource: {1}", numFields, _input));
        }

        for (int i = 0; i < numFields; i++)
        {
            int field = _input.ReadVInt();
            int numIndexTerms = _input.ReadVInt();
            if (numIndexTerms < 0)
            {
                throw new CorruptIndexException(String.Format("Invalid numIndexTerms: {0}, Resource: {1}", numIndexTerms, _input));
            }

            long termsStart = _input.ReadVLong();
            long indexStart = _input.ReadVLong();
            long packedIndexStart = _input.ReadVLong();
            long packedOffsetsStart = _input.ReadVLong();

            if (packedIndexStart < indexStart)
            {
                throw new CorruptIndexException(
                    String.Format("Invalid packedIndexStart: {0}, IndexStart: {1}, NumIndexTerms: {2}, Resource: {3}",
                        packedIndexStart, indexStart, numIndexTerms, _input));
            }

            FieldInfo fieldInfo = fieldInfos.FieldInfo(field);

            try
            {
                _fields.Add(fieldInfo,
                    new FieldIndexData(numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart, this));
            }
            catch (ArgumentException)
            {
                throw new CorruptIndexException(String.Format("Duplicate field: {0}, Resource {1}", fieldInfo.Name, _input));
            }
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(_input);
        }

        if (indexDivisor > 0)
        {
            _input.Dispose();
            _input = null;
            if (success)
            {
                _indexLoaded = true;
            }
            _termBytesReader = _termBytes.Freeze(true);
        }
    }
}
public VariableGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, String segment, int indexDivisor,
    String segmentSuffix, IOContext context)
{
    _input = dir.OpenInput(
        IndexFileNames.SegmentFileName(segment, segmentSuffix, VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION),
        new IOContext(context, true));

    var success = false;

    Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

    try
    {
        _version = ReadHeader(_input);
        _indexDivisor = indexDivisor;

        if (_version >= VariableGapTermsIndexWriter.VERSION_CHECKSUM)
        {
            CodecUtil.ChecksumEntireFile(_input);
        }

        SeekDir(_input, _dirOffset);

        // Read directory
        var numFields = _input.ReadVInt();
        if (numFields < 0)
        {
            throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + _input + ")");
        }

        for (var i = 0; i < numFields; i++)
        {
            var field = _input.ReadVInt();
            var indexStart = _input.ReadVLong();
            var fieldInfo = fieldInfos.FieldInfo(field);

            try
            {
                _fields.Add(fieldInfo, new FieldIndexData(indexStart, this));
            }
            catch (ArgumentException)
            {
                throw new CorruptIndexException(String.Format("Duplicate Field: {0}, Resource: {1}", fieldInfo.Name, _input));
            }
        }

        success = true;
    }
    finally
    {
        if (indexDivisor > 0)
        {
            _input.Dispose();
            _input = null;
            if (success)
            {
                _indexLoaded = true;
            }
        }
    }
}
public override int Merge(MergeState mergeState)
{
    int docCount = 0;
    int idx = 0;

    foreach (AtomicReader reader in mergeState.Readers)
    {
        SegmentReader matchingSegmentReader = mergeState.MatchingSegmentReaders[idx++];
        CompressingTermVectorsReader matchingVectorsReader = null;
        if (matchingSegmentReader != null)
        {
            TermVectorsReader vectorsReader = matchingSegmentReader.TermVectorsReader;
            // we can only bulk-copy if the matching reader is also a CompressingTermVectorsReader
            if (vectorsReader != null && vectorsReader is CompressingTermVectorsReader)
            {
                matchingVectorsReader = (CompressingTermVectorsReader)vectorsReader;
            }
        }

        int maxDoc = reader.MaxDoc;
        IBits liveDocs = reader.LiveDocs;

        if (matchingVectorsReader == null
            || matchingVectorsReader.Version != VERSION_CURRENT
            || matchingVectorsReader.CompressionMode != compressionMode
            || matchingVectorsReader.ChunkSize != chunkSize
            || matchingVectorsReader.PackedInt32sVersion != PackedInt32s.VERSION_CURRENT)
        {
            // naive merge...
            for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
            {
                Fields vectors = reader.GetTermVectors(i);
                AddAllDocVectors(vectors, mergeState);
                ++docCount;
                mergeState.CheckAbort.Work(300);
            }
        }
        else
        {
            CompressingStoredFieldsIndexReader index = matchingVectorsReader.Index;
            IndexInput vectorsStreamOrig = matchingVectorsReader.VectorsStream;
            vectorsStreamOrig.Seek(0);
            ChecksumIndexInput vectorsStream = new BufferedChecksumIndexInput((IndexInput)vectorsStreamOrig.Clone());

            for (int i = NextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; )
            {
                // We make sure to move the checksum input in any case, otherwise the final
                // integrity check might need to read the whole file a second time
                long startPointer = index.GetStartPointer(i);
                if (startPointer > vectorsStream.GetFilePointer())
                {
                    vectorsStream.Seek(startPointer);
                }

                if ((pendingDocs.Count == 0) && (i == 0 || index.GetStartPointer(i - 1) < startPointer)) // start of a chunk
                {
                    int docBase = vectorsStream.ReadVInt32();
                    int chunkDocs = vectorsStream.ReadVInt32();
                    Debug.Assert(docBase + chunkDocs <= matchingSegmentReader.MaxDoc);
                    if (docBase + chunkDocs < matchingSegmentReader.MaxDoc
                        && NextDeletedDoc(docBase, liveDocs, docBase + chunkDocs) == docBase + chunkDocs)
                    {
                        long chunkEnd = index.GetStartPointer(docBase + chunkDocs);
                        long chunkLength = chunkEnd - vectorsStream.GetFilePointer();
                        indexWriter.WriteIndex(chunkDocs, this.vectorsStream.GetFilePointer());
                        this.vectorsStream.WriteVInt32(docCount);
                        this.vectorsStream.WriteVInt32(chunkDocs);
                        this.vectorsStream.CopyBytes(vectorsStream, chunkLength);
                        docCount += chunkDocs;
                        this.numDocs += chunkDocs;
                        mergeState.CheckAbort.Work(300 * chunkDocs);
                        i = NextLiveDoc(docBase + chunkDocs, liveDocs, maxDoc);
                    }
                    else
                    {
                        for (; i < docBase + chunkDocs; i = NextLiveDoc(i + 1, liveDocs, maxDoc))
                        {
                            Fields vectors = reader.GetTermVectors(i);
                            AddAllDocVectors(vectors, mergeState);
                            ++docCount;
                            mergeState.CheckAbort.Work(300);
                        }
                    }
                }
                else
                {
                    Fields vectors = reader.GetTermVectors(i);
                    AddAllDocVectors(vectors, mergeState);
                    ++docCount;
                    mergeState.CheckAbort.Work(300);
                    i = NextLiveDoc(i + 1, liveDocs, maxDoc);
                }
            }

            vectorsStream.Seek(vectorsStream.Length - CodecUtil.FooterLength());
            CodecUtil.CheckFooter(vectorsStream);
        }
    }

    Finish(mergeState.FieldInfos, docCount);
    return docCount;
}
private readonly int version; // LUCENENET: marked readonly

//static final boolean TEST = false;
public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
{
    string termsIndexFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_INDEX_EXTENSION);
    string termsBlockFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION);

    this.postingsReader = postingsReader;
    ChecksumIndexInput indexIn = null;
    IndexInput blockIn = null;
    bool success = false;
    try
    {
        indexIn = state.Directory.OpenChecksumInput(termsIndexFileName, state.Context);
        blockIn = state.Directory.OpenInput(termsBlockFileName, state.Context);
        version = ReadHeader(indexIn);
        ReadHeader(blockIn);
        if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM)
        {
            CodecUtil.ChecksumEntireFile(blockIn);
        }

        this.postingsReader.Init(blockIn);
        SeekDir(blockIn);

        FieldInfos fieldInfos = state.FieldInfos;
        int numFields = blockIn.ReadVInt32();
        for (int i = 0; i < numFields; i++)
        {
            FieldInfo fieldInfo = fieldInfos.FieldInfo(blockIn.ReadVInt32());
            bool hasFreq = fieldInfo.IndexOptions != IndexOptions.DOCS_ONLY;
            long numTerms = blockIn.ReadVInt64();
            long sumTotalTermFreq = hasFreq ? blockIn.ReadVInt64() : -1;
            long sumDocFreq = blockIn.ReadVInt64();
            int docCount = blockIn.ReadVInt32();
            int longsSize = blockIn.ReadVInt32();
            var index = new FST<Int64>(indexIn, PositiveInt32Outputs.Singleton);

            var current = new TermsReader(this, fieldInfo, blockIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index);

            // LUCENENET NOTE: This simulates a put operation in Java,
            // getting the prior value first before setting it.
            fields.TryGetValue(fieldInfo.Name, out TermsReader previous);
            fields[fieldInfo.Name] = current;
            CheckFieldSummary(state.SegmentInfo, indexIn, blockIn, current, previous);
        }
        if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM)
        {
            CodecUtil.CheckFooter(indexIn);
        }
        else
        {
#pragma warning disable 612, 618
            CodecUtil.CheckEOF(indexIn);
#pragma warning restore 612, 618
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Dispose(indexIn, blockIn);
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(indexIn, blockIn);
        }
    }
}
/// <summary>
/// Sole constructor. </summary>
public CompressingStoredFieldsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode)
{
    this.compressionMode = compressionMode;
    string segment = si.Name;
    bool success = false;
    fieldInfos = fn;
    numDocs = si.DocCount;
    ChecksumIndexInput indexStream = null;
    try
    {
        string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
        string fieldsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION);

        // Load the index into memory
        indexStream = d.OpenChecksumInput(indexStreamFN, context);
        string codecNameIdx = formatName + CompressingStoredFieldsWriter.CODEC_SFX_IDX;
        version = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
        Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.GetFilePointer());
        indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

        long maxPointer = -1;

        if (version >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
        {
            maxPointer = indexStream.ReadVInt64();
            CodecUtil.CheckFooter(indexStream);
        }
        else
        {
#pragma warning disable 612, 618
            CodecUtil.CheckEOF(indexStream);
#pragma warning restore 612, 618
        }

        indexStream.Dispose();
        indexStream = null;

        // Open the data file and read metadata
        fieldsStream = d.OpenInput(fieldsStreamFN, context);
        if (version >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
        {
            if (maxPointer + CodecUtil.FooterLength() != fieldsStream.Length)
            {
                throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer=" + maxPointer + ", length=" + fieldsStream.Length);
            }
        }
        else
        {
            maxPointer = fieldsStream.Length;
        }
        this.maxPointer = maxPointer;
        string codecNameDat = formatName + CompressingStoredFieldsWriter.CODEC_SFX_DAT;
        int fieldsVersion = CodecUtil.CheckHeader(fieldsStream, codecNameDat, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
        if (version != fieldsVersion)
        {
            throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version + " != " + fieldsVersion);
        }
        Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == fieldsStream.GetFilePointer());

        if (version >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS)
        {
            chunkSize = fieldsStream.ReadVInt32();
        }
        else
        {
            chunkSize = -1;
        }

        packedIntsVersion = fieldsStream.ReadVInt32();
        decompressor = compressionMode.NewDecompressor();
        this.bytes = new BytesRef();

        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.DisposeWhileHandlingException(this, indexStream);
        }
    }
}
public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
{
    string fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
    IndexOutput output = directory.CreateOutput(fileName, context);
    bool success = false;
    try
    {
        CodecUtil.WriteHeader(output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT);
        output.WriteVInt(infos.Size());
        foreach (FieldInfo fi in infos)
        {
            FieldInfo.IndexOptions? indexOptions = fi.FieldIndexOptions;
            sbyte bits = 0x0;
            if (fi.HasVectors())
            {
                bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR;
            }
            if (fi.OmitsNorms())
            {
                bits |= Lucene46FieldInfosFormat.OMIT_NORMS;
            }
            if (fi.HasPayloads())
            {
                bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS;
            }
            if (fi.Indexed)
            {
                bits |= Lucene46FieldInfosFormat.IS_INDEXED;
                Debug.Assert(indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.HasPayloads());
                if (indexOptions == FieldInfo.IndexOptions.DOCS_ONLY)
                {
                    bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
                }
                else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
                {
                    bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
                }
                else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS)
                {
                    bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS;
                }
            }
            output.WriteString(fi.Name);
            output.WriteVInt(fi.Number);
            output.WriteByte(bits);

            // pack the DV types in one byte
            sbyte dv = DocValuesByte(fi.DocValuesType);
            sbyte nrm = DocValuesByte(fi.NormType);
            Debug.Assert((dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0);
            sbyte val = unchecked((sbyte)(0xff & ((nrm << 4) | dv)));
            output.WriteByte(val);
            output.WriteLong(fi.DocValuesGen);
            output.WriteStringStringMap(fi.Attributes());
        }
        CodecUtil.WriteFooter(output);
        success = true;
    }
    finally
    {
        if (success)
        {
            output.Dispose();
        }
        else
        {
            IOUtils.CloseWhileHandlingException(output);
        }
    }
}
public override void Init(IndexOutput termsOut)
{
    CodecUtil.WriteHeader(termsOut, TERMS_CODEC, VERSION_CURRENT);
    termsOut.WriteVInt32(Lucene41PostingsFormat.BLOCK_SIZE);
}
private void WriteHeader(IndexOutput @out)
{
    CodecUtil.WriteHeader(@out, TERMS_CODEC_NAME, TERMS_VERSION_CURRENT);
}
protected BinaryDictionary()
{
    int[] targetMapOffsets = null, targetMap = null;
    string[] posDict = null;
    string[] inflFormDict = null;
    string[] inflTypeDict = null;
    ByteBuffer buffer = null;

    using (Stream mapIS = GetResource(TARGETMAP_FILENAME_SUFFIX))
    {
        DataInput @in = new InputStreamDataInput(mapIS);
        CodecUtil.CheckHeader(@in, TARGETMAP_HEADER, VERSION, VERSION);
        targetMap = new int[@in.ReadVInt32()];
        targetMapOffsets = new int[@in.ReadVInt32()];
        int accum = 0, sourceId = 0;
        for (int ofs = 0; ofs < targetMap.Length; ofs++)
        {
            int val = @in.ReadVInt32();
            if ((val & 0x01) != 0)
            {
                targetMapOffsets[sourceId] = ofs;
                sourceId++;
            }
            accum += (int)((uint)val >> 1); // unsigned shift: the low bit is a flag, the remaining bits are the delta
            targetMap[ofs] = accum;
        }
        if (sourceId + 1 != targetMapOffsets.Length)
        {
            throw new IOException("targetMap file format broken");
        }
        targetMapOffsets[sourceId] = targetMap.Length;
    }

    using (Stream posIS = GetResource(POSDICT_FILENAME_SUFFIX))
    {
        DataInput @in = new InputStreamDataInput(posIS);
        CodecUtil.CheckHeader(@in, POSDICT_HEADER, VERSION, VERSION);
        int posSize = @in.ReadVInt32();
        posDict = new string[posSize];
        inflTypeDict = new string[posSize];
        inflFormDict = new string[posSize];
        for (int j = 0; j < posSize; j++)
        {
            posDict[j] = @in.ReadString();
            inflTypeDict[j] = @in.ReadString();
            inflFormDict[j] = @in.ReadString();
            // this is how we encode null inflections
            if (inflTypeDict[j].Length == 0)
            {
                inflTypeDict[j] = null;
            }
            if (inflFormDict[j].Length == 0)
            {
                inflFormDict[j] = null;
            }
        }
    }

    ByteBuffer tmpBuffer;
    using (Stream dictIS = GetResource(DICT_FILENAME_SUFFIX))
    {
        // no buffering here, as we load in one large buffer
        DataInput @in = new InputStreamDataInput(dictIS);
        CodecUtil.CheckHeader(@in, DICT_HEADER, VERSION, VERSION);
        int size = @in.ReadVInt32();
        tmpBuffer = ByteBuffer.Allocate(size); // AllocateDirect..?
        int read = dictIS.Read(tmpBuffer.Array, 0, size);
        if (read != size)
        {
            throw new EndOfStreamException("Cannot read whole dictionary");
        }
    }
    buffer = tmpBuffer.AsReadOnlyBuffer();

    this.targetMap = targetMap;
    this.targetMapOffsets = targetMapOffsets;
    this.posDict = posDict;
    this.inflTypeDict = inflTypeDict;
    this.inflFormDict = inflFormDict;
    this.buffer = buffer;
}
/// <summary>
/// Reads the snapshots information from the given <see cref="Directory"/>. This
/// method can be used if the snapshots information is needed, however you
/// cannot instantiate the deletion policy (because e.g., some other process
/// keeps a lock on the snapshots directory).
/// </summary>
private void LoadPriorSnapshots()
{
    UninterruptableMonitor.Enter(this);
    try
    {
        long genLoaded = -1;
        Exception ioe = null; // LUCENENET: No need to cast to IOException
        IList<string> snapshotFiles = new JCG.List<string>();
        foreach (string file in dir.ListAll())
        {
            if (file.StartsWith(SNAPSHOTS_PREFIX, StringComparison.Ordinal))
            {
                // LUCENENET: Optimized to not allocate a substring during the parse
                long gen = Long.Parse(file, SNAPSHOTS_PREFIX.Length, file.Length - SNAPSHOTS_PREFIX.Length, radix: 10);
                if (genLoaded == -1 || gen > genLoaded)
                {
                    snapshotFiles.Add(file);
                    IDictionary<long, int> m = new Dictionary<long, int>();
                    IndexInput @in = dir.OpenInput(file, IOContext.DEFAULT);
                    try
                    {
                        CodecUtil.CheckHeader(@in, CODEC_NAME, VERSION_START, VERSION_START);
                        int count = @in.ReadVInt32();
                        for (int i = 0; i < count; i++)
                        {
                            long commitGen = @in.ReadVInt64();
                            int refCount = @in.ReadVInt32();
                            m[commitGen] = refCount;
                        }
                    }
                    catch (Exception ioe2) when (ioe2.IsIOException())
                    {
                        // Save first exception & throw in the end
                        if (ioe is null)
                        {
                            ioe = ioe2;
                        }
                    }
                    finally
                    {
                        @in.Dispose();
                    }

                    genLoaded = gen;
                    m_refCounts.Clear();
                    m_refCounts.PutAll(m);
                }
            }
        }

        if (genLoaded == -1)
        {
            // Nothing was loaded...
            if (ioe != null)
            {
                // ... not for lack of trying:
                ExceptionDispatchInfo.Capture(ioe).Throw(); // LUCENENET: Rethrow to preserve stack details from the original throw
            }
        }
        else
        {
            if (snapshotFiles.Count > 1)
            {
                // Remove any broken / old snapshot files:
                string curFileName = SNAPSHOTS_PREFIX + genLoaded;
                foreach (string file in snapshotFiles)
                {
                    if (!curFileName.Equals(file, StringComparison.Ordinal))
                    {
                        dir.DeleteFile(file);
                    }
                }
            }
            nextWriteGen = 1 + genLoaded;
        }
    }
    finally
    {
        UninterruptableMonitor.Exit(this);
    }
}
internal void Persist()
{
    UninterruptableMonitor.Enter(this);
    try
    {
        string fileName = SNAPSHOTS_PREFIX + nextWriteGen;
        IndexOutput @out = dir.CreateOutput(fileName, IOContext.DEFAULT);
        bool success = false;
        try
        {
            CodecUtil.WriteHeader(@out, CODEC_NAME, VERSION_CURRENT);
            @out.WriteVInt32(m_refCounts.Count);
            foreach (KeyValuePair<long, int> ent in m_refCounts)
            {
                @out.WriteVInt64(ent.Key);
                @out.WriteVInt32(ent.Value);
            }
            success = true;
        }
        finally
        {
            if (!success)
            {
                IOUtils.DisposeWhileHandlingException(@out);
                try
                {
                    dir.DeleteFile(fileName);
                }
                catch (Exception e) when (e.IsException())
                {
                    // Suppress so we keep throwing original exception
                }
            }
            else
            {
                IOUtils.Dispose(@out);
            }
        }

        dir.Sync(/*Collections.singletonList(*/ new[] { fileName } /*)*/);

        if (nextWriteGen > 0)
        {
            string lastSaveFile = SNAPSHOTS_PREFIX + (nextWriteGen - 1);
            try
            {
                dir.DeleteFile(lastSaveFile);
            }
            catch (Exception ioe) when (ioe.IsIOException())
            {
                // OK: likely it didn't exist
            }
        }

        nextWriteGen++;
    }
    finally
    {
        UninterruptableMonitor.Exit(this);
    }
}
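// Persist() and LoadPriorSnapshots() above belong to a persistent snapshot
// deletion policy. A minimal usage sketch (not from the original source) of
// the public Lucene.NET 4.8 PersistentSnapshotDeletionPolicy API that drives
// this code; the directory arguments and analyzer choice are hypothetical.
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

public static class SnapshotSketch
{
    public static void TakeSnapshot(Directory indexDir, Directory snapshotDir)
    {
        // The policy persists its generation files (Persist/LoadPriorSnapshots)
        // into snapshotDir, wrapping a primary deletion policy.
        var policy = new PersistentSnapshotDeletionPolicy(
            new KeepOnlyLastCommitDeletionPolicy(), snapshotDir);

        var config = new IndexWriterConfig(LuceneVersion.LUCENE_48,
            new StandardAnalyzer(LuceneVersion.LUCENE_48))
        {
            IndexDeletionPolicy = policy
        };

        using (var writer = new IndexWriter(indexDir, config))
        {
            writer.Commit(); // a commit must exist before snapshotting

            IndexCommit snapshot = policy.Snapshot(); // triggers Persist() internally
            // ... back up the files in snapshot.FileNames, then release:
            policy.Release(snapshot);
        }
    }
}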
protected override int ReadIndexHeader(IndexInput input)
{
    return CodecUtil.CheckHeader(input, APPENDING_TERMS_INDEX_CODEC_NAME,
        BlockTreeTermsWriter.VERSION_START, BlockTreeTermsWriter.VERSION_CURRENT);
}
/// <summary>
/// Reads the snapshots information from the given <see cref="Directory"/>. This
/// method can be used if the snapshots information is needed, however you
/// cannot instantiate the deletion policy (because e.g., some other process
/// keeps a lock on the snapshots directory).
/// </summary>
private void LoadPriorSnapshots()
{
    lock (this)
    {
        long genLoaded = -1;
        IOException ioe = null;
        IList<string> snapshotFiles = new List<string>();
        foreach (string file in dir.ListAll())
        {
            if (file.StartsWith(SNAPSHOTS_PREFIX, StringComparison.Ordinal))
            {
                long gen = Convert.ToInt64(file.Substring(SNAPSHOTS_PREFIX.Length), CultureInfo.InvariantCulture);
                if (genLoaded == -1 || gen > genLoaded)
                {
                    snapshotFiles.Add(file);
                    IDictionary<long, int> m = new Dictionary<long, int>();
                    IndexInput @in = dir.OpenInput(file, IOContext.DEFAULT);
                    try
                    {
                        CodecUtil.CheckHeader(@in, CODEC_NAME, VERSION_START, VERSION_START);
                        int count = @in.ReadVInt32();
                        for (int i = 0; i < count; i++)
                        {
                            long commitGen = @in.ReadVInt64();
                            int refCount = @in.ReadVInt32();
                            m[commitGen] = refCount;
                        }
                    }
                    catch (IOException ioe2)
                    {
                        // Save first exception & throw in the end
                        if (ioe == null)
                        {
                            ioe = ioe2;
                        }
                    }
                    finally
                    {
                        @in.Dispose();
                    }

                    genLoaded = gen;
                    m_refCounts.Clear();
                    m_refCounts.PutAll(m);
                }
            }
        }

        if (genLoaded == -1)
        {
            // Nothing was loaded...
            if (ioe != null)
            {
                // ... not for lack of trying:
                throw ioe;
            }
        }
        else
        {
            if (snapshotFiles.Count > 1)
            {
                // Remove any broken / old snapshot files:
                string curFileName = SNAPSHOTS_PREFIX + genLoaded;
                foreach (string file in snapshotFiles)
                {
                    if (!curFileName.Equals(file, StringComparison.Ordinal))
                    {
                        dir.DeleteFile(file);
                    }
                }
            }
            nextWriteGen = 1 + genLoaded;
        }
    }
}
public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
{
    string fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION);
    IndexOutput output = directory.CreateOutput(fileName, context);
    bool success = false;
    try
    {
        CodecUtil.WriteHeader(output, Lucene40FieldInfosFormat.CODEC_NAME, Lucene40FieldInfosFormat.FORMAT_CURRENT);
        output.WriteVInt32(infos.Count);
        foreach (FieldInfo fi in infos)
        {
            IndexOptions indexOptions = fi.IndexOptions;
            sbyte bits = 0x0;
            if (fi.HasVectors)
            {
                bits |= Lucene40FieldInfosFormat.STORE_TERMVECTOR;
            }
            if (fi.OmitsNorms)
            {
                bits |= Lucene40FieldInfosFormat.OMIT_NORMS;
            }
            if (fi.HasPayloads)
            {
                bits |= Lucene40FieldInfosFormat.STORE_PAYLOADS;
            }
            if (fi.IsIndexed)
            {
                bits |= Lucene40FieldInfosFormat.IS_INDEXED;
                Debug.Assert(indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.HasPayloads);
                if (indexOptions == IndexOptions.DOCS_ONLY)
                {
                    bits |= Lucene40FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
                }
                else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
                {
                    bits |= Lucene40FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
                }
                else if (indexOptions == IndexOptions.DOCS_AND_FREQS)
                {
                    bits |= Lucene40FieldInfosFormat.OMIT_POSITIONS;
                }
            }
            output.WriteString(fi.Name);
            output.WriteVInt32(fi.Number);
            output.WriteByte((byte)bits);

            // pack the DV types in one byte
            byte dv = DocValuesByte(fi.DocValuesType, fi.GetAttribute(Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY));
            byte nrm = DocValuesByte(fi.NormType, fi.GetAttribute(Lucene40FieldInfosReader.LEGACY_NORM_TYPE_KEY));
            Debug.Assert((dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0);
            var val = (byte)(0xff & ((nrm << 4) | (byte)dv));
            output.WriteByte(val);
            output.WriteStringStringMap(fi.Attributes);
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            output.Dispose();
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(output);
        }
    }
}
/// <summary>
/// Creates a postings writer with the specified PackedInts overhead ratio </summary>
// TODO: does this ctor even make sense?
public Lucene41PostingsWriter(SegmentWriteState state, float acceptableOverheadRatio)
    : base()
{
    docOut = state.Directory.CreateOutput(IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, Lucene41PostingsFormat.DOC_EXTENSION), state.Context);
    IndexOutput posOut = null;
    IndexOutput payOut = null;
    bool success = false;
    try
    {
        CodecUtil.WriteHeader(docOut, DOC_CODEC, VERSION_CURRENT);
        forUtil = new ForUtil(acceptableOverheadRatio, docOut);
        if (state.FieldInfos.HasProx)
        {
            posDeltaBuffer = new int[ForUtil.MAX_DATA_SIZE];
            posOut = state.Directory.CreateOutput(IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, Lucene41PostingsFormat.POS_EXTENSION), state.Context);
            CodecUtil.WriteHeader(posOut, POS_CODEC, VERSION_CURRENT);

            if (state.FieldInfos.HasPayloads)
            {
                payloadBytes = new byte[128];
                payloadLengthBuffer = new int[ForUtil.MAX_DATA_SIZE];
            }
            else
            {
                payloadBytes = null;
                payloadLengthBuffer = null;
            }

            if (state.FieldInfos.HasOffsets)
            {
                offsetStartDeltaBuffer = new int[ForUtil.MAX_DATA_SIZE];
                offsetLengthBuffer = new int[ForUtil.MAX_DATA_SIZE];
            }
            else
            {
                offsetStartDeltaBuffer = null;
                offsetLengthBuffer = null;
            }

            if (state.FieldInfos.HasPayloads || state.FieldInfos.HasOffsets)
            {
                payOut = state.Directory.CreateOutput(IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION), state.Context);
                CodecUtil.WriteHeader(payOut, PAY_CODEC, VERSION_CURRENT);
            }
        }
        else
        {
            posDeltaBuffer = null;
            payloadLengthBuffer = null;
            offsetStartDeltaBuffer = null;
            offsetLengthBuffer = null;
            payloadBytes = null;
        }
        this.payOut = payOut;
        this.posOut = posOut;
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.DisposeWhileHandlingException(docOut, posOut, payOut);
        }
    }

    docDeltaBuffer = new int[ForUtil.MAX_DATA_SIZE];
    freqBuffer = new int[ForUtil.MAX_DATA_SIZE];

    // TODO: should we try skipping every 2/4 blocks...?
    skipWriter = new Lucene41SkipWriter(maxSkipLevels, Lucene41PostingsFormat.BLOCK_SIZE, state.SegmentInfo.DocCount, docOut, posOut, payOut);

    encoded = new byte[ForUtil.MAX_ENCODED_SIZE];
}
public override NumericDocValues GetNumeric(FieldInfo field)
{
    UninterruptableMonitor.Enter(this);
    try
    {
        if (!numericInstances.TryGetValue(field.Number, out NumericDocValues instance))
        {
            string fileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name + "_" + Convert.ToString(field.Number, CultureInfo.InvariantCulture), segmentSuffix, "dat");
            IndexInput input = dir.OpenInput(fileName, state.Context);
            bool success = false;
            try
            {
                var type = field.GetAttribute(legacyKey).ToLegacyDocValuesType();

                //switch (Enum.Parse(typeof(LegacyDocValuesType), field.GetAttribute(LegacyKey)))
                //{
                if (type == LegacyDocValuesType.VAR_INTS)
                {
                    instance = LoadVarInt32sField(/* field, // LUCENENET: Never read */ input);
                }
                else if (type == LegacyDocValuesType.FIXED_INTS_8)
                {
                    instance = LoadByteField(/* field, // LUCENENET: Never read */ input);
                }
                else if (type == LegacyDocValuesType.FIXED_INTS_16)
                {
                    instance = LoadInt16Field(/* field, // LUCENENET: Never read */ input);
                }
                else if (type == LegacyDocValuesType.FIXED_INTS_32)
                {
                    instance = LoadInt32Field(/* field, // LUCENENET: Never read */ input);
                }
                else if (type == LegacyDocValuesType.FIXED_INTS_64)
                {
                    instance = LoadInt64Field(/* field, // LUCENENET: Never read */ input);
                }
                else if (type == LegacyDocValuesType.FLOAT_32)
                {
                    instance = LoadSingleField(/* field, // LUCENENET: Never read */ input);
                }
                else if (type == LegacyDocValuesType.FLOAT_64)
                {
                    instance = LoadDoubleField(/* field, // LUCENENET: Never read */ input);
                }
                else
                {
                    throw AssertionError.Create();
                }

                CodecUtil.CheckEOF(input);
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(input);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(input);
                }
            }
            numericInstances[field.Number] = instance;
        }
        return instance;
    }
    finally
    {
        UninterruptableMonitor.Exit(this);
    }
}
//static final boolean TEST = false;
public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
{
    string termsIndexFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_INDEX_EXTENSION);
    string termsBlockFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION);

    this.postingsReader = postingsReader;
    ChecksumIndexInput indexIn = null;
    IndexInput blockIn = null;
    bool success = false;
    try
    {
        indexIn = state.Directory.OpenChecksumInput(termsIndexFileName, state.Context);
        blockIn = state.Directory.OpenInput(termsBlockFileName, state.Context);
        version = ReadHeader(indexIn);
        ReadHeader(blockIn);
        if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM)
        {
            CodecUtil.ChecksumEntireFile(blockIn);
        }

        this.postingsReader.Init(blockIn);
        SeekDir(blockIn);

        FieldInfos fieldInfos = state.FieldInfos;
        int numFields = blockIn.ReadVInt();
        for (int i = 0; i < numFields; i++)
        {
            FieldInfo fieldInfo = fieldInfos.FieldInfo(blockIn.ReadVInt());
            bool hasFreq = fieldInfo.FieldIndexOptions != FieldInfo.IndexOptions.DOCS_ONLY;
            long numTerms = blockIn.ReadVLong();
            long sumTotalTermFreq = hasFreq ? blockIn.ReadVLong() : -1;
            long sumDocFreq = blockIn.ReadVLong();
            int docCount = blockIn.ReadVInt();
            int longsSize = blockIn.ReadVInt();
            var index = new FST<long?>(indexIn, PositiveIntOutputs.Singleton);

            var current = new TermsReader(this, fieldInfo, blockIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index);

            // Fetch any prior value before overwriting, mirroring Java's map.put()
            // semantics so CheckFieldSummary can detect duplicate fields.
            fields.TryGetValue(fieldInfo.Name, out TermsReader previous);
            fields[fieldInfo.Name] = current;
            CheckFieldSummary(state.SegmentInfo, indexIn, blockIn, current, previous);
        }
        if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM)
        {
            CodecUtil.CheckFooter(indexIn);
        }
        else
        {
            CodecUtil.CheckEOF(indexIn);
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Close(indexIn, blockIn);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(indexIn, blockIn);
        }
    }
}
private void AddVarSortedBytesField(FieldInfo field, IndexOutput data, IndexOutput index, IEnumerable<BytesRef> values, IEnumerable<long?> docToOrd)
{
    field.PutAttribute(legacyKey, LegacyDocValuesType.BYTES_VAR_SORTED.ToString());

    CodecUtil.WriteHeader(data, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_DAT, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);
    CodecUtil.WriteHeader(index, Lucene40DocValuesFormat.BYTES_VAR_SORTED_CODEC_NAME_IDX, Lucene40DocValuesFormat.BYTES_VAR_SORTED_VERSION_CURRENT);

    /* values */

    long startPos = data.Position; // LUCENENET specific: Renamed from getFilePointer() to match FileStream

    int valueCount = 0;
    foreach (BytesRef v in values)
    {
        data.WriteBytes(v.Bytes, v.Offset, v.Length);
        valueCount++;
    }

    /* addresses */

    long maxAddress = data.Position - startPos; // LUCENENET specific: Renamed from getFilePointer() to match FileStream
    index.WriteInt64(maxAddress);

    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(valueCount != int.MaxValue); // unsupported by the 4.0 impl
    }

    PackedInt32s.Writer w = PackedInt32s.GetWriter(index, valueCount + 1, PackedInt32s.BitsRequired(maxAddress), PackedInt32s.DEFAULT);
    long currentPosition = 0;
    foreach (BytesRef v in values)
    {
        w.Add(currentPosition);
        currentPosition += v.Length;
    }
    // write sentinel
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(currentPosition == maxAddress);
    }
    w.Add(currentPosition);
    w.Finish();

    /* ordinals */

    int maxDoc = state.SegmentInfo.DocCount;
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(valueCount > 0);
    }
    PackedInt32s.Writer ords = PackedInt32s.GetWriter(index, maxDoc, PackedInt32s.BitsRequired(valueCount - 1), PackedInt32s.DEFAULT);
    foreach (long n in docToOrd)
    {
        ords.Add((long)n);
    }
    ords.Finish();
}
/// <summary>
/// Sole constructor. </summary>
public CompressingTermVectorsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode)
{
    this.compressionMode = compressionMode;
    string segment = si.Name;
    bool success = false;
    fieldInfos = fn;
    numDocs = si.DocCount;
    ChecksumIndexInput indexStream = null;
    try
    {
        // Load the index into memory
        string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_INDEX_EXTENSION);
        indexStream = d.OpenChecksumInput(indexStreamFN, context);
        string codecNameIdx = formatName + CompressingTermVectorsWriter.CODEC_SFX_IDX;
        version = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT);
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        }
        indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

        if (version >= CompressingTermVectorsWriter.VERSION_CHECKSUM)
        {
            indexStream.ReadVInt64(); // the end of the data file
            CodecUtil.CheckFooter(indexStream);
        }
        else
        {
#pragma warning disable 612, 618
            CodecUtil.CheckEOF(indexStream);
#pragma warning restore 612, 618
        }
        indexStream.Dispose();
        indexStream = null;

        // Open the data file and read metadata
        string vectorsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, CompressingTermVectorsWriter.VECTORS_EXTENSION);
        vectorsStream = d.OpenInput(vectorsStreamFN, context);
        string codecNameDat = formatName + CompressingTermVectorsWriter.CODEC_SFX_DAT;
        int version2 = CodecUtil.CheckHeader(vectorsStream, codecNameDat, CompressingTermVectorsWriter.VERSION_START, CompressingTermVectorsWriter.VERSION_CURRENT);
        if (version != version2)
        {
            throw RuntimeException.Create("Version mismatch between stored fields index and data: " + version + " != " + version2);
        }
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(CodecUtil.HeaderLength(codecNameDat) == vectorsStream.Position); // LUCENENET specific: Renamed from getFilePointer() to match FileStream
        }

        packedIntsVersion = vectorsStream.ReadVInt32();
        chunkSize = vectorsStream.ReadVInt32();
        decompressor = compressionMode.NewDecompressor();
        this.reader = new BlockPackedReaderIterator(vectorsStream, packedIntsVersion, CompressingTermVectorsWriter.BLOCK_SIZE, 0);

        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.DisposeWhileHandlingException(this, indexStream);
        }
    }
}
/// <summary>
/// expert: instantiates a new reader </summary>
protected internal Lucene45DocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension)
{
    string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
    // read in the entries from the metadata file.
    ChecksumIndexInput @in = state.Directory.OpenChecksumInput(metaName, state.Context);
    this.MaxDoc = state.SegmentInfo.DocCount;
    bool success = false;
    try
    {
        Version = CodecUtil.CheckHeader(@in, metaCodec, Lucene45DocValuesFormat.VERSION_START, Lucene45DocValuesFormat.VERSION_CURRENT);
        Numerics = new Dictionary<int, NumericEntry>();
        Ords = new Dictionary<int, NumericEntry>();
        OrdIndexes = new Dictionary<int, NumericEntry>();
        Binaries = new Dictionary<int, BinaryEntry>();
        SortedSets = new Dictionary<int, SortedSetEntry>();
        ReadFields(@in, state.FieldInfos);

        if (Version >= Lucene45DocValuesFormat.VERSION_CHECKSUM)
        {
            CodecUtil.CheckFooter(@in);
        }
        else
        {
            CodecUtil.CheckEOF(@in);
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Close(@in);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(@in);
        }
    }

    success = false;
    try
    {
        string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension);
        Data = state.Directory.OpenInput(dataName, state.Context);
        int version2 = CodecUtil.CheckHeader(Data, dataCodec, Lucene45DocValuesFormat.VERSION_START, Lucene45DocValuesFormat.VERSION_CURRENT);
        if (Version != version2)
        {
            throw new Exception("Format versions mismatch");
        }
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(this.Data);
        }
    }

    RamBytesUsed_Renamed = new AtomicLong(RamUsageEstimator.ShallowSizeOfInstance(this.GetType()));
}
private int ReadHeader(IndexInput @in)
{
    return CodecUtil.CheckHeader(@in, FSTTermsWriter.TERMS_CODEC_NAME,
        FSTTermsWriter.TERMS_VERSION_START, FSTTermsWriter.TERMS_VERSION_CURRENT);
}
public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext context)
{
    string fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
    ChecksumIndexInput input = directory.OpenChecksumInput(fileName, context);

    bool success = false;
    try
    {
        int codecVersion = CodecUtil.CheckHeader(input, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_START, Lucene46FieldInfosFormat.FORMAT_CURRENT);

        int size = input.ReadVInt32(); // read in the size
        FieldInfo[] infos = new FieldInfo[size];

        for (int i = 0; i < size; i++)
        {
            string name = input.ReadString();
            int fieldNumber = input.ReadVInt32();
            byte bits = input.ReadByte();
            bool isIndexed = (bits & Lucene46FieldInfosFormat.IS_INDEXED) != 0;
            bool storeTermVector = (bits & Lucene46FieldInfosFormat.STORE_TERMVECTOR) != 0;
            bool omitNorms = (bits & Lucene46FieldInfosFormat.OMIT_NORMS) != 0;
            bool storePayloads = (bits & Lucene46FieldInfosFormat.STORE_PAYLOADS) != 0;
            IndexOptions indexOptions;
            if (!isIndexed)
            {
                indexOptions = IndexOptions.NONE;
            }
            else if ((bits & Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0)
            {
                indexOptions = IndexOptions.DOCS_ONLY;
            }
            else if ((bits & Lucene46FieldInfosFormat.OMIT_POSITIONS) != 0)
            {
                indexOptions = IndexOptions.DOCS_AND_FREQS;
            }
            else if ((bits & Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0)
            {
                indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            }
            else
            {
                indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
            }

            // DV Types are packed in one byte
            byte val = input.ReadByte();
            DocValuesType docValuesType = GetDocValuesType(input, (sbyte)(val & 0x0F));
            DocValuesType normsType = GetDocValuesType(input, (sbyte)(((int)((uint)val >> 4)) & 0x0F));
            long dvGen = input.ReadInt64();
            IDictionary<string, string> attributes = input.ReadStringStringMap();
            infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, normsType, Collections.UnmodifiableMap(attributes));
            infos[i].DocValuesGen = dvGen;
        }

        if (codecVersion >= Lucene46FieldInfosFormat.FORMAT_CHECKSUM)
        {
            CodecUtil.CheckFooter(input);
        }
        else
        {
#pragma warning disable 612, 618
            CodecUtil.CheckEOF(input);
#pragma warning restore 612, 618
        }

        FieldInfos fieldInfos = new FieldInfos(infos);
        success = true;
        return fieldInfos;
    }
    finally
    {
        if (success)
        {
            input.Dispose();
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(input);
        }
    }
}
/// <summary>
/// Helper method that reads CFS entries from an input stream </summary>
private static IDictionary<string, FileEntry> ReadEntries(IndexInputSlicer handle, Directory dir, string name)
{
    IOException priorE = null;
    IndexInput stream = null;
    ChecksumIndexInput entriesStream = null;
    // read the first VInt. If it is negative, it's the version number
    // otherwise it's the count (pre-3.1 indexes)
    try
    {
        IDictionary<string, FileEntry> mapping;
#pragma warning disable 612, 618
        stream = handle.OpenFullSlice();
#pragma warning restore 612, 618
        int firstInt = stream.ReadVInt32();
        // impossible for 3.0 to have 63 files in a .cfs, CFS writer was not visible
        // and separate norms/etc are outside of cfs.
        if (firstInt == CODEC_MAGIC_BYTE1)
        {
            sbyte secondByte = (sbyte)stream.ReadByte();
            sbyte thirdByte = (sbyte)stream.ReadByte();
            sbyte fourthByte = (sbyte)stream.ReadByte();
            if (secondByte != CODEC_MAGIC_BYTE2 || thirdByte != CODEC_MAGIC_BYTE3 || fourthByte != CODEC_MAGIC_BYTE4)
            {
                throw new CorruptIndexException("Illegal/impossible header for CFS file: " + secondByte + "," + thirdByte + "," + fourthByte);
            }

            int version = CodecUtil.CheckHeaderNoMagic(stream, CompoundFileWriter.DATA_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_CURRENT);
            string entriesFileName = IndexFileNames.SegmentFileName(
                IndexFileNames.StripExtension(name), "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION);
            entriesStream = dir.OpenChecksumInput(entriesFileName, IOContext.READ_ONCE);
            CodecUtil.CheckHeader(entriesStream, CompoundFileWriter.ENTRY_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_CURRENT);
            int numEntries = entriesStream.ReadVInt32();
            mapping = new Dictionary<string, FileEntry>(numEntries);
            for (int i = 0; i < numEntries; i++)
            {
                FileEntry fileEntry = new FileEntry();
                string id = entriesStream.ReadString();
                FileEntry previous = mapping.Put(id, fileEntry);
                if (previous != null)
                {
                    throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS: " + entriesStream);
                }
                fileEntry.Offset = entriesStream.ReadInt64();
                fileEntry.Length = entriesStream.ReadInt64();
            }
            if (version >= CompoundFileWriter.VERSION_CHECKSUM)
            {
                CodecUtil.CheckFooter(entriesStream);
            }
            else
            {
#pragma warning disable 612, 618
                CodecUtil.CheckEOF(entriesStream);
#pragma warning restore 612, 618
            }
        }
        else
        {
            // TODO remove once 3.x is not supported anymore
            mapping = ReadLegacyEntries(stream, firstInt);
        }
        return mapping;
    }
    catch (IOException ioe)
    {
        priorE = ioe;
    }
    finally
    {
        IOUtils.DisposeWhileHandlingException(priorE, stream, entriesStream);
    }
    // this is needed until Java 7's real try-with-resources:
    throw new InvalidOperationException("impossible to get here");
}
public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
{
    string fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, Lucene46FieldInfosFormat.EXTENSION);
    IndexOutput output = directory.CreateOutput(fileName, context);
    bool success = false;
    try
    {
        CodecUtil.WriteHeader(output, Lucene46FieldInfosFormat.CODEC_NAME, Lucene46FieldInfosFormat.FORMAT_CURRENT);
        output.WriteVInt32(infos.Count);
        foreach (FieldInfo fi in infos)
        {
            IndexOptions indexOptions = fi.IndexOptions;
            sbyte bits = 0x0;
            if (fi.HasVectors)
            {
                bits |= Lucene46FieldInfosFormat.STORE_TERMVECTOR;
            }
            if (fi.OmitsNorms)
            {
                bits |= Lucene46FieldInfosFormat.OMIT_NORMS;
            }
            if (fi.HasPayloads)
            {
                bits |= Lucene46FieldInfosFormat.STORE_PAYLOADS;
            }
            if (fi.IsIndexed)
            {
                bits |= Lucene46FieldInfosFormat.IS_INDEXED;
                // LUCENENET specific - to avoid boxing, changed from CompareTo() to IndexOptionsComparer.Compare()
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(IndexOptionsComparer.Default.Compare(indexOptions, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.HasPayloads);
                }
                if (indexOptions == IndexOptions.DOCS_ONLY)
                {
                    bits |= Lucene46FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
                }
                else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
                {
                    bits |= Lucene46FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
                }
                else if (indexOptions == IndexOptions.DOCS_AND_FREQS)
                {
                    bits |= Lucene46FieldInfosFormat.OMIT_POSITIONS;
                }
            }
            output.WriteString(fi.Name);
            output.WriteVInt32(fi.Number);
            output.WriteByte((byte)bits);

            // pack the DV types in one byte
            var dv = DocValuesByte(fi.DocValuesType);
            var nrm = DocValuesByte(fi.NormType);
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert((dv & (~0xF)) == 0 && (nrm & (~0x0F)) == 0);
            }
            var val = (byte)(0xff & ((nrm << 4) | (byte)dv));
            output.WriteByte(val);
            output.WriteInt64(fi.DocValuesGen);
            output.WriteStringStringMap(fi.Attributes);
        }
        CodecUtil.WriteFooter(output);
        success = true;
    }
    finally
    {
        if (success)
        {
            output.Dispose();
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(output);
        }
    }
}
public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
{
    string fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION);
    IndexInput input = directory.OpenInput(fileName, iocontext);

    bool success = false;
    try
    {
        CodecUtil.CheckHeader(input, Lucene40FieldInfosFormat.CODEC_NAME, Lucene40FieldInfosFormat.FORMAT_START, Lucene40FieldInfosFormat.FORMAT_CURRENT);

        int size = input.ReadVInt32(); // read in the size
        FieldInfo[] infos = new FieldInfo[size];

        for (int i = 0; i < size; i++)
        {
            string name = input.ReadString();
            int fieldNumber = input.ReadVInt32();
            byte bits = input.ReadByte();
            bool isIndexed = (bits & Lucene40FieldInfosFormat.IS_INDEXED) != 0;
            bool storeTermVector = (bits & Lucene40FieldInfosFormat.STORE_TERMVECTOR) != 0;
            bool omitNorms = (bits & Lucene40FieldInfosFormat.OMIT_NORMS) != 0;
            bool storePayloads = (bits & Lucene40FieldInfosFormat.STORE_PAYLOADS) != 0;
            IndexOptions indexOptions;
            if (!isIndexed)
            {
                indexOptions = IndexOptions.NONE;
            }
            else if ((bits & Lucene40FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0)
            {
                indexOptions = IndexOptions.DOCS_ONLY;
            }
            else if ((bits & Lucene40FieldInfosFormat.OMIT_POSITIONS) != 0)
            {
                indexOptions = IndexOptions.DOCS_AND_FREQS;
            }
            else if ((bits & Lucene40FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0)
            {
                indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            }
            else
            {
                indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
            }

            // LUCENE-3027: past indices were able to write
            // storePayloads=true when omitTFAP is also true,
            // which is invalid. We correct that, here:
            if (isIndexed && indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0)
            {
                storePayloads = false;
            }

            // DV Types are packed in one byte
            byte val = input.ReadByte();
            LegacyDocValuesType oldValuesType = GetDocValuesType((sbyte)(val & 0x0F));
            LegacyDocValuesType oldNormsType = GetDocValuesType((sbyte)(((int)((uint)val >> 4)) & 0x0F));
            IDictionary<string, string> attributes = input.ReadStringStringMap();
            if (oldValuesType.GetMapping() != DocValuesType.NONE)
            {
                attributes[LEGACY_DV_TYPE_KEY] = oldValuesType.ToString();
            }
            if (oldNormsType.GetMapping() != DocValuesType.NONE)
            {
                if (oldNormsType.GetMapping() != DocValuesType.NUMERIC)
                {
                    throw new CorruptIndexException("invalid norm type: " + oldNormsType + " (resource=" + input + ")");
                }
                attributes[LEGACY_NORM_TYPE_KEY] = oldNormsType.ToString();
            }
            infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, oldValuesType.GetMapping(), oldNormsType.GetMapping(), attributes);
        }

        CodecUtil.CheckEOF(input);
        FieldInfos fieldInfos = new FieldInfos(infos);
        success = true;
        return fieldInfos;
    }
    finally
    {
        if (success)
        {
            input.Dispose();
        }
        else
        {
            IOUtils.CloseWhileHandlingException(input);
        }
    }
}
public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, string segment, int indexDivisor,
    IComparer<BytesRef> termComp, string segmentSuffix, IOContext context)
{
    this.termComp = termComp;

    Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

    input = dir.OpenInput(IndexFileNames.SegmentFileName(segment, segmentSuffix, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION), context);

    bool success = false;
    try
    {
        version = ReadHeader(input);

        if (version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
        {
            CodecUtil.ChecksumEntireFile(input);
        }

        indexInterval = input.ReadInt32();
        if (indexInterval < 1)
        {
            throw new CorruptIndexException("invalid indexInterval: " + indexInterval + " (resource=" + input + ")");
        }

        this.indexDivisor = indexDivisor;

        if (indexDivisor < 0)
        {
            totalIndexInterval = indexInterval;
        }
        else
        {
            // In case terms index gets loaded, later, on demand
            totalIndexInterval = indexInterval * indexDivisor;
        }
        Debug.Assert(totalIndexInterval > 0);

        SeekDir(input, dirOffset);

        // Read directory
        int numFields = input.ReadVInt32();
        if (numFields < 0)
        {
            throw new CorruptIndexException("invalid numFields: " + numFields + " (resource=" + input + ")");
        }
        //System.out.println("FGR: init seg=" + segment + " div=" + indexDivisor + " nF=" + numFields);
        for (int i = 0; i < numFields; i++)
        {
            int field = input.ReadVInt32();
            int numIndexTerms = input.ReadVInt32();
            if (numIndexTerms < 0)
            {
                throw new CorruptIndexException("invalid numIndexTerms: " + numIndexTerms + " (resource=" + input + ")");
            }
            long termsStart = input.ReadVInt64();
            long indexStart = input.ReadVInt64();
            long packedIndexStart = input.ReadVInt64();
            long packedOffsetsStart = input.ReadVInt64();
            if (packedIndexStart < indexStart)
            {
                throw new CorruptIndexException("invalid packedIndexStart: " + packedIndexStart + " indexStart: " + indexStart + " numIndexTerms: " + numIndexTerms + " (resource=" + input + ")");
            }
            FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
            FieldIndexData previous = fields.Put(fieldInfo, new FieldIndexData(this, fieldInfo, numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart));
            if (previous != null)
            {
                throw new CorruptIndexException("duplicate field: " + fieldInfo.Name + " (resource=" + input + ")");
            }
        }
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(input);
        }
        if (indexDivisor > 0)
        {
            input.Dispose();
            input = null;
            if (success)
            {
                indexLoaded = true;
            }
            termBytesReader = termBytes.Freeze(true);
        }
    }
}
internal Lucene42DocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension)
{
    maxDoc = state.SegmentInfo.DocCount;
    string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
    // read in the entries from the metadata file.
    ChecksumIndexInput @in = state.Directory.OpenChecksumInput(metaName, state.Context);
    bool success = false;
    ramBytesUsed = new AtomicInt64(RamUsageEstimator.ShallowSizeOfInstance(this.GetType()));
    try
    {
        version = CodecUtil.CheckHeader(@in, metaCodec, VERSION_START, VERSION_CURRENT);
        numerics = new Dictionary<int, NumericEntry>();
        binaries = new Dictionary<int, BinaryEntry>();
        fsts = new Dictionary<int, FSTEntry>();
        ReadFields(@in /*, state.FieldInfos // LUCENENET: Never read */);

        if (version >= VERSION_CHECKSUM)
        {
            CodecUtil.CheckFooter(@in);
        }
        else
        {
#pragma warning disable 612, 618
            CodecUtil.CheckEOF(@in);
#pragma warning restore 612, 618
        }

        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Dispose(@in);
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(@in);
        }
    }

    success = false;
    try
    {
        string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension);
        data = state.Directory.OpenInput(dataName, state.Context);
        int version2 = CodecUtil.CheckHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
        if (version != version2)
        {
            throw new CorruptIndexException("Format versions mismatch");
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.DisposeWhileHandlingException(this.data);
        }
    }
}
internal MemoryDocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension)
{
    maxDoc = state.SegmentInfo.DocCount;
    string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
    // read in the entries from the metadata file.
    ChecksumIndexInput @in = state.Directory.OpenChecksumInput(metaName, state.Context);
    bool success = false;
    try
    {
        version = CodecUtil.CheckHeader(@in, metaCodec, VERSION_START, VERSION_CURRENT);
        // C# has no diamond operator, so the Java "new HashMap<>()" must spell out
        // its type arguments (the entry types match the parallel Lucene42 producer).
        numerics = new Dictionary<int, NumericEntry>();
        binaries = new Dictionary<int, BinaryEntry>();
        fsts = new Dictionary<int, FSTEntry>();
        ReadFields(@in, state.FieldInfos);

        if (version >= VERSION_CHECKSUM)
        {
            CodecUtil.CheckFooter(@in);
        }
        else
        {
            CodecUtil.CheckEOF(@in);
        }

        ramBytesUsed_Renamed = new AtomicLong(RamUsageEstimator.ShallowSizeOfInstance(this.GetType()));
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Close(@in);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(@in);
        }
    }

    success = false;
    try
    {
        string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension);
        data = state.Directory.OpenInput(dataName, state.Context);
        int version2 = CodecUtil.CheckHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
        if (version != version2)
        {
            throw new CorruptIndexException("Format versions mismatch");
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(this.data);
        }
    }
}