public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
{
    string fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene42FieldInfosFormat.EXTENSION);
    IndexOutput output = directory.CreateOutput(fileName, context);
    bool success = false;
    try
    {
        CodecUtil.WriteHeader(output, Lucene42FieldInfosFormat.CODEC_NAME, Lucene42FieldInfosFormat.FORMAT_CURRENT);
        output.WriteVInt32(infos.Count);
        foreach (FieldInfo fi in infos)
        {
            IndexOptions indexOptions = fi.IndexOptions;
            sbyte bits = 0x0;
            if (fi.HasVectors) { bits |= Lucene42FieldInfosFormat.STORE_TERMVECTOR; }
            if (fi.OmitsNorms) { bits |= Lucene42FieldInfosFormat.OMIT_NORMS; }
            if (fi.HasPayloads) { bits |= Lucene42FieldInfosFormat.STORE_PAYLOADS; }
            if (fi.IsIndexed)
            {
                bits |= Lucene42FieldInfosFormat.IS_INDEXED;
                Debug.Assert(indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !fi.HasPayloads);
                if (indexOptions == IndexOptions.DOCS_ONLY)
                {
                    bits |= Lucene42FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS;
                }
                else if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
                {
                    bits |= Lucene42FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS;
                }
                else if (indexOptions == IndexOptions.DOCS_AND_FREQS)
                {
                    bits |= Lucene42FieldInfosFormat.OMIT_POSITIONS;
                }
            }
            output.WriteString(fi.Name);
            output.WriteVInt32(fi.Number);
            output.WriteByte((byte)bits);

            // pack the DV types in one byte
            var dv = DocValuesByte(fi.DocValuesType);
            var nrm = DocValuesByte(fi.NormType);
            Debug.Assert((dv & (~0x0F)) == 0 && (nrm & (~0x0F)) == 0);
            var val = (byte)(0xFF & ((nrm << 4) | (byte)dv));
            output.WriteByte(val);
            output.WriteStringStringMap(fi.Attributes);
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            output.Dispose();
        }
        else
        {
            IOUtils.CloseWhileHandlingException(output);
        }
    }
}
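// The "pack the DV types in one byte" step above works because both type enums fit in
// 4 bits. A minimal standalone sketch of the same nibble packing/unpacking; the demo
// type and method names here are hypothetical, not part of Lucene.NET:
public static class NibblePackDemo
{
    public static byte Pack(byte low, byte high)
    {
        System.Diagnostics.Debug.Assert((low & ~0x0F) == 0 && (high & ~0x0F) == 0, "both values must fit in 4 bits");
        return (byte)(0xFF & ((high << 4) | low)); // high nibble | low nibble
    }

    public static void Unpack(byte packed, out byte low, out byte high)
    {
        low = (byte)(packed & 0x0F);
        high = (byte)((packed >> 4) & 0x0F);
    }

    public static void Main()
    {
        byte packed = Pack(low: 3, high: 9); // 0x93
        Unpack(packed, out byte dv, out byte nrm);
        System.Console.WriteLine($"packed=0x{packed:X2} dv={dv} nrm={nrm}"); // packed=0x93 dv=3 nrm=9
    }
}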
public override SegmentInfo Read(Directory directory, string segmentName, IOContext context)
{
    var scratch = new BytesRef();
    string segFileName = IndexFileNames.SegmentFileName(segmentName, "", SimpleTextSegmentInfoFormat.SI_EXTENSION);
    ChecksumIndexInput input = directory.OpenChecksumInput(segFileName, context);
    bool success = false;
    try
    {
        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_VERSION));
        string version = ReadString(SimpleTextSegmentInfoWriter.SI_VERSION.Length, scratch);

        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_DOCCOUNT));
        int docCount = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_DOCCOUNT.Length, scratch), CultureInfo.InvariantCulture);

        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_USECOMPOUND));
        bool isCompoundFile = Convert.ToBoolean(ReadString(SimpleTextSegmentInfoWriter.SI_USECOMPOUND.Length, scratch), CultureInfo.InvariantCulture);

        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_NUM_DIAG));
        int numDiag = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_NUM_DIAG.Length, scratch), CultureInfo.InvariantCulture);

        IDictionary<string, string> diagnostics = new Dictionary<string, string>();
        for (int i = 0; i < numDiag; i++)
        {
            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_DIAG_KEY));
            string key = ReadString(SimpleTextSegmentInfoWriter.SI_DIAG_KEY.Length, scratch);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_DIAG_VALUE));
            string value = ReadString(SimpleTextSegmentInfoWriter.SI_DIAG_VALUE.Length, scratch);
            diagnostics[key] = value;
        }

        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_NUM_FILES));
        int numFiles = Convert.ToInt32(ReadString(SimpleTextSegmentInfoWriter.SI_NUM_FILES.Length, scratch), CultureInfo.InvariantCulture);

        var files = new HashSet<string>();
        for (int i = 0; i < numFiles; i++)
        {
            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextSegmentInfoWriter.SI_FILE));
            string fileName = ReadString(SimpleTextSegmentInfoWriter.SI_FILE.Length, scratch);
            files.Add(fileName);
        }

        SimpleTextUtil.CheckFooter(input);

        var info = new SegmentInfo(directory, version, segmentName, docCount, isCompoundFile, null, diagnostics);
        info.SetFiles(files);
        success = true;
        return info;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(input);
        }
        else
        {
            input.Dispose();
        }
    }
}
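// The ReadString helper used above is defined elsewhere in this class; a plausible
// minimal sketch, assuming the scratch bytes are UTF-8 and 'offset' is the length of
// the key prefix being stripped from the current line:
private static string ReadString(int offset, BytesRef scratch)
{
    return System.Text.Encoding.UTF8.GetString(scratch.Bytes, scratch.Offset + offset, scratch.Length - offset);
}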
// note: just like segmentreader in 3.x, we open up all the files here (including separate norms) up front.
// but we just don't do any seeks or reading yet.
public Lucene3xNormsProducer(Directory dir, SegmentInfo info, FieldInfos fields, IOContext context)
{
    Directory separateNormsDir = info.Dir; // separate norms are never inside CFS
    maxdoc = info.DocCount;
    //string segmentName = info.Name; // LUCENENET: IDE0059: Remove unnecessary value assignment
    bool success = false;
    try
    {
        long nextNormSeek = NORMS_HEADER.Length; // skip header (header unused for now)
        foreach (FieldInfo fi in fields)
        {
            if (fi.HasNorms)
            {
                string fileName = GetNormFilename(info, fi.Number);
                Directory d = HasSeparateNorms(info, fi.Number) ? separateNormsDir : dir;

                // singleNormFile means multiple norms share this file
                bool singleNormFile = IndexFileNames.MatchesExtension(fileName, NORMS_EXTENSION);
                IndexInput normInput = null;
                long normSeek;

                if (singleNormFile)
                {
                    normSeek = nextNormSeek;
                    if (singleNormStream == null)
                    {
                        singleNormStream = d.OpenInput(fileName, context);
                        openFiles.Add(singleNormStream);
                    }
                    // All norms in the .nrm file can share a single IndexInput since
                    // they are only used in a synchronized context.
                    // If this were to change in the future, a clone could be done here.
                    normInput = singleNormStream;
                }
                else
                {
                    normInput = d.OpenInput(fileName, context);
                    openFiles.Add(normInput);
                    // if the segment was created in 3.2 or after, we wrote the header for sure,
                    // and don't need to do the sketchy file size check. otherwise, we check
                    // if the size is exactly equal to maxDoc to detect a headerless file.
                    // NOTE: remove this check in Lucene 5.0!
                    string version = info.Version;
                    bool isUnversioned = (version == null || StringHelper.VersionComparer.Compare(version, "3.2") < 0) && normInput.Length == maxdoc;
                    if (isUnversioned)
                    {
                        normSeek = 0;
                    }
                    else
                    {
                        normSeek = NORMS_HEADER.Length;
                    }
                }
                NormsDocValues norm = new NormsDocValues(this, normInput, normSeek);
                norms[fi.Name] = norm;
                nextNormSeek += maxdoc; // increment also if some norms are separate
            }
        }
        // TODO: change to a real check? see LUCENE-3619
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(singleNormStream == null || nextNormSeek == singleNormStream.Length,
                singleNormStream != null ? "len: {0} expected: {1}" : "null",
                singleNormStream?.Length ?? 0, nextNormSeek);
        }
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.DisposeWhileHandlingException(openFiles);
        }
    }
    ramBytesUsed = new AtomicInt64();
}
/// <summary>
/// Sole constructor. </summary>
public CompressingStoredFieldsReader(Directory d, SegmentInfo si, string segmentSuffix, FieldInfos fn, IOContext context, string formatName, CompressionMode compressionMode)
{
    this.compressionMode = compressionMode;
    string segment = si.Name;
    bool success = false;
    fieldInfos = fn;
    numDocs = si.DocCount;
    ChecksumIndexInput indexStream = null;
    try
    {
        string indexStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_INDEX_EXTENSION);
        string fieldsStreamFN = IndexFileNames.SegmentFileName(segment, segmentSuffix, Lucene40StoredFieldsWriter.FIELDS_EXTENSION);

        // Load the index into memory
        indexStream = d.OpenChecksumInput(indexStreamFN, context);
        string codecNameIdx = formatName + CompressingStoredFieldsWriter.CODEC_SFX_IDX;
        version = CodecUtil.CheckHeader(indexStream, codecNameIdx, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
        Debug.Assert(CodecUtil.HeaderLength(codecNameIdx) == indexStream.GetFilePointer());
        indexReader = new CompressingStoredFieldsIndexReader(indexStream, si);

        long maxPointer = -1;
        if (version >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
        {
            maxPointer = indexStream.ReadVInt64();
            CodecUtil.CheckFooter(indexStream);
        }
        else
        {
#pragma warning disable 612, 618
            CodecUtil.CheckEOF(indexStream);
#pragma warning restore 612, 618
        }
        indexStream.Dispose();
        indexStream = null;

        // Open the data file and read metadata
        fieldsStream = d.OpenInput(fieldsStreamFN, context);
        if (version >= CompressingStoredFieldsWriter.VERSION_CHECKSUM)
        {
            if (maxPointer + CodecUtil.FooterLength() != fieldsStream.Length)
            {
                throw new CorruptIndexException("Invalid fieldsStream maxPointer (file truncated?): maxPointer=" + maxPointer + ", length=" + fieldsStream.Length);
            }
        }
        else
        {
            maxPointer = fieldsStream.Length;
        }
        this.maxPointer = maxPointer;
        string codecNameDat = formatName + CompressingStoredFieldsWriter.CODEC_SFX_DAT;
        int fieldsVersion = CodecUtil.CheckHeader(fieldsStream, codecNameDat, CompressingStoredFieldsWriter.VERSION_START, CompressingStoredFieldsWriter.VERSION_CURRENT);
        if (version != fieldsVersion)
        {
            throw new CorruptIndexException("Version mismatch between stored fields index and data: " + version + " != " + fieldsVersion);
        }
        Debug.Assert(CodecUtil.HeaderLength(codecNameDat) == fieldsStream.GetFilePointer());

        if (version >= CompressingStoredFieldsWriter.VERSION_BIG_CHUNKS)
        {
            chunkSize = fieldsStream.ReadVInt32();
        }
        else
        {
            chunkSize = -1;
        }
        packedIntsVersion = fieldsStream.ReadVInt32();
        decompressor = compressionMode.NewDecompressor();
        this.bytes = new BytesRef();
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(this, indexStream);
        }
    }
}
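// Nearly every constructor in this section follows the same idiom: open resources,
// flip a 'success' flag as the last statement of the try block, and dispose in
// 'finally' only on failure so the original exception is preserved. A distilled,
// self-contained sketch of that pattern with hypothetical resources (plain
// FileStreams, not Lucene.NET types):
public sealed class TwoFileReader : System.IDisposable
{
    private readonly System.IO.FileStream index;
    private readonly System.IO.FileStream data;

    public TwoFileReader(string indexPath, string dataPath)
    {
        bool success = false;
        try
        {
            index = System.IO.File.OpenRead(indexPath);
            data = System.IO.File.OpenRead(dataPath); // may throw after 'index' is already open
            success = true;
        }
        finally
        {
            if (!success)
            {
                // close whatever was opened, suppressing secondary exceptions so the
                // first one propagates (the job IOUtils.CloseWhileHandlingException does)
                try { index?.Dispose(); } catch { /* suppressed */ }
                try { data?.Dispose(); } catch { /* suppressed */ }
            }
        }
    }

    public void Dispose()
    {
        index.Dispose();
        data.Dispose();
    }
}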
private static IDictionary<string, FileEntry> ReadLegacyEntries(IndexInput stream, int firstInt)
{
    IDictionary<string, FileEntry> entries = new Dictionary<string, FileEntry>();
    int count;
    bool stripSegmentName;
    if (firstInt < CompoundFileWriter.FORMAT_PRE_VERSION)
    {
        if (firstInt < CompoundFileWriter.FORMAT_NO_SEGMENT_PREFIX)
        {
            throw new CorruptIndexException("Incompatible format version: " + firstInt + " expected >= " + CompoundFileWriter.FORMAT_NO_SEGMENT_PREFIX + " (resource: " + stream + ")");
        }
        // It's a post-3.1 index, read the count.
        count = stream.ReadVInt32();
        stripSegmentName = false;
    }
    else
    {
        count = firstInt;
        stripSegmentName = true;
    }

    // read the directory and init files
    long streamLength = stream.Length;
    FileEntry entry = null;
    for (int i = 0; i < count; i++)
    {
        long offset = stream.ReadInt64();
        if (offset < 0 || offset > streamLength)
        {
            throw new CorruptIndexException("Invalid CFS entry offset: " + offset + " (resource: " + stream + ")");
        }
        string id = stream.ReadString();

        if (stripSegmentName)
        {
            // Fix the id to not include the segment names. this is relevant for
            // pre-3.1 indexes.
            id = IndexFileNames.StripSegmentName(id);
        }

        if (entry != null)
        {
            // set length of the previous entry
            entry.Length = offset - entry.Offset;
        }

        entry = new FileEntry();
        entry.Offset = offset;

        FileEntry previous = entries.Put(id, entry);
        if (previous != null)
        {
            throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS: " + stream);
        }
    }

    // set the length of the final entry
    if (entry != null)
    {
        entry.Length = streamLength - entry.Offset;
    }

    return entries;
}
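// The legacy table stores only offsets, so each entry's length is derived as the gap
// to the next offset, with the final entry running to the end of the stream. A tiny
// worked example of that computation on plain arrays (hypothetical data):
public static class OffsetLengthDemo
{
    public static void Main()
    {
        long streamLength = 100;
        long[] offsets = { 0, 10, 40 };            // offsets as read from the directory
        long[] lengths = new long[offsets.Length];
        for (int i = 0; i < offsets.Length; i++)
        {
            long next = i + 1 < offsets.Length ? offsets[i + 1] : streamLength;
            lengths[i] = next - offsets[i];        // gap to the next entry (or to EOF)
        }
        System.Console.WriteLine(string.Join(",", lengths)); // prints: 10,30,60
    }
}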
public override FieldsProducer FieldsProducer(SegmentReadState state)
{
    string seedFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, SEED_EXT);
    IndexInput @in = state.Directory.OpenInput(seedFileName, state.Context);
    long seed = @in.ReadInt64();
    if (LuceneTestCase.VERBOSE) { Console.WriteLine("MockRandomCodec: reading from seg=" + state.SegmentInfo.Name + " formatID=" + state.SegmentSuffix + " seed=" + seed); }
    @in.Dispose();

    Random random = new Random((int)seed);

    int readBufferSize = TestUtil.NextInt32(random, 1, 4096);
    if (LuceneTestCase.VERBOSE) { Console.WriteLine("MockRandomCodec: readBufferSize=" + readBufferSize); }

    PostingsReaderBase postingsReader;
    if (random.nextBoolean())
    {
        if (LuceneTestCase.VERBOSE) { Console.WriteLine("MockRandomCodec: reading Sep postings"); }
        postingsReader = new SepPostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, new MockInt32StreamFactory(random), state.SegmentSuffix);
    }
    else
    {
        if (LuceneTestCase.VERBOSE) { Console.WriteLine("MockRandomCodec: reading Standard postings"); }
        postingsReader = new Lucene41PostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, state.SegmentSuffix);
    }

    if (random.nextBoolean())
    {
        int totTFCutoff = TestUtil.NextInt32(random, 1, 20);
        if (LuceneTestCase.VERBOSE) { Console.WriteLine("MockRandomCodec: reading pulsing postings with totTFCutoff=" + totTFCutoff); }
        postingsReader = new PulsingPostingsReader(state, postingsReader);
    }

    FieldsProducer fields;
    int t1 = random.nextInt(4);
    if (t1 == 0)
    {
        bool success = false;
        try
        {
            fields = new FSTTermsReader(state, postingsReader);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }
    }
    else if (t1 == 1)
    {
        bool success = false;
        try
        {
            fields = new FSTOrdTermsReader(state, postingsReader);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }
    }
    else if (t1 == 2)
    {
        // Use BlockTree terms dict
        if (LuceneTestCase.VERBOSE) { Console.WriteLine("MockRandomCodec: reading BlockTree terms dict"); }
        bool success = false;
        try
        {
            fields = new BlockTreeTermsReader(state.Directory, state.FieldInfos, state.SegmentInfo, postingsReader, state.Context, state.SegmentSuffix, state.TermsIndexDivisor);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }
    }
    else
    {
        if (LuceneTestCase.VERBOSE) { Console.WriteLine("MockRandomCodec: reading Block terms dict"); }
        TermsIndexReaderBase indexReader;
        bool success = false;
        try
        {
            bool doFixedGap = random.nextBoolean();

            // randomness diverges from writer, here:
            // if termsIndexDivisor is set to -1, we should not touch it. It means a
            // test explicitly instructed not to load the terms index.
            if (state.TermsIndexDivisor != -1)
            {
                state.TermsIndexDivisor = TestUtil.NextInt32(random, 1, 10);
            }

            if (doFixedGap)
            {
                if (LuceneTestCase.VERBOSE) { Console.WriteLine("MockRandomCodec: fixed-gap terms index (divisor=" + state.TermsIndexDivisor + ")"); }
                indexReader = new FixedGapTermsIndexReader(state.Directory, state.FieldInfos, state.SegmentInfo.Name, state.TermsIndexDivisor, BytesRef.UTF8SortedAsUnicodeComparer, state.SegmentSuffix, state.Context);
            }
            else
            {
                int n2 = random.nextInt(3);
                if (n2 == 1)
                {
                    random.nextInt();
                }
                else if (n2 == 2)
                {
                    random.nextLong();
                }
                if (LuceneTestCase.VERBOSE) { Console.WriteLine("MockRandomCodec: variable-gap terms index (divisor=" + state.TermsIndexDivisor + ")"); }
                indexReader = new VariableGapTermsIndexReader(state.Directory, state.FieldInfos, state.SegmentInfo.Name, state.TermsIndexDivisor, state.SegmentSuffix, state.Context);
            }
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsReader.Dispose();
            }
        }

        success = false;
        try
        {
            fields = new BlockTermsReader(indexReader, state.Directory, state.FieldInfos, state.SegmentInfo, postingsReader, state.Context, state.SegmentSuffix);
            success = true;
        }
        finally
        {
            if (!success)
            {
                try
                {
                    postingsReader.Dispose();
                }
                finally
                {
                    indexReader.Dispose();
                }
            }
        }
    }
    return fields;
}
//static final boolean TEST = false;
public FSTOrdTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
{
    string termsIndexFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_INDEX_EXTENSION);
    string termsBlockFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, FSTOrdTermsWriter.TERMS_BLOCK_EXTENSION);

    this.postingsReader = postingsReader;
    ChecksumIndexInput indexIn = null;
    IndexInput blockIn = null;
    bool success = false;
    try
    {
        indexIn = state.Directory.OpenChecksumInput(termsIndexFileName, state.Context);
        blockIn = state.Directory.OpenInput(termsBlockFileName, state.Context);
        version = ReadHeader(indexIn);
        ReadHeader(blockIn);
        if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM)
        {
            CodecUtil.ChecksumEntireFile(blockIn);
        }

        this.postingsReader.Init(blockIn);
        SeekDir(blockIn);

        FieldInfos fieldInfos = state.FieldInfos;
        int numFields = blockIn.ReadVInt32();
        for (int i = 0; i < numFields; i++)
        {
            FieldInfo fieldInfo = fieldInfos.FieldInfo(blockIn.ReadVInt32());
            bool hasFreq = fieldInfo.IndexOptions != IndexOptions.DOCS_ONLY;
            long numTerms = blockIn.ReadVInt64();
            long sumTotalTermFreq = hasFreq ? blockIn.ReadVInt64() : -1;
            long sumDocFreq = blockIn.ReadVInt64();
            int docCount = blockIn.ReadVInt32();
            int longsSize = blockIn.ReadVInt32();
            var index = new FST<long?>(indexIn, PositiveInt32Outputs.Singleton);

            var current = new TermsReader(this, fieldInfo, blockIn, numTerms, sumTotalTermFreq, sumDocFreq, docCount, longsSize, index);

            TermsReader previous;
            // LUCENENET NOTE: This simulates a put operation in Java,
            // getting the prior value first before setting it.
            fields.TryGetValue(fieldInfo.Name, out previous);
            fields[fieldInfo.Name] = current;
            CheckFieldSummary(state.SegmentInfo, indexIn, blockIn, current, previous);
        }
        if (version >= FSTOrdTermsWriter.TERMS_VERSION_CHECKSUM)
        {
            CodecUtil.CheckFooter(indexIn);
        }
        else
        {
#pragma warning disable 612, 618
            CodecUtil.CheckEOF(indexIn);
#pragma warning restore 612, 618
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Dispose(indexIn, blockIn);
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(indexIn, blockIn);
        }
    }
}
public override DocValuesProducer FieldsProducer(SegmentReadState state)
{
    string filename = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, "dv", IndexFileNames.COMPOUND_FILE_EXTENSION);
    return new Lucene40DocValuesReader(state, filename, Lucene40FieldInfosReader.LEGACY_DV_TYPE_KEY);
}
public FixedGapTermsIndexReader(Directory dir, FieldInfos fieldInfos, string segment, int indexDivisor, IComparer<BytesRef> termComp, string segmentSuffix, IOContext context)
{
    _termComp = termComp;

    Debug.Assert(indexDivisor == -1 || indexDivisor > 0);

    _input = dir.OpenInput(IndexFileNames.SegmentFileName(segment, segmentSuffix, FixedGapTermsIndexWriter.TERMS_INDEX_EXTENSION), context);
    var success = false;
    try
    {
        _version = ReadHeader(_input);
        if (_version >= FixedGapTermsIndexWriter.VERSION_CHECKSUM)
        {
            CodecUtil.ChecksumEntireFile(_input);
        }

        indexInterval = _input.ReadInt32();
        if (indexInterval < 1)
        {
            throw new CorruptIndexException(string.Format("Invalid indexInterval: {0}, Resource: {1}", indexInterval, _input));
        }

        _indexDivisor = indexDivisor;
        if (indexDivisor < 0)
        {
            _totalIndexInterval = indexInterval;
        }
        else
        {
            // In case terms index gets loaded, later, on demand
            _totalIndexInterval = indexInterval * indexDivisor;
        }
        Debug.Assert(_totalIndexInterval > 0);

        SeekDir(_input, _dirOffset);

        // Read directory
        int numFields = _input.ReadVInt32();
        if (numFields < 0)
        {
            throw new CorruptIndexException(string.Format("Invalid numFields: {0}, Resource: {1}", numFields, _input));
        }

        for (int i = 0; i < numFields; i++)
        {
            int field = _input.ReadVInt32();
            int numIndexTerms = _input.ReadVInt32();
            if (numIndexTerms < 0)
            {
                throw new CorruptIndexException(string.Format("Invalid numIndexTerms: {0}, Resource: {1}", numIndexTerms, _input));
            }

            long termsStart = _input.ReadVInt64();
            long indexStart = _input.ReadVInt64();
            long packedIndexStart = _input.ReadVInt64();
            long packedOffsetsStart = _input.ReadVInt64();
            if (packedIndexStart < indexStart)
            {
                throw new CorruptIndexException(string.Format("Invalid packedIndexStart: {0}, IndexStart: {1}, NumIndexTerms: {2}, Resource: {3}", packedIndexStart, indexStart, numIndexTerms, _input));
            }

            FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
            try
            {
                _fields.Add(fieldInfo, new FieldIndexData(numIndexTerms, indexStart, termsStart, packedIndexStart, packedOffsetsStart, this));
            }
            catch (ArgumentException)
            {
                throw new CorruptIndexException(string.Format("Duplicate field: {0}, Resource {1}", fieldInfo.Name, _input));
            }
        }
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(_input);
        }
        if (indexDivisor > 0)
        {
            _input.Dispose();
            _input = null;
            if (success)
            {
                _indexLoaded = true;
            }
            _termBytesReader = _termBytes.Freeze(true);
        }
    }
}
/// <summary>
/// expert: instantiates a new reader </summary>
protected internal Lucene45DocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension)
{
    string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
    // read in the entries from the metadata file.
    ChecksumIndexInput @in = state.Directory.OpenChecksumInput(metaName, state.Context);
    this.MaxDoc = state.SegmentInfo.DocCount;
    bool success = false;
    try
    {
        Version = CodecUtil.CheckHeader(@in, metaCodec, Lucene45DocValuesFormat.VERSION_START, Lucene45DocValuesFormat.VERSION_CURRENT);
        Numerics = new Dictionary<int, NumericEntry>();
        Ords = new Dictionary<int, NumericEntry>();
        OrdIndexes = new Dictionary<int, NumericEntry>();
        Binaries = new Dictionary<int, BinaryEntry>();
        SortedSets = new Dictionary<int, SortedSetEntry>();
        ReadFields(@in, state.FieldInfos);

        if (Version >= Lucene45DocValuesFormat.VERSION_CHECKSUM)
        {
            CodecUtil.CheckFooter(@in);
        }
        else
        {
            CodecUtil.CheckEOF(@in);
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Close(@in);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(@in);
        }
    }

    success = false;
    try
    {
        string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension);
        Data = state.Directory.OpenInput(dataName, state.Context);
        int version2 = CodecUtil.CheckHeader(Data, dataCodec, Lucene45DocValuesFormat.VERSION_START, Lucene45DocValuesFormat.VERSION_CURRENT);
        if (Version != version2)
        {
            throw new CorruptIndexException("Format versions mismatch");
        }
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(this.Data);
        }
    }

    RamBytesUsed_Renamed = new AtomicLong(RamUsageEstimator.ShallowSizeOfInstance(this.GetType()));
}
// LUCENENET NOTE: Changed from public to internal because the class had to be made public, but is not for public use.
internal SimpleTextDocValuesReader(SegmentReadState state, string ext)
{
    data = state.Directory.OpenInput(IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, ext), state.Context);
    maxDoc = state.SegmentInfo.DocCount;
    while (true)
    {
        ReadLine();
        if (scratch.Equals(SimpleTextDocValuesWriter.END))
        {
            break;
        }
        if (Debugging.AssertsEnabled) { Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.FIELD), () => scratch.Utf8ToString()); }
        var fieldName = StripPrefix(SimpleTextDocValuesWriter.FIELD);
        var field = new OneField();
        fields[fieldName] = field;

        ReadLine();
        if (Debugging.AssertsEnabled) { Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.TYPE), () => scratch.Utf8ToString()); }

        var dvType = (DocValuesType)Enum.Parse(typeof(DocValuesType), StripPrefix(SimpleTextDocValuesWriter.TYPE));
        // if (Debugging.AssertsEnabled) Debugging.Assert(dvType != null); // LUCENENET: Not possible for an enum to be null in .NET
        if (dvType == DocValuesType.NUMERIC)
        {
            ReadLine();
            if (Debugging.AssertsEnabled) { Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.MINVALUE), () => "got " + scratch.Utf8ToString() + " field=" + fieldName + " ext=" + ext); }
            field.MinValue = Convert.ToInt64(StripPrefix(SimpleTextDocValuesWriter.MINVALUE), CultureInfo.InvariantCulture);
            ReadLine();
            if (Debugging.AssertsEnabled) { Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.PATTERN)); }
            field.Pattern = StripPrefix(SimpleTextDocValuesWriter.PATTERN);
            field.DataStartFilePointer = data.GetFilePointer();
            data.Seek(data.GetFilePointer() + (1 + field.Pattern.Length + 2) * maxDoc);
        }
        else if (dvType == DocValuesType.BINARY)
        {
            ReadLine();
            if (Debugging.AssertsEnabled) { Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.MAXLENGTH)); }
            field.MaxLength = Convert.ToInt32(StripPrefix(SimpleTextDocValuesWriter.MAXLENGTH), CultureInfo.InvariantCulture);
            ReadLine();
            if (Debugging.AssertsEnabled) { Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.PATTERN)); }
            field.Pattern = StripPrefix(SimpleTextDocValuesWriter.PATTERN);
            field.DataStartFilePointer = data.GetFilePointer();
            data.Seek(data.GetFilePointer() + (9 + field.Pattern.Length + field.MaxLength + 2) * maxDoc);
        }
        else if (dvType == DocValuesType.SORTED || dvType == DocValuesType.SORTED_SET)
        {
            ReadLine();
            if (Debugging.AssertsEnabled) { Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.NUMVALUES)); }
            field.NumValues = Convert.ToInt64(StripPrefix(SimpleTextDocValuesWriter.NUMVALUES), CultureInfo.InvariantCulture);
            ReadLine();
            if (Debugging.AssertsEnabled) { Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.MAXLENGTH)); }
            field.MaxLength = Convert.ToInt32(StripPrefix(SimpleTextDocValuesWriter.MAXLENGTH), CultureInfo.InvariantCulture);
            ReadLine();
            if (Debugging.AssertsEnabled) { Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.PATTERN)); }
            field.Pattern = StripPrefix(SimpleTextDocValuesWriter.PATTERN);
            ReadLine();
            if (Debugging.AssertsEnabled) { Debugging.Assert(StartsWith(SimpleTextDocValuesWriter.ORDPATTERN)); }
            field.OrdPattern = StripPrefix(SimpleTextDocValuesWriter.ORDPATTERN);
            field.DataStartFilePointer = data.GetFilePointer();
            data.Seek(data.GetFilePointer() + (9 + field.Pattern.Length + field.MaxLength) * field.NumValues + (1 + field.OrdPattern.Length) * maxDoc);
        }
        else
        {
            throw new ArgumentOutOfRangeException();
        }
    }

    // We should only be called from above if at least one
    // field has DVs:
    if (Debugging.AssertsEnabled) { Debugging.Assert(fields.Count > 0); }
}
public override void Abort()
{
    try
    {
        Dispose();
    }
    catch (Exception)
    {
        // ignored
    }
    IOUtils.DeleteFilesIgnoringExceptions(Directory,
        IndexFileNames.SegmentFileName(Segment, "", Lucene3xTermVectorsReader.VECTORS_INDEX_EXTENSION),
        IndexFileNames.SegmentFileName(Segment, "", Lucene3xTermVectorsReader.VECTORS_DOCUMENTS_EXTENSION),
        IndexFileNames.SegmentFileName(Segment, "", Lucene3xTermVectorsReader.VECTORS_FIELDS_EXTENSION));
}
public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
{
    string fileName = IndexFileNames.SegmentFileName(segmentName, "", Lucene40FieldInfosFormat.FIELD_INFOS_EXTENSION);
    IndexInput input = directory.OpenInput(fileName, iocontext);

    bool success = false;
    try
    {
        CodecUtil.CheckHeader(input, Lucene40FieldInfosFormat.CODEC_NAME, Lucene40FieldInfosFormat.FORMAT_START, Lucene40FieldInfosFormat.FORMAT_CURRENT);

        int size = input.ReadVInt32(); // read in the size
        FieldInfo[] infos = new FieldInfo[size];

        for (int i = 0; i < size; i++)
        {
            string name = input.ReadString();
            int fieldNumber = input.ReadVInt32();
            byte bits = input.ReadByte();
            bool isIndexed = (bits & Lucene40FieldInfosFormat.IS_INDEXED) != 0;
            bool storeTermVector = (bits & Lucene40FieldInfosFormat.STORE_TERMVECTOR) != 0;
            bool omitNorms = (bits & Lucene40FieldInfosFormat.OMIT_NORMS) != 0;
            bool storePayloads = (bits & Lucene40FieldInfosFormat.STORE_PAYLOADS) != 0;
            IndexOptions indexOptions;
            if (!isIndexed)
            {
                indexOptions = IndexOptions.NONE;
            }
            else if ((bits & Lucene40FieldInfosFormat.OMIT_TERM_FREQ_AND_POSITIONS) != 0)
            {
                indexOptions = IndexOptions.DOCS_ONLY;
            }
            else if ((bits & Lucene40FieldInfosFormat.OMIT_POSITIONS) != 0)
            {
                indexOptions = IndexOptions.DOCS_AND_FREQS;
            }
            else if ((bits & Lucene40FieldInfosFormat.STORE_OFFSETS_IN_POSTINGS) != 0)
            {
                indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            }
            else
            {
                indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
            }

            // LUCENE-3027: past indices were able to write
            // storePayloads=true when omitTFAP is also true,
            // which is invalid. We correct that, here:
            if (isIndexed && indexOptions.CompareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0)
            {
                storePayloads = false;
            }

            // DV Types are packed in one byte
            byte val = input.ReadByte();
            LegacyDocValuesType oldValuesType = GetDocValuesType((sbyte)(val & 0x0F));
            LegacyDocValuesType oldNormsType = GetDocValuesType((sbyte)(((int)((uint)val >> 4)) & 0x0F));
            IDictionary<string, string> attributes = input.ReadStringStringMap();
            if (oldValuesType.GetMapping() != DocValuesType.NONE)
            {
                attributes[LEGACY_DV_TYPE_KEY] = oldValuesType.ToString();
            }
            if (oldNormsType.GetMapping() != DocValuesType.NONE)
            {
                if (oldNormsType.GetMapping() != DocValuesType.NUMERIC)
                {
                    throw new CorruptIndexException("invalid norm type: " + oldNormsType + " (resource=" + input + ")");
                }
                attributes[LEGACY_NORM_TYPE_KEY] = oldNormsType.ToString();
            }
            infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, oldValuesType.GetMapping(), oldNormsType.GetMapping(), attributes);
        }

        CodecUtil.CheckEOF(input);
        FieldInfos fieldInfos = new FieldInfos(infos);
        success = true;
        return fieldInfos;
    }
    finally
    {
        if (success)
        {
            input.Dispose();
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(input);
        }
    }
}
public Lucene3xStoredFieldsReader(Directory d, SegmentInfo si, FieldInfos fn, IOContext context)
{
    string segment = Lucene3xSegmentInfoFormat.GetDocStoreSegment(si);
    int docStoreOffset = Lucene3xSegmentInfoFormat.GetDocStoreOffset(si);
    int size = si.DocCount;
    bool success = false;
    fieldInfos = fn;
    try
    {
        if (docStoreOffset != -1 && Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si))
        {
            d = storeCFSReader = new CompoundFileDirectory(si.Dir, IndexFileNames.SegmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false);
        }
        else
        {
            storeCFSReader = null;
        }
        fieldsStream = d.OpenInput(IndexFileNames.SegmentFileName(segment, "", FIELDS_EXTENSION), context);
        string indexStreamFN = IndexFileNames.SegmentFileName(segment, "", FIELDS_INDEX_EXTENSION);
        indexStream = d.OpenInput(indexStreamFN, context);

        format = indexStream.ReadInt32();

        if (format < FORMAT_MINIMUM)
        {
            throw new IndexFormatTooOldException(indexStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
        }
        if (format > FORMAT_CURRENT)
        {
            throw new IndexFormatTooNewException(indexStream, format, FORMAT_MINIMUM, FORMAT_CURRENT);
        }

        long indexSize = indexStream.Length - FORMAT_SIZE;

        if (docStoreOffset != -1)
        {
            // We read only a slice out of this shared fields file
            this.docStoreOffset = docStoreOffset;
            this.size = size;

            // Verify the file is long enough to hold all of our
            // docs
            Debug.Assert(((int)(indexSize / 8)) >= size + this.docStoreOffset, "indexSize=" + indexSize + " size=" + size + " docStoreOffset=" + docStoreOffset);
        }
        else
        {
            this.docStoreOffset = 0;
            this.size = (int)(indexSize >> 3); // each index entry is one long (8 bytes)

            // Verify two sources of "maxDoc" agree:
            if (this.size != si.DocCount)
            {
                throw new CorruptIndexException("doc counts differ for segment " + segment + ": fieldsReader shows " + this.size + " but segmentInfo shows " + si.DocCount);
            }
        }
        numTotalDocs = (int)(indexSize >> 3);
        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            try
            {
                Dispose();
            }
            catch (Exception) // keep our original exception
            {
            }
        }
    }
}
public override void Abort()
{
    try
    {
        Dispose();
    }
    catch (Exception)
    {
        // ignored
    }
    IOUtils.DeleteFilesIgnoringExceptions(Directory,
        IndexFileNames.SegmentFileName(Segment, "", FIELDS_EXTENSION),
        IndexFileNames.SegmentFileName(Segment, "", FIELDS_INDEX_EXTENSION));
}
// private string segment;
public BlockTermsReader(TermsIndexReaderBase indexReader, Directory dir, FieldInfos fieldInfos, SegmentInfo info, PostingsReaderBase postingsReader, IOContext context, string segmentSuffix)
{
    this.postingsReader = postingsReader;

    // this.segment = segment;
    input = dir.OpenInput(IndexFileNames.SegmentFileName(info.Name, segmentSuffix, BlockTermsWriter.TERMS_EXTENSION), context);

    bool success = false;
    try
    {
        version = ReadHeader(input);

        // Have PostingsReader init itself
        postingsReader.Init(input);

        // Read per-field details
        SeekDir(input, dirOffset);

        int numFields = input.ReadVInt32();
        if (numFields < 0)
        {
            throw new CorruptIndexException("invalid number of fields: " + numFields + " (resource=" + input + ")");
        }
        for (int i = 0; i < numFields; i++)
        {
            int field = input.ReadVInt32();
            long numTerms = input.ReadVInt64();
            Debug.Assert(numTerms >= 0);
            long termsStartPointer = input.ReadVInt64();
            FieldInfo fieldInfo = fieldInfos.FieldInfo(field);
            long sumTotalTermFreq = fieldInfo.IndexOptions == IndexOptions.DOCS_ONLY ? -1 : input.ReadVInt64();
            long sumDocFreq = input.ReadVInt64();
            int docCount = input.ReadVInt32();
            int longsSize = version >= BlockTermsWriter.VERSION_META_ARRAY ? input.ReadVInt32() : 0;
            if (docCount < 0 || docCount > info.DocCount)
            {
                // #docs with field must be <= #docs
                throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.DocCount + " (resource=" + input + ")");
            }
            if (sumDocFreq < docCount)
            {
                // #postings must be >= #docs with field
                throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount + " (resource=" + input + ")");
            }
            if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq)
            {
                // #positions must be >= #postings
                throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + input + ")");
            }
            FieldReader previous = fields.Put(fieldInfo.Name, new FieldReader(this, fieldInfo, numTerms, termsStartPointer, sumTotalTermFreq, sumDocFreq, docCount, longsSize));
            if (previous != null)
            {
                throw new CorruptIndexException("duplicate fields: " + fieldInfo.Name + " (resource=" + input + ")");
            }
        }
        success = true;
    }
    finally
    {
        if (!success)
        {
            input.Dispose();
        }
    }

    this.indexReader = indexReader;
}
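// The three corruption checks above encode ordering invariants among the per-field
// statistics: docs-with-field <= maxDoc, total postings >= docs-with-field, and total
// positions >= total postings (when positions are tracked at all). A compact,
// hypothetical restatement of just that validation logic (not Lucene.NET API):
public static class FieldStatsInvariants
{
    public static void Validate(int maxDoc, int docCount, long sumDocFreq, long sumTotalTermFreq)
    {
        if (docCount < 0 || docCount > maxDoc)
            throw new System.InvalidOperationException($"invalid docCount: {docCount} maxDoc: {maxDoc}");
        if (sumDocFreq < docCount)
            throw new System.InvalidOperationException($"invalid sumDocFreq: {sumDocFreq} docCount: {docCount}");
        if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq)
            throw new System.InvalidOperationException($"invalid sumTotalTermFreq: {sumTotalTermFreq} sumDocFreq: {sumDocFreq}");
    }
}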
public override FieldsConsumer FieldsConsumer(SegmentWriteState state)
{
    int minSkipInterval;
    if (state.SegmentInfo.DocCount > 1000000)
    {
        // Test2BPostings can OOME otherwise:
        minSkipInterval = 3;
    }
    else
    {
        minSkipInterval = 2;
    }

    // we pull this before the seed intentionally: because it's not consumed at runtime
    // (the skipInterval is written into postings header)
    int skipInterval = TestUtil.NextInt32(seedRandom, minSkipInterval, 10);
    if (LuceneTestCase.VERBOSE) { Console.WriteLine("MockRandomCodec: skipInterval=" + skipInterval); }

    long seed = seedRandom.nextLong();
    if (LuceneTestCase.VERBOSE) { Console.WriteLine("MockRandomCodec: writing to seg=" + state.SegmentInfo.Name + " formatID=" + state.SegmentSuffix + " seed=" + seed); }

    string seedFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, SEED_EXT);
    IndexOutput @out = state.Directory.CreateOutput(seedFileName, state.Context);
    try
    {
        @out.WriteInt64(seed);
    }
    finally
    {
        @out.Dispose();
    }

    Random random = new Random((int)seed);
    random.nextInt(); // consume a random for buffersize

    PostingsWriterBase postingsWriter;
    if (random.nextBoolean())
    {
        postingsWriter = new SepPostingsWriter(state, new MockInt32StreamFactory(random), skipInterval);
    }
    else
    {
        if (LuceneTestCase.VERBOSE) { Console.WriteLine("MockRandomCodec: writing Standard postings"); }
        // TODO: randomize variables like acceptableOverheadRatio?!
        postingsWriter = new Lucene41PostingsWriter(state, skipInterval);
    }

    if (random.nextBoolean())
    {
        int totTFCutoff = TestUtil.NextInt32(random, 1, 20);
        if (LuceneTestCase.VERBOSE) { Console.WriteLine("MockRandomCodec: writing pulsing postings with totTFCutoff=" + totTFCutoff); }
        postingsWriter = new PulsingPostingsWriter(state, totTFCutoff, postingsWriter);
    }

    FieldsConsumer fields;
    int t1 = random.nextInt(4);

    if (t1 == 0)
    {
        bool success = false;
        try
        {
            fields = new FSTTermsWriter(state, postingsWriter);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsWriter.Dispose();
            }
        }
    }
    else if (t1 == 1)
    {
        bool success = false;
        try
        {
            fields = new FSTOrdTermsWriter(state, postingsWriter);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsWriter.Dispose();
            }
        }
    }
    else if (t1 == 2)
    {
        // Use BlockTree terms dict
        if (LuceneTestCase.VERBOSE) { Console.WriteLine("MockRandomCodec: writing BlockTree terms dict"); }

        // TODO: would be nice to allow 1 but this is very
        // slow to write
        int minTermsInBlock = TestUtil.NextInt32(random, 2, 100);
        int maxTermsInBlock = Math.Max(2, (minTermsInBlock - 1) * 2 + random.nextInt(100));

        bool success = false;
        try
        {
            fields = new BlockTreeTermsWriter(state, postingsWriter, minTermsInBlock, maxTermsInBlock);
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsWriter.Dispose();
            }
        }
    }
    else
    {
        if (LuceneTestCase.VERBOSE) { Console.WriteLine("MockRandomCodec: writing Block terms dict"); }

        bool success = false;
        TermsIndexWriterBase indexWriter;
        try
        {
            if (random.nextBoolean())
            {
                state.TermIndexInterval = TestUtil.NextInt32(random, 1, 100);
                if (LuceneTestCase.VERBOSE) { Console.WriteLine("MockRandomCodec: fixed-gap terms index (tii=" + state.TermIndexInterval + ")"); }
                indexWriter = new FixedGapTermsIndexWriter(state);
            }
            else
            {
                VariableGapTermsIndexWriter.IndexTermSelector selector;
                int n2 = random.nextInt(3);
                if (n2 == 0)
                {
                    int tii = TestUtil.NextInt32(random, 1, 100);
                    selector = new VariableGapTermsIndexWriter.EveryNTermSelector(tii);
                    if (LuceneTestCase.VERBOSE) { Console.WriteLine("MockRandomCodec: variable-gap terms index (tii=" + tii + ")"); }
                }
                else if (n2 == 1)
                {
                    int docFreqThresh = TestUtil.NextInt32(random, 2, 100);
                    int tii = TestUtil.NextInt32(random, 1, 100);
                    selector = new VariableGapTermsIndexWriter.EveryNOrDocFreqTermSelector(docFreqThresh, tii);
                }
                else
                {
                    int seed2 = random.Next();
                    int gap = TestUtil.NextInt32(random, 2, 40);
                    if (LuceneTestCase.VERBOSE) { Console.WriteLine("MockRandomCodec: random-gap terms index (max gap=" + gap + ")"); }
                    selector = new IndexTermSelectorAnonymousHelper(seed2, gap);
                    // Replaces the Java anonymous class:
                    // selector = new VariableGapTermsIndexWriter.IndexTermSelector() {
                    //     Random rand = new Random(seed2);
                    //     @Override
                    //     public bool isIndexTerm(BytesRef term, TermStats stats) {
                    //         return rand.nextInt(gap) == gap / 2;
                    //     }
                    //     @Override
                    //     public void newField(FieldInfo fieldInfo) {
                    //     }
                    // };
                }
                indexWriter = new VariableGapTermsIndexWriter(state, selector);
            }
            success = true;
        }
        finally
        {
            if (!success)
            {
                postingsWriter.Dispose();
            }
        }

        success = false;
        try
        {
            fields = new BlockTermsWriter(indexWriter, state, postingsWriter);
            success = true;
        }
        finally
        {
            if (!success)
            {
                try
                {
                    postingsWriter.Dispose();
                }
                finally
                {
                    indexWriter.Dispose();
                }
            }
        }
    }
    return fields;
}
internal Lucene42DocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension)
{
    maxDoc = state.SegmentInfo.DocCount;
    string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
    // read in the entries from the metadata file.
    ChecksumIndexInput @in = state.Directory.OpenChecksumInput(metaName, state.Context);
    bool success = false;
    ramBytesUsed = new AtomicInt64(RamUsageEstimator.ShallowSizeOfInstance(this.GetType()));
    try
    {
        version = CodecUtil.CheckHeader(@in, metaCodec, VERSION_START, VERSION_CURRENT);
        numerics = new Dictionary<int, NumericEntry>();
        binaries = new Dictionary<int, BinaryEntry>();
        fsts = new Dictionary<int, FSTEntry>();
        ReadFields(@in, state.FieldInfos);

        if (version >= VERSION_CHECKSUM)
        {
            CodecUtil.CheckFooter(@in);
        }
        else
        {
#pragma warning disable 612, 618
            CodecUtil.CheckEOF(@in);
#pragma warning restore 612, 618
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Dispose(@in);
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(@in);
        }
    }

    success = false;
    try
    {
        string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension);
        data = state.Directory.OpenInput(dataName, state.Context);
        int version2 = CodecUtil.CheckHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
        if (version != version2)
        {
            throw new CorruptIndexException("Format versions mismatch");
        }
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.DisposeWhileHandlingException(this.data);
        }
    }
}
/// <summary>
/// Creates a postings writer with the specified PackedInts overhead ratio </summary>
// TODO: does this ctor even make sense?
public Lucene41PostingsWriter(SegmentWriteState state, float acceptableOverheadRatio)
    : base()
{
    docOut = state.Directory.CreateOutput(IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, Lucene41PostingsFormat.DOC_EXTENSION), state.Context);
    IndexOutput posOut = null;
    IndexOutput payOut = null;
    bool success = false;
    try
    {
        CodecUtil.WriteHeader(docOut, DOC_CODEC, VERSION_CURRENT);
        forUtil = new ForUtil(acceptableOverheadRatio, docOut);
        if (state.FieldInfos.HasProx)
        {
            posDeltaBuffer = new int[ForUtil.MAX_DATA_SIZE];
            posOut = state.Directory.CreateOutput(IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, Lucene41PostingsFormat.POS_EXTENSION), state.Context);
            CodecUtil.WriteHeader(posOut, POS_CODEC, VERSION_CURRENT);

            if (state.FieldInfos.HasPayloads)
            {
                payloadBytes = new byte[128];
                payloadLengthBuffer = new int[ForUtil.MAX_DATA_SIZE];
            }
            else
            {
                payloadBytes = null;
                payloadLengthBuffer = null;
            }

            if (state.FieldInfos.HasOffsets)
            {
                offsetStartDeltaBuffer = new int[ForUtil.MAX_DATA_SIZE];
                offsetLengthBuffer = new int[ForUtil.MAX_DATA_SIZE];
            }
            else
            {
                offsetStartDeltaBuffer = null;
                offsetLengthBuffer = null;
            }

            if (state.FieldInfos.HasPayloads || state.FieldInfos.HasOffsets)
            {
                payOut = state.Directory.CreateOutput(IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, Lucene41PostingsFormat.PAY_EXTENSION), state.Context);
                CodecUtil.WriteHeader(payOut, PAY_CODEC, VERSION_CURRENT);
            }
        }
        else
        {
            posDeltaBuffer = null;
            payloadLengthBuffer = null;
            offsetStartDeltaBuffer = null;
            offsetLengthBuffer = null;
            payloadBytes = null;
        }
        this.payOut = payOut;
        this.posOut = posOut;
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.DisposeWhileHandlingException(docOut, posOut, payOut);
        }
    }

    docDeltaBuffer = new int[ForUtil.MAX_DATA_SIZE];
    freqBuffer = new int[ForUtil.MAX_DATA_SIZE];

    // TODO: should we try skipping every 2/4 blocks...?
    skipWriter = new Lucene41SkipWriter(maxSkipLevels, Lucene41PostingsFormat.BLOCK_SIZE, state.SegmentInfo.DocCount, docOut, posOut, payOut);

    encoded = new byte[ForUtil.MAX_ENCODED_SIZE];
}
//private Directory cfsReader; // LUCENENET NOTE: cfsReader not used
public Lucene3xFields(Directory dir, FieldInfos fieldInfos, SegmentInfo info, IOContext context, int indexDivisor)
{
    si = info;

    // NOTE: we must always load terms index, even for
    // "sequential" scan during merging, because what is
    // sequential to merger may not be to TermInfosReader
    // since we do the surrogates dance:
    if (indexDivisor < 0)
    {
        indexDivisor = -indexDivisor;
    }

    bool success = false;
    try
    {
        var r = new TermInfosReader(dir, info.Name, fieldInfos, context, indexDivisor);
        if (indexDivisor == -1)
        {
            TisNoIndex = r;
        }
        else
        {
            TisNoIndex = null;
            Tis = r;
        }
        this.context = context;
        this.fieldInfos = fieldInfos;

        // make sure that all index files have been read or are kept open
        // so that if an index update removes them we'll still have them
        FreqStream = dir.OpenInput(IndexFileNames.SegmentFileName(info.Name, "", Lucene3xPostingsFormat.FREQ_EXTENSION), context);
        bool anyProx = false;
        foreach (FieldInfo fi in fieldInfos)
        {
            if (fi.IsIndexed)
            {
                fields[fi.Name] = fi;
                preTerms[fi.Name] = new PreTerms(this, fi);
                if (fi.IndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
                {
                    anyProx = true;
                }
            }
        }

        if (anyProx)
        {
            ProxStream = dir.OpenInput(IndexFileNames.SegmentFileName(info.Name, "", Lucene3xPostingsFormat.PROX_EXTENSION), context);
        }
        else
        {
            ProxStream = null;
        }
        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            Dispose();
        }
    }

    this.dir = dir;
}
public override void Abort()
{
    IOUtils.DisposeWhileHandlingException(this);
    IOUtils.DeleteFilesIgnoringExceptions(directory,
        IndexFileNames.SegmentFileName(segment, segmentSuffix, VECTORS_EXTENSION),
        IndexFileNames.SegmentFileName(segment, segmentSuffix, VECTORS_INDEX_EXTENSION));
}
public override void Write(Directory directory, string segmentName, string segmentSuffix, FieldInfos infos, IOContext context)
{
    string fileName = IndexFileNames.SegmentFileName(segmentName, "", FIELD_INFOS_EXTENSION);
    IndexOutput output = directory.CreateOutput(fileName, context);
    bool success = false;
    try
    {
        output.WriteVInt32(FORMAT_PREFLEX_RW);
        output.WriteVInt32(infos.Count);
        foreach (FieldInfo fi in infos)
        {
            sbyte bits = 0x0;
            if (fi.HasVectors) { bits |= STORE_TERMVECTOR; }
            if (fi.OmitsNorms) { bits |= OMIT_NORMS; }
            if (fi.HasPayloads) { bits |= STORE_PAYLOADS; }
            if (fi.IsIndexed)
            {
                bits |= IS_INDEXED;
                Debug.Assert(fi.IndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS || !fi.HasPayloads);
                if (fi.IndexOptions == IndexOptions.DOCS_ONLY)
                {
                    bits |= OMIT_TERM_FREQ_AND_POSITIONS;
                }
                else if (fi.IndexOptions == IndexOptions.DOCS_AND_FREQS)
                {
                    bits |= OMIT_POSITIONS;
                }
            }
            output.WriteString(fi.Name);
            /*
             * we need to write the field number since IW tries
             * to stabilize the field numbers across segments so the
             * FI ordinal is not necessarily equivalent to the field number
             */
            output.WriteInt32(fi.Number);
            output.WriteByte((byte)bits);
            if (fi.IsIndexed && !fi.OmitsNorms)
            {
                // to allow null norm types we need to indicate if norms are written
                // only in RW case
                output.WriteByte((byte)(sbyte)(fi.NormType == Index.DocValuesType.NONE ? 0 : 1));
            }
            Debug.Assert(fi.Attributes == null); // not used or supported
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            output.Dispose();
        }
        else
        {
            IOUtils.CloseWhileHandlingException(output);
        }
    }
}
/// <summary>
/// Helper method that reads CFS entries from an input stream </summary>
private static IDictionary<string, FileEntry> ReadEntries(IndexInputSlicer handle, Directory dir, string name)
{
    IOException priorE = null;
    IndexInput stream = null;
    ChecksumIndexInput entriesStream = null;
    // read the first VInt. If it is negative, it's the version number
    // otherwise it's the count (pre-3.1 indexes)
    try
    {
        IDictionary<string, FileEntry> mapping;
#pragma warning disable 612, 618
        stream = handle.OpenFullSlice();
#pragma warning restore 612, 618
        int firstInt = stream.ReadVInt32();
        // impossible for 3.0 to have 63 files in a .cfs, CFS writer was not visible
        // and separate norms/etc are outside of cfs.
        if (firstInt == CODEC_MAGIC_BYTE1)
        {
            sbyte secondByte = (sbyte)stream.ReadByte();
            sbyte thirdByte = (sbyte)stream.ReadByte();
            sbyte fourthByte = (sbyte)stream.ReadByte();
            if (secondByte != CODEC_MAGIC_BYTE2 || thirdByte != CODEC_MAGIC_BYTE3 || fourthByte != CODEC_MAGIC_BYTE4)
            {
                throw new CorruptIndexException("Illegal/impossible header for CFS file: " + secondByte + "," + thirdByte + "," + fourthByte);
            }

            int version = CodecUtil.CheckHeaderNoMagic(stream, CompoundFileWriter.DATA_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_CURRENT);
            string entriesFileName = IndexFileNames.SegmentFileName(IndexFileNames.StripExtension(name), "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION);
            entriesStream = dir.OpenChecksumInput(entriesFileName, IOContext.READ_ONCE);
            CodecUtil.CheckHeader(entriesStream, CompoundFileWriter.ENTRY_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_CURRENT);
            int numEntries = entriesStream.ReadVInt32();
            mapping = new Dictionary<string, FileEntry>(numEntries);
            for (int i = 0; i < numEntries; i++)
            {
                FileEntry fileEntry = new FileEntry();
                string id = entriesStream.ReadString();
                FileEntry previous = mapping.Put(id, fileEntry);
                if (previous != null)
                {
                    throw new CorruptIndexException("Duplicate cfs entry id=" + id + " in CFS: " + entriesStream);
                }
                fileEntry.Offset = entriesStream.ReadInt64();
                fileEntry.Length = entriesStream.ReadInt64();
            }
            if (version >= CompoundFileWriter.VERSION_CHECKSUM)
            {
                CodecUtil.CheckFooter(entriesStream);
            }
            else
            {
#pragma warning disable 612, 618
                CodecUtil.CheckEOF(entriesStream);
#pragma warning restore 612, 618
            }
        }
        else
        {
            // TODO remove once 3.x is not supported anymore
            mapping = ReadLegacyEntries(stream, firstInt);
        }
        return mapping;
    }
    catch (IOException ioe)
    {
        priorE = ioe;
    }
    finally
    {
        IOUtils.DisposeWhileHandlingException(priorE, stream, entriesStream);
    }
    // this is needed until Java 7's real try-with-resources:
    throw new InvalidOperationException("impossible to get here");
}
public override FieldInfos Read(Directory directory, string segmentName, string segmentSuffix, IOContext iocontext)
{
    var fileName = IndexFileNames.SegmentFileName(segmentName, segmentSuffix, SimpleTextFieldInfosWriter.FIELD_INFOS_EXTENSION);
    var input = directory.OpenChecksumInput(fileName, iocontext);
    var scratch = new BytesRef();
    var success = false;

    try
    {
        SimpleTextUtil.ReadLine(input, scratch);
        Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NUMFIELDS));
        var size = Convert.ToInt32(ReadString(SimpleTextFieldInfosWriter.NUMFIELDS.Length, scratch));
        var infos = new FieldInfo[size];

        for (var i = 0; i < size; i++)
        {
            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NAME));
            string name = ReadString(SimpleTextFieldInfosWriter.NAME.Length, scratch);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NUMBER));
            int fieldNumber = Convert.ToInt32(ReadString(SimpleTextFieldInfosWriter.NUMBER.Length, scratch));

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.ISINDEXED));
            bool isIndexed = Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.ISINDEXED.Length, scratch));

            FieldInfo.IndexOptions? indexOptions;
            if (isIndexed)
            {
                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.INDEXOPTIONS));
                indexOptions = (FieldInfo.IndexOptions)Enum.Parse(typeof(FieldInfo.IndexOptions), ReadString(SimpleTextFieldInfosWriter.INDEXOPTIONS.Length, scratch));
            }
            else
            {
                indexOptions = null;
            }

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.STORETV));
            bool storeTermVector = Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.STORETV.Length, scratch));

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.PAYLOADS));
            bool storePayloads = Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.PAYLOADS.Length, scratch));

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NORMS));
            bool omitNorms = !Convert.ToBoolean(ReadString(SimpleTextFieldInfosWriter.NORMS.Length, scratch));

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NORMS_TYPE));
            string nrmType = ReadString(SimpleTextFieldInfosWriter.NORMS_TYPE.Length, scratch);
            FieldInfo.DocValuesType_e? normsType = DocValuesType(nrmType);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.DOCVALUES));
            string dvType = ReadString(SimpleTextFieldInfosWriter.DOCVALUES.Length, scratch);
            FieldInfo.DocValuesType_e? docValuesType = DocValuesType(dvType);

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.DOCVALUES_GEN));
            long dvGen = Convert.ToInt64(ReadString(SimpleTextFieldInfosWriter.DOCVALUES_GEN.Length, scratch));

            SimpleTextUtil.ReadLine(input, scratch);
            Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.NUM_ATTS));
            int numAtts = Convert.ToInt32(ReadString(SimpleTextFieldInfosWriter.NUM_ATTS.Length, scratch));
            IDictionary<string, string> atts = new Dictionary<string, string>();

            for (int j = 0; j < numAtts; j++)
            {
                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.ATT_KEY));
                string key = ReadString(SimpleTextFieldInfosWriter.ATT_KEY.Length, scratch);

                SimpleTextUtil.ReadLine(input, scratch);
                Debug.Assert(StringHelper.StartsWith(scratch, SimpleTextFieldInfosWriter.ATT_VALUE));
                string value = ReadString(SimpleTextFieldInfosWriter.ATT_VALUE.Length, scratch);
                atts[key] = value;
            }

            infos[i] = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValuesType, normsType, new ReadOnlyDictionary<string, string>(atts))
            {
                DocValuesGen = dvGen
            };
        }

        SimpleTextUtil.CheckFooter(input);

        var fieldInfos = new FieldInfos(infos);
        success = true;
        return fieldInfos;
    }
    finally
    {
        if (success)
        {
            input.Dispose();
        }
        else
        {
            IOUtils.CloseWhileHandlingException(input);
        }
    }
}
public override void Abort()
{
    try
    {
        Dispose();
    }
    catch (Exception)
    {
        // ignored
    }
    IOUtils.DeleteFilesIgnoringExceptions(Directory,
        IndexFileNames.SegmentFileName(Segment, "", Lucene3xStoredFieldsReader.FIELDS_EXTENSION),
        IndexFileNames.SegmentFileName(Segment, "", Lucene3xStoredFieldsReader.FIELDS_INDEX_EXTENSION));
}
public Lucene3xTermVectorsReader(Directory d, SegmentInfo si, FieldInfos fieldInfos, IOContext context)
{
    string segment = Lucene3xSegmentInfoFormat.GetDocStoreSegment(si);
    int docStoreOffset = Lucene3xSegmentInfoFormat.GetDocStoreOffset(si);
    int size = si.DocCount;

    bool success = false;
    try
    {
        if (docStoreOffset != -1 && Lucene3xSegmentInfoFormat.GetDocStoreIsCompoundFile(si))
        {
            d = storeCFSReader = new CompoundFileDirectory(si.Dir, IndexFileNames.SegmentFileName(segment, "", Lucene3xCodec.COMPOUND_FILE_STORE_EXTENSION), context, false);
        }
        else
        {
            storeCFSReader = null;
        }
        string idxName = IndexFileNames.SegmentFileName(segment, "", VECTORS_INDEX_EXTENSION);
        tvx = d.OpenInput(idxName, context);
        format = CheckValidFormat(tvx);
        string fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_DOCUMENTS_EXTENSION);
        tvd = d.OpenInput(fn, context);
        int tvdFormat = CheckValidFormat(tvd);
        fn = IndexFileNames.SegmentFileName(segment, "", VECTORS_FIELDS_EXTENSION);
        tvf = d.OpenInput(fn, context);
        int tvfFormat = CheckValidFormat(tvf);

        Debug.Assert(format == tvdFormat);
        Debug.Assert(format == tvfFormat);

        numTotalDocs = (int)(tvx.Length >> 4);

        if (-1 == docStoreOffset)
        {
            this.docStoreOffset = 0;
            this.size = numTotalDocs;
            Debug.Assert(size == 0 || numTotalDocs == size);
        }
        else
        {
            this.docStoreOffset = docStoreOffset;
            this.size = size;
            // Verify the file is long enough to hold all of our
            // docs
            Debug.Assert(numTotalDocs >= size + docStoreOffset, "numTotalDocs=" + numTotalDocs + " size=" + size + " docStoreOffset=" + docStoreOffset);
        }

        this.fieldInfos = fieldInfos;
        success = true;
    }
    finally
    {
        // With lock-less commits, it's entirely possible (and
        // fine) to hit a FileNotFound exception above. In
        // this case, we want to explicitly close any subset
        // of things that were opened so that we don't have to
        // wait for a GC to do so.
        if (!success)
        {
            try
            {
                Dispose();
            }
            catch (Exception) // keep our original exception
            {
            }
        }
    }
}