public override FieldsProducer FieldsProducer(SegmentReadState state)
{
    PostingsReaderBase docsReader = null;
    PostingsReaderBase pulsingReaderInner = null;
    PostingsReaderBase pulsingReader = null;
    bool success = false;
    try
    {
        docsReader = new Lucene41PostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, state.SegmentSuffix);
        pulsingReaderInner = new PulsingPostingsReader(state, docsReader);
        pulsingReader = new PulsingPostingsReader(state, pulsingReaderInner);
        FieldsProducer ret = new BlockTreeTermsReader(
            state.Directory, state.FieldInfos, state.SegmentInfo,
            pulsingReader, state.Context, state.SegmentSuffix,
            state.TermsIndexDivisor);
        success = true;
        return ret;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(docsReader, pulsingReaderInner, pulsingReader);
        }
    }
}
internal DirectDocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension)
{
    maxDoc = state.SegmentInfo.DocCount;
    string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);

    // read in the entries from the metadata file.
    ChecksumIndexInput @in = state.Directory.OpenChecksumInput(metaName, state.Context);
    ramBytesUsed = new AtomicLong(RamUsageEstimator.ShallowSizeOfInstance(this.GetType()));
    bool success = false;
    try
    {
        version = CodecUtil.CheckHeader(@in, metaCodec, VERSION_START, VERSION_CURRENT);
        ReadFields(@in);

        if (version >= VERSION_CHECKSUM)
        {
            CodecUtil.CheckFooter(@in);
        }
        else
        {
            CodecUtil.CheckEOF(@in);
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Close(@in);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(@in);
        }
    }

    success = false;
    try
    {
        string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension);
        data = state.Directory.OpenInput(dataName, state.Context);
        int version2 = CodecUtil.CheckHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
        if (version != version2)
        {
            throw new CorruptIndexException("Format versions mismatch");
        }
        success = true;
    }
    finally
    {
        if (!success)
        {
            IOUtils.CloseWhileHandlingException(this.data);
        }
    }
}
public override FieldsProducer FieldsProducer(SegmentReadState state)
{
    PostingsReaderBase postings = new Lucene41PostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, state.SegmentSuffix);

    TermsIndexReaderBase indexReader;
    bool success = false;
    try
    {
        indexReader = new FixedGapTermsIndexReader(state.Directory, state.FieldInfos, state.SegmentInfo.Name, state.TermsIndexDivisor, BytesRef.UTF8SortedAsUnicodeComparer, state.SegmentSuffix, state.Context);
        success = true;
    }
    finally
    {
        if (!success)
        {
            postings.Dispose();
        }
    }

    success = false;
    try
    {
        FieldsProducer ret = new BlockTermsReader(indexReader, state.Directory, state.FieldInfos, state.SegmentInfo, postings, state.Context, state.SegmentSuffix);
        success = true;
        return ret;
    }
    finally
    {
        if (!success)
        {
            try
            {
                postings.Dispose();
            }
            finally
            {
                indexReader.Dispose();
            }
        }
    }
}
public override FieldsProducer FieldsProducer(SegmentReadState state)
{
    PostingsReaderBase postings = new Lucene40PostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, state.SegmentSuffix);
    bool success = false;
    try
    {
        // The terms reader takes ownership of "postings": once construction
        // succeeds, disposing the returned producer also disposes "postings".
        // Note: wrapping this in a using block would dispose the producer
        // before returning it to the caller.
        FieldsProducer ret = new AppendingTermsReader(
            state.Directory, state.FieldInfos, state.SegmentInfo,
            postings, state.Context, state.SegmentSuffix,
            state.TermsIndexDivisor);
        success = true;
        return ret;
    }
    finally
    {
        if (!success)
        {
            postings.Dispose();
        }
    }
}
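For reference, a minimal, self-contained sketch of the dispose-on-failure idiom the FieldsProducer implementations above share; Resource and Producer are hypothetical stand-ins for illustration, not Lucene.NET types:

// Hypothetical sketch of the dispose-on-failure / ownership-transfer idiom.
using System;

class Resource : IDisposable
{
    public void Dispose() { /* release files, buffers, etc. */ }
}

class Producer : IDisposable
{
    private readonly Resource resource;
    public Producer(Resource resource) { this.resource = resource; } // takes ownership
    public void Dispose() { resource.Dispose(); }
}

static class OwnershipTransfer
{
    public static Producer Open()
    {
        Resource resource = new Resource();
        bool success = false;
        try
        {
            // If this constructor throws, "success" stays false and the
            // finally block releases the resource we still own.
            Producer producer = new Producer(resource);
            success = true;
            return producer; // ownership now rests with the caller
        }
        finally
        {
            if (!success)
            {
                resource.Dispose();
            }
        }
    }
}

The success flag marks the point where ownership transfers: once the producer is constructed it owns the resource, so cleanup must run only on the failure path; disposing unconditionally would hand the caller an already-disposed producer.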
// maxAllowed = the "highest" we can index, but we will still // randomly index at lower IndexOption private FieldsProducer BuildIndex(Directory dir, FieldInfo.IndexOptions maxAllowed, bool allowPayloads, bool alwaysTestMax) { Codec codec = Codec; SegmentInfo segmentInfo = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, "_0", MaxDoc, false, codec, null); int maxIndexOption = Enum.GetValues(typeof(FieldInfo.IndexOptions)).Cast<FieldInfo.IndexOptions>().ToList().IndexOf(maxAllowed); if (VERBOSE) { Console.WriteLine("\nTEST: now build index"); } int maxIndexOptionNoOffsets = Enum.GetValues(typeof(FieldInfo.IndexOptions)).Cast<FieldInfo.IndexOptions>().ToList().IndexOf(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); // TODO use allowPayloads var newFieldInfoArray = new FieldInfo[Fields.Count]; for (int fieldUpto = 0; fieldUpto < Fields.Count; fieldUpto++) { FieldInfo oldFieldInfo = FieldInfos.FieldInfo(fieldUpto); string pf = TestUtil.GetPostingsFormat(codec, oldFieldInfo.Name); int fieldMaxIndexOption; if (DoesntSupportOffsets.Contains(pf)) { fieldMaxIndexOption = Math.Min(maxIndexOptionNoOffsets, maxIndexOption); } else { fieldMaxIndexOption = maxIndexOption; } // Randomly picked the IndexOptions to index this // field with: FieldInfo.IndexOptions indexOptions = Enum.GetValues(typeof(FieldInfo.IndexOptions)).Cast<FieldInfo.IndexOptions>().ToArray()[alwaysTestMax ? fieldMaxIndexOption : Random().Next(1 + fieldMaxIndexOption)]; bool doPayloads = indexOptions.CompareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 && allowPayloads; newFieldInfoArray[fieldUpto] = new FieldInfo(oldFieldInfo.Name, true, fieldUpto, false, false, doPayloads, indexOptions, null, DocValuesType.NUMERIC, null); } FieldInfos newFieldInfos = new FieldInfos(newFieldInfoArray); // Estimate that flushed segment size will be 25% of // what we use in RAM: long bytes = TotalPostings * 8 + TotalPayloadBytes; SegmentWriteState writeState = new SegmentWriteState(null, dir, segmentInfo, newFieldInfos, 32, null, new IOContext(new FlushInfo(MaxDoc, bytes))); FieldsConsumer fieldsConsumer = codec.PostingsFormat().FieldsConsumer(writeState); foreach (KeyValuePair<string, SortedDictionary<BytesRef, long>> fieldEnt in Fields) { string field = fieldEnt.Key; IDictionary<BytesRef, long> terms = fieldEnt.Value; FieldInfo fieldInfo = newFieldInfos.FieldInfo(field); FieldInfo.IndexOptions? 
indexOptions = fieldInfo.FieldIndexOptions; if (VERBOSE) { Console.WriteLine("field=" + field + " indexOtions=" + indexOptions); } bool doFreq = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS; bool doPos = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; bool doPayloads = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && allowPayloads; bool doOffsets = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; TermsConsumer termsConsumer = fieldsConsumer.AddField(fieldInfo); long sumTotalTF = 0; long sumDF = 0; FixedBitSet seenDocs = new FixedBitSet(MaxDoc); foreach (KeyValuePair<BytesRef, long> termEnt in terms) { BytesRef term = termEnt.Key; SeedPostings postings = GetSeedPostings(term.Utf8ToString(), termEnt.Value, false, maxAllowed); if (VERBOSE) { Console.WriteLine(" term=" + field + ":" + term.Utf8ToString() + " docFreq=" + postings.DocFreq + " seed=" + termEnt.Value); } PostingsConsumer postingsConsumer = termsConsumer.StartTerm(term); long totalTF = 0; int docID = 0; while ((docID = postings.NextDoc()) != DocsEnum.NO_MORE_DOCS) { int freq = postings.Freq(); if (VERBOSE) { Console.WriteLine(" " + postings.Upto + ": docID=" + docID + " freq=" + postings.Freq_Renamed); } postingsConsumer.StartDoc(docID, doFreq ? postings.Freq_Renamed : -1); seenDocs.Set(docID); if (doPos) { totalTF += postings.Freq_Renamed; for (int posUpto = 0; posUpto < freq; posUpto++) { int pos = postings.NextPosition(); BytesRef payload = postings.Payload; if (VERBOSE) { if (doPayloads) { Console.WriteLine(" pos=" + pos + " payload=" + (payload == null ? "null" : payload.Length + " bytes")); } else { Console.WriteLine(" pos=" + pos); } } postingsConsumer.AddPosition(pos, doPayloads ? payload : null, doOffsets ? postings.StartOffset() : -1, doOffsets ? postings.EndOffset() : -1); } } else if (doFreq) { totalTF += freq; } else { totalTF++; } postingsConsumer.FinishDoc(); } termsConsumer.FinishTerm(term, new TermStats(postings.DocFreq, doFreq ? totalTF : -1)); sumTotalTF += totalTF; sumDF += postings.DocFreq; } termsConsumer.Finish(doFreq ? sumTotalTF : -1, sumDF, seenDocs.Cardinality()); } fieldsConsumer.Dispose(); if (VERBOSE) { Console.WriteLine("TEST: after indexing: files="); foreach (string file in dir.ListAll()) { Console.WriteLine(" " + file + ": " + dir.FileLength(file) + " bytes"); } } CurrentFieldInfos = newFieldInfos; SegmentReadState readState = new SegmentReadState(dir, segmentInfo, newFieldInfos, IOContext.READ, 1); return codec.PostingsFormat().FieldsProducer(readState); }
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
{
    fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
    termVectorsLocal = new AnonymousTermVectorsLocal(this);

    if (termsIndexDivisor == 0)
    {
        throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
    }

    Codec codec = si.Info.Codec;
    Directory cfsDir; // confusing name: if (cfs) it's the cfsdir, otherwise it's the segment's directory.

    bool success = false;

    try
    {
        if (si.Info.UseCompoundFile)
        {
            cfsDir = CfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
        }
        else
        {
            CfsReader = null;
            cfsDir = dir;
        }

        FieldInfos fieldInfos = owner.FieldInfos_Renamed;

        this.TermsIndexDivisor = termsIndexDivisor;
        PostingsFormat format = codec.PostingsFormat();
        SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);

        // Ask codec for its Fields
        Fields = format.FieldsProducer(segmentReadState);
        Debug.Assert(Fields != null);

        // ask codec for its Norms:
        // TODO: since we don't write any norms file if there are no norms,
        // kinda janky to assume the codec handles the case of no norms file at all gracefully?!
        if (fieldInfos.HasNorms())
        {
            NormsProducer = codec.NormsFormat().NormsProducer(segmentReadState);
            Debug.Assert(NormsProducer != null);
        }
        else
        {
            NormsProducer = null;
        }

        StoredFieldsFormat sff = si.Info.Codec.StoredFieldsFormat();
        try
        {
            FieldsReaderOrig = sff.FieldsReader(cfsDir, si.Info, fieldInfos, context);
        }
        catch (System.AccessViolationException ave)
        {
            // Swallowed; see the LUCENENET TODO on this same catch in the later
            // revision of this constructor below.
        }
        //FieldsReaderOrig = si.Info.Codec.StoredFieldsFormat().FieldsReader(cfsDir, si.Info, fieldInfos, context);

        if (fieldInfos.HasVectors()) // open term vector files only as needed
        {
            TermVectorsReaderOrig = si.Info.Codec.TermVectorsFormat().VectorsReader(cfsDir, si.Info, fieldInfos, context);
        }
        else
        {
            TermVectorsReaderOrig = null;
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            DecRef();
        }
    }
}
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
{
    fieldsReaderLocal = new DisposableThreadLocal<StoredFieldsReader>(
        () => (StoredFieldsReader)fieldsReaderOrig.Clone());
    termVectorsLocal = new DisposableThreadLocal<TermVectorsReader>(
        () => (termVectorsReaderOrig is null) ? null : (TermVectorsReader)termVectorsReaderOrig.Clone());

    if (termsIndexDivisor == 0)
    {
        throw new ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
    }

    Codec codec = si.Info.Codec;
    Directory cfsDir; // confusing name: if (cfs) it's the cfsdir, otherwise it's the segment's directory.

    bool success = false;

    try
    {
        if (si.Info.UseCompoundFile)
        {
            cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
        }
        else
        {
            cfsReader = null;
            cfsDir = dir;
        }

        FieldInfos fieldInfos = owner.FieldInfos;

        this.termsIndexDivisor = termsIndexDivisor;
        PostingsFormat format = codec.PostingsFormat;
        SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);

        // Ask codec for its Fields
        fields = format.FieldsProducer(segmentReadState);
        if (Debugging.AssertsEnabled) Debugging.Assert(fields != null);

        // ask codec for its Norms:
        // TODO: since we don't write any norms file if there are no norms,
        // kinda janky to assume the codec handles the case of no norms file at all gracefully?!
        if (fieldInfos.HasNorms)
        {
            normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
            if (Debugging.AssertsEnabled) Debugging.Assert(normsProducer != null);
        }
        else
        {
            normsProducer = null;
        }

        fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);

        if (fieldInfos.HasVectors) // open term vector files only as needed
        {
            termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
        }
        else
        {
            termVectorsReaderOrig = null;
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            DecRef();
        }
    }
}
public override DocValuesProducer FieldsProducer(SegmentReadState state)
{
    return new DiskDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION);
}
public DiskDocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension)
    : base(state, dataCodec, dataExtension, metaCodec, metaExtension)
{
}
public override FieldsProducer FieldsProducer(SegmentReadState state)
{
    return @delegate.FieldsProducer(state);
}
public PulsingPostingsReader(SegmentReadState state, PostingsReaderBase wrappedPostingsReader)
{
    this._wrappedPostingsReader = wrappedPostingsReader;
    this.segmentState = state;
}
/// <summary>
/// Create a <c>SegmentReadState</c> that is a copy of <paramref name="other"/>,
/// but with a new segment suffix.
/// </summary>
public SegmentReadState(SegmentReadState other, string newSegmentSuffix)
{
    this.Directory = other.Directory;
    this.SegmentInfo = other.SegmentInfo;
    this.FieldInfos = other.FieldInfos;
    this.Context = other.Context;
    this.TermsIndexDivisor = other.TermsIndexDivisor;
    this.SegmentSuffix = newSegmentSuffix;
}
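As a usage note, here is a short hypothetical sketch of where this copy constructor typically appears: a wrapping format reuses all of the caller's segment state but resolves its files under its own suffix. The "inner" format and the "wrapped" suffix are assumptions for illustration, not Lucene.NET API names:

// Hypothetical helper: reuse the caller's segment state but read the
// files that the inner format wrote under the "wrapped" suffix.
public static FieldsProducer OpenWrapped(PostingsFormat inner, SegmentReadState state)
{
    SegmentReadState suffixedState = new SegmentReadState(state, "wrapped");
    return inner.FieldsProducer(suffixedState);
}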
internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
{
    fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
    termVectorsLocal = new AnonymousTermVectorsLocal(this);

    if (termsIndexDivisor == 0)
    {
        throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
    }

    Codec codec = si.Info.Codec;
    Directory cfsDir; // confusing name: if (cfs) it's the cfsdir, otherwise it's the segment's directory.

    bool success = false;

    try
    {
        if (si.Info.UseCompoundFile)
        {
            cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
        }
        else
        {
            cfsReader = null;
            cfsDir = dir;
        }

        FieldInfos fieldInfos = owner.FieldInfos;

        this.termsIndexDivisor = termsIndexDivisor;
        PostingsFormat format = codec.PostingsFormat;
        SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);

        // Ask codec for its Fields
        fields = format.FieldsProducer(segmentReadState);
        Debug.Assert(fields != null);

        // ask codec for its Norms:
        // TODO: since we don't write any norms file if there are no norms,
        // kinda janky to assume the codec handles the case of no norms file at all gracefully?!
        if (fieldInfos.HasNorms)
        {
            normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
            Debug.Assert(normsProducer != null);
        }
        else
        {
            normsProducer = null;
        }

        // LUCENENET TODO: EXCEPTIONS Not sure why this catch block is swallowing AccessViolationException,
        // because it didn't exist in Lucene. Is it really needed? AVE is for protected memory...could
        // this be needed because we are using unchecked??
#if !NETSTANDARD
        try
        {
#endif
            fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);
#if !NETSTANDARD
        }
#pragma warning disable 168
        catch (System.AccessViolationException ave)
#pragma warning restore 168
        {
        }
#endif

        if (fieldInfos.HasVectors) // open term vector files only as needed
        {
            termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
        }
        else
        {
            termVectorsReaderOrig = null;
        }

        success = true;
    }
    finally
    {
        if (!success)
        {
            DecRef();
        }
    }
}