Code example #1
 public override FieldsProducer FieldsProducer(SegmentReadState state)
 {
     PostingsReaderBase docsReader = null;
     PostingsReaderBase pulsingReaderInner = null;
     PostingsReaderBase pulsingReader = null;
     bool success = false;
     try
     {
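         // Wrap the postings reader in PulsingPostingsReader twice, so that
         // low-frequency terms are inlined at two levels (a nested pulsing setup).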
         docsReader = new Lucene41PostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, state.SegmentSuffix);
         pulsingReaderInner = new PulsingPostingsReader(state, docsReader);
         pulsingReader = new PulsingPostingsReader(state, pulsingReaderInner);
         FieldsProducer ret = new BlockTreeTermsReader(
                                                       state.Directory, state.FieldInfos, state.SegmentInfo,
                                                       pulsingReader,
                                                       state.Context,
                                                       state.SegmentSuffix,
                                                       state.TermsIndexDivisor);
         success = true;
         return ret;
     }
     finally
     {
         if (!success)
         {
             IOUtils.CloseWhileHandlingException(docsReader, pulsingReaderInner, pulsingReader);
         }
     }
 }
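
A note on the pattern these factories share: the returned reader takes ownership of its delegates on success, so the delegates are disposed only when construction fails. Below is a minimal sketch of that dispose-on-failure idiom; CreateInner and CreateOuter are hypothetical stand-ins for constructors such as Lucene41PostingsReader and BlockTreeTermsReader.

 using System;

 internal static class OwnershipSketch
 {
     // Hypothetical factories standing in for the real Lucene.NET constructors.
     private static IDisposable CreateInner() => new System.IO.MemoryStream();
     private static IDisposable CreateOuter(IDisposable inner) => inner;

     public static IDisposable Open()
     {
         IDisposable inner = null;
         bool success = false;
         try
         {
             inner = CreateInner();
             IDisposable outer = CreateOuter(inner); // outer now owns inner
             success = true;
             return outer;
         }
         finally
         {
             // Dispose only on failure; on success, disposing the returned
             // outer object is what eventually releases inner.
             if (!success)
             {
                 inner?.Dispose();
             }
         }
     }
 }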
Code example #2
        internal DirectDocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension)
        {
            maxDoc = state.SegmentInfo.DocCount;
            string metaName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
            // read in the entries from the metadata file.
            ChecksumIndexInput @in = state.Directory.OpenChecksumInput(metaName, state.Context);
            ramBytesUsed = new AtomicLong(RamUsageEstimator.ShallowSizeOfInstance(this.GetType()));
            bool success = false;
            try
            {
                version = CodecUtil.CheckHeader(@in, metaCodec, VERSION_START, VERSION_CURRENT);
                ReadFields(@in);

                if (version >= VERSION_CHECKSUM)
                {
                    CodecUtil.CheckFooter(@in);
                }
                else
                {
                    CodecUtil.CheckEOF(@in);
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(@in);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(@in);
                }
            }

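            // Second pass: open the data file and verify that its header version
            // matches the version read from the metadata file above.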
            success = false;
            try
            {
                string dataName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension);
                data = state.Directory.OpenInput(dataName, state.Context);
                int version2 = CodecUtil.CheckHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
                if (version != version2)
                {
                    throw new CorruptIndexException("Format versions mismatch");
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    IOUtils.CloseWhileHandlingException(this.data);
                }
            }
        }
Code example #3
        public override FieldsProducer FieldsProducer(SegmentReadState state)
        {
            PostingsReaderBase postings = new Lucene41PostingsReader(state.Directory, state.FieldInfos, state.SegmentInfo, state.Context, state.SegmentSuffix);
            TermsIndexReaderBase indexReader;

            bool success = false;
            try
            {
                indexReader = new FixedGapTermsIndexReader(state.Directory,
                                                           state.FieldInfos,
                                                           state.SegmentInfo.Name,
                                                           state.TermsIndexDivisor,
                                                           BytesRef.UTF8SortedAsUnicodeComparer,
                                                           state.SegmentSuffix, state.Context);
                success = true;
            }
            finally
            {
                if (!success)
                {
                    postings.Dispose();
                }
            }

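            // Second phase: BlockTermsReader takes ownership of both readers on
            // success; if it throws, dispose postings first, then indexReader.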
            success = false;
            try
            {
                FieldsProducer ret = new BlockTermsReader(indexReader,
                                                          state.Directory,
                                                          state.FieldInfos,
                                                          state.SegmentInfo,
                                                          postings,
                                                          state.Context,
                                                          state.SegmentSuffix);
                success = true;
                return ret;
            }
            finally
            {
                if (!success)
                {
                    try
                    {
                        postings.Dispose();
                    }
                    finally
                    {
                        indexReader.Dispose();
                    }
                }
            }
        }
Code example #4
        public override FieldsProducer FieldsProducer(SegmentReadState state)
        {
            PostingsReaderBase postings = new Lucene40PostingsReader(state.Directory, state.FieldInfos,
                state.SegmentInfo,
                state.Context, state.SegmentSuffix);

            bool success = false;
            try
            {
                FieldsProducer ret = new AppendingTermsReader(
                    state.Directory,
                    state.FieldInfos,
                    state.SegmentInfo,
                    postings,
                    state.Context,
                    state.SegmentSuffix,
                    state.TermsIndexDivisor);
                success = true;
                return ret;
            }
            finally
            {
                // Dispose the postings reader only if construction failed; on
                // success the returned reader takes ownership and disposes it.
                if (!success)
                {
                    postings.Dispose();
                }
            }
        }
Code example #5
        // maxAllowed = the "highest" we can index, but we will still
        // randomly index at lower IndexOption
        private FieldsProducer BuildIndex(Directory dir, FieldInfo.IndexOptions maxAllowed, bool allowPayloads, bool alwaysTestMax)
        {
            Codec codec = Codec;
            SegmentInfo segmentInfo = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, "_0", MaxDoc, false, codec, null);

            int maxIndexOption = Enum.GetValues(typeof(FieldInfo.IndexOptions)).Cast<FieldInfo.IndexOptions>().ToList().IndexOf(maxAllowed);
            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: now build index");
            }

            int maxIndexOptionNoOffsets = Enum.GetValues(typeof(FieldInfo.IndexOptions)).Cast<FieldInfo.IndexOptions>().ToList().IndexOf(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);

            // TODO use allowPayloads

            var newFieldInfoArray = new FieldInfo[Fields.Count];
            for (int fieldUpto = 0; fieldUpto < Fields.Count; fieldUpto++)
            {
                FieldInfo oldFieldInfo = FieldInfos.FieldInfo(fieldUpto);

                string pf = TestUtil.GetPostingsFormat(codec, oldFieldInfo.Name);
                int fieldMaxIndexOption;
                if (DoesntSupportOffsets.Contains(pf))
                {
                    fieldMaxIndexOption = Math.Min(maxIndexOptionNoOffsets, maxIndexOption);
                }
                else
                {
                    fieldMaxIndexOption = maxIndexOption;
                }

                // Randomly picked the IndexOptions to index this
                // field with:
                FieldInfo.IndexOptions indexOptions = Enum.GetValues(typeof(FieldInfo.IndexOptions)).Cast<FieldInfo.IndexOptions>().ToArray()[alwaysTestMax ? fieldMaxIndexOption : Random().Next(1 + fieldMaxIndexOption)];
                bool doPayloads = indexOptions.CompareTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 && allowPayloads;

                newFieldInfoArray[fieldUpto] = new FieldInfo(oldFieldInfo.Name, true, fieldUpto, false, false, doPayloads, indexOptions, null, DocValuesType.NUMERIC, null);
            }

            FieldInfos newFieldInfos = new FieldInfos(newFieldInfoArray);

            // Estimate that flushed segment size will be 25% of
            // what we use in RAM:
            long bytes = TotalPostings * 8 + TotalPayloadBytes;

            SegmentWriteState writeState = new SegmentWriteState(null, dir, segmentInfo, newFieldInfos, 32, null, new IOContext(new FlushInfo(MaxDoc, bytes)));
            FieldsConsumer fieldsConsumer = codec.PostingsFormat().FieldsConsumer(writeState);

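            // Replay the generated postings through the codec's FieldsConsumer
            // chain: field -> term -> doc -> position.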
            foreach (KeyValuePair<string, SortedDictionary<BytesRef, long>> fieldEnt in Fields)
            {
                string field = fieldEnt.Key;
                IDictionary<BytesRef, long> terms = fieldEnt.Value;

                FieldInfo fieldInfo = newFieldInfos.FieldInfo(field);

                FieldInfo.IndexOptions? indexOptions = fieldInfo.FieldIndexOptions;

                if (VERBOSE)
                {
                    Console.WriteLine("field=" + field + " indexOtions=" + indexOptions);
                }

                bool doFreq = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS;
                bool doPos = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                bool doPayloads = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS && allowPayloads;
                bool doOffsets = indexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;

                TermsConsumer termsConsumer = fieldsConsumer.AddField(fieldInfo);
                long sumTotalTF = 0;
                long sumDF = 0;
                FixedBitSet seenDocs = new FixedBitSet(MaxDoc);
                foreach (KeyValuePair<BytesRef, long> termEnt in terms)
                {
                    BytesRef term = termEnt.Key;
                    SeedPostings postings = GetSeedPostings(term.Utf8ToString(), termEnt.Value, false, maxAllowed);
                    if (VERBOSE)
                    {
                        Console.WriteLine("  term=" + field + ":" + term.Utf8ToString() + " docFreq=" + postings.DocFreq + " seed=" + termEnt.Value);
                    }

                    PostingsConsumer postingsConsumer = termsConsumer.StartTerm(term);
                    long totalTF = 0;
                    int docID = 0;
                    while ((docID = postings.NextDoc()) != DocsEnum.NO_MORE_DOCS)
                    {
                        int freq = postings.Freq();
                        if (VERBOSE)
                        {
                            Console.WriteLine("    " + postings.Upto + ": docID=" + docID + " freq=" + postings.Freq_Renamed);
                        }
                        postingsConsumer.StartDoc(docID, doFreq ? postings.Freq_Renamed : -1);
                        seenDocs.Set(docID);
                        if (doPos)
                        {
                            totalTF += postings.Freq_Renamed;
                            for (int posUpto = 0; posUpto < freq; posUpto++)
                            {
                                int pos = postings.NextPosition();
                                BytesRef payload = postings.Payload;

                                if (VERBOSE)
                                {
                                    if (doPayloads)
                                    {
                                        Console.WriteLine("      pos=" + pos + " payload=" + (payload == null ? "null" : payload.Length + " bytes"));
                                    }
                                    else
                                    {
                                        Console.WriteLine("      pos=" + pos);
                                    }
                                }
                                postingsConsumer.AddPosition(pos, doPayloads ? payload : null, doOffsets ? postings.StartOffset() : -1, doOffsets ? postings.EndOffset() : -1);
                            }
                        }
                        else if (doFreq)
                        {
                            totalTF += freq;
                        }
                        else
                        {
                            totalTF++;
                        }
                        postingsConsumer.FinishDoc();
                    }
                    termsConsumer.FinishTerm(term, new TermStats(postings.DocFreq, doFreq ? totalTF : -1));
                    sumTotalTF += totalTF;
                    sumDF += postings.DocFreq;
                }

                termsConsumer.Finish(doFreq ? sumTotalTF : -1, sumDF, seenDocs.Cardinality());
            }

            fieldsConsumer.Dispose();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: after indexing: files=");
                foreach (string file in dir.ListAll())
                {
                    Console.WriteLine("  " + file + ": " + dir.FileLength(file) + " bytes");
                }
            }

            CurrentFieldInfos = newFieldInfos;

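            // Re-open everything we just wrote through the codec's producer side
            // so the caller can verify the postings round-trip.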
            SegmentReadState readState = new SegmentReadState(dir, segmentInfo, newFieldInfos, IOContext.READ, 1);

            return codec.PostingsFormat().FieldsProducer(readState);
        }
Code example #6
        internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
        {
            fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
            termVectorsLocal  = new AnonymousTermVectorsLocal(this);

            if (termsIndexDivisor == 0)
            {
                throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
            }

            Codec     codec = si.Info.Codec;
            Directory cfsDir; // confusing name: if (cfs) it's the cfsdir, otherwise it's the segment's directory.

            bool success = false;

            try
            {
                if (si.Info.UseCompoundFile)
                {
                    cfsDir = CfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
                }
                else
                {
                    CfsReader = null;
                    cfsDir    = dir;
                }

                FieldInfos fieldInfos = owner.FieldInfos_Renamed;

                this.TermsIndexDivisor = termsIndexDivisor;
                PostingsFormat   format           = codec.PostingsFormat();
                SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);
                // Ask codec for its Fields
                Fields = format.FieldsProducer(segmentReadState);
                Debug.Assert(Fields != null);
                // ask codec for its Norms:
                // TODO: since we don't write any norms file if there are no norms,
                // kinda janky to assume the codec handles the case of no norms file at all gracefully?!

                if (fieldInfos.HasNorms())
                {
                    NormsProducer = codec.NormsFormat().NormsProducer(segmentReadState);
                    Debug.Assert(NormsProducer != null);
                }
                else
                {
                    NormsProducer = null;
                }

                StoredFieldsFormat sff = si.Info.Codec.StoredFieldsFormat();

                try
                {
                    FieldsReaderOrig = sff.FieldsReader(cfsDir, si.Info, fieldInfos, context);
                }
                catch (System.AccessViolationException)
                {
                    // Deliberately swallowed in the Lucene.NET port (see the
                    // LUCENENET TODO about this catch in code example #13).
                }

                if (fieldInfos.HasVectors()) // open term vector files only as needed
                {
                    TermVectorsReaderOrig = si.Info.Codec.TermVectorsFormat().VectorsReader(cfsDir, si.Info, fieldInfos, context);
                }
                else
                {
                    TermVectorsReaderOrig = null;
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    DecRef();
                }
            }
        }
Code example #7
        internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
        {
            fieldsReaderLocal = new DisposableThreadLocal<StoredFieldsReader>(
                () => (StoredFieldsReader)fieldsReaderOrig.Clone());
            termVectorsLocal = new DisposableThreadLocal<TermVectorsReader>(
                () => (termVectorsReaderOrig is null) ? null : (TermVectorsReader)termVectorsReaderOrig.Clone());

            if (termsIndexDivisor == 0)
            {
                throw new ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
            }

            Codec     codec = si.Info.Codec;
            Directory cfsDir; // confusing name: if (cfs) it's the cfsdir, otherwise it's the segment's directory.

            bool success = false;

            try
            {
                if (si.Info.UseCompoundFile)
                {
                    cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
                }
                else
                {
                    cfsReader = null;
                    cfsDir    = dir;
                }

                FieldInfos fieldInfos = owner.FieldInfos;

                this.termsIndexDivisor = termsIndexDivisor;
                PostingsFormat   format           = codec.PostingsFormat;
                SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);
                // Ask codec for its Fields
                fields = format.FieldsProducer(segmentReadState);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(fields != null);
                }
                // ask codec for its Norms:
                // TODO: since we don't write any norms file if there are no norms,
                // kinda janky to assume the codec handles the case of no norms file at all gracefully?!

                if (fieldInfos.HasNorms)
                {
                    normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(normsProducer != null);
                    }
                }
                else
                {
                    normsProducer = null;
                }

                fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);

                if (fieldInfos.HasVectors) // open term vector files only as needed
                {
                    termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
                }
                else
                {
                    termVectorsReaderOrig = null;
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    DecRef();
                }
            }
        }
Code example #8
 public override DocValuesProducer FieldsProducer(SegmentReadState state)
 {
     return new DiskDocValuesProducer(state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION);
 }
Code example #9
 public DiskDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec,
     String metaExtension) :
         base(state, dataCodec, dataExtension, metaCodec, metaExtension)
 {
 }
Code example #10
 public override FieldsProducer FieldsProducer(SegmentReadState state)
 {
     return @delegate.FieldsProducer(state);
 }
Code example #11
 public PulsingPostingsReader(SegmentReadState state, PostingsReaderBase wrappedPostingsReader)
 {
     this._wrappedPostingsReader = wrappedPostingsReader;
     this.segmentState = state;
 }
Code example #12
 /// <summary>
 /// Creates a copy of <paramref name="other"/> with a new segment suffix. </summary>
 public SegmentReadState(SegmentReadState other, string newSegmentSuffix)
 {
     this.Directory = other.Directory;
     this.SegmentInfo = other.SegmentInfo;
     this.FieldInfos = other.FieldInfos;
     this.Context = other.Context;
     this.TermsIndexDivisor = other.TermsIndexDivisor;
     this.SegmentSuffix = newSegmentSuffix;
 }
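
A wrapping format can use this copy constructor to re-read the same segment under its own suffix. A minimal sketch, assuming a wrappedFormat field and an illustrative suffix value (the real PerFieldPostingsFormat derives the suffix from the format name and an ordinal):

 public override FieldsProducer FieldsProducer(SegmentReadState state)
 {
     // Same directory, segment, and context; only the suffix changes, so the
     // wrapped format opens its own files alongside the outer format's files.
     SegmentReadState suffixedState = new SegmentReadState(state, "MyFormat_0");
     return wrappedFormat.FieldsProducer(suffixedState);
 }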
Code example #13
        internal SegmentCoreReaders(SegmentReader owner, Directory dir, SegmentCommitInfo si, IOContext context, int termsIndexDivisor)
        {
            fieldsReaderLocal = new AnonymousFieldsReaderLocal(this);
            termVectorsLocal  = new AnonymousTermVectorsLocal(this);

            if (termsIndexDivisor == 0)
            {
                throw new System.ArgumentException("indexDivisor must be < 0 (don't load terms index) or greater than 0 (got 0)");
            }

            Codec     codec = si.Info.Codec;
            Directory cfsDir; // confusing name: if (cfs) it's the cfsdir, otherwise it's the segment's directory.

            bool success = false;

            try
            {
                if (si.Info.UseCompoundFile)
                {
                    cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.SegmentFileName(si.Info.Name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false);
                }
                else
                {
                    cfsReader = null;
                    cfsDir    = dir;
                }

                FieldInfos fieldInfos = owner.FieldInfos;

                this.termsIndexDivisor = termsIndexDivisor;
                PostingsFormat   format           = codec.PostingsFormat;
                SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.Info, fieldInfos, context, termsIndexDivisor);
                // Ask codec for its Fields
                fields = format.FieldsProducer(segmentReadState);
                Debug.Assert(fields != null);
                // ask codec for its Norms:
                // TODO: since we don't write any norms file if there are no norms,
                // kinda janky to assume the codec handles the case of no norms file at all gracefully?!

                if (fieldInfos.HasNorms)
                {
                    normsProducer = codec.NormsFormat.NormsProducer(segmentReadState);
                    Debug.Assert(normsProducer != null);
                }
                else
                {
                    normsProducer = null;
                }

                // LUCENENET TODO: EXCEPTIONS Not sure why this catch block is swallowing AccessViolationException,
                // because it didn't exist in Lucene. Is it really needed? AVE is for protected memory...could
                // this be needed because we are using unchecked??

#if !NETSTANDARD
                try
                {
#endif
                fieldsReaderOrig = si.Info.Codec.StoredFieldsFormat.FieldsReader(cfsDir, si.Info, fieldInfos, context);
#if !NETSTANDARD
                }
#pragma warning disable 168
                catch (System.AccessViolationException ave)
#pragma warning restore 168
                {
                }
#endif

                if (fieldInfos.HasVectors) // open term vector files only as needed
                {
                    termVectorsReaderOrig = si.Info.Codec.TermVectorsFormat.VectorsReader(cfsDir, si.Info, fieldInfos, context);
                }
                else
                {
                    termVectorsReaderOrig = null;
                }

                success = true;
            }
            finally
            {
                if (!success)
                {
                    DecRef();
                }
            }
        }