TermsIndexReader for simple every-Nth-term indexes. See FixedGapTermsIndexWriter. @lucene.experimental
Inheritance: TermsIndexReaderBase
        /// <summary>
        /// Builds the <see cref="FieldsProducer"/> for a segment by wiring together a
        /// <c>Lucene41PostingsReader</c>, a <c>FixedGapTermsIndexReader</c> and a
        /// <c>BlockTermsReader</c>.
        /// </summary>
        /// <remarks>
        /// The readers are created one after another. If a later step throws, every reader
        /// that was already created is disposed before the exception propagates.
        /// </remarks>
        /// <param name="state">Segment read state supplying the directory, field infos,
        /// segment info, IO context, segment suffix and terms index divisor.</param>
        /// <returns>The newly created <c>BlockTermsReader</c>.</returns>
        public override FieldsProducer FieldsProducer(SegmentReadState state)
        {
            PostingsReaderBase postingsReader = new Lucene41PostingsReader(
                state.Directory,
                state.FieldInfos,
                state.SegmentInfo,
                state.Context,
                state.SegmentSuffix);

            // Step 1: open the terms index; on failure only the postings reader exists yet.
            TermsIndexReaderBase termsIndex;
            bool indexOpened = false;
            try
            {
                termsIndex = new FixedGapTermsIndexReader(
                    state.Directory,
                    state.FieldInfos,
                    state.SegmentInfo.Name,
                    state.TermsIndexDivisor,
                    BytesRef.UTF8SortedAsUnicodeComparer,
                    state.SegmentSuffix,
                    state.Context);
                indexOpened = true;
            }
            finally
            {
                if (!indexOpened)
                {
                    postingsReader.Dispose();
                }
            }

            // Step 2: build the terms reader on top of both; on failure release both.
            bool producerBuilt = false;
            try
            {
                FieldsProducer producer = new BlockTermsReader(
                    termsIndex,
                    state.Directory,
                    state.FieldInfos,
                    state.SegmentInfo,
                    postingsReader,
                    state.Context,
                    state.SegmentSuffix);
                producerBuilt = true;
                return producer;
            }
            finally
            {
                if (!producerBuilt)
                {
                    // Nested finally: the index reader is disposed even if
                    // disposing the postings reader throws.
                    try
                    {
                        postingsReader.Dispose();
                    }
                    finally
                    {
                        termsIndex.Dispose();
                    }
                }
            }
        }
Example #2
0
            /// <summary>
            /// Stores the index layout values for one field and, when the owning reader's
            /// index divisor is positive, loads the terms index immediately.
            /// </summary>
            /// <param name="outerInstance">Owning reader; its <c>indexDivisor</c> decides whether the index is loaded now.</param>
            /// <param name="fieldInfo">Not used by this constructor body.</param>
            /// <param name="numIndexTerms">Stored in the <c>numIndexTerms</c> field.</param>
            /// <param name="indexStart">Stored in the <c>indexStart</c> field.</param>
            /// <param name="termsStart">Stored in the <c>termsStart</c> field.</param>
            /// <param name="packedIndexStart">Stored in the <c>packedIndexStart</c> field.</param>
            /// <param name="packedOffsetsStart">Stored in the <c>packedOffsetsStart</c> field.</param>
            public FieldIndexData(FixedGapTermsIndexReader outerInstance, FieldInfo fieldInfo, int numIndexTerms, long indexStart, long termsStart,
                                  long packedIndexStart, long packedOffsetsStart)
            {
                this.outerInstance = outerInstance;

                // Plain bookkeeping; all fields are set before any load is attempted.
                this.numIndexTerms      = numIndexTerms;
                this.indexStart         = indexStart;
                this.termsStart         = termsStart;
                this.packedIndexStart   = packedIndexStart;
                this.packedOffsetsStart = packedOffsetsStart;

                bool loadNow = outerInstance.indexDivisor > 0;
                if (loadNow)
                {
                    LoadTermsIndex();
                }
            }
            /// <summary>
            /// Records the index layout values for one field and loads the terms index
            /// right away when the owning reader's index divisor is positive.
            /// </summary>
            /// <param name="numIndexTerms">Stored in <c>_numIndexTerms</c>.</param>
            /// <param name="indexStart">Stored in <c>_indexStart</c>.</param>
            /// <param name="termsStart">Stored in <c>_termsStart</c>.</param>
            /// <param name="packedIndexStart">Stored in <c>_packedIndexStart</c>.</param>
            /// <param name="packedOffsetsStart">Stored in <c>_packedOffsetsStart</c>.</param>
            /// <param name="fgtir">Owning reader; its <c>_indexDivisor</c> decides whether the index is loaded now.</param>
            public FieldIndexData(int numIndexTerms, long indexStart, long termsStart,
                                  long packedIndexStart,
                                  long packedOffsetsStart, FixedGapTermsIndexReader fgtir)
            {
                _fgtir = fgtir;

                _numIndexTerms      = numIndexTerms;
                _indexStart         = indexStart;
                _termsStart         = termsStart;
                _packedIndexStart   = packedIndexStart;
                _packedOffsetsStart = packedOffsetsStart;

                // A non-positive divisor leaves the index unloaded at construction time.
                bool loadNow = fgtir._indexDivisor > 0;
                if (loadNow)
                {
                    LoadTermsIndex();
                }
            }
                /// <summary>
                /// Loads one field's terms index from a clone of the reader's input into memory.
                /// </summary>
                /// <remarks>
                /// Term bytes are appended to <c>fgtir._termBytes</c>; <c>TermBytesStart</c> records the
                /// <c>Pointer</c> value of that store before anything is appended. Two packed arrays are
                /// populated: <c>TermsDictOffsets</c> and <c>TermOffsets</c>.
                /// When the reader's index divisor is 1, everything is read sequentially in one pass.
                /// Otherwise the index is subsampled, keeping every <c>_indexDivisor</c>-th entry.
                /// </remarks>
                /// <param name="indexStart">Position in the input where this field's term bytes begin.</param>
                /// <param name="termsStart">Stored in <c>TermsStart</c>.</param>
                /// <param name="packedIndexStart">Position of the packed terms-dict offsets; also marks the end of the term bytes.</param>
                /// <param name="packedOffsetsStart">Position of the packed term-byte offsets.</param>
                /// <param name="numIndexTerms">Number of index terms stored in the file, before subsampling.</param>
                /// <param name="fgtir">Owning reader supplying <c>_input</c>, <c>_termBytes</c> and <c>_indexDivisor</c>.</param>
                public CoreFieldIndex(long indexStart, long termsStart, long packedIndexStart, long packedOffsetsStart,
                    int numIndexTerms, FixedGapTermsIndexReader fgtir)
                {
                    TermsStart = termsStart;
                    TermBytesStart = fgtir._termBytes.Pointer;

                    // Work on a private clone so the seek below does not move the shared input.
                    var clone = (IndexInput)fgtir._input.Clone();
                    clone.Seek(indexStart);

                    // -1 is passed to mean "don't load term index", but
                    // if we are then later loaded it's overwritten with
                    // a real value
                    Debug.Assert(fgtir._indexDivisor > 0);

                    // Number of terms kept after subsampling: ceil(numIndexTerms / divisor)
                    // for numIndexTerms >= 1, written with integer division.
                    NumIndexTerms = 1 + (numIndexTerms - 1)/fgtir._indexDivisor;

                    Debug.Assert(NumIndexTerms > 0,
                        String.Format("NumIndexTerms: {0}, IndexDivisor: {1}", NumIndexTerms, fgtir._indexDivisor));

                    if (fgtir._indexDivisor == 1)
                    {
                        // Default (load all index terms) is fast -- slurp in the images from disk:

                        try
                        {
                            // The term bytes span [indexStart, packedIndexStart); copy them in one go.
                            var numTermBytes = packedIndexStart - indexStart;
                            fgtir._termBytes.Copy(clone, numTermBytes);

                            // Both packed readers are read back-to-back from the same clone,
                            // continuing from wherever the previous read left off.

                            // records offsets into main terms dict file
                            TermsDictOffsets = PackedInts.GetReader(clone);
                            Debug.Assert(TermsDictOffsets.Size() == numIndexTerms);

                            // records offsets into byte[] term data
                            TermOffsets = PackedInts.GetReader(clone);
                            Debug.Assert(TermOffsets.Size() == 1 + numIndexTerms);
                        }
                        finally
                        {
                            clone.Dispose();
                        }
                    }
                    else
                    {
                        // Get packed iterators
                        // (each iterator gets its own clone because the two packed streams and
                        // the term bytes are read interleaved, at independent positions)
                        var clone1 = (IndexInput)fgtir._input.Clone();
                        var clone2 = (IndexInput)fgtir._input.Clone();

                        try
                        {
                            // Subsample the index terms
                            clone1.Seek(packedIndexStart);

                            PackedInts.ReaderIterator termsDictOffsetsIter = PackedInts.GetReaderIterator(clone1,
                                PackedInts.DEFAULT_BUFFER_SIZE);

                            clone2.Seek(packedOffsetsStart);

                            PackedInts.ReaderIterator termOffsetsIter = PackedInts.GetReaderIterator(clone2,
                                PackedInts.DEFAULT_BUFFER_SIZE);

                            // TODO: often we can get by w/ fewer bits per
                            // value, below.. .but this'd be more complex:
                            // we'd have to try @ fewer bits and then grow
                            // if we overflowed it.

                            // Destination arrays reuse the on-disk bits-per-value of each stream.
                            // termOffsetsM has one extra slot for the end offset of the last kept term.
                            PackedInts.Mutable termsDictOffsetsM = PackedInts.GetMutable(NumIndexTerms,
                                termsDictOffsetsIter.BitsPerValue, PackedInts.DEFAULT);
                            PackedInts.Mutable termOffsetsM = PackedInts.GetMutable(NumIndexTerms + 1,
                                termOffsetsIter.BitsPerValue, PackedInts.DEFAULT);

                            TermsDictOffsets = termsDictOffsetsM;
                            TermOffsets = termOffsetsM;

                            // upto: index of the next kept term; termOffsetUpto: running total of
                            // bytes copied for kept terms, i.e. the offset of the next kept term
                            // relative to TermBytesStart.
                            var upto = 0;
                            long termOffsetUpto = 0;

                            while (upto < NumIndexTerms)
                            {
                                // main file offset copies straight over
                                termsDictOffsetsM.Set(upto, termsDictOffsetsIter.Next());

                                termOffsetsM.Set(upto, termOffsetUpto);

                                // The term's length is the gap between its on-disk offset and the
                                // next one, so this consumes TWO entries from the term-offset iterator.
                                var termOffset = termOffsetsIter.Next();
                                var nextTermOffset = termOffsetsIter.Next();
                                var numTermBytes = (int) (nextTermOffset - termOffset);

                                clone.Seek(indexStart + termOffset);

                                Debug.Assert(indexStart + termOffset < clone.Length(),
                                    String.Format("IndexStart: {0}, TermOffset: {1}, Len: {2}", indexStart, termOffset,
                                        clone.Length()));

                                Debug.Assert(indexStart + termOffset + numTermBytes < clone.Length());

                                fgtir._termBytes.Copy(clone, numTermBytes);
                                termOffsetUpto += numTermBytes;

                                upto++;
                                // Stop before skipping so the iterators are not advanced past
                                // the last entry that is actually needed.
                                if (upto == NumIndexTerms)
                                    break;

                                // skip terms:
                                // So far this round consumed 1 dict offset and 2 term offsets.
                                // One extra dict offset plus (divisor - 2) from each iterator
                                // brings both to exactly `divisor` entries consumed per kept term.
                                termsDictOffsetsIter.Next();
                                for (var i = 0; i < fgtir._indexDivisor - 2; i++)
                                {
                                    termOffsetsIter.Next();
                                    termsDictOffsetsIter.Next();
                                }
                            }
                            // Final slot: end offset of the last kept term (upto == NumIndexTerms here).
                            termOffsetsM.Set(upto, termOffsetUpto);

                        }
                        finally
                        {
                            // NOTE(review): these run in sequence; if an earlier Dispose throws,
                            // the later ones are skipped.
                            clone1.Dispose();
                            clone2.Dispose();
                            clone.Dispose();
                        }
                    }
                }
            /// <summary>
            /// Remembers the index layout values for one field and, if the owning reader's
            /// index divisor is positive, loads the terms index during construction.
            /// </summary>
            /// <param name="numIndexTerms">Stored in <c>_numIndexTerms</c>.</param>
            /// <param name="indexStart">Stored in <c>_indexStart</c>.</param>
            /// <param name="termsStart">Stored in <c>_termsStart</c>.</param>
            /// <param name="packedIndexStart">Stored in <c>_packedIndexStart</c>.</param>
            /// <param name="packedOffsetsStart">Stored in <c>_packedOffsetsStart</c>.</param>
            /// <param name="fgtir">Owning reader whose <c>_indexDivisor</c> is consulted.</param>
            public FieldIndexData(int numIndexTerms, long indexStart, long termsStart,
                long packedIndexStart,
                long packedOffsetsStart, FixedGapTermsIndexReader fgtir)
            {
                _fgtir = fgtir;
                _numIndexTerms = numIndexTerms;

                _indexStart = indexStart;
                _termsStart = termsStart;
                _packedIndexStart = packedIndexStart;
                _packedOffsetsStart = packedOffsetsStart;

                // Only load when a positive divisor was configured on the reader.
                if (fgtir._indexDivisor > 0)
                {
                    LoadTermsIndex();
                }
            }
 /// <summary>
 /// Binds this instance to a loaded core field index and its owning reader,
 /// and initializes <c>Term</c> with a newly constructed <c>BytesRef</c>.
 /// </summary>
 /// <param name="fieldIndex">Stored in <c>_fieldIndex</c>.</param>
 /// <param name="fgtir">Stored in <c>_fgtir</c>.</param>
 public IndexEnum(FieldIndexData.CoreFieldIndex fieldIndex, FixedGapTermsIndexReader fgtir)
 {
     _fgtir = fgtir;
     _fieldIndex = fieldIndex;

     Term = new BytesRef();
 }
 /// <summary>
 /// Binds this instance to its owning reader and to the core field index it works on.
 /// </summary>
 /// <param name="outerInstance">Stored in the <c>outerInstance</c> field.</param>
 /// <param name="fieldIndex">Stored in the <c>fieldIndex</c> field.</param>
 public IndexEnum(FixedGapTermsIndexReader outerInstance, FieldIndexData.CoreFieldIndex fieldIndex)
 {
     this.fieldIndex = fieldIndex;
     this.outerInstance = outerInstance;
 }
                /// <summary>
                /// Reads one field's terms index from a clone of the reader's input and keeps it in memory.
                /// </summary>
                /// <remarks>
                /// Term bytes are appended to <c>fgtir._termBytes</c> (its <c>Pointer</c> before appending is
                /// saved in <c>TermBytesStart</c>), and two packed arrays are filled in:
                /// <c>TermsDictOffsets</c> and <c>TermOffsets</c>.
                /// With an index divisor of 1 the data is read sequentially in a single pass; with any
                /// other divisor only every <c>_indexDivisor</c>-th index term is kept.
                /// </remarks>
                /// <param name="indexStart">Position in the input where this field's term bytes begin.</param>
                /// <param name="termsStart">Stored in <c>TermsStart</c>.</param>
                /// <param name="packedIndexStart">Position of the packed terms-dict offsets; the term bytes end here.</param>
                /// <param name="packedOffsetsStart">Position of the packed term-byte offsets.</param>
                /// <param name="numIndexTerms">Count of index terms stored in the file, before subsampling.</param>
                /// <param name="fgtir">Owning reader providing <c>_input</c>, <c>_termBytes</c> and <c>_indexDivisor</c>.</param>
                public CoreFieldIndex(long indexStart, long termsStart, long packedIndexStart, long packedOffsetsStart,
                                      int numIndexTerms, FixedGapTermsIndexReader fgtir)
                {
                    TermsStart     = termsStart;
                    TermBytesStart = fgtir._termBytes.Pointer;

                    // A private clone is used so seeking here leaves the shared input untouched.
                    var clone = (IndexInput)fgtir._input.Clone();

                    clone.Seek(indexStart);

                    // -1 is passed to mean "don't load term index", but
                    // if we are then later loaded it's overwritten with
                    // a real value
                    Debug.Assert(fgtir._indexDivisor > 0);

                    // Kept-term count after subsampling; equals ceil(numIndexTerms / divisor)
                    // for numIndexTerms >= 1, expressed with integer division.
                    NumIndexTerms = 1 + (numIndexTerms - 1) / fgtir._indexDivisor;

                    Debug.Assert(NumIndexTerms > 0,
                                 String.Format("NumIndexTerms: {0}, IndexDivisor: {1}", NumIndexTerms, fgtir._indexDivisor));

                    if (fgtir._indexDivisor == 1)
                    {
                        // Default (load all index terms) is fast -- slurp in the images from disk:

                        try
                        {
                            // Term bytes occupy [indexStart, packedIndexStart); copy the whole range.
                            var numTermBytes = packedIndexStart - indexStart;
                            fgtir._termBytes.Copy(clone, numTermBytes);

                            // The two packed readers below are read consecutively from the
                            // same clone, each starting where the previous read stopped.

                            // records offsets into main terms dict file
                            TermsDictOffsets = PackedInts.GetReader(clone);
                            Debug.Assert(TermsDictOffsets.Size() == numIndexTerms);

                            // records offsets into byte[] term data
                            TermOffsets = PackedInts.GetReader(clone);
                            Debug.Assert(TermOffsets.Size() == 1 + numIndexTerms);
                        }
                        finally
                        {
                            clone.Dispose();
                        }
                    }
                    else
                    {
                        // Get packed iterators
                        // (separate clones: the two packed streams and the term bytes are
                        // consumed interleaved, each at its own position)
                        var clone1 = (IndexInput)fgtir._input.Clone();
                        var clone2 = (IndexInput)fgtir._input.Clone();

                        try
                        {
                            // Subsample the index terms
                            clone1.Seek(packedIndexStart);

                            PackedInts.ReaderIterator termsDictOffsetsIter = PackedInts.GetReaderIterator(clone1,
                                                                                                          PackedInts.DEFAULT_BUFFER_SIZE);

                            clone2.Seek(packedOffsetsStart);

                            PackedInts.ReaderIterator termOffsetsIter = PackedInts.GetReaderIterator(clone2,
                                                                                                     PackedInts.DEFAULT_BUFFER_SIZE);

                            // TODO: often we can get by w/ fewer bits per
                            // value, below.. .but this'd be more complex:
                            // we'd have to try @ fewer bits and then grow
                            // if we overflowed it.

                            // Targets use the same bits-per-value as the on-disk streams.
                            // termOffsetsM holds one extra entry: the end offset of the last kept term.
                            PackedInts.Mutable termsDictOffsetsM = PackedInts.GetMutable(NumIndexTerms,
                                                                                         termsDictOffsetsIter.BitsPerValue, PackedInts.DEFAULT);
                            PackedInts.Mutable termOffsetsM = PackedInts.GetMutable(NumIndexTerms + 1,
                                                                                    termOffsetsIter.BitsPerValue, PackedInts.DEFAULT);

                            TermsDictOffsets = termsDictOffsetsM;
                            TermOffsets      = termOffsetsM;

                            // upto counts kept terms written so far; termOffsetUpto is the number of
                            // bytes copied for kept terms, i.e. the next kept term's offset relative
                            // to TermBytesStart.
                            var  upto           = 0;
                            long termOffsetUpto = 0;

                            while (upto < NumIndexTerms)
                            {
                                // main file offset copies straight over
                                termsDictOffsetsM.Set(upto, termsDictOffsetsIter.Next());

                                termOffsetsM.Set(upto, termOffsetUpto);

                                // Length = next on-disk offset minus this one, which takes
                                // TWO entries off the term-offset iterator.
                                var termOffset     = termOffsetsIter.Next();
                                var nextTermOffset = termOffsetsIter.Next();
                                var numTermBytes   = (int)(nextTermOffset - termOffset);

                                clone.Seek(indexStart + termOffset);

                                Debug.Assert(indexStart + termOffset < clone.Length(),
                                             String.Format("IndexStart: {0}, TermOffset: {1}, Len: {2}", indexStart, termOffset,
                                                           clone.Length()));

                                Debug.Assert(indexStart + termOffset + numTermBytes < clone.Length());

                                fgtir._termBytes.Copy(clone, numTermBytes);
                                termOffsetUpto += numTermBytes;

                                upto++;
                                // Leave before the skip step so neither iterator is advanced
                                // beyond the last entry that is needed.
                                if (upto == NumIndexTerms)
                                {
                                    break;
                                }

                                // skip terms:
                                // This round has consumed 1 dict offset and 2 term offsets so far.
                                // One more dict offset, then (divisor - 2) from each iterator, makes
                                // exactly `divisor` entries consumed from both per kept term.
                                termsDictOffsetsIter.Next();
                                for (var i = 0; i < fgtir._indexDivisor - 2; i++)
                                {
                                    termOffsetsIter.Next();
                                    termsDictOffsetsIter.Next();
                                }
                            }
                            // Trailing entry: end offset of the last kept term (upto == NumIndexTerms here).
                            termOffsetsM.Set(upto, termOffsetUpto);
                        }
                        finally
                        {
                            // NOTE(review): disposed one after another; an exception from an
                            // earlier Dispose would skip the remaining ones.
                            clone1.Dispose();
                            clone2.Dispose();
                            clone.Dispose();
                        }
                    }
                }
 /// <summary>
 /// Stores the core field index and the owning reader, and sets <c>Term</c>
 /// to a newly constructed <c>BytesRef</c>.
 /// </summary>
 /// <param name="fieldIndex">Stored in <c>_fieldIndex</c>.</param>
 /// <param name="fgtir">Stored in <c>_fgtir</c>.</param>
 public IndexEnum(FieldIndexData.CoreFieldIndex fieldIndex, FixedGapTermsIndexReader fgtir)
 {
     _fieldIndex = fieldIndex;
     _fgtir = fgtir;
     Term = new BytesRef();
 }