/// <summary>
        /// Loads the segment information at segment load time.
        /// </summary>
        /// <param name="indexEnum">
        ///          The term enum. </param>
        /// <param name="indexDivisor">
        ///          The index divisor. </param>
        /// <param name="tiiFileLength">
        ///          The size of the tii file, used to approximate the size of the
        ///          buffer. </param>
        /// <param name="totalIndexInterval">
        ///          The total index interval. </param>
        public TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval)
        {
            this.totalIndexInterval = totalIndexInterval;
            indexSize    = 1 + ((int)indexEnum.size - 1) / indexDivisor;
            skipInterval = indexEnum.skipInterval;
            // this is only an inital size, it will be GCed once the build is complete
            long                 initialSize    = (long)(tiiFileLength * 1.5) / indexDivisor;
            PagedBytes           dataPagedBytes = new PagedBytes(EstimatePageBits(initialSize));
            PagedBytesDataOutput dataOutput     = dataPagedBytes.GetDataOutput();

            int            bitEstimate  = 1 + MathUtil.Log(tiiFileLength, 2);
            GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, indexSize, PackedInt32s.DEFAULT);

            string         currentField = null;
            IList <string> fieldStrs    = new List <string>();
            int            fieldCounter = -1;

            for (int i = 0; indexEnum.Next(); i++)
            {
                Term term = indexEnum.Term();
                if (currentField == null || !currentField.Equals(term.Field, StringComparison.Ordinal))
                {
                    currentField = term.Field;
                    fieldStrs.Add(currentField);
                    fieldCounter++;
                }
                TermInfo termInfo = indexEnum.TermInfo();
                indexToTerms.Set(i, dataOutput.GetPosition());
                dataOutput.WriteVInt32(fieldCounter);
                dataOutput.WriteString(term.Text());
                dataOutput.WriteVInt32(termInfo.DocFreq);
                if (termInfo.DocFreq >= skipInterval)
                {
                    dataOutput.WriteVInt32(termInfo.SkipOffset);
                }
                dataOutput.WriteVInt64(termInfo.FreqPointer);
                dataOutput.WriteVInt64(termInfo.ProxPointer);
                dataOutput.WriteVInt64(indexEnum.indexPointer);
                for (int j = 1; j < indexDivisor; j++)
                {
                    if (!indexEnum.Next())
                    {
                        break;
                    }
                }
            }

            fields = new Term[fieldStrs.Count];
            for (int i = 0; i < fields.Length; i++)
            {
                fields[i] = new Term(fieldStrs[i]);
            }

            dataPagedBytes.Freeze(true);
            dataInput         = dataPagedBytes.GetDataInput();
            indexToDataOffset = indexToTerms.Mutable;

            ramBytesUsed = fields.Length * (RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.ShallowSizeOfInstance(typeof(Term))) + dataPagedBytes.RamBytesUsed() + indexToDataOffset.RamBytesUsed();
        }
Esempio n. 2
0
        /// <summary>
        /// Returns the position of a <see cref="Term"/> in the set or -1. </summary>
        internal long GetPosition(Term term)
        {
            if (size == 0)
            {
                return(-1);
            }

            EnsureIndexIsRead();
            int indexOffset = index.GetIndexOffset(term);

            SegmentTermEnum enumerator = GetThreadResources().termEnum;

            index.SeekEnum(enumerator, indexOffset);

            while (CompareAsUTF16(term, enumerator.Term()) > 0 && enumerator.Next())
            {
            }

            if (CompareAsUTF16(term, enumerator.Term()) == 0)
            {
                return(enumerator.position);
            }
            else
            {
                return(-1);
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Loads the segment information at segment load time.
        /// </summary>
        /// <param name="indexEnum">
        ///          the term enum. </param>
        /// <param name="indexDivisor">
        ///          the index divisor. </param>
        /// <param name="tiiFileLength">
        ///          the size of the tii file, used to approximate the size of the
        ///          buffer. </param>
        /// <param name="totalIndexInterval">
        ///          the total index interval. </param>
        public TermInfosReaderIndex(SegmentTermEnum indexEnum, int indexDivisor, long tiiFileLength, int totalIndexInterval)
        {
            this.TotalIndexInterval = totalIndexInterval;
            IndexSize = 1 + ((int)indexEnum.Size - 1) / indexDivisor;
            SkipInterval = indexEnum.SkipInterval;
            // this is only an inital size, it will be GCed once the build is complete
            long initialSize = (long)(tiiFileLength * 1.5) / indexDivisor;
            PagedBytes dataPagedBytes = new PagedBytes(EstimatePageBits(initialSize));
            PagedBytesDataOutput dataOutput = dataPagedBytes.DataOutput;

            int bitEstimate = 1 + MathUtil.Log(tiiFileLength, 2);
            GrowableWriter indexToTerms = new GrowableWriter(bitEstimate, IndexSize, PackedInts.DEFAULT);

            string currentField = null;
            IList<string> fieldStrs = new List<string>();
            int fieldCounter = -1;
            for (int i = 0; indexEnum.Next(); i++)
            {
                Term term = indexEnum.Term();
                if (currentField == null || !currentField.Equals(term.Field()))
                {
                    currentField = term.Field();
                    fieldStrs.Add(currentField);
                    fieldCounter++;
                }
                TermInfo termInfo = indexEnum.TermInfo();
                indexToTerms.Set(i, dataOutput.Position);
                dataOutput.WriteVInt(fieldCounter);
                dataOutput.WriteString(term.Text());
                dataOutput.WriteVInt(termInfo.DocFreq);
                if (termInfo.DocFreq >= SkipInterval)
                {
                    dataOutput.WriteVInt(termInfo.SkipOffset);
                }
                dataOutput.WriteVLong(termInfo.FreqPointer);
                dataOutput.WriteVLong(termInfo.ProxPointer);
                dataOutput.WriteVLong(indexEnum.IndexPointer);
                for (int j = 1; j < indexDivisor; j++)
                {
                    if (!indexEnum.Next())
                    {
                        break;
                    }
                }
            }

            Fields = new Term[fieldStrs.Count];
            for (int i = 0; i < Fields.Length; i++)
            {
                Fields[i] = new Term(fieldStrs[i]);
            }

            dataPagedBytes.Freeze(true);
            DataInput = dataPagedBytes.DataInput;
            IndexToDataOffset = indexToTerms.Mutable;

            RamBytesUsed_Renamed = Fields.Length * (RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.ShallowSizeOfInstance(typeof(Term))) + dataPagedBytes.RamBytesUsed() + IndexToDataOffset.RamBytesUsed();
        }
        private Term FindTermThatWouldBeAtIndex(SegmentTermEnum termEnum, int index)
        {
            int termPosition = index * TermIndexInterval * IndexDivisor;

            for (int i = 0; i < termPosition; i++)
            {
                // TODO: this test just uses random terms, so this is always possible
                AssumeTrue("ran out of terms", termEnum.Next());
            }
            Term term = termEnum.Term();

            // An indexed term is only written when the term after
            // it exists, so, if the number of terms is 0 mod
            // termIndexInterval, the last index term will not be
            // written; so we require a term after this term
            // as well:
            AssumeTrue("ran out of terms", termEnum.Next());
            return(term);
        }
Esempio n. 5
0
            public override BytesRef Next()
            {
                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("TE.next()");
                }
                if (skipNext)
                {
                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  skipNext=true");
                    }
                    skipNext = false;
                    if (termEnum.Term() == null)
                    {
                        return(null);
                        // PreFlex codec interns field names:
                    }
                    else if (termEnum.Term().Field != internedFieldName)
                    {
                        return(null);
                    }
                    else
                    {
                        return(current = termEnum.Term().Bytes);
                    }
                }

                // TODO: can we use STE's prevBuffer here?
                prevTerm.CopyBytes(termEnum.Term().Bytes);

                if (termEnum.Next() && termEnum.Term().Field == internedFieldName)
                {
                    newSuffixStart = termEnum.newSuffixStart;
                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  newSuffixStart=" + newSuffixStart);
                    }
                    SurrogateDance();
                    Term t = termEnum.Term();
                    if (t == null || t.Field != internedFieldName)
                    {
                        // PreFlex codec interns field names; verify:
                        Debug.Assert(t == null || !t.Field.Equals(internedFieldName, StringComparison.Ordinal));
                        current = null;
                    }
                    else
                    {
                        current = t.Bytes;
                    }
                    return(current);
                }
                else
                {
                    // this field is exhausted, but we have to give
                    // surrogateDance a chance to seek back:
                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  force cont");
                    }
                    //newSuffixStart = prevTerm.length;
                    newSuffixStart = 0;
                    SurrogateDance();

                    Term t = termEnum.Term();
                    if (t == null || t.Field != internedFieldName)
                    {
                        // PreFlex codec interns field names; verify:
                        Debug.Assert(t == null || !t.Field.Equals(internedFieldName, StringComparison.Ordinal));
                        return(null);
                    }
                    else
                    {
                        current = t.Bytes;
                        return(current);
                    }
                }
            }
 private Term FindTermThatWouldBeAtIndex(SegmentTermEnum termEnum, int index)
 {
     int termPosition = index * TermIndexInterval * IndexDivisor;
     for (int i = 0; i < termPosition; i++)
     {
         // TODO: this test just uses random terms, so this is always possible
         AssumeTrue("ran out of terms", termEnum.Next());
     }
     Term term = termEnum.Term();
     // An indexed term is only written when the term after
     // it exists, so, if the number of terms is 0 mod
     // termIndexInterval, the last index term will not be
     // written; so we require a term after this term
     // as well:
     AssumeTrue("ran out of terms", termEnum.Next());
     return term;
 }
Esempio n. 7
0
            public override BytesRef Next()
            {
                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("TE.next()");
                }
                if (SkipNext)
                {
                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  skipNext=true");
                    }
                    SkipNext = false;
                    if (TermEnum.Term() == null)
                    {
                        return(null);
                        // PreFlex codec interns field names:
                    }
                    else if (TermEnum.Term().Field() != InternedFieldName)
                    {
                        return(null);
                    }
                    else
                    {
                        return(Current = TermEnum.Term().Bytes());
                    }
                }

                // TODO: can we use STE's prevBuffer here?
                PrevTerm.CopyBytes(TermEnum.Term().Bytes());

                if (TermEnum.Next() && TermEnum.Term().Field() == InternedFieldName)
                {
                    NewSuffixStart = TermEnum.NewSuffixStart;
                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  newSuffixStart=" + NewSuffixStart);
                    }
                    SurrogateDance();
                    Term t = TermEnum.Term();
                    if (t == null || t.Field() != InternedFieldName)
                    {
                        // PreFlex codec interns field names; verify:
                        Debug.Assert(t == null || !t.Field().Equals(InternedFieldName));
                        Current = null;
                    }
                    else
                    {
                        Current = t.Bytes();
                    }
                    return(Current);
                }
                else
                {
                    // this field is exhausted, but we have to give
                    // surrogateDance a chance to seek back:
                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  force cont");
                    }
                    //newSuffixStart = prevTerm.length;
                    NewSuffixStart = 0;
                    SurrogateDance();

                    Term t = TermEnum.Term();
                    if (t == null || t.Field() != InternedFieldName)
                    {
                        // PreFlex codec interns field names; verify:
                        Debug.Assert(t == null || !t.Field().Equals(InternedFieldName));
                        return(null);
                    }
                    else
                    {
                        Current = t.Bytes();
                        return(Current);
                    }
                }
            }