예제 #1
0
        /// <summary>Returns the offset of the greatest index entry which is less than or equal to term.</summary>
        private int GetIndexOffset(Term term)
        {
            int lo = 0;             // binary search indexTerms[]
            int hi = indexTerms.Length - 1;

            while (hi >= lo)
            {
                int mid   = Number.URShift((lo + hi), 1);
                int delta = term.CompareTo(indexTerms[mid]);
                if (delta < 0)
                {
                    hi = mid - 1;
                }
                else if (delta > 0)
                {
                    lo = mid + 1;
                }
                else
                {
                    return(mid);
                }
            }
            return(hi);
        }
예제 #2
0
        /// <summary>Returns the TermInfo for a Term in the set, or null. </summary>
        private TermInfo Get(Term term, bool useCache)
        {
            if (size == 0)
            {
                return(null);
            }

            EnsureIndexIsRead();

            TermInfo               ti;
            ThreadResources        resources = GetThreadResources();
            Cache <Term, TermInfo> cache     = null;

            if (useCache)
            {
                cache = resources.termInfoCache;
                // check the cache first if the term was recently looked up
                ti = cache.Get(term);
                if (ti != null)
                {
                    return(ti);
                }
            }

            // optimize sequential access: first try scanning cached enum w/o seeking
            SegmentTermEnum enumerator = resources.termEnum;

            if (enumerator.Term != null && ((enumerator.Prev() != null && term.CompareTo(enumerator.Prev()) > 0) || term.CompareTo(enumerator.Term) >= 0))
            {
                int enumOffset = (int)(enumerator.position / totalIndexInterval) + 1;
                if (indexTerms.Length == enumOffset || term.CompareTo(indexTerms[enumOffset]) < 0)
                {
                    // no need to seek

                    int numScans = enumerator.ScanTo(term);
                    if (enumerator.Term != null && term.CompareTo(enumerator.Term) == 0)
                    {
                        ti = enumerator.TermInfo();
                        if (cache != null && numScans > 1)
                        {
                            // we only  want to put this TermInfo into the cache if
                            // scanEnum skipped more than one dictionary entry.
                            // This prevents RangeQueries or WildcardQueries to
                            // wipe out the cache when they iterate over a large numbers
                            // of terms in order
                            cache.Put(term, ti);
                        }
                    }
                    else
                    {
                        ti = null;
                    }

                    return(ti);
                }
            }

            // random-access: must seek
            SeekEnum(enumerator, GetIndexOffset(term));
            enumerator.ScanTo(term);
            if (enumerator.Term != null && term.CompareTo(enumerator.Term) == 0)
            {
                ti = enumerator.TermInfo();
                if (cache != null)
                {
                    cache.Put(term, ti);
                }
            }
            else
            {
                ti = null;
            }
            return(ti);
        }
예제 #3
0
        private void  MergeTermInfos(FormatPostingsFieldsConsumer consumer)
        {
            int base_Renamed = 0;
            int readerCount  = readers.Count;

            for (int i = 0; i < readerCount; i++)
            {
                IndexReader      reader   = readers[i];
                TermEnum         termEnum = reader.Terms();
                SegmentMergeInfo smi      = new SegmentMergeInfo(base_Renamed, termEnum, reader);
                int[]            docMap   = smi.GetDocMap();
                if (docMap != null)
                {
                    if (docMaps == null)
                    {
                        docMaps   = new int[readerCount][];
                        delCounts = new int[readerCount];
                    }
                    docMaps[i]   = docMap;
                    delCounts[i] = smi.reader.MaxDoc - smi.reader.NumDocs();
                }

                base_Renamed += reader.NumDocs();

                System.Diagnostics.Debug.Assert(reader.NumDocs() == reader.MaxDoc - smi.delCount);

                if (smi.Next())
                {
                    queue.Add(smi);
                }
                // initialize queue
                else
                {
                    smi.Dispose();
                }
            }

            SegmentMergeInfo[] match = new SegmentMergeInfo[readers.Count];

            System.String currentField = null;
            FormatPostingsTermsConsumer termsConsumer = null;

            while (queue.Size() > 0)
            {
                int matchSize = 0;                 // pop matching terms
                match[matchSize++] = queue.Pop();
                Term             term = match[0].term;
                SegmentMergeInfo top  = queue.Top();

                while (top != null && term.CompareTo(top.term) == 0)
                {
                    match[matchSize++] = queue.Pop();
                    top = queue.Top();
                }

                if ((System.Object)currentField != (System.Object)term.Field)
                {
                    currentField = term.Field;
                    if (termsConsumer != null)
                    {
                        termsConsumer.Finish();
                    }
                    FieldInfo fieldInfo = fieldInfos.FieldInfo(currentField);
                    termsConsumer            = consumer.AddField(fieldInfo);
                    omitTermFreqAndPositions = fieldInfo.omitTermFreqAndPositions;
                }

                int df = AppendPostings(termsConsumer, match, matchSize);                 // add new TermInfo

                checkAbort.Work(df / 3.0);

                while (matchSize > 0)
                {
                    SegmentMergeInfo smi = match[--matchSize];
                    if (smi.Next())
                    {
                        queue.Add(smi);
                    }
                    // restore queue
                    else
                    {
                        smi.Dispose();                         // done with a segment
                    }
                }
            }
        }