示例#1
0
            /// <summary>
            /// Builds an ordinal map that translates ordinals between each entry of
            /// <paramref name="subs"/> and a single merged (global) ordinal space.
            /// </summary>
            /// <param name="owner"> a cache key </param>
            /// <param name="subs"> <see cref="TermsEnum"/>s that support <c>Ord()</c>. They need
            ///             not be dense (e.g. they can be FilteredTermsEnums). </param>
            /// <exception cref="IOException"> if an I/O error occurred. </exception>
            public OrdinalMap(object owner, TermsEnum[] subs)
            {
                this.Owner = owner;
                GlobalOrdDeltas = new MonotonicAppendingLongBuffer(PackedInts.COMPACT);
                FirstSegments = new AppendingPackedLongBuffer(PackedInts.COMPACT);

                // One delta buffer per sub, indexed by that sub's position in the array.
                int segmentCount = subs.Length;
                OrdDeltas = new MonotonicAppendingLongBuffer[segmentCount];
                for (int seg = 0; seg < segmentCount; seg++)
                {
                    OrdDeltas[seg] = new MonotonicAppendingLongBuffer();
                }

                // For each sub: the lowest per-segment ordinal that has no delta recorded yet.
                long[] nextUnmappedOrd = new long[segmentCount];

                // Wrap every sub so that a single MultiTermsEnum can walk all of them together.
                ReaderSlice[] slices = new ReaderSlice[segmentCount];
                TermsEnumIndex[] indexes = new TermsEnumIndex[segmentCount];
                for (int seg = 0; seg < segmentCount; seg++)
                {
                    slices[seg] = new ReaderSlice(0, 0, seg);
                    indexes[seg] = new TermsEnumIndex(subs[seg], seg);
                }

                MultiTermsEnum merged = new MultiTermsEnum(slices);
                merged.Reset(indexes);

                // Each successful Next() yields one term of the merged space, so the loop
                // counter doubles as that term's global ordinal.
                for (long mergedOrd = 0; merged.Next() != null; mergedOrd++)
                {
                    TermsEnumWithSlice[] hits = merged.MatchArray;
                    for (int hit = 0; hit < merged.MatchCount; hit++)
                    {
                        TermsEnumWithSlice match = hits[hit];
                        int seg = match.Index;
                        long segOrd = match.Terms.Ord();
                        long delta = mergedOrd - segOrd;

                        // Only the first match of a term is recorded on the global side:
                        // its segment index and the global-to-segment delta.
                        bool isFirstMatch = hit == 0;
                        if (isFirstMatch)
                        {
                            FirstSegments.Add(seg);
                            GlobalOrdDeltas.Add(delta);
                        }

                        // Segment side: record the delta for every ordinal up to and including
                        // segOrd, so ordinals skipped by a non-dense enum get an entry too.
                        MonotonicAppendingLongBuffer segDeltas = OrdDeltas[seg];
                        for (; nextUnmappedOrd[seg] <= segOrd; nextUnmappedOrd[seg]++)
                        {
                            segDeltas.Add(delta);
                        }
                    }
                }

                // All mappings are recorded; freeze every buffer.
                FirstSegments.Freeze();
                GlobalOrdDeltas.Freeze();
                foreach (MonotonicAppendingLongBuffer buffer in OrdDeltas)
                {
                    buffer.Freeze();
                }
            }
示例#2
0
        /// <summary>
        /// Finalizes the current document: sorts the term ids buffered for it, appends each
        /// distinct id to <c>Pending</c>, records the distinct count in <c>PendingCounts</c>,
        /// and then resets the buffer position and advances to the next document.
        /// </summary>
        private void FinishCurrentDoc()
        {
            // Sorting places equal ids next to each other, so one pass can drop duplicates.
            Array.Sort(CurrentValues, 0, CurrentUpto);

            int distinctCount = 0;
            // Sentinel compared against the first id; assumes -1 is never a real term id — TODO confirm.
            int previousId = -1;
            for (int pos = 0; pos < CurrentUpto; pos++)
            {
                int currentId = CurrentValues[pos];
                if (currentId == previousId)
                {
                    continue; // same id as the one just seen: skip the duplicate
                }

                Pending.Add(currentId); // record the term id
                distinctCount++;
                previousId = currentId;
            }

            // Record how many distinct term ids this document had and track the largest count so far.
            PendingCounts.Add(distinctCount);
            MaxCount = Math.Max(MaxCount, distinctCount);

            // Start the next document with an empty buffer.
            CurrentUpto = 0;
            CurrentDoc++;
        }
示例#3
0
 /// <summary>
 /// Builds an ordinal map that translates ordinals between each entry of
 /// <paramref name="subs"/> and a single merged (global) ordinal space.
 /// </summary>
 /// <param name="owner"> a cache key </param>
 /// <param name="subs"> <see cref="TermsEnum"/>s that support <c>Ord()</c>. They need
 ///             not be dense (e.g. they can be FilteredTermsEnums). </param>
 /// <exception cref="IOException"> if an I/O error occurred. </exception>
 public OrdinalMap(object owner, TermsEnum[] subs)
 {
     this.Owner = owner;
     GlobalOrdDeltas = new MonotonicAppendingLongBuffer(PackedInts.COMPACT);
     FirstSegments = new AppendingPackedLongBuffer(PackedInts.COMPACT);

     // One delta buffer per sub, indexed by that sub's position in the array.
     int segmentCount = subs.Length;
     OrdDeltas = new MonotonicAppendingLongBuffer[segmentCount];
     for (int seg = 0; seg < segmentCount; seg++)
     {
         OrdDeltas[seg] = new MonotonicAppendingLongBuffer();
     }

     // For each sub: the lowest per-segment ordinal that has no delta recorded yet.
     long[] nextUnmappedOrd = new long[segmentCount];

     // Wrap every sub so that a single MultiTermsEnum can walk all of them together.
     ReaderSlice[] slices = new ReaderSlice[segmentCount];
     TermsEnumIndex[] indexes = new TermsEnumIndex[segmentCount];
     for (int seg = 0; seg < segmentCount; seg++)
     {
         slices[seg] = new ReaderSlice(0, 0, seg);
         indexes[seg] = new TermsEnumIndex(subs[seg], seg);
     }

     MultiTermsEnum merged = new MultiTermsEnum(slices);
     merged.Reset(indexes);

     // Each successful Next() yields one term of the merged space, so the loop
     // counter doubles as that term's global ordinal.
     for (long mergedOrd = 0; merged.Next() != null; mergedOrd++)
     {
         TermsEnumWithSlice[] hits = merged.MatchArray;
         for (int hit = 0; hit < merged.MatchCount; hit++)
         {
             TermsEnumWithSlice match = hits[hit];
             int seg = match.Index;
             long segOrd = match.Terms.Ord();
             long delta = mergedOrd - segOrd;

             // Only the first match of a term is recorded on the global side:
             // its segment index and the global-to-segment delta.
             bool isFirstMatch = hit == 0;
             if (isFirstMatch)
             {
                 FirstSegments.Add(seg);
                 GlobalOrdDeltas.Add(delta);
             }

             // Segment side: record the delta for every ordinal up to and including
             // segOrd, so ordinals skipped by a non-dense enum get an entry too.
             MonotonicAppendingLongBuffer segDeltas = OrdDeltas[seg];
             for (; nextUnmappedOrd[seg] <= segOrd; nextUnmappedOrd[seg]++)
             {
                 segDeltas.Add(delta);
             }
         }
     }

     // All mappings are recorded; freeze every buffer.
     FirstSegments.Freeze();
     GlobalOrdDeltas.Freeze();
     foreach (MonotonicAppendingLongBuffer buffer in OrdDeltas)
     {
         buffer.Freeze();
     }
 }