Ejemplo n.º 1
0
        /// <summary>
        /// Creates <c>m_orderedGroups</c> as a sorted set ordered by a
        /// <c>BuildSortedSetComparer</c>, fills it with every group currently held in
        /// <c>groupMap</c>, and then sets the bottom of each comparer to the slot of the
        /// last group in that set.
        /// </summary>
        private void BuildSortedSet()
        {
            var comparer = new BuildSortedSetComparer(this);

            m_orderedGroups = new JCG.SortedSet <CollectedSearchGroup <TGroupValue> >(comparer);
            m_orderedGroups.UnionWith(groupMap.Values);
            Debug.Assert(m_orderedGroups.Count > 0);

            // Resolve the bottom group's slot once instead of once per comparer, the same
            // way Collect does after it re-inserts a group. SetBottom is not expected to
            // modify the set, so the value is invariant across the loop.
            int bottomSlot = m_orderedGroups.Last().ComparerSlot;
            foreach (FieldComparer fc in comparers)
            {
                fc.SetBottom(bottomSlot);
            }
        }
        /// <summary>
        /// Offers document <paramref name="doc"/> to the collector. Once
        /// <c>m_orderedGroups</c> exists the document is first compared against the current
        /// bottom and dropped if it cannot compete. Otherwise one of three things happens:
        /// a new group is created (while fewer than <c>topNGroups</c> groups are held), the
        /// bottom group is evicted and its object reused for the document's group, or the
        /// document's already-tracked group is updated when the document sorts ahead of
        /// that group's current top document.
        /// </summary>
        /// <param name="doc">
        /// Document id handed to the comparers and to <c>GetDocGroupValue</c>; it is stored
        /// in <c>TopDoc</c> as <c>docBase + doc</c>.
        /// </param>
        public virtual void Collect(int doc)
        {
            //System.out.println("FP.collect doc=" + doc);

            // If orderedGroups != null we already have collected N groups and
            // can short circuit by comparing this document to the bottom group,
            // without having to find what group this document belongs to.

            // Even if this document belongs to a group in the top N, we'll know that
            // we don't have to update that group.

            // Downside: if the number of unique groups is very low, this is
            // wasted effort as we will most likely be updating an existing group.
            if (m_orderedGroups != null)
            {
                // No loop condition: every path leaves through return or break.
                // NOTE(review): this relies on compIDXEnd being the index of the last
                // comparer (presumably comparers.Length - 1) -- confirm where it is set.
                for (int compIDX = 0; ; compIDX++)
                {
                    int c = reversed[compIDX] * comparers[compIDX].CompareBottom(doc);
                    if (c < 0)
                    {
                        // Definitely not competitive. So don't even bother to continue
                        return;
                    }
                    else if (c > 0)
                    {
                        // Definitely competitive.
                        break;
                    }
                    else if (compIDX == compIDXEnd)
                    {
                        // Here c=0. If we're at the last comparer, this doc is not
                        // competitive, since docs are visited in doc Id order, which means
                        // this doc cannot compete with any other document in the queue.
                        return;
                    }
                }
            }

            // TODO: should we add option to mean "ignore docs that
            // don't have the group field" (instead of stuffing them
            // under null group)?
            TGroupValue groupValue = GetDocGroupValue(doc);

            if (!groupMap.TryGetValue(groupValue, out CollectedSearchGroup <TGroupValue> group))
            {
                // First time we are seeing this group, or, we've seen
                // it before but it fell out of the top N and is now
                // coming back

                if (groupMap.Count < topNGroups)
                {
                    // Still in startup transient: we have not
                    // seen enough unique groups to start pruning them;
                    // just keep collecting them

                    // Add a new CollectedSearchGroup:
                    CollectedSearchGroup <TGroupValue> sg = new CollectedSearchGroup <TGroupValue>();
                    sg.GroupValue   = CopyDocGroupValue(groupValue, default);
                    // Slots are handed out in insertion order: the current group count
                    // is the next unused slot index.
                    sg.ComparerSlot = groupMap.Count;
                    sg.TopDoc       = docBase + doc;
                    foreach (FieldComparer fc in comparers)
                    {
                        fc.Copy(sg.ComparerSlot, doc);
                    }
                    groupMap[sg.GroupValue] = sg;

                    if (groupMap.Count == topNGroups)
                    {
                        // End of startup transient: we now have max
                        // number of groups; from here on we will drop
                        // bottom group when we insert new one:
                        BuildSortedSet();
                    }

                    return;
                }

                // We already tested that the document is competitive, so replace
                // the bottom group with this new group.
                //CollectedSearchGroup<TGroupValue> bottomGroup = orderedGroups.PollLast();
                // Last() followed by Remove() stands in for the single PollLast() call
                // shown in the commented-out line above.
                // NOTE(review): only this pair runs under the monitor; the later Add on
                // the same set does not -- confirm that is intended.
                CollectedSearchGroup <TGroupValue> bottomGroup;
                UninterruptableMonitor.Enter(m_orderedGroups);
                try
                {
                    bottomGroup = m_orderedGroups.Last();
                    m_orderedGroups.Remove(bottomGroup);
                }
                finally
                {
                    UninterruptableMonitor.Exit(m_orderedGroups);
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(m_orderedGroups.Count == topNGroups - 1);
                }

                groupMap.Remove(bottomGroup.GroupValue);

                // reuse the removed CollectedSearchGroup
                bottomGroup.GroupValue = CopyDocGroupValue(groupValue, bottomGroup.GroupValue);
                bottomGroup.TopDoc     = docBase + doc;

                // The evicted group's comparer slot is kept and overwritten with this
                // document's values.
                foreach (FieldComparer fc in comparers)
                {
                    fc.Copy(bottomGroup.ComparerSlot, doc);
                }

                groupMap[bottomGroup.GroupValue] = bottomGroup;
                m_orderedGroups.Add(bottomGroup);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(m_orderedGroups.Count == topNGroups);
                }

                // The set changed, so point every comparer at whichever group is last now.
                int lastComparerSlot = m_orderedGroups.Last().ComparerSlot;
                foreach (FieldComparer fc in comparers)
                {
                    fc.SetBottom(lastComparerSlot);
                }

                return;
            }

            // Update existing group:
            // The document's values are copied into spareSlot one comparer at a time and
            // compared with the group's current slot; copying stops as soon as the
            // outcome is decided.
            for (int compIDX = 0; ; compIDX++)
            {
                FieldComparer fc = comparers[compIDX];
                fc.Copy(spareSlot, doc);

                int c = reversed[compIDX] * fc.Compare(group.ComparerSlot, spareSlot);
                if (c < 0)
                {
                    // Definitely not competitive.
                    return;
                }
                else if (c > 0)
                {
                    // Definitely competitive; set remaining comparers:
                    // (comparers 0..compIDX were already copied into spareSlot above)
                    for (int compIDX2 = compIDX + 1; compIDX2 < comparers.Length; compIDX2++)
                    {
                        comparers[compIDX2].Copy(spareSlot, doc);
                    }
                    break;
                }
                else if (compIDX == compIDXEnd)
                {
                    // Here c=0. If we're at the last comparer, this doc is not
                    // competitive, since docs are visited in doc Id order, which means
                    // this doc cannot compete with any other document in the queue.
                    return;
                }
            }

            // Remove before updating the group since lookup is done via comparers
            // TODO: optimize this

            // Last group before the change; compared with the new last group further
            // down to decide whether the comparers' bottom must be refreshed.
            CollectedSearchGroup <TGroupValue> prevLast;

            if (m_orderedGroups != null)
            {
                UninterruptableMonitor.Enter(m_orderedGroups);
                try
                {
                    prevLast = m_orderedGroups.Last();
                    m_orderedGroups.Remove(group);
                }
                finally
                {
                    UninterruptableMonitor.Exit(m_orderedGroups);
                }
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(m_orderedGroups.Count == topNGroups - 1);
                }
            }
            else
            {
                prevLast = null;
            }

            group.TopDoc = docBase + doc;

            // Swap slots
            // spareSlot now holds this document's values, so the group takes it over and
            // the group's previous slot becomes the new spare.
            int tmp = spareSlot;

            spareSlot          = group.ComparerSlot;
            group.ComparerSlot = tmp;

            // Re-add the changed group
            if (m_orderedGroups != null)
            {
                m_orderedGroups.Add(group);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(m_orderedGroups.Count == topNGroups);
                }
                var newLast = m_orderedGroups.Last();
                // If we changed the value of the last group, or changed which group was last, then update bottom:
                if (group == newLast || prevLast != newLast)
                {
                    foreach (FieldComparer fc in comparers)
                    {
                        fc.SetBottom(newLast.ComparerSlot);
                    }
                }
            }
        }
Ejemplo n.º 3
0
            /// <summary>
            /// Advances <paramref name="shard"/> until it yields a group that has not been
            /// processed yet (or the shard is exhausted), merges that group into
            /// <c>groupsSeen</c> and <c>queue</c>, and then trims <c>queue</c> so that it
            /// holds at most <paramref name="topN"/> merged groups.
            /// </summary>
            /// <param name="topN">Maximum number of merged groups left in the queue.</param>
            /// <param name="shard">
            /// Shard iterator to advance; it is appended to the <c>Shards</c> list of the
            /// merged group it contributed to.
            /// </param>
            private void UpdateNextGroup(int topN, ShardIter <T> shard)
            {
                while (shard.Iter.MoveNext())
                {
                    ISearchGroup<T> candidate = shard.Next();

                    if (!groupsSeen.TryGetValue(candidate.GroupValue, out MergedGroup<T> merged) || merged == null)
                    {
                        // Unknown group value: register it and put it on the queue.
                        merged = new MergedGroup<T>(candidate.GroupValue);
                        merged.MinShardIndex = shard.ShardIndex;
                        Debug.Assert(candidate.SortValues != null);
                        merged.TopValues = candidate.SortValues;
                        groupsSeen[candidate.GroupValue] = merged;
                        merged.IsInQueue = true;
                        queue.Add(merged);
                    }
                    else
                    {
                        if (merged.IsProcessed)
                        {
                            // Already emitted; try this shard's next group instead.
                            continue;
                        }

                        var sortComparers = groupComp.Comparers;
                        var directions = groupComp.Reversed;
                        int lastIndex = sortComparers.Length - 1;

                        // Decide whether this shard's sort values beat the ones recorded
                        // for the merged group so far.
                        bool competes = false;
                        for (int i = 0; i <= lastIndex; i++)
                        {
                            int cmp = directions[i] * sortComparers[i].CompareValues(candidate.SortValues[i], merged.TopValues[i]);
                            if (cmp > 0)
                            {
                                // Sorts after the recorded values: does not compete.
                                break;
                            }
                            if (cmp < 0)
                            {
                                // Sorts before the recorded values: competes.
                                competes = true;
                                break;
                            }
                            if (i == lastIndex)
                            {
                                // Tied on every sort key: the lower shard index wins.
                                competes = shard.ShardIndex < merged.MinShardIndex;
                            }
                        }

                        if (competes)
                        {
                            // The queue is ordered by these values, so take the group out
                            // before changing them and put it back afterwards.
                            if (merged.IsInQueue)
                            {
                                queue.Remove(merged);
                            }
                            merged.TopValues = candidate.SortValues;
                            merged.MinShardIndex = shard.ShardIndex;
                            queue.Add(merged);
                            merged.IsInQueue = true;
                        }
                    }

                    merged.Shards.Add(shard);
                    break;
                }

                // Drop the trailing groups until the queue fits within topN.
                while (queue.Count > topN)
                {
                    MergedGroup<T> evicted = queue.Last();
                    queue.Remove(evicted);
                    evicted.IsInQueue = false;
                }
            }