/// <summary>
/// Offers a facet value/count pair to the bounded set of retained facet entries.
/// Counts below the current competitive minimum are rejected immediately. When
/// the set is already at capacity, the candidate must have a successor in the
/// set (i.e. sort before some existing entry) to be competitive; the current
/// worst entry is then evicted to make room. Once the set (re)reaches capacity,
/// the competitive minimum is raised to the count of the worst retained entry.
/// </summary>
public virtual void AddFacetCount(BytesRef facetValue, int count)
{
    // Fast reject: cannot compete with what we already retain.
    if (count < currentMin)
    {
        return;
    }

    var candidate = new FacetEntry(facetValue, count);

    if (facetEntries.Count == maxSize)
    {
        // No successor means the candidate would sort last — not competitive.
        if (!facetEntries.TryGetSuccessor(candidate, out FacetEntry _))
        {
            return;
        }

        // Evict the current worst entry to make room for the candidate.
        var worst = facetEntries.Max;
        if (worst != null)
        {
            facetEntries.Remove(worst);
        }
    }

    facetEntries.Add(candidate);

    if (facetEntries.Count == maxSize)
    {
        // Set is at capacity: raise the bar for future candidates to the
        // count of the worst (last-sorted) retained entry.
        var tail = facetEntries.Max;
        currentMin = tail != null ? tail.Count : 0;
    }
}
/// <summary>
/// Pulls the next not-yet-processed group from <paramref name="shard"/>'s
/// iterator and merges it into the global queue. Exactly one group is
/// consumed per call (already-processed groups are skipped); once a group is
/// accepted the shard is registered on it and iteration stops. Finally any
/// groups beyond the top <paramref name="topN"/> are pruned from the queue.
/// </summary>
private void UpdateNextGroup(int topN, ShardIter<T> shard)
{
    while (shard.Iter.MoveNext())
    {
        ISearchGroup<T> group = shard.Next();
        // A group is "new" when its value has never been seen, or the map holds null for it.
        bool isNew = !groupsSeen.TryGetValue(group.GroupValue, out MergedGroup<T> mergedGroup) || mergedGroup == null;
        //System.out.println("  next group=" + (group.groupValue == null ? "null" : ((BytesRef) group.groupValue).utf8ToString()) + " sort=" + Arrays.toString(group.sortValues));

        if (isNew)
        {
            // Start a new group:
            //System.out.println("    new");
            mergedGroup = new MergedGroup<T>(group.GroupValue);
            mergedGroup.MinShardIndex = shard.ShardIndex;
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(group.SortValues != null);
            }
            mergedGroup.TopValues = group.SortValues;
            groupsSeen[group.GroupValue] = mergedGroup;
            mergedGroup.IsInQueue = true;
            queue.Add(mergedGroup);
        }
        else if (mergedGroup.IsProcessed)
        {
            // This shard produced a group that we already
            // processed; move on to next group...
            continue;
        }
        else
        {
            //System.out.println("    old");
            // Existing, still-pending group: decide whether this shard's sort
            // values beat the group's current best.
            bool competes = false;
            for (int compIDX = 0; compIDX < groupComp.Comparers.Length; compIDX++)
            {
                // Reversed[compIDX] flips the comparison for descending sort fields.
                int cmp = groupComp.Reversed[compIDX] * groupComp.Comparers[compIDX].CompareValues(group.SortValues[compIDX], mergedGroup.TopValues[compIDX]);
                if (cmp < 0)
                {
                    // Definitely competes
                    competes = true;
                    break;
                }
                else if (cmp > 0)
                {
                    // Definitely does not compete
                    break;
                }
                else if (compIDX == groupComp.Comparers.Length - 1)
                {
                    // Full tie on all sort fields: the lower shard index wins,
                    // keeping the merge deterministic across shards.
                    if (shard.ShardIndex < mergedGroup.MinShardIndex)
                    {
                        competes = true;
                    }
                }
            }

            //System.out.println("    competes=" + competes);

            if (competes)
            {
                // Group's sort changed -- remove & re-insert
                // (SortedSet ordering is only correct if the key is stable while
                // the element is in the set, so mutate TopValues outside it).
                if (mergedGroup.IsInQueue)
                {
                    queue.Remove(mergedGroup);
                }
                mergedGroup.TopValues = group.SortValues;
                mergedGroup.MinShardIndex = shard.ShardIndex;
                queue.Add(mergedGroup);
                mergedGroup.IsInQueue = true;
            }
        }

        // Record that this shard contributes to the group, then stop:
        // only one group is consumed per call.
        mergedGroup.Shards.Add(shard);
        break;
    }

    // Prune un-competitive groups:
    while (queue.Count > topN)
    {
        MergedGroup<T> group = queue.Max;
        queue.Remove(group);
        //System.out.println("PRUNE: " + group);
        group.IsInQueue = false;
    }
}
/// <summary>
/// If back plus this arc is competitive then add to queue:
/// The candidate path's cost is the path cost so far plus this arc's output;
/// when the queue is full the candidate must beat (or alpha-sort before, on a
/// cost tie) the current bottom path to be admitted, after which the bottom
/// is dropped to restore the queue's depth bound.
/// </summary>
protected virtual void AddIfCompetitive(FSTPath<T> path)
{
    Debug.Assert(queue != null);

    T cost = fst.Outputs.Add(path.Cost, path.Arc.Output);
    //System.out.println("  addIfCompetitive queue.size()=" + queue.size() + " path=" + path + " + label=" + path.arc.label);

    if (queue.Count == maxQueueDepth)
    {
        FSTPath<T> bottom = queue.Max;
        int comp = comparer.Compare(cost, bottom.Cost);
        if (comp > 0)
        {
            // Doesn't compete
            return;
        }
        else if (comp == 0)
        {
            // Tie break by alpha sort on the input:
            // Temporarily append this arc's label to the path input, compare
            // against the bottom path, then undo the append (Length--) so the
            // shared path object is left unchanged.
            path.Input.Grow(path.Input.Length + 1);
            path.Input.Int32s[path.Input.Length++] = path.Arc.Label;
            int cmp = bottom.Input.CompareTo(path.Input);
            path.Input.Length--;

            // We should never see dups:
            Debug.Assert(cmp != 0);

            if (cmp < 0)
            {
                // Doesn't compete
                return;
            }
        }
        // Competes
    }
    else
    {
        // Queue isn't full yet, so any path we hit competes:
    }

    // copy over the current input to the new input
    // and add the arc.label to the end
    Int32sRef newInput = new Int32sRef(path.Input.Length + 1);
    Array.Copy(path.Input.Int32s, 0, newInput.Int32s, 0, path.Input.Length);
    newInput.Int32s[path.Input.Length] = path.Arc.Label;
    newInput.Length = path.Input.Length + 1;
    FSTPath<T> newPath = new FSTPath<T>(cost, path.Arc, newInput);

    queue.Add(newPath);

    if (queue.Count == maxQueueDepth + 1)
    {
        // LUCENENET NOTE: SortedSet doesn't have atomic operations,
        // so we need to add some thread safety just in case.
        // Perhaps it might make sense to wrap SortedSet into a type
        // that provides thread safety.
        // NOTE(review): only the removal is locked here — the Add above and the
        // Count/Max reads are not — so this does not make the method thread-safe
        // as a whole; confirm whether concurrent callers are actually possible.
        lock (syncLock)
        {
            queue.Remove(queue.Max);
        }
    }
}
/// <summary>
/// Collects one document into the first-pass grouping state: either rejects it
/// as non-competitive, starts a new group (startup transient), replaces the
/// current bottom group, or updates the existing group the document belongs to.
/// </summary>
public virtual void Collect(int doc)
{
    //System.out.println("FP.collect doc=" + doc);

    // If orderedGroups != null we already have collected N groups and
    // can short circuit by comparing this document to the bottom group,
    // without having to find what group this document belongs to.

    // Even if this document belongs to a group in the top N, we'll know that
    // we don't have to update that group.

    // Downside: if the number of unique groups is very low, this is
    // wasted effort as we will most likely be updating an existing group.
    if (m_orderedGroups != null)
    {
        for (int compIDX = 0; ; compIDX++)
        {
            int c = reversed[compIDX] * comparers[compIDX].CompareBottom(doc);
            if (c < 0)
            {
                // Definitely not competitive. So don't even bother to continue
                return;
            }
            else if (c > 0)
            {
                // Definitely competitive.
                break;
            }
            else if (compIDX == compIDXEnd)
            {
                // Here c=0. If we're at the last comparer, this doc is not
                // competitive, since docs are visited in doc Id order, which means
                // this doc cannot compete with any other document in the queue.
                return;
            }
        }
    }

    // TODO: should we add option to mean "ignore docs that
    // don't have the group field" (instead of stuffing them
    // under null group)?
    TGroupValue groupValue = GetDocGroupValue(doc);

    if (!groupMap.TryGetValue(groupValue, out CollectedSearchGroup<TGroupValue> group))
    {
        // First time we are seeing this group, or, we've seen
        // it before but it fell out of the top N and is now
        // coming back

        if (groupMap.Count < topNGroups)
        {
            // Still in startup transient: we have not
            // seen enough unique groups to start pruning them;
            // just keep collecting them

            // Add a new CollectedSearchGroup:
            CollectedSearchGroup<TGroupValue> sg = new CollectedSearchGroup<TGroupValue>();
            sg.GroupValue = CopyDocGroupValue(groupValue, default);
            // Each group owns one comparer slot; during startup the slot index
            // is simply the insertion order.
            sg.ComparerSlot = groupMap.Count;
            sg.TopDoc = docBase + doc;
            foreach (FieldComparer fc in comparers)
            {
                fc.Copy(sg.ComparerSlot, doc);
            }
            groupMap[sg.GroupValue] = sg;

            if (groupMap.Count == topNGroups)
            {
                // End of startup transient: we now have max
                // number of groups; from here on we will drop
                // bottom group when we insert new one:
                BuildSortedSet();
            }

            return;
        }

        // We already tested that the document is competitive, so replace
        // the bottom group with this new group.
        //CollectedSearchGroup<TGroupValue> bottomGroup = orderedGroups.PollLast();
        CollectedSearchGroup<TGroupValue> bottomGroup;
        // NOTE(review): this locks on the collection object itself; a dedicated
        // private lock object would be the usual pattern — confirm whether other
        // code in this class synchronizes on m_orderedGroups too.
        UninterruptableMonitor.Enter(m_orderedGroups);
        try
        {
            bottomGroup = m_orderedGroups.Last();
            m_orderedGroups.Remove(bottomGroup);
        }
        finally
        {
            UninterruptableMonitor.Exit(m_orderedGroups);
        }
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(m_orderedGroups.Count == topNGroups - 1);
        }

        groupMap.Remove(bottomGroup.GroupValue);

        // reuse the removed CollectedSearchGroup
        bottomGroup.GroupValue = CopyDocGroupValue(groupValue, bottomGroup.GroupValue);
        bottomGroup.TopDoc = docBase + doc;

        foreach (FieldComparer fc in comparers)
        {
            fc.Copy(bottomGroup.ComparerSlot, doc);
        }

        groupMap[bottomGroup.GroupValue] = bottomGroup;
        m_orderedGroups.Add(bottomGroup);
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(m_orderedGroups.Count == topNGroups);
        }

        // The (possibly new) last group defines the competitive bottom for
        // the short-circuit check at the top of this method.
        int lastComparerSlot = m_orderedGroups.Last().ComparerSlot;
        foreach (FieldComparer fc in comparers)
        {
            fc.SetBottom(lastComparerSlot);
        }

        return;
    }

    // Update existing group:
    for (int compIDX = 0; ; compIDX++)
    {
        FieldComparer fc = comparers[compIDX];
        // Stage this doc's value in the spare slot so it can be compared
        // against the group's current top doc.
        fc.Copy(spareSlot, doc);

        int c = reversed[compIDX] * fc.Compare(group.ComparerSlot, spareSlot);
        if (c < 0)
        {
            // Definitely not competitive.
            return;
        }
        else if (c > 0)
        {
            // Definitely competitive; set remaining comparers:
            for (int compIDX2 = compIDX + 1; compIDX2 < comparers.Length; compIDX2++)
            {
                comparers[compIDX2].Copy(spareSlot, doc);
            }
            break;
        }
        else if (compIDX == compIDXEnd)
        {
            // Here c=0. If we're at the last comparer, this doc is not
            // competitive, since docs are visited in doc Id order, which means
            // this doc cannot compete with any other document in the queue.
            return;
        }
    }

    // Remove before updating the group since lookup is done via comparers
    // TODO: optimize this
    CollectedSearchGroup<TGroupValue> prevLast;
    if (m_orderedGroups != null)
    {
        UninterruptableMonitor.Enter(m_orderedGroups);
        try
        {
            prevLast = m_orderedGroups.Last();
            m_orderedGroups.Remove(group);
        }
        finally
        {
            UninterruptableMonitor.Exit(m_orderedGroups);
        }
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(m_orderedGroups.Count == topNGroups - 1);
        }
    }
    else
    {
        prevLast = null;
    }

    group.TopDoc = docBase + doc;

    // Swap slots: the spare slot (holding this doc's values) becomes the
    // group's slot, and the group's old slot becomes the new spare.
    int tmp = spareSlot;
    spareSlot = group.ComparerSlot;
    group.ComparerSlot = tmp;

    // Re-add the changed group
    if (m_orderedGroups != null)
    {
        m_orderedGroups.Add(group);
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(m_orderedGroups.Count == topNGroups);
        }
        var newLast = m_orderedGroups.Last();
        // If we changed the value of the last group, or changed which group was last, then update bottom:
        if (group == newLast || prevLast != newLast)
        {
            foreach (FieldComparer fc in comparers)
            {
                fc.SetBottom(newLast.ComparerSlot);
            }
        }
    }
}