/// <summary>
/// Value equality for a partial approximate match: two instances are equal when they have
/// the same runtime type, the base class considers them equal, their multimatch start
/// indices agree, and their current trie nodes are equal (or both null).
/// </summary>
/// <param name="o">Object to compare against; may be null</param>
/// <returns>true if <paramref name="o"/> is equal to this instance</returns>
// NOTE(review): a matching GetHashCode override is not visible in this chunk - confirm one exists.
public override bool Equals(object o)
{
    if (ReferenceEquals(this, o))
    {
        return true;
    }
    if (o == null || GetType() != o.GetType())
    {
        return false;
    }
    if (!base.Equals(o))
    {
        return false;
    }

    // The runtime-type check above guarantees this cast succeeds. The type arguments are
    // required: the raw generic name (without <K, V>) does not denote a type in C#.
    TrieMapMatcher.PartialApproxMatch<K, V> that = (TrieMapMatcher.PartialApproxMatch<K, V>)o;
    if (lastMultimatchedMatchedStartIndex != that.lastMultimatchedMatchedStartIndex)
    {
        return false;
    }
    if (lastMultimatchedOriginalStartIndex != that.lastMultimatchedOriginalStartIndex)
    {
        return false;
    }
    if (trie != null ? !trie.Equals(that.trie) : that.trie != null)
    {
        return false;
    }
    return true;
}
/// <summary>
/// Extends <paramref name="pam"/> by one edit step (pairing target element <paramref name="a"/>
/// with trie key <paramref name="b"/>) and, unless the step is pruned by cost, adds the
/// resulting partial match to <paramref name="queue"/> and possibly to <paramref name="best"/>.
/// </summary>
/// <param name="queue">Queue of candidates for the cell currently being filled</param>
/// <param name="best">Queue collecting complete matches</param>
/// <param name="costFunction">Cost function used to price the step</param>
/// <param name="pam">Partial match being extended</param>
/// <param name="a">Target element for this step, or null</param>
/// <param name="b">Trie key for this step, or null</param>
/// <param name="multimatch">Whether sequences of trie entries may be matched</param>
/// <param name="complete">Whether the candidate may be recorded in <paramref name="best"/></param>
/// <returns>false if the step was pruned because of its cost; true otherwise</returns>
private bool AddToQueue(TrieMapMatcher.MatchQueue<K, V> queue, TrieMapMatcher.MatchQueue<K, V> best, IMatchCostFunction<K, V> costFunction, TrieMapMatcher.PartialApproxMatch<K, V> pam, K a, K b, bool multimatch, bool complete)
{
    double stepCost = costFunction.Cost(a, b, pam.GetMatchedLength());
    double totalCost = pam.cost + stepCost;

    // Prune: over the queue's cost ceiling (double.MaxValue means no ceiling is applied).
    if (queue.maxCost != double.MaxValue && totalCost > queue.maxCost)
    {
        return false;
    }
    // Prune: best already holds at least queue.maxSize entries and this one costs more than its top cost.
    if (best.Size() >= queue.maxSize && totalCost > best.TopCost())
    {
        return false;
    }

    TrieMapMatcher.PartialApproxMatch<K, V> extended = pam.WithMatch(costFunction, stepCost, a, b);

    if (!multimatch)
    {
        // Single-entry matching: always keep the candidate; record it as a result when
        // it is complete and carries a value.
        if (complete && extended.value != null)
        {
            best.Add(extended);
        }
        queue.Add(extended);
        return true;
    }

    // Multimatch: keep following the current entry only if its trie node has children.
    if (extended.trie != null && extended.trie.children != null)
    {
        queue.Add(extended);
    }

    // Multimatch: the step landed on a value, so also queue a variant produced by the
    // multimatch overload of WithMatch, which is handed rootWithDelimiter as the trie to restart from.
    if (extended.value != null)
    {
        TrieMapMatcher.PartialApproxMatch<K, V> restarted = pam.WithMatch(costFunction, stepCost, a, b, multimatch, rootWithDelimiter);
        if (complete && restarted.value != null)
        {
            best.Add(restarted);
        }
        queue.Add(restarted);
    }
    return true;
}
/// <summary>
/// Creates the partial match that results from one edit step and, when multimatching and the
/// step completed an entry (a value was reached), records that entry in the multimatch list
/// and resets the trie position to <paramref name="root"/>.
/// </summary>
/// <param name="costFunction">Cost function; also supplies the extra multimatch cost</param>
/// <param name="deltaCost">Cost of this step</param>
/// <param name="t">Target element for this step, or null</param>
/// <param name="k">Trie key for this step, or null</param>
/// <param name="multimatch">Whether completed entries should be tracked as a sequence</param>
/// <param name="root">Trie node to continue from after an entry has been completed</param>
/// <returns>A new partial match; this instance is not modified</returns>
private TrieMapMatcher.PartialApproxMatch<K, V> WithMatch(IMatchCostFunction<K, V> costFunction, double deltaCost, K t, K k, bool multimatch, TrieMap<K, V> root)
{
    TrieMapMatcher.PartialApproxMatch<K, V> next = WithMatch(costFunction, deltaCost, t, k);

    bool entryCompleted = multimatch && next.matched != null && next.value != null;
    if (!entryCompleted)
    {
        return next;
    }

    // Copy-then-extend the multimatch list (next starts out holding the same list reference
    // as this instance, so the copy keeps this instance's list untouched).
    if (next.multimatches == null)
    {
        next.multimatches = new List<Match<K, V>>(1);
    }
    else
    {
        next.multimatches = new List<Match<K, V>>(multimatches.Count + 1);
        Sharpen.Collections.AddAll(next.multimatches, multimatches);
    }

    // Keys matched since the previous completed entry form the newly completed entry.
    IList<K> entryKeys = next.matched.SubList(lastMultimatchedMatchedStartIndex, next.matched.Count);
    next.multimatches.Add(new Match<K, V>(entryKeys, next.value, lastMultimatchedOriginalStartIndex, next.end));
    next.cost += costFunction.MultiMatchDeltaCost(entryKeys, next.value, multimatches, next.multimatches);

    // The next entry starts where this one stopped, in both the matched keys and the target.
    next.lastMultimatchedMatchedStartIndex = next.matched.Count;
    next.lastMultimatchedOriginalStartIndex = next.end;

    // Reset the trie position for the next entry.
    next.trie = root;
    return next;
}
/// <summary>
/// Creates the partial match that results from applying one edit step to this one.
/// A non-null <paramref name="t"/> advances <c>end</c> by one; a non-null <paramref name="k"/>
/// is appended to a copy of the matched keys and moves the trie position to the child for that key.
/// </summary>
/// <param name="costFunction">Not used by this overload</param>
/// <param name="deltaCost">Cost of this step, added to the accumulated cost</param>
/// <param name="t">Target element for this step, or null</param>
/// <param name="k">Trie key for this step, or null</param>
/// <returns>A new partial match; this instance is not modified</returns>
private TrieMapMatcher.PartialApproxMatch<K, V> WithMatch(IMatchCostFunction<K, V> costFunction, double deltaCost, K t, K k)
{
    bool hasTarget = t != null;
    bool hasKey = k != null;
    TrieMapMatcher.PartialApproxMatch<K, V> next = new TrieMapMatcher.PartialApproxMatch<K, V>();

    // Matched keys: shared as-is when no key is added, otherwise copied and extended.
    if (hasKey)
    {
        if (matched == null)
        {
            next.matched = new List<K>(1);
        }
        else
        {
            next.matched = new List<K>(matched.Count + 1);
            Sharpen.Collections.AddAll(next.matched, matched);
        }
        next.matched.Add(k);
    }
    else
    {
        next.matched = matched;
    }

    next.begin = begin;
    next.end = hasTarget ? end + 1 : end;
    next.cost = cost + deltaCost;
    next.trie = hasKey ? trie.GetChildTrie(k) : trie;
    next.value = (next.trie != null) ? next.trie.value : null;

    // Multimatch bookkeeping is carried over unchanged, except that the start index in the
    // target moves forward by one when it equals the current end and this step has a
    // target element but no key.
    next.multimatches = multimatches;
    next.lastMultimatchedMatchedStartIndex = lastMultimatchedMatchedStartIndex;
    bool startMovesForward = lastMultimatchedOriginalStartIndex == end && !hasKey && hasTarget;
    next.lastMultimatchedOriginalStartIndex = startMovesForward
        ? lastMultimatchedOriginalStartIndex + 1
        : lastMultimatchedOriginalStartIndex;

    if (alignments == null)
    {
        return next;
    }

    // Alignments: work on a copy so this instance's array stays untouched.
    next.alignments = new Interval[alignments.Length];
    alignments.CopyTo(next.alignments, 0);
    if (hasKey && next.end > 0)
    {
        int slot = next.end - 1;
        Interval existing = next.alignments[slot];
        if (existing == null)
        {
            // First key recorded for this slot: interval covering just the key added above.
            next.alignments[slot] = Interval.ToInterval(next.matched.Count - 1, next.matched.Count);
        }
        else
        {
            // Slot already has an interval: extend its end by one.
            next.alignments[slot] = Interval.ToInterval(existing.GetBegin(), existing.GetEnd() + 1);
        }
    }
    return next;
}
/// <summary>
/// Stores <paramref name="pam"/> in the queue under a key built from its matched keys, value,
/// begin/end offsets and its multimatch entries, the latter with their offsets replaced by 0.
/// </summary>
/// <param name="pam">Partial match to add</param>
public virtual void Add(TrieMapMatcher.PartialApproxMatch<K, V> pam)
{
    IList<Match<K, V>> source = pam.multimatches;
    IList<Match<K, V>> offsetFree = null;
    if (source != null)
    {
        // Rebuild each entry with begin/end set to 0 so the key does not carry the entries' offsets.
        offsetFree = new List<Match<K, V>>(source.Count);
        foreach (Match<K, V> entry in source)
        {
            offsetFree.Add(new Match<K, V>(entry.matched, entry.value, 0, 0));
        }
    }

    Match<K, V> key = new MultiMatch<K, V>(pam.matched, pam.value, pam.begin, pam.end, offsetFree);
    queue[key] = pam;
}
/// <summary>
/// Stores <paramref name="pam"/> in the bounded map for its bucket, creating that map on first use.
/// The bucket number is the count of multimatch entries recorded so far, plus one when the
/// partial match currently has no value.
/// </summary>
/// <param name="pam">Partial match to add</param>
public override void Add(TrieMapMatcher.PartialApproxMatch<K, V> pam)
{
    Match<K, V> m = new MultiMatch<K, V>(pam.matched, pam.value, pam.begin, pam.end, pam.multimatches);

    int key = (pam.multimatches != null) ? pam.multimatches.Count : 0;
    if (pam.value == null)
    {
        key = key + 1;
    }

    // Look the bucket up with TryGetValue: the dictionary indexer getter throws
    // KeyNotFoundException for a missing key instead of returning null, which would make
    // the lazy creation below unreachable.
    // NOTE(review): assumes multimatchQueues is an IDictionary keyed by int (declared outside this view) - confirm.
    BoundedCostOrderedMap<Match<K, V>, TrieMapMatcher.PartialApproxMatch<K, V>> mq;
    if (!multimatchQueues.TryGetValue(key, out mq) || mq == null)
    {
        mq = new BoundedCostOrderedMap<Match<K, V>, TrieMapMatcher.PartialApproxMatch<K, V>>(MatchCostFunction, maxSize, maxCost);
        multimatchQueues[key] = mq;
    }
    mq[m] = pam;
}
/// <summary>
/// Given a target sequence, returns the n closest matches (or sequences of matches) from the trie
/// based on the cost function (lower cost means better match).
/// </summary>
/// <param name="target">Target sequence to match</param>
/// <param name="costFunction">Cost function to use</param>
/// <param name="maxCost">Matches with a cost higher than this are discarded</param>
/// <param name="n">Number of matches to return. The actual number of matches may be less.</param>
/// <param name="multimatch">
/// If true, attempt to return matches with sequences of elements from the trie.
/// Otherwise, each match will contain only one element from the trie.
/// </param>
/// <param name="keepAlignments">If true, alignment information is returned</param>
/// <returns>List of approximate matches, or null if the trie root is empty</returns>
public virtual IList<ApproxMatch<K, V>> FindClosestMatches(IList<K> target, IMatchCostFunction<K, V> costFunction, double maxCost, int n, bool multimatch, bool keepAlignments)
{
    if (root.IsEmpty())
    {
        // Null (not an empty list) is kept for callers that may test for it.
        return null;
    }

    // Number of extra columns: partial matches may use up to this many more trie keys
    // than there are target elements.
    int extra = 3;

    // Find the closest n options to the key in the trie based on the given cost function for substitution.
    // matches[i][j] stores the top n partial matches for i elements from the target
    // and j elements from the partial matches from trie keys.
    // At any time, we only keep track of the last two rows:
    // prevMatches (matches[i-1][*]) and curMatches (matches[i][*]).
    TrieMapMatcher.MatchQueue<K, V> best = new TrieMapMatcher.MatchQueue<K, V>(n, maxCost);
    IList<TrieMapMatcher.PartialApproxMatch<K, V>>[] prevMatches = null;
    for (int i = 0; i <= target.Count; i++)
    {
        // The array element type must be the constructed generic list type; a non-generic
        // IList[] is not assignable to this variable.
        IList<TrieMapMatcher.PartialApproxMatch<K, V>>[] curMatches = new IList<TrieMapMatcher.PartialApproxMatch<K, V>>[target.Count + 1 + extra];
        for (int j = 0; j <= target.Count + extra; j++)
        {
            if (j > 0)
            {
                bool complete = (i == target.Count);
                // Try to pick best match from trie
                K t = (i > 0 && i <= target.Count) ? target[i - 1] : null;
                // Look at the top n choices we saved away and pick n new options
                TrieMapMatcher.MatchQueue<K, V> queue = (multimatch) ? new TrieMapMatcher.MultiMatchQueue<K, V>(n, maxCost) : new TrieMapMatcher.MatchQueue<K, V>(n, maxCost);

                if (i > 0)
                {
                    // From matches[i-1][j-1]: pair target element t with each child key k.
                    foreach (TrieMapMatcher.PartialApproxMatch<K, V> pamDiag in prevMatches[j - 1])
                    {
                        if (pamDiag.trie != null && pamDiag.trie.children != null)
                        {
                            foreach (K k in pamDiag.trie.children.Keys)
                            {
                                AddToQueue(queue, best, costFunction, pamDiag, t, k, multimatch, complete);
                            }
                        }
                    }
                }

                // From matches[i][j-1]: take each child key k without a target element.
                foreach (TrieMapMatcher.PartialApproxMatch<K, V> pamLeft in curMatches[j - 1])
                {
                    if (pamLeft.trie != null && pamLeft.trie.children != null)
                    {
                        foreach (K k in pamLeft.trie.children.Keys)
                        {
                            AddToQueue(queue, best, costFunction, pamLeft, null, k, multimatch, complete);
                        }
                    }
                }

                if (i > 0)
                {
                    // From matches[i-1][j]: take target element t without a trie key.
                    // Each element of prevMatches[j] itself must be extended here (not a
                    // variable left over from the previous loop).
                    foreach (TrieMapMatcher.PartialApproxMatch<K, V> pamUp in prevMatches[j])
                    {
                        AddToQueue(queue, best, costFunction, pamUp, t, null, multimatch, complete);
                    }
                }
                curMatches[j] = queue.ToSortedList();
            }
            else
            {
                // Column 0: no trie keys used yet.
                curMatches[0] = new List<TrieMapMatcher.PartialApproxMatch<K, V>>();
                if (i > 0)
                {
                    // NOTE(review): this uses i < target.Count while the j > 0 branch uses
                    // i <= target.Count, so the last target element is treated as null here.
                    // Left unchanged to preserve results - confirm whether this is intended.
                    K t = (i < target.Count) ? target[i - 1] : null;
                    foreach (TrieMapMatcher.PartialApproxMatch<K, V> pam in prevMatches[0])
                    {
                        TrieMapMatcher.PartialApproxMatch<K, V> npam = pam.WithMatch(costFunction, costFunction.Cost(t, null, pam.GetMatchedLength()), t, null);
                        if (npam.cost <= maxCost)
                        {
                            curMatches[0].Add(npam);
                        }
                    }
                }
                else
                {
                    // Cell (0, 0): the single zero-cost starting point at the trie root.
                    curMatches[0].Add(new TrieMapMatcher.PartialApproxMatch<K, V>(0, root, keepAlignments ? target.Count : 0));
                }
            }
        }
        prevMatches = curMatches;
    }

    // Get the best matches
    IList<ApproxMatch<K, V>> res = new List<ApproxMatch<K, V>>();
    foreach (TrieMapMatcher.PartialApproxMatch<K, V> m in best.ToSortedList())
    {
        res.Add(m.ToApproxMatch());
    }
    return res;
}