Ejemplo n.º 1
0
        /// <summary>
        /// Extends <paramref name="pam"/> by pairing <paramref name="a"/> with <paramref name="b"/>,
        /// then offers the extended match to <paramref name="queue"/> and, when it is a complete match
        /// that carries a value, to <paramref name="best"/>.
        /// </summary>
        /// <param name="queue">Candidate queue for the cell currently being filled; its <c>maxCost</c> and <c>maxSize</c> drive pruning.</param>
        /// <param name="best">Queue collecting complete matches.</param>
        /// <param name="costFunction">Scores the pairing of <paramref name="a"/> with <paramref name="b"/>.</param>
        /// <param name="pam">Partial match to extend.</param>
        /// <param name="a">First element handed to the cost function (callers in this file pass a target element or null).</param>
        /// <param name="b">Second element handed to the cost function (callers in this file pass a trie child key or null).</param>
        /// <param name="multimatch">Whether a match may consist of a sequence of trie entries.</param>
        /// <param name="complete">Whether extended matches that carry a value should also be recorded in <paramref name="best"/>.</param>
        /// <returns><c>false</c> if the extension was pruned because of its cost; <c>true</c> otherwise.</returns>
        private bool AddToQueue(TrieMapMatcher.MatchQueue <K, V> queue, TrieMapMatcher.MatchQueue <K, V> best, IMatchCostFunction <K, V> costFunction, TrieMapMatcher.PartialApproxMatch <K, V> pam, K a, K b, bool multimatch, bool complete)
        {
            double stepCost  = costFunction.Cost(a, b, pam.GetMatchedLength());
            double totalCost = pam.cost + stepCost;

            // Prune when the total exceeds the queue's cost ceiling (double.MaxValue means "no ceiling"),
            // or when best already holds maxSize entries and this one is costlier than best.TopCost().
            bool exceedsCeiling = queue.maxCost != double.MaxValue && totalCost > queue.maxCost;
            if (exceedsCeiling || (best.Size() >= queue.maxSize && totalCost > best.TopCost()))
            {
                return false;
            }

            TrieMapMatcher.PartialApproxMatch <K, V> extended = pam.WithMatch(costFunction, stepCost, a, b);

            if (!multimatch)
            {
                // Single-match mode: always queue the extension; record it as a result when complete.
                if (complete && extended.value != null)
                {
                    best.Add(extended);
                }
                queue.Add(extended);
                return true;
            }

            // Multi-match mode: the plain extension is only queued when its trie node has a children map.
            if (extended.trie != null && extended.trie.children != null)
            {
                queue.Add(extended);
            }

            // The extension carries a value, so also build the multimatch variant of the same step
            // (WithMatch overload taking rootWithDelimiter) and queue that as well.
            if (extended.value != null)
            {
                TrieMapMatcher.PartialApproxMatch <K, V> multiExtended = pam.WithMatch(costFunction, stepCost, a, b, multimatch, rootWithDelimiter);
                if (complete && multiExtended.value != null)
                {
                    best.Add(multiExtended);
                }
                queue.Add(multiExtended);
            }
            return true;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Given a target sequence, returns the n closest matches (or sequences of matches) from the trie
        /// based on the cost function (a lower cost means a better match).
        /// </summary>
        /// <param name="target">Target sequence to match</param>
        /// <param name="costFunction">Cost function to use</param>
        /// <param name="maxCost">Matches with a cost higher than this are discarded</param>
        /// <param name="n">Number of matches to return. The actual number of matches may be less.</param>
        /// <param name="multimatch">
        /// If true, attempt to return matches with sequences of elements from the trie.
        /// Otherwise, each match will contain only one element from the trie.
        /// </param>
        /// <param name="keepAlignments">If true, alignment information is returned</param>
        /// <returns>
        /// List of approximate matches taken from the sorted queue of best matches,
        /// or <c>null</c> if the trie root is empty.
        /// </returns>
        public virtual IList <ApproxMatch <K, V> > FindClosestMatches(IList <K> target, IMatchCostFunction <K, V> costFunction, double maxCost, int n, bool multimatch, bool keepAlignments)
        {
            if (root.IsEmpty())
            {
                // Kept as null (not an empty list) so existing callers see unchanged behavior.
                return null;
            }

            // Number of columns beyond target.Count: a candidate may use up to this many more
            // trie elements than there are target elements.
            const int extra = 3;
            int columns = target.Count + 1 + extra;

            // Find the closest n options to the key in the trie based on the given cost function for substitution
            // matches[i][j] stores the top n partial matches for i elements from the target
            //   and j elements from the partial matches from trie keys
            // At any time, we only keep track of the last two rows
            // (prevMatches (matches[i-1][j]), curMatches (matches[i][j]) that we are working on
            TrieMapMatcher.MatchQueue <K, V> best = new TrieMapMatcher.MatchQueue <K, V>(n, maxCost);
            IList <TrieMapMatcher.PartialApproxMatch <K, V> >[] prevMatches = null;
            for (int i = 0; i <= target.Count; i++)
            {
                // The last row consumes the whole target, so matches found there are complete.
                bool complete = (i == target.Count);
                // Target element consumed when moving from row i-1 to row i (none for row 0).
                K t = (i > 0) ? target[i - 1] : null;

                // Allocate with the generic element type; a non-generic IList[] is not convertible.
                IList <TrieMapMatcher.PartialApproxMatch <K, V> >[] curMatches = new IList <TrieMapMatcher.PartialApproxMatch <K, V> > [columns];

                // Column 0: no trie elements used yet.
                curMatches[0] = new List <TrieMapMatcher.PartialApproxMatch <K, V> >();
                if (i == 0)
                {
                    // Seed the table with an empty match at the trie root.
                    curMatches[0].Add(new TrieMapMatcher.PartialApproxMatch <K, V>(0, root, keepAlignments ? target.Count : 0));
                }
                else
                {
                    // Pair the target element with nothing. The element is target[i - 1] for every
                    // row including the last one (previously the last row used null here, which was
                    // inconsistent with the columns j > 0).
                    foreach (TrieMapMatcher.PartialApproxMatch <K, V> up in prevMatches[0])
                    {
                        TrieMapMatcher.PartialApproxMatch <K, V> npam = up.WithMatch(costFunction, costFunction.Cost(t, null, up.GetMatchedLength()), t, null);
                        if (npam.cost <= maxCost)
                        {
                            curMatches[0].Add(npam);
                        }
                    }
                }

                for (int j = 1; j < columns; j++)
                {
                    // Look at the top n choices we saved away and pick n new options
                    TrieMapMatcher.MatchQueue <K, V> queue = (multimatch) ? new TrieMapMatcher.MultiMatchQueue <K, V>(n, maxCost) : new TrieMapMatcher.MatchQueue <K, V>(n, maxCost);

                    if (i > 0)
                    {
                        // From matches[i-1][j-1]: pair the target element with each child key.
                        foreach (TrieMapMatcher.PartialApproxMatch <K, V> diag in prevMatches[j - 1])
                        {
                            if (diag.trie != null && diag.trie.children != null)
                            {
                                foreach (K k in diag.trie.children.Keys)
                                {
                                    AddToQueue(queue, best, costFunction, diag, t, k, multimatch, complete);
                                }
                            }
                        }
                    }

                    // From matches[i][j-1]: take each child key without consuming a target element.
                    foreach (TrieMapMatcher.PartialApproxMatch <K, V> left in curMatches[j - 1])
                    {
                        if (left.trie != null && left.trie.children != null)
                        {
                            foreach (K k in left.trie.children.Keys)
                            {
                                AddToQueue(queue, best, costFunction, left, null, k, multimatch, complete);
                            }
                        }
                    }

                    if (i > 0)
                    {
                        // From matches[i-1][j]: consume the target element without a trie key.
                        // Uses this loop's own variable (the earlier code referenced the
                        // out-of-scope variable of the previous loop).
                        foreach (TrieMapMatcher.PartialApproxMatch <K, V> up in prevMatches[j])
                        {
                            AddToQueue(queue, best, costFunction, up, t, null, multimatch, complete);
                        }
                    }

                    curMatches[j] = queue.ToSortedList();
                }

                prevMatches = curMatches;
            }

            // Get the best matches
            IList <ApproxMatch <K, V> > res = new List <ApproxMatch <K, V> >();
            foreach (TrieMapMatcher.PartialApproxMatch <K, V> m in best.ToSortedList())
            {
                res.Add(m.ToApproxMatch());
            }
            return res;
        }