Exemplo n.º 1
0
        /// <summary>
        /// Distributes every node yielded by <c>comparer.GetDescendants(root)</c> into per-label buckets.
        /// </summary>
        /// <param name="comparer">Provides the traversal of the tree and the label of each node.</param>
        /// <param name="root">Node whose descendants are categorized.</param>
        /// <param name="labelCount">Number of buckets; a valid label lies in <c>[0, labelCount)</c>.</param>
        /// <param name="nodes">
        /// Receives an array indexed by label. A slot stays <c>null</c> when no node with that label was seen.
        /// </param>
        /// <param name="totalCount">Receives the number of nodes that were placed into buckets.</param>
        /// <exception cref="InvalidOperationException">A node reports a label outside <c>[0, labelCount)</c>.</exception>
        private static void CategorizeNodesByLabels(
            TreeComparer<TNode> comparer,
            TNode root,
            int labelCount,
            out List<TNode>[] nodes,
            out int totalCount)
        {
            var buckets = new List<TNode>[labelCount];
            nodes = buckets;

            int visited = 0;

            // The buckets must be filled in depth-first prefix order: a parent then always precedes
            // its children inside a bucket, which lets a node be tied to a parent of the same kind
            // because the parent is guaranteed to be processed before the child.
            foreach (TNode current in comparer.GetDescendants(root))
            {
                int label = comparer.GetLabel(current);

                bool labelInRange = label >= 0 && label < labelCount;
                if (!labelInRange)
                {
                    throw new InvalidOperationException(string.Format(WorkspacesResources.Label_for_node_0_is_invalid_it_must_be_within_bracket_0_1, current, labelCount));
                }

                // Buckets are created lazily, on the first node carrying the label.
                List<TNode> bucket = buckets[label];
                if (bucket == null)
                {
                    bucket = new List<TNode>();
                    buckets[label] = bucket;
                }

                bucket.Add(current);
                visited++;
            }

            totalCount = visited;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Builds a match between the tree rooted at <paramref name="root1"/> and the tree rooted at
        /// <paramref name="root2"/>. The roots are paired first, then the caller-supplied
        /// <paramref name="knownMatches"/>, and the remaining pairs are computed by <c>ComputeMatch</c>.
        /// </summary>
        /// <param name="root1">Root of the first tree (the "old" tree according to the error messages used below).</param>
        /// <param name="root2">Root of the second tree (the "new" tree according to the error messages used below).</param>
        /// <param name="comparer">Supplies labels, traversal and tree-membership checks for the nodes.</param>
        /// <param name="knownMatches">
        /// Optional pairs (first-tree node, second-tree node) that are mapped up front; may be <c>null</c>.
        /// A pair whose key or value is already mapped is skipped.
        /// </param>
        /// <exception cref="ArgumentException">
        /// A known match pairs nodes with different labels, or <c>comparer.TreesEqual</c> rejects its key
        /// against <paramref name="root1"/> or its value against <paramref name="root2"/>.
        /// </exception>
        internal Match(TNode root1, TNode root2, TreeComparer<TNode> comparer, IEnumerable<KeyValuePair<TNode, TNode>> knownMatches)
        {
            _root1 = root1;
            _root2 = root2;
            _comparer = comparer;

            int labelCount = comparer.LabelCount;

            CategorizeNodesByLabels(comparer, root1, labelCount, out var nodes1, out var count1);
            CategorizeNodesByLabels(comparer, root2, labelCount, out var nodes2, out var count2);

            // A node is mapped at most once, so the number of pairs cannot exceed the size of the
            // smaller tree; "+ 1" leaves room for the root pair in case the roots are not part of the
            // categorized descendants. Presizing avoids repeated rehashing while the maps are filled.
            // The capacity is only a hint - the dictionaries still grow if it is ever exceeded.
            int expectedPairs = Math.Min(count1, count2) + 1;

            _oneToTwo = new Dictionary<TNode, TNode>(expectedPairs);
            _twoToOne = new Dictionary<TNode, TNode>(expectedPairs);

            // Root nodes always match. Add them before adding known matches to make sure we always have root mapping.
            TryAdd(root1, root2);

            if (knownMatches != null)
            {
                foreach (var knownMatch in knownMatches)
                {
                    if (comparer.GetLabel(knownMatch.Key) != comparer.GetLabel(knownMatch.Value))
                    {
                        throw new ArgumentException(string.Format(WorkspacesResources.Matching_nodes_0_and_1_must_have_the_same_label, knownMatch.Key, knownMatch.Value), nameof(knownMatches));
                    }

                    // NOTE(review): TreesEqual presumably checks that the node belongs to the same tree
                    // as the given root (see the error message) - confirm against TreeComparer.
                    if (!comparer.TreesEqual(knownMatch.Key, root1))
                    {
                        throw new ArgumentException(string.Format(WorkspacesResources.Node_0_must_be_contained_in_the_old_tree, knownMatch.Key), nameof(knownMatches));
                    }

                    if (!comparer.TreesEqual(knownMatch.Value, root2))
                    {
                        throw new ArgumentException(string.Format(WorkspacesResources.Node_0_must_be_contained_in_the_new_tree, knownMatch.Value), nameof(knownMatches));
                    }

                    // skip pairs whose key or value is already mapped:
                    TryAdd(knownMatch.Key, knownMatch.Value);
                }
            }

            ComputeMatch(nodes1, nodes2);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Pairs still-unmatched nodes of <paramref name="s1"/> with still-unmatched nodes of
        /// <paramref name="s2"/>, choosing for each node of <paramref name="s1"/> the candidate with the
        /// smallest distance reported by the comparer.
        /// </summary>
        /// <param name="s1">Nodes of the first tree that share one label.</param>
        /// <param name="s2">Nodes of the second tree that share the same label.</param>
        /// <param name="tiedToAncestor">
        /// When greater than zero, a candidate is considered only if the ancestors obtained via
        /// <c>_comparer.GetAncestor(node, tiedToAncestor)</c> pass the <c>Contains</c> check.
        /// </param>
        /// <param name="maxAcceptableDistance">
        /// Largest distance at which the best candidate is still recorded as a match; expected to lie
        /// between <c>ExactMatchDistance</c> and <c>MaxDistance</c>.
        /// </param>
        private void ComputeMatchForLabel(List<TNode> s1, List<TNode> s2, int tiedToAncestor, double maxAcceptableDistance)
        {
            // The search below is quadratic in general. In the common case, however, both lists are
            // sequences that match exactly, position by position. "scanStart" tracks the first node of
            // s2 that is not known to be matched; every time a node is matched right at "scanStart" the
            // marker moves one step forward, so fully matching sequences are processed in linear time
            // (the outer index and "scanStart" advance together).

            Debug.Assert(maxAcceptableDistance >= ExactMatchDistance && maxAcceptableDistance <= MaxDistance);

            int oldCount = s1.Count;
            int newCount = s2.Count;
            int scanStart = 0;

            for (int oldIndex = 0; oldIndex < oldCount; oldIndex++)
            {
                TNode oldNode = s1[oldIndex];

                // Nothing to do for a node that has been paired already.
                if (HasPartnerInTree2(oldNode))
                {
                    continue;
                }

                // Look for the unmatched node of s2 that is closest to oldNode.
                double closestDistance = MaxDistance * 2;
                TNode closestNode = default;
                bool foundCandidate = false;

                int newIndex;
                for (newIndex = scanStart; newIndex < newCount; newIndex++)
                {
                    TNode newNode = s2[newIndex];

                    // Candidates that are already paired, or (for tied labels) whose ancestors do not
                    // pass the ancestor check, are not eligible. The ancestor check requires parents
                    // to be processed before their children.
                    if (HasPartnerInTree1(newNode) ||
                        (tiedToAncestor > 0 && !HaveMatchingAncestors(oldNode, newNode)))
                    {
                        continue;
                    }

                    // At this point (oldNode, newNode) is not in the map yet and their ancestors are
                    // compatible, so all that is left is to compare the node "values" and keep the
                    // candidate with the smallest distance.
                    double distance = _comparer.GetDistance(oldNode, newNode);
                    if (distance < closestDistance)
                    {
                        foundCandidate = true;
                        closestNode = newNode;
                        closestDistance = distance;

                        // Only an exact match ends the search early. Entities can share a name (often
                        // used as the node "value") while differing in "sub-values" - e.g. two locals
                        // with the same name but different types - so a merely close candidate must
                        // not stop us from looking for a better one.
                        if (distance == ExactMatchDistance)
                        {
                            break;
                        }
                    }
                }

                bool accepted = foundCandidate && closestDistance <= maxAcceptableDistance;
                if (!accepted)
                {
                    continue;
                }

                bool added = TryAdd(oldNode, closestNode);

                // oldNode was verified to be unpaired above and the map is a bijection by
                // construction, so adding the pair is expected to succeed.
                Debug.Assert(added);

                // The inner loop stopped exactly at the marker: move the marker past the matched node.
                if (newIndex == scanStart)
                {
                    scanStart++;
                }

                // The marker reached the end of s2: stop processing this label.
                if (scanStart == newCount)
                {
                    return;
                }
            }

            // Evaluates the tied-to-ancestor condition for a candidate pair.
            bool HaveMatchingAncestors(TNode left, TNode right)
            {
                // TODO (tomat): For nodes tied to their parents, consider avoiding matching them to
                // all other nodes of the same label. Rather we should only match them with their
                // siblings that share the same parent.
                var leftAncestor = _comparer.GetAncestor(left, tiedToAncestor);
                var rightAncestor = _comparer.GetAncestor(right, tiedToAncestor);

                // CategorizeNodesByLabels fills the s1/s2 lists in depth-first prefix order, so the
                // partner of a parent is found before the partner of a child of the same kind is
                // searched for. That is why equality of labels is acceptable in this condition.
                Debug.Assert(_comparer.GetLabel(leftAncestor) <= _comparer.GetLabel(left));

                return Contains(leftAncestor, rightAncestor);
            }
        }