Beispiel #1
0
        /// <summary>
        /// Pairs the still-unmatched nodes of <paramref name="s1"/> with the still-unmatched nodes of
        /// <paramref name="s2"/>. For every node of <paramref name="s1"/> the candidate with the smallest
        /// comparer distance is chosen and recorded via <c>Add</c> when that distance does not exceed
        /// <paramref name="maxAcceptableDistance"/>.
        /// </summary>
        /// <param name="s1">Nodes of the first tree that carry the label being processed.</param>
        /// <param name="s2">Nodes of the second tree that carry the same label.</param>
        /// <param name="tiedToAncestor">
        /// When greater than zero, the level handed to <c>comparer.GetAncestor</c>; a pair is only considered
        /// when <c>Contains(ancestor1, ancestor2)</c> holds for the two ancestors.
        /// </param>
        /// <param name="maxAcceptableDistance">Inclusive upper bound on the distance of an accepted pair.</param>
        private void ComputeMatchForLabel(List <TNode> s1, List <TNode> s2, int tiedToAncestor, double maxAcceptableDistance)
        {
            // Obviously, the algorithm below is O(n^2). However, in the common case, the 2 lists will
            // be sequences that exactly match. The purpose of "firstNonMatch2" is to reduce the complexity
            // to O(n) in this case. Basically, the pointer is the 1st non-matched node in the list of nodes of tree2
            // with the given label.
            // Whenever we match to firstNonMatch2 we set firstNonMatch2 to the subsequent node.
            // So in the case of totally matching sequences, we process them in O(n) -
            // both node1 and firstNonMatch2 will be advanced simultaneously.

            int count1         = s1.Count;
            int count2         = s2.Count;
            int firstNonMatch2 = 0;

            for (int i1 = 0; i1 < count1; i1++)
            {
                TNode node1 = s1[i1];

                // Skip this guy if it already has a partner
                if (HasPartnerInTree2(node1))
                {
                    continue;
                }

                // Find node2 that matches node1 the best, i.e. has minimal distance.

                // Start strictly above MaxDistance: candidates are selected with "<", so seeding with
                // MaxDistance itself would make a candidate at exactly MaxDistance unselectable even when
                // maxAcceptableDistance allows it. Acceptance is decided solely by the check after the loop.
                double bestDistance = MaxDistance * 2;
                TNode  bestMatch    = default(TNode);
                bool   matched      = false;
                int    i2;
                for (i2 = firstNonMatch2; i2 < count2; i2++)
                {
                    TNode node2 = s2[i2];

                    // Skip this guy if it already has a partner
                    if (HasPartnerInTree1(node2))
                    {
                        continue;
                    }

                    // this requires parents to be processed before their children:
                    if (tiedToAncestor > 0)
                    {
                        // TODO (tomat): For nodes tied to their parents,
                        // consider avoiding matching them to all other nodes of the same label.
                        // Rather we should only match them with their siblings that share the same parent.

                        var ancestor1 = comparer.GetAncestor(node1, tiedToAncestor);
                        var ancestor2 = comparer.GetAncestor(node2, tiedToAncestor);

                        Debug.Assert(comparer.GetLabel(ancestor1) < comparer.GetLabel(node1));

                        if (!Contains(ancestor1, ancestor2))
                        {
                            continue;
                        }
                    }

                    // We know that
                    // 1. (node1, node2) not in M
                    // 2. Both of their parents are matched to the same parent (or are not matched)
                    //
                    // Now, we have no other choice than comparing the node "values"
                    // and looking for the one with the smaller distance.

                    double distance = comparer.GetDistance(node1, node2);
                    if (distance < bestDistance)
                    {
                        matched      = true;
                        bestMatch    = node2;
                        bestDistance = distance;

                        // We only stop if we've got an exact match. This is to resolve the problem
                        // of entities with identical names(name is often used as the "value" of a
                        // node) but with different "sub-values" (e.g. two locals may have the same name
                        // but different types. Since the type is not part of the value, we don't want
                        // to stop looking for the best match if we don't have an exact match).
                        if (distance == ExactMatchDistance)
                        {
                            break;
                        }
                    }
                }

                if (matched && bestDistance <= maxAcceptableDistance)
                {
                    Add(node1, bestMatch);

                    // If we exactly matched to firstNonMatch2 we can advance it.
                    // (i2 equals firstNonMatch2 here only when the inner loop broke on its first index.)
                    if (i2 == firstNonMatch2)
                    {
                        firstNonMatch2 = i2 + 1;

                        // Every node of s2 from the scan window has been consumed; the inner loop
                        // would be empty for all remaining nodes of s1, so there is nothing left to do.
                        if (firstNonMatch2 == count2)
                        {
                            return;
                        }
                    }
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Walks the nodes of <paramref name="s1"/> that have no partner yet and, for each of them, looks
        /// through the partner-less nodes of <paramref name="s2"/> for the one at the smallest comparer
        /// distance. The pair is recorded with <c>TryAdd</c> when that distance is at most
        /// <paramref name="maxAcceptableDistance"/>.
        /// </summary>
        /// <param name="s1">Nodes of the first tree that carry the label being processed.</param>
        /// <param name="s2">Nodes of the second tree that carry the same label.</param>
        /// <param name="tiedToAncestor">
        /// When greater than zero, the level passed to <c>_comparer.TryGetAncestor</c>; candidates whose
        /// ancestors at that level are not compatible are skipped.
        /// </param>
        /// <param name="maxAcceptableDistance">
        /// Inclusive distance limit for an accepted pair; asserted to lie between
        /// <c>ExactMatchDistance</c> and <c>MaxDistance</c>.
        /// </param>
        private void ComputeMatchForLabel(
            List <TNode> s1,
            List <TNode> s2,
            int tiedToAncestor,
            double maxAcceptableDistance
            )
        {
            // The nested scan is quadratic in the worst case. Usually, though, both lists are sequences
            // that match element by element. "scanStart" tracks the first entry of s2 that has not been
            // consumed by such an in-order exact match, so that in the fully matching case the outer index
            // and scanStart move forward together and the whole pass is linear.

            Debug.Assert(
                ExactMatchDistance <= maxAcceptableDistance && maxAcceptableDistance <= MaxDistance
                );

            var leftCount  = s1.Count;
            var rightCount = s2.Count;
            var scanStart  = 0;

            for (var leftIndex = 0; leftIndex < leftCount; leftIndex++)
            {
                var leftNode = s1[leftIndex];

                // Already paired with something in the other tree - nothing to do.
                if (HasPartnerInTree2(leftNode))
                {
                    continue;
                }

                // Search for the closest available node of s2.
                var   lowestDistance = MaxDistance * 2;
                TNode closestNode    = default;
                var   foundCandidate = false;

                int rightIndex;
                for (rightIndex = scanStart; rightIndex < rightCount; rightIndex++)
                {
                    var rightNode = s2[rightIndex];

                    // Ignore nodes that are taken, and nodes ruled out by the ancestor tie.
                    // (The ancestor tie relies on parents having been processed before their children.)
                    if (HasPartnerInTree1(rightNode) || !AncestorsPermitPairing(leftNode, rightNode))
                    {
                        continue;
                    }

                    // At this point the pair is not in the map yet and the ancestor constraint (if any)
                    // is satisfied, so the only thing left to go by is the distance of the node "values".
                    var distance = _comparer.GetDistance(leftNode, rightNode);
                    if (distance < lowestDistance)
                    {
                        foundCandidate = true;
                        closestNode    = rightNode;
                        lowestDistance = distance;

                        // Only an exact match ends the search early. Names often serve as the node
                        // "value", and two entities may share a name while differing in "sub-values"
                        // (e.g. two locals of the same name but different types), so a merely close
                        // candidate must not stop us from looking for a better one.
                        if (distance == ExactMatchDistance)
                        {
                            break;
                        }
                    }
                }

                if (foundCandidate && lowestDistance <= maxAcceptableDistance)
                {
                    // leftNode was verified to be partner-less above and the map is a bijection
                    // by construction, so recording the pair is expected to succeed.
                    var wasAdded = TryAdd(leftNode, closestNode);
                    Debug.Assert(wasAdded);

                    // rightIndex equals scanStart only when the search stopped on its very first index,
                    // i.e. we exactly matched the first unconsumed node of s2 and can move past it.
                    if (rightIndex == scanStart)
                    {
                        scanStart = rightIndex + 1;

                        // scanStart can reach the end of s2 only by being advanced here;
                        // once it does, no candidates remain for any later node of s1.
                        if (scanStart == rightCount)
                        {
                            return;
                        }
                    }
                }
            }

            // Decides whether the ancestor tie (when configured) allows "first" to be paired with "second".
            bool AncestorsPermitPairing(TNode first, TNode second)
            {
                if (tiedToAncestor <= 0)
                {
                    return true;
                }

                // TODO (tomat): For nodes tied to their parents,
                // consider avoiding matching them to all other nodes of the same label.
                // Rather we should only match them with their siblings that share the same parent.

                // Nodes configured as tied to an ancestor require the respective ancestors to match.
                // When subtrees rooted below both ancestors are compared, the ancestors are assumed to
                // match: the subtree roots must match and hence so must their ancestors. If only one of
                // the two ancestors is present in the subtree, we are not comparing subtrees with
                // matching roots and the nodes are treated as not matching.
                var firstHasAncestor = _comparer.TryGetAncestor(
                    first,
                    tiedToAncestor,
                    out var firstAncestor
                    );
                var secondHasAncestor = _comparer.TryGetAncestor(
                    second,
                    tiedToAncestor,
                    out var secondAncestor
                    );

                if (firstHasAncestor != secondHasAncestor)
                {
                    return false;
                }

                if (!firstHasAncestor)
                {
                    return true;
                }

                // CategorizeNodesByLabels filled the s1/s2 lists in depth-first prefix order, so the
                // partner of a parent is found before the partner of a child of the same kind is
                // looked for. That is why equal labels are acceptable in this assertion.
                Debug.Assert(
                    _comparer.GetLabel(firstAncestor) <= _comparer.GetLabel(first)
                    );

                return Contains(firstAncestor, secondAncestor);
            }
        }