/// <summary>
/// Groups every node enumerated by <c>comparer.GetDescendants(root)</c> into per-label buckets.
/// </summary>
/// <param name="comparer">Supplies the descendant enumeration and the label of each node.</param>
/// <param name="root">Root whose descendants are categorized.</param>
/// <param name="labelCount">Number of valid labels; valid labels are <c>0 .. labelCount - 1</c>.</param>
/// <param name="nodes">
/// Receives an array indexed by label. An entry stays <c>null</c> when no node carries that label.
/// </param>
/// <param name="totalCount">Receives the number of nodes that were categorized.</param>
/// <exception cref="InvalidOperationException">A node reports a label outside the valid range.</exception>
private static void CategorizeNodesByLabels(
    TreeComparer<TNode> comparer,
    TNode root,
    int labelCount,
    out List<TNode>[] nodes,
    out int totalCount)
{
    var buckets = new List<TNode>[labelCount];
    nodes = buckets;

    int visited = 0;

    // The nodes must be appended in depth-first prefix order (the order in which
    // they are enumerated here). With that order a parent is always processed before
    // its children, so a node of a certain kind may have a parent of the same kind
    // and tied-to-parent can still be used for that kind.
    foreach (TNode descendant in comparer.GetDescendants(root))
    {
        int label = comparer.GetLabel(descendant);

        bool labelInRange = label >= 0 && label < labelCount;
        if (!labelInRange)
        {
            throw new InvalidOperationException(string.Format(WorkspacesResources.Label_for_node_0_is_invalid_it_must_be_within_bracket_0_1, descendant, labelCount));
        }

        // Buckets are created lazily, only for labels that actually occur.
        List<TNode> bucket = buckets[label];
        if (bucket == null)
        {
            bucket = new List<TNode>();
            buckets[label] = bucket;
        }

        bucket.Add(descendant);
        visited++;
    }

    totalCount = visited;
}
/// <summary>
/// Builds a match between the tree rooted at <paramref name="root1"/> (the old tree) and the
/// tree rooted at <paramref name="root2"/> (the new tree).
/// </summary>
/// <param name="root1">Root of the old tree.</param>
/// <param name="root2">Root of the new tree.</param>
/// <param name="comparer">Comparer that supplies labels, descendants and tree membership checks.</param>
/// <param name="knownMatches">
/// Optional pairs (old node, new node) that are seeded into the match before the remaining
/// nodes are matched. May be <c>null</c>. Pairs whose key or value is already mapped are skipped.
/// </param>
/// <exception cref="ArgumentException">
/// A known match pairs nodes with different labels, or <c>comparer.TreesEqual</c> rejects its key
/// against <paramref name="root1"/> or its value against <paramref name="root2"/>.
/// </exception>
/// <exception cref="InvalidOperationException">
/// Thrown by <c>CategorizeNodesByLabels</c> when a node reports a label outside
/// <c>0 .. comparer.LabelCount - 1</c>.
/// </exception>
internal Match(TNode root1, TNode root2, TreeComparer<TNode> comparer, IEnumerable<KeyValuePair<TNode, TNode>> knownMatches)
{
    _root1 = root1;
    _root2 = root2;
    _comparer = comparer;

    int labelCount = comparer.LabelCount;

    // Only the per-label node lists are needed here; the total node counts are discarded.
    CategorizeNodesByLabels(comparer, root1, labelCount, out var nodes1, out _);
    CategorizeNodesByLabels(comparer, root2, labelCount, out var nodes2, out _);

    _oneToTwo = new Dictionary<TNode, TNode>();
    _twoToOne = new Dictionary<TNode, TNode>();

    // Root nodes always match. Add them before adding known matches to make sure we always have root mapping.
    TryAdd(root1, root2);

    if (knownMatches != null)
    {
        foreach (var knownMatch in knownMatches)
        {
            if (comparer.GetLabel(knownMatch.Key) != comparer.GetLabel(knownMatch.Value))
            {
                throw new ArgumentException(string.Format(WorkspacesResources.Matching_nodes_0_and_1_must_have_the_same_label, knownMatch.Key, knownMatch.Value), nameof(knownMatches));
            }

            if (!comparer.TreesEqual(knownMatch.Key, root1))
            {
                throw new ArgumentException(string.Format(WorkspacesResources.Node_0_must_be_contained_in_the_old_tree, knownMatch.Key), nameof(knownMatches));
            }

            if (!comparer.TreesEqual(knownMatch.Value, root2))
            {
                throw new ArgumentException(string.Format(WorkspacesResources.Node_0_must_be_contained_in_the_new_tree, knownMatch.Value), nameof(knownMatches));
            }

            // skip pairs whose key or value is already mapped:
            TryAdd(knownMatch.Key, knownMatch.Value);
        }
    }

    ComputeMatch(nodes1, nodes2);
}
/// <summary>
/// Pairs up not-yet-matched nodes of a single label: for each unmatched node of
/// <paramref name="s1"/> the unmatched node of <paramref name="s2"/> with the smallest
/// distance is selected and recorded, provided that distance is acceptable.
/// </summary>
/// <param name="s1">Nodes of the first tree that carry the label.</param>
/// <param name="s2">Nodes of the second tree that carry the label.</param>
/// <param name="tiedToAncestor">
/// When greater than zero, two nodes are only considered if <c>Contains</c> accepts the pair
/// of ancestors returned by <c>_comparer.GetAncestor</c> for this value.
/// </param>
/// <param name="maxAcceptableDistance">
/// Largest distance at which the best candidate is still recorded as a match.
/// Expected to lie between <c>ExactMatchDistance</c> and <c>MaxDistance</c>.
/// </param>
private void ComputeMatchForLabel(List<TNode> s1, List<TNode> s2, int tiedToAncestor, double maxAcceptableDistance)
{
    // The search below is O(n^2) in general. In the common case, however, the two lists are
    // sequences that match exactly. "scanStart" brings that case down to O(n): it points at the
    // first node of s2 that has not been matched yet, and whenever a match lands exactly on it,
    // it moves to the subsequent node. For totally matching sequences the outer index and
    // scanStart therefore advance simultaneously.
    Debug.Assert(maxAcceptableDistance >= ExactMatchDistance && maxAcceptableDistance <= MaxDistance);

    int oldCount = s1.Count;
    int newCount = s2.Count;
    int scanStart = 0;

    for (int oldIndex = 0; oldIndex < oldCount; oldIndex++)
    {
        TNode oldNode = s1[oldIndex];

        // Nothing to do for a node that already has a partner.
        if (HasPartnerInTree2(oldNode))
        {
            continue;
        }

        // Look for the candidate with the minimal distance to oldNode.
        double closest = MaxDistance * 2;
        TNode candidate = default;
        bool found = false;

        int newIndex;
        for (newIndex = scanStart; newIndex < newCount; newIndex++)
        {
            TNode newNode = s2[newIndex];

            // Candidates that already have a partner are not available.
            if (HasPartnerInTree1(newNode))
            {
                continue;
            }

            // this requires parents to be processed before their children:
            if (tiedToAncestor > 0)
            {
                // TODO (tomat): For nodes tied to their parents,
                // consider avoiding matching them to all other nodes of the same label.
                // Rather we should only match them with their siblings that share the same parent.
                var oldAncestor = _comparer.GetAncestor(oldNode, tiedToAncestor);
                var newAncestor = _comparer.GetAncestor(newNode, tiedToAncestor);

                // Since CategorizeNodesByLabels added nodes to the s1/s2 lists in depth-first prefix order,
                // we can also accept equality in the following condition. That's because we find the partner
                // of the parent node before we get to finding it for the child node of the same kind.
                Debug.Assert(_comparer.GetLabel(oldAncestor) <= _comparer.GetLabel(oldNode));

                if (!Contains(oldAncestor, newAncestor))
                {
                    continue;
                }
            }

            // At this point we know that
            // 1. (oldNode, newNode) is not in M
            // 2. Both of their parents are matched to the same parent (or are not matched)
            //
            // All that is left is to compare the node "values" and keep the one
            // with the smaller distance.
            double distance = _comparer.GetDistance(oldNode, newNode);
            if (!(distance < closest))
            {
                continue;
            }

            found = true;
            candidate = newNode;
            closest = distance;

            // Stop only on an exact match. Entities may share a name (the name is often used
            // as the "value" of a node) yet differ in "sub-values" (e.g. two locals with the
            // same name but different types). The type is not part of the value, so without
            // an exact match we keep looking for a better candidate.
            if (distance == ExactMatchDistance)
            {
                break;
            }
        }

        if (!(found && closest <= maxAcceptableDistance))
        {
            continue;
        }

        bool added = TryAdd(oldNode, candidate);

        // oldNode was verified above to have no partner, and the map is a bijection
        // by construction, so adding the mapping is expected to succeed.
        Debug.Assert(added);

        // If the match landed exactly on scanStart, it can be advanced.
        if (newIndex == scanStart)
        {
            scanStart = newIndex + 1;
        }

        if (scanStart == newCount)
        {
            return;
        }
    }
}