Represents a single XNode within the XTree. Maps to a single XML element, attribute or text node within the underlying XML document.
Example #1
0
 /// <summary>
 /// Creates a node wrapping the given XML node; all child collections start out as the shared Empty value.
 /// </summary>
 /// <param name="node">The System.Xml.XmlNode from the source XmlDocument that this XNode represents.</param>
 /// <param name="parent">The XNode directly above this one in the XML hierarchy.</param>
 public XNode(XmlNode node, XNode parent)
 {
     XmlNode = node;
     Parent = parent;

     // Collections are populated later; until then they all refer to Empty.
     Attributes = Empty;
     Children = Empty;
     Elements = Empty;
     Texts = Empty;
 }
Example #2
0
        /// <summary>
        /// Builds the pairwise cost table for two node lists and returns the cost of the cheapest matching.
        /// </summary>
        /// <param name="nodes1">Nodes that form the rows of the cost table.</param>
        /// <param name="nodes2">Nodes that form the columns of the cost table.</param>
        /// <param name="treeOrder">
        /// When true, each nodes1 entry is passed first to Distance and placed first in the lookup key;
        /// when false, the pair is swapped.
        /// </param>
        /// <returns>The value returned by FindMinimalMatching for the cost table.</returns>
        private static int DistanceMatchList(XNode[] nodes1, XNode[] nodes2, bool treeOrder)
        {
            int rows = nodes1.Length;
            int cols = nodes2.Length;
            var costs = new int[rows + 1, cols + 1];

            // Extra row: cost of inserting each node of nodes2 (its descendants plus itself).
            for (int col = 0; col < cols; col++)
            {
                costs[rows, col] = nodes2[col].GetDescendantCount() + 1;
            }

            for (int row = 0; row < rows; row++)
            {
                // Extra column: cost of deleting this node of nodes1.
                costs[row, cols] = nodes1[row].GetDescendantCount() + 1;

                for (int col = 0; col < cols; col++)
                {
                    XNode first = treeOrder ? nodes1[row] : nodes2[col];
                    XNode second = treeOrder ? nodes2[col] : nodes1[row];

                    int cost = Distance(first, second, true, NoConnection);
                    if (cost < NoConnection)
                    {
                        // Remember every connectable pair under its (first, second) key.
                        distanceLookup[new Tuple<XNode, XNode>(first, second)] = cost;
                    }

                    costs[row, col] = cost;
                }
            }

            // The matching arrays are only scratch output here; just the total cost is returned.
            return FindMinimalMatching(costs, new int[rows], new int[cols]);
        }
Example #3
0
        /// <summary>
        /// Calculate the editing distance between two elements, up to a maximum threshold.
        /// </summary>
        /// <param name="node1">Element taken from the first tree.</param>
        /// <param name="node2">Element taken from the second tree.</param>
        /// <param name="threshold">
        /// Upper bound applied on the final return path: a total that is not below it is reported as NoConnection.
        /// </param>
        /// <returns>
        /// The accumulated editing distance. The early-exit paths return the distance without the
        /// threshold check; the final path returns NoConnection when the total is not below the threshold.
        /// </returns>
        private static int DistanceElements(XNode node1, XNode node2, int threshold)
        {
            int dist;

            // Distance of attributes: an attribute with no counterpart costs 2.
            if (node1.Attributes.Length == 0)
                dist = node2.Attributes.Length * 2;
            else if (node2.Attributes.Length == 0)
                dist = node1.Attributes.Length * 2;
            else
                dist = DistanceAttributes(node1.Attributes, node2.Attributes);

            // Match second level nodes first.
            if (node1.Children.Length == 0)
            {
                // Each child of node2 counts as a whole subtree: its descendants plus itself.
                foreach (var child2 in node2.Children)
                    dist += child2.GetDescendantCount() + 1;
            }
            else if (node2.Children.Length == 0)
            {
                foreach (var child1 in node1.Children)
                    dist += child1.GetDescendantCount() + 1;
            }
            else if (node1.Children.Length == 1 && node2.Children.Length == 1)
            {
                var child1 = node1.Children[0];
                var child2 = node2.Children[0];

                // Identical sub-trees add nothing.
                if (child1.HashEquals(child2.Hash))
                    return dist;

                var isElement1 = child1.IsElement();
                var isElement2 = child2.IsElement();

                if (isElement1 && isElement2)
                {
                    if (child1.Name == child2.Name)
                    {
                        // Compare the two children with each other, using what is left of the threshold.
                        dist += DistanceElements(child1, child2, threshold - dist);
                    }
                    else
                    {
                        // Different names: one whole subtree is removed and the other added.
                        dist += child1.GetDescendantCount() + child2.GetDescendantCount() + 2;
                    }
                }
                else if (!isElement1 && !isElement2)
                {
                    // Two non-element children with different hashes: a single change.
                    dist++;
                }
                else
                {
                    dist += child1.GetDescendantCount() + child2.GetDescendantCount() + 2;
                }
            }
            else
            {
                // Match text nodes. When one side has none, every text node on the other side
                // is unmatched, which is also what DistanceTexts yields for an empty list.
                if (node1.Texts.Length == 0)
                    dist += node2.Texts.Length;
                else if (node2.Texts.Length == 0)
                    dist += node1.Texts.Length;
                else
                    dist += DistanceTexts(node1.Texts, node2.Texts);

                var elements1 = node1.Elements;
                var elements2 = node2.Elements;
                var elementCount1 = elements1.Length;
                var elementCount2 = elements2.Length;

                // Flag the elements that have a hash-equal partner on the other side.
                var matched1 = new bool[elementCount1];
                var matched2 = new bool[elementCount2];
                int matched = MatchFilter(elements1, elements2, matched1, matched2);

                if (elementCount1 == matched && elementCount2 == matched)
                    return dist;

                if (elementCount1 == matched)
                {
                    for (int i = 0; i < elementCount2; i++)
                    {
                        if (!matched2[i])
                            dist += elements2[i].GetDescendantCount() + 1;
                    }
                    return dist;
                }

                if (elementCount2 == matched)
                {
                    for (int i = 0; i < elementCount1; i++)
                    {
                        if (!matched1[i])
                            dist += elements1[i].GetDescendantCount() + 1;
                    }
                    return dist;
                }

                // 'Match' remaining unmatched child element nodes, one name group per iteration.
                int remaining1 = elementCount1 - matched;
                int remaining2 = elementCount2 - matched;
                int matchCount1 = 0;
                int matchCount2 = 0;

                while ((matchCount1 < remaining1) && (matchCount2 < remaining2))
                {
                    var unmatched1 = new List<XNode>();
                    var unmatched2 = new List<XNode>();
                    string name = null;

                    // Group the still-unflagged elements of node1 that share the first unflagged name.
                    // Flagging them here guarantees progress and keeps the trailing loops accurate.
                    for (int i = 0; i < elementCount1; i++)
                    {
                        if (matched1[i])
                            continue;

                        var child1 = elements1[i];
                        if (name == null)
                            name = child1.Name;

                        if (name == child1.Name)
                        {
                            unmatched1.Add(child1);
                            matched1[i] = true;
                            matchCount1++;
                        }
                    }

                    // Collect the unflagged elements of node2 with the same name.
                    for (int i = 0; i < elementCount2; i++)
                    {
                        if (!matched2[i] && name == elements2[i].Name)
                        {
                            unmatched2.Add(elements2[i]);
                            matched2[i] = true;
                            matchCount2++;
                        }
                    }

                    if (unmatched2.Count == 0)
                    {
                        // No counterpart with this name: the whole group from node1 is unmatched.
                        // NOTE(review): this cost omits the +1 used for whole-subtree costs above - confirm intended.
                        for (int i = 0; i < unmatched1.Count; i++)
                            dist += unmatched1[i].GetDescendantCount();
                    }
                    else
                    {
                        // Find the minimal-cost matching within the group; the longer list goes first.
                        dist += (unmatched1.Count >= unmatched2.Count)
                            ? DistanceMatchList(unmatched1.ToArray(), unmatched2.ToArray(), true)
                            : DistanceMatchList(unmatched2.ToArray(), unmatched1.ToArray(), false);
                    }
                }

                // Whatever was never grouped on the longer side is unmatched.
                if (matchCount1 < remaining1)
                {
                    for (int i = 0; i < elementCount1; i++)
                    {
                        if (!matched1[i])
                            dist += elements1[i].GetDescendantCount();
                    }
                }
                else if (matchCount2 < remaining2)
                {
                    for (int i = 0; i < elementCount2; i++)
                    {
                        if (!matched2[i])
                            dist += elements2[i].GetDescendantCount();
                    }
                }
            }

            return dist < threshold ? dist : NoConnection;
        }
Example #4
0
        /// <summary>
        /// Computes the editing distance between two attribute lists.
        /// </summary>
        /// <param name="attributes1">Attributes of the first element.</param>
        /// <param name="attributes2">Attributes of the second element.</param>
        /// <returns>
        /// 0 for each hash-equal pair, 1 for each pair sharing only a name, and 2 for each attribute
        /// on either side left without a partner.
        /// </returns>
        private static int DistanceAttributes(XNode[] attributes1, XNode[] attributes2)
        {
            // Fast path: exactly one attribute on each side.
            if (attributes1.Length == 1 && attributes2.Length == 1)
            {
                var only1 = attributes1[0];
                var only2 = attributes2[0];

                if (only1.HashEquals(only2.Hash))
                    return 0;

                if (only1.Name == only2.Name)
                    return 1;

                return 2;
            }

            int cost = 0;
            int pairedCount = 0;
            var taken = new bool[attributes2.Length];

            foreach (var attr1 in attributes1)
            {
                // Index of the first free attribute in attributes2 that pairs with attr1, or -1.
                int partner = -1;

                for (int j = 0; j < attributes2.Length && partner < 0; j++)
                {
                    if (taken[j])
                        continue;

                    if (attr1.HashEquals(attributes2[j].Hash))
                    {
                        partner = j;
                    }
                    else if (attr1.Name == attributes2[j].Name)
                    {
                        // Same name but different hash: counts as one change.
                        cost++;
                        partner = j;
                    }
                }

                if (partner < 0)
                {
                    cost += 2;
                }
                else
                {
                    taken[partner] = true;
                    pairedCount++;
                }
            }

            // Attributes of the second list that never found a partner.
            return cost + (attributes2.Length - pairedCount) * 2;
        }
Example #5
0
        /// <summary>
        /// Computes the editing distance between two nodes, optionally storing element distances in distanceLookup.
        /// </summary>
        /// <param name="node1">First node of the pair.</param>
        /// <param name="node2">Second node of the pair.</param>
        /// <param name="toRecord">When true, an element distance below NoConnection is stored under the (node1, node2) key.</param>
        /// <param name="threshold">Threshold forwarded to DistanceElements.</param>
        /// <returns>
        /// NoConnection for an element paired with a non-element or for elements with different names,
        /// 1 for two non-element nodes, otherwise the result of DistanceElements.
        /// </returns>
        private static int Distance(XNode node1, XNode node2, bool toRecord, int threshold)
        {
            bool firstIsElement = node1.IsElement();
            bool secondIsElement = node2.IsElement();

            // An element can never be connected to a non-element.
            if (firstIsElement != secondIsElement)
                return NoConnection;

            // Both are non-elements.
            if (!firstIsElement)
                return 1;

            if (node1.Name != node2.Name)
                return NoConnection;

            int result = DistanceElements(node1, node2, threshold);
            if (toRecord && result < NoConnection)
            {
                distanceLookup[new Tuple<XNode, XNode>(node1, node2)] = result;
            }

            return result;
        }
Example #6
0
        /// <summary>
        /// Matches two lists of text nodes: equal ones first, then arbitrary pairs, then marks the leftovers.
        /// </summary>
        /// <param name="texts1">Text nodes of the first element.</param>
        /// <param name="texts2">Text nodes of the second element.</param>
        private static void DiffTexts(XNode[] texts1, XNode[] texts2)
        {
            int count1 = texts1.Length;
            int count2 = texts2.Length;

            // Step 1: pair up text nodes with equal hashes.
            int paired = MatchEqual(texts1, texts2, MatchType.Match);

            // Step 2: pair whatever is left on the shorter (or equally long) side with any free node
            // on the other side and flag the pair as changed.
            if (paired < count1 && count1 <= count2)
            {
                paired += MatchAny(texts1, texts2, MatchType.Change);
            }
            else if (paired < count2 && count2 <= count1)
            {
                paired += MatchAny(texts2, texts1, MatchType.Change);
            }

            // Step 3: nodes still without a partner are marked as NoMatch.
            if (paired < count1)
            {
                SetUnmatched(texts1, MatchType.NoMatch);
            }
            else if (paired < count2)
            {
                SetUnmatched(texts2, MatchType.NoMatch);
            }
        }
Example #7
0
        /// <summary>
        /// Recursively builds the XNode for an XML node and computes its hash.
        /// </summary>
        /// <param name="node">The XML node to convert.</param>
        /// <param name="parent">The XNode recorded as the parent of the result.</param>
        /// <returns>
        /// The new XNode, or null when the node is not an attribute, text, CDATA or element node.
        /// </returns>
        public static XNode Build(XmlNode node, XNode parent)
        {
            switch (node.NodeType)
            {
                case XmlNodeType.Attribute:
                {
                    var attribute = new XNode(node, parent);
                    attribute.Name = node.Name.ToLowerInvariant();

                    // Attribute hashes are taken over "@name/value".
                    attribute.Hash = Murmur3Hasher.HashString(
                        "@" + attribute.Name + "/" + (node.Value ?? string.Empty));
                    return attribute;
                }

                case XmlNodeType.Text:
                case XmlNodeType.CDATA:
                {
                    // Text and CDATA nodes share the "#text" name.
                    var text = new XNode(node, parent);
                    text.Name = "#text";
                    text.Hash = Murmur3Hasher.HashString(text.Name + "/" + (node.Value ?? string.Empty));
                    return text;
                }

                case XmlNodeType.Element:
                {
                    var element = new XNode(node, parent);
                    var elementName = node.Name.ToLowerInvariant();
                    element.Name = elementName;

                    // The element's own contribution comes first; attribute and child hashes follow.
                    var hashes = new List<byte[]> { Murmur3Hasher.HashString(elementName + "/") };

                    var attributes = new List<XNode>();
                    foreach (XmlNode attributeNode in node.Attributes)
                    {
                        var built = XNode.Build(attributeNode, element);
                        if (built == null)
                            continue;

                        hashes.Add(built.Hash);
                        attributes.Add(built);
                    }
                    element.Attributes = attributes.ToArray();

                    // Children are kept in document order and additionally split into elements and texts.
                    var children = new List<XNode>();
                    var elements = new List<XNode>();
                    var texts = new List<XNode>();
                    foreach (XmlNode childNode in node.ChildNodes)
                    {
                        var built = XNode.Build(childNode, element);
                        if (built == null)
                            continue;

                        hashes.Add(built.Hash);
                        children.Add(built);
                        (built.IsElement() ? elements : texts).Add(built);
                    }
                    element.Children = children.ToArray();
                    element.Elements = elements.ToArray();
                    element.Texts = texts.ToArray();

                    // Hashes are sorted before being concatenated, so their original order does not affect the result.
                    var ordered = hashes.OrderBy(h => h, ByteArrayComparer.Instance).ToList();
                    var joined = ConcatAll(ordered, Murmur3Hasher.OUTPUT_LENGTH);
                    element.Hash = Murmur3Hasher.HashBytes(joined);

                    return element;
                }

                default:
                    return null;
            }
        }
Example #8
0
        /// <summary>
        /// Set match for child nodes with equal hash values (equal sub-trees).
        /// </summary>
        /// <param name="nodes1">Nodes to find partners for.</param>
        /// <param name="nodes2">
        /// Candidate partners; a candidate is skipped when it already has a Matching node or is marked NoMatch.
        /// </param>
        /// <param name="match">The match type recorded on both nodes of every pair that is created.</param>
        /// <returns>The number of pairs that were created.</returns>
        private static int MatchEqual(XNode[] nodes1, XNode[] nodes2, MatchType match)
        {
            var matched = 0;
            foreach (var node1 in nodes1)
            {
                foreach (var node2 in nodes2)
                {
                    if (node2.Matching == null && node2.Match != MatchType.NoMatch && node1.HashEquals(node2.Hash))
                    {
                        // Record the caller-supplied match type rather than a fixed one.
                        SetMatching(node1, node2, match);
                        matched++;
                        break;
                    }
                }

                // Every candidate is taken; nothing is left to pair with.
                if (matched == nodes2.Length)
                    break;
            }

            return matched;
        }
Example #9
0
        /// <summary>
        /// Assigns the given match type to every node that has no matching node and is not already marked NoMatch.
        /// </summary>
        /// <param name="nodes">The nodes to inspect.</param>
        /// <param name="match">The match type to assign.</param>
        /// <returns>The number of nodes that were updated.</returns>
        private static int SetUnmatched(XNode[] nodes, MatchType match)
        {
            int updated = 0;
            for (int i = 0; i < nodes.Length; i++)
            {
                XNode candidate = nodes[i];

                // Skip nodes that already have a partner or were already marked NoMatch.
                if (candidate.Matching != null || candidate.Match == MatchType.NoMatch)
                    continue;

                candidate.Match = match;
                updated++;
            }

            return updated;
        }
Example #10
0
 /// <summary>
 /// Assigns the same match type to every node in the array.
 /// </summary>
 /// <param name="nodes">The nodes to update.</param>
 /// <param name="match">The match type to assign.</param>
 private static void SetMatching(XNode[] nodes, MatchType match)
 {
     foreach (var node in nodes)
     {
         node.Match = match;
     }
 }
Example #11
0
 /// <summary>
 /// Links two nodes as each other's matching node and assigns the same match type to both.
 /// </summary>
 /// <param name="node1">The first node of the pair.</param>
 /// <param name="node2">The second node of the pair.</param>
 /// <param name="match">The match type recorded on both nodes.</param>
 private static void SetMatching(XNode node1, XNode node2, MatchType match)
 {
     // First node: type and partner.
     node1.Match = match;
     node1.Matching = node2;

     // Second node: type and partner.
     node2.Match = match;
     node2.Matching = node1;
 }
Example #12
0
 /// <summary>
 /// Assigns a match type to a single node; its matching node is left untouched.
 /// </summary>
 /// <param name="node1">The node to update.</param>
 /// <param name="match">The match type to assign.</param>
 private static void SetMatching(XNode node1, MatchType match) => node1.Match = match;
Example #13
0
        /// <summary>
        /// Finds the minimal-cost matching between two node lists, records it on the nodes and
        /// then diffs every matched pair of elements.
        /// </summary>
        /// <param name="nodes1">Nodes that form the rows of the cost table.</param>
        /// <param name="nodes2">Nodes that form the columns of the cost table.</param>
        /// <param name="treeOrder">
        /// When true, nodes1 entries are passed first to Distance, the lookup key and DiffElements;
        /// when false, the pair is swapped.
        /// </param>
        private static void MatchList(XNode[] nodes1, XNode[] nodes2, bool treeOrder)
        {
            int rows = nodes1.Length;
            int cols = nodes2.Length;
            var costs = new int[rows + 1, cols + 1];

            // Extra row: insert cost of each node in nodes2.
            for (int col = 0; col < cols; col++)
            {
                costs[rows, col] = nodes2[col].GetDescendantCount() + 1;
            }

            for (int row = 0; row < rows; row++)
            {
                // Extra column: delete cost of this node in nodes1.
                costs[row, cols] = nodes1[row].GetDescendantCount() + 1;

                for (int col = 0; col < cols; col++)
                {
                    XNode first = treeOrder ? nodes1[row] : nodes2[col];
                    XNode second = treeOrder ? nodes2[col] : nodes1[row];

                    int cost = Distance(first, second, true, NoConnection);
                    if (cost < NoConnection)
                    {
                        distanceLookup[new Tuple<XNode, XNode>(first, second)] = cost;
                    }

                    costs[row, col] = cost;
                }
            }

            // Compute the minimal cost matching; the partner arrays are filled by FindMinimalMatching.
            var partners1 = new int[rows];
            var partners2 = new int[cols];
            FindMinimalMatching(costs, partners1, partners2);

            // Record the outcome for nodes1 ...
            for (int row = 0; row < partners1.Length; row++)
            {
                int partner = partners1[row];
                if (partner == NoMatch)
                {
                    SetMatching(nodes1[row], MatchType.NoMatch);
                }
                else
                {
                    SetMatching(nodes1[row], nodes2[partner], MatchType.Change);
                }
            }

            // ... and for nodes2.
            for (int col = 0; col < partners2.Length; col++)
            {
                int partner = partners2[col];
                if (partner == NoMatch)
                {
                    SetMatching(nodes2[col], MatchType.NoMatch);
                }
                else
                {
                    SetMatching(nodes2[col], nodes1[partner], MatchType.Change);
                }
            }

            // Descend into every matched pair in which both nodes are elements.
            for (int row = 0; row < partners1.Length; row++)
            {
                int partner = partners1[row];
                if (partner == NoMatch)
                    continue;

                XNode left = nodes1[row];
                XNode right = nodes2[partner];
                if (!left.IsElement() || !right.IsElement())
                    continue;

                if (treeOrder)
                {
                    DiffElements(left, right);
                }
                else
                {
                    DiffElements(right, left);
                }
            }
        }
Example #14
0
        /// <summary>
        /// Computes the editing distance between two groups of text nodes.
        /// </summary>
        /// <param name="texts1">Text nodes of the first element.</param>
        /// <param name="texts2">Text nodes of the second element.</param>
        /// <returns>The length of the longer list minus the number of hash-equal pairs found.</returns>
        private static int DistanceTexts(XNode[] texts1, XNode[] texts2)
        {
            int paired = 0;
            var used = new bool[texts2.Length];

            foreach (var text1 in texts1)
            {
                for (int j = 0; j < texts2.Length; j++)
                {
                    // Skip candidates that are already paired or whose hash differs.
                    if (used[j] || !text1.HashEquals(texts2[j].Hash))
                        continue;

                    used[j] = true;
                    paired++;
                    break;
                }

                // Stop once every node of texts2 has a partner.
                if (paired == texts2.Length)
                    break;
            }

            int longer = texts1.Length < texts2.Length ? texts2.Length : texts1.Length;
            return longer - paired;
        }
Example #15
0
        /// <summary>
        /// Matches two attribute lists and records the outcome on the attribute nodes.
        /// </summary>
        /// <param name="attributes1">Attributes of the first element.</param>
        /// <param name="attributes2">Attributes of the second element.</param>
        private static void DiffAttributes(XNode[] attributes1, XNode[] attributes2)
        {
            // Special case: a single attribute on each side.
            if (attributes1.Length == 1 && attributes2.Length == 1)
            {
                var only1 = attributes1[0];
                var only2 = attributes2[0];

                // Hash-equal attributes are left untouched.
                if (!only1.HashEquals(only2.Hash))
                {
                    var outcome = only1.Name == only2.Name ? MatchType.Change : MatchType.NoMatch;
                    SetMatching(only1, only2, outcome);
                }

                return;
            }

            // General case: for each attribute of the first list take the first free attribute
            // of the second list that has an equal hash or an equal name.
            int paired = 0;
            for (int i = 0; i < attributes1.Length; i++)
            {
                var attr1 = attributes1[i];
                bool found = false;

                for (int j = 0; j < attributes2.Length && !found; j++)
                {
                    var attr2 = attributes2[j];
                    if (attr2.Matching != null)
                        continue;

                    if (attr2.HashEquals(attr1.Hash))
                    {
                        SetMatching(attr1, attr2, MatchType.Match);
                        found = true;
                    }
                    else if (attr2.Name == attr1.Name)
                    {
                        SetMatching(attr1, attr2, MatchType.Change);
                        found = true;
                    }
                }

                if (found)
                {
                    paired++;
                }
                else
                {
                    attr1.Match = MatchType.NoMatch;
                }
            }

            // Attributes of the second list that were never paired are marked as NoMatch.
            if (paired != attributes2.Length)
            {
                SetUnmatched(attributes2, MatchType.NoMatch);
            }
        }
Example #16
0
        /// <summary>
        /// Pairs each still-unmatched node of the first list with the first still-unmatched node of the second list.
        /// </summary>
        /// <param name="nodes1">Nodes to find partners for.</param>
        /// <param name="nodes2">Candidate partners.</param>
        /// <param name="match">The match type recorded on both nodes of every pair.</param>
        /// <returns>The number of pairs that were created.</returns>
        private static int MatchAny(XNode[] nodes1, XNode[] nodes2, MatchType match)
        {
            int paired = 0;

            for (int i = 0; i < nodes1.Length; i++)
            {
                XNode source = nodes1[i];
                if (source.Matching != null || source.Match == MatchType.NoMatch)
                    continue;

                for (int j = 0; j < nodes2.Length; j++)
                {
                    XNode target = nodes2[j];
                    if (target.Matching != null || target.Match == MatchType.NoMatch)
                        continue;

                    SetMatching(source, target, match);
                    paired++;
                    break;
                }
            }

            return paired;
        }
Example #17
0
        /// <summary>
        /// Compare and match the two nodes (and their children), recording the outcome on the
        /// attribute, text and element nodes below them.
        /// </summary>
        /// <param name="node1">Element from the first tree.</param>
        /// <param name="node2">Element from the second tree.</param>
        private static void DiffElements(XNode node1, XNode node2)
        {
            // Attributes: diff them when both sides have some, otherwise mark the existing side as NoMatch.
            if (node1.Attributes.Length > 0)
            {
                if (node2.Attributes.Length > 0)
                    DiffAttributes(node1.Attributes, node2.Attributes);
                else
                    SetMatching(node1.Attributes, MatchType.NoMatch);
            }
            else if (node2.Attributes.Length > 0)
            {
                SetMatching(node2.Attributes, MatchType.NoMatch);
            }

            // Children = Elements and Text

            // First, if one side has no children, everything on the other side is NoMatch.
            if (node1.Children.Length == 0)
                SetMatching(node2.Children, MatchType.NoMatch);

            else if (node2.Children.Length == 0)
                SetMatching(node1.Children, MatchType.NoMatch);

            // Next, if one child each
            else if (node2.Children.Length == 1 && node1.Children.Length == 1)
            {
                var child1 = node1.Children[0];
                var child2 = node2.Children[0];

                // Hash-equal children are left untouched.
                if (child1.HashEquals(child2.Hash))
                    return;

                var isElement1 = child1.IsElement();
                var isElement2 = child2.IsElement();

                if (isElement1 && isElement2)
                {
                    if (child1.Name == child2.Name)
                    {
                        // Same element name: a change; continue comparing one level down.
                        SetMatching(child1, child2, MatchType.Change);
                        DiffElements(child1, child2);
                    }
                    else
                        SetMatching(child1, child2, MatchType.NoMatch);
                }
                else if (!isElement1 && !isElement2)
                    // Two non-element children with different hashes: a change.
                    SetMatching(child1, child2, MatchType.Change);
                else
                    // One element and one non-element.
                    SetMatching(child1, child2, MatchType.NoMatch);
            }

            // Then, if many children
            else
            {
                // Match text nodes
                if (node1.Texts.Length > 0)
                {
                    if (node2.Texts.Length > 0)
                        DiffTexts(node1.Texts, node2.Texts);
                    else
                        SetMatching(node1.Texts, MatchType.NoMatch);
                }
                else if (node2.Texts.Length > 0)
                    SetMatching(node2.Texts, MatchType.NoMatch);

                // Match element nodes with equal hashes
                var matched = MatchEqual(node1.Elements, node2.Elements, MatchType.Match);
                if (matched == node1.Elements.Length && matched == node2.Elements.Length)
                    return;

                // One side is fully matched: the leftovers on the other side are NoMatch.
                if (matched == node1.Elements.Length)
                    SetUnmatched(node2.Elements, MatchType.NoMatch);

                if (matched == node2.Elements.Length)
                    SetUnmatched(node1.Elements, MatchType.NoMatch);

                // 'Match' remaining unmatched child elements nodes.
                // remainingN is the number of elements of nodeN not paired by MatchEqual;
                // matchCountN counts how many of them have been placed into a name group so far.
                int remaining1 = node1.Elements.Length - matched;
                int remaining2 = node2.Elements.Length - matched;
                int matchCount1 = 0;
                int matchCount2 = 0;

                while ((matchCount1 < remaining1) && (matchCount2 < remaining2))
                {
                    var unmatched1 = new List<XNode>();
                    var unmatched2 = new List<XNode>();
                    string name = null;

                    // Find and group unmatched elements by their name.
                    // The group name is taken from the first element that has no matching node
                    // and is not marked NoMatch.
                    foreach (var child1 in node1.Elements)
                    {
                        if (child1.Matching == null && child1.Match != MatchType.NoMatch)
                        {
                            if (name == null)
                                name = child1.Name;

                            if (name == child1.Name)
                            {
                                unmatched1.Add(child1);
                                matchCount1++;
                            }
                        }
                    }

                    // Find unmatched nodes in the other subtree with the same element name
                    foreach (var child2 in node2.Elements)
                    {
                        if (child2.Matching == null && child2.Match != MatchType.NoMatch)
                        {
                            if (name == child2.Name)
                            {
                                unmatched2.Add(child2);
                                matchCount2++;
                            }
                        }
                    }

                    // NOTE(review): unmatched1 is a List<XNode>; this call relies on a SetMatching
                    // overload that accepts it, which is not visible here - confirm.
                    if (unmatched2.Count == 0)
                        SetMatching(unmatched1, MatchType.NoMatch);
                    else
                    {
                        // A single candidate on each side is paired directly.
                        if ((unmatched1.Count == 1) && (unmatched2.Count == 1))
                        {
                            SetMatching(unmatched1[0], unmatched2[0], MatchType.Change);
                            DiffElements(unmatched1[0], unmatched2[0]);
                        }

                        // Find minimal-cost matching between those unmatched; the longer list is
                        // passed first and treeOrder tells MatchList which list belongs to node1.
                        else if (unmatched1.Count >= unmatched2.Count)
                            MatchList(unmatched1.ToArray(), unmatched2.ToArray(), true);
                        else
                            MatchList(unmatched2.ToArray(), unmatched1.ToArray(), false);
                    }

                }

                // Finally mark any remaining child elements as unmatched
                if (matchCount1 < remaining1)
                    SetUnmatched(node1.Elements, MatchType.NoMatch);
                else if (matchCount2 < remaining2)
                    SetUnmatched(node2.Elements, MatchType.NoMatch);
            }
        }
Example #18
0
        /// <summary>
        /// Flags pairs of elements with equal hashes so that they can be left out of further matching.
        /// </summary>
        /// <param name="elements1">Elements of the first node.</param>
        /// <param name="elements2">Elements of the second node.</param>
        /// <param name="matched1">Flags parallel to elements1; set to true for every element that gets paired.</param>
        /// <param name="matched2">Flags parallel to elements2; set to true for every element that gets paired.</param>
        /// <returns>The number of pairs found.</returns>
        private static int MatchFilter(XNode[] elements1, XNode[] elements2, bool[] matched1, bool[] matched2)
        {
            int pairs = 0;

            for (int second = 0; second < elements2.Length; second++)
            {
                for (int first = 0; first < elements1.Length; first++)
                {
                    // Both elements must still be free.
                    if (matched1[first] || matched2[second])
                        continue;

                    if (!elements1[first].HashEquals(elements2[second].Hash))
                        continue;

                    matched1[first] = true;
                    matched2[second] = true;
                    pairs++;
                    break;
                }
            }

            return pairs;
        }
Example #19
0
 /// <summary>
 /// Creates the tree for an XML document by building XNodes from its document element downwards.
 /// </summary>
 /// <param name="document">The source XML document.</param>
 public XTree(XmlDocument document)
 {
     this.Document = document;

     // The root XNode has no parent.
     var rootElement = document.DocumentElement;
     this.Root = XNode.Build(rootElement, null);
 }