Exemplo n.º 1
0
        /// <summary>
        /// Depth-first walk that records in <c>deepestNode</c>/<c>deepestLength</c> the node with the
        /// largest accumulated edge length among the nodes that have at least one child whose
        /// <c>startIndex</c> is &lt;= <paramref name="len1"/> and at least one child whose
        /// <c>startIndex</c> is greater than <paramref name="len1"/>.
        /// </summary>
        /// <param name="node">Node to visit; the length of its own edge is added to <paramref name="length"/>.</param>
        /// <param name="length">Edge length accumulated on the path above <paramref name="node"/>.</param>
        /// <param name="len1">Index threshold used to classify children (presumably the length of the first input string; confirm with the caller).</param>
        public void LongestCommonSubstring(SuffixNode node, int length, int len1)
        {
            int depth = length + (node.end.endIndex - node.startIndex + 1);

            bool seenAtOrBelowThreshold = false;
            bool seenAboveThreshold     = false;

            foreach (var childNode in node.child)
            {
                if (childNode == null)
                {
                    continue;
                }

                if (childNode.startIndex > len1)
                {
                    seenAboveThreshold = true;
                }
                else
                {
                    seenAtOrBelowThreshold = true;
                }

                LongestCommonSubstring(childNode, depth, len1);
            }

            // Only nodes with children on both sides of the threshold are candidates.
            if (!seenAtOrBelowThreshold || !seenAboveThreshold)
            {
                return;
            }

            if (depth > this.deepestLength)
            {
                this.deepestLength = depth;
                this.deepestNode   = node;
            }
        }
Exemplo n.º 2
0
 /// <summary>
 /// Initializes the node's <c>startIndex</c>, <c>end</c>, <c>index</c> and <c>suffixLink</c> members from the arguments.
 /// </summary>
 /// <param name="si">Value stored in <c>startIndex</c>.</param>
 /// <param name="e">Value stored in <c>end</c>.</param>
 /// <param name="ind">Value stored in <c>index</c>.</param>
 /// <param name="linkNode">Value stored in <c>suffixLink</c>.</param>
 public SuffixNode(int si, End e, int ind, SuffixNode linkNode)
 {
     suffixLink = linkNode;
     index      = ind;
     end        = e;
     startIndex = si;
 }
Exemplo n.º 3
0
        /// <summary>
        /// Returns the character that follows the active point in the tree, walking the active point
        /// down to the next node whenever <c>activeLength</c> exceeds the length of the active edge.
        /// </summary>
        /// <param name="index">Index into <c>str</c> of the character currently being inserted.</param>
        /// <returns>The next character stored in the tree after the active point.</returns>
        /// <exception cref="OverflowException">
        /// The active point sits exactly at the end of the active edge and the node there has no child
        /// for <c>str[index]</c>. The exception is used as a signal, not as an arithmetic error.
        /// </exception>
        private char nextCharInTree(int index)
        {
            while (true)
            {
                // Edge leaving the active node in the direction of the active-edge character.
                SuffixNode edgeNode = this.active.activeNode.child[this.str[this.active.activeEdge]];
                int        edgeSpan = (edgeNode.end.endIndex - edgeNode.startIndex) + 1;

                if (this.active.activeLength < edgeSpan)
                {
                    // The active point lies strictly inside this edge.
                    return this.str[edgeNode.startIndex + this.active.activeLength];
                }

                if (this.active.activeLength == edgeSpan)
                {
                    // The active point lies exactly on the node at the end of this edge.
                    if (edgeNode.child[this.str[index]] == null)
                    {
                        throw new OverflowException();
                    }

                    return this.str[index];
                }

                // The active point lies beyond this edge: hop to the child node and retry.
                this.active.activeNode   = edgeNode;
                this.active.activeLength = this.active.activeLength - edgeSpan;
                this.active.activeEdge   = this.active.activeEdge + edgeSpan;
            }
        }
Exemplo n.º 4
0
 /// <summary>
 /// Creates a tree whose root is a fresh <c>SuffixNode</c> with a null <c>Parent</c>.
 /// </summary>
 private SuffixTreeImpl()
 {
     var rootNode = new SuffixNode();

     rootNode.Parent = null;
     this.theRoot    = rootNode;
 }
Exemplo n.º 5
0
        /// <summary>
        /// Recursively searches the children of <paramref name="node"/> whose <c>index</c> is -1 and
        /// returns the largest value produced by that recursion.
        /// </summary>
        /// <param name="node">Node to examine; its <c>end.endIndex</c> is read, so <c>end</c> must not be null.</param>
        /// <param name="si">Start index subtracted from <c>node.end.endIndex</c>; passed unchanged to every recursive call.</param>
        /// <returns>
        /// The largest positive result from a qualifying child if there is one; otherwise
        /// <c>node.end.endIndex - si + 1</c> (computed on entry) if at least one qualifying child exists;
        /// otherwise 0.
        /// </returns>
        public int LongestRepeatedSubstring(SuffixNode node, int si)
        {
            // Computed from the node passed in, before the loop below may reassign `node`.
            int nodeLength = node.end.endIndex - si + 1;

            int  maxL = 0;
            bool hi   = false; // set once any child with index == -1 is seen

            for (int i = 0; i < node.child.Length; i++)
            {
                // Only children with index == -1 are followed (presumably internal nodes; confirm against the tree builder).
                if (node.child[i] != null && node.child[i].index == -1)
                {
                    hi = true;
                    int temp = LongestRepeatedSubstring(node.child[i], si);
                    if (temp > maxL)
                    {
                        maxL = temp;
                        // NOTE(review): this reassigns the parameter while the loop is still running, so the
                        // remaining iterations read `child` from the selected child instead of the original
                        // node. Confirm whether that is intended.
                        node = node.child[i];
                    }
                }
            }
            if (maxL > 0)
            {
                return(maxL);
            }

            if (hi)
            {
                return(nodeLength);
            }

            return(0);
        }
Exemplo n.º 6
0
 /// <summary>
 /// Creates a tree whose root is a fresh <c>SuffixNode</c> with a null <c>Parent</c>,
 /// then stores <paramref name="text"/> in <c>Text</c>.
 /// </summary>
 /// <param name="text">Value assigned to <c>Text</c>.</param>
 private SuffixTreeImpl(string text)
 {
     var rootNode = new SuffixNode();

     rootNode.Parent = null;
     this.theRoot    = rootNode;

     this.Text = text;
 }
Exemplo n.º 7
0
            /// <summary>
            /// Match routine: walks down the tree from <paramref name="node"/>, consuming the label ranges
            /// in <paramref name="edges"/> one tree edge at a time.
            /// </summary>
            /// <param name="node">node to start matching from</param>
            /// <param name="edges">label ranges (Start..End, both inclusive) to match against tree edges; must contain at least one element</param>
            /// <param name="childNode">on return, the last child node reached by the matching routine</param>
            /// <param name="unmatchedEdgeCursor">
            /// -1 if matching ended exactly at <paramref name="childNode"/>; otherwise the text index of the
            /// first character on the edge into <paramref name="childNode"/> that was not consumed
            /// </param>
            /// <returns>true if matching ended at a node; false if matching finished inside an edge</returns>
            /// <exception cref="ArgumentNullException"><paramref name="node"/> is null.</exception>
            /// <exception cref="ArgumentException"><paramref name="edges"/> is null or empty.</exception>
            private bool Match(SuffixNode node, IEdgeLabel[] edges, out SuffixNode childNode, out int unmatchedEdgeCursor)
            {
                if (node == null)
                {
                    // The parameter is called "node"; the previous "parent" named a parameter that does not exist.
                    throw new ArgumentNullException("node");
                }

                if (edges == null || edges.Length <= 0)
                {
                    throw new ArgumentException("invalid argument", "edges");
                }

                unmatchedEdgeCursor = -1;
                var matchingEdgeIndex  = 0;
                var matchingEdgeCursor = edges[matchingEdgeIndex].Start;
                var matchingEdgeEnd    = edges[matchingEdgeIndex].End;

                childNode = this.TraverseEdge(node, matchingEdgeCursor);
                var treeEdgeLength = this.GetEdgeLength(childNode.Edge);

                do
                {
                    // diff > 0: the current label is longer than the tree edge;
                    // diff == 0: both end together;
                    // diff < 0: the label ends inside the tree edge (-diff characters of the edge remain).
                    var diff = matchingEdgeEnd - matchingEdgeCursor + 1 - treeEdgeLength;

                    if (diff > 0)
                    {
                        // Consume the whole tree edge and keep matching the same label.
                        matchingEdgeCursor += treeEdgeLength;
                    }
                    else
                    {
                        if (++matchingEdgeIndex < edges.Length)
                        {
                            // Current label exhausted: continue with the next one.
                            matchingEdgeCursor = edges[matchingEdgeIndex].Start;
                            matchingEdgeEnd    = edges[matchingEdgeIndex].End;
                        }
                        else
                        {
                            // nothing more to match ..
                            if (diff < 0) //unmatched = -diff
                            {
                                // An End of -1 is resolved to CurrentPhase - 1 before computing the cursor.
                                unmatchedEdgeCursor = (childNode.Edge.End == -1 ? (this.CurrentPhase - 1) : childNode.Edge.End) + diff + 1;
                            }
                            break;
                        }
                    }

                    if (diff >= 0)
                    {
                        // The tree edge was fully consumed: descend to the next child.
                        node           = childNode;
                        childNode      = this.TraverseEdge(node, matchingEdgeCursor);
                        treeEdgeLength = this.GetEdgeLength(childNode.Edge);
                    }
                } while (true);

                return(unmatchedEdgeCursor == -1);
            }
Exemplo n.º 8
0
        /// <summary>
        /// Returns the length of <paramref name="node"/>'s edge, with the edge end clamped to <c>_text.Count</c>.
        /// </summary>
        /// <param name="node">Node whose <c>Start</c> and <c>End</c> are read.</param>
        /// <returns>1 when <c>Start</c> equals the clamped end; otherwise the clamped end minus <c>Start</c>.</returns>
        private int EdgeLength([NotNull] SuffixNode node)
        {
            var clampedEnd = Math.Min(node.End, _text.Count);
            var span       = clampedEnd - node.Start;

            return span == 0 ? 1 : span;
        }
Exemplo n.º 9
0
            /// <summary>
            /// Returns the child of <paramref name="parent"/> reached through the edge keyed by the
            /// character that <c>GetCharFromIndex</c> yields for <paramref name="matchingStart"/>.
            /// </summary>
            /// <param name="parent">Node whose outgoing edge is looked up.</param>
            /// <param name="matchingStart">Index handed to <c>GetCharFromIndex</c> to obtain the edge key.</param>
            /// <returns>The child node; never null.</returns>
            /// <exception cref="ApplicationException">No such edge exists.</exception>
            private SuffixNode TraverseEdge(SuffixNode parent, int matchingStart)
            {
                var        edgeKey = this.GetCharFromIndex(matchingStart);
                SuffixNode next    = parent.GetEdge(edgeKey);

                if (next != null)
                {
                    return next;
                }

                throw new ApplicationException("unexpected!");
            }
Exemplo n.º 10
0
        /// <summary>
        /// Appends a new node with the given <c>Start</c>/<c>End</c> and a <c>SuffixLink</c> of 0 to <c>_tree</c>.
        /// </summary>
        /// <param name="start">Value stored in the node's <c>Start</c>.</param>
        /// <param name="end">Value stored in the node's <c>End</c>.</param>
        /// <returns><c>_tree.Count - 1</c> after the node has been added.</returns>
        private int AddNode(int start, int end)
        {
            _tree.Add(new SuffixNode { Start = start, End = end, SuffixLink = 0 });

            return _tree.Count - 1;
        }
Exemplo n.º 11
0
            /// <summary>
            /// Returns the child of <paramref name="parent"/> reached through the edge keyed by the
            /// character that <c>GetCharFromIndex</c> yields for <paramref name="matchingStart"/>.
            /// </summary>
            /// <param name="parent">Node whose outgoing edge is looked up.</param>
            /// <param name="matchingStart">Index handed to <c>GetCharFromIndex</c> to obtain the edge key.</param>
            /// <returns>The child node; never null.</returns>
            /// <exception cref="ArgumentException">No such edge exists.</exception>
            private SuffixNode TraverseEdge(SuffixNode parent, int matchingStart)
            {
                SuffixNode next = parent.GetEdge(this.GetCharFromIndex(matchingStart));

                // Guard: a missing edge is reported as an invalid matchingStart.
                if (next == null)
                {
                    throw new ArgumentException("invalid argument", "matchingStart");
                }

                return next;
            }
Exemplo n.º 12
0
 /// <summary>
 /// Sets <c>Edge.End</c> to <paramref name="endIndex"/> on every leaf in the subtree rooted at <paramref name="p"/>.
 /// </summary>
 /// <param name="p">Root of the subtree to process.</param>
 /// <param name="endIndex">Value written to each leaf's <c>Edge.End</c>.</param>
 private void FixupLeaves(SuffixNode p, int endIndex)
 {
     if (p.IsLeaf)
     {
         p.Edge.End = endIndex;
         return;
     }

     // Not a leaf: recurse into every child.
     foreach (var entry in p.Children)
     {
         this.FixupLeaves(entry.Value as SuffixNode, endIndex);
     }
 }
Exemplo n.º 13
0
            /// <summary>
            /// Pre-match routine: decides where the next match should start, using the links of
            /// <paramref name="p"/>, its parent, or its grandparent.
            /// </summary>
            /// <param name="p">node to start pre-matching from; must not be the root</param>
            /// <param name="target">null when the method returns true; otherwise the node to start the next match from</param>
            /// <param name="edges">
            /// null when the method returns true or when <paramref name="p"/> has its own link; otherwise the
            /// one or two edge labels to be matched next, starting from <paramref name="target"/>
            /// </param>
            /// <returns>true if the next match should be started from the root node, false otherwise</returns>
            private bool PreMatch(SuffixNode p, out SuffixNode target, out IEdgeLabel[] edges)
            {
                Debug.Assert(p != this.theRoot);

                target = null;
                edges  = null;

                var parentNode = p.Parent;

                // Case 1: p has its own link, so no further matching is needed.
                if (p.Link != null)
                {
                    target = p.Link;
                    return false;
                }

                // Case 2: the parent is the root, so the caller has to go down from the root.
                if (parentNode == this.theRoot)
                {
                    return true;
                }

                // Case 3: the parent has a link; only p's own edge has to be re-matched.
                if (parentNode.Link != null)
                {
                    Debug.Assert(parentNode.Link != null);

                    edges = new EdgeLabel[1] {
                        EdgeLabel.Create(p.Edge.Start, p.Edge.End == -1 ? (this.CurrentPhase - 1) : p.Edge.End)
                    };

                    target = parentNode.Link;
                    return false;
                }

                // Case 4: fall back to the grandparent's link, unless the grandparent is the root.
                var grandParent = parentNode.Parent;

                if (grandParent == this.theRoot)
                {
                    return true;
                }

                Debug.Assert(grandParent.Link != null);

                // An End of -1 is resolved to CurrentPhase - 1 when the labels are built.
                edges = new EdgeLabel[2] {
                    EdgeLabel.Create(parentNode.Edge.Start, parentNode.Edge.End == -1 ? (this.CurrentPhase - 1) : parentNode.Edge.End),
                    EdgeLabel.Create(p.Edge.Start, p.Edge.End == -1 ? (this.CurrentPhase - 1) : p.Edge.End)
                };

                target = grandParent.Link;
                return false;
            }
Exemplo n.º 14
0
        /// <summary>
        /// Resets the builder state and runs one <c>StartPhase</c> call per character of
        /// <paramref name="inputStr"/> followed by a '$' terminator.
        /// </summary>
        /// <param name="inputStr">Text to index; '$' is appended before building.</param>
        public void BuildSuffixTree(string inputStr)
        {
            this.str = inputStr + '$';

            // The link argument is whatever `root` held before this call.
            this.root = new SuffixNode(-1, null, -1, this.root);

            remSuffix = 0;
            globalEnd = new End(-1);
            active    = new ActivePoint(this.root, -1, 0);

            int phase = 0;

            while (phase < str.Length)
            {
                this.StartPhase(phase);
                phase++;
            }
        }
Exemplo n.º 15
0
                /// <summary>
                /// Creates a non-leaf child whose edge label is (<paramref name="startCharIndex"/>, <paramref name="endCharIndex"/>)
                /// and registers it in <c>Children</c> under <c>text[startCharIndex]</c>.
                /// </summary>
                /// <param name="text">Text from which the key character is read.</param>
                /// <param name="startCharIndex">Start of the edge label; also selects the key character.</param>
                /// <param name="endCharIndex">End of the edge label; -1 is accepted by the debug check without range validation.</param>
                /// <returns>The newly created child node.</returns>
                public SuffixNode AddEdge(string text, int startCharIndex, int endCharIndex)
                {
                    Debug.Assert(endCharIndex == -1 || (0 <= startCharIndex && startCharIndex <= endCharIndex && endCharIndex < text.Length));
                    Debug.Assert(!this.Children.ContainsKey(text[startCharIndex]));

                    var child = new SuffixNode();

                    child.Edge   = EdgeLabel.Create(startCharIndex, endCharIndex);
                    child.Parent = this;
                    child.Link   = null;
                    child.IsLeaf = false;

                    this.Children.Add(text[startCharIndex], child);

                    return child;
                }
Exemplo n.º 16
0
        /// <summary>
        /// Copies the elements of <c>_text</c> covered by <paramref name="node"/>'s edge into a new array,
        /// with the edge end clamped to <c>_text.Count</c>.
        /// </summary>
        /// <param name="node">Node whose <c>Start</c> and <c>End</c> delimit the range (end exclusive).</param>
        /// <returns>A new array holding the edge's elements; an empty array when <c>Start</c> equals the clamped end.</returns>
        [NotNull] private int[] NodeEdgeData([NotNull] SuffixNode node)
        {
            var clampedEnd = Math.Min(node.End, _text.Count);
            var first      = node.Start;

            if (first == clampedEnd)
            {
                return new int[0];
            }

            var data = new int[clampedEnd - first];

            for (int offset = 0; first + offset < clampedEnd; offset++)
            {
                data[offset] = _text[first + offset];
            }

            return data;
        }
Exemplo n.º 17
0
        /// <summary>
        /// Depth-first walk that, for every node without children, sets <c>index</c> to
        /// <c>end.endIndex</c> minus the accumulated path length plus 1 and appends the node to
        /// <paramref name="leaveNodes"/>.
        /// </summary>
        /// <param name="node">Node to visit; the length of its own edge is added to <paramref name="sLength"/>.</param>
        /// <param name="sLength">Edge length accumulated on the path above <paramref name="node"/>.</param>
        /// <param name="leaveNodes">List that receives every leaf encountered.</param>
        public void DFSToPopulateStartIndexAtLeaves(SuffixNode node, int sLength, List <SuffixNode> leaveNodes)
        {
            int pathLength = sLength + (node.end.endIndex - node.startIndex + 1);
            bool hasChild  = false;

            foreach (var next in node.child)
            {
                if (next == null)
                {
                    continue;
                }

                hasChild = true;
                DFSToPopulateStartIndexAtLeaves(next, pathLength, leaveNodes);
            }

            if (hasChild)
            {
                return;
            }

            // Leaf: record where its path begins and collect it.
            node.index = node.end.endIndex - pathLength + 1;
            leaveNodes.Add(node);
        }
Exemplo n.º 18
0
        /// <summary>
        /// Resets <c>deepestNode</c>/<c>deepestLength</c>, runs the recursive search on every child of the
        /// root, and returns the substring of <c>str</c> that ends at the deepest node found.
        /// </summary>
        /// <param name="len1">Index threshold forwarded to the recursive search.</param>
        /// <returns>The substring of length <c>deepestLength</c> ending at <c>deepestNode.end.endIndex</c>, or an empty string if no node qualified.</returns>
        public string LongestCommonSubstring(int len1)
        {
            this.deepestLength = 0;
            this.deepestNode   = null;

            foreach (var topLevel in this.root.child)
            {
                if (topLevel != null)
                {
                    LongestCommonSubstring(topLevel, 0, len1);
                }
            }

            if (this.deepestNode == null)
            {
                return string.Empty;
            }

            int firstChar = deepestNode.end.endIndex - deepestLength + 1;

            return this.str.Substring(firstChar, deepestLength);
        }
Exemplo n.º 19
0
 /// <summary>
 /// Registers an existing node in <c>Children</c> under the character of <paramref name="text"/>
 /// found at the node's <c>Edge.Start</c>.
 /// </summary>
 /// <param name="text">Text from which the key character is read.</param>
 /// <param name="newChild">Node to add.</param>
 public void AddEdge(string text, SuffixNode newChild)
 {
     var firstLabelChar = text[newChild.Edge.Start];

     this.Children.Add(firstLabelChar, newChild);
 }
Exemplo n.º 20
0
            /// <summary>
            /// Builds a suffix tree over <paramref name="text"/>, processing one phase per character
            /// (phase <c>i</c> adds <c>text[i]</c>) and, within each phase, one extension per start index <c>j</c>.
            /// </summary>
            /// <param name="text">Text to index.</param>
            /// <returns>The constructed tree, or null when <paramref name="text"/> is null or empty.</returns>
            public static SuffixTreeImpl <ChildrenCollectionType> Create(string text)
            {
                if (string.IsNullOrEmpty(text))
                {
                    return(null);
                }

                var tree = new SuffixTreeImpl <ChildrenCollectionType>();

                tree.Text = text;

                var root = tree.theRoot;
                int m    = text.Length;

                // Seed the tree with a single leaf for the suffix starting at 0. Leaf edges are created with
                // an end of -1 and are given a concrete end by FixupLeaves at the very end.
                var deep = root.AddEdge(tree.Text, 0, -1);

                deep.SetLeaf(0);

                SuffixNode prevExtEnd      = deep; // node where the previous extension ended
                int        lastCreatedLeaf = -1;   // start index j of the most recently created leaf

                for (int i = 1; i < m; i++)
                {
                    tree.CurrentPhase = i;

                    bool       skipRemaining        = false; // set when the rest of this phase can be skipped
                    SuffixNode internCreatedPrevExt = null;  // internal node from the previous extension still waiting for its Link

                    for (int j = 1; j < i && !skipRemaining; j++)
                    {
                        // Extensions before the last created leaf are not revisited.
                        if (j < lastCreatedLeaf)
                        {
                            continue;
                        }

                        List <EdgeLabel> edges = null;
                        SuffixNode       found = null;
                        int  edgecursor        = -1;
                        bool matchEndedAtNode;

                        // First extension of the phase that resumes at the last created leaf: continue from prevExtEnd.
                        if (j == lastCreatedLeaf && j > 1)
                        {
                            if (!prevExtEnd.IsLeaf)
                            {
                                edges            = new List <EdgeLabel>(new EdgeLabel[] { EdgeLabel.Create(i - 1, i - 1) });
                                matchEndedAtNode = tree.Match(prevExtEnd, edges, out found, out edgecursor);
                            }
                            else
                            {
                                found            = prevExtEnd;
                                matchEndedAtNode = true;
                            }
                        }

                        if (found == null)
                        {
                            // Nothing located yet: let PreMatch decide whether to walk from the root or from a linked node.
                            SuffixNode target = null;
                            if (tree.PreMatch(prevExtEnd, out target, out edges))
                            {
                                edges            = new List <EdgeLabel>(new EdgeLabel[] { EdgeLabel.Create(j, i - 1) });
                                matchEndedAtNode = tree.Match(tree.theRoot, edges, out found, out edgecursor);
                            }
                            else
                            {
                                if (edges != null)
                                {
                                    matchEndedAtNode = tree.Match(target, edges, out found, out edgecursor);
                                }
                                else
                                {
                                    found            = target;
                                    matchEndedAtNode = true;
                                }
                            }
                        }
                        else
                        {
                            // NOTE(review): this overwrites the result of the Match call made in the
                            // `j == lastCreatedLeaf` branch above, so a match that ended inside an edge there is
                            // treated as having ended at a node. Confirm whether that is intended.
                            matchEndedAtNode = true;
                        }

                        if (!matchEndedAtNode)
                        {
                            // The match stopped inside the edge leading to `found`, at text index `edgecursor`.
                            if (tree.Text[edgecursor] == tree.Text[i])
                            {
                                // The next character is already on the edge: nothing to add in this phase.
                                skipRemaining = true;
                                break;
                            }
                            else
                            {
                                var foundParent = found.Parent;

                                // there's no node here .. better create one..
                                foundParent.RemoveEdge(tree.GetCharFromIndex(found.Edge.Start));

                                // create new node..
                                var internalNode = foundParent.AddEdge(tree.Text, found.Edge.Start, edgecursor - 1);

                                // massage old node and add it back..
                                found.Parent     = internalNode;
                                found.Edge.Start = edgecursor;
                                internalNode.AddEdge(tree.Text, found);

                                // fix up links if we need to ..
                                if (internCreatedPrevExt != null)
                                {
                                    internCreatedPrevExt.Link = internalNode;
                                }

                                internCreatedPrevExt = internalNode;

                                // create a new leaf and hang it here..
                                var newLeaf = internalNode.AddEdge(tree.Text, i, -1);
                                newLeaf.SetLeaf(j);
                                lastCreatedLeaf = j;

                                prevExtEnd = internalNode;
                            }
                        }
                        else
                        {
                            if (found.IsLeaf)
                            {
                                // Ended on a leaf: only remember where this extension ended.
                                prevExtEnd = found;
                            }
                            else
                            {
                                // Ended on an internal node: resolve the pending link from the previous extension.
                                if (internCreatedPrevExt != null)
                                {
                                    internCreatedPrevExt.Link = found;
                                }

                                // Keep `found` pending only if it has no Link of its own yet.
                                internCreatedPrevExt = found.Link == null ? found : null;

                                if (found.GetEdge(tree.GetCharFromIndex(i)) == null)
                                {
                                    // No edge for the new character yet: hang a new leaf here.
                                    var newLeaf = found.AddEdge(tree.Text, i, -1);
                                    newLeaf.SetLeaf(j);
                                    lastCreatedLeaf = j;

                                    prevExtEnd = found;
                                }
                                else
                                {
                                    // The edge already exists: nothing to add in this phase.
                                    skipRemaining = true;
                                    break;
                                }
                            }
                        }
                    }

                    if (!skipRemaining)
                    {
                        // Final extension of the phase: the single-character suffix text[i], inserted at the root.
                        var parent = root;
                        var child  = parent.GetEdge(tree.GetCharFromIndex(i));
                        if (child == null)
                        {
                            var newLeaf = parent.AddEdge(tree.Text, i, -1);
                            newLeaf.SetLeaf(i);

                            lastCreatedLeaf = i;
                            prevExtEnd      = newLeaf;
                        }

                        // Any internal node still waiting for a link gets linked to the root.
                        if (internCreatedPrevExt != null)
                        {
                            internCreatedPrevExt.Link = parent;
                            internCreatedPrevExt      = null;
                        }
                    }
                }

                // Pass the last text index to FixupLeaves for the whole tree.
                tree.FixupLeaves(tree.theRoot, tree.Text.Length - 1);
                return(tree);
            }
Exemplo n.º 21
0
            /// <summary>
            /// Walks down the tree from <paramref name="p"/>, consuming the label ranges in
            /// <paramref name="edges"/> one tree edge at a time, until the combined length of all labels
            /// has been consumed.
            /// </summary>
            /// <param name="p">Node to start matching from.</param>
            /// <param name="edges">Label ranges (Start..End, both inclusive) to match; must contain at least one element.</param>
            /// <param name="child">On return, the last child node reached.</param>
            /// <param name="firstUnmachedEdgeIndex">
            /// -1 if matching ended exactly at <paramref name="child"/>; otherwise the text index of the
            /// first character on the edge into <paramref name="child"/> that was not consumed.
            /// </param>
            /// <returns>true if matching ended at a node; false if it ended inside an edge.</returns>
            /// <exception cref="ArgumentException"><paramref name="edges"/> is null or empty.</exception>
            private bool Match(SuffixNode p, List <EdgeLabel> edges, out SuffixNode child, out int firstUnmachedEdgeIndex)
            {
                if (edges == null || edges.Count <= 0)
                {
                    throw new ArgumentException();
                }

                firstUnmachedEdgeIndex = -1;

                // Total number of characters that have to be consumed.
                int total = 0;

                foreach (var label in edges)
                {
                    total += this.GetEdgeLength(label);
                }

                int consumed   = 0;
                int labelIndex = 0;
                var current    = edges[labelIndex];
                int cursorNext = current.Start;
                int edgeEnd    = current.End;

                child = this.TraverseEdge(p, cursorNext);
                var treeEdgeLen = this.GetEdgeLength(child.Edge);

                do
                {
                    // Text index at which the current tree edge would end if fully consumed from cursorNext.
                    var  treeEdgeLast = cursorNext + treeEdgeLen - 1;
                    bool descend;

                    if (treeEdgeLast < edgeEnd)
                    {
                        // The tree edge is shorter than what is left of the label: consume it and go deeper.
                        consumed   += treeEdgeLen;
                        cursorNext += treeEdgeLen;

                        Debug.Assert(consumed < total);

                        descend = true;
                    }
                    else if (treeEdgeLast == edgeEnd)
                    {
                        // The tree edge and the label end together.
                        consumed += treeEdgeLen;
                        descend   = labelIndex + 1 < edges.Count;

                        if (descend)
                        {
                            current    = edges[++labelIndex];
                            cursorNext = current.Start;
                            edgeEnd    = current.End;
                        }
                    }
                    else
                    {
                        // The label ends inside the tree edge; `leftover` characters of the edge remain.
                        consumed += edgeEnd - cursorNext + 1;
                        var leftover = treeEdgeLast - edgeEnd;

                        descend = false;

                        if (labelIndex + 1 < edges.Count)
                        {
                            current    = edges[++labelIndex];
                            cursorNext = current.Start;
                            edgeEnd    = current.End;

                            // Keep matching the next label against the rest of the same tree edge.
                            treeEdgeLen = leftover;
                        }
                        else
                        {
                            // An End of -1 is resolved to CurrentPhase - 1 before computing the index.
                            firstUnmachedEdgeIndex = (child.Edge.End == -1 ? (this.CurrentPhase - 1) : child.Edge.End) - leftover + 1;
                        }
                    }

                    if (descend)
                    {
                        child       = this.TraverseEdge(child, cursorNext);
                        treeEdgeLen = this.GetEdgeLength(child.Edge);
                    }
                }while (consumed < total);

                return firstUnmachedEdgeIndex == -1;
            }
Exemplo n.º 22
0
 /// <summary>
 /// Creates an active point located at <paramref name="node"/> with <c>ActiveEdge</c> set to -1
 /// and <c>ActiveLength</c> set to 0.
 /// </summary>
 /// <param name="node">Value stored in <c>ActiveNode</c>.</param>
 public ActivePoint(SuffixNode node)
 {
     ActiveNode   = node;
     ActiveLength = 0;
     ActiveEdge   = -1;
 }
Exemplo n.º 23
0
        /// <summary>
        /// Runs one construction phase: inserts the character <c>str[index]</c> into the tree, processing
        /// pending suffixes until <c>remSuffix</c> reaches 0 or the character is found to be already
        /// present at the active point.
        /// </summary>
        /// <param name="index">Index into <c>str</c> of the character added in this phase.</param>
        private void StartPhase(int index)
        {
            ++remSuffix;
            ++globalEnd.endIndex;                                                                    // Rule 1 extension: leaves created with globalEnd as their end all grow by one through this shared counter.
            SuffixNode LastCreatedInternalNode = null;                                               // Internal node created earlier in this phase, waiting for its suffix link.

            while (remSuffix > 0)
            {
                int ch = (int)this.str[index];                                                       // Character code of the next input character; used as the child-array index.

                if (active.activeLength == 0)                                                        // No edge is partially matched: look for the character directly under the active node.
                {
                    if (active.activeNode.child[ch] == null)                                         // No edge in the direction of the current character.
                    {
                        SuffixNode node = new SuffixNode(index, globalEnd, -1, root);                // New leaf starting at the current index.

                        active.activeNode.child[ch] = node;                                          // Attach the new leaf in the direction of the current character.

                        --remSuffix;                                                                 // A new leaf was created, so one remaining suffix is handled.
                    }
                    else
                    {
                        ++active.activeLength;
                        active.activeEdge = active.activeNode.child[ch].startIndex;                  // Set the active edge and increase the active length.
                        break;
                    }
                }
                else
                {
                    try
                    {
                        // nextCharInTree may move the active point, and throws OverflowException when the
                        // active point sits on a node that has no child for str[index].
                        char nextChInTree = nextCharInTree(index);

                        SuffixNode activeEdgeNode = active.activeNode.child[this.str[active.activeEdge]]; // Edge leaving the active node in the direction of the active-edge character.

                        if (nextChInTree == this.str[index])                                              // Current character matches the next character after the active length: Rule 3 extension.
                        {
                            // walk down and jump active node if required while increasing active length
                            int activeEdgeLength = activeEdgeNode.end.endIndex - activeEdgeNode.startIndex + 1;

                            if (active.activeLength >= activeEdgeLength)
                            {
                                // NOTE(review): activeEdge is set to the index just past the edge that was
                                // walked, not to the startIndex of the child edge for str[index]. Confirm
                                // that this is the intended active edge.
                                active.activeNode   = activeEdgeNode;
                                active.activeLength = 1;
                                active.activeEdge   = active.activeNode.startIndex + activeEdgeLength;
                            }
                            else
                            {
                                ++active.activeLength;
                            }

                            break;                                                                  // End of phase, as Rule 3 occurred.
                        }
                        else                                                                        // Current character does not match the next character after the active length: Rule 2 extension.
                        {
                            SuffixNode newInternalNode = new SuffixNode(                            // Split point: takes over the first activeLength characters of the active edge.
                                activeEdgeNode.startIndex,
                                new End(activeEdgeNode.startIndex + active.activeLength - 1),
                                -1,
                                this.root);

                            SuffixNode newLeafNode = new SuffixNode(
                                index,
                                globalEnd,
                                -1,
                                this.root);

                            // The existing node keeps only the part of its edge after the split point.
                            activeEdgeNode.startIndex = activeEdgeNode.startIndex + active.activeLength;

                            newInternalNode.child[this.str[activeEdgeNode.startIndex]] = activeEdgeNode;
                            newInternalNode.child[this.str[newLeafNode.startIndex]]    = newLeafNode;

                            active.activeNode.child[this.str[newInternalNode.startIndex]] = newInternalNode;

                            --remSuffix;


                            if (active.activeNode != this.root)
                            {
                                active.activeNode = active.activeNode.suffixLink;
                            }
                            else
                            {
                                active.activeLength--;
                                active.activeEdge++;
                            }

                            if (LastCreatedInternalNode != null)
                            {
                                LastCreatedInternalNode.suffixLink = newInternalNode;               // Link the previously created internal node to the one created in the same phase.
                            }

                            LastCreatedInternalNode = newInternalNode;
                        }
                    }
                    catch (OverflowException)
                    {
                        // Raised by nextCharInTree: the active point is on the node at the end of the active
                        // edge and that node has no child for str[index].
                        SuffixNode activeEdgeNode = this.active.activeNode.child[this.str[this.active.activeEdge]];   // Edge leaving the active node in the direction of the active-edge character.

                        activeEdgeNode.child[this.str[index]] = new SuffixNode(index, this.globalEnd, -1, this.root); // Rule 2 extension: add a new leaf under the existing node.

                        // NOTE(review): unlike the Rule 2 branch above, activeEdge/activeLength are adjusted
                        // here even when the suffix link was followed, and LastCreatedInternalNode is not
                        // touched. Confirm that this is intended.
                        if (active.activeNode != this.root)
                        {
                            this.active.activeNode = this.active.activeNode.suffixLink;
                        }
                        this.active.activeEdge++;
                        this.active.activeLength--;
                        this.remSuffix--;
                    }
                }
            }
        }
Exemplo n.º 24
0
 /// <summary>
 /// Initializes <c>activeNode</c>, <c>activeEdge</c> and <c>activeLength</c> from the arguments.
 /// </summary>
 /// <param name="an">Value stored in <c>activeNode</c>.</param>
 /// <param name="ae">Value stored in <c>activeEdge</c>.</param>
 /// <param name="al">Value stored in <c>activeLength</c>.</param>
 public ActivePoint(SuffixNode an, int ae, int al)
 {
     this.activeNode   = an;
     this.activeEdge   = ae;
     this.activeLength = al;
 }