Beispiel #1
0
        /// <summary>
        /// Walks the subtree under <paramref name="root"/> depth-first and writes one console line
        /// of the form "<c>Index: characters</c>" for every node whose <c>Index</c> is not -1.
        /// </summary>
        /// <param name="root">Node to visit; its <c>Start</c>..<c>EndIndex.End</c> range of <c>_input</c> is appended to the prefix.</param>
        /// <param name="result">Characters collected on the way down to <paramref name="root"/>; never modified.</param>
        private void DfsTraversal(SuffixNode root, List <char> result)
        {
            // Work on a private copy so that sibling subtrees do not see each other's characters.
            var path = new List<char>(result);

            var position = root.Start;
            while (position <= root.EndIndex.End)
            {
                path.Add(this._input[position]);
                position++;
            }

            if (root.Index == -1)
            {
                // Index -1: keep descending into every populated child slot.
                foreach (var next in root.Children)
                {
                    if (next == null)
                    {
                        continue;
                    }

                    this.DfsTraversal(next, path);
                }

                return;
            }

            // Any other Index: print it with the collected characters and stop descending.
            Console.Write($"{root.Index}: ");
            Console.WriteLine(path.ToArray());
        }
Beispiel #2
0
        /// <summary>
        /// Factory that creates a <c>SuffixNode</c> and fills in its edge bounds.
        /// </summary>
        /// <param name="start">Value stored in the new node's <c>Start</c>.</param>
        /// <param name="endIndex">Value stored, as passed, in the new node's <c>EndIndex</c>.</param>
        /// <returns>The newly created node; all other members keep their defaults.</returns>
        public static SuffixNode CreateNode(int start, EndIndex endIndex)
        {
            return new SuffixNode
            {
                Start    = start,
                EndIndex = endIndex,
            };
        }
Beispiel #3
0
        /// <summary>
        /// Recursively checks that the characters of <paramref name="input"/> from position
        /// <paramref name="curr"/> onwards can be followed edge by edge from <paramref name="root"/>
        /// down to a node whose <c>Index</c> equals <paramref name="index"/>.
        /// A diagnostic line is written to the console for every failure.
        /// </summary>
        /// <param name="root">Node to continue the walk from; <c>null</c> is reported as a failure.</param>
        /// <param name="input">Character array the edge bounds refer to.</param>
        /// <param name="index">Index expected on the node that terminates the walk.</param>
        /// <param name="curr">Position in <paramref name="input"/> of the next character to match.</param>
        /// <returns><c>true</c> when the walk ends on a node carrying <paramref name="index"/>; otherwise <c>false</c>.</returns>
        private bool Validate(SuffixNode root, char[] input, int index, int curr)
        {
            if (root == null)
            {
                Console.WriteLine($"Failed at {curr} for index {index}");
                return false;
            }

            // A node whose Index is not -1 ends the walk: it must carry the expected index.
            if (root.Index != -1)
            {
                if (root.Index != index)
                {
                    Console.WriteLine($"Index is not same. Failed at {curr} for index {index}");
                    return false;
                }

                return true;
            }

            // The input is exhausted but the walk has not ended. This must be ">=": with ">" the
            // case curr == input.Length slipped through and input[curr] below threw
            // IndexOutOfRangeException instead of reporting the failure.
            if (curr >= input.Length)
            {
                Console.WriteLine($"Index is not same. Failed at {curr} for index {index}");
                return false;
            }

            var node = root.Children[input[curr]];

            if (node == null)
            {
                Console.WriteLine($"Failed at {curr} for index {index}");
                return false;
            }

            // Compare the edge label input[node.Start .. node.EndIndex.End] with the input at curr.
            var j = 0;

            for (var i = node.Start; i <= node.EndIndex.End; i++)
            {
                // An edge label running past the end of the input is a validation failure,
                // not a reason to crash on input[curr + j].
                if (curr + j >= input.Length)
                {
                    Console.WriteLine($"Failed at {curr + j} for index {index}");
                    return false;
                }

                if (input[curr + j] != input[i])
                {
                    Console.WriteLine($"Mismatch found {input[curr + j]} vs. {input[i]}");
                    return false;
                }

                j++;
            }

            curr += node.Length;
            return this.Validate(node, input, index, curr);
        }
Beispiel #4
0
        /// <summary>
        /// Builds the tree for <c>_input</c>: resets the construction state, runs
        /// <c>StartPhase</c> once per input position, and finally assigns the node indices
        /// through <c>SetIndexUsingDfs</c>.
        /// </summary>
        public void Build()
        {
            // The root is created with Start 1 and an end of 0 and is marked with Index -1.
            // Presumably this makes its Length zero; confirm against SuffixNode.Length.
            var rootNode = SuffixNode.CreateNode(1, new EndIndex(0));
            rootNode.Index = -1;
            this._root     = rootNode;

            this._activePoint          = new ActivePoint(rootNode);
            this._globalEndIndex       = new EndIndex(-1);
            this._remainingSuffixCount = 0;

            var phase = 0;
            while (phase < this._input.Length)
            {
                this.StartPhase(phase);
                phase++;
            }

            // Report if the counter is still non-zero after the last phase.
            var pending = this._remainingSuffixCount;
            if (pending != 0)
            {
                Console.WriteLine("Something went wrong!");
            }

            this.SetIndexUsingDfs(this._root, 0, this._input.Length);
        }
Beispiel #5
0
        /// <summary>
        /// Depth-first pass that adds up the <c>Length</c> of every node on the way down and, on
        /// reaching a node whose <c>Index</c> is not -1, sets that node's <c>Index</c> to
        /// <paramref name="size"/> minus the accumulated length.
        /// </summary>
        /// <param name="root">Node to process; <c>null</c> entries are ignored.</param>
        /// <param name="val">Accumulated length of the nodes above <paramref name="root"/>.</param>
        /// <param name="size">Value the accumulated length is subtracted from (the caller passes the input length).</param>
        private void SetIndexUsingDfs(SuffixNode root, int val, int size)
        {
            if (root == null)
            {
                return;
            }

            var accumulated = val + root.Length;

            if (root.Index == -1)
            {
                // Index -1: pass the accumulated length on to every child slot.
                foreach (var next in root.Children)
                {
                    this.SetIndexUsingDfs(next, accumulated, size);
                }
            }
            else
            {
                root.Index = size - accumulated;
            }
        }
Beispiel #6
0
        /// <summary>
        /// Runs construction phase <paramref name="i"/>: extends the tree with the character
        /// <c>_input[i]</c>. The shared end index and the remaining-suffix counter are incremented
        /// first; the loop then inserts pending suffixes until the counter reaches zero or the
        /// current character is found to be present already (the loop is left with <c>break</c>).
        /// </summary>
        /// <param name="i">Position in <c>_input</c> of the character handled by this phase.</param>
        private void StartPhase(int i)
        {
            // Reset lastCreatedSuffixNode to null at the start of each phase.
            SuffixNode lastCreatedSuffixNode = null;

            // Increase the global end index shared by the leaves. This completes the rule 1 extension for leaves.
            this._globalEndIndex.Increase();
            // One more suffix has to be inserted: increase remainingSuffixCount at the start of each phase.
            this._remainingSuffixCount++;
            while (this._remainingSuffixCount > 0)
            {
                if (this._activePoint.ActiveLength == 0)
                {
                    // If active length is 0, look for the current character from root.
                    // When active length is 0, the active node must be root.
                    // NOTE(review): no suffix link is followed in this branch, so it relies on
                    // that invariant - confirm against FindSuffixNodeByCharIndex and ActivePoint.
                    var nodeByChar = this.FindSuffixNodeByCharIndex(i);
                    if (nodeByChar != null)
                    {
                        // The current character is already present: increase active length by 1
                        // and break out of the while loop.
                        this._activePoint.ActiveEdge = nodeByChar.Start;
                        this._activePoint.ActiveLength++;
                        break;
                    }
                    else
                    {
                        // Create a new leaf node for the current character under the active node. Rule 2 extension.
                        this._activePoint.ActiveNode.Children[this._input[i]] = SuffixNode.CreateNode(i, this._globalEndIndex);
                        this._remainingSuffixCount--;
                    }
                }
                else
                {
                    // If active length is not 0, we are somewhere in the middle of an edge.
                    // So check whether the next character is the same as the current character.
                    this.MoveForwardIfNeeded(i);
                    if (this.IsNextCharMatched(i))
                    {
                        // The next character equals the current character: apply the rule 3 extension
                        // and trick 2 (show stopper), i.e. break out of the while loop.

                        // Answer to tusroy's TODO question:
                        // If lastCreatedSuffixNode is not null, then it must be in X -> Z format, where Z is the current char.
                        // We can directly assign the current active edge node as its suffix link.
                        if (lastCreatedSuffixNode != null)
                        {
                            lastCreatedSuffixNode.SuffixLink = this.GetCurrentActiveEdgeNode();
                        }
                        var edgeNode = this.GetCurrentActiveEdgeNode();
                        if (this._activePoint.ActiveLength < edgeNode.Length)
                        {
                            // Still inside the edge: just advance by one character.
                            this._activePoint.ActiveLength++;
                        }
                        else
                        {
                            // The whole edge is consumed: make the edge node the active node and
                            // continue on its child edge for the current character.
                            this._activePoint.ActiveNode   = edgeNode;
                            this._activePoint.ActiveEdge   = edgeNode.Children[this._input[i]].Start;
                            this._activePoint.ActiveLength = this._activePoint.ActiveLength - edgeNode.Length + 1; // expected to evaluate to 1.
                        }
                        break;
                    }
                    else
                    {
                        // The next character differs from the current character: do a rule 2 extension.
                        var edgeNode     = this.GetCurrentActiveEdgeNode();
                        var internalNode = edgeNode;
                        if (edgeNode.Length > this._activePoint.ActiveLength)
                        {
                            // Splitting in the middle of the edge node: create a new internal node first.
                            internalNode = SuffixNode.CreateNode(
                                edgeNode.Start,
                                new EndIndex(edgeNode.Start + this._activePoint.ActiveLength - 1));
                            // Every newly created internal node gets root as its default suffix link.
                            internalNode.SuffixLink = this._root;
                            internalNode.Index      = -1;
                            // The old edge node keeps the remainder of the label and becomes a child
                            // of the internal node, which takes its place under the active node.
                            edgeNode.Start          = edgeNode.Start + this._activePoint.ActiveLength;
                            internalNode.Children[this._input[edgeNode.Start]] = edgeNode;
                            this._activePoint.ActiveNode.Children[this._input[internalNode.Start]] = internalNode;
                        }

                        // Create a leaf node for the current character.
                        var leafNode = SuffixNode.CreateNode(i, this._globalEndIndex);
                        internalNode.Children[this._input[i]] = leafNode;

                        // Link the node handled in the previous extension of this phase to this one.
                        if (lastCreatedSuffixNode != null)
                        {
                            lastCreatedSuffixNode.SuffixLink = internalNode;
                        }
                        lastCreatedSuffixNode = internalNode;

                        // Once the rule 2 extension is done, decrease remainingSuffixCount.
                        this._remainingSuffixCount--;

                        // If the active node is not root, follow its suffix link;
                        // otherwise shift the active edge by one and shorten the active length.
                        if (this._activePoint.ActiveNode != this._root)
                        {
                            this._activePoint.ActiveNode = this._activePoint.ActiveNode.SuffixLink;
                        }
                        else
                        {
                            this._activePoint.ActiveEdge++;
                            this._activePoint.ActiveLength--;
                        }
                    }
                }
            }
        }