예제 #1
0
        /// <summary>
        /// Creates an edge that leads to <paramref name="node"/> and is described by
        /// relative positions in the text.
        /// </summary>
        /// <param name="node">Node the edge ends in; must not be null</param>
        /// <param name="start">Start position of the edge; must not be negative</param>
        /// <param name="end">
        /// End position of the edge. Any negative value means "infinite" and is stored as -1.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="node"/> is null</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="start"/> is negative, or lies after a non-negative <paramref name="end"/>
        /// </exception>
        public Edge(Node node, int start, int end = -1)
        {
            if (node == null)
            {
                throw new ArgumentNullException("node");
            }

            if (start < 0)
            {
                throw new ArgumentOutOfRangeException("start", "start cannot be negative");
            }

            // A negative "end" stands for infinity, so only a finite end can be exceeded.
            // The comparison is spelled out instead of casting "end" to uint, because that
            // cast throws OverflowException for negative values in a checked context.
            if (end >= 0 && start > end)
            {
                throw new ArgumentOutOfRangeException("start", "cannot start the string after its end");
            }

            // infinity is just -1
            if (end < 0)
            {
                end = -1;
            }

            this.Start = start;
            this.End = end;
            this.EndNode = node;
        }
예제 #2
0
        /// <summary>
        /// Construct the tree for a given string of text
        /// </summary>
        /// <param name="text">
        /// text from which the tree is constructed; must contain at least one
        /// non-whitespace character
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="text"/> is null, empty or consists only of whitespace
        /// </exception>
        public SuffixTree(string text)
        {
            if (string.IsNullOrWhiteSpace(text))
            {
                // the first constructor argument is the parameter NAME, not its value
                throw new ArgumentNullException("text", "text cannot be null, empty or whitespace");
            }

            Text = text;

            // the root node doubles as the initial active node
            _activeNode = new Node(_currentNodeNumber);
            RootNode = _activeNode;
        }
예제 #3
0
 /// <summary>
 /// Creates a child node and attaches it to this node through a new edge
 /// </summary>
 /// <param name="label">Label given to the new node</param>
 /// <param name="start">Start position of the new edge in the text</param>
 /// <param name="end">End position of the new edge in the text; defaults to -1</param>
 internal void AddNode(uint label, int start, int end = -1)
 {
     Edge edgeToChild = new Edge(new Node(label), start, end);

     // the edge is filed under the first element of its route
     this.Edges.Add(edgeToChild.Route[0], edgeToChild);
 }
예제 #4
0
        /// <summary>
        /// Creates the actual suffix tree.
        /// Walks over the start index i of every suffix of Text. The index only advances
        /// once a leaf or branch has been added for it, or once the comparison has run
        /// past the end of Text; every other iteration merely moves the active node
        /// and/or the active length and retries with the same i.
        /// </summary>
        public void Create()
        {
            // some of our loop iterations actually constitute one route
            // in that case we should not choose to accidentally slip and
            // follow the suffix node; the flag is only raised right after i advanced
            bool followSuffixNode = false;

            // i is deliberately not incremented in the loop header, see the summary
            for (int i = 0; i < Text.Length;)
            {
                // make sure the lower bound remains within its boundaries
                ValidateAndUpdateMinDistance(i);

                // tuple of (node, edge), or null when the active node offers no route
                var nodeEdge = _activeNode.FindNextRoute(i + _activeLength, followSuffixNode);

                //if we have terminated in a non-leaf node we are done
                if (i + _activeLength >= Text.Length && nodeEdge == null)
                {
                    break;
                }

                // we could not find anything, add to the tree
                if (nodeEdge == null)
                {
                    // new leaf directly below the active node, with the default (-1) end
                    _activeNode.AddNode(++_currentNodeNumber, i + _activeLength);
                    _lastBranchIndex = i + _activeLength;
                    i++;
                    followSuffixNode = true;
                    continue;
                }

                var node = nodeEdge.Item1;
                var edge = nodeEdge.Item2;

                if (edge == null)
                {
                    //we found a suffix node
                    _activeNode = node;
                    _activeLength--;
                    followSuffixNode = false;
                    continue;
                }
                else if (node != null)
                {
                    //we found a new active
                    _activeNode = node;
                    _activeLength++;
                    followSuffixNode = false;
                    continue;
                }

                // from here on: an edge was returned without a node

                // now walk the chosen path and see where the current suffix diverges
                // (the walk may update _activeLength, _minDistance and _activeNode)
                var edgePosTuple = edge.WalkTheEdge(i, ref _activeLength, ref _minDistance, ref _activeNode);

                edge = edgePosTuple.Item1;
                int j = edgePosTuple.Item2;

                // the walk covered the whole edge: continue from the node it ends in
                if (j == edge.Route.Length)
                {
                    _activeNode = edge.EndNode;
                    _activeLength += edge.Route.Length;
                    followSuffixNode = false;
                    continue;
                }

                // we now need to insert a new branch node
                _minDistance = j;
                _lastBranchIndex = i + j + _activeLength;

                // the branch position lies past the end of the text: nothing to insert,
                // move on to the next suffix
                if (_lastBranchIndex >= Text.Length)
                {
                    i++;
                    followSuffixNode = true;
                    continue;
                }

                // we are inserting a new branch node; the old edge is cut so that it
                // ends at index edge.Start + j - 1
                var newBranchNode = edge.Split(edge.Start + j - 1, ++_currentNodeNumber);

                // if we have reached this branch node through a route of just
                // one character - its suffix pointer is set to the current active node
                // NOTE(review): Route is read after Split has changed the edge's End -
                // presumably it already reflects the shortened edge; confirm
                if (edge.Route.Length == 1)
                {
                    newBranchNode.SuffixPointer = _activeNode;
                }

                // the second check is because of the root-node suffix pointer special case
                // above
                if (null != _lastBranchNode && _lastBranchNode.SuffixPointer == null)
                {
                    _lastBranchNode.SuffixPointer = newBranchNode;
                }

                // hang the new leaf for the current suffix off the branch node
                newBranchNode.AddNode(++_currentNodeNumber, _lastBranchIndex);
                _lastBranchNode = newBranchNode;
                i++;
                followSuffixNode = true;
            }
        }
예제 #5
0
        /// <summary>
        /// Cuts this edge short at <paramref name="end"/> and hangs the remainder
        /// off a newly created node.
        /// </summary>
        /// <param name="end">New end index of this edge; the remainder starts at the next index</param>
        /// <param name="currentNodeNumber">Number handed to the newly created node</param>
        /// <returns>The new node this edge ends in after the split</returns>
        internal Node Split(int end, uint currentNodeNumber)
        {
            // the remainder keeps the previous end node and the previous end position
            Edge remainder = new Edge(this.EndNode, end + 1, this.End);
            Node middle = new Node(currentNodeNumber);

            // shorten this edge so that it stops at the new node
            this.End = end;
            this.EndNode = middle;

            // the remainder is filed under the first element of its route
            middle.Edges.Add(remainder.Route[0], remainder);
            return middle;
        }
예제 #6
0
        /// <summary>
        /// Compares the original text, starting at position i plus the active length,
        /// with what is in this edge. Edges that need no comparison at all are
        /// skipped first.
        /// </summary>
        /// <param name="i">Index of comparison start in the original text</param>
        /// <param name="activeLength">Offset added to i; grows by the route length of every skipped edge</param>
        /// <param name="minDistance">How many characters are guaranteed equal; shrinks by the route length of every skipped edge</param>
        /// <param name="activeNode">Set to the end node of every skipped edge</param>
        /// <returns>(edge, index) - the edge on which the walk ended and the index Walk returned for it</returns>
        internal Tuple<Edge, int> WalkTheEdge(int i, ref int activeLength, ref int minDistance, ref Node activeNode)
        {
            string source = SuffixTree.Text;
            int guaranteedEqual = minDistance;
            int compareFrom = i + activeLength;

            if (guaranteedEqual < this.Route.Length)
            {
                // part of this edge still has to be compared
                int stoppedAt = Walk(source, compareFrom, guaranteedEqual);
                return new Tuple<Edge, int>(this, stoppedAt);
            }

            // we know we do not need any comparisons on this edge:
            // hop to its end node and carry on with the edge found there
            // NOTE(review): FindEdgeByChar is assumed to always return an edge here - confirm
            var following = this.EndNode.FindEdgeByChar(i + this.Route.Length);
            activeLength += this.Route.Length;
            minDistance -= this.Route.Length;
            activeNode = this.EndNode;

            return following.WalkTheEdge(i, ref activeLength, ref minDistance, ref activeNode);
        }