Beispiel #1
0
        /// <summary>
        /// Extends the tree with <paramref name="stringPart"/>, starting the walk at
        /// <paramref name="inputNode"/>.
        /// </summary>
        /// <remarks>
        /// The "step N" comments keep the numbering of the original "line N" markers, which
        /// presumably refer to the numbered lines of the update procedure in Ukkonen's paper
        /// (TODO confirm against the paper).
        /// </remarks>
        /// <param name="inputNode">Node the update starts from.</param>
        /// <param name="stringPart">
        /// Symbols to add. The last element is the symbol being inserted and is read
        /// unconditionally, so the list must not be empty.
        /// </param>
        /// <param name="rest">Rest of the string; used as the label of every leaf edge created here.</param>
        /// <param name="value">Payload registered on newly created leaves and passed on to TestAndSplit.</param>
        /// <returns>
        /// A (node, symbols) reference pair for what has been added so far: the node reached by the
        /// longest path already in the tree, and the remaining symbols that must be appended to that
        /// path to obtain the string added so far.
        /// </returns>
        private Tuple<Node<T>, List<int>> Update(Node<T> inputNode, List<int> stringPart, List<int> rest, T value)
        {
            var current = inputNode;
            var pending = stringPart;
            var symbol = stringPart[stringPart.Count - 1];

            // step 1
            var previous = _root;

            // step 1b: probe with everything except the symbol being inserted
            var probe = TestAndSplit(current, pending.Take(pending.Count - 1).ToList(), symbol, rest, value);
            var attachPoint = probe.Item2;
            var reachedEndPoint = probe.Item1;

            // step 2
            while (!reachedEndPoint)
            {
                // step 3
                Node<T> leaf;
                var existing = attachPoint.GetEdge(symbol);
                if (existing == null)
                {
                    // no transition for this symbol yet: create the leaf and hang it below the attach point
                    leaf = new Node<T>();
                    leaf.AddRef(value);
                    attachPoint.AddEdge(symbol, new Edge<T>(rest, leaf));
                }
                else
                {
                    // The transition already exists. Unlike plain Ukkonen, the tree may already hold
                    // deeper nodes here because other strings were inserted by earlier iterations.
                    leaf = existing.Target;
                }

                // chain the suffix link from the previously active leaf to this one
                if (_activeLeaf != _root)
                {
                    _activeLeaf.Suffix = leaf;
                }
                _activeLeaf = leaf;

                // step 4
                if (previous != _root)
                {
                    previous.Suffix = attachPoint;
                }

                // step 5
                previous = attachPoint;

                // step 6
                if (current.Suffix != null)
                {
                    var canonical = Canonize(current.Suffix, SafeCutLastChar(pending));
                    current = canonical.Item1;

                    // pending = canonical remainder + last symbol of the old pending list
                    var trailing = pending[pending.Count - 1];
                    pending = new List<int>(canonical.Item2) { trailing };
                }
                else
                {
                    // No suffix link: special case for the auxiliary node written _|_ in the paper.
                    // TODO: the original carried a disabled assertion that this node is the root;
                    // it is still unchecked.
                    pending = pending.Skip(1).ToList();
                }

                // step 7
                probe = TestAndSplit(current, SafeCutLastChar(pending), symbol, rest, value);
                attachPoint = probe.Item2;
                reachedEndPoint = probe.Item1;
            }

            // step 8
            if (previous != _root)
            {
                previous.Suffix = attachPoint;
            }

            return new Tuple<Node<T>, List<int>>(current, pending);
        }
Beispiel #2
0
        /// <summary>
        /// Canonizes (<paramref name="inputs"/>, <paramref name="stringPart"/>) and decides whether
        /// the symbol <paramref name="t"/> can already be followed from the resulting position,
        /// splitting an edge when the position falls inside an edge or when
        /// <paramref name="remainder"/> is a proper prefix of the outgoing edge label.
        /// </summary>
        /// <param name="inputs">Node to start descending from.</param>
        /// <param name="stringPart">Symbols to descend along before testing <paramref name="t"/>.</param>
        /// <param name="t">Symbol whose transition is tested.</param>
        /// <param name="remainder">Remaining symbols of the string being inserted.</param>
        /// <param name="value">Payload added to a node whose path matches <paramref name="remainder"/>.</param>
        /// <returns>
        /// Item1 is the flag the caller (Update) uses as its end-point test; Item2 is the node the
        /// caller continues from: the canonical node, or the node created by splitting an edge in the middle.
        /// </returns>
        private static Tuple<bool, Node<T>> TestAndSplit(Node<T> inputs, List<int> stringPart, int t, List<int> remainder, T value)
        {
            // descend the tree as far as possible
            var canonical = Canonize(inputs, stringPart);
            var node = canonical.Item1;
            var leftover = canonical.Item2;

            if (leftover.Count == 0)
            {
                // We stand exactly on a node: inspect its t-transition.
                var outgoing = node.GetEdge(t);
                if (outgoing == null)
                {
                    // there is no t-transition from this node
                    return new Tuple<bool, Node<T>>(false, node);
                }

                // NOTE(review): this binds to whatever two-argument Equals is in scope; if the class has
                // no list-aware overload it is object.Equals, i.e. reference equality -- confirm.
                if (Equals(remainder, outgoing.Label))
                {
                    // same label: just record the payload on the destination node
                    outgoing.Target.AddRef(value);
                    return new Tuple<bool, Node<T>>(true, node);
                }

                // Either the edge label is a prefix of the remainder, or the two are different words
                // with no prefix relation (they may still share a common substring): nothing to split.
                if (StartsWith(remainder, outgoing.Label) || !StartsWith(outgoing.Label, remainder))
                {
                    return new Tuple<bool, Node<T>>(true, node);
                }

                // The remainder is a proper prefix of the edge label: insert a node carrying the
                // payload after the remainder and push the old edge below it.
                var inserted = new Node<T>();
                inserted.AddRef(value);

                var insertedEdge = new Edge<T>(remainder, inserted);
                outgoing.Label = outgoing.Label.Skip(remainder.Count).ToList();
                inserted.AddEdge(outgoing.Label[0], outgoing);
                node.AddEdge(t, insertedEdge);
                return new Tuple<bool, Node<T>>(false, node);
            }

            // We stopped part-way along an edge: fetch it through the first leftover symbol.
            var partial = node.GetEdge(leftover[0]);
            var fullLabel = partial.Label;

            // the edge already continues with t right after the leftover symbols
            if (fullLabel.Count > leftover.Count && fullLabel[leftover.Count] == t)
            {
                return new Tuple<bool, Node<T>>(true, node);
            }

            // Split the edge: node --leftover--> middle --tail--> old target.
            // (The original notes an unchecked assumption that the label starts with the leftover symbols.)
            var tail = fullLabel.Skip(leftover.Count).ToList();
            var middle = new Node<T>();
            var head = new Edge<T>(leftover, middle);

            partial.Label = tail;

            middle.AddEdge(tail[0], partial);
            node.AddEdge(leftover[0], head);

            return new Tuple<bool, Node<T>>(false, middle);
        }