/// <summary>
/// Updates the tree starting from <paramref name="inputNode"/> by adding
/// <paramref name="stringPart"/>.
/// </summary>
/// <remarks>
/// The "line N" markers in the body are carried over from the original code; they
/// appear to reference the numbered steps of the update procedure in Ukkonen's paper.
/// </remarks>
/// <param name="inputNode">The node to start from.</param>
/// <param name="stringPart">The string to add to the tree; its last element is the symbol being inserted.</param>
/// <param name="rest">The rest of the string; used as the label of any newly created leaf edge.</param>
/// <param name="value">The value to add to the index.</param>
/// <returns>
/// A (node, string) reference pair for the string added so far: the node is the one
/// reached by the longest path string S1 that is obtained by concatenating consecutive
/// edges in the tree and is a substring of the string added so far; the list is the
/// remainder that must be appended to S1 to obtain the string added so far.
/// </returns>
private Tuple<Node<T>, List<int>> Update(Node<T> inputNode, List<int> stringPart, List<int> rest, T value)
{
    var activeNode = inputNode;
    var pending = stringPart;
    var lastSymbol = stringPart[stringPart.Count - 1];

    // line 1
    var previousSplit = _root;

    // line 1b
    // pending is non-empty here (the indexer above would have thrown otherwise),
    // so GetRange yields the same "all but the last element" copy.
    var probe = TestAndSplit(activeNode, pending.GetRange(0, pending.Count - 1), lastSymbol, rest, value);

    // line 2: Item1 is the end-point flag, Item2 the node to branch from.
    while (!probe.Item1)
    {
        var branchNode = probe.Item2;

        // line 3
        var existingEdge = branchNode.GetEdge(lastSymbol);
        Node<T> leaf;
        if (existingEdge != null)
        {
            // Such a node is already present. This is one of the main differences from
            // Ukkonen's case: the tree can contain deeper nodes at this stage because
            // different strings were added by previous iterations.
            leaf = existingEdge.Target;
        }
        else
        {
            // must build a new leaf
            leaf = new Node<T>();
            leaf.AddRef(value);
            branchNode.AddEdge(lastSymbol, new Edge<T>(rest, leaf));
        }

        // update suffix link for the newly created leaf
        if (_activeLeaf != _root)
        {
            _activeLeaf.Suffix = leaf;
        }
        _activeLeaf = leaf;

        // line 4
        if (previousSplit != _root)
        {
            previousSplit.Suffix = branchNode;
        }

        // line 5
        previousSplit = branchNode;

        // line 6
        if (activeNode.Suffix != null)
        {
            var canonical = Canonize(activeNode.Suffix, SafeCutLastChar(pending));
            activeNode = canonical.Item1;

            // pending = canonical remainder + last symbol of the old pending string
            var trailing = pending[pending.Count - 1];
            pending = new List<int>(canonical.Item2) { trailing };
        }
        else
        {
            // No suffix link: the original comments treat this as the root node,
            // the special case standing in for the node called _|_ in the paper.
            // TODO (carried over): check why the assertion root == activeNode was disabled.
            pending = pending.Skip(1).ToList();
        }

        // line 7
        probe = TestAndSplit(activeNode, SafeCutLastChar(pending), lastSymbol, rest, value);
    }

    // line 8
    if (previousSplit != _root)
    {
        previousSplit.Suffix = probe.Item2;
    }

    return new Tuple<Node<T>, List<int>>(activeNode, pending);
}
/// <summary>
/// Canonizes the reference (<paramref name="inputs"/>, <paramref name="stringPart"/>) and
/// tests whether a transition on symbol <paramref name="t"/> already exists from the
/// resulting position, splitting an edge when the position falls inside an edge label
/// or when <paramref name="remainder"/> is a proper prefix of an existing edge label.
/// </summary>
/// <param name="inputs">Node the reference starts from.</param>
/// <param name="stringPart">Path string, relative to <paramref name="inputs"/>, to descend along.</param>
/// <param name="t">The symbol whose transition is being tested.</param>
/// <param name="remainder">The remaining string being inserted (starting at <paramref name="t"/> per the caller's usage).</param>
/// <param name="value">Index value recorded on a node that exactly represents <paramref name="remainder"/>.</param>
/// <returns>
/// A pair whose first item is <c>true</c> when no new branch has to be added by the caller,
/// and whose second item is the node the caller should continue from (a freshly created
/// internal node when an edge was split in the middle of its label).
/// </returns>
private static Tuple<bool, Node<T>> TestAndSplit(Node<T> inputs, List<int> stringPart, int t, List<int> remainder, T value)
{
    // descend the tree as far as possible
    var ret = Canonize(inputs, stringPart);
    var s = ret.Item1;
    var str = ret.Item2;

    if (str.Count != 0)
    {
        // The canonical position lies inside the label of edge g.
        var g = s.GetEdge(str[0]);
        var label = g.Label;

        // must see whether "str" followed by t is already spelled out by the label
        if (label.Count > str.Count && label[str.Count] == t)
        {
            return Tuple.Create(true, s);
        }

        // need to split the edge: s --str--> r --newlabel--> (old target of g)
        var newlabel = label.Skip(str.Count).ToList();
        var r = new Node<T>();
        var newedge = new Edge<T>(str, r);
        g.Label = newlabel;

        // link s -> r
        r.AddEdge(newlabel[0], g);
        s.AddEdge(str[0], newedge);
        return Tuple.Create(false, r);
    }

    var e = s.GetEdge(t);
    if (e == null)
    {
        // there is no t-transition from s
        return Tuple.Create(false, s);
    }

    // Compare element-wise. A bare Equals(a, b) call on two lists can bind to
    // object.Equals(object, object), which is reference equality for List<int> and
    // would skip AddRef whenever the same sequence arrives in a different list instance.
    if (remainder.SequenceEqual(e.Label))
    {
        // update payload of destination node
        e.Target.AddRef(value);
        return Tuple.Create(true, s);
    }

    // Either remainder extends past the edge label, or the two are different words
    // with no prefix relation (they may still share some common substring).
    // In both cases nothing has to be split here.
    if (StartsWith(remainder, e.Label) || !StartsWith(e.Label, remainder))
    {
        return Tuple.Create(true, s);
    }

    // remainder is a proper prefix of the edge label: split as above, and record
    // value on the new intermediate node that now represents remainder exactly.
    var newNode = new Node<T>();
    newNode.AddRef(value);
    var newEdge = new Edge<T>(remainder, newNode);
    e.Label = e.Label.Skip(remainder.Count).ToList();
    newNode.AddEdge(e.Label[0], e);
    s.AddEdge(t, newEdge);
    return Tuple.Create(false, s);
}