/// <summary>
/// Given a string, builds its suffix tree by inserting the suffixes one at a time,
/// from the shortest to the longest.
/// </summary>
/// <param name="text">The string for which the suffix tree is being built. It is assumed not to contain the '$' character, which is used as the terminator. </param>
/// <returns>The root of the suffix tree. The root is never null; for a null or empty <paramref name="text"/> it has no children. </returns>
public static SuffixTreeNode Build(string text)
{
    /* Assuming text does not contain $, appending $.
       Without this terminator, some suffixes would be implicit in the tree
       (i.e., they would end in the middle of an edge instead of at a leaf). */
    string extendedText = text + "$";

    /* Always create the root, so that callers get a usable (possibly empty) tree
       rather than null when there is nothing to insert. */
    var root = new SuffixTreeNode { IsRoot = true };

    /* Start building the suffix tree from the shortest suffix.
       For example in string 'data$', the shortest suffix is 'a$'.
       The bare terminator '$' (index Length - 1) is deliberately not inserted on its own. */
    for (int i = extendedText.Length - 2; i >= 0; i--)
    {
        /* Insert adds the suffix as a direct child of the root when no existing branch
           shares its first character; this also covers the very first suffix. */
        Insert(root, extendedText.Substring(i), i);
    }

    return root;
}
/// <summary>
/// Inserts the given suffix in the tree. Notice that the suffix is not necessarily inserted as a whole:
/// while traversing the tree, nodes whose labels share a common prefix with the suffix consume that
/// prefix, and only the remaining part of the suffix is stored in a new leaf.
/// </summary>
/// <param name="root">The root node of a suffix tree. </param>
/// <param name="suffix">The suffix string that should be inserted in the suffix tree. Must not be null or empty. </param>
/// <param name="startIndex">The start index of the suffix in its container string. </param>
public static void Insert(SuffixTreeNode root, string suffix, int startIndex)
{
    if (root == null)
    {
        throw new System.ArgumentNullException(nameof(root));
    }

    if (string.IsNullOrEmpty(suffix))
    {
        throw new System.ArgumentException("The suffix must contain at least one character.", nameof(suffix));
    }

    SuffixTreeNode node = root;
    int indexOverSuffix = 0; /* Index of the first character of the suffix that is not yet matched in the tree. */

    while (true)
    {
        /* Before creating a new branch, look for a child of the current node whose label
           starts with the next unmatched character of the suffix. The query is materialized
           once so that the children are not enumerated repeatedly. */
        char branchChar = suffix[indexOverSuffix];
        var matches = node.Children.Where(c => c.StringValue.StartsWith(branchChar)).ToList();

        if (matches.Count == 0)
        {
            /* No child shares the starting character: the rest of the suffix becomes a new leaf. */
            node.Children.Add(new SuffixTreeNode { IsLeaf = true, StringValue = suffix.Substring(indexOverSuffix), StartIndex = startIndex });
            return;
        }

        Contract.Assert(matches.Count == 1); /* It is expected that all the branches (children) of a node start with distinct characters. */

        /* Take the only child that has the same starting character and continue matching along its label.
           The first character is already known to match, hence both indexes move past it. */
        node = matches[0];
        indexOverSuffix++;

        int j = 1; /* Number of characters of the node label matched so far. */
        while (j < node.StringValue.Length && indexOverSuffix < suffix.Length && node.StringValue[j] == suffix[indexOverSuffix])
        {
            j++;
            indexOverSuffix++;
        }

        if (j < node.StringValue.Length)
        {
            /* The suffix diverges from (or ends inside) the node label after j characters.
               The node keeps only the common prefix of length j and gets two children:
               one carrying the rest of the old label, and one carrying the rest of the suffix. */
            var tail = new SuffixTreeNode
            {
                IsLeaf = node.IsLeaf,
                IsIntermediate = node.IsIntermediate,
                StringValue = node.StringValue.Substring(j),
                StartIndex = node.StartIndex
            };

            /* Whatever used to hang below the node must now hang below the tail of its old label;
               otherwise the paths through the existing children would lose the characters in tail. */
            foreach (var grandChild in node.Children.ToList())
            {
                tail.Children.Add(grandChild);
            }

            node.Children.Clear();

            var newLeaf = new SuffixTreeNode { IsLeaf = true, StringValue = suffix.Substring(indexOverSuffix), StartIndex = startIndex };

            node.IsLeaf = false;
            node.IsRoot = false;
            node.IsIntermediate = true;
            node.StartIndex = -1;
            node.StringValue = node.StringValue.Substring(0, j); /* Keep exactly the matched prefix. */
            node.Children.Add(tail);
            node.Children.Add(newLeaf);
            return;
        }

        if (indexOverSuffix >= suffix.Length)
        {
            /* The whole suffix is already spelled out by the path ending at this node,
               so there is nothing left to insert. Returning here avoids looping forever. */
            return;
        }

        /* The whole node label matched and the suffix has characters left: descend one level. */
    }
}