/// <summary>
/// Depth-first walk that records in <c>deepestNode</c> / <c>deepestLength</c> the deepest node
/// that has at least one child edge starting at or before <paramref name="len1"/> and at least
/// one child edge starting after it.
/// </summary>
/// <param name="node">Node whose subtree is examined; its own edge length is added to the running depth.</param>
/// <param name="length">Accumulated edge length of the path above <paramref name="node"/>.</param>
/// <param name="len1">Index boundary used to classify each child edge by its start index.</param>
public void LongestCommonSubstring(SuffixNode node, int length, int len1)
{
    int depth = length + (node.end.endIndex - node.startIndex + 1);
    bool sawFirst = false;
    bool sawSecond = false;

    foreach (var next in node.child)
    {
        if (next == null)
        {
            continue;
        }

        // Classification looks only at the direct child's edge start.
        if (next.startIndex > len1)
        {
            sawSecond = true;
        }
        else
        {
            sawFirst = true;
        }

        LongestCommonSubstring(next, depth, len1);
    }

    // Descendants were visited first, so they have already had the chance to claim the record.
    if (sawFirst && sawSecond && this.deepestLength < depth)
    {
        this.deepestLength = depth;
        this.deepestNode = node;
    }
}
/// <summary>
/// Creates a node from its edge start, shared end marker, index value and suffix link.
/// </summary>
/// <param name="si">Value stored in <c>startIndex</c>.</param>
/// <param name="e">Value stored in <c>end</c>.</param>
/// <param name="ind">Value stored in <c>index</c>.</param>
/// <param name="linkNode">Value stored in <c>suffixLink</c>.</param>
public SuffixNode(int si, End e, int ind, SuffixNode linkNode)
{
    suffixLink = linkNode;
    index = ind;
    end = e;
    startIndex = si;
}
/// <summary>
/// Returns the character that follows the active point, walking the active point down
/// past any edge that is shorter than the current active length.
/// </summary>
/// <param name="index">Index into <c>str</c> of the character currently being inserted.</param>
/// <returns>The next character in the tree after the active point.</returns>
/// <exception cref="OverflowException">
/// The active point sits exactly at the end of an edge and the node there has no child for <c>str[index]</c>.
/// </exception>
private char nextCharInTree(int index)
{
    while (true)
    {
        // Edge leaving the active node in the direction of the active edge character.
        SuffixNode edgeNode = this.active.activeNode.child[this.str[this.active.activeEdge]];
        int edgeSpan = (edgeNode.end.endIndex - edgeNode.startIndex) + 1;
        int walked = this.active.activeLength;

        if (edgeSpan > walked)
        {
            return this.str[edgeNode.startIndex + walked];
        }

        if (edgeSpan == walked)
        {
            if (edgeNode.child[this.str[index]] == null)
            {
                throw new OverflowException();
            }

            return this.str[index];
        }

        // The edge is shorter than the active length: hop to its end node and look again.
        this.active.activeNode = edgeNode;
        this.active.activeLength = walked - edgeSpan;
        this.active.activeEdge = this.active.activeEdge + edgeSpan;
    }
}
/// <summary>
/// Creates an empty tree whose root node has no parent.
/// </summary>
private SuffixTreeImpl()
{
    var rootNode = new SuffixNode();
    rootNode.Parent = null;
    this.theRoot = rootNode;
}
/// <summary>
/// Recursively searches the children of <paramref name="node"/> whose <c>index</c> is still -1
/// and returns the largest length reported by any of them.
/// </summary>
/// <remarks>
/// The previous version reassigned <paramref name="node"/> inside the loop. After the first
/// improving child, the loop went on to inspect that child's children instead of the remaining
/// siblings. Every sibling is now examined.
/// NOTE(review): children with <c>index == -1</c> are presumably internal nodes, i.e. the leaf
/// indices were populated beforehand - confirm against the caller.
/// </remarks>
/// <param name="node">Node whose children are examined; its <c>end</c> must not be null.</param>
/// <param name="si">Start index subtracted from the node's end index to obtain its length.</param>
/// <returns>
/// The best length found below <paramref name="node"/>; otherwise the node's own length if it
/// has a child with <c>index == -1</c>; otherwise 0.
/// </returns>
public int LongestRepeatedSubstring(SuffixNode node, int si)
{
    int nodeLength = node.end.endIndex - si + 1;
    int maxL = 0;
    bool hasUnindexedChild = false;

    foreach (var childNode in node.child)
    {
        if (childNode == null || childNode.index != -1)
        {
            continue;
        }

        hasUnindexedChild = true;
        int candidate = LongestRepeatedSubstring(childNode, si);
        if (candidate > maxL)
        {
            maxL = candidate;
        }
    }

    if (maxL > 0)
    {
        return maxL;
    }

    return hasUnindexedChild ? nodeLength : 0;
}
/// <summary>
/// Creates a tree with a parentless root node and remembers the text it is built over.
/// </summary>
/// <param name="text">Value stored in <c>Text</c>.</param>
private SuffixTreeImpl(string text)
{
    var rootNode = new SuffixNode();
    rootNode.Parent = null;
    this.theRoot = rootNode;
    this.Text = text;
}
/// <summary>
/// Walks down the tree from <paramref name="node"/>, consuming the given edge labels in order.
/// </summary>
/// <param name="node">Node to start matching from.</param>
/// <param name="edges">Non-empty sequence of labels to match against tree edges.</param>
/// <param name="childNode">On return, the node at the lower end of the last tree edge visited.</param>
/// <param name="unmatchedEdgeCursor">
/// -1 when matching ended exactly at a node; otherwise the text index of the first character
/// of the last tree edge that was not consumed.
/// </param>
/// <returns>true if matching ended at a node; false if it finished inside an edge.</returns>
/// <exception cref="ArgumentNullException"><paramref name="node"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="edges"/> is null or empty.</exception>
private bool Match(SuffixNode node, IEdgeLabel[] edges, out SuffixNode childNode, out int unmatchedEdgeCursor)
{
    if (node == null)
    {
        // The parameter is called "node"; the old message named a non-existent "parent".
        throw new ArgumentNullException("node");
    }

    if (edges == null || edges.Length <= 0)
    {
        throw new ArgumentException("invalid argument", "edges");
    }

    unmatchedEdgeCursor = -1;
    var matchingEdgeIndex = 0;
    var matchingEdgeCursor = edges[matchingEdgeIndex].Start;
    var matchingEdgeEnd = edges[matchingEdgeIndex].End;
    childNode = this.TraverseEdge(node, matchingEdgeCursor);
    var treeEdgeLength = this.GetEdgeLength(childNode.Edge);

    do
    {
        // > 0: label is longer than the tree edge; == 0: both end together; < 0: label ends inside the tree edge.
        var diff = matchingEdgeEnd - matchingEdgeCursor + 1 - treeEdgeLength;
        if (diff > 0)
        {
            matchingEdgeCursor += treeEdgeLength;
        }
        else
        {
            if (++matchingEdgeIndex < edges.Length)
            {
                matchingEdgeCursor = edges[matchingEdgeIndex].Start;
                matchingEdgeEnd = edges[matchingEdgeIndex].End;
                if (diff < 0)
                {
                    // Only part of the tree edge was consumed: the next label continues on the
                    // same edge, so it must be compared with what is left of it, not the full length.
                    treeEdgeLength = -diff;
                }
            }
            else
            {
                // Nothing more to match; -diff characters of the tree edge remain unmatched.
                if (diff < 0)
                {
                    unmatchedEdgeCursor = (childNode.Edge.End == -1 ? (this.CurrentPhase - 1) : childNode.Edge.End) + diff + 1;
                }

                break;
            }
        }

        if (diff >= 0)
        {
            // The tree edge was consumed completely: descend to the next edge.
            node = childNode;
            childNode = this.TraverseEdge(node, matchingEdgeCursor);
            treeEdgeLength = this.GetEdgeLength(childNode.Edge);
        }
    } while (true);

    return unmatchedEdgeCursor == -1;
}
/// <summary>
/// Length of the edge leading into <paramref name="node"/>, with the end clamped to the
/// current size of <c>_text</c>.
/// </summary>
/// <param name="node">Node whose <c>Start</c> and <c>End</c> delimit the edge.</param>
/// <returns>The clamped end minus the start, or 1 when the two coincide.</returns>
private int EdgeLength([NotNull] SuffixNode node)
{
    int stop = Math.Min(node.End, _text.Count);
    int span = stop - node.Start;
    return span == 0 ? 1 : span;
}
/// <summary>
/// Follows the edge of <paramref name="parent"/> selected by the character at <paramref name="matchingStart"/>.
/// </summary>
/// <param name="parent">Node whose outgoing edge is looked up.</param>
/// <param name="matchingStart">Index passed to <c>GetCharFromIndex</c> to obtain the edge key.</param>
/// <returns>The child node at the end of that edge.</returns>
/// <exception cref="ApplicationException">No such edge exists.</exception>
private SuffixNode TraverseEdge(SuffixNode parent, int matchingStart)
{
    var edgeKey = this.GetCharFromIndex(matchingStart);
    SuffixNode next = parent.GetEdge(edgeKey);
    if (next != null)
    {
        return next;
    }

    throw new ApplicationException("unexpected!");
}
/// <summary>
/// Appends a new node with the given edge bounds and a suffix link of 0 to <c>_tree</c>.
/// </summary>
/// <param name="start">Value stored in the node's <c>Start</c>.</param>
/// <param name="end">Value stored in the node's <c>End</c>.</param>
/// <returns><c>_tree.Count - 1</c> after the node has been added.</returns>
private int AddNode(int start, int end)
{
    var created = new SuffixNode();
    created.Start = start;
    created.End = end;
    created.SuffixLink = 0;

    _tree.Add(created);
    return _tree.Count - 1;
}
/// <summary>
/// Follows the edge of <paramref name="parent"/> selected by the character at <paramref name="matchingStart"/>.
/// </summary>
/// <param name="parent">Node whose outgoing edge is looked up.</param>
/// <param name="matchingStart">Index passed to <c>GetCharFromIndex</c> to obtain the edge key.</param>
/// <returns>The child node at the end of that edge.</returns>
/// <exception cref="ArgumentException">No edge exists for that character.</exception>
private SuffixNode TraverseEdge(SuffixNode parent, int matchingStart)
{
    var edgeKey = this.GetCharFromIndex(matchingStart);
    SuffixNode reached = parent.GetEdge(edgeKey);
    if (reached == null)
    {
        throw new ArgumentException("invalid argument", "matchingStart");
    }

    return reached;
}
/// <summary>
/// Recursively sets <c>Edge.End</c> of every leaf in the subtree of <paramref name="p"/> to <paramref name="endIndex"/>.
/// </summary>
/// <param name="p">Root of the subtree to process.</param>
/// <param name="endIndex">End index written to each leaf edge.</param>
private void FixupLeaves(SuffixNode p, int endIndex)
{
    if (p.IsLeaf)
    {
        p.Edge.End = endIndex;
        return;
    }

    foreach (var entry in p.Children)
    {
        var descendant = entry.Value as SuffixNode;
        this.FixupLeaves(descendant, endIndex);
    }
}
/// <summary>
/// Decides where the next matching pass should start, using the links of <paramref name="p"/>,
/// its parent or its grandparent.
/// </summary>
/// <param name="p">Node to start pre-matching from; must not be the root.</param>
/// <param name="target">
/// Ignored when the method returns true; otherwise the node to start the next matching from.
/// </param>
/// <param name="edges">
/// Ignored when the method returns true; otherwise the labels to match next, or null when
/// <paramref name="p"/> has its own link and no further matching is needed.
/// </param>
/// <returns>true if the next match should start from the root node; false otherwise.</returns>
private bool PreMatch(SuffixNode p, out SuffixNode target, out IEdgeLabel[] edges)
{
    Debug.Assert(p != this.theRoot);
    target = null;
    edges = null;

    // p already has a link: jump straight there, nothing left to match.
    if (p.Link != null)
    {
        target = p.Link;
        return false;
    }

    var parent = p.Parent;
    if (parent != this.theRoot)
    {
        if (parent.Link != null)
        {
            // One level up has a link: re-match only p's own edge from there.
            edges = new EdgeLabel[1]
            {
                EdgeLabel.Create(p.Edge.Start, p.Edge.End == -1 ? (this.CurrentPhase - 1) : p.Edge.End)
            };
            target = parent.Link;
            return false;
        }

        var grandParent = parent.Parent;
        if (grandParent != this.theRoot)
        {
            // Two levels up: re-match the parent's edge followed by p's edge.
            Debug.Assert(grandParent.Link != null);
            edges = new EdgeLabel[2]
            {
                EdgeLabel.Create(parent.Edge.Start, parent.Edge.End == -1 ? (this.CurrentPhase - 1) : parent.Edge.End),
                EdgeLabel.Create(p.Edge.Start, p.Edge.End == -1 ? (this.CurrentPhase - 1) : p.Edge.End)
            };
            target = grandParent.Link;
            return false;
        }
    }

    return true;
}
/// <summary>
/// Resets the builder state and runs one phase per character of <paramref name="inputStr"/>
/// followed by a terminating '$'.
/// </summary>
/// <param name="inputStr">Text to index; '$' is appended before building.</param>
public void BuildSuffixTree(string inputStr)
{
    this.str = inputStr + '$';
    this.root = new SuffixNode(-1, null, -1, this.root);
    this.remSuffix = 0;
    this.globalEnd = new End(-1);
    this.active = new ActivePoint(this.root, -1, 0);

    int phase = 0;
    while (phase < this.str.Length)
    {
        this.StartPhase(phase);
        phase++;
    }
}
/// <summary>
/// Creates a new non-leaf child of this node and registers it under the character
/// <c>text[startCharIndex]</c>.
/// </summary>
/// <param name="text">Text the edge indices refer to.</param>
/// <param name="startCharIndex">Start index of the new edge label.</param>
/// <param name="endCharIndex">End index of the new edge label; -1 bypasses the range assertion.</param>
/// <returns>The newly created child node.</returns>
public SuffixNode AddEdge(string text, int startCharIndex, int endCharIndex)
{
    Debug.Assert(endCharIndex == -1 || (0 <= startCharIndex && startCharIndex <= endCharIndex && endCharIndex < text.Length));
    Debug.Assert(!this.Children.ContainsKey(text[startCharIndex]));

    var created = new SuffixNode();
    created.Edge = EdgeLabel.Create(startCharIndex, endCharIndex);
    created.Parent = this;
    created.Link = null;
    created.IsLeaf = false;

    this.Children.Add(text[startCharIndex], created);
    return created;
}
/// <summary>
/// Copies the elements of <c>_text</c> covered by the edge of <paramref name="node"/> into a new array.
/// </summary>
/// <param name="node">Node whose <c>Start</c> and <c>End</c> delimit the edge.</param>
/// <returns>
/// The elements from <c>Start</c> up to, but not including, the end clamped to <c>_text.Count</c>;
/// an empty array when the two coincide.
/// </returns>
[NotNull]
private int[] NodeEdgeData([NotNull] SuffixNode node)
{
    int stop = Math.Min(node.End, _text.Count);
    int first = node.Start;

    // A zero-length edge naturally produces an empty array here.
    var labels = new int[stop - first];
    for (int offset = 0; offset < labels.Length; offset++)
    {
        labels[offset] = _text[first + offset];
    }

    return labels;
}
/// <summary>
/// Depth-first walk that assigns <c>index</c> to every childless node below (and including)
/// <paramref name="node"/> and collects those nodes.
/// </summary>
/// <param name="node">Node to visit; its edge length is added to the running path length.</param>
/// <param name="sLength">Accumulated edge length of the path above <paramref name="node"/>.</param>
/// <param name="leaveNodes">List that receives each childless node after its index is set.</param>
public void DFSToPopulateStartIndexAtLeaves(SuffixNode node, int sLength, List<SuffixNode> leaveNodes)
{
    int depth = sLength + (node.end.endIndex - node.startIndex + 1);
    bool hasChild = false;

    foreach (var next in node.child)
    {
        if (next == null)
        {
            continue;
        }

        hasChild = true;
        DFSToPopulateStartIndexAtLeaves(next, depth, leaveNodes);
    }

    if (hasChild)
    {
        return;
    }

    // Childless node: its index is the end index minus the total path length, plus one.
    node.index = node.end.endIndex - depth + 1;
    leaveNodes.Add(node);
}
/// <summary>
/// Runs the recursive search from every child of the root and returns the substring of
/// <c>str</c> described by the deepest node it recorded.
/// </summary>
/// <param name="len1">Index boundary forwarded unchanged to the recursive search.</param>
/// <returns>
/// The <c>deepestLength</c> characters of <c>str</c> ending at the recorded node's end index,
/// or the empty string when no node was recorded.
/// </returns>
public string LongestCommonSubstring(int len1)
{
    this.deepestLength = 0;
    this.deepestNode = null;

    foreach (var top in this.root.child)
    {
        if (top != null)
        {
            LongestCommonSubstring(top, 0, len1);
        }
    }

    if (this.deepestNode == null)
    {
        return string.Empty;
    }

    int begin = deepestNode.end.endIndex - deepestLength + 1;
    return this.str.Substring(begin, deepestLength);
}
/// <summary>
/// Registers an existing node as a child of this node, keyed by the first character of its edge.
/// </summary>
/// <param name="text">Text the edge indices of <paramref name="newChild"/> refer to.</param>
/// <param name="newChild">Node to attach; its <c>Edge.Start</c> selects the key character.</param>
public void AddEdge(string text, SuffixNode newChild)
{
    var edgeKey = text[newChild.Edge.Start];
    this.Children.Add(edgeKey, newChild);
}
/// <summary>
/// Builds a suffix tree over <paramref name="text"/> phase by phase.
/// </summary>
/// <remarks>
/// The first leaf (edge starting at index 0, open end -1) is attached to the root up front.
/// Each phase <c>i</c> from 1 to <c>text.Length - 1</c> then sets <c>CurrentPhase</c> and runs
/// extensions <c>j</c> from 1 to <c>i - 1</c>, skipping those below <c>lastCreatedLeaf</c>.
/// An extension locates its position via <c>PreMatch</c>/<c>Match</c>. It then either stops the
/// phase because the next character is already present, splits an edge and hangs a new leaf, or
/// hangs a new leaf on an existing node. Links between nodes touched in consecutive extensions
/// are recorded through <c>internCreatedPrevExt</c>.
/// After the last phase, <c>FixupLeaves</c> sets every leaf edge end to <c>text.Length - 1</c>.
/// </remarks>
/// <param name="text">Text to index.</param>
/// <returns>The constructed tree, or null when <paramref name="text"/> is null or empty.</returns>
public static SuffixTreeImpl <ChildrenCollectionType> Create(string text) { if (string.IsNullOrEmpty(text)) { return(null); } var tree = new SuffixTreeImpl <ChildrenCollectionType>(); tree.Text = text; var root = tree.theRoot; int m = text.Length; var deep = root.AddEdge(tree.Text, 0, -1); deep.SetLeaf(0); SuffixNode prevExtEnd = deep; int lastCreatedLeaf = -1; for (int i = 1; i < m; i++) { tree.CurrentPhase = i; bool skipRemaining = false; SuffixNode internCreatedPrevExt = null; for (int j = 1; j < i && !skipRemaining; j++) { if (j < lastCreatedLeaf) { continue; } List <EdgeLabel> edges = null; SuffixNode found = null; int edgecursor = -1; bool matchEndedAtNode; if (j == lastCreatedLeaf && j > 1) { if (!prevExtEnd.IsLeaf) { edges = new List <EdgeLabel>(new EdgeLabel[] { EdgeLabel.Create(i - 1, i - 1) }); matchEndedAtNode = tree.Match(prevExtEnd, edges, out found, out edgecursor); } else { found = prevExtEnd; matchEndedAtNode = true; } } if (found == null) { SuffixNode target = null; if (tree.PreMatch(prevExtEnd, out target, out edges)) { edges = new List <EdgeLabel>(new EdgeLabel[] { EdgeLabel.Create(j, i - 1) }); matchEndedAtNode = tree.Match(tree.theRoot, edges, out found, out edgecursor); } else { if (edges != null) { matchEndedAtNode = tree.Match(target, edges, out found, out edgecursor); } else { found = target; matchEndedAtNode = true; } } } else { matchEndedAtNode = true; } if (!matchEndedAtNode) { if (tree.Text[edgecursor] == tree.Text[i]) { skipRemaining = true; break; } else { var foundParent = found.Parent; // there's no node here .. better create one.. foundParent.RemoveEdge(tree.GetCharFromIndex(found.Edge.Start)); // create new node.. var internalNode = foundParent.AddEdge(tree.Text, found.Edge.Start, edgecursor - 1); // massage old node and add it back.. found.Parent = internalNode; found.Edge.Start = edgecursor; internalNode.AddEdge(tree.Text, found); // fix up links if we need to .. 
if (internCreatedPrevExt != null) { internCreatedPrevExt.Link = internalNode; } internCreatedPrevExt = internalNode; // create a new leaf and hang it here.. var newLeaf = internalNode.AddEdge(tree.Text, i, -1); newLeaf.SetLeaf(j); lastCreatedLeaf = j; prevExtEnd = internalNode; } } else { if (found.IsLeaf) { prevExtEnd = found; } else { if (internCreatedPrevExt != null) { internCreatedPrevExt.Link = found; } internCreatedPrevExt = found.Link == null ? found : null; if (found.GetEdge(tree.GetCharFromIndex(i)) == null) { var newLeaf = found.AddEdge(tree.Text, i, -1); newLeaf.SetLeaf(j); lastCreatedLeaf = j; prevExtEnd = found; } else { skipRemaining = true; break; } } } } if (!skipRemaining) { var parent = root; var child = parent.GetEdge(tree.GetCharFromIndex(i)); if (child == null) { var newLeaf = parent.AddEdge(tree.Text, i, -1); newLeaf.SetLeaf(i); lastCreatedLeaf = i; prevExtEnd = newLeaf; } if (internCreatedPrevExt != null) { internCreatedPrevExt.Link = parent; internCreatedPrevExt = null; } } } tree.FixupLeaves(tree.theRoot, tree.Text.Length - 1); return(tree); }
/// <summary>
/// Walks down the tree from <paramref name="p"/>, consuming the given edge labels in order.
/// </summary>
/// <remarks>
/// The total number of characters to match is the sum of <c>GetEdgeLength</c> over
/// <paramref name="edges"/>; the loop runs until that many characters have been accounted for.
/// Each iteration handles one of three cases:
/// - the tree edge ends before the current label does: descend to the next tree edge;
/// - both end together: move on to the next label, if any, and descend;
/// - the label ends inside the tree edge: continue with the next label against the remaining
///   part of the same edge, or report the first unmatched position when no label is left.
/// </remarks>
/// <param name="p">Node to start matching from.</param>
/// <param name="edges">Non-empty list of labels to match against tree edges.</param>
/// <param name="child">On return, the node at the lower end of the last tree edge visited.</param>
/// <param name="firstUnmachedEdgeIndex">
/// -1 when matching ended exactly at a node. Otherwise it is computed as the last tree edge's end
/// index (or <c>CurrentPhase - 1</c> when that end is -1), minus the unmatched length, plus one.
/// </param>
/// <returns>true if matching ended at a node; false if it finished inside an edge.</returns>
/// <exception cref="ArgumentException"><paramref name="edges"/> is null or empty.</exception>
private bool Match(SuffixNode p, List <EdgeLabel> edges, out SuffixNode child, out int firstUnmachedEdgeIndex) { if (edges == null || edges.Count <= 0) { throw new ArgumentException(); } firstUnmachedEdgeIndex = -1; int matchingToDo = 0; for (int i = 0; i < edges.Count; i++) { matchingToDo += this.GetEdgeLength(edges[i]); } int matchingDone = 0; var parent = p; child = null; int matchingEdgeIndex = 0; var matchingEdge = edges[matchingEdgeIndex]; int cursorNext = matchingEdge.Start; int edgeEnd = matchingEdge.End; child = this.TraverseEdge(parent, cursorNext); var treeEdgeLen = this.GetEdgeLength(child.Edge); do { if (cursorNext + treeEdgeLen - 1 < edgeEnd) { matchingDone += treeEdgeLen; cursorNext += treeEdgeLen; Debug.Assert(matchingDone < matchingToDo); parent = child; child = this.TraverseEdge(parent, cursorNext); treeEdgeLen = this.GetEdgeLength(child.Edge); } else if (cursorNext + treeEdgeLen - 1 == edgeEnd) { matchingDone += treeEdgeLen; if (matchingEdgeIndex + 1 < edges.Count) { matchingEdge = edges[++matchingEdgeIndex]; cursorNext = matchingEdge.Start; edgeEnd = matchingEdge.End; parent = child; child = this.TraverseEdge(parent, cursorNext); treeEdgeLen = this.GetEdgeLength(child.Edge); } } else { matchingDone += edgeEnd - cursorNext + 1; var deltaUnmatched = cursorNext + treeEdgeLen - 1 - edgeEnd; if (matchingEdgeIndex + 1 < edges.Count) { matchingEdge = edges[++matchingEdgeIndex]; cursorNext = matchingEdge.Start; edgeEnd = matchingEdge.End; treeEdgeLen = deltaUnmatched; } else { firstUnmachedEdgeIndex = (child.Edge.End == -1 ? (this.CurrentPhase - 1) : child.Edge.End) - deltaUnmatched + 1; } } }while (matchingDone < matchingToDo); return(firstUnmachedEdgeIndex == -1); }
/// <summary>
/// Creates an active point located at <paramref name="node"/> with no active edge (-1)
/// and an active length of 0.
/// </summary>
/// <param name="node">Value stored in <c>ActiveNode</c>.</param>
public ActivePoint(SuffixNode node)
{
    ActiveNode = node;
    ActiveEdge = -1;
    ActiveLength = 0;
}
/// <summary>
/// Runs one construction phase: inserts the character <c>str[index]</c> for every suffix that is
/// still pending.
/// </summary>
/// <remarks>
/// The phase first increments <c>remSuffix</c> and the shared <c>globalEnd.endIndex</c>, then
/// loops while <c>remSuffix</c> is positive.
/// With an active length of 0 it looks for a child of the active node keyed by the character:
/// if none exists it creates a leaf and decrements <c>remSuffix</c>; otherwise it starts walking
/// that edge and ends the phase.
/// With a non-zero active length it asks <c>nextCharInTree</c> for the character after the
/// active point:
/// - on a match, the active point advances and the phase ends;
/// - on a mismatch, the edge is split into a new internal node plus a new leaf, the active point
///   moves (suffix link when not at the root, otherwise shorter length / next edge), and the
///   previously created internal node is linked to the new one;
/// - when <c>nextCharInTree</c> throws <c>OverflowException</c>, a new leaf is attached directly
///   below the node at the end of the active edge.
/// NOTE(review): the catch branch increments <c>activeEdge</c> and decrements
/// <c>activeLength</c> even after following a suffix link, whereas the split branch does so only
/// when the active node is the root. It also never updates <c>LastCreatedInternalNode</c>.
/// Confirm whether this asymmetry is intended.
/// </remarks>
/// <param name="index">Index into <c>str</c> of the character added in this phase.</param>
private void StartPhase(int index) { ++remSuffix; ++globalEnd.endIndex; //Handling Rule 1 extension. SuffixNode LastCreatedInternalNode = null; while (remSuffix > 0) { int ch = (int)this.str[index]; // ascii of next character in input to compare. if (active.activeLength == 0) // active node will be Root if Aactive length is 0.No Need to traverse edge. Check directly on active node. { if (active.activeNode.child[ch] == null) // If edge in current char direction doesn't exists. { SuffixNode node = new SuffixNode(index, globalEnd, -1, root); // New node start index will be current index. active.activeNode.child[ch] = node; // Create and assign new Edge in current char direction. --remSuffix; // As a new leaf node is created, so one remaning suffix is handled. } else { ++active.activeLength; active.activeEdge = active.activeNode.child[ch].startIndex; // Set Active edge and increase active length. break; } } else { try { char nextChInTree = nextCharInTree(index); SuffixNode activeEdgeNode = active.activeNode.child[this.str[active.activeEdge]]; // current edge in the direction of current char. if (nextChInTree == this.str[index]) // If current char matches Next Char after active length. Rule 3 Extension { // walk down and jump active node if required while increasing active length int activeEdgeLength = activeEdgeNode.end.endIndex - activeEdgeNode.startIndex + 1; if (active.activeLength >= activeEdgeLength) { active.activeNode = activeEdgeNode; active.activeLength = 1; active.activeEdge = active.activeNode.startIndex + activeEdgeLength; } else { ++active.activeLength; } break; // Ends of phase as Rule 3 occurred. } else // If current char does not match next char after active length. Rule 2 Extension. { SuffixNode newInternalNode = new SuffixNode( // Two new leaf nodes will be created and current node will become internal node. 
activeEdgeNode.startIndex, new End(activeEdgeNode.startIndex + active.activeLength - 1), -1, this.root); SuffixNode newLeafNode = new SuffixNode( index, globalEnd, -1, this.root); activeEdgeNode.startIndex = activeEdgeNode.startIndex + active.activeLength; newInternalNode.child[this.str[activeEdgeNode.startIndex]] = activeEdgeNode; newInternalNode.child[this.str[newLeafNode.startIndex]] = newLeafNode; active.activeNode.child[this.str[newInternalNode.startIndex]] = newInternalNode; --remSuffix; if (active.activeNode != this.root) { active.activeNode = active.activeNode.suffixLink; } else { active.activeLength--; active.activeEdge++; } if (LastCreatedInternalNode != null) { LastCreatedInternalNode.suffixLink = newInternalNode; // Suffix link pointed to the internal node created in the same phase; } LastCreatedInternalNode = newInternalNode; } } catch (OverflowException) { SuffixNode activeEdgeNode = this.active.activeNode.child[this.str[this.active.activeEdge]]; // current edge in the direction of current char. activeEdgeNode.child[this.str[index]] = new SuffixNode(index, this.globalEnd, -1, this.root); // Rule 2 extension happens. if (active.activeNode != this.root) { this.active.activeNode = this.active.activeNode.suffixLink; } this.active.activeEdge++; this.active.activeLength--; this.remSuffix--; } } } }
/// <summary>
/// Creates an active point from an explicit node, edge index and length.
/// </summary>
/// <param name="an">Value stored in <c>activeNode</c>.</param>
/// <param name="ae">Value stored in <c>activeEdge</c>.</param>
/// <param name="al">Value stored in <c>activeLength</c>.</param>
public ActivePoint(SuffixNode an, int ae, int al)
{
    this.activeLength = al;
    this.activeEdge = ae;
    this.activeNode = an;
}