/// <summary>
/// Builds the suffix tree for <paramref name="T"/> by resetting the shared
/// node/edge storage and then calling <c>AddPrefix</c> once for every
/// character position of the text. Start and finish timestamps are written
/// to the console.
/// </summary>
/// <param name="T">The text to index. Must not be null.</param>
/// <param name="minThreshold">Stored on the instance; this method does not use it otherwise.</param>
/// <exception cref="ArgumentNullException"><paramref name="T"/> is null.</exception>
public void MakeTree(string T, double minThreshold)
{
    // Fail fast, before anything is printed and before any instance or
    // static state has been overwritten.
    if (T == null)
    {
        throw new ArgumentNullException("T");
    }

    Console.WriteLine("Started: " + DateTime.Now.ToString());

    this.minThreshold = minThreshold;
    this.T = T;

    // N is the character count; AddPrefix uses N - 1 as the index of the
    // last character when it creates an edge.
    this.N = T.Length;

    // Reset the state shared with the Node, Suffix and Edge types.
    Node.Count = 1;
    Suffix.T = T;
    Edge.T = T;

    // Node storage is sized at two slots per input character.
    Nodes = new Node[N * 2];

    // The edge table is sized from 2N plus 10% headroom, passed through
    // Prime.next() (presumably yielding a nearby prime - confirm in Prime).
    int prime = (new Prime((int)((N * 2) + (N * 2 * 0.1)))).next();
    Edge.HASH_TABLE_SIZE = prime;
    Edge.Edges = new Edge[prime];

    InitializeNodesAndEdges();

    // The active point is the first non-leaf suffix in the tree. It starts
    // out as the empty string at node 0; AddPrefix() updates it after every
    // prefix that is added.
    Suffix active = new Suffix(0, 0, -1);
    for (int i = 0; i < N; i++)
    {
        AddPrefix(active, i);
    }

    Console.WriteLine("Tree Done: " + DateTime.Now.ToString());
}
/// <summary>
/// Splits this edge at the point described by <paramref name="s"/>. This is
/// needed when a suffix ends in the middle of an edge (an implicit node) and
/// a new character has to branch off from that point.
/// </summary>
/// <remarks>
/// A new edge is created from the suffix's origin node and takes over the
/// leading characters of this edge; this edge keeps the remainder and is
/// re-attached below the new edge's end node. The number of characters handed
/// over equals the length of the suffix, i.e. last - first + 1.
/// Because this edge's start node and first character both change, it is
/// removed before the change and inserted again afterwards so that its
/// lookup entry stays valid.
/// </remarks>
/// <param name="s">The suffix whose end marks the split point on this edge.</param>
/// <returns>The index of the node at the end of the newly created edge.</returns>
public int SplitEdge(Suffix s)
{
    // Take this edge out of the table while it is still filed under its
    // current start node and first character.
    Remove();

    // One less than the number of characters that move to the new edge.
    int span = s.last_char_index - s.first_char_index;

    // The new leading edge hangs off the suffix's origin node.
    Edge head = new Edge(first_char_index, first_char_index + span, s.origin_node);
    head.Insert();

    int splitNode = head.end_node;
    SuffTree.Nodes[splitNode].suffix_node = s.origin_node;

    // This edge now starts at the split node and has lost span + 1 characters.
    first_char_index += span + 1;
    start_node = splitNode;
    Insert();

    return splitNode;
}
/// <summary>
/// Extends the tree by one prefix of the input string; this is the core step
/// of the construction and is called once per prefix.
/// </summary>
/// <remarks>
/// Starting at the active point, a new edge for the new last character is
/// added at each successively shorter suffix until a position is reached
/// where an edge starting with that character already exists. That position
/// is the end point, and it is also the active point for the next call: its
/// last_char_index is advanced by one so the next call can carry on from
/// there.
/// </remarks>
/// <param name="active">The active point; updated in place.</param>
/// <param name="last_char_index">Index in the text of the last character of the prefix being added.</param>
public void AddPrefix(Suffix active, int last_char_index)
{
    int attachNode;            // node the next new edge hangs from
    int pendingLinkNode = -1;  // node from the previous pass that still needs a suffix link

    while (true)
    {
        Edge candidate = new Edge();
        attachNode = active.origin_node;

        // Step 1: look for an existing continuation with the new character.
        // If there is one, no more edges are needed and the loop ends.
        if (active.Explicit())
        {
            candidate = Edge.Find(active.origin_node, T[last_char_index]);
            // A start_node other than -1 is treated as "edge found".
            if (candidate.start_node != -1)
            {
                break;
            }
        }
        else
        {
            // Implicit node: the active point lies part-way along an edge,
            // so compare the character just past the matched part.
            candidate = Edge.Find(active.origin_node, T[active.first_char_index]);
            int matched = active.last_char_index - active.first_char_index + 1;
            if (T[candidate.first_char_index + matched] == T[last_char_index])
            {
                break;
            }

            // No match: split the edge and attach the new edge at the split node.
            attachNode = candidate.SplitEdge(active);
        }

        // Step 2: no matching edge, so create one running from the new
        // character to the end of the text and insert it.
        Edge leaf = new Edge(last_char_index, N - 1, attachNode);
        leaf.Insert();

        // Link the node used on the previous pass to the one used now. Only
        // positive node indices are linked, so neither the -1 start value
        // nor node 0 receives a link.
        if (pendingLinkNode > 0)
        {
            Nodes[pendingLinkNode].suffix_node = attachNode;
        }
        pendingLinkNode = attachNode;

        // Step 3: move to the next smaller suffix.
        if (active.origin_node != 0)
        {
            active.origin_node = Nodes[active.origin_node].suffix_node;
        }
        else
        {
            active.first_char_index++;
        }
        active.Canonize();
    }

    // Same linking rule for the node left over from the final pass.
    if (pendingLinkNode > 0)
    {
        Nodes[pendingLinkNode].suffix_node = attachNode;
    }

    // The end point becomes the active point for the next prefix.
    active.last_char_index++;
    active.Canonize();
}