コード例 #1
0
        // Builds the suffix tree for the string T.
        //
        // Records minThreshold and T on this instance, resets the static
        // state held by Node, Suffix and Edge, allocates the node array and
        // the edge hash table, and then hands every prefix of T to
        // AddPrefix() in increasing order of its last character index.
        // A timestamp is written to the console before and after the build.
        public void MakeTree(string T, double minThreshold)
        {
            string startStamp = DateTime.Now.ToString();
            Console.WriteLine("Started: " + startStamp);

            this.minThreshold = minThreshold;
            this.T = T;
            // NOTE(review): the original author was unsure whether this
            // should be T.Length - 1; the full length is what is used.
            this.N = T.Length;

            Node.Count = 1;
            Suffix.T = T;
            Edge.T = T;

            // The node array gets two slots per input character.
            int nodeCapacity = N * 2;
            Nodes = new Node[nodeCapacity];

            // The edge hash table size is whatever Prime.next() yields for
            // the node capacity plus 10% (truncated to an int).
            int tableSize = new Prime((int)(nodeCapacity + (nodeCapacity * 0.1))).next();
            Edge.HASH_TABLE_SIZE = tableSize;
            Edge.Edges = new Edge[tableSize];
            InitializeNodesAndEdges();

            // The active point starts out as the empty string at node 0.
            // The same Suffix instance is passed to every AddPrefix() call,
            // which updates it after each prefix has been added.
            Suffix active = new Suffix(0, 0, -1);

            int prefixEnd = 0;
            while (prefixEnd < N)
            {
                AddPrefix(active, prefixEnd);
                prefixEnd++;
            }

            string doneStamp = DateTime.Now.ToString();
            Console.WriteLine("Tree Done: " + doneStamp);
        }
コード例 #2
0
        // When a suffix ends on an implicit node, adding a new character
        // means I have to split an existing edge.  This function is called
        // to split an edge at the point defined by the Suffix argument.
        // The existing edge loses its parent, as well as some of its leading
        // characters.  The newly created edge descends from the original
        // parent, and now has the existing edge as a child.
        //
        // Since the existing edge is getting a new parent and starting
        // character, its hash table entry will no longer be valid.  That's
        // why it gets removed at the start of the function.  After the parent
        // and start char have been recalculated, it is re-inserted.
        // The number of characters stolen from the original node and given
        // to the new node is equal to the number of characters in the suffix
        // argument, which is last - first + 1;
        public int SplitEdge(Suffix s)
        {
            // This edge is about to change its parent and first character,
            // so its current hash table entry is taken out before anything
            // else is touched.
            Remove();

            // Offset of the last character handed over to the new edge,
            // relative to this edge's current first character.
            int lastStolenOffset = s.last_char_index - s.first_char_index;

            // The new edge hangs off the suffix's origin node and covers the
            // leading characters taken from this edge.
            Edge headEdge = new Edge(first_char_index,
                                     first_char_index + lastStolenOffset,
                                     s.origin_node);
            headEdge.Insert();

            // The node at the end of the new edge gets the suffix's origin
            // node recorded as its suffix_node.
            SuffTree.Nodes[headEdge.end_node].suffix_node = s.origin_node;

            // This edge now starts just past the stolen characters and
            // descends from the new edge's end node; re-insert it under
            // its new key.
            first_char_index += lastStolenOffset + 1;
            start_node = headEdge.end_node;
            Insert();

            // Callers receive the index of the node created by the split.
            return headEdge.end_node;
        }
コード例 #3
0
        //
        // This routine constitutes the heart of the algorithm.
        // It is called repetitively, once for each of the prefixes
        // of the input string.  The prefix in question is denoted
        // by the index of its last character.
        //
        // At each prefix, we start at the active point, and add
        // a new edge denoting the new last character, until we
        // reach a point where the new edge is not needed due to
        // the presence of an existing edge starting with the new
        // last character.  This point is the end point.
        //
        // Luckily for us, the end point just happens to be the
        // active point for the next pass through the tree.  All
        // we have to do is update its last_char_index to indicate
        // that it has grown by a single character, and then this
        // routine can do all its work one more time.
        //

        public void AddPrefix(Suffix active, int last_char_index)
        {
            // Parent of the most recently added edge, or -1 when no edge
            // has been added yet during this call.
            int previousParent = -1;
            int currentParent;

            while (true)
            {
                // Placeholder instance; both branches below overwrite it
                // with the result of Edge.Find().
                Edge edge = new Edge();
                currentParent = active.origin_node;

                // Look for an existing edge that already continues the
                // active point with the new character.  Finding one means
                // nothing more has to be added for this prefix.
                if (active.Explicit())
                {
                    // Explicit node: look the new character up directly.
                    // A start_node other than -1 is treated as a hit.
                    edge = Edge.Find(active.origin_node, T[last_char_index]);
                    if (edge.start_node != -1)
                    {
                        break;
                    }
                }
                else
                {
                    // Implicit node: the active point lies inside an edge.
                    // Fetch that edge and compare the character just past
                    // the active point with the new character.
                    edge = Edge.Find(active.origin_node, T[active.first_char_index]);
                    int activeSpan = active.last_char_index - active.first_char_index;
                    if (T[edge.first_char_index + activeSpan + 1] == T[last_char_index])
                    {
                        break;
                    }

                    // No match: split the edge at the active point.  The
                    // node returned by the split becomes the parent of the
                    // edge added below.
                    currentParent = edge.SplitEdge(active);
                }

                // No matching edge exists, so add one running from the new
                // character to the end of the string, hanging off
                // currentParent, and register it in the hash table.
                Edge leafEdge = new Edge(last_char_index, N - 1, currentParent);
                leafEdge.Insert();

                // Link the parent used on the previous pass to the parent
                // used on this one.
                if (previousParent > 0)
                {
                    Nodes[previousParent].suffix_node = currentParent;
                }
                previousParent = currentParent;

                // Move on to the next smaller suffix: at node 0 drop the
                // first character, otherwise follow the stored suffix_node.
                if (active.origin_node == 0)
                {
                    active.first_char_index++;
                }
                else
                {
                    active.origin_node = Nodes[active.origin_node].suffix_node;
                }
                active.Canonize();
            }

            // Reached after a break: currentParent is the origin node of
            // the end point.  Link the last parent used to it.
            if (previousParent > 0)
            {
                Nodes[previousParent].suffix_node = currentParent;
            }

            // The end point, extended by the new character, is the active
            // point for the next prefix.
            active.last_char_index++;
            active.Canonize();
        }