Пример #1
0
        // Exercises Resolve against a root with three direct children ("A", "XYZ$", "ZZ"):
        // exact labels and label prefixes must yield the child, anything else must yield null,
        // and the empty string must yield the root itself.
        public void SuffixTreeNodeResolver_ResolveTest1()
        {
            var treeRoot = new SuffixTreeNode(-1, false);
            var sut      = new SuffixTreeNodeResolver(treeRoot);

            var nodeA   = new SuffixTreeNode(1, true);
            var nodeXyz = new SuffixTreeNode(2, true);
            var nodeZz  = new SuffixTreeNode(3, true);

            treeRoot.AddChild("A", nodeA);
            treeRoot.AddChild("XYZ$", nodeXyz);
            treeRoot.AddChild("ZZ", nodeZz);

            // Single-character edge "A".
            Assert.AreSame(nodeA, sut.Resolve("A"));
            Assert.IsNull(sut.Resolve("AA"));
            Assert.IsNull(sut.Resolve("B"));

            // Every non-empty prefix of "XYZ$" lands on the same child; one extra character does not.
            Assert.AreSame(nodeXyz, sut.Resolve("X"));
            Assert.AreSame(nodeXyz, sut.Resolve("XY"));
            Assert.AreSame(nodeXyz, sut.Resolve("XYZ"));
            Assert.AreSame(nodeXyz, sut.Resolve("XYZ$"));
            Assert.IsNull(sut.Resolve("XYZ$$"));

            // Same prefix behaviour for "ZZ", plus the empty-string case.
            Assert.AreSame(nodeZz, sut.Resolve("Z"));
            Assert.AreSame(nodeZz, sut.Resolve("ZZ"));
            Assert.IsNull(sut.Resolve("ZZZ"));
            Assert.AreSame(treeRoot, sut.Resolve(""));
        }
Пример #2
0
        /// <summary>
        /// Builds a suffix tree for <paramref name="text"/> by inserting its suffixes in the order
        /// given by <paramref name="SuffixArray"/>.
        /// </summary>
        /// <param name="text">The text whose suffixes are inserted.</param>
        /// <param name="SuffixArray">Start positions of the suffixes; entry i is inserted on iteration i.</param>
        /// <param name="LCPArray">
        /// Entry i is read after suffix i has been inserted and is used as the string depth at which
        /// suffix i + 1 leaves the existing tree. Only the first text.Length - 1 entries are read.
        /// </param>
        /// <returns>The root node of the constructed tree.</returns>
        public SuffixTreeNode BuildSuffixTree(string text, long[] SuffixArray,
                                              long[] LCPArray)
        {
            SuffixTreeNode root = new SuffixTreeNode(new Dictionary<char, SuffixTreeNode>(), null, 0, -1, -1);
            SuffixTreeNode current = root;
            int previousLcp = 0;
            int lastRank = text.Length - 1;

            for (int rank = 0; rank <= lastRank; rank++)
            {
                var suffixStart = SuffixArray[rank];

                // Climb until the current node is no deeper than the shared prefix.
                while (previousLcp < current.StringDepth)
                {
                    current = current.Parent;
                }

                // The shared prefix ends in the middle of an edge: split that edge first so the
                // new leaf can hang off the split point.
                if (current.StringDepth != previousLcp)
                {
                    var edgeStart   = SuffixArray[rank - 1] + current.StringDepth;
                    var splitOffset = previousLcp - current.StringDepth;
                    current = BreakEdge(current, text, edgeStart, splitOffset);
                }

                current = CreateNewLeaf(current, text, suffixStart);

                // There is no LCP entry after the last suffix.
                if (rank < lastRank)
                {
                    previousLcp = (int)LCPArray[rank];
                }
            }

            return root;
        }
Пример #3
0
 /// <summary>
 /// Appends <paramref name="root"/> and then, recursively, every node reachable through
 /// <c>Children</c> to <paramref name="nodes"/>.
 /// </summary>
 /// <param name="root">The node at which the traversal starts.</param>
 /// <param name="nodes">The list that receives the visited nodes; a node is added before its children.</param>
 public void GetNodes(SuffixTreeNode root, List <SuffixTreeNode> nodes)
 {
     // The subtree's own root is recorded before any of its descendants.
     nodes.Add(root);

     foreach (SuffixTreeNode child in root.Children)
     {
         GetNodes(child, nodes);
     }
 }
Пример #4
0
        // Attaches one and the same child instance to a root under two different keys.
        // NOTE(review): there are no assertions here, so the test presumably relies on an
        // expected-exception attribute declared outside this view — confirm.
        public void SuffixTreeNode_AddChildGuardCase3Test()
        {
            var sharedChild = new SuffixTreeNode(1, true);
            var parent      = new SuffixTreeNode(-1, false);

            parent.AddChild("key1", sharedChild);
            parent.AddChild("key2", sharedChild);
        }
Пример #5
0
        // A freshly constructed node must report the constructor arguments back and have
        // neither keys nor children.
        public void SuffixTreeNode_DefaultsTest()
        {
            var fresh = new SuffixTreeNode(-1, false);

            Assert.AreEqual(-1, fresh.Index);
            Assert.IsFalse(fresh.IsTerminal);

            CollectionAssertEx.IsEmpty(fresh.GetKeys());
            CollectionAssertEx.IsEmpty(fresh.GetChildren());
        }
Пример #6
0
 /// <summary>
 /// Creates a node and stores each argument in the member of the same name.
 /// </summary>
 /// <param name="children">Value assigned to <c>Children</c>.</param>
 /// <param name="parent">Value assigned to <c>Parent</c>.</param>
 /// <param name="stringDepth">Value assigned to <c>StringDepth</c>.</param>
 /// <param name="edgeStart">Value assigned to <c>EdgeStart</c>.</param>
 /// <param name="edgeEnd">Value assigned to <c>EdgeEnd</c>.</param>
 public SuffixTreeNode(Dictionary <char, SuffixTreeNode> children,
                       SuffixTreeNode parent, int stringDepth, int edgeStart, int edgeEnd)
 {
     this.Children    = children;
     this.Parent      = parent;
     this.StringDepth = stringDepth;
     this.EdgeStart   = edgeStart;
     this.EdgeEnd     = edgeEnd;
 }
 /// <summary>
 /// Builds the tree by inserting every suffix of <paramref name="s"/>, longest first,
 /// into a newly created root node.
 /// </summary>
 /// <param name="s">The string whose suffixes populate the tree.</param>
 public SuffixTree(string s)
 {
     _root = new SuffixTreeNode();

     int start = 0;
     while (start < s.Length)
     {
         // s.Substring(start) is the suffix that begins at index 'start'.
         _root.Insert(s.Substring(start));
         start++;
     }
 }
Пример #8
0
        /// <summary>
        /// Creates a leaf below <paramref name="curNode"/> for the suffix that starts at
        /// <paramref name="suffix"/> and registers it in the parent's child map.
        /// </summary>
        /// <param name="curNode">The node that becomes the leaf's parent.</param>
        /// <param name="text">The text the tree is built over.</param>
        /// <param name="suffix">Start position of the suffix in <paramref name="text"/>.</param>
        /// <returns>The newly created leaf.</returns>
        private SuffixTreeNode CreateNewLeaf(SuffixTreeNode curNode, string text,
                                             long suffix)
        {
            int suffixStart   = (int)suffix;
            int leafDepth     = text.Length - suffixStart;               // whole remaining suffix
            var leafEdgeStart = suffixStart + curNode.StringDepth;       // skip what the parent already spells
            int leafEdgeEnd   = text.Length - 1;

            var leaf = new SuffixTreeNode(new Dictionary<char, SuffixTreeNode>(), curNode,
                                          leafDepth, leafEdgeStart, leafEdgeEnd);

            // The child map is keyed by the first character of the leaf's edge.
            char edgeKey = text[leaf.EdgeStart];
            curNode.Children[edgeKey] = leaf;

            return leaf;
        }
Пример #9
0
        // With a childless root, only the empty string resolves (to the root itself);
        // every non-empty input must resolve to null.
        public void SuffixTreeNodeResolver_ResolveSimpleTest()
        {
            var loneRoot = new SuffixTreeNode(-1, false);
            var sut      = new SuffixTreeNodeResolver(loneRoot);

            Assert.AreSame(loneRoot, sut.Resolve(""));

            Assert.IsNull(sut.Resolve("A"));
            Assert.IsNull(sut.Resolve("B"));
            Assert.IsNull(sut.Resolve("TEST"));
        }
Пример #10
0
            // Constructor: builds a tree of the suffixes of the given text.
            public Suffix_Tree(String s)
            {
                root = new SuffixTreeNode();

                // Hand every suffix of s, from the longest to the shortest, to the root's
                // insertSuffix().
                for (int start = 0; start != s.Length; ++start)
                {
                    String suffix = s.Substring(start);
                    root.insertSuffix(suffix);
                }
            }
Пример #11
0
        /// <summary>
        /// Lazily walks the tree starting at <c>Root</c>, using a first-in/first-out work queue, and
        /// yields one <see cref="SuffixTreeNodeTriplet"/> (index, terminal flag, keys) per visited node.
        /// </summary>
        /// <param name="this">The tree to traverse.</param>
        /// <returns>A deferred sequence of triplets, one per node.</returns>
        public static IEnumerable <SuffixTreeNodeTriplet> BFS(this SuffixTree @this)
        {
            var pending = new Queue <SuffixTreeNode>();
            pending.EnQueue(@this.Root);

            while (true)
            {
                if (pending.IsEmpty)
                {
                    yield break;
                }

                SuffixTreeNode visited = pending.DeQueue();
                var triplet = new SuffixTreeNodeTriplet(visited.Index, visited.IsTerminal, visited.GetKeys());
                yield return triplet;

                // Children are queued only after the consumer asks for the next element.
                pending.Fill(visited.GetChildren());
            }
        }
Пример #12
0
        // Children are added in descending key order; the assertions expect keys and children
        // to be reported in ascending key order.
        public void SuffixTreeNode_AddChildTest()
        {
            var nodeX  = new SuffixTreeNode(1, true);
            var nodeY  = new SuffixTreeNode(2, true);
            var nodeZ  = new SuffixTreeNode(3, true);
            var parent = new SuffixTreeNode(-1, false);

            parent.AddChild("zkey3", nodeZ);
            parent.AddChild("ykey2", nodeY);
            parent.AddChild("xkey1", nodeX);

            var expectedKeys     = new string[] { "xkey1", "ykey2", "zkey3" };
            var expectedChildren = new SuffixTreeNode[] { nodeX, nodeY, nodeZ };

            CollectionAssert.AreEqual(expectedKeys, parent.GetKeys());
            CollectionAssert.AreEqual(expectedChildren, parent.GetChildren());
        }
Пример #13
0
 //--------------------------------------------------------------------------------------
 /// <summary>
 /// Appends the <c>StarPosition</c> of every childless node in the subtree rooted at
 /// <paramref name="node"/> to <paramref name="list"/>.
 /// </summary>
 /// <param name="node">Root of the subtree to scan.</param>
 /// <param name="list">Receives the collected positions.</param>
 private void Collectleave(SuffixTreeNode node, List <int> list)
 {
     if (node.Chields.Count != 0)
     {
         // Inner node: contributes nothing itself, only through its descendants.
         foreach (var child in node.Chields.Values)
         {
             Collectleave(child, list);
         }

         return;
     }

     list.Add(node.StarPosition);
 }
            /// <summary>
            /// Inserts <paramref name="s"/> below this node, one node per character, creating
            /// any missing child nodes along the way. An empty string inserts nothing.
            /// </summary>
            /// <remarks>
            /// Implemented as a loop rather than by recursing on <c>s.Substring(1)</c>: the recursive
            /// form needs one stack frame and one new string per character, which makes long inputs
            /// slow and can exhaust the stack.
            /// </remarks>
            /// <param name="s">The string to insert.</param>
            public void Insert(string s)
            {
                SuffixTreeNode current = this;

                foreach (char c in s)
                {
                    // Single lookup instead of ContainsKey followed by the indexer.
                    SuffixTreeNode next;
                    if (!current.Children.TryGetValue(c, out next))
                    {
                        next = new SuffixTreeNode();
                        current.Children[c] = next;
                    }

                    current = next;
                }
            }
Пример #15
0
        /// <summary>
        /// Splits the edge that leaves <paramref name="node"/> with character
        /// <c>text[start]</c> by inserting a new node <paramref name="offset"/> characters down it.
        /// </summary>
        /// <param name="node">The node whose outgoing edge is split.</param>
        /// <param name="text">The text the tree is built over.</param>
        /// <param name="start">Position in <paramref name="text"/> of the edge's first character.</param>
        /// <param name="offset">Number of edge characters kept above the new node.</param>
        /// <returns>The newly inserted middle node.</returns>
        private SuffixTreeNode BreakEdge(SuffixTreeNode node, string text, long start,
                                         int offset)
        {
            int  edgeStart     = (int)start;
            char firstEdgeChar = text[edgeStart];
            char firstTailChar = text[edgeStart + offset];

            // The middle node covers text[edgeStart .. edgeStart + offset - 1].
            var midNode = new SuffixTreeNode(new Dictionary<char, SuffixTreeNode>(),
                                             node, node.StringDepth + offset, edgeStart,
                                             edgeStart + offset - 1);

            // Re-hang the existing child below the middle node and shorten its edge accordingly.
            SuffixTreeNode lowerNode = node.Children[firstEdgeChar];
            midNode.Children[firstTailChar] = lowerNode;
            lowerNode.Parent     = midNode;
            lowerNode.EdgeStart += offset;

            // The middle node takes over the child's former slot in the parent.
            node.Children[firstEdgeChar] = midNode;

            return midNode;
        }
Пример #16
0
 /// <summary>
 /// Builds the tree for <paramref name="text"/> (with a '$' terminator appended) by inserting
 /// its suffixes one at a time, walking down from <c>Root</c> for each of them.
 /// </summary>
 /// <remarks>
 /// <c>Length</c> is increased, for every leaf created, by the number of non-terminator
 /// characters on that leaf's edge at creation time.
 /// Each suffix stops being processed as soon as its leaf has been attached, and edge labels
 /// are compared in place inside the text instead of being copied out with <c>Substring</c>;
 /// the resulting tree and <c>Length</c> are the same as with a full re-walk.
 /// </remarks>
 /// <param name="text">The text to index; it must not be null.</param>
 public SuffixTree(string text)
 {
     text += '$';
     int terminatorIndex = text.Length - 1;

     Root = new SuffixTreeNode(-1, -1);

     // The first suffix (the whole text) always becomes a single leaf below the root.
     Root.link[text[0]] = new SuffixTreeNode(0, text.Length);
     Length            += terminatorIndex;

     for (int i = 1; i < text.Length; i++)
     {
         var current = Root;
         var j       = i;

         while (j < text.Length)
         {
             SuffixTreeNode child;
             if (!current.link.TryGetValue(text[j], out child))
             {
                 // No edge starts with this character: the rest of the suffix becomes a new leaf.
                 current.link[text[j]] = new SuffixTreeNode(j, text.Length - j);
                 Length += terminatorIndex - j;
                 break;
             }

             // The first character matched through the dictionary key; compare the remainder
             // of the edge label directly against the text.
             int matched = 1;
             while (matched < child.length && text[j + matched] == text[child.offset + matched])
             {
                 matched++;
             }

             if (matched == child.length)
             {
                 // Whole edge matched: continue from the child.
                 current = child;
                 j      += matched;
                 continue;
             }

             // Mismatch inside the edge: split it after 'matched' characters.
             int  k         = j + matched;
             char existChar = text[child.offset + matched];
             char newChar   = text[k];
             var  mid       = new SuffixTreeNode(child.offset, matched);

             // Insertion order (new leaf first, existing child second) is kept as in the
             // original so that enumeration order of mid.link does not change.
             mid.link[newChar]   = new SuffixTreeNode(k, text.Length - k);
             Length             += terminatorIndex - k;
             mid.link[existChar] = child;

             child.offset += matched;
             child.length -= matched;
             current.link[text[j]] = mid;
             break;
         }
     }
 }
Пример #17
0
        /// <summary>
        /// Asserts structural properties of the tree rooted at <paramref name="root"/>: the number
        /// of leaves, the uniqueness of the root, and the minimum fan-out of intermediate nodes.
        /// </summary>
        /// <param name="root">The node expected to be the one and only root of the tree.</param>
        /// <param name="text">The text the tree was built from; its length is the expected leaf count.</param>
        public void CheckSuffixTreeProperties(SuffixTreeNode root, string text)
        {
            var allNodes = new List<SuffixTreeNode>();
            GetNodes(root, allNodes);

            int            leaves       = 0;
            int            roots        = 0;
            SuffixTreeNode lastRootSeen = null;
            var            innerNodes   = new List<SuffixTreeNode>();

            // One pass that classifies every node; the three flags are tested independently.
            foreach (SuffixTreeNode current in allNodes)
            {
                if (current.IsLeaf)
                {
                    leaves++;
                }

                if (current.IsRoot)
                {
                    roots++;
                    lastRootSeen = current;
                }

                if (current.IsIntermediate)
                {
                    innerNodes.Add(current);
                }
            }

            // Property 1: exactly text.Length leaves.
            Assert.AreEqual(text.Length, leaves);

            // Property 2: exactly one root, and it is the node that was passed in.
            Assert.AreEqual(1, roots);
            Assert.IsTrue(ReferenceEquals(lastRootSeen, root));

            // Property 3: the root's child count is non-negative.
            Assert.IsTrue(root.Children.Count >= 0);

            // Property 4: every intermediate node has at least two children.
            innerNodes.ForEach(inner => Assert.IsTrue(inner.Children.Count >= 2));
        }
Пример #18
0
        // GetEdge is queried by the first character of a key: known first characters must
        // return an edge equal to (key, parent, child); unknown ones must return null.
        public void SuffixTreeNode_GetEdgeTest()
        {
            var nodeX  = new SuffixTreeNode(1, true);
            var nodeY  = new SuffixTreeNode(2, true);
            var nodeZ  = new SuffixTreeNode(3, true);
            var parent = new SuffixTreeNode(-1, false);

            parent.AddChild("xkey", nodeX);
            parent.AddChild("ykey", nodeY);
            parent.AddChild("zkey", nodeZ);

            var expectedX = new SuffixTreeEdge("xkey", parent, nodeX);
            Assert.AreEqual(expectedX, parent.GetEdge('x'));

            var expectedY = new SuffixTreeEdge("ykey", parent, nodeY);
            Assert.AreEqual(expectedY, parent.GetEdge('y'));

            var expectedZ = new SuffixTreeEdge("zkey", parent, nodeZ);
            Assert.AreEqual(expectedZ, parent.GetEdge('z'));

            Assert.IsNull(parent.GetEdge('a'));
            Assert.IsNull(parent.GetEdge('b'));
            Assert.IsNull(parent.GetEdge('c'));
        }
            /// <summary>
            /// Follows the characters of <paramref name="t"/> from <c>_root</c> through the
            /// <c>Children</c> maps.
            /// </summary>
            /// <param name="t">The string to look for.</param>
            /// <returns>
            /// <c>true</c> when every character could be followed (including when
            /// <paramref name="t"/> is empty); <c>false</c> at the first missing child.
            /// </returns>
            private bool ExistsAsSubstring(string t)
            {
                SuffixTreeNode cursor = _root;

                for (int position = 0; position < t.Length; position++)
                {
                    SuffixTreeNode next;
                    if (!cursor.Children.TryGetValue(t[position], out next))
                    {
                        return false;
                    }

                    cursor = next;
                }

                return true;
            }
Пример #20
0
            // Inserts the string s into the subtree rooted at this node, one node per
            // character, creating missing child nodes on the way. An empty string inserts
            // nothing.
            //
            // Written as a loop instead of recursing on s.Substring(1): the recursive form
            // needs one stack frame and one freshly allocated string per character.
            //
            // Each character is mapped to a slot by subtracting 'a'; a character that maps
            // outside the children array still surfaces as an out-of-range index error, as
            // before.
            public void insertSuffix(String s)
            {
                SuffixTreeNode current = this;

                for (int position = 0; position < s.Length; position++)
                {
                    int slot = s[position] - 'a';

                    // If there is no edge for this character, add a new one.
                    if (current.children[slot] == null)
                    {
                        current.children[slot] = new SuffixTreeNode();
                    }

                    // Continue with the rest of the string from the child.
                    current = current.children[slot];
                }
            }
Пример #21
0
        // Two-level tree: root --"XX"--> inner node, which has the edges "YY" and "ZZ".
        // Inputs ending inside or at the end of an edge resolve to that edge's target node;
        // an input that diverges in the middle of an edge resolves to null.
        public void SuffixTreeNodeResolver_ResolveTest2()
        {
            var treeRoot = new SuffixTreeNode(-1, false);
            var sut      = new SuffixTreeNodeResolver(treeRoot);

            var innerXx = new SuffixTreeNode(-1, false);
            var leafYy  = new SuffixTreeNode(2, true);
            var leafZz  = new SuffixTreeNode(3, true);

            treeRoot.AddChild("XX", innerXx);
            innerXx.AddChild("YY", leafYy);
            innerXx.AddChild("ZZ", leafZz);

            Assert.AreSame(innerXx, sut.Resolve("X"));
            Assert.AreSame(innerXx, sut.Resolve("XX"));

            Assert.AreSame(leafYy, sut.Resolve("XXY"));
            Assert.AreSame(leafYy, sut.Resolve("XXYY"));

            Assert.AreSame(leafZz, sut.Resolve("XXZ"));
            Assert.AreSame(leafZz, sut.Resolve("XXZZ"));

            Assert.IsNull(sut.Resolve("XXYZ"));
        }
Пример #22
0
            // Recursively counts the nodes of the subtree rooted at 'node'.
            // A null node counts as zero; otherwise the result is one (for the node itself)
            // plus the counts of its first MAX_CHAR child slots.
            public int _countNodesInTree(SuffixTreeNode node)
            {
                if (node == null)
                {
                    return(0);
                }

                // Start at one to account for this node.
                int total = 1;

                for (int slot = 0; slot < MAX_CHAR; ++slot)
                {
                    // Empty slots contribute zero through the null guard above.
                    total += _countNodesInTree(node.children[slot]);
                }

                return(total);
            }
Пример #23
0
        //--------------------------------------------------------------------------------------

        /// <summary>
        /// Builds a suffix tree for <paramref name="text"/> with <paramref name="suffixTreeBase"/>,
        /// searches it for <paramref name="pattern"/> and records timing statistics.
        /// </summary>
        /// <param name="text">The text to search in.</param>
        /// <param name="pattern">The pattern to look for. An empty pattern yields no matches.</param>
        /// <param name="suffixTreeBase">The builder whose <c>Execute</c> produces the tree root.</param>
        /// <param name="isSaveStatisticsForEmpty">
        /// When <c>false</c>, the statistics entry is removed again if nothing was found.
        /// </param>
        /// <returns>
        /// The <c>StarPosition</c> values of all leaves below the point where the pattern ends,
        /// or an empty list when the pattern does not occur.
        /// </returns>
        public List <int> FindSubstring(string text, string pattern, SuffixTreeBase suffixTreeBase, bool isSaveStatisticsForEmpty = true)
        {
            stopwatch = new Stopwatch();
            stopwatch.Start();
            StatisticAccumulator.CreateStatistics(text, pattern);

            SuffixTreeNode root = suffixTreeBase.Execute(text);

            List <int> result = new List <int>();

            SuffixTreeNode lastNode = LocatePatternNode(root, text, pattern);
            if (lastNode != null)
            {
                Collectleave(lastNode, result);
            }

            stopwatch.Stop();
            if (result.Count > 0 || isSaveStatisticsForEmpty)
            {
                long elapsedTicks         = stopwatch.ElapsedTicks;
                long durationMilliSeconds = stopwatch.ElapsedMilliseconds;
                _outputPresentation = string.Join(",", result.Select(p => p.ToString()));

                StatisticAccumulator.SaveStatisticData(_outputPresentation, elapsedTicks, durationMilliSeconds, DateTime.Now, null);
            }
            else
            {
                StatisticAccumulator.RemoveStatisticData();
            }

            return(result);
        }

        /// <summary>
        /// Walks the tree from <paramref name="root"/> along <paramref name="pattern"/> and returns
        /// the node whose incoming edge contains the pattern's last character, or <c>null</c> when
        /// the pattern cannot be followed.
        /// </summary>
        private static SuffixTreeNode LocatePatternNode(SuffixTreeNode root, string text, string pattern)
        {
            // Nothing to match: previously this indexed pattern[0] and threw.
            if (pattern.Length == 0)
            {
                return(null);
            }

            SuffixTreeNode currentNode     = root;
            int            patternPosition = 0;

            while (true)
            {
                SuffixTreeNode nextNode;
                if (!currentNode.Chields.TryGetValue(pattern[patternPosition], out nextNode))
                {
                    return(null);
                }

                // An edge without characters cannot consume pattern input; bail out instead of looping.
                if (nextNode.StarSegment > nextNode.EndSegment)
                {
                    return(null);
                }

                for (int i = nextNode.StarSegment; i <= nextNode.EndSegment; i++)
                {
                    // NOTE(review): leaf edges appear to be able to end on the terminator position
                    // that the builder appends, which lies just past the end of 'text' — confirm.
                    // Reaching it means the text ran out before the pattern did: no match.
                    if (i >= text.Length || text[i] != pattern[patternPosition])
                    {
                        // Any mismatch on the edge, including on its last character, ends the search.
                        return(null);
                    }

                    if (++patternPosition == pattern.Length)
                    {
                        return(nextNode);
                    }
                }

                // The whole edge matched: continue from the child rather than from the root again.
                currentNode = nextNode;
            }
        }
Пример #24
0
        // Builds the tree for "banana" and verifies the shape, labels and start indexes of
        // every node, level by level. The trailing comments show each node's expected label.
        public void Build_ExpectsCorrectTree()
        {
            string         input = "banana";
            SuffixTreeNode tree  = SuffixTree.Build(input);

            CheckSuffixTreeProperties(tree, input);

            // Root.
            Assert.IsTrue(tree.IsRoot);
            Assert.IsFalse(tree.IsLeaf);
            Assert.IsFalse(tree.IsIntermediate);
            Assert.AreEqual(string.Empty, tree.StringValue);
            Assert.AreEqual(-1, tree.StartIndex);
            Assert.AreEqual(3, tree.Children.Count);

            SuffixTreeNode branchA    = tree.Children[0]; /* a */
            SuffixTreeNode branchNa   = tree.Children[1]; /* na */
            SuffixTreeNode leafBanana = tree.Children[2]; /* banana$ */

            // Subtree below "a".
            AssertIntermediateNode(branchA, "a", 2);
            SuffixTreeNode aEnd = branchA.Children[0]; /* $ */
            SuffixTreeNode aNa  = branchA.Children[1]; /* na */

            AssertLeafNode(aEnd, "$", 5);
            AssertIntermediateNode(aNa, "na", 2);
            SuffixTreeNode aNaEnd = aNa.Children[0]; /* $ */
            SuffixTreeNode aNaNa  = aNa.Children[1]; /* na$ */

            AssertLeafNode(aNaEnd, "$", 3);
            AssertLeafNode(aNaNa, "na$", 1);

            // Subtree below "na".
            AssertIntermediateNode(branchNa, "na", 2);
            SuffixTreeNode naEnd = branchNa.Children[0]; /* $ */
            SuffixTreeNode naNa  = branchNa.Children[1]; /* na$ */

            AssertLeafNode(naEnd, "$", 4);
            AssertLeafNode(naNa, "na$", 2);

            // The whole text as a single leaf.
            AssertLeafNode(leafBanana, "banana$", 0);
        }

        // Asserts that 'node' is an intermediate node (neither root nor leaf) with the given
        // label, a start index of -1 and the given number of children.
        private static void AssertIntermediateNode(SuffixTreeNode node, string expectedValue, int expectedChildCount)
        {
            Assert.IsTrue(node.IsIntermediate);
            Assert.IsFalse(node.IsRoot);
            Assert.IsFalse(node.IsLeaf);
            Assert.AreEqual(expectedValue, node.StringValue);
            Assert.AreEqual(-1, node.StartIndex);
            Assert.AreEqual(expectedChildCount, node.Children.Count);
        }

        // Asserts that 'node' is a leaf (neither root nor intermediate) with the given label
        // and start index, and that it has no children.
        private static void AssertLeafNode(SuffixTreeNode node, string expectedValue, int expectedStartIndex)
        {
            Assert.IsFalse(node.IsIntermediate);
            Assert.IsFalse(node.IsRoot);
            Assert.IsTrue(node.IsLeaf);
            Assert.AreEqual(expectedValue, node.StringValue);
            Assert.AreEqual(expectedStartIndex, node.StartIndex);
            Assert.AreEqual(0, node.Children.Count);
        }
Пример #25
0
        /// <summary>
        /// Builds a suffix tree for <paramref name="text"/> (with a "$" terminator appended) by
        /// inserting the suffixes one after another, each time walking down from the root and
        /// either adding a new leaf or splitting an existing edge.
        /// </summary>
        /// <remarks>
        /// When <c>StatisticAccumulator</c> is not null, iteration/comparison counters are updated
        /// during construction and the elapsed time plus a string rendering of the tree are saved
        /// at the end.
        /// </remarks>
        /// <param name="text">The text to index.</param>
        /// <returns>The root node of the constructed tree (also stored in <c>root</c>).</returns>
        public override SuffixTreeNode Execute(string text)
        {
            // Statistics are optional: timing and counters are only active with an accumulator.
            if (StatisticAccumulator != null)
            {
                stopwatch = new Stopwatch();
                stopwatch.Start();
                StatisticAccumulator.CreateStatistics(text);
            }

            // Taken before the terminator is appended, so this is the index of "$" in the
            // extended text; every leaf edge ends there.
            int lastPositionInText = text.Length;

            text += "$";
            root  = new SuffixTreeNode();
            // i is the start of the suffix being inserted; the suffix consisting only of "$"
            // (the last index) is not inserted.
            for (int i = 0; i < text.Length - 1; i++)
            {
                // j walks through the suffix, current walks down the tree.
                int            j       = i;
                SuffixTreeNode current = root;
                // current is never set to null below; the loop is left through the breaks.
                while (current != null)
                {
                    if (current.Chields.ContainsKey(text[j]))
                    {
                        // An edge starts with the next suffix character: compare along its segment.
                        SuffixTreeNode next = current.Chields[text[j]];
                        int            j0   = j;
                        int            k    = next.StarSegment;
                        if (StatisticAccumulator != null)
                        {
                            StatisticAccumulator.IterationCountInc(3);
                        }
                        while (k <= next.EndSegment)
                        {
                            if (StatisticAccumulator != null)
                            {
                                StatisticAccumulator.IterationCountInc();
                                StatisticAccumulator.NumberOfComparisonInc();
                            }
                            // j is advanced even when the characters differ, so after a mismatch
                            // the mismatching suffix character is at j - 1 (used below).
                            if (text[j++] != text[k])
                            {
                                break;
                            }
                            k++;
                        }
                        if (k > next.EndSegment)
                        {
                            // The whole edge matched: continue the walk from the child.
                            if (StatisticAccumulator != null)
                            {
                                StatisticAccumulator.IterationCountInc();
                            }
                            current = next;
                        }
                        else
                        {
                            // Mismatch at edge position k: split the edge there.
                            if (StatisticAccumulator != null)
                            {
                                StatisticAccumulator.IterationCountInc(14);
                            }
                            // newMiddle takes over the matched part of the edge, up to k - 1.
                            SuffixTreeNode newMiddle = new SuffixTreeNode()
                            {
                                Parent      = next.Parent,
                                StarSegment = next.StarSegment,
                                EndSegment  = k - 1,
                                StartSymbol = next.StartSymbol
                            };
                            // newLeaf carries the rest of the current suffix, starting at the
                            // mismatching character, and remembers the suffix start i.
                            SuffixTreeNode newLeaf = new SuffixTreeNode()
                            {
                                Parent       = newMiddle,
                                StarSegment  = j - 1,
                                EndSegment   = lastPositionInText,
                                StartSymbol  = text[j - 1],
                                StarPosition = i
                            };
                            newMiddle.Chields.Add(text[k], next);
                            newMiddle.Chields.Add(text[j - 1], newLeaf);
                            // Replaces next in its former parent's map (same start symbol).
                            newMiddle.Parent.Chields[newMiddle.StartSymbol] = newMiddle;
                            // The old child keeps only the unmatched tail of its edge.
                            next.Parent      = newMiddle;
                            next.StarSegment = k;
                            next.StartSymbol = text[k];
                            break;
                        }
                    }
                    else
                    {
                        // No edge starts with this character: the rest of the suffix becomes a
                        // new leaf directly below current.
                        if (StatisticAccumulator != null)
                        {
                            StatisticAccumulator.IterationCountInc(2);
                        }
                        current.Chields.Add(text[j], new SuffixTreeNode()
                        {
                            Parent       = current,
                            StarSegment  = j,
                            EndSegment   = lastPositionInText,
                            StartSymbol  = text[j],
                            StarPosition = i
                        });
                        break;
                    }
                }
            }

            // Rendered unconditionally and before the stopwatch is stopped, so the rendering
            // time is part of the measured duration.
            string outputPresentation = NodePresentationAsString(root);

            if (StatisticAccumulator != null)
            {
                stopwatch.Stop();
                long elapsedTicks         = stopwatch.ElapsedTicks;
                long durationMilliSeconds = stopwatch.ElapsedMilliseconds;
                StatisticAccumulator.SaveStatisticData(outputPresentation, elapsedTicks, durationMilliSeconds, DateTime.Now, null);
            }


            return(root);
        }
Пример #26
0
 //--------------------------------------------------------------------------------------
 /// <summary>
 /// Renders the subtree rooted at <paramref name="node"/> as
 /// <c>[start-end](child,child,...)</c>, with the children ordered by their dictionary key
 /// and rendered recursively in the same format.
 /// </summary>
 /// <param name="node">Root of the subtree to render.</param>
 /// <returns>The textual rendering of the subtree.</returns>
 protected string NodePresentationAsString(SuffixTreeNode node)
 {
     var renderedChildren = node.Chields
                            .OrderBy(entry => entry.Key)
                            .Select(entry => NodePresentationAsString(entry.Value));
     string childList = string.Join(",", renderedChildren);

     return($"[{node.StarSegment}-{node.EndSegment}]({childList})");
 }
Пример #27
0
        // Adds a child under an empty key.
        // NOTE(review): there are no assertions here, so the test presumably relies on an
        // expected-exception attribute declared outside this view — confirm.
        public void SuffixTreeNode_AddChildGuardCase1Test()
        {
            var parent = new SuffixTreeNode(-1, false);
            var child  = new SuffixTreeNode(1, true);

            parent.AddChild("", child);
        }