/// <summary>
/// Reads one line from standard input, builds a <c>SuffixTree</c> from it and
/// writes the tree's <c>Length</c> to standard output.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var line = Console.ReadLine();
    var tree = new SuffixTree(line);

    Console.WriteLine(tree.Length);
}
/// <summary>
/// Adds a fixed list of words to an empty tree, clears the tree, then adds the
/// same words again. Membership is checked before and after every insertion
/// and the element count is checked after each phase.
/// </summary>
public void AddingAfterClearingSuffixTree()
{
    var suffixTree = new SuffixTree();
    string[] words =
    {
        "One", "one", "oNe", "two", "hello", "test", "here", "there", "?!??!?!?!?",
        "VeryVeryVeryLoooooooooooooooooong", ".k[2c3-9024g-u,9weg,ouimwt", "3q2tgwadh",
        "`+rv`+*1v+vt23*1`vt*1v", "!@#)(*^$!%@_", " bum ", " bam ", " bum bam ",
        "1", "12", "123", "1234", "12345", "123456"
    };

    // Phase 1: every word must be absent before it is added and present afterwards.
    foreach (string word in words)
    {
        if (suffixTree.Contains(word))
        {
            Assert.Fail();
        }

        suffixTree.Add(word);

        if (!suffixTree.Contains(word))
        {
            Assert.Fail();
        }
    }

    if (suffixTree.Count != words.Length)
    {
        Assert.Fail();
    }

    // Phase 2: clearing must leave the tree empty.
    suffixTree.Clear();

    if (suffixTree.Count != 0)
    {
        Assert.Fail();
    }

    // Phase 3: the cleared tree must accept the same words again.
    foreach (string word in words)
    {
        if (suffixTree.Contains(word))
        {
            Assert.Fail();
        }

        suffixTree.Add(word);

        if (!suffixTree.Contains(word))
        {
            Assert.Fail();
        }
    }

    Assert.IsTrue(suffixTree.Count == words.Length);
}
/// <summary>
/// Smoke test: inserts "bananaa", checks the count, substring containment and
/// the number of <c>StartsWith</c> matches, then deletes the word and checks
/// that the tree is empty again.
/// </summary>
public void Suffix_Smoke_Test()
{
    var tree = new SuffixTree<char>();

    tree.Insert("bananaa".ToCharArray());
    Assert.IsTrue(tree.Count == 1);

    // IEnumerable test: the Count property must agree with the enumerated count.
    Assert.AreEqual(tree.Count, tree.Count());

    Assert.IsTrue(tree.Contains("aa".ToCharArray()));
    Assert.IsFalse(tree.Contains("ab".ToCharArray()));

    var naMatches = tree.StartsWith("na".ToCharArray());
    Assert.IsTrue(naMatches.Count == 2);

    var anMatches = tree.StartsWith("an".ToCharArray());
    Assert.IsTrue(anMatches.Count == 2);

    tree.Delete("bananaa".ToCharArray());
    Assert.IsTrue(tree.Count == 0);

    // IEnumerable test: same agreement check on the emptied tree.
    Assert.AreEqual(tree.Count, tree.Count());
}
/// <summary>
/// Adds each word of a fixed list to the tree and, straight after each
/// insertion, checks that every suffix of that word is reported by
/// <c>ContainsSuffix</c> and that the word itself is reported by <c>Contains</c>.
/// Finally checks that the count equals the number of words.
/// </summary>
public void CheckIfSuffixIsContained()
{
    var suffixTree = new SuffixTree();
    string[] words =
    {
        "One", "one", "oNe", "two", "hello", "test", "here", "there", "?!??!?!?!?",
        "VeryVeryVeryLoooooooooooooooooong", ".k[2c3-9024g-u,9weg,ouimwt", "3q2tgwadh",
        "`+rv`+*1v+vt23*1`vt*1v", "!@#)(*^$!%@_", " bum ", " bam ", " bum bam ",
        "1", "12", "123", "1234", "12345", "123456"
    };

    foreach (string word in words)
    {
        suffixTree.Add(word);

        // Substring(start) yields the suffix that begins at 'start'.
        for (int start = 0; start < word.Length; start++)
        {
            string suffix = word.Substring(start);
            if (!suffixTree.ContainsSuffix(suffix))
            {
                Assert.Fail();
            }
        }

        if (!suffixTree.Contains(word))
        {
            Assert.Fail();
        }
    }

    Assert.IsTrue(suffixTree.Count == words.Length);
}
/// <summary>
/// Builds a suffix tree over <paramref name="dna"/> terminated with '$' and
/// runs <c>matchproblem</c> against each child of the root until one reports
/// failure.
/// </summary>
/// <param name="dna">Text to search in.</param>
/// <param name="pattern">Pattern handed to <c>matchproblem</c>.</param>
/// <returns>Always <c>null</c> in the current implementation.</returns>
/// <remarks>
/// NOTE(review): the results of <c>matchproblem</c> only steer the loop and are
/// never turned into a return value. An earlier brute-force version, now
/// disabled, collected matching start indices into a space-separated string
/// and returned "No Match!" when none were found.
/// </remarks>
public string Solve(string dna, string pattern)
{
    string terminated = dna + "$";
    SuffixTree text = new SuffixTree(terminated);

    for (int childIndex = 0; childIndex < text.root.Children.Count; childIndex++)
    {
        var outcome = matchproblem(terminated, pattern, text.root.Children[childIndex]);

        // Item2 above 1 means "keep going"; otherwise a false Item1 ends the scan.
        if (!(outcome.Item2 > 1) && outcome.Item1 == false)
        {
            break;
        }
    }

    return null;
}
/// <summary>
/// Builds a suffix tree from <paramref name="actions"/> and stores the result of
/// <c>GetLongestRepeatedSubstrings(1)</c> in <c>_longest_repetitions</c>.
/// </summary>
/// <param name="actions">User actions handed to the <c>SuffixTree</c> constructor.</param>
/// <remarks>
/// Earlier revisions wrapped the construction in a named mutex and logged the
/// detected repetition and the tree's edges; that code is disabled.
/// </remarks>
public void Rebuild(List<UserAction> actions)
{
    var actionTree = new SuffixTree(actions);
    actionTree.BuildTree();

    _longest_repetitions = actionTree.GetLongestRepeatedSubstrings(1);
}
/// <summary>
/// Constructs a <c>SuffixTree</c> for <paramref name="word"/> and builds it.
/// </summary>
/// <param name="word">Text passed to the tree constructor.</param>
/// <param name="canonizationChar">Character passed to <c>Build</c>; defaults to '$'.</param>
/// <returns>The built tree.</returns>
public static SuffixTree Create(string word, char canonizationChar = '$')
{
    SuffixTree built = new SuffixTree(word);
    built.Build(canonizationChar);

    return built;
}
/// <summary>
/// Creates a node owned by <paramref name="tree"/>. The node id is taken from
/// the tree's <c>NodeCount</c>, which is incremented as a side effect.
/// </summary>
/// <param name="tree">Owning tree.</param>
/// <param name="suffixNode">Value stored in the node's <c>suffixNode</c> field.</param>
public StNode(SuffixTree tree, StNode suffixNode)
{
    this.tree = tree;

    // Post-increment: this node receives the current counter value.
    id = tree.NodeCount++;

    childEdges = new Dictionary<char, StEdge>();
    this.suffixNode = suffixNode;
}
/// <summary>
/// Creates an edge that leaves <paramref name="head"/>. It starts at the tree's
/// current suffix end index and takes the tree's next edge number, which is
/// incremented as a side effect.
/// </summary>
/// <param name="tree">Owning tree.</param>
/// <param name="head">Node assigned to <c>Head</c>.</param>
public Edge(SuffixTree tree, Node head)
{
    _tree = tree;
    Head = head;

    StartIndex = _tree.CurrentSuffixEndIndex;

    // Post-increment: this edge receives the current counter value.
    EdgeNumber = _tree.NextEdgeNumber++;
}
/// <summary>
/// Initializes a node belonging to <paramref name="tree"/>, drawing its id from
/// <c>tree.NodeCount</c> (which is incremented) and starting with no child edges.
/// </summary>
/// <param name="tree">Tree the node belongs to.</param>
/// <param name="suffixNode">Node stored in the <c>suffixNode</c> field.</param>
public StNode(SuffixTree tree, StNode suffixNode)
{
    this.tree = tree;

    int assignedId = tree.NodeCount++;
    this.id = assignedId;

    this.childEdges = new Dictionary<char, StEdge>();
    this.suffixNode = suffixNode;
}
/// <summary>
/// Uses reflection to find the non-public static methods of
/// <c>SuffixTreeTest</c> that carry <c>SuffixTreeTestMethodAttribute</c>, invokes
/// each one with <paramref name="tree"/> in name order, and prints the failures
/// and a pass/total summary.
/// </summary>
/// <param name="tree">Tree passed as the single argument to every test method.</param>
private static void runTests(SuffixTree tree)
{
    var testMethods = typeof(SuffixTreeTest)
        .GetMethods(BindingFlags.NonPublic | BindingFlags.Static)
        .Where(m => m.GetCustomAttributes(typeof(SuffixTreeTestMethodAttribute), false).Length > 0)
        .OrderBy(m => m.Name);

    int numTests = 0;
    int numFailedTests = 0;

    foreach (MethodInfo mi in testMethods)
    {
        DescriptionAttribute descriptor = (DescriptionAttribute)
            mi.GetCustomAttributes(typeof(DescriptionAttribute), false).FirstOrDefault();

        // Each test method is expected to return a bool pass/fail flag.
        bool didPassTest = (bool)mi.Invoke(null, new Object[1] { tree });
        numTests++;

        if (!didPassTest)
        {
            numFailedTests++;

            // A test method need not carry a DescriptionAttribute; fall back to
            // its name instead of dereferencing a null descriptor.
            string label = descriptor != null ? descriptor.Description : mi.Name;
            Console.WriteLine("Failed test: {0:s}", label);
        }
    }

    Console.WriteLine(String.Format("Passed {0:d} of {1:d} tests",
        numTests - numFailedTests, numTests));
}
/// <summary>
/// Corrupts each entry of <c>individualStrings</c> by inserting random content
/// and asserts that the tree built from <c>theString</c> does not find the
/// corrupted string, unless it happens to equal a listed word.
/// </summary>
/// <remarks>
/// The random generator is seeded from the clock, so runs are not reproducible.
/// </remarks>
public void VerifyPartialRandomInvalidWords()
{
    SuffixTree tree = new SuffixTree(theString);
    tree.BuildTree();

    Random random = new Random((int)DateTime.Now.Ticks);

    foreach (string individualString in individualStrings)
    {
        StringBuilder builder = new StringBuilder(individualString);

        // Random.Next(maxValue) throws ArgumentOutOfRangeException for a negative
        // bound, which happened for words shorter than two characters.
        int firstInsertAt = random.Next(Math.Max(0, individualString.Length - 2));

        // This injects random content into valid words. The upper bound is drawn
        // afresh on every iteration, so the number of insertions is itself random
        // and may be zero.
        for (int j = firstInsertAt; j < random.Next(individualString.Length); j++)
        {
            // NOTE(review): Random.Next returns an int, so this binds to
            // StringBuilder.Insert(int, int) and inserts the number's decimal
            // digits rather than a letter. Left unchanged on purpose: inserting
            // letters would make accidental matches in the text more likely.
            builder.Insert(j, random.Next('a', 'z'));
        }

        string builtString = builder.ToString();
        string message = "Corrupting: " + individualString + " as " + builtString;

        // Skip results that are still listed words; this also covers the case
        // where nothing was inserted.
        if (!individualStrings.Contains(builtString))
        {
            Assert.IsTrue(!tree.Search(builtString), message);
        }
    }
}
/// <summary>
/// Times three phases on 32768 random bytes: generating the data, building the
/// suffix tree (<c>Extend</c> followed by <c>Terminate</c>), and querying every
/// position of the byte 'x'. The timings and positions are printed; nothing is
/// asserted.
/// </summary>
public void build_for_large_binary()
{
    // Phase 1: data generation.
    var timer = Stopwatch.StartNew();
    var data = new byte[32768];
    var rnd = new Random();
    rnd.NextBytes(data);
    timer.Stop();
    Console.WriteLine($"Data generation took {timer.Elapsed} for 32KB");

    // Phase 2: tree construction.
    timer.Restart();
    var subject = new SuffixTree();
    subject.Extend(data);
    subject.Terminate();
    timer.Stop();
    Console.WriteLine($"Building suffix tree took {timer.Elapsed}");

    // Phase 3: query, including sorting and formatting of the result.
    timer.Restart();
    byte[] needle = new[] { (byte)'x' };
    var positions = subject.FindAll(needle).OrderBy(n => n).ToList();
    var result = string.Join(",", positions);
    timer.Stop();
    Console.WriteLine($"Querying suffix tree took {timer.Elapsed}");

    Console.WriteLine($"\r\n'x' found in these {positions.Count} positions: {result}");
}
/// <summary>
/// A tree built from the empty string must contain the empty string and must
/// not contain "1" or "test".
/// </summary>
public void ContainsTest1()
{
    var emptyTree = new SuffixTree("");

    bool hasEmpty = emptyTree.Contains("");
    Assert.IsTrue(hasEmpty);

    bool hasDigit = emptyTree.Contains("1");
    Assert.IsFalse(hasDigit);

    bool hasWord = emptyTree.Contains("test");
    Assert.IsFalse(hasWord);
}
// Builds a suffix tree (SuffixTree.Build) over a long fixed lowercase string and
// asserts that a slice taken from its middle (start s.Length / 2, length
// s.Length / 50) is reported by Contains.
// NOTE(review): in this view the string literal is split across two physical
// lines; a regular C# string literal cannot contain a raw line break, so confirm
// against the original file that the literal is a single line there.
public void Contains_Simple_OnLongString_ShouldReturnTrue() { var s = "olbafuynhfcxzqhnebecxjrfwfttwrxvgujqxaxuaukbflddcrptlvyoaxuwzlwmoeljnxgmsleapkyzodhtymxuvlchoomsuodicehnzyebqtgsqeplinthhnalituxrisknsyjszuaatwoulznpjbvjmhytqgaqmctqvwgxailhproehwctldlagpjqaawdbialginqmweqrcopiqfnludmjuxkqlsgrydzyhecoojgmspowoykgghnbudhujnmyhqxbkfggxxprgfhraksfylcveevxvlxpzxkcqtkchasarbusvqzimvvfsvredhjykpqyyysyxbzwsuqahpjcroqvhysaynfheehppinszvwmyqlmymyqngrqzuefojczpoqcgbkvkmfpipdoetqxtdigphjhkxuwzieqirlvapypdysohfydtxzppfuufcreorhpsyydvvvsproofmuucwqqtskzieegstlokqkvjbssfythoenpbhlhnnsgknlapaigdwvrvsnyrhxhuzqkzoakldexmvnuvqscxmrysnuumawqrldjbtbmnhytvmmyykdaxuvqifecczafafzewmuplebvkxseatwsxwatbszboybwzhgfdtsjpxckknalqvgwuwwretocfaphnyuoyvnxbtabosfewkfrlbbeiduuidlogxfdacbplkbkpljvthltjjrlxbtejpdqjddnnsfhsljjfvmsjigyxmhjeqfcmmzzqpsxmnkuwlhhvcrtxskfmyieoctweswpkplcnjiqmtjjdloobapntxqmducnkabjcutinyhhekioybfokektjerdojqfvyalkvpqsznlvqvrswhelvburtkzdcceqehyqndhlcvkbieceazmuanqiauhkyhcbcckeydaevunddkwlntezctepnfrchvquxgtsnupoiwneengszjggwxkmahlbiwzsbyryqasufdsaaigulgwjqepccwesmbcfpoymrsjrbqwzjpjmbexpjloxdtwxqbdmggreurdcohfpgbchhrthdopewrsyfindsvrexpkkooxkmzxklsalyfuxscwthbfdbeghnpowbjxcedzogidsrdnjimcybbxmwpdiwnihhgylpsbukpsjtbkktylouakffurdfmpsnndtjcvjkbviezyqdgvhdcllibfbniafffwebrmyvbryjnomzgiglecxjntcvcrngwrvhefqaswhpynyzqwdpvewmjlpndtihwebjqolymkytrtidajqrdyvqzhcsvlvfvqspskkttqjsotdqkcdwzmdxxuevpvcrsijxskruaajrqaqgcarbxfrwerhddeetidequujlxmyaaoriomkhdmqaitbzbvhmnhmuntueqwueagpomwdhturmpwkyszjiwwlucqbhqbxgibuqmghvlrrbypswfsxkhgwjcndjnqblxargeegkzmhlahbahsfecevnpbxqdbuamjffddctbcedlcptoynjiuypvbgeatatnxztxsxvjrihxmoeeqmghwxxdyzrczljthnteqrfrquhvlssswndmdwxcfzrhcszffqdnjmqyjnywrurbsyavdxcwwtjsttcbsnvrpgiqlswqdcqmxjxwoebxjwlhlxbjuxuacdwktlivrfmncnqosxecfccutmikgwkeprlrkdfcinqgeeeompsmpcvxvnopzmrnuvdljcxjurxmliveisyfqsnpxsokkefgdujosxckvrkgeavugntchvztxkdqeiwyluxxgptyuuligmgfjcwcynffbgysjewlaaglqjuujjxytrphnfwncbkgkwswhcvliseqyifouatvszslptxqnhawzjhgfyorphndgksqdeoqohsqvwctwofrvqqpsnfi
sbcpluhesurrihkxvpugeitmatignbqqqldkdwqzaggxmitqlzobbuqccoeddmsdtjvywnbiiwkbidkjrofmbxjlnzfryzgxjbwgiaxbahchovroigmraoofyuzqheonmrfpskgciitjtxjzbhlpsohvysrwdwviirlxpvemizykpykhipjwhmqxoiwtevhyddyrigooibzrshqmbypvthubgozvhinzmntadmkfplledvglacrbeghcofvsddhokjhyfcqwwhbwjlkafilmaezpwezzgzgajpxhxcgwmcieilzlfrsxjlagjbjryhbrznmsfushtydgfsizclunncsbzpktmkmhmacicjuqhqaozwtihtcokd"; var t = s.Substring(s.Length / 2, s.Length / 50); var st = SuffixTree.Build(s); Assert.IsTrue(st.Contains(t)); }
/// <summary>
/// A tree built over a string without repeated characters must not contain
/// "rstb", which is not a substring of that string.
/// </summary>
public void Contains_OnNonRepeated_ShouldReturnFalse()
{
    const string text = "abcdefghijklmnpqrst";
    const string absent = "rstb";

    var tree = SuffixTree.Build(text);
    bool found = tree.Contains(absent);

    Assert.IsFalse(found);
}
/// <summary>
/// A tree built over a string without repeated characters must contain the
/// six-character slice that starts at index 3 of that string.
/// </summary>
public void Contains_OnNonRepeated_ShouldReturnTrue()
{
    const string text = "abcdefghijklmnpqrst";
    string slice = text.Substring(3, 6);

    var tree = SuffixTree.Build(text);
    bool found = tree.Contains(slice);

    Assert.IsTrue(found);
}
/// <summary>
/// Walks every root-to-leaf path of <paramref name="tree"/>, concatenating the
/// edge substrings along the way, and checks that each leaf's accumulated
/// string equals the tail of <c>tree.Text</c> of the same length.
/// </summary>
/// <param name="tree">Tree to validate.</param>
/// <param name="failedLeafNodeIds">
/// Receives the ids of the leaf nodes whose path string did not match the text.
/// </param>
/// <returns><c>true</c> when no leaf failed the comparison.</returns>
private static bool validateSuffixStrings(
    SuffixTree tree, out List<int> failedLeafNodeIds)
{
    Stack<Tuple<StEdge, string>> edgeStrings = new Stack<Tuple<StEdge, string>>();

    // Step 1: Populate edgeStrings with data from child edges of the root node.
    // Track any leaves that are immediate children of the root node.
    List<Tuple<StEdge, string>> leafEdgeStrings = new List<Tuple<StEdge, string>>();
    foreach (StEdge edge in tree.Root.ChildEdges())
    {
        Tuple<StEdge, string> edgeString =
            new Tuple<StEdge, string>(edge, tree.EdgeSubstring(edge));
        edgeStrings.Push(edgeString);
        if (!edge.ChildNode.HasChildEdges())
        {
            Console.WriteLine(String.Format("SuffixTreeTest: Found a leaf: {0:s}", edgeString.Item2));
            leafEdgeStrings.Add(edgeString);
        }
    }

    // Step 2: Walk the tree, adding the remaining edges. Keep track of leaf edges.
    while (edgeStrings.Count > 0)
    {
        Tuple<StEdge, string> edgeString = edgeStrings.Pop();
        foreach (StEdge childEdge in edgeString.Item1.ChildNode.ChildEdges())
        {
            Tuple<StEdge, string> newEdgeString = new Tuple<StEdge, string>(
                childEdge, edgeString.Item2 + tree.EdgeSubstring(childEdge));
            edgeStrings.Push(newEdgeString);
            if (!childEdge.ChildNode.HasChildEdges())
            {
                Console.WriteLine(String.Format("SuffixTreeTest: Found a leaf: {0:s}", newEdgeString.Item2));
                leafEdgeStrings.Add(newEdgeString);
            }
        }
    }

    // Step 3: Inspect the leaf edge data. Keep track of failed leaf nodes.
    failedLeafNodeIds = new List<int>();

    // Both values are right-aligned in a column as wide as the text. The second
    // placeholder must use index 1; with index 0 in both places the path string
    // was printed twice and the text string never appeared.
    string width = tree.Text.Length.ToString();
    string formatStr =
        "SuffixTreeTest: About to compare \"{0," + width + ":s}\" with \"{1," + width + ":s}\"";

    foreach (var leafEdgeString in leafEdgeStrings)
    {
        // Accumulated string should equal the corresponding substring of tree.Text.
        string pathStr = leafEdgeString.Item2;
        int len = pathStr.Length;
        string textStr = tree.RangeString(tree.Text.Length - len, tree.Text.Length - 1);

        Console.WriteLine(formatStr, pathStr, textStr);

        if (pathStr != textStr)
        {
            failedLeafNodeIds.Add(leafEdgeString.Item1.ChildNode.Id);
        }
    }

    return failedLeafNodeIds.Count == 0;
}
/// <summary>
/// Builds a tree over a pangram-like string and checks three groups of
/// substrings with <c>AllExistAsSubstrings</c>: mixed inner substrings, a run
/// of suffixes, and single characters.
/// </summary>
public void ContainsForSuffixTree()
{
    const string text = "thequickredfoxjumpedoverthelazybrowndog";
    var suffixTree = new SuffixTree(text);

    string[] innerSubstrings = { "azy", "own", "dog", "la", "br", "fox", "thequick" };
    Assert.True(suffixTree.AllExistAsSubstrings(innerSubstrings));

    string[] suffixes = { "lazybrowndog", "azybrowndog", "zybrowndog", "ybrowndog" };
    Assert.True(suffixTree.AllExistAsSubstrings(suffixes));

    string[] singleCharacters = { "l", "g", "a", "o" };
    Assert.True(suffixTree.AllExistAsSubstrings(singleCharacters));
}
/// <summary>
/// Adds a fixed list of words, then removes every second word (even indices)
/// and checks that each removal succeeds, that the removed word is no longer
/// contained, and that the final count reflects the removals.
/// </summary>
public void AddingAndRemovingSomeWordsAndCheckingIfContained()
{
    var suffixTree = new SuffixTree();
    string[] words =
    {
        "One", "one", "oNe", "two", "hello", "test", "here", "there", "?!??!?!?!?",
        "VeryVeryVeryLoooooooooooooooooong", ".k[2c3-9024g-u,9weg,ouimwt", "3q2tgwadh",
        "`+rv`+*1v+vt23*1`vt*1v", "!@#)(*^$!%@_", " bum ", " bam ", " bum bam ",
        "1", "12", "123", "1234", "12345", "123456"
    };

    // Insertion phase: absent before Add, present after.
    foreach (string word in words)
    {
        if (suffixTree.Contains(word))
        {
            Assert.Fail();
        }

        suffixTree.Add(word);

        if (!suffixTree.Contains(word))
        {
            Assert.Fail();
        }
    }

    if (suffixTree.Count != words.Length)
    {
        Assert.Fail();
    }

    // Removal phase: every word at an even index.
    int removedWords = 0;
    for (int index = 0; index < words.Length; index += 2)
    {
        string word = words[index];

        if (!suffixTree.Remove(word))
        {
            Assert.Fail();
        }

        removedWords++;

        if (suffixTree.Contains(word))
        {
            Assert.Fail();
        }
    }

    Assert.IsTrue(suffixTree.Count == words.Length - removedWords);
}
/// <summary>
/// Demo entry point: builds a suffix tree for a single sample word.
/// </summary>
/// <remarks>
/// Other sample inputs previously tried here: "abcabxabcd",
/// "abcdefabxybcdmnabcdex", "abcadak", "dedododeeodo", "ooooooooo",
/// "mississippi".
/// </remarks>
public static void Main()
{
    const string sample = "AABAAAB";

    SuffixTree.Create(sample);
}
/// <summary>
/// Merges two ranges of a tree built over "abzabcd$" (the "l" range first, the
/// "r" range second) and expects the node texts "ab", "z" and "cd".
/// </summary>
/// <param name="rs">First argument of the second <c>Merge</c> call.</param>
/// <param name="rl">Second argument of the second <c>Merge</c> call.</param>
/// <param name="ls">First argument of the first <c>Merge</c> call.</param>
/// <param name="ll">Second argument of the first <c>Merge</c> call.</param>
public void MergePartialOverlapBothSides(int rs, int rl, int ls, int ll)
{
    var tree = new SuffixTree("abzabcd$");

    tree.Merge(ls, ll);
    tree.Merge(rs, rl);

    string[] expected = { "ab", "z", "cd" };
    var actual = tree.ToNodeText();
    actual.ShouldBeEquivalentTo(expected);
}
/// <summary>
/// Extends an empty tree with <c>SampleText</c> and prints the tree's
/// description. Nothing is asserted.
/// </summary>
public void construction_from_text()
{
    var subject = new SuffixTree();
    subject.Extend(SampleText);

    Console.WriteLine(subject.TreeDescription());
}
/// <summary>
/// Merges the ranges (0, 2) and (2, 2) of a tree built over "abab$" and
/// expects the single node text "ab".
/// </summary>
public void MergeFullOverlap()
{
    var tree = new SuffixTree("abab$");

    tree.Merge(0, 2);
    tree.Merge(2, 2);

    string[] expected = { "ab" };
    var actual = tree.ToNodeText();
    actual.ShouldBeEquivalentTo(expected);
}
/// <summary>
/// Required method for Designer support - do not modify
/// the contents of this method with the code editor.
/// </summary>
/// <remarks>
/// Creates the search box and the result box, starts a thread that loads the
/// text file and builds the suffix tree, and lays out both controls on the form.
/// NOTE(review): the worker thread assigns control properties and
/// <c>Cursor.Current</c> directly; confirm whether these should be marshalled
/// to the UI thread.
/// </remarks>
private void InitializeComponent()
{
    this._searchResults = new List<string>();
    this._textBox = new TextBox();
    this._resultTextBox = new RichTextBox();

    // The search box stays disabled while the tree is being built.
    this._textBox.Enabled = false;
    this._textBox.Text = "Building suffix tree please wait..";
    Cursor.Current = Cursors.WaitCursor;

    ThreadStart buildIndex = () =>
    {
        // TODO: Change this to your own file path
        string corpus = File.ReadAllText(
            @"D:\Github\Algorithms\SearchingShakespeare\shakespeare-complete-works.txt") + "$";

        // Collapse every run of whitespace into a single space.
        corpus = Regex.Replace(corpus, @"\s+", " ");
        string lowered = corpus.ToLower();

        this._suffixTree = new SuffixTree(corpus, lowered);

        this._textBox.Text = "Enter text here...";
        this._textBox.Enabled = true;
        Cursor.Current = Cursors.Default;
    };
    new Thread(buildIndex).Start();

    this.SuspendLayout();

    // Search box
    this._textBox.Location = new Point(0, 0);
    this._textBox.Size = new Size(1620, 20);
    this._textBox.AcceptsReturn = true;
    this._textBox.Multiline = false;
    this._textBox.Name = "Search";
    this._textBox.TextChanged += TextBox_TextChanged;
    this._textBox.GotFocus += TextBoxRemovetext;
    this._textBox.LostFocus += TextBoxAddtext;

    // Result box
    this._resultTextBox.Location = new Point(0, 20);
    this._resultTextBox.BackColor = Color.Tomato;
    this._resultTextBox.Size = new Size(1620, 790);
    this._resultTextBox.Text = "Can you see me ?";
    this._resultTextBox.Multiline = true;
    this._resultTextBox.Enabled = false;
    this._resultTextBox.Text = "";
    this._resultTextBox.ForeColor = Color.Black;
    this._resultTextBox.ScrollBars = RichTextBoxScrollBars.Vertical;
    this._resultTextBox.Font = new Font(
        new FontFamily(System.Drawing.Text.GenericFontFamilies.Monospace), 12);

    // Form
    this.components = new System.ComponentModel.Container();
    this.Controls.Add(_textBox);
    this.Controls.Add(_resultTextBox);
    this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
    this.ClientSize = new System.Drawing.Size(1620, 450);
    this.ResumeLayout(false);
}
/// <summary>
/// Creates a suffix descriptor anchored at <paramref name="originNode"/> that
/// covers the index range given by the two bounds.
/// </summary>
/// <param name="tree">Owning tree.</param>
/// <param name="originNode">Node assigned to <c>OriginNode</c>.</param>
/// <param name="beginIndex">Begin index; defaults to 0.</param>
/// <param name="endIndex">End index; defaults to <c>int.MaxValue</c>.</param>
public StSuffix(
    SuffixTree tree,
    StNode originNode,
    int beginIndex = 0,
    int endIndex = int.MaxValue)
{
    this.tree = tree;
    OriginNode = originNode;

    this.beginIndex = beginIndex;
    this.endIndex = endIndex;
}
/// <summary>
/// Extends an empty tree with <c>SampleText</c> and checks that three phrases
/// are reported as contained and two are not.
/// </summary>
public void query_for_existence()
{
    var subject = new SuffixTree();
    subject.Extend(SampleText);

    string[] expectedPresent = { "much wood", "uck ch", "k if a" };
    foreach (string phrase in expectedPresent)
    {
        Assert.That(subject.Contains(phrase), Is.True);
    }

    string[] expectedAbsent = { "wooden", "wwood" };
    foreach (string phrase in expectedAbsent)
    {
        Assert.That(subject.Contains(phrase), Is.False);
    }
}
/// <summary>
/// Runs the same five substring queries against "smallstring" through three
/// implementations (suffix tree, <c>SearchUsingStringContains</c>, precomputed
/// hashes) and passes each to <c>PerformanceHelper.PerformanceTestAction</c>
/// with a label.
/// </summary>
public void PerformanceComparisonSmallString()
{
    string text = "smallstring";
    string[] queries = { "small", "mall", "string", "str", "lls" };

    // Both indexes are built up front, outside the measured actions.
    var suffixTree = new SuffixTree(text);
    var precomputedHashes = new PrecomputedHashes(text);

    PerformanceHelper.PerformanceTestAction(
        () => suffixTree.AllExistAsSubstrings(queries),
        "SuffixTree small string");

    PerformanceHelper.PerformanceTestAction(
        () => SearchUsingStringContains(text, queries),
        "String.Contains small string");

    PerformanceHelper.PerformanceTestAction(
        () => precomputedHashes.AllExistAsSubstrings(queries),
        "PrecomputedHashes small string");
}
/// <summary>
/// Interactive driver: repeatedly reads a word, builds a suffix tree for it,
/// prints the tree and, after the user presses Enter, runs the validation tests
/// through <c>runTests</c>.
/// Run tests tagged with a particular attribute, and invoke them using reflection.
/// Currently, only tests from this class are invoked.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
public static void Main(string[] args)
{
    bool isDone = false;
    while (!isDone)
    {
        Console.Write("Enter input string with unique final character: ");
        Console.Out.Flush();
        string word = Console.ReadLine();

        SuffixTree.Verbosity = StVerbosityLevel.Verbose;
        SuffixTree tree = null;
        bool isCreationSuccessful = true;
        try
        {
            tree = new SuffixTree(word);
            Console.WriteLine("Final suffix tree:");
            Console.WriteLine(tree.ToString());
        }
        catch (Exception ex)
        {
            isCreationSuccessful = false;
            Console.WriteLine();

            // The placeholder used to be "{0;s}", which is not a valid format
            // item: String.Format threw FormatException from inside this handler
            // and the original error was lost.
            Console.WriteLine(String.Format(
                "Suffix tree creation: Caught exception: {0}", ex.Message));
        }

        Console.WriteLine();

        if (isCreationSuccessful)
        {
            Console.Write("Press 'Enter' to proceed with validation: ");
            Console.Out.Flush();
            Console.ReadLine();
            try
            {
                runTests(tree);
            }
            catch (Exception ex)
            {
                Console.WriteLine();

                // Same placeholder fix as in the creation handler.
                Console.WriteLine(String.Format(
                    "Suffix tree testing: Caught exception: {0}", ex.Message));
            }
        }

        Console.Write("Continue (y or n)? ");
        Console.Out.Flush();
        string continueStr = Console.ReadLine();

        // Stop on end of input or on any non-empty answer that does not start
        // with 'y'; an empty answer continues.
        if (continueStr == null
            || (continueStr.Length > 0 && continueStr.ToLower()[0] != 'y'))
        {
            isDone = true;
        }
    }
}
/// <summary>
/// Creates an edge from <paramref name="parentNode"/> to a newly created child
/// node, labelled by the index range given by the two bounds. The edge id is
/// taken from the static <c>nextId</c> counter, which is incremented.
/// </summary>
/// <param name="tree">Owning tree; also passed to the new child node.</param>
/// <param name="parentNode">Node assigned to <c>ParentNode</c>.</param>
/// <param name="indexOfFirstChar">Value assigned to <c>BeginIndex</c>.</param>
/// <param name="indexOfLastChar">Value assigned to <c>EndIndex</c>.</param>
public StEdge(
    SuffixTree tree,
    StNode parentNode,
    int indexOfFirstChar,
    int indexOfLastChar)
{
    // Post-increment: this edge receives the current counter value.
    id = StEdge.nextId++;
    this.tree = tree;

    ParentNode = parentNode;
    // The child starts without a suffix node.
    ChildNode = new StNode(tree, null);

    BeginIndex = indexOfFirstChar;
    EndIndex = indexOfLastChar;
}
/// <summary>
/// Lazily enumerates the tree's nodes in queue order starting at the root,
/// yielding for each node a triplet of its index, its terminal flag and its keys.
/// </summary>
/// <param name="this">Tree to traverse.</param>
/// <returns>One <c>SuffixTreeNodeTriplet</c> per dequeued node.</returns>
public static IEnumerable<SuffixTreeNodeTriplet> BFS(this SuffixTree @this)
{
    var pending = new Queue<SuffixTreeNode>();
    pending.EnQueue(@this.Root);

    while (pending.IsEmpty == false)
    {
        SuffixTreeNode current = pending.DeQueue();

        var triplet = new SuffixTreeNodeTriplet(
            current.Index, current.IsTerminal, current.GetKeys());
        yield return triplet;

        // Children are enqueued only after the consumer resumes the iterator.
        pending.Fill(current.GetChildren());
    }
}
/// <summary>
/// Builds a suffix tree from <paramref name="array"/> and answers each query by
/// walking the tree with the nodes' <c>total</c> counters, printing either the
/// string found or "INVALID".
/// </summary>
/// <param name="array">Strings handed to the <c>SuffixTree</c> constructor.</param>
/// <param name="query">Ranks to resolve, one printed line per entry.</param>
static void findStrings(String[] array, int[] query)
{
    SuffixTree tree = new SuffixTree(array);

    foreach (int rank in query)
    {
        int remaining = rank;

        if (remaining + 1 > tree.Root.total)
        {
            Console.WriteLine("INVALID");
            continue;
        }

        string output = "";
        SuffixTree.Node current = tree.Root;
        int pointer = 0;

        // Children are indexed 0..25; index + 97 maps to 'a'..'z'.
        while (pointer < 26)
        {
            SuffixTree.Node child = current.Children[pointer];

            if (child == null)
            {
                pointer++;
                continue;
            }

            int subtreeTotal = child.total;
            if (subtreeTotal < remaining)
            {
                // The target lies beyond this child: skip its whole subtree.
                remaining -= subtreeTotal;
                pointer++;
                continue;
            }

            // The target lies in this child's subtree: take its letter and descend.
            output += (char)(pointer + 97);
            remaining--;
            if (remaining == 0)
            {
                break;
            }

            current = child;
            pointer = 0;
        }

        Console.WriteLine(output);
    }
}
/// <summary>
/// Reads a test-case count, then for each case reads two strings, inserts both
/// into one tree (terminated with "$" and "@") and prints the length and text
/// of their longest common substring.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
private static void Main(string[] args)
{
    int numTestCases = int.Parse(Console.ReadLine());

    int testCaseNum = 1;
    while (testCaseNum <= numTestCases)
    {
        string first = Console.ReadLine();
        string second = Console.ReadLine();

        SuffixTree tree = new SuffixTree();
        tree.ConstructTree(first + "$", 1);
        tree.ConstructTree(second + "@", 2);

        // Find the longest common substring of the two inputs.
        string common = tree.FindLongestCommonSubstring("$", "@");

        Console.WriteLine("Test {0}: {1}-{2}", testCaseNum, common.Length, common);

        testCaseNum++;
    }
}
/// <summary>
/// Loads the embedded "SavoyStemmerRules.xml" resource and fills the three rule
/// trees (<c>pluralreductionrules</c>, <c>femininereductionrules</c>,
/// <c>finalvowel</c>) from its steps.
/// </summary>
/// <exception cref="PTStemmerException">
/// The resource cannot be loaded, a step has no name, a rule lacks one of its
/// attributes or has a non-numeric size, or one of the three steps is missing.
/// </exception>
public void readRulesFromXML()
{
    XmlDocument doc = new XmlDocument();
    try
    {
        // Dispose the resource stream once loaded. A missing resource gives a
        // null stream, which Load rejects and the catch below wraps.
        using (var stream = System.Reflection.Assembly.GetExecutingAssembly()
            .GetManifestResourceStream("SavoyStemmerRules.xml"))
        {
            doc.Load(stream);
        }
    }
    catch (Exception e)
    {
        throw new PTStemmerException("Problem while parsing Savoy's XML stemming rules file.", e);
    }

    XmlElement root = doc.DocumentElement;
    XmlAttribute val, val2, val3;

    foreach (XmlNode step in root.ChildNodes)
    {
        // Only elements have attributes; comments and similar nodes return
        // null from Attributes and used to cause NullReferenceException.
        if (step.NodeType != XmlNodeType.Element)
            continue;

        val = step.Attributes["name"];
        if (val == null)
            throw new PTStemmerException("Problem while parsing Savoy's XML stemming rules file: Invalid step.");

        String stepName = val.Value;
        SuffixTree<SavoyRule> suffixes = new SuffixTree<SavoyRule>();
        setProperty(suffixes, "size", 0, step);

        foreach (XmlNode rule in step.ChildNodes)
        {
            // Same guard as above for non-element children of a step.
            if (rule.NodeType != XmlNodeType.Element)
                continue;

            val = rule.Attributes["suffix"];
            val2 = rule.Attributes["replacement"];
            val3 = rule.Attributes["size"];

            if (val == null || val2 == null || val3 == null)
                throw new PTStemmerException("Problem while parsing Savoy's XML stemming rules file: Invalid rule in "+stepName+".");

            String suffix = val.Value;
            String replacement = val2.Value;
            int size = 0;
            try
            {
                size = Convert.ToInt32(val3.Value);
            }
            catch (Exception e)
            {
                throw new PTStemmerException("Problem while parsing Savoy's XML stemming rules file: Missing or invalid rules properties on step "+stepName+".", e);
            }

            SavoyRule r = new SavoyRule(size, replacement);
            suffixes.addSuffix(suffix, r);
        }

        // Steps with any other name are parsed but not stored.
        if (stepName.Equals("pluralreduction"))
            pluralreductionrules = suffixes;
        else if (stepName.Equals("femininereduction"))
            femininereductionrules = suffixes;
        else if (stepName.Equals("finalvowel"))
            finalvowel = suffixes;
    }

    if (pluralreductionrules == null || femininereductionrules == null || finalvowel == null)
        throw new PTStemmerException("Problem while parsing Savoy's XML stemming rules file: Missing steps.");
}
/// <summary>
/// Stores an integer property on <paramref name="tree"/>: the value of the
/// attribute named <paramref name="propertyName"/> on <paramref name="node"/>
/// when present, otherwise <paramref name="defaultValue"/>.
/// </summary>
/// <param name="tree">Tree whose <c>Properties</c> entry is written.</param>
/// <param name="propertyName">Attribute name, also used as the property key.</param>
/// <param name="defaultValue">Value used when the attribute is absent.</param>
/// <param name="node">XML node whose attributes are read.</param>
private void setProperty(SuffixTree<SavoyRule> tree, String propertyName, int defaultValue, XmlNode node)
{
    XmlAttribute attribute = node.Attributes[propertyName];

    if (attribute == null)
    {
        tree.Properties[propertyName] = defaultValue;
        return;
    }

    tree.Properties[propertyName] = Convert.ToInt32(attribute.Value);
}
/// <summary>
/// Applies the first applicable rule from <paramref name="rules"/> to
/// <paramref name="st"/>, scanning the matches returned by
/// <c>getLongestSuffixesAndValues</c> from last to first.
/// </summary>
/// <param name="st">Word to stem.</param>
/// <param name="rules">Rule tree for one stemming step.</param>
/// <returns>
/// The word with the matched suffix replaced, or <paramref name="st"/> unchanged
/// when the word is too short for this step or no rule applies.
/// </returns>
private String applyRules(String st, SuffixTree<SavoyRule> rules)
{
    int length = st.Length - 1;

    // If the word is smaller than the minimum stemming size of this step, ignore it.
    if (length < rules.Properties["size"])
    {
        return st;
    }

    List<Pair<String, SavoyRule>> candidates = rules.getLongestSuffixesAndValues(st);

    int index = candidates.Count - 1;
    while (index >= 0)
    {
        Pair<String, SavoyRule> candidate = candidates[index];
        String suffix = candidate.First;
        SavoyRule rule = candidate.Second;

        if (length > rule.size)
        {
            String stem = st.Substring(0, st.Length - suffix.Length);
            return stem + rule.replacement;
        }

        index--;
    }

    return st;
}
/// <summary>
/// Runs the path-string validation on <paramref name="tree"/> and, when it
/// fails, prints an error line followed by the ids of the failed leaf nodes.
/// </summary>
/// <param name="tree">Tree to validate.</param>
/// <returns><c>true</c> when validation passed.</returns>
private static bool validateSuffixStrings(SuffixTree tree)
{
    List<int> failedLeafNodeIds;
    bool passed = validateSuffixStrings(tree, out failedLeafNodeIds);

    if (passed)
    {
        return passed;
    }

    Console.WriteLine("Error: Some paths had strings that did not match the underlying text.");

    string failedIds;
    if (failedLeafNodeIds.Count == 0)
    {
        failedIds = "None";
    }
    else
    {
        failedIds = String.Join(", ", failedLeafNodeIds.Select(x => x.ToString()));
    }

    Console.WriteLine(String.Format("Failed leaf nodes: {0:s}", failedIds));

    return passed;
}
/// <summary>
/// Collects the concatenated edge text of every root-to-leaf path in
/// <paramref name="tree"/> and checks each against the equally long tail of
/// <c>tree.Text</c>.
/// </summary>
/// <param name="tree">Tree to validate.</param>
/// <param name="failedLeafNodeIds">
/// Receives the ids of the leaf nodes whose path string differs from the text.
/// </param>
/// <returns><c>true</c> when every leaf path matched.</returns>
private static bool validateSuffixStrings(
    SuffixTree tree, out List<int> failedLeafNodeIds)
{
    Stack<Tuple<StEdge, string>> edgeStrings = new Stack<Tuple<StEdge, string>>();

    // Step 1: Populate edgeStrings with data from child edges of the root node.
    // Track any leaves that are immediate children of the root node.
    List<Tuple<StEdge, string>> leafEdgeStrings = new List<Tuple<StEdge, string>>();
    foreach (StEdge edge in tree.Root.ChildEdges())
    {
        Tuple<StEdge, string> edgeString =
            new Tuple<StEdge, string>(edge, tree.EdgeSubstring(edge));
        edgeStrings.Push(edgeString);
        if (!edge.ChildNode.HasChildEdges())
        {
            Console.WriteLine(String.Format("SuffixTreeTest: Found a leaf: {0:s}", edgeString.Item2));
            leafEdgeStrings.Add(edgeString);
        }
    }

    // Step 2: Walk the tree, adding the remaining edges. Keep track of leaf edges.
    while (edgeStrings.Count > 0)
    {
        Tuple<StEdge, string> edgeString = edgeStrings.Pop();
        foreach (StEdge childEdge in edgeString.Item1.ChildNode.ChildEdges())
        {
            Tuple<StEdge, string> newEdgeString = new Tuple<StEdge, string>(
                childEdge, edgeString.Item2 + tree.EdgeSubstring(childEdge));
            edgeStrings.Push(newEdgeString);
            if (!childEdge.ChildNode.HasChildEdges())
            {
                Console.WriteLine(String.Format("SuffixTreeTest: Found a leaf: {0:s}", newEdgeString.Item2));
                leafEdgeStrings.Add(newEdgeString);
            }
        }
    }

    // Step 3: Inspect the leaf edge data. Keep track of failed leaf nodes.
    failedLeafNodeIds = new List<int>();

    // Right-align both values in a column as wide as the text. The second
    // placeholder has to be index 1: the previous format used index 0 twice, so
    // the log showed the path string in both positions.
    string width = tree.Text.Length.ToString();
    string formatStr =
        "SuffixTreeTest: About to compare \"{0," + width + ":s}\" with \"{1," + width + ":s}\"";

    foreach (var leafEdgeString in leafEdgeStrings)
    {
        // Accumulated string should equal the corresponding substring of tree.Text.
        string pathStr = leafEdgeString.Item2;
        int len = pathStr.Length;
        string textStr = tree.RangeString(tree.Text.Length - len, tree.Text.Length - 1);

        Console.WriteLine(formatStr, pathStr, textStr);

        if (pathStr != textStr)
        {
            failedLeafNodeIds.Add(leafEdgeString.Item1.ChildNode.Id);
        }
    }

    return failedLeafNodeIds.Count == 0;
}