/// <summary>
/// Builds a data tree from the base tree and <c>toMap</c> in one call and verifies its
/// shape: the number of root connections, the weight of each connection, and that no
/// unmapped, crossed, or below-leaf connections exist.
/// </summary>
public void DocumentMappingTest2()
{
    IBaseTree tree = setUpTree();
    IDataTree dataTree = DataTreeBuilder.CreateDocumentMappedTree(tree, toMap);

    //make sure the root has the correct number of children
    Assert.AreEqual(2, dataTree.Root.Children.Count);

    // Look the two root connections up once; asserting they exist first turns a missing
    // branch into a readable assertion failure instead of a NullReferenceException.
    var treeBranch = dataTree.Root.Children.FirstOrDefault(c => c.EndPoint.Keyword == "tree");
    var othersBranch = dataTree.Root.Children.FirstOrDefault(c => c.EndPoint.Keyword == "others");
    Assert.IsNotNull(treeBranch, "root is missing the \"tree\" connection");
    Assert.IsNotNull(othersBranch, "root is missing the \"others\" connection");

    //make sure each of the children has the correct weight
    Assert.AreEqual(3, treeBranch.Weight);
    Assert.AreEqual(2, othersBranch.Weight);

    //make sure a branch on the content tree which is not existent in the mapped tree does not exist
    Assert.IsNull(dataTree.Root.Children.FirstOrDefault(c => c.EndPoint.Keyword == "nonexistent"));

    //make sure each child's connections have their own correct weight
    var contentLeaf = treeBranch.EndPoint.Children.FirstOrDefault(x => x.EndPoint.Keyword == "content");
    var mappedLeaf = treeBranch.EndPoint.Children.FirstOrDefault(x => x.EndPoint.Keyword == "mapped");
    var seriesLeaf = othersBranch.EndPoint.Children.FirstOrDefault(x => x.EndPoint.Keyword == "series");
    var goingLeaf = othersBranch.EndPoint.Children.FirstOrDefault(x => x.EndPoint.Keyword == "going");
    Assert.IsNotNull(contentLeaf, "\"tree\" is missing the \"content\" connection");
    Assert.IsNotNull(mappedLeaf, "\"tree\" is missing the \"mapped\" connection");
    Assert.IsNotNull(seriesLeaf, "\"others\" is missing the \"series\" connection");
    Assert.IsNotNull(goingLeaf, "\"others\" is missing the \"going\" connection");
    Assert.AreEqual(1, contentLeaf.Weight);
    Assert.AreEqual(1, mappedLeaf.Weight);
    Assert.AreEqual(1, seriesLeaf.Weight);
    Assert.AreEqual(1, goingLeaf.Weight);

    //make sure children connections do not cross
    Assert.IsNull(othersBranch.EndPoint.Children.FirstOrDefault(x => x.EndPoint.Keyword == "content"));

    //make sure leaf children do not have any connections
    Assert.AreEqual(0, contentLeaf.EndPoint.Children.Count);
}
/// <summary>
/// Reads "TIME.ALL" from <c>testpath</c>, splits it into pieces on the "*TEXT" marker,
/// maps every piece onto the base tree loaded from <paramref name="contentTreeName"/>,
/// and saves each resulting data tree as "&lt;n&gt;.dtree" under the "\datatrees\" folder,
/// where n is the 1-based position of the piece.
/// </summary>
/// <param name="contentTreeName">File name of the base tree, relative to <c>testpath</c>.</param>
public static void MakeTrees(string contentTreeName)
{
    IIO reader = new FileIO();
    IEnumerable<string> sourceLines = reader.ReadSourceIterable(testpath + "TIME.ALL");
    ITextExtractor extractor = new BeginMarkerExtraction(sourceLines, "*TEXT");
    ITreeIO treeIo = new TreeIO();
    IBaseTree contentTree = treeIo.LoadBaseTree(testpath + contentTreeName);

    for (int index = 1; extractor.HasNextContent(); index++)
    {
        string rawPiece = extractor.FindNextContent();
        // The saved tree is named after its sequence number, not the name in the text.
        string treeName = index.ToString();
        string body = Helpers.ConsumeName(rawPiece);

        IDataTree mapped = DataTreeBuilder.CreateDocumentMappedTree(contentTree);
        DataTreeBuilder.AddToDataTree(mapped, body);
        mapped.Name = treeName;

        treeIo.SaveDataTree(mapped, testpath + @"\datatrees\" + treeName + ".dtree");
    }
}
/// <summary>
/// Creates a data tree over <paramref name="baseTree"/> and maps the shared
/// <c>toMap</c> content onto it.
/// </summary>
/// <param name="baseTree">Base tree the new data tree is built on.</param>
/// <returns>The data tree after <c>toMap</c> has been added to it.</returns>
public IDataTree setUpDataTree(IBaseTree baseTree)
{
    IDataTree mapped = DataTreeBuilder.CreateDocumentMappedTree(baseTree);
    DataTreeBuilder.AddToDataTree(mapped, toMap);
    return mapped;
}
/// <summary>
/// Calls <c>DataTreeBuilder.AddAllToDataTree</c> with a null content collection on a
/// freshly created data tree.
/// </summary>
// NOTE(review): there is no assertion in the body; presumably an expected-exception
// attribute sits on this method outside the visible source - confirm.
public void DocumentMapNonexistentContentEnumberable()
{
    IBaseTree baseTree = setUpTree();
    IDataTree emptyTree = DataTreeBuilder.CreateDocumentMappedTree(baseTree);

    DataTreeBuilder.AddAllToDataTree(emptyTree, null);
}
/// <summary>
/// Maps <c>toMapEnumerable</c> onto a data tree, replaces its base tree with null, and
/// verifies that <c>GetBaseTree</c> then returns null.
/// </summary>
public void DocumentMapChangeBaseTree()
{
    IBaseTree tree = setUpTree();
    IDataTree dataTree = DataTreeBuilder.CreateDocumentMappedTree(tree);
    DataTreeBuilder.AddAllToDataTree(dataTree, toMapEnumerable);

    dataTree.SetBaseTree(null);

    // IsNull states the intent directly and avoids the swapped (actual, expected)
    // argument order of the former AreEqual call.
    Assert.IsNull(dataTree.GetBaseTree());
}
/// <summary>
/// Builds three data trees over the same base tree - from <c>toMap</c>,
/// <c>shouldProbablyMatch</c> and <c>shouldProbablyNotMatch</c> - and checks that the
/// first compares as a match against itself and the second, but not against the third.
/// </summary>
public void CompareTrees()
{
    IBaseTree sharedBase = setUpTree();

    IDataTree reference = DataTreeBuilder.CreateDocumentMappedTree(sharedBase);
    DataTreeBuilder.AddToDataTree(reference, toMap);

    IDataTree similar = DataTreeBuilder.CreateDocumentMappedTree(sharedBase);
    DataTreeBuilder.AddToDataTree(similar, shouldProbablyMatch);

    IDataTree different = DataTreeBuilder.CreateDocumentMappedTree(sharedBase);
    DataTreeBuilder.AddToDataTree(different, shouldProbablyNotMatch);

    // CompareTo is used here as a boolean "is a match" predicate.
    bool matchesItself = reference.CompareTo(reference);
    Assert.IsTrue(matchesItself);

    bool matchesSimilar = reference.CompareTo(similar);
    Assert.IsTrue(matchesSimilar);

    bool matchesDifferent = reference.CompareTo(different);
    Assert.IsFalse(matchesDifferent);
}
/// <summary>
/// Handles the query popup's OK button: maps the query text onto the current base tree,
/// compares it against every saved data tree in the folder named by
/// <c>documentLabel.Content</c>, reports the matching documents in a message box, adds
/// their names to <c>documents</c>, and closes the popup.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void queryOkay_Click(object sender, RoutedEventArgs e)
{
    // Without a content tree there is nothing to map the query onto; tell the user
    // instead of handing null to the tree builder.
    if (baseTree == null)
    {
        MessageBox.Show("Please select a content tree before running a query.");
        return;
    }

    string query = queryBox.Text;
    List<string> matches = new List<string>();

    // NOTE(review): the dialog is created and disposed but never shown here.
    using (Ookii.Dialogs.Wpf.ProgressDialog dial = new ProgressDialog())
    {
        IDataTree queryTree = DataTreeBuilder.CreateDocumentMappedTree(baseTree);
        DataTreeBuilder.AddToDataTreeBoyerMoore(queryTree, query);
        ITreeIO tio = new TreeIO();

        foreach (String s in Directory.EnumerateFiles(documentLabel.Content.ToString()))
        {
            IDataTree docTree = tio.LoadDataTree(s);
            if (queryTree.CompareTo(docTree))
            {
                matches.Add(s);
            }
        }
    }

    if (matches.Count > 0)
    {
        StringBuilder sb = new StringBuilder("Documents which match \"" + query + "\":");
        foreach (string s in matches)
        {
            // GetFileNameWithoutExtension keeps the whole name when the file has no
            // extension; the previous Take(LastIndexOf('.')) produced an empty name
            // in that case because LastIndexOf returned -1.
            string docName = System.IO.Path.GetFileNameWithoutExtension(s);
            documents.Add(docName);
            sb.Append(" " + docName);
        }
        MessageBox.Show(sb.ToString());
    }
    else
    {
        MessageBox.Show("No documents match the query \"" + query + "\"");
    }

    queryPopup.IsOpen = false;
}
/// <summary>
/// Calls the two-argument <c>DataTreeBuilder.CreateDocumentMappedTree</c> with a valid
/// base tree and null content.
/// </summary>
// NOTE(review): there is no assertion in the body; presumably an expected-exception
// attribute sits on this method outside the visible source - confirm.
public void DocumentMapNonexistentContent2()
{
    IBaseTree baseTree = setUpTree();

    IDataTree unused = DataTreeBuilder.CreateDocumentMappedTree(baseTree, null);
}
/// <summary>
/// Calls the two-argument <c>DataTreeBuilder.CreateDocumentMappedTree</c> with a null
/// base tree and the shared <c>toMap</c> content.
/// </summary>
// NOTE(review): there is no assertion in the body; presumably an expected-exception
// attribute sits on this method outside the visible source - confirm.
public void DocumentMapNonexistentBaseTree2()
{
    IDataTree unused = DataTreeBuilder.CreateDocumentMappedTree(null, toMap);
}
/// <summary>
/// Runs every "*FIND" query from "TIME.QUE" against the saved data trees in the
/// "\datatrees" folder under <c>testpath</c>, compares the retrieved documents with the
/// expected ones listed in "TIME.REL", and prints per-query and summary precision and
/// recall to the console.
/// </summary>
/// <param name="contentTreeName">File name of the base tree, relative to <c>testpath</c>.</param>
public static void Compare(string contentTreeName)
{
    Console.SetBufferSize(100, 20000);
    IIO io = new FileIO();
    IEnumerable<string> file = io.ReadSourceIterable(testpath + "TIME.QUE");
    IEnumerable<string> expectedResults = io.ReadSourceIterable(testpath + "TIME.REL");
    ITextExtractor it = new BeginMarkerExtraction(file, "*FIND");
    ITreeIO tio = new TreeIO();
    IBaseTree tree = tio.LoadBaseTree(testpath + contentTreeName);

    double totalRecall = 0;
    double totalPrecision = 0;
    // Sentinels outside [0, 1] so the first query always replaces them.
    double bestRecall = -1;
    double worstRecall = 2;
    double bestPrecision = -1;
    double worstPrecision = 2;
    double count = 0;

    // Dispose the enumerator so whatever backs the expected-results sequence is released.
    using (IEnumerator<string> resultsEnum = expectedResults.GetEnumerator())
    {
        while (it.HasNextContent())
        {
            string query = it.FindNextContent();
            Console.WriteLine("---------------------------------");
            string queryName = Helpers.GetNameWhenFirst(query);
            Console.WriteLine("Query: " + queryName);
            query = Helpers.ConsumeName(query);
            Console.WriteLine(query);

            IDataTree queryTree = DataTreeBuilder.CreateDocumentMappedTree(tree);
            DataTreeBuilder.AddToDataTree(queryTree, query);
            queryTree.PrintDataTree();

            Console.WriteLine("Expected Results: ");
            // A query with no remaining expected-results line is treated as expecting
            // nothing, rather than looping forever on an exhausted enumerator.
            string expectedLine = NextNonEmptyLine(resultsEnum);
            string expected = expectedLine == null ? string.Empty : Helpers.ConsumeName(expectedLine);
            Console.WriteLine(expected);
            expected = expected.Trim();

            // Drop empty entries so runs of spaces do not inflate the recall denominator.
            string[] expectedArray = expected.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            HashSet<string> expectedNames = new HashSet<string>(expectedArray);

            double relevant = 0;
            double totalRetrieved = 0;
            Console.WriteLine("Actual Results: ");
            foreach (String s in Directory.EnumerateFiles(testpath + @"\datatrees"))
            {
                IDataTree docTree = tio.LoadDataTree(s);
                if (queryTree.CompareTo(docTree))
                {
                    Console.WriteLine(" Found: " + docTree.Name);
                    totalRetrieved++;
                    if (expectedNames.Contains(docTree.Name))
                    {
                        relevant++;
                    }
                }
                else if (expectedNames.Contains(docTree.Name))
                {
                    // Expected document that the query failed to retrieve.
                    Console.WriteLine(" Expected: " + docTree.Name);
                }
            }

            Console.WriteLine();
            Console.WriteLine("Precision: " + relevant + "/" + totalRetrieved);
            Console.WriteLine("Recall: " + relevant + "/" + (expectedArray.Length));
            Console.WriteLine();

            count++;
            // Both ratios fall back to 0 when their denominator is 0.
            double recall = expectedArray.Length > 0 ? relevant / expectedArray.Length : 0;
            double precision = totalRetrieved > 0 ? relevant / totalRetrieved : 0;

            totalPrecision += precision;
            totalRecall += recall;
            if (precision > bestPrecision)
            {
                bestPrecision = precision;
            }
            if (precision < worstPrecision)
            {
                worstPrecision = precision;
            }
            if (recall > bestRecall)
            {
                bestRecall = recall;
            }
            if (recall < worstRecall)
            {
                worstRecall = recall;
            }
        }
    }

    Console.WriteLine("-------------------");
    Console.WriteLine("Average Precision: " + totalPrecision / count);
    Console.WriteLine("Average Recall: " + totalRecall / count);
    Console.WriteLine("Worst Precision: " + worstPrecision);
    Console.WriteLine("Worst Recall: " + worstRecall);
    Console.WriteLine("Best Precision: " + bestPrecision);
    Console.WriteLine("Best Recall: " + bestRecall);
}

// Advances the enumerator to the next non-empty line and returns it, or null when the
// sequence ends first.
private static string NextNonEmptyLine(IEnumerator<string> lines)
{
    while (lines.MoveNext())
    {
        if (!string.IsNullOrEmpty(lines.Current))
        {
            return lines.Current;
        }
    }
    return null;
}
/// <summary>
/// Handles the build-data-tree popup's OK button. After validating that a base tree, a
/// format, and a format marker are set, it lets the user pick a source file, extracts
/// each document from it, maps every document onto the base tree, saves the resulting
/// data trees, and lists the document names in <c>documentList</c>.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void formatOkay_Click(object sender, RoutedEventArgs e)
{
    // Validation: each failure returns early and leaves the popup open.
    if (baseTree == null)
    {
        MessageBox.Show("Please select a content tree for the data tree.");
        return;
    }

    if (formatBox.SelectedIndex == -1)
    {
        formatBox.BorderBrush = Brushes.Red;
        return;
    }

    if (string.IsNullOrEmpty(documentFormatBox.Text))
    {
        documentFormatBox.BorderBrush = Brushes.Red;
        return;
    }

    OpenFileDialog picker = new OpenFileDialog { FileName = "Tree", DefaultExt = ".txt" };
    bool? picked = picker.ShowDialog();

    if (picked == true)
    {
        string sourcePath = picker.FileName;
        // The data-tree location is the selected file's path with "datatrees" appended
        // directly (no separator in between); the save path below uses the same prefix.
        documentLabel.Content = sourcePath + "datatrees";

        using (ProgressDialog progress = new ProgressDialog())
        {
            progress.ProgressBarStyle = ProgressBarStyle.MarqueeProgressBar;
            progress.Show();
            progress.Description = "Analyzing text...";

            IIO reader = new FileIO();
            ITextExtractor extractor;
            int formatIndex = formatBox.SelectedIndex;
            if (formatIndex == 0)
            {
                // Index 0: the whole file is read at once and handed to the XML extractor.
                string wholeText = reader.ReadSource(sourcePath);
                extractor = new XMLTextExtractor(wholeText, documentFormatBox.Text);
            }
            else if (formatIndex == 1)
            {
                // Index 1: the file is read as a sequence and split on a begin marker.
                var sourceLines = reader.ReadSourceIterable(sourcePath);
                extractor = new BeginMarkerExtraction(sourceLines, documentFormatBox.Text);
            }
            else
            {
                throw new InvalidOperationException();
            }

            documents = new ObservableCollection<string>();
            while (extractor.HasNextContent())
            {
                string piece = extractor.FindNextContent();
                string docName = Helpers.GetNameWhenFirst(piece);
                documents.Add(docName);

                IDataTree mapped = DataTreeBuilder.CreateDocumentMappedTree(baseTree);
                DataTreeBuilder.AddToDataTree(mapped, piece);

                ITreeIO treeIo = new TreeIO();
                treeIo.SaveDataTree(mapped, sourcePath + @"datatrees\" + docName + ".dtree");
            }

            documentList.ItemsSource = documents;
        }
    }

    // Reached whether or not a file was picked.
    buildDataTreePopup.IsOpen = false;
}