예제 #1
0
        /// <summary>
        /// Maps <c>toMap</c> onto a document-mapped tree built from <c>setUpTree()</c> and verifies
        /// the resulting connections: the number of root children, the weight of each connection,
        /// the absence of unmapped or crossed branches, and that a leaf connection has no children.
        /// </summary>
        public void DocumentMappingTest()
        {
            IBaseTree tree = setUpTree();

            IDataTree dataTree = DataTreeBuilder.CreateDocumentMappedTree(tree);

            DataTreeBuilder.AddToDataTree(dataTree, toMap);

            var rootChildren = dataTree.Root.Children;

            //make sure the root has the correct number of children
            Assert.AreEqual(2, rootChildren.Count);

            // Look each top-level branch up once; asserting non-null here gives a readable
            // failure instead of a NullReferenceException further down.
            var treeBranch = rootChildren.FirstOrDefault(c => c.EndPoint.Keyword == "tree");
            var othersBranch = rootChildren.FirstOrDefault(c => c.EndPoint.Keyword == "others");
            Assert.IsNotNull(treeBranch);
            Assert.IsNotNull(othersBranch);

            //make sure each of the children has the correct weight
            Assert.AreEqual(3, treeBranch.Weight);
            Assert.AreEqual(2, othersBranch.Weight);

            //make sure a branch on the content tree which is not existent in the mapped tree does not exist
            Assert.IsNull(rootChildren.FirstOrDefault(c => c.EndPoint.Keyword == "nonexistent"));

            //make sure each child's connections have their own correct weight
            var treeChildren = treeBranch.EndPoint.Children;
            var contentLeaf = treeChildren.FirstOrDefault(x => x.EndPoint.Keyword == "content");
            Assert.IsNotNull(contentLeaf);
            Assert.AreEqual(1, contentLeaf.Weight);
            Assert.AreEqual(1, treeChildren.FirstOrDefault(x => x.EndPoint.Keyword == "mapped").Weight);

            var othersChildren = othersBranch.EndPoint.Children;
            Assert.AreEqual(1, othersChildren.FirstOrDefault(x => x.EndPoint.Keyword == "series").Weight);
            Assert.AreEqual(1, othersChildren.FirstOrDefault(x => x.EndPoint.Keyword == "going").Weight);

            //make sure children connections do not cross
            Assert.IsNull(othersChildren.FirstOrDefault(x => x.EndPoint.Keyword == "content"));

            //make sure leaf children do not have any connections
            Assert.AreEqual(0, contentLeaf.EndPoint.Children.Count);
        }
예제 #2
0
        /// <summary>
        /// Builds one data tree per piece of content extracted from the TIME.ALL file under
        /// <c>testpath</c> (using a <c>BeginMarkerExtraction</c> with the "*TEXT" marker) and saves
        /// each tree as a ".dtree" file in the datatrees folder.
        /// </summary>
        /// <param name="contentTreeName">File name, appended to <c>testpath</c>, of the base tree to load.</param>
        public static void MakeTrees(string contentTreeName)
        {
            IIO reader = new FileIO();
            IEnumerable<string> sourceLines = reader.ReadSourceIterable(testpath + "TIME.ALL");
            ITextExtractor extractor = new BeginMarkerExtraction(sourceLines, "*TEXT");

            ITreeIO treeIo = new TreeIO();
            IBaseTree contentTree = treeIo.LoadBaseTree(testpath + contentTreeName);

            // Trees are named by their 1-based position in the extraction order.
            for (int index = 1; extractor.HasNextContent(); index++) {
                string body = Helpers.ConsumeName(extractor.FindNextContent());
                string label = index.ToString();

                IDataTree mapped = DataTreeBuilder.CreateDocumentMappedTree(contentTree);
                DataTreeBuilder.AddToDataTree(mapped, body);
                mapped.Name = label;

                treeIo.SaveDataTree(mapped, testpath + @"\datatrees\" + label + ".dtree");
            }
        }
예제 #3
0
        /// <summary>
        /// Creates a document-mapped data tree over <paramref name="baseTree"/> and adds
        /// <c>toMap</c> to it.
        /// </summary>
        /// <param name="baseTree">Base tree passed to <c>DataTreeBuilder.CreateDocumentMappedTree</c>.</param>
        /// <returns>The data tree after <c>toMap</c> has been added.</returns>
        public IDataTree setUpDataTree(IBaseTree baseTree)
        {
            IDataTree mappedTree = DataTreeBuilder.CreateDocumentMappedTree(baseTree);
            DataTreeBuilder.AddToDataTree(mappedTree, toMap);

            return mappedTree;
        }
예제 #4
0
        /// <summary>
        /// Adds <c>null</c> content to a freshly created document-mapped tree.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the method makes no assertions, so the expected outcome is presumably
        /// declared by an attribute outside this view — confirm.
        /// </remarks>
        public void DocumentMapNonexistentContent()
        {
            IDataTree mappedTree = DataTreeBuilder.CreateDocumentMappedTree(setUpTree());

            // The content argument is deliberately null.
            DataTreeBuilder.AddToDataTree(mappedTree, null);
        }
예제 #5
0
        /// <summary>
        /// Populates a document-mapped tree, clears its base tree, verifies the base tree reads
        /// back as <c>null</c>, and then attempts to add further content.
        /// </summary>
        /// <remarks>
        /// NOTE(review): nothing is asserted after the final <c>AddToDataTree</c> call, so the
        /// expected outcome is presumably declared by an attribute outside this view — confirm.
        /// </remarks>
        public void DocumentMapNullBaseTree()
        {
            IBaseTree tree = setUpTree();

            IDataTree dataTree = DataTreeBuilder.CreateDocumentMappedTree(tree);

            DataTreeBuilder.AddAllToDataTree(dataTree, toMapEnumerable);

            dataTree.SetBaseTree(null);
            // IsNull states the intent directly and avoids the swapped expected/actual
            // arguments of AreEqual(actual, null).
            Assert.IsNull(dataTree.GetBaseTree());

            // Adding content while the base tree is null is the scenario under test.
            DataTreeBuilder.AddToDataTree(dataTree, toMap);
        }
예제 #6
0
        /// <summary>
        /// Builds three data trees over the same base tree — from <c>toMap</c>,
        /// <c>shouldProbablyMatch</c> and <c>shouldProbablyNotMatch</c> — and checks that
        /// <c>CompareTo</c> reports the first tree as matching itself and the second,
        /// but not the third.
        /// </summary>
        public void CompareTrees()
        {
            IBaseTree sharedTree = setUpTree();

            // Reference tree that the other two are compared against.
            IDataTree reference = DataTreeBuilder.CreateDocumentMappedTree(sharedTree);
            DataTreeBuilder.AddToDataTree(reference, toMap);

            IDataTree similar = DataTreeBuilder.CreateDocumentMappedTree(sharedTree);
            DataTreeBuilder.AddToDataTree(similar, shouldProbablyMatch);

            IDataTree different = DataTreeBuilder.CreateDocumentMappedTree(sharedTree);
            DataTreeBuilder.AddToDataTree(different, shouldProbablyNotMatch);

            Assert.IsTrue(reference.CompareTo(reference));
            Assert.IsTrue(reference.CompareTo(similar));
            Assert.IsFalse(reference.CompareTo(different));
        }
예제 #7
0
 /// <summary>
 /// Calls <c>DataTreeBuilder.AddToDataTree</c> with a <c>null</c> data tree and <c>toMap</c>.
 /// </summary>
 /// <remarks>
 /// NOTE(review): the method makes no assertions, so the expected outcome is presumably
 /// declared by an attribute outside this view — confirm.
 /// </remarks>
 public void DocumentMapNonexistentDataTree()
 {
     // The data tree argument is deliberately the null literal.
     DataTreeBuilder.AddToDataTree(null, toMap);
 }
예제 #8
0
        /// <summary>
        /// Evaluates retrieval quality: for every query extracted from TIME.QUE (marker "*FIND"),
        /// builds a query data tree, compares it against every stored data tree in the datatrees
        /// folder, and prints the per-query matches along with precision and recall. Average,
        /// best and worst precision/recall over all queries are printed at the end.
        /// </summary>
        /// <param name="contentTreeName">File name, appended to <c>testpath</c>, of the base tree to load.</param>
        /// <exception cref="InvalidOperationException">
        /// TIME.REL contains no further non-empty line for a query that still needs expected results.
        /// </exception>
        public static void Compare(string contentTreeName)
        {
            Console.SetBufferSize(100, 20000);
            IIO io = new FileIO();
            IEnumerable<string> file = io.ReadSourceIterable(testpath + "TIME.QUE");
            IEnumerable<string> expectedResults = io.ReadSourceIterable(testpath + "TIME.REL");

            double totalRecall = 0;
            double totalPrecision = 0;
            // Best/worst start outside the 0..1 range the ratios are expected to fall in,
            // so the first query always replaces them.
            double bestRecall = -1;
            double worstRecall = 2;
            double bestPrecision = -1;
            double worstPrecision = 2;
            double count = 0;

            // The enumerator is disposable; release it (and whatever source it holds) when done.
            using (IEnumerator<string> resultsEnum = expectedResults.GetEnumerator()) {
                ITextExtractor it = new BeginMarkerExtraction(file, "*FIND");

                ITreeIO tio = new TreeIO();
                IBaseTree tree = tio.LoadBaseTree(testpath + contentTreeName);

                while (it.HasNextContent()) {
                    string query = it.FindNextContent();
                    Console.WriteLine("---------------------------------");
                    string queryName = Helpers.GetNameWhenFirst(query);
                    Console.WriteLine("Query: " + queryName);
                    query = Helpers.ConsumeName(query);

                    Console.WriteLine(query);

                    IDataTree queryTree = DataTreeBuilder.CreateDocumentMappedTree(tree);
                    DataTreeBuilder.AddToDataTree(queryTree, query);

                    queryTree.PrintDataTree();

                    Console.WriteLine("Expected Results: ");
                    // Expected results are consumed in step with the queries: the next non-empty
                    // line of TIME.REL belongs to the current query.
                    string expected = Helpers.ConsumeName(NextExpectedResultLine(resultsEnum));
                    Console.WriteLine(expected);

                    expected = expected.Trim();
                    string[] expectedArray = expected.Split(' ');
                    double relevant = 0;
                    double totalRetrieved = 0;

                    Console.WriteLine("Actual Results: ");
                    foreach (String s in Directory.EnumerateFiles(testpath + @"\datatrees")) {
                        IDataTree docTree = tio.LoadDataTree(s);
                        if (queryTree.CompareTo(docTree)) {
                            Console.WriteLine(" Found: " + docTree.Name);
                            totalRetrieved++;
                            if (expectedArray.Contains(docTree.Name)) {
                                relevant++;
                            }
                        } else if (expectedArray.Contains(docTree.Name)) {
                            // An expected document that the comparison did not retrieve.
                            Console.WriteLine(" Expected: " + docTree.Name);
                        }
                    }

                    Console.WriteLine();
                    Console.WriteLine("Precision: " + relevant + "/" + totalRetrieved );
                    Console.WriteLine("Recall: " + relevant + "/" + (expectedArray.Length));
                    Console.WriteLine();

                    count++;
                    double recall = relevant / expectedArray.Length;
                    // Precision is defined as 0 when nothing was retrieved (avoids 0/0).
                    double precision = 0;
                    if (totalRetrieved > 0) {
                        precision = relevant / totalRetrieved;
                    }
                    totalPrecision += precision;
                    totalRecall += recall;

                    if (precision > bestPrecision) {
                        bestPrecision = precision;
                    }
                    if (precision < worstPrecision) {
                        worstPrecision = precision;
                    }

                    if (recall > bestRecall) {
                        bestRecall = recall;
                    }
                    if (recall < worstRecall) {
                        worstRecall = recall;
                    }
                }
            }

            Console.WriteLine("-------------------");
            Console.WriteLine("Average Precision: " + totalPrecision / count);
            Console.WriteLine("Average Recall: " + totalRecall / count);
            Console.WriteLine("Worst Precision: " + worstPrecision);
            Console.WriteLine("Worst Recall: " + worstRecall);
            Console.WriteLine("Best Precision: " + bestPrecision);
            Console.WriteLine("Best Recall: " + bestRecall);
        }

        // Advances the enumerator to its next non-empty line and returns that line;
        // throws instead of looping forever when the source runs out.
        private static string NextExpectedResultLine(IEnumerator<string> lines)
        {
            while (lines.MoveNext()) {
                if (!string.IsNullOrEmpty(lines.Current)) {
                    return lines.Current;
                }
            }

            throw new InvalidOperationException("No expected results left in TIME.REL for the current query.");
        }
예제 #9
0
        /// <summary>
        /// Click handler that validates the form, asks the user for a source file, extracts each
        /// piece of content from it with the selected extractor, and saves one data tree per
        /// piece. The extracted names are shown in <c>documentList</c>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        /// <exception cref="InvalidOperationException">
        /// The selected format index is neither 0 nor 1.
        /// </exception>
        private void formatOkay_Click(object sender, RoutedEventArgs e)
        {
            // Validation: each failed check returns early and leaves the popup open.
            if (baseTree == null)
            {
                MessageBox.Show("Please select a content tree for the data tree.");
                return;
            }

            if (formatBox.SelectedIndex == -1)
            {
                formatBox.BorderBrush = Brushes.Red;
                return;
            }

            if (string.IsNullOrEmpty(documentFormatBox.Text))
            {
                documentFormatBox.BorderBrush = Brushes.Red;
                return;
            }

            OpenFileDialog picker = new OpenFileDialog
            {
                FileName = "Tree",
                DefaultExt = ".txt"
            };

            bool? accepted = picker.ShowDialog();

            if (accepted.HasValue && accepted.Value)
            {
                string sourcePath = picker.FileName;
                documentLabel.Content = sourcePath + "datatrees";

                using (Ookii.Dialogs.Wpf.ProgressDialog progress = new ProgressDialog())
                {
                    progress.ProgressBarStyle = ProgressBarStyle.MarqueeProgressBar;
                    progress.Show();
                    progress.Description = "Analyzing text...";

                    IIO reader = new FileIO();
                    ITextExtractor extractor = null;

                    // Index 0 selects the XML extractor, index 1 the begin-marker extractor.
                    int formatIndex = formatBox.SelectedIndex;
                    if (formatIndex == 0)
                    {
                        extractor = new XMLTextExtractor(reader.ReadSource(sourcePath), documentFormatBox.Text);
                    }
                    else if (formatIndex == 1)
                    {
                        extractor = new BeginMarkerExtraction(reader.ReadSourceIterable(sourcePath), documentFormatBox.Text);
                    }
                    else
                    {
                        throw new InvalidOperationException();
                    }

                    documents = new ObservableCollection<string>();
                    while (extractor.HasNextContent())
                    {
                        string body = extractor.FindNextContent();
                        string title = Helpers.GetNameWhenFirst(body);
                        documents.Add(title);

                        IDataTree mapped = DataTreeBuilder.CreateDocumentMappedTree(baseTree);
                        DataTreeBuilder.AddToDataTree(mapped, body);

                        ITreeIO writer = new TreeIO();
                        writer.SaveDataTree(mapped, sourcePath + @"datatrees\" + title + ".dtree");
                    }

                    documentList.ItemsSource = documents;
                }
            }

            // The popup closes whether or not a file was chosen.
            buildDataTreePopup.IsOpen = false;
        }