Example #1
0
        /// <summary>
        /// Builds one data tree for each "*TEXT"-delimited entry of TIME.ALL and saves it
        /// as a numbered .dtree file in the datatrees folder under <c>testpath</c>.
        /// </summary>
        /// <param name="contentTreeName">File name, appended to <c>testpath</c>, of the base tree to load.</param>
        public static void MakeTrees(string contentTreeName)
        {
            IIO sourceReader = new FileIO();
            IEnumerable<string> sourceLines = sourceReader.ReadSourceIterable(testpath + "TIME.ALL");
            ITextExtractor entries = new BeginMarkerExtraction(sourceLines, "*TEXT");

            ITreeIO treeStore = new TreeIO();
            IBaseTree contentTree = treeStore.LoadBaseTree(testpath + contentTreeName);

            // Each entry is named after its 1-based position in the source file.
            for (int position = 1; entries.HasNextContent(); position++) {
                string entryName = position.ToString();
                string entryText = Helpers.ConsumeName(entries.FindNextContent());

                IDataTree entryTree = DataTreeBuilder.CreateDocumentMappedTree(contentTree);
                DataTreeBuilder.AddToDataTree(entryTree, entryText);
                entryTree.Name = entryName;

                treeStore.SaveDataTree(entryTree, testpath + @"\datatrees\" + entryName + ".dtree");
            }
        }
Example #2
0
        /// <summary>
        /// Loads the saved word suggestor from "WordSuggestions", builds a base tree with
        /// <c>BuildTreeGoodMatches</c>, and saves that tree as "AutoTree2.tree".
        /// </summary>
        /// <exception cref="System.InvalidOperationException">
        /// The loaded object is null or is not a <c>WordSuggestor</c>.
        /// </exception>
        public static void BuildContentTreeGoodMatches()
        {
            ITreeIO tio = new TreeIO();
            string suggestorPath = testpath + "WordSuggestions";
            WordSuggestor ws = tio.LoadObject(suggestorPath) as WordSuggestor;

            // "as" yields null on a type mismatch; report that clearly instead of letting
            // the call below fail with a NullReferenceException.
            if (ws == null) {
                throw new System.InvalidOperationException(
                    "Could not load a WordSuggestor from '" + suggestorPath + "'.");
            }

            IBaseTree tree = ws.BuildTreeGoodMatches();
            tio.SaveBaseTree(tree, testpath + "AutoTree2.tree");
        }
Example #3
0
        // Saves a data tree to disk, then reads that same file back through LoadBaseTree.
        // NOTE(review): nothing is asserted after the load; presumably an expected-exception
        // attribute outside this view supplies the pass condition - confirm.
        public void LoadInvalidFileTest2()
        {
            ITreeIO   treeIo    = new TreeIO();
            IBaseTree sourceBase = setUpBaseTree();
            IDataTree savedTree  = setUpDataTree(sourceBase);

            treeIo.SaveDataTree(savedTree, location);
            Assert.IsTrue(File.Exists(location));

            // The file holds a data tree; loading it as a base tree is the call under test.
            treeIo.LoadBaseTree(location);
        }
Example #4
0
        /// <summary>
        /// Feeds every "*TEXT"-delimited entry of TIME.ALL into a new word suggestor and
        /// saves the suggestor as "WordSuggestions" under <c>testpath</c>.
        /// </summary>
        public static void BuildSuggestor()
        {
            IIO sourceReader = new FileIO();
            ITextExtractor entries = new BeginMarkerExtraction(
                sourceReader.ReadSourceIterable(testpath + "TIME.ALL"),
                "*TEXT");

            Console.WriteLine("Building suggestion base");
            WordSuggestor suggestor = new WordSuggestor();
            suggestor.addAll(entries);

            Console.WriteLine("Saving tree");
            ITreeIO treeStore = new TreeIO();
            treeStore.SaveObject(suggestor, testpath + "WordSuggestions");
        }
        /// <summary>
        /// Loads the .dtree file for the document selected in the list and binds its
        /// wrapped tree to the data-tree view.
        /// </summary>
        private void documentList_SelectionChanged(object sender, SelectionChangedEventArgs e)
        {
            object selectedVal = documentList.SelectedValue;

            // SelectionChanged is also raised when the selection is cleared (for example
            // when the list's ItemsSource is replaced); there is nothing to load then.
            if (selectedVal == null || documentLabel.Content == null)
            {
                return;
            }

            string name     = selectedVal.ToString();
            string filename = documentLabel.Content.ToString() + @"\" + name + ".dtree";

            ITreeIO   tio  = new TreeIO();
            IDataTree tree = tio.LoadDataTree(filename);

            DocumentTreeWrapper wrapper = new DocumentTreeWrapper(tree);

            dataTree.ItemsSource = wrapper.Tree;
        }
Example #6
0
        // Round-trips a base tree through TreeIO: the file must exist after saving, and the
        // reloaded tree must be a different object whose root has the same keyword.
        public void SaveBaseTreeTest()
        {
            ITreeIO   treeIo   = new TreeIO();
            IBaseTree original = setUpBaseTree();

            treeIo.SaveBaseTree(original, location);
            Assert.IsTrue(File.Exists(location));

            IBaseTree reloaded = treeIo.LoadBaseTree(location);
            Assert.AreNotSame(original, reloaded);
            Assert.AreEqual(original.Root.KeyWord, reloaded.Root.KeyWord);
        }
Example #7
0
        // Round-trips a data tree through TreeIO: the file must exist after saving, the
        // reloaded tree must be a new instance with the same root keyword, and its
        // base-tree reference is expected to be null after loading.
        public void SaveDataTreeTest()
        {
            ITreeIO   io       = new TreeIO();
            IBaseTree basetree = setUpBaseTree();
            IDataTree datatree = setUpDataTree(basetree);

            io.SaveDataTree(datatree, location);

            Assert.IsTrue(File.Exists(location));

            IDataTree loadedDatatree = io.LoadDataTree(location);

            // Compare against the data tree that was saved. Comparing the base tree with
            // the loaded data tree would not show that loading produced a new instance.
            Assert.AreNotSame(datatree, loadedDatatree);
            Assert.AreEqual(basetree.Root.KeyWord, loadedDatatree.Root.Keyword);

            // Only the in-memory tree keeps its base-tree reference.
            Assert.IsNull(loadedDatatree.GetBaseTree());
            Assert.IsNotNull(datatree.GetBaseTree());
        }
        /// <summary>
        /// Prompts for a .tree file, loads it as the current base tree, and shows the
        /// chosen path in the content-tree label.
        /// </summary>
        private void loadContentTreeButton_Click(object sender, RoutedEventArgs e)
        {
            OpenFileDialog picker = new OpenFileDialog
            {
                FileName   = "Tree",
                DefaultExt = ".tree",
                Filter     = "Tree Files (.tree)|*.tree"
            };

            // Anything other than true means no file was confirmed.
            if (picker.ShowDialog() != true)
            {
                return;
            }

            string chosenPath = picker.FileName;
            TreeIO loader     = new TreeIO();
            baseTree = loader.LoadBaseTree(chosenPath);
            contentTreeLabel.Content = picker.FileName;
        }
Example #9
0
        /// <summary>
        /// Prompts for a .tree file, loads it as the current base tree, wraps it in a
        /// viewable tree, and binds that tree's root to the node list.
        /// </summary>
        private void Load_Click(object sender, RoutedEventArgs e)
        {
            OpenFileDialog picker = new OpenFileDialog
            {
                FileName   = "Tree",
                DefaultExt = ".tree",
                Filter     = "Tree Files (.tree)|*.tree"
            };

            // Anything other than true means no file was confirmed.
            if (picker.ShowDialog() != true)
            {
                return;
            }

            TreeIO loader = new TreeIO();
            baseTree = loader.LoadBaseTree(picker.FileName);

            tree = new ViewableTree(baseTree);
            nodeList.ItemsSource = tree.Root;
        }
Example #10
0
        /// <summary>
        /// Prompts for a destination .tree file and saves the current base tree to it.
        /// </summary>
        private void Save_Click(object sender, RoutedEventArgs e)
        {
            SaveFileDialog saver = new SaveFileDialog
            {
                FileName   = "Tree",                      // Default file name
                DefaultExt = ".tree",                     // Default file extension
                Filter     = "Tree Files (.tree)|*.tree"  // Filter files by extension
            };

            // Show the save dialog; anything other than true means nothing was confirmed.
            if (saver.ShowDialog() != true)
            {
                return;
            }

            // Write the current base tree to the chosen path.
            TreeIO writer = new TreeIO();
            writer.SaveBaseTree(baseTree, saver.FileName);
        }
Example #11
0
        /// <summary>
        /// Prompts for a .dtree file, loads it as a data tree, and binds its wrapped tree
        /// to the data-tree view.
        /// </summary>
        private void loadDataTreeButton_Click(object sender, RoutedEventArgs e)
        {
            OpenFileDialog picker = new OpenFileDialog
            {
                FileName   = "Tree",
                DefaultExt = ".dtree",
                Filter     = "Data Tree Files (.dtree)|*.dtree"
            };

            // Anything other than true means no file was confirmed.
            if (picker.ShowDialog() != true)
            {
                return;
            }

            TreeIO    loader = new TreeIO();
            IDataTree loaded = loader.LoadDataTree(picker.FileName);

            dataTree.ItemsSource = new DocumentTreeWrapper(loaded).Tree;
        }
Example #12
0
        /// <summary>
        /// Asks the user to pick two data-tree files and reports whether the first tree's
        /// <c>CompareTo</c> accepts the second. Cancelling either dialog abandons the comparison.
        /// </summary>
        private void compareButton_Click(object sender, RoutedEventArgs e)
        {
            MessageBox.Show("Please select first tree");
            OpenFileDialog ofd = new OpenFileDialog();

            ofd.FileName   = "Tree";
            ofd.DefaultExt = ".dtree";
            ofd.Filter     = "Data Tree Files (.dtree)|*.dtree";

            if (ofd.ShowDialog() != true)
            {
                return;
            }

            TreeIO    io   = new TreeIO();
            IDataTree tree = io.LoadDataTree(ofd.FileName);

            MessageBox.Show("Please select a second tree");

            // The same dialog instance is reused for the second pick.
            if (ofd.ShowDialog() != true)
            {
                return;
            }

            IDataTree tree2 = io.LoadDataTree(ofd.FileName);

            if (tree.CompareTo(tree2))
            {
                MessageBox.Show("Your trees match");
            }
            else
            {
                MessageBox.Show("These trees do not match");
            }
        }
Example #13
0
        /// <summary>
        /// Builds a data tree from the query text, compares it with every file in the
        /// current document folder, and reports the names of the documents that match.
        /// Matching names are also appended to <c>documents</c>.
        /// </summary>
        private void queryOkay_Click(object sender, RoutedEventArgs e)
        {
            // The query tree is built from the content tree, so one must be loaded first
            // (the same precondition formatOkay_Click checks).
            if (baseTree == null)
            {
                MessageBox.Show("Please select a content tree before running a query.");
                return;
            }

            // The document folder comes from the label; bail out when it is unset or missing
            // rather than failing inside Directory.EnumerateFiles.
            string folder = documentLabel.Content == null ? null : documentLabel.Content.ToString();
            if (string.IsNullOrEmpty(folder) || !Directory.Exists(folder))
            {
                MessageBox.Show("Please build the document data trees before running a query.");
                return;
            }

            string        query   = queryBox.Text;
            List <string> matches = new List <string>();

            // NOTE(review): the progress dialog is created and disposed but never shown.
            using (Ookii.Dialogs.Wpf.ProgressDialog dial = new ProgressDialog()) {
                IDataTree queryTree = DataTreeBuilder.CreateDocumentMappedTree(baseTree);
                DataTreeBuilder.AddToDataTreeBoyerMoore(queryTree, query);

                ITreeIO tio = new TreeIO();
                foreach (String s in Directory.EnumerateFiles(folder))
                {
                    IDataTree docTree = tio.LoadDataTree(s);
                    if (queryTree.CompareTo(docTree))
                    {
                        matches.Add(s);
                    }
                }
            }

            if (matches.Count > 0)
            {
                StringBuilder sb = new StringBuilder("Documents which match \"" + query + "\":");
                foreach (string s in matches)
                {
                    // Keeps the whole file name when it has no extension; cutting at
                    // LastIndexOf('.') would give an empty name in that case.
                    string s2 = System.IO.Path.GetFileNameWithoutExtension(s);
                    documents.Add(s2);
                    sb.Append(" " + s2);
                }
                MessageBox.Show(sb.ToString());
            }
            else
            {
                MessageBox.Show("No documents match the query \"" + query + "\"");
            }
            queryPopup.IsOpen = false;
        }
Example #14
0
        /// <summary>
        /// Runs every "*FIND"-delimited query of TIME.QUE against the saved data trees and
        /// prints per-query and overall precision/recall against the lines of TIME.REL.
        /// </summary>
        /// <param name="contentTreeName">File name, appended to <c>testpath</c>, of the base tree to load.</param>
        /// <exception cref="InvalidOperationException">
        /// TIME.REL runs out of non-empty lines before the queries do.
        /// </exception>
        public static void Compare(string contentTreeName)
        {
            Console.SetBufferSize(100, 20000);
            IIO io = new FileIO();
            IEnumerable<string> file = io.ReadSourceIterable(testpath + "TIME.QUE");
            IEnumerable<string> expectedResults = io.ReadSourceIterable(testpath + "TIME.REL");
            ITextExtractor it = new BeginMarkerExtraction(file, "*FIND");

            ITreeIO tio = new TreeIO();
            IBaseTree tree = tio.LoadBaseTree(testpath + contentTreeName);
            double totalRecall = 0;
            double totalPrecision = 0;
            // Sentinels outside [0, 1] so the first query always replaces them.
            double bestRecall = -1;
            double worstRecall = 2;
            double bestPrecision = -1;
            double worstPrecision = 2;
            int count = 0;

            // Dispose the enumerator when done; it may be holding the results file open.
            using (IEnumerator<string> resultsEnum = expectedResults.GetEnumerator()) {
                while (it.HasNextContent()) {
                    string query = it.FindNextContent();
                    Console.WriteLine("---------------------------------");
                    string queryName = Helpers.GetNameWhenFirst(query);
                    Console.WriteLine("Query: " + queryName);
                    query = Helpers.ConsumeName(query);

                    Console.WriteLine(query);

                    IDataTree queryTree = DataTreeBuilder.CreateDocumentMappedTree(tree);
                    DataTreeBuilder.AddToDataTree(queryTree, query);

                    queryTree.PrintDataTree();

                    Console.WriteLine("Expected Results: ");

                    // Advance to the next non-empty results line. Current is only read after
                    // a successful MoveNext, and running out of lines stops the scan instead
                    // of looping forever or reusing the previous line.
                    string expected = null;
                    while (expected == null && resultsEnum.MoveNext()) {
                        if (!string.IsNullOrEmpty(resultsEnum.Current)) {
                            expected = resultsEnum.Current;
                        }
                    }
                    if (expected == null) {
                        throw new InvalidOperationException(
                            "TIME.REL has no expected-results line for query " + queryName);
                    }

                    expected = Helpers.ConsumeName(expected);
                    Console.WriteLine(expected);

                    expected = expected.Trim();
                    string[] expectedArray = expected.Split(' ');
                    double relevant = 0;
                    double totalRetrieved = 0;

                    Console.WriteLine("Actual Results: ");
                    foreach (String s in Directory.EnumerateFiles(testpath + @"\datatrees")) {
                        IDataTree docTree = tio.LoadDataTree(s);
                        if (queryTree.CompareTo(docTree)) {
                            Console.WriteLine(" Found: " + docTree.Name);
                            totalRetrieved++;
                            if (expectedArray.Contains(docTree.Name)) {
                                relevant++;
                            }
                        } else if (expectedArray.Contains(docTree.Name)) {
                            // Expected but not retrieved.
                            Console.WriteLine(" Expected: " + docTree.Name);
                        }
                    }

                    Console.WriteLine();
                    Console.WriteLine("Precision: " + relevant + "/" + totalRetrieved);
                    Console.WriteLine("Recall: " + relevant + "/" + (expectedArray.Length));
                    Console.WriteLine();

                    count++;
                    double recall = relevant / expectedArray.Length;
                    // Precision counts as 0 when nothing was retrieved.
                    double precision = 0;
                    if (totalRetrieved > 0) {
                        precision = relevant / totalRetrieved;
                    }
                    totalPrecision += precision;
                    totalRecall += recall;

                    if (precision > bestPrecision) {
                        bestPrecision = precision;
                    }
                    if (precision < worstPrecision) {
                        worstPrecision = precision;
                    }

                    if (recall > bestRecall) {
                        bestRecall = recall;
                    }
                    if (recall < worstRecall) {
                        worstRecall = recall;
                    }
                }
            }

            Console.WriteLine("-------------------");
            if (count == 0) {
                // Without queries the averages are undefined and best/worst still hold sentinels.
                Console.WriteLine("No queries were evaluated.");
                return;
            }
            Console.WriteLine("Average Precision: " + totalPrecision / count);
            Console.WriteLine("Average Recall: " + totalRecall / count);
            Console.WriteLine("Worst Precision: " + worstPrecision);
            Console.WriteLine("Worst Recall: " + worstRecall);
            Console.WriteLine("Best Precision: " + bestPrecision);
            Console.WriteLine("Best Recall: " + bestRecall);
        }
Example #15
0
        /// <summary>
        /// Runs the first "*FIND"-delimited query of TIME.QUE against the saved data trees,
        /// printing each retrieved and each expected document tree plus precision/recall
        /// against the first non-empty line of TIME.REL.
        /// </summary>
        /// <param name="contentTreeName">File name, appended to <c>testpath</c>, of the base tree to load.</param>
        /// <exception cref="InvalidOperationException">TIME.REL contains no non-empty line.</exception>
        public static void getDocsForQuery(string contentTreeName)
        {
            Console.SetBufferSize(100, 2000);
            IIO io = new FileIO();
            IEnumerable<string> file = io.ReadSourceIterable(testpath + "TIME.QUE");
            IEnumerable<string> expectedResults = io.ReadSourceIterable(testpath + "TIME.REL");
            ITextExtractor it = new BeginMarkerExtraction(file, "*FIND");

            ITreeIO tio = new TreeIO();
            IBaseTree tree = tio.LoadBaseTree(testpath + contentTreeName);

            // Check for content before reading it, as the other extractor loops do.
            if (!it.HasNextContent()) {
                Console.WriteLine("No query found in TIME.QUE");
                return;
            }

            string query = it.FindNextContent();
            Console.WriteLine("---------------------------------");
            string queryName = Helpers.GetNameWhenFirst(query);
            Console.WriteLine("Query: " + queryName);
            query = Helpers.ConsumeName(query);

            Console.WriteLine(query);

            IDataTree queryTree = DataTreeBuilder.CreateStemmedDocumentMapTree(tree);
            DataTreeBuilder.AddToDataTreeBoyerMoore(queryTree, query);
            queryTree.PrintDataTree();

            Console.WriteLine("Expected Results: ");

            // Find the first non-empty results line. Current is only read after a successful
            // MoveNext, and the enumerator is disposed once the line has been found.
            string expected = null;
            using (IEnumerator<string> resultsEnum = expectedResults.GetEnumerator()) {
                while (expected == null && resultsEnum.MoveNext()) {
                    if (!string.IsNullOrEmpty(resultsEnum.Current)) {
                        expected = resultsEnum.Current;
                    }
                }
            }
            if (expected == null) {
                throw new InvalidOperationException(
                    "TIME.REL has no expected-results line for query " + queryName);
            }

            expected = Helpers.ConsumeName(expected);
            Console.WriteLine(expected);

            expected = expected.Trim();
            string[] expectedArray = expected.Split(' ');
            double relevant = 0;
            double totalRetrieved = 0;

            Console.WriteLine("Actual Results: ");
            foreach (String s in Directory.EnumerateFiles(testpath + @"\datatrees")) {
                IDataTree docTree = tio.LoadDataTree(s);
                if (queryTree.CompareTo(docTree)) {
                    Console.Write(" " + docTree.Name);
                    totalRetrieved++;
                    if (expectedArray.Contains(docTree.Name)) {
                        relevant++;
                    }

                    Console.WriteLine("Found---");
                    Console.WriteLine(docTree.Name);
                    docTree.PrintDataTree();
                    Console.WriteLine("---");
                }
                // Not an else: a document that is both retrieved and expected is printed twice.
                if (expectedArray.Contains(docTree.Name)) {
                    Console.WriteLine("Expected---");
                    Console.WriteLine(docTree.Name);
                    docTree.PrintDataTree();
                    Console.WriteLine("---");
                }
            }

            Console.WriteLine();
            Console.WriteLine("Precision: " + relevant + "/" + totalRetrieved);
            Console.WriteLine("Recall: " + relevant + "/" + (expectedArray.Length));
            Console.WriteLine();

            Console.WriteLine("---------------------------------");
            // NOTE(review): fixed 10-second pause; presumably to leave the output on screen - confirm.
            Thread.Sleep(10000);
        }
Example #16
0
        /// <summary>
        /// Compares every saved data tree with every other one, printing the matches for
        /// each document. For documents that appear in TIME.REL it also prints precision
        /// and recall against the documents sharing a results line with it, followed by
        /// the averages over those documents.
        /// </summary>
        public static void CompareAll()
        {
            Console.SetBufferSize(100, 2000);
            IIO io = new FileIO();
            IEnumerable<string> expectedResults = io.ReadSourceIterable(testpath + "TIME.REL");

            // One entry per non-empty results line.
            List<IEnumerable<string>> resPerLine = new List<IEnumerable<string>>();
            foreach (string s in expectedResults) {
                if (!string.IsNullOrEmpty(s)) {
                    string answers = Helpers.ConsumeName(s);
                    resPerLine.Add(answers.Split(' '));
                }
            }

            double avgPrecision = 0;
            double avgRecall = 0;
            int numCounted = 0;

            TreeIO tio = new TreeIO();

            // List the folder once instead of once per outer document.
            List<string> treeFiles = Directory.EnumerateFiles(testpath + @"\datatrees").ToList();

            foreach (String s in treeFiles) {
                IDataTree docTree = tio.LoadDataTree(s);
                Console.WriteLine(docTree.Name + ":");
                int count = 0;
                List<string> matches = new List<string>();
                foreach (String s2 in treeFiles) {
                    if (s != s2) {
                        IDataTree tree2 = tio.LoadDataTree(s2);
                        if (docTree.CompareTo(tree2)) {
                            Console.Write(tree2.Name + " ");
                            count++;
                            matches.Add(tree2.Name);
                        }
                    }
                }

                Console.WriteLine();
                Console.WriteLine("Count: " + count);

                // Materialize once; the deferred query would otherwise be re-run on each use.
                List<IEnumerable<string>> containsTreeRes =
                    resPerLine.Where(l => l.Contains(docTree.Name)).ToList();
                if (containsTreeRes.Count > 0) {
                    // NOTE(review): this set includes docTree.Name itself, while matches never
                    // does (s != s2), so recall is measured against a set containing the
                    // document itself - confirm that is intended.
                    List<string> expectedMatches = containsTreeRes.SelectMany(l => l).Distinct().ToList();

                    int foundCount = matches.Intersect(expectedMatches).Count();

                    Console.WriteLine("Precision: " + foundCount + "/" + matches.Count);
                    Console.WriteLine("Recall: " + foundCount + "/" + expectedMatches.Count);

                    numCounted++;
                    // Precision contributes 0 when nothing matched.
                    if (matches.Count > 0) {
                        avgPrecision += ((double)foundCount) / matches.Count;
                    }
                    avgRecall += ((double)foundCount) / expectedMatches.Count;
                }

                Console.WriteLine();
            }

            Console.WriteLine("-------");
            if (numCounted == 0) {
                // No document appeared in the results file, so the averages are undefined.
                Console.WriteLine("No documents with expected matches were found.");
                return;
            }
            Console.WriteLine("Average Precision: " + (avgPrecision / numCounted));
            Console.WriteLine("Average Recall: " + (avgRecall / numCounted));
        }
Example #17
0
        /// <summary>
        /// Validates the form, prompts for a source text file, splits it into documents
        /// with the selected extractor, and saves one data tree per document into a
        /// "datatrees" folder named after the source file. The document list is then
        /// bound to the extracted document names.
        /// </summary>
        /// <exception cref="InvalidOperationException">The selected format index has no extractor.</exception>
        private void formatOkay_Click(object sender, RoutedEventArgs e)
        {
            if (baseTree == null)
            {
                MessageBox.Show("Please select a content tree for the data tree.");
                return;
            }

            // Invalid fields are flagged with a red border; the popup stays open.
            if (formatBox.SelectedIndex == -1)
            {
                formatBox.BorderBrush = Brushes.Red;
                return;
            }
            if (string.IsNullOrEmpty(documentFormatBox.Text))
            {
                documentFormatBox.BorderBrush = Brushes.Red;
                return;
            }

            OpenFileDialog ofd = new OpenFileDialog();

            ofd.FileName   = "Tree";
            ofd.DefaultExt = ".txt";

            Nullable <bool> result = ofd.ShowDialog();

            if (result == true)
            {
                string filename     = ofd.FileName;
                // The output folder is the source path with "datatrees" appended directly.
                string outputFolder = filename + "datatrees";
                documentLabel.Content = outputFolder;
                using (Ookii.Dialogs.Wpf.ProgressDialog dial = new ProgressDialog()) {
                    dial.ProgressBarStyle = ProgressBarStyle.MarqueeProgressBar;
                    dial.Show();
                    dial.Description = "Analyzing text...";
                    IIO            io = new FileIO();
                    ITextExtractor it = null;
                    switch (formatBox.SelectedIndex)
                    {
                    case 0:
                        string text = io.ReadSource(filename);
                        it = new XMLTextExtractor(text, documentFormatBox.Text);
                        break;

                    case 1:
                        var texts = io.ReadSourceIterable(filename);
                        it = new BeginMarkerExtraction(texts, documentFormatBox.Text);
                        break;

                    default:
                        throw new InvalidOperationException();
                    }

                    // Make sure the target folder exists before any tree is saved into it;
                    // CreateDirectory does nothing when the folder is already there.
                    System.IO.Directory.CreateDirectory(outputFolder);

                    // One writer serves every document.
                    ITreeIO tio = new TreeIO();

                    documents = new ObservableCollection <string>();
                    while (it.HasNextContent())
                    {
                        string content = it.FindNextContent();
                        string name    = Helpers.GetNameWhenFirst(content);
                        documents.Add(name);

                        IDataTree tree = DataTreeBuilder.CreateDocumentMappedTree(baseTree);
                        DataTreeBuilder.AddToDataTree(tree, content);

                        tio.SaveDataTree(tree, outputFolder + @"\" + name + ".dtree");
                    }
                    documentList.ItemsSource = documents;
                }
            }


            buildDataTreePopup.IsOpen = false;
        }