예제 #1
0
        /// <summary>
        /// Runs every query extracted from <c>TIME.QUE</c> against each data tree file found
        /// under the <c>datatrees</c> directory and prints, per query, the matched documents
        /// together with precision and recall. The expected documents for a query are taken
        /// from the next non-empty entry read from <c>TIME.REL</c>. After all queries have
        /// been processed, average, worst and best precision/recall are printed.
        /// </summary>
        /// <param name="contentTreeName">
        /// Name of the base tree file; it is appended to <c>testpath</c> and loaded through
        /// <c>TreeIO.LoadBaseTree</c>.
        /// </param>
        public static void Compare(string contentTreeName)
        {
            Console.SetBufferSize(100, 20000);
            IIO io = new FileIO();
            IEnumerable<string> file = io.ReadSourceIterable(testpath + "TIME.QUE");
            IEnumerable<string> expectedResults = io.ReadSourceIterable(testpath + "TIME.REL");
            ITextExtractor it = new BeginMarkerExtraction(file, "*FIND");

            ITreeIO tio = new TreeIO();
            IBaseTree tree = tio.LoadBaseTree(testpath + contentTreeName);
            double totalRecall = 0;
            double totalPrecision = 0;
            // Sentinels outside the valid [0, 1] range so the first query always replaces them.
            double bestRecall = -1;
            double worstRecall = 2;
            double bestPrecision = -1;
            double worstPrecision = 2;
            int queryCount = 0;

            // The enumerator is disposed deterministically; the source may hold a resource
            // (NOTE(review): presumably a file reader - confirm against FileIO).
            using (IEnumerator<string> resultsEnum = expectedResults.GetEnumerator()) {
                while (it.HasNextContent()) {
                    string query = it.FindNextContent();
                    Console.WriteLine("---------------------------------");
                    string queryName = Helpers.GetNameWhenFirst(query);
                    Console.WriteLine("Query: " + queryName);
                    query = Helpers.ConsumeName(query);

                    Console.WriteLine(query);

                    IDataTree queryTree = DataTreeBuilder.CreateDocumentMappedTree(tree);
                    DataTreeBuilder.AddToDataTree(queryTree, query);

                    queryTree.PrintDataTree();

                    Console.WriteLine("Expected Results: ");
                    // Advance to the next non-empty entry. Current is only read after a
                    // successful MoveNext(), and the loop ends when the source is exhausted
                    // instead of spinning on a stale value.
                    string expectedLine = null;
                    while (resultsEnum.MoveNext()) {
                        if (!string.IsNullOrEmpty(resultsEnum.Current)) {
                            expectedLine = resultsEnum.Current;
                            break;
                        }
                    }
                    // No entry left for this query: treat it as having no expected documents.
                    string expected = expectedLine == null
                        ? string.Empty
                        : Helpers.ConsumeName(expectedLine);
                    Console.WriteLine(expected);

                    // Drop empty tokens so repeated spaces (or an empty list) do not inflate
                    // the recall denominator.
                    string[] expectedArray = expected.Trim()
                        .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                    double relevant = 0;
                    double totalRetrieved = 0;

                    Console.WriteLine("Actual Results: ");
                    foreach (string s in Directory.EnumerateFiles(testpath + @"\datatrees")) {
                        IDataTree docTree = tio.LoadDataTree(s);
                        bool isExpected = expectedArray.Contains(docTree.Name);
                        if (queryTree.CompareTo(docTree)) {
                            Console.WriteLine(" Found: " + docTree.Name);
                            totalRetrieved++;
                            if (isExpected) {
                                relevant++;
                            }
                        } else if (isExpected) {
                            // Expected document that the query did not match.
                            Console.WriteLine(" Expected: " + docTree.Name);
                        }
                    }

                    Console.WriteLine();
                    Console.WriteLine("Precision: " + relevant + "/" + totalRetrieved);
                    Console.WriteLine("Recall: " + relevant + "/" + (expectedArray.Length));
                    Console.WriteLine();

                    queryCount++;
                    // Guard both ratios against a zero denominator so NaN never reaches the totals.
                    double recall = expectedArray.Length > 0 ? relevant / expectedArray.Length : 0;
                    double precision = totalRetrieved > 0 ? relevant / totalRetrieved : 0;
                    totalPrecision += precision;
                    totalRecall += recall;

                    bestPrecision = Math.Max(bestPrecision, precision);
                    worstPrecision = Math.Min(worstPrecision, precision);
                    bestRecall = Math.Max(bestRecall, recall);
                    worstRecall = Math.Min(worstRecall, recall);
                }
            }

            Console.WriteLine("-------------------");
            if (queryCount == 0) {
                // Without this guard the averages would print NaN and the best/worst
                // values would be the meaningless sentinels.
                Console.WriteLine("No queries were evaluated.");
                return;
            }

            Console.WriteLine("Average Precision: " + totalPrecision / queryCount);
            Console.WriteLine("Average Recall: " + totalRecall / queryCount);
            Console.WriteLine("Worst Precision: " + worstPrecision);
            Console.WriteLine("Worst Recall: " + worstRecall);
            Console.WriteLine("Best Precision: " + bestPrecision);
            Console.WriteLine("Best Recall: " + bestRecall);
        }
예제 #2
0
        /// <summary>
        /// Evaluates only the first query extracted from <c>TIME.QUE</c>: builds a stemmed
        /// query tree, compares it with each data tree file found under the
        /// <c>datatrees</c> directory, and prints the matched and the expected documents
        /// (including their trees) followed by precision and recall. The expected documents
        /// are taken from the first non-empty entry read from <c>TIME.REL</c>.
        /// </summary>
        /// <param name="contentTreeName">
        /// Name of the base tree file; it is appended to <c>testpath</c> and loaded through
        /// <c>TreeIO.LoadBaseTree</c>.
        /// </param>
        public static void getDocsForQuery(string contentTreeName)
        {
            Console.SetBufferSize(100, 2000);
            IIO io = new FileIO();
            IEnumerable<string> file = io.ReadSourceIterable(testpath + "TIME.QUE");
            IEnumerable<string> expectedResults = io.ReadSourceIterable(testpath + "TIME.REL");
            ITextExtractor it = new BeginMarkerExtraction(file, "*FIND");

            ITreeIO tio = new TreeIO();
            IBaseTree tree = tio.LoadBaseTree(testpath + contentTreeName);

            // Same guard that Compare uses before pulling a query from the extractor.
            if (!it.HasNextContent()) {
                Console.WriteLine("No query found.");
                return;
            }

            string query = it.FindNextContent();
            Console.WriteLine("---------------------------------");
            string queryName = Helpers.GetNameWhenFirst(query);
            Console.WriteLine("Query: " + queryName);
            query = Helpers.ConsumeName(query);

            Console.WriteLine(query);

            IDataTree queryTree = DataTreeBuilder.CreateStemmedDocumentMapTree(tree);
            DataTreeBuilder.AddToDataTreeBoyerMoore(queryTree, query);
            queryTree.PrintDataTree();

            Console.WriteLine("Expected Results: ");
            // FirstOrDefault never reads Current before MoveNext(), stops at the end of the
            // sequence, and disposes the enumerator it creates.
            string expectedLine = expectedResults.FirstOrDefault(entry => !string.IsNullOrEmpty(entry));
            // No non-empty entry at all: treat the query as having no expected documents.
            string expected = expectedLine == null
                ? string.Empty
                : Helpers.ConsumeName(expectedLine);
            Console.WriteLine(expected);

            // Drop empty tokens so repeated spaces (or an empty list) do not inflate the
            // recall denominator.
            string[] expectedArray = expected.Trim()
                .Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            double relevant = 0;
            double totalRetrieved = 0;

            Console.WriteLine("Actual Results: ");
            // Only consumed by the disabled document dump at the end of this method.
            List<string> retrieved = new List<string>();
            foreach (string s in Directory.EnumerateFiles(testpath + @"\datatrees")) {
                IDataTree docTree = tio.LoadDataTree(s);
                bool isExpected = expectedArray.Contains(docTree.Name);
                if (queryTree.CompareTo(docTree)) {
                    Console.Write(" " + docTree.Name);
                    retrieved.Add(docTree.Name);
                    totalRetrieved++;
                    if (isExpected) {
                        relevant++;
                    }

                    Console.WriteLine("Found---");
                    Console.WriteLine(docTree.Name);
                    docTree.PrintDataTree();
                    Console.WriteLine("---");
                }
                // Deliberately not an "else": an expected document is dumped even when it
                // was also reported as found above.
                if (isExpected) {
                    Console.WriteLine("Expected---");
                    Console.WriteLine(docTree.Name);
                    docTree.PrintDataTree();
                    Console.WriteLine("---");
                }

            }

            Console.WriteLine();
            Console.WriteLine("Precision: " + relevant + "/" + totalRetrieved);
            Console.WriteLine("Recall: " + relevant + "/" + (expectedArray.Length));
            Console.WriteLine();

            Console.WriteLine("---------------------------------");
            // Blocks for ten seconds before continuing.
            Thread.Sleep(10000);

            // NOTE(review): fileAll/itAll are only used by the disabled dump below; kept
            // because their construction may have side effects - confirm before removing.
            IEnumerable<string> fileAll = io.ReadSourceIterable(testpath + "TIME.ALL");
            ITextExtractor itAll = new BeginMarkerExtraction(fileAll, "*TEXT");

            //int count = 1;
            //while (itAll.HasNextContent()) {
            //    string content = itAll.FindNextContent();
            //    string name = "" + count;
            //    if (retrieved.Contains(name) || expectedArray.ToList().Contains(name)) {
            //        Console.WriteLine("------------------------------------------------------------");
            //        Console.WriteLine("------------------------------------------------------------");
            //        Console.WriteLine(name);
            //        content = Helpers.ConsumeName(content);
            //        Console.WriteLine(content);
            //        Console.WriteLine("------------------------------------------------------------");
            //        Console.WriteLine("------------------------------------------------------------");
            //    }

            //    count++;
            //}
        }