Example #1
0
        /// <summary>
        /// Command-line entry point: reads TREC topics and relevance judgements, runs the resulting
        /// quality queries against an existing index, writes a submission report to a file and logs
        /// a summary of the averaged quality statistics to <see cref="Console.Out"/>.
        /// </summary>
        /// <remarks>
        /// Usage: <c>QueryDriver &lt;topicsFile&gt; &lt;qrelsFile&gt; &lt;submissionFile&gt; &lt;indexDir&gt; [querySpec]</c>
        /// <list type="bullet">
        /// <item><description>topicsFile: input file containing queries</description></item>
        /// <item><description>qrelsFile: input file containing relevance judgements</description></item>
        /// <item><description>submissionFile: output submission file for trec_eval</description></item>
        /// <item><description>indexDir: index directory</description></item>
        /// <item><description>querySpec: string composed of fields to use in query consisting of
        /// T=title, D=description, N=narrative. Example: TD (query on Title + Description).
        /// The default is T (title only).</description></item>
        /// </list>
        /// </remarks>
        /// <param name="args">The four mandatory arguments and the optional fifth one, in the order given above.</param>
        /// <exception cref="ArgumentException">Fewer than 4 or more than 5 arguments were supplied.</exception>
        public static void Main(string[] args)
        {
            if (args.Length < 4 || args.Length > 5)
            {
                // LUCENENET specific - our wrapper console shows correct usage
                throw new ArgumentException();
            }

            FileInfo topicsFile = new FileInfo(args[0]);
            FileInfo qrelsFile  = new FileInfo(args[1]);

            // Own the submission writer here so the output file is flushed and its handle released when
            // Main exits, including on exception paths. It is declared before the directory and reader,
            // so it is disposed after them (using declarations are disposed in reverse order).
            using StreamWriter submitWriter = new StreamWriter(new FileStream(args[2], FileMode.Create, FileAccess.Write), Encoding.UTF8);
            SubmissionReport submitLog = new SubmissionReport(submitWriter, "lucene");

            using Store.FSDirectory dir = Store.FSDirectory.Open(new DirectoryInfo(args[3]));
            using IndexReader reader    = DirectoryReader.Open(dir);
            string        fieldSpec = args.Length == 5 ? args[4] : "T"; // default to Title-only if not specified.
            IndexSearcher searcher  = new IndexSearcher(reader);

            int    maxResults   = 1000;
            string docNameField = "docname";

            TextWriter logger = Console.Out;

            // use trec utilities to read trec topics into quality queries
            TrecTopicsReader qReader = new TrecTopicsReader();

            QualityQuery[] qqs = qReader.ReadQueries(IOUtils.GetDecodingReader(topicsFile, Encoding.UTF8));

            // prepare judge, with trec utilities that read from a QRels file
            IJudge judge = new TrecJudge(IOUtils.GetDecodingReader(qrelsFile, Encoding.UTF8));

            // validate topics & judgments match each other
            judge.ValidateData(qqs, logger);

            // translate the query spec letters into the topic fields that make up each query
            ISet<string> fieldSet = new JCG.HashSet<string>();

            if (fieldSpec.IndexOf('T') >= 0)
            {
                fieldSet.Add("title");
            }
            if (fieldSpec.IndexOf('D') >= 0)
            {
                fieldSet.Add("description");
            }
            if (fieldSpec.IndexOf('N') >= 0)
            {
                fieldSet.Add("narrative");
            }

            // set the parsing of quality queries into Lucene queries.
            IQualityQueryParser qqParser = new SimpleQQParser(fieldSet.ToArray(), "body");

            // run the benchmark
            QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);

            qrun.MaxResults = maxResults;
            QualityStats[] stats = qrun.Execute(judge, submitLog, logger);

            // print an average sum of the results
            QualityStats avg = QualityStats.Average(stats);

            avg.Log("SUMMARY", 2, logger, "  ");
        }
Example #2
0
        // Runs the TREC quality benchmark over the partial Reuters index, using the bundled
        // "trecTopics.txt" and "trecQRels.txt" resources, and checks that the per-query and averaged
        // statistics are hurt (or remain perfect) in the way the judgments were altered for this test.
        public void TestTrecQuality()
        {
            // first create the partial reuters index
            createReutersIndex();

            int    maxResults   = 1000;
            string docNameField = "doctitle"; // orig docID is in the linedoc format title

            // logging (and the submission report below) is only produced in verbose runs
            TextWriter logger = VERBOSE ? Console.Out : null;

            // prepare topics
            Stream           topics  = GetType().getResourceAsStream("trecTopics.txt");
            TrecTopicsReader qReader = new TrecTopicsReader();

            QualityQuery[] qqs = qReader.ReadQueries(new StreamReader(topics, Encoding.UTF8));

            // prepare judge
            Stream qrels = GetType().getResourceAsStream("trecQRels.txt");
            IJudge judge = new TrecJudge(new StreamReader(qrels, Encoding.UTF8));

            // validate topics & judgments match each other
            judge.ValidateData(qqs, logger);

            // Scope the directory and the reader with using statements so that both are released even
            // when one of the assertions below throws. The reader is disposed before the directory,
            // which is the same order the explicit Dispose() calls used.
            using (Store.Directory dir = NewFSDirectory(new DirectoryInfo(System.IO.Path.Combine(getWorkDir().FullName, "index"))))
            using (IndexReader reader = DirectoryReader.Open(dir))
            {
                IndexSearcher searcher = new IndexSearcher(reader);

                IQualityQueryParser qqParser = new SimpleQQParser("title", "body");
                QualityBenchmark    qrun     = new QualityBenchmark(qqs, qqParser, searcher, docNameField);

                SubmissionReport submitLog = VERBOSE ? new SubmissionReport(logger, "TestRun") : null;

                qrun.MaxResults = maxResults;
                QualityStats[] stats = qrun.Execute(judge, submitLog, logger);

                // --------- verify by the way judgments were altered for this test:
                // for some queries, depending on m = qnum % 8
                // m==0: avg_precision and recall are hurt, by marking fake docs as relevant
                // m==1: precision_at_n and avg_precision are hurt, by unmarking relevant docs
                // m==2: all precision, precision_at_n and recall are hurt.
                // m>=3: these queries remain perfect
                for (int i = 0; i < stats.Length; i++)
                {
                    QualityStats s = stats[i];
                    switch (i % 8)
                    {
                        case 0:
                            assertTrue("avg-p should be hurt: " + s.GetAvp(), 1.0 > s.GetAvp());
                            assertTrue("recall should be hurt: " + s.Recall, 1.0 > s.Recall);
                            for (int j = 1; j <= QualityStats.MAX_POINTS; j++)
                            {
                                assertEquals("p_at_" + j + " should be perfect: " + s.GetPrecisionAt(j), 1.0, s.GetPrecisionAt(j), 1E-2);
                            }
                            break;

                        case 1:
                            assertTrue("avg-p should be hurt", 1.0 > s.GetAvp());
                            assertEquals("recall should be perfect: " + s.Recall, 1.0, s.Recall, 1E-2);
                            for (int j = 1; j <= QualityStats.MAX_POINTS; j++)
                            {
                                assertTrue("p_at_" + j + " should be hurt: " + s.GetPrecisionAt(j), 1.0 > s.GetPrecisionAt(j));
                            }
                            break;

                        case 2:
                            assertTrue("avg-p should be hurt: " + s.GetAvp(), 1.0 > s.GetAvp());
                            assertTrue("recall should be hurt: " + s.Recall, 1.0 > s.Recall);
                            for (int j = 1; j <= QualityStats.MAX_POINTS; j++)
                            {
                                assertTrue("p_at_" + j + " should be hurt: " + s.GetPrecisionAt(j), 1.0 > s.GetPrecisionAt(j));
                            }
                            break;

                        default:
                            assertEquals("avg-p should be perfect: " + s.GetAvp(), 1.0, s.GetAvp(), 1E-2);
                            assertEquals("recall should be perfect: " + s.Recall, 1.0, s.Recall, 1E-2);
                            for (int j = 1; j <= QualityStats.MAX_POINTS; j++)
                            {
                                assertEquals("p_at_" + j + " should be perfect: " + s.GetPrecisionAt(j), 1.0, s.GetPrecisionAt(j), 1E-2);
                            }
                            break;
                    }
                }

                QualityStats avg = QualityStats.Average(stats);

                if (logger != null)
                {
                    avg.Log("Average statistis:", 1, logger, "  ");
                }

                // because some queries were hurt, every averaged measure must be below perfect
                assertTrue("mean avg-p should be hurt: " + avg.GetAvp(), 1.0 > avg.GetAvp());
                assertTrue("avg recall should be hurt: " + avg.Recall, 1.0 > avg.Recall);
                for (int j = 1; j <= QualityStats.MAX_POINTS; j++)
                {
                    assertTrue("avg p_at_" + j + " should be hurt: " + avg.GetPrecisionAt(j), 1.0 > avg.GetPrecisionAt(j));
                }
            }
        }