public static void Main(string[] args)
{
    if (args.Length < 4 || args.Length > 5)
    {
        // LUCENENET specific - our wrapper console shows correct usage
        throw new ArgumentException();
        //Console.Error.WriteLine("Usage: QueryDriver <topicsFile> <qrelsFile> <submissionFile> <indexDir> [querySpec]");
        //Console.Error.WriteLine("topicsFile: input file containing queries");
        //Console.Error.WriteLine("qrelsFile: input file containing relevance judgements");
        //Console.Error.WriteLine("submissionFile: output submission file for trec_eval");
        //Console.Error.WriteLine("indexDir: index directory");
        //Console.Error.WriteLine("querySpec: string composed of fields to use in query consisting of T=title,D=description,N=narrative:");
        //Console.Error.WriteLine("\texample: TD (query on Title + Description). The default is T (title only)");
        //Environment.Exit(1);
    }

    FileInfo topicsFile = new FileInfo(args[0]);
    FileInfo qrelsFile = new FileInfo(args[1]);
    SubmissionReport submitLog = new SubmissionReport(new StreamWriter(new FileStream(args[2], FileMode.Create, FileAccess.Write), Encoding.UTF8 /* huh, no nio.Charset ctor? */), "lucene");
    using Store.FSDirectory dir = Store.FSDirectory.Open(new DirectoryInfo(args[3]));
    using IndexReader reader = DirectoryReader.Open(dir);
    string fieldSpec = args.Length == 5 ? args[4] : "T"; // default to Title-only if not specified.
    IndexSearcher searcher = new IndexSearcher(reader);

    int maxResults = 1000;
    string docNameField = "docname";

    TextWriter logger = Console.Out; //new StreamWriter(Console, Encoding.GetEncoding(0));

    // use trec utilities to read trec topics into quality queries
    TrecTopicsReader qReader = new TrecTopicsReader();
    QualityQuery[] qqs = qReader.ReadQueries(IOUtils.GetDecodingReader(topicsFile, Encoding.UTF8));

    // prepare judge, with trec utilities that read from a QRels file
    IJudge judge = new TrecJudge(IOUtils.GetDecodingReader(qrelsFile, Encoding.UTF8));

    // validate topics & judgments match each other
    judge.ValidateData(qqs, logger);

    ISet<string> fieldSet = new JCG.HashSet<string>();
    if (fieldSpec.IndexOf('T') >= 0) { fieldSet.Add("title"); }
    if (fieldSpec.IndexOf('D') >= 0) { fieldSet.Add("description"); }
    if (fieldSpec.IndexOf('N') >= 0) { fieldSet.Add("narrative"); }

    // set the parsing of quality queries into Lucene queries.
    IQualityQueryParser qqParser = new SimpleQQParser(fieldSet.ToArray(), "body");

    // run the benchmark
    QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
    qrun.MaxResults = maxResults;
    QualityStats[] stats = qrun.Execute(judge, submitLog, logger);

    // print an average summary of the results
    QualityStats avg = QualityStats.Average(stats);
    avg.Log("SUMMARY", 2, logger, " ");
}
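// Minimal usage sketch (an assumption, not part of the original source): one way the
// driver above might be invoked programmatically from the same class. The helper name
// RunQueryDriverExample and the file/directory paths are hypothetical placeholders and
// must point at real TREC topics, qrels, and index locations before the call. The
// argument order follows the commented-out usage text in Main:
//   <topicsFile> <qrelsFile> <submissionFile> <indexDir> [querySpec]
private static void RunQueryDriverExample()
{
    Main(new string[]
    {
        "topics.txt",      // TREC topics file (queries)
        "qrels.txt",       // relevance judgements
        "submission.txt",  // output submission file for trec_eval
        "index",           // Lucene index directory
        "TD"               // query on Title + Description (optional; default is "T")
    });
}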
public void TestTrecQuality()
{
    // first create the partial reuters index
    createReutersIndex();

    int maxResults = 1000;
    string docNameField = "doctitle"; // orig docID is in the linedoc format title

    TextWriter logger = VERBOSE ? Console.Out : null;

    // prepare topics
    Stream topics = GetType().getResourceAsStream("trecTopics.txt");
    TrecTopicsReader qReader = new TrecTopicsReader();
    QualityQuery[] qqs = qReader.ReadQueries(new StreamReader(topics, Encoding.UTF8));

    // prepare judge
    Stream qrels = GetType().getResourceAsStream("trecQRels.txt");
    IJudge judge = new TrecJudge(new StreamReader(qrels, Encoding.UTF8));

    // validate topics & judgments match each other
    judge.ValidateData(qqs, logger);

    Store.Directory dir = NewFSDirectory(new DirectoryInfo(System.IO.Path.Combine(getWorkDir().FullName, "index")));
    IndexReader reader = DirectoryReader.Open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);

    IQualityQueryParser qqParser = new SimpleQQParser("title", "body");
    QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
    SubmissionReport submitLog = VERBOSE ? new SubmissionReport(logger, "TestRun") : null;
    qrun.MaxResults = maxResults;
    QualityStats[] stats = qrun.Execute(judge, submitLog, logger);

    // --------- verify by the way judgments were altered for this test:
    // for some queries, depending on m = qnum % 8
    // m==0: avg_precision and recall are hurt, by marking fake docs as relevant
    // m==1: precision_at_n and avg_precision are hurt, by unmarking relevant docs
    // m==2: all precision, precision_at_n and recall are hurt.
    // m>=3: these queries remain perfect
    for (int i = 0; i < stats.Length; i++)
    {
        QualityStats s = stats[i];
        switch (i % 8)
        {
            case 0:
                assertTrue("avg-p should be hurt: " + s.GetAvp(), 1.0 > s.GetAvp());
                assertTrue("recall should be hurt: " + s.Recall, 1.0 > s.Recall);
                for (int j = 1; j <= QualityStats.MAX_POINTS; j++)
                {
                    assertEquals("p_at_" + j + " should be perfect: " + s.GetPrecisionAt(j), 1.0, s.GetPrecisionAt(j), 1E-2);
                }
                break;

            case 1:
                assertTrue("avg-p should be hurt", 1.0 > s.GetAvp());
                assertEquals("recall should be perfect: " + s.Recall, 1.0, s.Recall, 1E-2);
                for (int j = 1; j <= QualityStats.MAX_POINTS; j++)
                {
                    assertTrue("p_at_" + j + " should be hurt: " + s.GetPrecisionAt(j), 1.0 > s.GetPrecisionAt(j));
                }
                break;

            case 2:
                assertTrue("avg-p should be hurt: " + s.GetAvp(), 1.0 > s.GetAvp());
                assertTrue("recall should be hurt: " + s.Recall, 1.0 > s.Recall);
                for (int j = 1; j <= QualityStats.MAX_POINTS; j++)
                {
                    assertTrue("p_at_" + j + " should be hurt: " + s.GetPrecisionAt(j), 1.0 > s.GetPrecisionAt(j));
                }
                break;

            default:
                assertEquals("avg-p should be perfect: " + s.GetAvp(), 1.0, s.GetAvp(), 1E-2);
                assertEquals("recall should be perfect: " + s.Recall, 1.0, s.Recall, 1E-2);
                for (int j = 1; j <= QualityStats.MAX_POINTS; j++)
                {
                    assertEquals("p_at_" + j + " should be perfect: " + s.GetPrecisionAt(j), 1.0, s.GetPrecisionAt(j), 1E-2);
                }
                break;
        }
    }

    QualityStats avg = QualityStats.Average(stats);
    if (logger != null)
    {
        avg.Log("Average statistics:", 1, logger, " ");
    }

    assertTrue("mean avg-p should be hurt: " + avg.GetAvp(), 1.0 > avg.GetAvp());
    assertTrue("avg recall should be hurt: " + avg.Recall, 1.0 > avg.Recall);
    for (int j = 1; j <= QualityStats.MAX_POINTS; j++)
    {
        assertTrue("avg p_at_" + j + " should be hurt: " + avg.GetPrecisionAt(j), 1.0 > avg.GetPrecisionAt(j));
    }

    reader.Dispose();
    dir.Dispose();
}