/// <summary>
/// Run the quality benchmark.
/// </summary>
/// <param name="judge">
/// The judge that can tell if a certain result doc is relevant for a certain quality query.
/// If null, no judgments are made. Usually null for a submission run.
/// </param>
/// <param name="submitRep">A submission report is created if non-null.</param>
/// <param name="qualityLog">If not null, quality run data is printed for each query.</param>
/// <returns><see cref="QualityStats"/> of each quality query that was executed.</returns>
/// <exception cref="Exception">If the quality benchmark failed to run.</exception>
public virtual QualityStats[] Execute(IJudge judge, SubmissionReport submitRep, TextWriter qualityLog)
{
    int nQueries = Math.Min(maxQueries, m_qualityQueries.Length);
    QualityStats[] stats = new QualityStats[nQueries];
    for (int i = 0; i < nQueries; i++)
    {
        QualityQuery qq = m_qualityQueries[i];
        // generate query
        Query q = m_qqParser.Parse(qq);
        // search with this query
        long t1 = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
        TopDocs td = m_searcher.Search(q, null, maxResults);
        long searchTime = (J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond) - t1; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
        // most likely we either submit or judge, but check both
        if (judge != null)
        {
            stats[i] = AnalyzeQueryResults(qq, q, td, judge, qualityLog, searchTime);
        }
        if (submitRep != null)
        {
            submitRep.Report(qq, td, m_docNameField, m_searcher);
        }
    }
    if (submitRep != null)
    {
        submitRep.Flush();
    }
    return stats;
}
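// Usage sketch (illustrative, not part of the original sources): a minimal way a
// caller might drive Execute(...). The "topicsReader"/"qrelsReader" TextReaders and
// the "docname" field name are assumptions for illustration; see the test further
// below for a complete, working setup.
//
//   QualityQuery[] qqs = new TrecTopicsReader().ReadQueries(topicsReader);  // topicsReader: reader over a TREC topics file (assumed)
//   IJudge judge = new TrecJudge(qrelsReader);                              // qrelsReader: reader over a TREC qrels file (assumed)
//   IQualityQueryParser qqParser = new SimpleQQParser("title", "body");
//   QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, "docname");
//   QualityStats[] stats = qrun.Execute(judge, null, Console.Out);          // judge only, no submission report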
/// <summary>
/// Create a <see cref="QualityStats"/> object that is the average of the input <see cref="QualityStats"/> objects.
/// </summary>
/// <param name="stats">Array of input stats to be averaged.</param>
/// <returns>An average over the input stats.</returns>
public static QualityStats Average(QualityStats[] stats)
{
    QualityStats avg = new QualityStats(0, 0);
    if (stats.Length == 0)
    {
        // weird, no stats to average!
        return avg;
    }
    int m = 0; // queries with positive judgments
    // aggregate
    for (int i = 0; i < stats.Length; i++)
    {
        avg.searchTime += stats[i].searchTime;
        avg.docNamesExtractTime += stats[i].docNamesExtractTime;
        if (stats[i].maxGoodPoints > 0)
        {
            m++;
            avg.numGoodPoints += stats[i].numGoodPoints;
            avg.numPoints += stats[i].numPoints;
            avg.pReleventSum += stats[i].GetAvp();
            avg.recall += stats[i].recall;
            avg.mrr += stats[i].MRR;
            avg.maxGoodPoints += stats[i].maxGoodPoints;
            for (int j = 1; j < avg.pAt.Length; j++)
            {
                avg.pAt[j] += stats[i].GetPrecisionAt(j);
            }
        }
    }
    if (Debugging.AssertsEnabled) Debugging.Assert(m > 0, () => "Fishy: no \"good\" queries!");
    // take average: times go by all queries, other measures go by "good" queries only.
    avg.searchTime /= stats.Length;
    avg.docNamesExtractTime /= stats.Length;
    avg.numGoodPoints /= m;
    avg.numPoints /= m;
    avg.recall /= m;
    avg.mrr /= m;
    avg.maxGoodPoints /= m;
    for (int j = 1; j < avg.pAt.Length; j++)
    {
        avg.pAt[j] /= m;
    }
    avg.pReleventSum /= m;                  // this is actually avgp now
    avg.pReleventSum *= avg.maxGoodPoints;  // so that GetAvp() would be correct
    return avg;
}
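// Usage sketch (illustrative, not part of the original sources): averaging the
// per-query stats from Execute(...) and logging the summary. The "SUMMARY:" title
// and indentation prefix are arbitrary choices; the Log(...) overload is the same
// one used elsewhere in this file.
//
//   QualityStats[] stats = qrun.Execute(judge, null, logger);
//   QualityStats avg = QualityStats.Average(stats);
//   avg.Log("SUMMARY:", 1, logger, "  ");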
/// <summary>Analyze/judge results for a single quality query; optionally log them.</summary>
private QualityStats AnalyzeQueryResults(QualityQuery qq, Query q, TopDocs td, IJudge judge, TextWriter logger, long searchTime)
{
    QualityStats stts = new QualityStats(judge.MaxRecall(qq), searchTime);
    ScoreDoc[] sd = td.ScoreDocs;
    long t1 = Support.Time.CurrentTimeMilliseconds();
    // for the first doc name, the extraction time also measures construction of the doc name extractor, just in case.
    DocNameExtractor xt = new DocNameExtractor(m_docNameField);
    for (int i = 0; i < sd.Length; i++)
    {
        string docName = xt.DocName(m_searcher, sd[i].Doc);
        long docNameExtractTime = Support.Time.CurrentTimeMilliseconds() - t1;
        t1 = Support.Time.CurrentTimeMilliseconds();
        bool isRelevant = judge.IsRelevant(docName, qq);
        stts.AddResult(i + 1, isRelevant, docNameExtractTime);
    }
    if (logger != null)
    {
        logger.WriteLine(qq.QueryID + " - " + q);
        stts.Log(qq.QueryID + " Stats:", 1, logger, " ");
    }
    return stts;
}
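// Illustrative sketch (hypothetical data, not part of the original sources): how
// QualityStats accumulates judged results rank by rank, as in the loop above.
// Ranks passed to AddResult are 1-based.
//
//   QualityStats s = new QualityStats(2, 17);  // 2 known-relevant docs, 17 ms search time (assumed values)
//   s.AddResult(1, true, 0);                   // relevant hit at rank 1 -> precision 1/1
//   s.AddResult(2, false, 0);                  // miss at rank 2
//   s.AddResult(3, true, 0);                   // relevant hit at rank 3 -> precision 2/3
//   // s.GetAvp() should now be (1/1 + 2/3) / 2 ~= 0.83, and s.Recall 1.0 (2 of 2 found)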
public void TestTrecQuality()
{
    // first create the partial reuters index
    createReutersIndex();

    int maxResults = 1000;
    string docNameField = "doctitle"; // orig docID is in the linedoc format title

    TextWriter logger = VERBOSE ? Console.Out : null;

    // prepare topics
    Stream topics = GetType().getResourceAsStream("trecTopics.txt");
    TrecTopicsReader qReader = new TrecTopicsReader();
    QualityQuery[] qqs = qReader.ReadQueries(new StreamReader(topics, Encoding.UTF8));

    // prepare judge
    Stream qrels = GetType().getResourceAsStream("trecQRels.txt");
    IJudge judge = new TrecJudge(new StreamReader(qrels, Encoding.UTF8));

    // validate topics & judgments match each other
    judge.ValidateData(qqs, logger);

    Store.Directory dir = NewFSDirectory(new DirectoryInfo(System.IO.Path.Combine(getWorkDir().FullName, "index")));
    IndexReader reader = DirectoryReader.Open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);

    IQualityQueryParser qqParser = new SimpleQQParser("title", "body");
    QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);

    SubmissionReport submitLog = VERBOSE ? new SubmissionReport(logger, "TestRun") : null;
    qrun.MaxResults = maxResults;
    QualityStats[] stats = qrun.Execute(judge, submitLog, logger);

    // --------- verify, based on the way judgments were altered for this test:
    // for some queries, depending on m = qnum % 8
    // m==0: avg_precision and recall are hurt, by marking fake docs as relevant
    // m==1: precision_at_n and avg_precision are hurt, by unmarking relevant docs
    // m==2: all of avg_precision, precision_at_n and recall are hurt.
    // m>=3: these queries remain perfect
    for (int i = 0; i < stats.Length; i++)
    {
        QualityStats s = stats[i];
        switch (i % 8)
        {
            case 0:
                assertTrue("avg-p should be hurt: " + s.GetAvp(), 1.0 > s.GetAvp());
                assertTrue("recall should be hurt: " + s.Recall, 1.0 > s.Recall);
                for (int j = 1; j <= QualityStats.MAX_POINTS; j++)
                {
                    assertEquals("p_at_" + j + " should be perfect: " + s.GetPrecisionAt(j), 1.0, s.GetPrecisionAt(j), 1E-2);
                }
                break;

            case 1:
                assertTrue("avg-p should be hurt", 1.0 > s.GetAvp());
                assertEquals("recall should be perfect: " + s.Recall, 1.0, s.Recall, 1E-2);
                for (int j = 1; j <= QualityStats.MAX_POINTS; j++)
                {
                    assertTrue("p_at_" + j + " should be hurt: " + s.GetPrecisionAt(j), 1.0 > s.GetPrecisionAt(j));
                }
                break;

            case 2:
                assertTrue("avg-p should be hurt: " + s.GetAvp(), 1.0 > s.GetAvp());
                assertTrue("recall should be hurt: " + s.Recall, 1.0 > s.Recall);
                for (int j = 1; j <= QualityStats.MAX_POINTS; j++)
                {
                    assertTrue("p_at_" + j + " should be hurt: " + s.GetPrecisionAt(j), 1.0 > s.GetPrecisionAt(j));
                }
                break;

            default:
                assertEquals("avg-p should be perfect: " + s.GetAvp(), 1.0, s.GetAvp(), 1E-2);
                assertEquals("recall should be perfect: " + s.Recall, 1.0, s.Recall, 1E-2);
                for (int j = 1; j <= QualityStats.MAX_POINTS; j++)
                {
                    assertEquals("p_at_" + j + " should be perfect: " + s.GetPrecisionAt(j), 1.0, s.GetPrecisionAt(j), 1E-2);
                }
                break;
        }
    }

    QualityStats avg = QualityStats.Average(stats);
    if (logger != null)
    {
        avg.Log("Average statistics:", 1, logger, " ");
    }
    assertTrue("mean avg-p should be hurt: " + avg.GetAvp(), 1.0 > avg.GetAvp());
    assertTrue("avg recall should be hurt: " + avg.Recall, 1.0 > avg.Recall);
    for (int j = 1; j <= QualityStats.MAX_POINTS; j++)
    {
        assertTrue("avg p_at_" + j + " should be hurt: " + avg.GetPrecisionAt(j), 1.0 > avg.GetPrecisionAt(j));
    }

    reader.Dispose();
    dir.Dispose();
}