/// <summary>
/// Estimates the relevance of every query-document pair found in the input runs and
/// writes the estimates to standard output through a <c>TabSeparated</c> writer.
/// </summary>
/// <remarks>
/// Known judgments are optional: when no judgments path is set, the wrapped estimator
/// is initialized with an empty set.
/// </remarks>
public override void Run()
{
    // Load the runs and, if a path was given, the known judgments
    IEnumerable<Run> runs = AbstractCommand.ReadInputFile(this._inputPath);
    IEnumerable<RelevanceEstimate> judged = this._judgedPath != null
        ? AbstractCommand.ReadKnownJudgments(this._judgedPath)
        : new RelevanceEstimate[] { };

    // Prepare the wrapped estimator
    this._estimator.Initialize(runs, judged);

    // One estimate per (query, document) pair appearing in the runs
    Dictionary<string, HashSet<string>> docsByQuery = AbstractCommand.ToQueryDocuments(runs);
    List<RelevanceEstimate> estimates = new List<RelevanceEstimate>();
    foreach (KeyValuePair<string, HashSet<string>> entry in docsByQuery)
    {
        string query = entry.Key;
        foreach (string document in entry.Value)
        {
            RelevanceEstimate estimate = this._estimator.Estimate(query, document);
            estimates.Add(estimate);
        }
    }

    // Write the estimates
    IWriter<RelevanceEstimate> writer = (IWriter<RelevanceEstimate>)new TabSeparated(this._decimalDigits);
    writer.Write(Console.Out, estimates);
}
/// <summary>
/// Stores the known judgments under their query-document id and creates the underlying
/// estimator that corresponds to the configured name.
/// </summary>
/// <param name="runs">Runs handed to the "mout" and "mjud" estimators.</param>
/// <param name="judged">Known judgments, stored under the id returned by <c>RelevanceEstimate.GetId</c>.</param>
/// <remarks>
/// Recognized names are "uniform", "mout" and "mjud"; for any other name no estimator is
/// assigned here. "mout" and "mjud" read metadata from the path in the "meta" parameter,
/// and "mjud" additionally reads judgments from the "judged" parameter when it is present.
/// </remarks>
public void Initialize(IEnumerable<Run> runs, IEnumerable<RelevanceEstimate> judged)
{
    // Index known judgments by their query-document id
    foreach (RelevanceEstimate known in judged)
    {
        this._judged[RelevanceEstimate.GetId(known.Query, known.Document)] = known;
    }

    // Create the underlying estimator
    string name = this._name;
    if (name == "uniform")
    {
        // Needs neither runs nor metadata
        this._estimator = new UniformRelevanceEstimator(100);
    }
    else if (name == "mout")
    {
        IEnumerable<Metadata> metadata = AbstractCommand.ReadMetadata(this._parameters["meta"]);
        this._estimator = new MoutRelevanceEstimator(runs, metadata);
    }
    else if (name == "mjud")
    {
        IEnumerable<Metadata> metadata = AbstractCommand.ReadMetadata(this._parameters["meta"]);

        // Judgments for this estimator are optional
        IEnumerable<RelevanceEstimate> judgedEst;
        if (this._parameters.ContainsKey("judged"))
        {
            judgedEst = AbstractCommand.ReadKnownJudgments(this._parameters["judged"]);
        }
        else
        {
            judgedEst = new RelevanceEstimate[] { };
        }

        this._estimator = new MjudRelevanceEstimator(runs, metadata, judgedEst);
    }
}
/// <summary>
/// Writes to standard output one tab-separated line per known judgment, containing the
/// query, the document, the true relevance and the feature values that the wrapped
/// estimator computes for that query-document pair.
/// </summary>
/// <remarks>
/// All numbers are formatted with the invariant culture so that every column of a line
/// uses the same decimal separator regardless of the machine's regional settings.
/// </remarks>
public override void Run()
{
    // Read files
    IEnumerable<Run> runs = AbstractCommand.ReadInputFile(this._inputPath);
    IEnumerable<RelevanceEstimate> judged = AbstractCommand.ReadKnownJudgments(this._judgedPath);

    // Double format: "0." followed by one optional-digit placeholder per decimal digit
    string doubleFormat = "0.";
    for (int i = 0; i < this._decimalDigits; i++)
    {
        doubleFormat += "#";
    }

    // Initialize wrapped estimator, without any known judgments
    this._estimator.Initialize(runs, new RelevanceEstimate[] { });

    // Estimate and output
    foreach (var rel in judged)
    {
        double label = rel.Expectation; // true relevance
        double[] features = this._estimator.Features(rel.Query, rel.Document);

        // NOTE(review): the returned estimate is not used for the output; the call is kept
        // in case Estimate has side effects - confirm whether it can be removed.
        this._estimator.Estimate(rel.Query, rel.Document);

        List<string> strings = new List<string>
        {
            rel.Query,
            rel.Document,
            // Invariant culture, consistent with the feature columns below
            label.ToString(CultureInfo.InvariantCulture)
        };
        strings.AddRange(features.Select(f => f.ToString(doubleFormat, CultureInfo.InvariantCulture)));

        Console.WriteLine(string.Join("\t", strings));
    }
}
/// <summary>
/// Evaluates the systems in the input runs from estimated (and, optionally, known) judgments
/// and writes the mean absolute estimates followed by the mean relative estimates to
/// standard output.
/// </summary>
/// <remarks>
/// The estimate store is built from the estimated judgments and then updated with the
/// known judgments, if a judgments path was given.
/// </remarks>
public override void Run()
{
    // Inputs: runs, optional known judgments, estimated judgments
    IEnumerable<Run> runs = AbstractCommand.ReadInputFile(this._inputPath);
    IEnumerable<RelevanceEstimate> judged = this._judgedPath != null
        ? AbstractCommand.ReadKnownJudgments(this._judgedPath)
        : new RelevanceEstimate[] { };
    IEnumerable<RelevanceEstimate> estimates = AbstractCommand.ReadEstimatedJudgments(this._estimatedPath);

    // Estimate store (estimates first, then known judgments) and measure
    RelevanceEstimateStore store = new RelevanceEstimateStore(estimates);
    store.Update(judged);
    IMeasure measure = new CG(100); //TODO: max relevance

    // Runs indexed by system and query
    var runsBySystemQuery = AbstractCommand.ToSystemQueryRuns(runs);

    // Absolute effectiveness: per query, then averaged and sorted
    var absolutesPerQuery =
        EvaluateCommand.GetSystemQueryAbsolutes(runsBySystemQuery, measure, store, this._confEstimator);
    List<AbsoluteEffectivenessEstimate> meanAbsolutes =
        EvaluateCommand.GetSortedMeanAbsolutes(absolutesPerQuery, this._confEstimator);

    // Relative effectiveness: per query, then averaged (already sorted)
    var relativesPerQuery =
        EvaluateCommand.GetSystemSystemQueryRelatives(runsBySystemQuery, measure, store, this._confEstimator);
    List<RelativeEffectivenessEstimate> meanRelatives =
        EvaluateCommand.GetSortedMeanRelatives(relativesPerQuery, this._confEstimator);

    // Both lists go through the same formatter instance
    TabSeparated formatter = new TabSeparated(this._decimalDigits);
    IWriter<AbsoluteEffectivenessEstimate> absoluteWriter = (IWriter<AbsoluteEffectivenessEstimate>)formatter;
    absoluteWriter.Write(Console.Out, meanAbsolutes);
    IWriter<RelativeEffectivenessEstimate> relativeWriter = (IWriter<RelativeEffectivenessEstimate>)formatter;
    relativeWriter.Write(Console.Out, meanRelatives);
}
/// <summary>
/// Simulates an iterative judging process. Starting with no known judgments, it repeatedly
/// evaluates the systems with the wrapped estimator and, while the average confidence is
/// below the target, "judges" batches of documents by looking up their judgments in the
/// store built from the known-judgments file and feeding them to the estimator.
/// </summary>
/// <remarks>
/// One progress line is written to standard output per iteration, plus one line per judged
/// batch. The loop ends when the target confidence is reached or when no query-document
/// pair is left to judge; without the second condition the loop could never end once every
/// pair had been judged with the confidence still below the target.
/// </remarks>
public override void Run()
{
    // Read files, initialize store, estimator and measure
    IEnumerable<Run> runs = AbstractCommand.ReadInputFile(this._inputPath);
    RelevanceEstimateStore store = new RelevanceEstimateStore(AbstractCommand.ReadKnownJudgments(this._judgedPath));
    this._estimator.Initialize(runs, new RelevanceEstimate[] { }); // No known judgments at this point
    IMeasure measure = new CG(100); //TODO: max relevance

    // Compile list of all query-doc-sys-rank tuples
    Dictionary<string, Dictionary<string, Dictionary<string, int>>> qdsRanks =
        AbstractCommand.ToQueryDocumentSystemRanks(runs);
    // Re-structure runs for efficient access
    Dictionary<string, Dictionary<string, Run>> sqRuns = AbstractCommand.ToSystemQueryRuns(runs);

    // Initial relevance estimates of all query-doc pairs; these are the pairs pending judgment
    Dictionary<string, Dictionary<string, RelevanceEstimate>> qdEstimates =
        new Dictionary<string, Dictionary<string, RelevanceEstimate>>();
    foreach (var dsRanks in qdsRanks)
    {
        string query = dsRanks.Key;
        Dictionary<string, RelevanceEstimate> initial = new Dictionary<string, RelevanceEstimate>();
        foreach (var sRanks in dsRanks.Value)
        {
            string doc = sRanks.Key;
            // Refreshed after every round of judgments (see the re-estimate step in the loop)
            initial.Add(doc, this._estimator.Estimate(query, doc));
        }
        qdEstimates.Add(query, initial);
    }

    bool needsNext = true;
    double confidence = 0.5;
    int iteration = 1, judged = 0;
    do
    {
        // More judging is only possible while some query-doc pair is still pending
        bool canJudgeMore = qdEstimates.Count > 0;

        /* Evaluate */
        if (this._target == EvaluationTargets.Relative)
        {
            // Estimate per-query relative effectiveness
            Dictionary<string, Dictionary<string, Dictionary<string, RelativeEffectivenessEstimate>>> ssqRels =
                EvaluateCommand.GetSystemSystemQueryRelatives(sqRuns, measure, this._estimator, this._confEstimator);
            // Average (already sorted)
            List<RelativeEffectivenessEstimate> relSorted =
                EvaluateCommand.GetSortedMeanRelatives(ssqRels, this._confEstimator);

            confidence = relSorted.Average(r => r.Confidence);
            if (confidence < this._confidence && canJudgeMore)
            {
                measure.ComputeQueryDocumentWeights(qdEstimates, qdsRanks, ssqRels);
            }
            else
            {
                needsNext = false;
            }
        }
        else
        {
            // Estimate per-query absolute effectiveness
            Dictionary<string, Dictionary<string, AbsoluteEffectivenessEstimate>> sqAbss =
                EvaluateCommand.GetSystemQueryAbsolutes(sqRuns, measure, this._estimator, this._confEstimator);
            // Average and sort
            List<AbsoluteEffectivenessEstimate> absSorted =
                EvaluateCommand.GetSortedMeanAbsolutes(sqAbss, this._confEstimator);

            confidence = absSorted.Average(a => a.Confidence);
            if (confidence < this._confidence && canJudgeMore)
            {
                measure.ComputeQueryDocumentWeights(qdEstimates, qdsRanks, sqAbss);
            }
            else
            {
                needsNext = false;
            }
        }
        Console.WriteLine(iteration + " : Conf=" + confidence + " Judged=" + judged);

        if (needsNext)
        {
            /* Next */
            var batches = NextCommand.GetBatches(qdEstimates, this._batchNum, this._batchSize);
            // "Judge" all batches
            foreach (var batch in batches)
            {
                Console.WriteLine(batch[0].Query + " : " + string.Join(" ", batch.Select(d => d.Document)));
                foreach (var doc in batch)
                {
                    // The store provides the judgment for this document
                    this._estimator.Update(store.Estimate(doc.Query, doc.Document));
                    judged++;

                    // Remove from list of pending
                    var pending = qdEstimates[doc.Query];
                    pending.Remove(doc.Document);
                    if (pending.Count == 0)
                    {
                        qdEstimates.Remove(doc.Query);
                    }
                }
            }

            /* Re-estimate */
            // Re-estimate relevance of pending query-doc pairs
            foreach (var queryEstimates in qdEstimates)
            {
                string query = queryEstimates.Key;
                Dictionary<string, RelevanceEstimate> estimates = queryEstimates.Value;
                // Snapshot the keys: values are replaced while iterating
                foreach (string doc in estimates.Keys.ToArray())
                {
                    estimates[doc] = this._estimator.Estimate(query, doc);
                }
            }
        }
        iteration++;
    } while (needsNext);

    // TODO: output effectiveness estimates
}
/// <summary>
/// Decides whether more judgments are needed and, if so, writes the next batches of
/// documents to judge to standard output.
/// </summary>
/// <remarks>
/// The systems are evaluated (relative or absolute, depending on the configured target) from
/// the estimated judgments updated with the known ones. Batches are only produced when the
/// average confidence of the mean estimates is below the target confidence; otherwise nothing
/// is written.
/// </remarks>
public override void Run()
{
    // Inputs: runs, optional known judgments, estimated judgments
    IEnumerable<Run> runs = AbstractCommand.ReadInputFile(this._inputPath);
    IEnumerable<RelevanceEstimate> judged = this._judgedPath != null
        ? AbstractCommand.ReadKnownJudgments(this._judgedPath)
        : new RelevanceEstimate[] { };
    IEnumerable<RelevanceEstimate> estimates = AbstractCommand.ReadEstimatedJudgments(this._estimatedPath);

    // Estimate store (estimates first, then known judgments) and measure
    RelevanceEstimateStore store = new RelevanceEstimateStore(estimates);
    store.Update(judged);
    IMeasure measure = new CG(100); //TODO: max relevance

    // All query-doc-sys-rank tuples
    Dictionary<string, Dictionary<string, Dictionary<string, int>>> qdsRanks =
        AbstractCommand.ToQueryDocumentSystemRanks(runs);

    // Estimates indexed by query and then by document
    Dictionary<string, Dictionary<string, RelevanceEstimate>> pending =
        new Dictionary<string, Dictionary<string, RelevanceEstimate>>();
    foreach (RelevanceEstimate estimate in estimates)
    {
        Dictionary<string, RelevanceEstimate> byDocument;
        if (!pending.TryGetValue(estimate.Query, out byDocument))
        {
            byDocument = new Dictionary<string, RelevanceEstimate>();
            pending[estimate.Query] = byDocument;
        }
        byDocument.Add(estimate.Document, estimate);
    }

    // Drop the pairs that are already judged, and the queries left without pairs
    foreach (RelevanceEstimate known in judged)
    {
        Dictionary<string, RelevanceEstimate> byDocument;
        if (!pending.TryGetValue(known.Query, out byDocument))
        {
            continue;
        }
        byDocument.Remove(known.Document);
        if (byDocument.Count == 0)
        {
            pending.Remove(known.Query);
        }
    }

    // Runs indexed by system and query
    Dictionary<string, Dictionary<string, Run>> sqRuns = AbstractCommand.ToSystemQueryRuns(runs);

    // Evaluate; more judgments are needed when the average confidence is below the target
    bool needsNext;
    if (this._target == EvaluationTargets.Relative)
    {
        // Per-query relative effectiveness, then averaged (already sorted)
        Dictionary<string, Dictionary<string, Dictionary<string, RelativeEffectivenessEstimate>>> ssqRels =
            EvaluateCommand.GetSystemSystemQueryRelatives(sqRuns, measure, store, this._confEstimator);
        List<RelativeEffectivenessEstimate> relSorted =
            EvaluateCommand.GetSortedMeanRelatives(ssqRels, this._confEstimator);

        needsNext = relSorted.Average(r => r.Confidence) < this._confidence;
        if (needsNext)
        {
            measure.ComputeQueryDocumentWeights(pending, qdsRanks, ssqRels);
        }
    }
    else
    {
        // Per-query absolute effectiveness, then averaged and sorted
        Dictionary<string, Dictionary<string, AbsoluteEffectivenessEstimate>> sqAbss =
            EvaluateCommand.GetSystemQueryAbsolutes(sqRuns, measure, store, this._confEstimator);
        List<AbsoluteEffectivenessEstimate> absSorted =
            EvaluateCommand.GetSortedMeanAbsolutes(sqAbss, this._confEstimator);

        needsNext = absSorted.Average(a => a.Confidence) < this._confidence;
        if (needsNext)
        {
            measure.ComputeQueryDocumentWeights(pending, qdsRanks, sqAbss);
        }
    }

    if (!needsNext)
    {
        return;
    }

    // Output at most _batchNum batches
    var batches = NextCommand.GetBatches(pending, this._batchNum, this._batchSize);
    IWriter<List<RelevanceEstimate>> writer = (IWriter<List<RelevanceEstimate>>)new TabSeparated(this._decimalDigits);
    writer.Write(Console.Out, batches.Take(this._batchNum));
}