Beispiel #1
0
        /// <summary>
        /// Simulates an iterative judging process. In every iteration the runs are
        /// evaluated with the current relevance estimates; while the mean confidence of
        /// the effectiveness estimates is below <c>_confidence</c>, batches of documents
        /// are selected, "judged" by looking them up in the store of known judgments,
        /// and the documents still pending are re-estimated.
        /// </summary>
        /// <remarks>
        /// One progress line per iteration is written to standard output. The loop ends
        /// when the target confidence is reached or when no query-document pair is left
        /// to judge (in which case a note is written to standard error).
        /// </remarks>
        public override void Run()
        {
            // Read files, initialize store, estimator and measure
            IEnumerable<Run> runs = AbstractCommand.ReadInputFile(this._inputPath);
            RelevanceEstimateStore store = new RelevanceEstimateStore(AbstractCommand.ReadKnownJudgments(this._judgedPath));
            this._estimator.Initialize(runs, new RelevanceEstimate[] { }); // No known judgments at this point
            IMeasure measure = new CG(100); //TODO: max relevance

            // Compile list of all query-doc-sys-rank tuples
            Dictionary<string, Dictionary<string, Dictionary<string, int>>> qdsRanks = AbstractCommand.ToQueryDocumentSystemRanks(runs);
            // Re-structure runs for efficient access
            Dictionary<string, Dictionary<string, Run>> sqRuns = AbstractCommand.ToSystemQueryRuns(runs);

            // Initial estimate of every query-doc pair; these are the pairs pending judgment
            Dictionary<string, Dictionary<string, RelevanceEstimate>> qdEstimates = new Dictionary<string, Dictionary<string, RelevanceEstimate>>();
            foreach (var dsRanks in qdsRanks) {
                string query = dsRanks.Key;
                Dictionary<string, RelevanceEstimate> dEstimates = new Dictionary<string, RelevanceEstimate>();
                foreach (var sRanks in dsRanks.Value) {
                    string doc = sRanks.Key;
                    // Estimated with an estimator that has not seen any judgment yet
                    dEstimates.Add(doc, this._estimator.Estimate(query, doc));
                }
                qdEstimates.Add(query, dEstimates);
            }

            bool needsNext = true;
            double confidence = 0.5;
            int iteration = 1, judged = 0;
            do {
                /* Evaluate */

                if (this._target == EvaluationTargets.Relative) {
                    // Estimate per-query relative effectiveness
                    Dictionary<string, Dictionary<string, Dictionary<string, RelativeEffectivenessEstimate>>> ssqRels =
                        EvaluateCommand.GetSystemSystemQueryRelatives(sqRuns, measure, this._estimator, this._confEstimator);
                    // Average (already sorted)
                    List<RelativeEffectivenessEstimate> relSorted = EvaluateCommand.GetSortedMeanRelatives(ssqRels, this._confEstimator);

                    confidence = relSorted.Average(r => r.Confidence);
                    if (confidence < this._confidence) {
                        // Not confident enough: weight the pending documents for the next batches
                        measure.ComputeQueryDocumentWeights(qdEstimates, qdsRanks, ssqRels);
                    } else {
                        needsNext = false;
                    }
                } else {
                    // Estimate per-query absolute effectiveness
                    Dictionary<string, Dictionary<string, AbsoluteEffectivenessEstimate>> sqAbss =
                        EvaluateCommand.GetSystemQueryAbsolutes(sqRuns, measure, this._estimator, this._confEstimator);
                    // Average and sort
                    List<AbsoluteEffectivenessEstimate> absSorted = EvaluateCommand.GetSortedMeanAbsolutes(sqAbss, this._confEstimator);

                    confidence = absSorted.Average(a => a.Confidence);
                    if (confidence < this._confidence) {
                        // Not confident enough: weight the pending documents for the next batches
                        measure.ComputeQueryDocumentWeights(qdEstimates, qdsRanks, sqAbss);
                    } else {
                        needsNext = false;
                    }
                }

                Console.WriteLine(iteration + " : Conf=" + confidence + " Judged=" + judged);

                if (needsNext && qdEstimates.Count == 0) {
                    // Nothing is pending any more, so no further judgment can be collected.
                    // Without this check the loop would spin forever whenever the target
                    // confidence is not reached after judging everything.
                    Console.Error.WriteLine("No documents left to judge; target confidence not reached.");
                    needsNext = false;
                }

                if (needsNext) {
                    /* Next */

                    var batches = NextCommand.GetBatches(qdEstimates, this._batchNum, this._batchSize);
                    // "Judge" all batches
                    foreach (var batch in batches) {
                        if (!batch.Any()) {
                            // An empty batch has no query to report and nothing to judge
                            continue;
                        }
                        Console.WriteLine(batch[0].Query + " : " + string.Join(" ", batch.Select(d => d.Document)));
                        foreach (var doc in batch) {
                            // Feed the estimator with what the store holds for this query-doc pair
                            this._estimator.Update(store.Estimate(doc.Query, doc.Document));
                            judged++;
                            // Remove from list of pending
                            var dEstimates = qdEstimates[doc.Query];
                            dEstimates.Remove(doc.Document);
                            if (dEstimates.Count == 0) {
                                qdEstimates.Remove(doc.Query);
                            }
                        }
                    }

                    /* Re-estimate */

                    // Re-estimate relevance of pending query-doc pairs
                    foreach (var dEstimates in qdEstimates) {
                        string query = dEstimates.Key;
                        Dictionary<string, RelevanceEstimate> estimates = dEstimates.Value;
                        // Snapshot the keys: the dictionary is written to while iterating
                        foreach (string doc in estimates.Keys.ToArray()) {
                            estimates[doc] = this._estimator.Estimate(query, doc);
                        }
                    }
                }

                iteration++;
            } while (needsNext);

            // TODO: output effectiveness estimates
        }
Beispiel #2
0
        /// <summary>
        /// Evaluates the runs with the stored relevance estimates and, when the mean
        /// confidence of the effectiveness estimates is below <c>_confidence</c>, writes
        /// the next batches of documents to judge to standard output.
        /// </summary>
        public override void Run()
        {
            // Load the runs, the known judgments (only if a path was given) and the estimates
            IEnumerable<Run> runs = AbstractCommand.ReadInputFile(this._inputPath);
            IEnumerable<RelevanceEstimate> judged;
            if (this._judgedPath == null) {
                judged = new RelevanceEstimate[0];
            } else {
                judged = AbstractCommand.ReadKnownJudgments(this._judgedPath);
            }
            IEnumerable<RelevanceEstimate> estimates = AbstractCommand.ReadEstimatedJudgments(this._estimatedPath);

            // The store is built from the estimates and then updated with the known judgments
            RelevanceEstimateStore store = new RelevanceEstimateStore(estimates);
            store.Update(judged);
            IMeasure measure = new CG(100); //TODO: max relevance

            // All query-doc-sys-rank tuples
            Dictionary<string, Dictionary<string, Dictionary<string, int>>> qdsRanks = AbstractCommand.ToQueryDocumentSystemRanks(runs);

            // Group the estimates as query -> document -> estimate
            var pending = new Dictionary<string, Dictionary<string, RelevanceEstimate>>();
            foreach (RelevanceEstimate estimate in estimates) {
                Dictionary<string, RelevanceEstimate> byDocument;
                if (!pending.TryGetValue(estimate.Query, out byDocument)) {
                    byDocument = new Dictionary<string, RelevanceEstimate>();
                    pending[estimate.Query] = byDocument;
                }
                byDocument.Add(estimate.Document, estimate);
            }

            // Drop every query-doc pair that already has a known judgment
            foreach (RelevanceEstimate known in judged) {
                Dictionary<string, RelevanceEstimate> byDocument;
                if (!pending.TryGetValue(known.Query, out byDocument)) {
                    continue;
                }
                byDocument.Remove(known.Document);
                if (byDocument.Count == 0) {
                    // Query has no pending documents left
                    pending.Remove(known.Query);
                }
            }

            // Runs indexed as system -> query -> run
            Dictionary<string, Dictionary<string, Run>> sqRuns = AbstractCommand.ToSystemQueryRuns(runs);

            bool needsNext;
            if (this._target == EvaluationTargets.Relative) {
                // Per-query relative effectiveness, then the sorted means
                Dictionary<string, Dictionary<string, Dictionary<string, RelativeEffectivenessEstimate>>> ssqRels =
                    EvaluateCommand.GetSystemSystemQueryRelatives(sqRuns, measure, store, this._confEstimator);
                List<RelativeEffectivenessEstimate> relSorted = EvaluateCommand.GetSortedMeanRelatives(ssqRels, this._confEstimator);

                needsNext = relSorted.Average(r => r.Confidence) < this._confidence;
                if (needsNext) {
                    measure.ComputeQueryDocumentWeights(pending, qdsRanks, ssqRels);
                }
            } else {
                // Per-query absolute effectiveness, then the sorted means
                Dictionary<string, Dictionary<string, AbsoluteEffectivenessEstimate>> sqAbss =
                    EvaluateCommand.GetSystemQueryAbsolutes(sqRuns, measure, store, this._confEstimator);
                List<AbsoluteEffectivenessEstimate> absSorted = EvaluateCommand.GetSortedMeanAbsolutes(sqAbss, this._confEstimator);

                needsNext = absSorted.Average(a => a.Confidence) < this._confidence;
                if (needsNext) {
                    measure.ComputeQueryDocumentWeights(pending, qdsRanks, sqAbss);
                }
            }

            if (!needsNext) {
                // Confidence target already met: nothing to output
                return;
            }

            // Write at most _batchNum batches to standard output
            var batches = NextCommand.GetBatches(pending, this._batchNum, this._batchSize);
            var writer = (IWriter<List<RelevanceEstimate>>)new TabSeparated(this._decimalDigits);
            writer.Write(Console.Out, batches.Take(this._batchNum));
        }