Esempio n. 1
0
        public override void Run()
        {
            // Read files, initialize store, estimator and measure
            IEnumerable <Run>      runs  = AbstractCommand.ReadInputFile(this._inputPath);
            RelevanceEstimateStore store = new RelevanceEstimateStore(AbstractCommand.ReadKnownJudgments(this._judgedPath));

            this._estimator.Initialize(runs, new RelevanceEstimate[] { }); // No known judgments at this point
            IMeasure measure = new CG(100);                                //TODO: max relevance

            // Compile list of all query-doc-sys-rank tuples
            Dictionary <string, Dictionary <string, Dictionary <string, int> > > qdsRanks = AbstractCommand.ToQueryDocumentSystemRanks(runs);
            // Re-structure runs for efficient access
            Dictionary <string, Dictionary <string, Run> > sqRuns = AbstractCommand.ToSystemQueryRuns(runs);

            // Estimate relevance of all query-doc pairs
            Dictionary <string, Dictionary <string, RelevanceEstimate> > qdEstimates = new Dictionary <string, Dictionary <string, RelevanceEstimate> >();

            foreach (var dsRanks in qdsRanks)
            {
                string query = dsRanks.Key;
                Dictionary <string, RelevanceEstimate> dEstimates = new Dictionary <string, RelevanceEstimate>();
                foreach (var sRanks in dsRanks.Value)
                {
                    string doc = sRanks.Key;
                    dEstimates.Add(doc, this._estimator.Estimate(query, doc)); // Don't estimate yet, will do inside the loop
                }
                qdEstimates.Add(query, dEstimates);
            }


            bool   needsNext = true;
            double confidence = 0.5;
            int    iteration = 1, judged = 0;

            do
            {
                /* Evaluate */

                if (this._target == EvaluationTargets.Relative)
                {
                    // Estimate per-query relative effectiveness
                    Dictionary <string, Dictionary <string, Dictionary <string, RelativeEffectivenessEstimate> > > ssqRels =
                        EvaluateCommand.GetSystemSystemQueryRelatives(sqRuns, measure, this._estimator, this._confEstimator);
                    // Average (already sorted)
                    List <RelativeEffectivenessEstimate> relSorted = EvaluateCommand.GetSortedMeanRelatives(ssqRels, this._confEstimator);

                    confidence = relSorted.Average(r => r.Confidence);
                    if (confidence < this._confidence)
                    {
                        measure.ComputeQueryDocumentWeights(qdEstimates, qdsRanks, ssqRels);
                    }
                    else
                    {
                        needsNext = false;
                    }
                }
                else
                {
                    // Estimate per-query absolute effectiveness
                    Dictionary <string, Dictionary <string, AbsoluteEffectivenessEstimate> > sqAbss =
                        EvaluateCommand.GetSystemQueryAbsolutes(sqRuns, measure, this._estimator, this._confEstimator);
                    // Average and sort
                    List <AbsoluteEffectivenessEstimate> absSorted = EvaluateCommand.GetSortedMeanAbsolutes(sqAbss, this._confEstimator);

                    confidence = absSorted.Average(a => a.Confidence);
                    if (confidence < this._confidence)
                    {
                        measure.ComputeQueryDocumentWeights(qdEstimates, qdsRanks, sqAbss);
                    }
                    else
                    {
                        needsNext = false;
                    }
                }

                Console.WriteLine(iteration + " : Conf=" + confidence + " Judged=" + judged);
                if (needsNext)
                {
                    /* Next */

                    var batches = NextCommand.GetBatches(qdEstimates, this._batchNum, this._batchSize);
                    // "Judge" all batches
                    foreach (var batch in batches)
                    {
                        Console.WriteLine(batch[0].Query + " : " + string.Join(" ", batch.Select(d => d.Document)));
                        foreach (var doc in batch)
                        {
                            this._estimator.Update(store.Estimate(doc.Query, doc.Document));
                            judged++;
                            // Remove from list of pending
                            var dEstimates = qdEstimates[doc.Query];
                            dEstimates.Remove(doc.Document);
                            if (dEstimates.Count == 0)
                            {
                                qdEstimates.Remove(doc.Query);
                            }
                        }
                    }

                    /* Re-estimate */

                    // Re-estimate relevance of pending query-doc pairs
                    foreach (var dEstimates in qdEstimates)
                    {
                        string query = dEstimates.Key;
                        Dictionary <string, RelevanceEstimate> estimates = dEstimates.Value;
                        foreach (string doc in estimates.Keys.ToArray())
                        {
                            estimates[doc] = this._estimator.Estimate(query, doc);
                        }
                    }
                }

                iteration++;
            } while (needsNext);

            // TODO: output effectiveness estimates
        }
Esempio n. 2
0
        public override void Run()
        {
            // Read files
            IEnumerable <Run> runs = AbstractCommand.ReadInputFile(this._inputPath);
            IEnumerable <RelevanceEstimate> judged = new RelevanceEstimate[] { };

            if (this._judgedPath != null)
            {
                judged = AbstractCommand.ReadKnownJudgments(this._judgedPath);
            }
            IEnumerable <RelevanceEstimate> estimates = AbstractCommand.ReadEstimatedJudgments(this._estimatedPath);
            // Instantiate estimate store and measure
            RelevanceEstimateStore store = new RelevanceEstimateStore(estimates);

            store.Update(judged);
            IMeasure measure = new CG(100); //TODO: max relevance

            // Compile list of all query-doc-sys-rank tuples
            Dictionary <string, Dictionary <string, Dictionary <string, int> > > qdsRanks = AbstractCommand.ToQueryDocumentSystemRanks(runs);
            // Re-structure estimates
            Dictionary <string, Dictionary <string, RelevanceEstimate> > qdEstimates = new Dictionary <string, Dictionary <string, RelevanceEstimate> >();

            foreach (var est in estimates)
            {
                Dictionary <string, RelevanceEstimate> dEstimates = null;
                if (!qdEstimates.TryGetValue(est.Query, out dEstimates))
                {
                    dEstimates = new Dictionary <string, RelevanceEstimate>();
                    qdEstimates.Add(est.Query, dEstimates);
                }
                dEstimates.Add(est.Document, est);
            }
            // Remove judged query-docs
            foreach (var j in judged)
            {
                Dictionary <string, RelevanceEstimate> dEstimates = null;
                if (qdEstimates.TryGetValue(j.Query, out dEstimates))
                {
                    dEstimates.Remove(j.Document);
                    if (dEstimates.Count == 0)
                    {
                        qdEstimates.Remove(j.Query);
                    }
                }
            }

            bool needsNext = false;
            // Re-structure runs for efficient access
            Dictionary <string, Dictionary <string, Run> > sqRuns = AbstractCommand.ToSystemQueryRuns(runs);

            if (this._target == EvaluationTargets.Relative)
            {
                // Estimate per-query relative effectiveness
                Dictionary <string, Dictionary <string, Dictionary <string, RelativeEffectivenessEstimate> > > ssqRels =
                    EvaluateCommand.GetSystemSystemQueryRelatives(sqRuns, measure, store, this._confEstimator);
                // Average (already sorted)
                List <RelativeEffectivenessEstimate> relSorted = EvaluateCommand.GetSortedMeanRelatives(ssqRels, this._confEstimator);

                if (relSorted.Average(r => r.Confidence) < this._confidence)
                {
                    needsNext = true;
                    measure.ComputeQueryDocumentWeights(qdEstimates, qdsRanks, ssqRels);
                }
            }
            else
            {
                // Estimate per-query absolute effectiveness
                Dictionary <string, Dictionary <string, AbsoluteEffectivenessEstimate> > sqAbss =
                    EvaluateCommand.GetSystemQueryAbsolutes(sqRuns, measure, store, this._confEstimator);
                // Average and sort
                List <AbsoluteEffectivenessEstimate> absSorted = EvaluateCommand.GetSortedMeanAbsolutes(sqAbss, this._confEstimator);

                if (absSorted.Average(a => a.Confidence) < this._confidence)
                {
                    needsNext = true;
                    measure.ComputeQueryDocumentWeights(qdEstimates, qdsRanks, sqAbss);
                }
            }

            if (needsNext)
            {
                var          batches = NextCommand.GetBatches(qdEstimates, this._batchNum, this._batchSize);
                TabSeparated io      = new TabSeparated(this._decimalDigits);
                ((IWriter <List <RelevanceEstimate> >)io).Write(Console.Out, batches.Take(this._batchNum));
            }
        }