示例#1
0
        /// <summary>
        /// Reads the input runs (and the known judgments, when a path for them was set),
        /// initializes the wrapped estimator, estimates the relevance of every
        /// query-document pair found in the runs and writes the estimates to standard output.
        /// </summary>
        public override void Run()
        {
            // Load the runs first, then the known judgments if a path was provided
            IEnumerable<Run> inputRuns = AbstractCommand.ReadInputFile(this._inputPath);
            IEnumerable<RelevanceEstimate> knownJudgments = this._judgedPath != null
                ? AbstractCommand.ReadKnownJudgments(this._judgedPath)
                : new RelevanceEstimate[] { };

            // Prepare the wrapped estimator with everything read so far
            this._estimator.Initialize(inputRuns, knownJudgments);

            // Query -> set of documents appearing in the runs
            Dictionary<string, HashSet<string>> documentsByQuery = AbstractCommand.ToQueryDocuments(inputRuns);

            // One estimate per query-document pair
            List<RelevanceEstimate> results = new List<RelevanceEstimate>();
            foreach (KeyValuePair<string, HashSet<string>> entry in documentsByQuery)
            {
                string query = entry.Key;
                foreach (string document in entry.Value)
                {
                    RelevanceEstimate estimate = this._estimator.Estimate(query, document);
                    results.Add(estimate);
                }
            }

            // Write the estimates to standard output
            TabSeparated formatter = new TabSeparated(this._decimalDigits);
            IWriter<RelevanceEstimate> writer = (IWriter<RelevanceEstimate>)formatter;
            writer.Write(Console.Out, results);
        }
示例#2
0
        /// <summary>
        /// Writes one tab-separated line per known judgment to standard output:
        /// query, document, the judgment's expectation (used as the label) and the
        /// feature values the wrapped estimator computes for that query-document pair.
        /// </summary>
        /// <remarks>
        /// All numbers are formatted with the invariant culture so the output does not
        /// depend on the machine's regional settings. The label is written with the
        /// default double format; features use "0." followed by one '#' per decimal digit.
        /// </remarks>
        public override void Run()
        {
            // Read files
            IEnumerable<Run> runs = AbstractCommand.ReadInputFile(this._inputPath);
            IEnumerable<RelevanceEstimate> judged = AbstractCommand.ReadKnownJudgments(this._judgedPath);

            // Double format: "0." plus one optional-digit placeholder per requested decimal
            string doubleFormat = "0.";
            for (int i = 0; i < this._decimalDigits; i++)
            {
                doubleFormat += "#";
            }

            // Initialize wrapped estimator, without any known judgments
            this._estimator.Initialize(runs, new RelevanceEstimate[] { });

            // Estimate and output
            foreach (var rel in judged)
            {
                double   label    = rel.Expectation; // true relevance
                double[] features = this._estimator.Features(rel.Query, rel.Document);

                // The estimate itself is not written out. The call is kept because this
                // method cannot tell whether Estimate has side effects on the estimator.
                this._estimator.Estimate(rel.Query, rel.Document);

                List<string> strings = new List<string>();
                strings.Add(rel.Query);
                strings.Add(rel.Document);
                // Invariant culture, consistent with the feature columns below
                strings.Add(label.ToString(CultureInfo.InvariantCulture));
                strings.AddRange(features.Select(f => f.ToString(doubleFormat, CultureInfo.InvariantCulture)));

                Console.WriteLine(string.Join("\t", strings));
            }
        }
示例#3
0
        /// <summary>
        /// Indexes the known judgments by their identifier and instantiates the
        /// underlying estimator selected by this wrapper's name.
        /// </summary>
        /// <param name="runs">Runs handed to the "mout" and "mjud" estimators.</param>
        /// <param name="judged">Known judgments to index in this wrapper.</param>
        public void Initialize(IEnumerable<Run> runs, IEnumerable<RelevanceEstimate> judged)
        {
            // Index known judgments by identifier; a later entry overwrites an earlier one
            foreach (RelevanceEstimate known in judged)
            {
                string key = RelevanceEstimate.GetId(known.Query, known.Document);
                this._judged[key] = known;
            }

            // Instantiate the estimator that matches the configured name.
            // NOTE(review): there is no default case, so an unrecognized name leaves
            // _estimator untouched - confirm the name is validated elsewhere.
            switch (this._name)
            {
            case "uniform":
            {
                // nothing to read for this estimator
                this._estimator = new UniformRelevanceEstimator(100);
                break;
            }

            case "mout":
            {
                // needs the metadata named by the "meta" parameter
                IEnumerable<Metadata> moutMetadata = AbstractCommand.ReadMetadata(this._parameters["meta"]);
                this._estimator = new MoutRelevanceEstimator(runs, moutMetadata);
                break;
            }

            case "mjud":
            {
                // needs the metadata plus, optionally, judgments named by the "judged" parameter
                IEnumerable<Metadata> mjudMetadata = AbstractCommand.ReadMetadata(this._parameters["meta"]);
                IEnumerable<RelevanceEstimate> judgedEst;
                if (this._parameters.ContainsKey("judged"))
                {
                    judgedEst = AbstractCommand.ReadKnownJudgments(this._parameters["judged"]);
                }
                else
                {
                    judgedEst = new RelevanceEstimate[] { };
                }
                this._estimator = new MjudRelevanceEstimator(runs, mjudMetadata, judgedEst);
                break;
            }
            }
        }
示例#4
0
        /// <summary>
        /// Reads and validates the command-line options of this command: confidence (-c),
        /// relative and absolute target effect sizes (-s), decimal digits (-d), input file (-i),
        /// optional known judgments file (-j) and estimated judgments file (-e).
        /// </summary>
        /// <param name="cmd">Parsed command line.</param>
        /// <exception cref="ArgumentException">
        /// Thrown when -s is given with a number of values other than two.
        /// </exception>
        public override void CheckOptions(CommandLine cmd)
        {
            // Confidence estimator: start from the defaults and override what was supplied
            double confidenceLevel = Allcea.DEFAULT_CONFIDENCE;
            double relativeSize    = Allcea.DEFAULT_RELATIVE_SIZE;
            double absoluteSize    = Allcea.DEFAULT_ABSOLUTE_SIZE;

            if (cmd.HasOption('c'))
            {
                string confidenceText = cmd.GetOptionValue('c');
                confidenceLevel = AbstractCommand.CheckConfidence(confidenceText);
            }

            if (cmd.HasOption('s'))
            {
                string[] sizes = cmd.GetOptionValues('s');
                if (sizes.Length != 2)
                {
                    throw new ArgumentException("Must provide two target effect sizes: relative and absolute.");
                }
                // First value is the relative size, second the absolute size
                relativeSize = AbstractCommand.CheckRelativeSize(sizes[0]);
                absoluteSize = AbstractCommand.CheckAbsoluteSize(sizes[1]);
            }

            this._confEstimator = new NormalConfidenceEstimator(confidenceLevel, relativeSize, absoluteSize);

            // Number of decimal digits for output, only when requested
            if (cmd.HasOption('d'))
            {
                string digitsText = cmd.GetOptionValue('d');
                this._decimalDigits = AbstractCommand.CheckDigits(digitsText);
            }

            // Files: input and estimated judgments are always checked, known judgments only if given
            this._inputPath = AbstractCommand.CheckInputFile(cmd.GetOptionValue('i'));

            if (cmd.HasOption('j'))
            {
                this._judgedPath = AbstractCommand.CheckJudgedFile(cmd.GetOptionValue('j'));
            }

            this._estimatedPath = AbstractCommand.CheckEstimatedFile(cmd.GetOptionValue('e'));
        }
示例#5
0
        /// <summary>
        /// Reads and validates the command-line options of this command: decimal digits (-d),
        /// input file (-i), known judgments file (-j), estimator parameters (-p) and
        /// estimator name (-e).
        /// </summary>
        /// <param name="cmd">Parsed command line.</param>
        public override void CheckOptions(CommandLine cmd)
        {
            // Number of decimal digits for output, only when requested
            if (cmd.HasOption('d'))
            {
                string digitsText = cmd.GetOptionValue('d');
                this._decimalDigits = AbstractCommand.CheckDigits(digitsText);
            }

            // Both files are checked unconditionally here
            string inputOption  = cmd.GetOptionValue('i');
            this._inputPath = AbstractCommand.CheckInputFile(inputOption);
            string judgedOption = cmd.GetOptionValue('j');
            this._judgedPath = AbstractCommand.CheckJudgedFile(judgedOption);

            // Estimator: name/value parameters are parsed first, then the wrapper is built
            Dictionary<string, string> estimatorParameters =
                Allcea.ParseNameValueParameters(cmd.GetOptionValues('p'));
            this._estimator = new EstimatorWrapper(cmd.GetOptionValue('e'), estimatorParameters);
        }
示例#6
0
        /// <summary>
        /// Reads the runs, the optional known judgments and the estimated judgments, then
        /// computes and writes to standard output the sorted mean absolute effectiveness
        /// estimates followed by the sorted mean relative effectiveness estimates.
        /// </summary>
        public override void Run()
        {
            // Load inputs: runs, known judgments (if a path was set) and estimated judgments
            IEnumerable<Run> inputRuns = AbstractCommand.ReadInputFile(this._inputPath);
            IEnumerable<RelevanceEstimate> knownJudgments = this._judgedPath != null
                ? AbstractCommand.ReadKnownJudgments(this._judgedPath)
                : new RelevanceEstimate[] { };
            IEnumerable<RelevanceEstimate> estimatedJudgments =
                AbstractCommand.ReadEstimatedJudgments(this._estimatedPath);

            // The store starts from the estimates and is then updated with the known judgments
            RelevanceEstimateStore estimateStore = new RelevanceEstimateStore(estimatedJudgments);
            estimateStore.Update(knownJudgments);

            IMeasure measure = new CG(100); //TODO: max relevance

            // system -> query -> run
            Dictionary<string, Dictionary<string, Run>> runsBySystemAndQuery =
                AbstractCommand.ToSystemQueryRuns(inputRuns);

            // Absolute effectiveness: per query first, then averaged and sorted
            Dictionary<string, Dictionary<string, AbsoluteEffectivenessEstimate>> absolutesPerQuery =
                EvaluateCommand.GetSystemQueryAbsolutes(runsBySystemAndQuery, measure, estimateStore, this._confEstimator);
            List<AbsoluteEffectivenessEstimate> sortedAbsolutes =
                EvaluateCommand.GetSortedMeanAbsolutes(absolutesPerQuery, this._confEstimator);

            // Relative effectiveness: per query first, then averaged (already sorted)
            Dictionary<string, Dictionary<string, Dictionary<string, RelativeEffectivenessEstimate>>> relativesPerQuery =
                EvaluateCommand.GetSystemSystemQueryRelatives(runsBySystemAndQuery, measure, estimateStore, this._confEstimator);
            List<RelativeEffectivenessEstimate> sortedRelatives =
                EvaluateCommand.GetSortedMeanRelatives(relativesPerQuery, this._confEstimator);

            // Write absolutes first, then relatives
            TabSeparated formatter = new TabSeparated(this._decimalDigits);
            IWriter<AbsoluteEffectivenessEstimate> absoluteWriter = (IWriter<AbsoluteEffectivenessEstimate>)formatter;
            absoluteWriter.Write(Console.Out, sortedAbsolutes);
            IWriter<RelativeEffectivenessEstimate> relativeWriter = (IWriter<RelativeEffectivenessEstimate>)formatter;
            relativeWriter.Write(Console.Out, sortedRelatives);
        }
示例#7
0
        /// <summary>
        /// Reads and validates the command-line options of this command: confidence (-c),
        /// evaluation target (-t), target effect size (-s), decimal digits (-d), number of
        /// batches (-b), batch size (-n), input file (-i), known judgments file (-j),
        /// estimator parameters (-p) and estimator name (-e).
        /// </summary>
        /// <param name="cmd">Parsed command line.</param>
        public override void CheckOptions(CommandLine cmd)
        {
            // Confidence is optional; the target is always checked
            if (cmd.HasOption('c'))
            {
                string confidenceText = cmd.GetOptionValue('c');
                this._confidence = AbstractCommand.CheckConfidence(confidenceText);
            }
            this._target = AbstractCommand.CheckTarget(cmd.GetOptionValue('t'));

            // Effect sizes default to the global values; -s overrides only the one
            // that corresponds to the selected target
            double relativeSize = Allcea.DEFAULT_RELATIVE_SIZE;
            double absoluteSize = Allcea.DEFAULT_ABSOLUTE_SIZE;
            if (cmd.HasOption('s'))
            {
                if (this._target == EvaluationTargets.Relative)
                {
                    relativeSize = AbstractCommand.CheckRelativeSize(cmd.GetOptionValue('s'));
                }
                else if (this._target == EvaluationTargets.Absolute)
                {
                    absoluteSize = AbstractCommand.CheckAbsoluteSize(cmd.GetOptionValue('s'));
                }
            }
            this._confEstimator = new NormalConfidenceEstimator(this._confidence, relativeSize, absoluteSize);

            // Number of decimal digits for output, only when requested
            if (cmd.HasOption('d'))
            {
                string digitsText = cmd.GetOptionValue('d');
                this._decimalDigits = AbstractCommand.CheckDigits(digitsText);
            }

            // Batches: how many, and how large
            this._batchNum  = AbstractCommand.CheckBatchNumber(cmd.GetOptionValue('b'));
            this._batchSize = AbstractCommand.CheckBatchSize(cmd.GetOptionValue('n'));

            // Files
            this._inputPath  = AbstractCommand.CheckInputFile(cmd.GetOptionValue('i'));
            this._judgedPath = AbstractCommand.CheckJudgedFile(cmd.GetOptionValue('j'));

            // Estimator: name/value parameters are parsed first, then the wrapper is built
            Dictionary<string, string> estimatorParameters =
                Allcea.ParseNameValueParameters(cmd.GetOptionValues('p'));
            this._estimator = new EstimatorWrapper(cmd.GetOptionValue('e'), estimatorParameters);
        }
示例#8
0
        /// <summary>
        /// Simulates an iterative judging process. In every iteration the effectiveness of the
        /// systems is estimated with the wrapped estimator; while the average confidence is
        /// below the target, batches of query-document pairs are selected, their judgments are
        /// taken from the known judgments store and fed to the estimator, and the remaining
        /// pending pairs are re-estimated. A status line is written to standard output per
        /// iteration, and one line per selected batch.
        /// </summary>
        /// <remarks>
        /// The loop ends when the average confidence reaches the target, or when no
        /// query-document pair is left pending. In the latter case nothing can change between
        /// iterations, so continuing would never terminate.
        /// </remarks>
        public override void Run()
        {
            // Read files, initialize store, estimator and measure
            IEnumerable<Run>       runs  = AbstractCommand.ReadInputFile(this._inputPath);
            RelevanceEstimateStore store = new RelevanceEstimateStore(AbstractCommand.ReadKnownJudgments(this._judgedPath));

            this._estimator.Initialize(runs, new RelevanceEstimate[] { }); // No known judgments at this point
            IMeasure measure = new CG(100);                                //TODO: max relevance

            // Compile list of all query-doc-sys-rank tuples
            Dictionary<string, Dictionary<string, Dictionary<string, int>>> qdsRanks = AbstractCommand.ToQueryDocumentSystemRanks(runs);
            // Re-structure runs for efficient access
            Dictionary<string, Dictionary<string, Run>> sqRuns = AbstractCommand.ToSystemQueryRuns(runs);

            // Estimate relevance of all query-doc pairs; these are the pairs pending judgment
            Dictionary<string, Dictionary<string, RelevanceEstimate>> qdEstimates = new Dictionary<string, Dictionary<string, RelevanceEstimate>>();

            foreach (var dsRanks in qdsRanks)
            {
                string query = dsRanks.Key;
                Dictionary<string, RelevanceEstimate> dEstimates = new Dictionary<string, RelevanceEstimate>();
                foreach (var sRanks in dsRanks.Value)
                {
                    string doc = sRanks.Key;
                    // Initial estimate; pending pairs are re-estimated at the end of every iteration
                    dEstimates.Add(doc, this._estimator.Estimate(query, doc));
                }
                qdEstimates.Add(query, dEstimates);
            }


            bool   needsNext = true;
            double confidence = 0.5;
            int    iteration = 1, judged = 0;

            do
            {
                /* Evaluate */

                if (this._target == EvaluationTargets.Relative)
                {
                    // Estimate per-query relative effectiveness
                    Dictionary<string, Dictionary<string, Dictionary<string, RelativeEffectivenessEstimate>>> ssqRels =
                        EvaluateCommand.GetSystemSystemQueryRelatives(sqRuns, measure, this._estimator, this._confEstimator);
                    // Average (already sorted)
                    List<RelativeEffectivenessEstimate> relSorted = EvaluateCommand.GetSortedMeanRelatives(ssqRels, this._confEstimator);

                    confidence = relSorted.Average(r => r.Confidence);
                    if (confidence < this._confidence)
                    {
                        measure.ComputeQueryDocumentWeights(qdEstimates, qdsRanks, ssqRels);
                    }
                    else
                    {
                        needsNext = false;
                    }
                }
                else
                {
                    // Estimate per-query absolute effectiveness
                    Dictionary<string, Dictionary<string, AbsoluteEffectivenessEstimate>> sqAbss =
                        EvaluateCommand.GetSystemQueryAbsolutes(sqRuns, measure, this._estimator, this._confEstimator);
                    // Average and sort
                    List<AbsoluteEffectivenessEstimate> absSorted = EvaluateCommand.GetSortedMeanAbsolutes(sqAbss, this._confEstimator);

                    confidence = absSorted.Average(a => a.Confidence);
                    if (confidence < this._confidence)
                    {
                        measure.ComputeQueryDocumentWeights(qdEstimates, qdsRanks, sqAbss);
                    }
                    else
                    {
                        needsNext = false;
                    }
                }

                Console.WriteLine(iteration + " : Conf=" + confidence + " Judged=" + judged);

                // Stop when nothing is left to judge: with no pending pairs there is nothing
                // to select, so the estimates and the confidence could never change again
                if (needsNext && qdEstimates.Values.All(pending => pending.Count == 0))
                {
                    needsNext = false;
                }

                if (needsNext)
                {
                    /* Next */

                    var batches = NextCommand.GetBatches(qdEstimates, this._batchNum, this._batchSize);
                    // "Judge" all batches
                    foreach (var batch in batches)
                    {
                        Console.WriteLine(batch[0].Query + " : " + string.Join(" ", batch.Select(d => d.Document)));
                        foreach (var doc in batch)
                        {
                            // The simulated judgment is whatever the known judgments store returns
                            this._estimator.Update(store.Estimate(doc.Query, doc.Document));
                            judged++;
                            // Remove from list of pending
                            var dEstimates = qdEstimates[doc.Query];
                            dEstimates.Remove(doc.Document);
                            if (dEstimates.Count == 0)
                            {
                                qdEstimates.Remove(doc.Query);
                            }
                        }
                    }

                    /* Re-estimate */

                    // Re-estimate relevance of pending query-doc pairs
                    foreach (var dEstimates in qdEstimates)
                    {
                        string query = dEstimates.Key;
                        Dictionary<string, RelevanceEstimate> estimates = dEstimates.Value;
                        // Snapshot the keys: the dictionary is written to inside the loop
                        foreach (string doc in estimates.Keys.ToArray())
                        {
                            estimates[doc] = this._estimator.Estimate(query, doc);
                        }
                    }
                }

                iteration++;
            } while (needsNext);

            // TODO: output effectiveness estimates
        }
示例#9
0
        /// <summary>
        /// Decides whether more judgments are needed and, if so, writes the next batches of
        /// query-document pairs to judge to standard output. More judgments are needed when
        /// the average confidence of the mean effectiveness estimates (relative or absolute,
        /// depending on the target) is below the configured confidence.
        /// </summary>
        public override void Run()
        {
            // Load inputs: runs, known judgments (if a path was set) and estimated judgments
            IEnumerable<Run> inputRuns = AbstractCommand.ReadInputFile(this._inputPath);
            IEnumerable<RelevanceEstimate> knownJudgments = this._judgedPath != null
                ? AbstractCommand.ReadKnownJudgments(this._judgedPath)
                : new RelevanceEstimate[] { };
            IEnumerable<RelevanceEstimate> estimatedJudgments =
                AbstractCommand.ReadEstimatedJudgments(this._estimatedPath);

            // The store starts from the estimates and is then updated with the known judgments
            RelevanceEstimateStore estimateStore = new RelevanceEstimateStore(estimatedJudgments);
            estimateStore.Update(knownJudgments);

            IMeasure measure = new CG(100); //TODO: max relevance

            // query -> document -> system -> rank
            Dictionary<string, Dictionary<string, Dictionary<string, int>>> ranks =
                AbstractCommand.ToQueryDocumentSystemRanks(inputRuns);

            // query -> document -> estimate, for every estimated pair
            Dictionary<string, Dictionary<string, RelevanceEstimate>> pending =
                new Dictionary<string, Dictionary<string, RelevanceEstimate>>();
            foreach (RelevanceEstimate estimate in estimatedJudgments)
            {
                Dictionary<string, RelevanceEstimate> byDocument;
                if (!pending.TryGetValue(estimate.Query, out byDocument))
                {
                    byDocument = new Dictionary<string, RelevanceEstimate>();
                    pending.Add(estimate.Query, byDocument);
                }
                byDocument.Add(estimate.Document, estimate);
            }

            // Pairs that already have a known judgment are no longer pending
            foreach (RelevanceEstimate known in knownJudgments)
            {
                Dictionary<string, RelevanceEstimate> byDocument;
                if (!pending.TryGetValue(known.Query, out byDocument))
                {
                    continue;
                }
                byDocument.Remove(known.Document);
                if (byDocument.Count == 0)
                {
                    pending.Remove(known.Query);
                }
            }

            // system -> query -> run
            Dictionary<string, Dictionary<string, Run>> runsBySystemAndQuery =
                AbstractCommand.ToSystemQueryRuns(inputRuns);

            bool moreJudgmentsNeeded;
            if (this._target == EvaluationTargets.Relative)
            {
                // Relative effectiveness: per query first, then averaged (already sorted)
                Dictionary<string, Dictionary<string, Dictionary<string, RelativeEffectivenessEstimate>>> relativesPerQuery =
                    EvaluateCommand.GetSystemSystemQueryRelatives(runsBySystemAndQuery, measure, estimateStore, this._confEstimator);
                List<RelativeEffectivenessEstimate> sortedRelatives =
                    EvaluateCommand.GetSortedMeanRelatives(relativesPerQuery, this._confEstimator);

                moreJudgmentsNeeded = sortedRelatives.Average(r => r.Confidence) < this._confidence;
                if (moreJudgmentsNeeded)
                {
                    measure.ComputeQueryDocumentWeights(pending, ranks, relativesPerQuery);
                }
            }
            else
            {
                // Absolute effectiveness: per query first, then averaged and sorted
                Dictionary<string, Dictionary<string, AbsoluteEffectivenessEstimate>> absolutesPerQuery =
                    EvaluateCommand.GetSystemQueryAbsolutes(runsBySystemAndQuery, measure, estimateStore, this._confEstimator);
                List<AbsoluteEffectivenessEstimate> sortedAbsolutes =
                    EvaluateCommand.GetSortedMeanAbsolutes(absolutesPerQuery, this._confEstimator);

                moreJudgmentsNeeded = sortedAbsolutes.Average(a => a.Confidence) < this._confidence;
                if (moreJudgmentsNeeded)
                {
                    measure.ComputeQueryDocumentWeights(pending, ranks, absolutesPerQuery);
                }
            }

            // Confidence target already met: nothing to output
            if (!moreJudgmentsNeeded)
            {
                return;
            }

            // Select the batches and write at most _batchNum of them
            var nextBatches = NextCommand.GetBatches(pending, this._batchNum, this._batchSize);
            TabSeparated formatter = new TabSeparated(this._decimalDigits);
            IWriter<List<RelevanceEstimate>> batchWriter = (IWriter<List<RelevanceEstimate>>)formatter;
            batchWriter.Write(Console.Out, nextBatches.Take(this._batchNum));
        }
示例#10
0
 /// <summary>
 /// Checks the path of the estimated judgments file by delegating to
 /// <c>AbstractCommand.CheckFile</c> with the label "Estimated judgments".
 /// </summary>
 /// <param name="path">Path to check.</param>
 /// <returns>The value returned by <c>AbstractCommand.CheckFile</c>.</returns>
 internal static string CheckEstimatedFile(string path)
 {
     const string label = "Estimated judgments";
     string checkedPath = AbstractCommand.CheckFile(path, label);
     return checkedPath;
 }
示例#11
0
 /// <summary>
 /// Checks the path of the known judgments file by delegating to
 /// <c>AbstractCommand.CheckFile</c> with the label "Known judgments".
 /// </summary>
 /// <param name="path">Path to check.</param>
 /// <returns>The value returned by <c>AbstractCommand.CheckFile</c>.</returns>
 internal static string CheckJudgedFile(string path)
 {
     const string label = "Known judgments";
     string checkedPath = AbstractCommand.CheckFile(path, label);
     return checkedPath;
 }
示例#12
0
 // Check CLI options
 /// <summary>
 /// Checks the path of the input file by delegating to
 /// <c>AbstractCommand.CheckFile</c> with the label "Input".
 /// </summary>
 /// <param name="path">Path to check.</param>
 /// <returns>The value returned by <c>AbstractCommand.CheckFile</c>.</returns>
 internal static string CheckInputFile(string path)
 {
     const string label = "Input";
     string checkedPath = AbstractCommand.CheckFile(path, label);
     return checkedPath;
 }