Esempio n. 1
0
        /// <summary>
        /// Command-line entry point. Treats the first argument as the command name, creates the
        /// matching command object, parses the remaining arguments against that command's
        /// options and runs the command.
        /// </summary>
        /// <remarks>
        /// Exits with code 0 after printing the main usage for <c>-h</c>. Exits with code 1 when
        /// no arguments are given, when the command name is not recognized, or when parsing,
        /// checking or running the command throws.
        /// </remarks>
        /// <param name="args">Raw command-line arguments; <c>args[0]</c> is the command name.</param>
        public static void Main(string[] args)
        {
            // Guard first: the command name is read from args[0] below.
            if (args == null || args.Length == 0) {
                // No CLI options
                Allcea.PrintMainUsage(null);
                Environment.Exit(1);
                return;
            }

            // Check CLI command name. Invariant lowering keeps the match independent of the
            // current culture (e.g. Turkish casing of 'I').
            string commandName = args[0].ToLowerInvariant();
            AbstractCommand command = null;
            switch (commandName) {
                case "-h":
                    Allcea.PrintMainUsage(null);
                    Environment.Exit(0);
                    break;
                case "estimate": command = new EstimateCommand(); break;
                case "evaluate": command = new EvaluateCommand(); break;
                case "next": command = new NextCommand(); break;
                case "simulate": command = new SimulateCommand(); break;
                case "features": command = new FeaturesCommand(); break;
                default:
                    Console.Error.WriteLine("'" + commandName + "' is not a valid Allcea command. See '" + Allcea.CLI_NAME_AND_VERSION + " -h'.");
                    Environment.Exit(1);
                    break;
            }

            // Parse CLI options
            Options options = command.Options;
            // help? Cannot wait to parse CLI options because it will throw exception before
            if (options.HasOption("h") && args.Contains("-h")) {
                Allcea.PrintUsage(null, commandName, options, command.OptionsFooter);
                return;
            }

            try {
                Parser parser = new BasicParser();
                // The command name itself is not an option, so skip it
                CommandLine cmd = parser.Parse(options, args.Skip(1).ToArray());
                // If we have extra CLI options the Parse method doesn't throw exception. Handle here
                if (cmd.Args == null || cmd.Args.Length != 0) {
                    throw new ParseException("Unused option(s): " + string.Join(",", cmd.Args));
                }
                // Run command
                command.CheckOptions(cmd);
                command.Run();
            } catch (ParseException pe) {
                // Make sure the message ends with a period before appending the usage hint
                Console.Error.WriteLine((pe.Message.EndsWith(".") ? pe.Message : pe.Message + ".")
                    + " See '" + Allcea.CLI_NAME_AND_VERSION + " " + commandName + " -h'.");
                Environment.Exit(1);
            } catch (Exception ex) {
                Console.Error.WriteLine(ex.Message);
                Environment.Exit(1);
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Simulates an iterative judging process: repeatedly evaluates the systems with the
        /// current relevance estimates, and while the mean confidence is below
        /// <c>this._confidence</c>, picks batches of pending query-document pairs, feeds their
        /// stored judgments to the estimator and re-estimates the remaining pairs.
        /// </summary>
        /// <remarks>
        /// Progress is written to <see cref="Console.Out"/> (one line per iteration plus one line
        /// per judged batch). The loop also stops when an iteration judges no document, since the
        /// estimator would not change and the same evaluation would repeat indefinitely.
        /// </remarks>
        public override void Run()
        {
            // Read files, initialize store, estimator and measure
            IEnumerable<Run> runs = AbstractCommand.ReadInputFile(this._inputPath);
            RelevanceEstimateStore store = new RelevanceEstimateStore(AbstractCommand.ReadKnownJudgments(this._judgedPath));

            this._estimator.Initialize(runs, new RelevanceEstimate[] { }); // No known judgments at this point
            IMeasure measure = new CG(100);                                //TODO: max relevance

            // Compile list of all query-doc-sys-rank tuples
            Dictionary<string, Dictionary<string, Dictionary<string, int>>> qdsRanks = AbstractCommand.ToQueryDocumentSystemRanks(runs);
            // Re-structure runs for efficient access
            Dictionary<string, Dictionary<string, Run>> sqRuns = AbstractCommand.ToSystemQueryRuns(runs);

            // Estimate relevance of all query-doc pairs; this is the pool of pending (unjudged) pairs
            Dictionary<string, Dictionary<string, RelevanceEstimate>> qdEstimates = new Dictionary<string, Dictionary<string, RelevanceEstimate>>();

            foreach (var dsRanks in qdsRanks)
            {
                string query = dsRanks.Key;
                Dictionary<string, RelevanceEstimate> dEstimates = new Dictionary<string, RelevanceEstimate>();
                foreach (var sRanks in dsRanks.Value)
                {
                    string doc = sRanks.Key;
                    // Initial estimate; it is refreshed after every round of judgments below
                    dEstimates.Add(doc, this._estimator.Estimate(query, doc));
                }
                qdEstimates.Add(query, dEstimates);
            }


            bool   needsNext = true;
            double confidence = 0.5;
            int    iteration = 1, judged = 0;

            do
            {
                /* Evaluate */

                if (this._target == EvaluationTargets.Relative)
                {
                    // Estimate per-query relative effectiveness
                    Dictionary<string, Dictionary<string, Dictionary<string, RelativeEffectivenessEstimate>>> ssqRels =
                        EvaluateCommand.GetSystemSystemQueryRelatives(sqRuns, measure, this._estimator, this._confEstimator);
                    // Average (already sorted)
                    List<RelativeEffectivenessEstimate> relSorted = EvaluateCommand.GetSortedMeanRelatives(ssqRels, this._confEstimator);

                    confidence = relSorted.Average(r => r.Confidence);
                    if (confidence < this._confidence)
                    {
                        measure.ComputeQueryDocumentWeights(qdEstimates, qdsRanks, ssqRels);
                    }
                    else
                    {
                        needsNext = false;
                    }
                }
                else
                {
                    // Estimate per-query absolute effectiveness
                    Dictionary<string, Dictionary<string, AbsoluteEffectivenessEstimate>> sqAbss =
                        EvaluateCommand.GetSystemQueryAbsolutes(sqRuns, measure, this._estimator, this._confEstimator);
                    // Average and sort
                    List<AbsoluteEffectivenessEstimate> absSorted = EvaluateCommand.GetSortedMeanAbsolutes(sqAbss, this._confEstimator);

                    confidence = absSorted.Average(a => a.Confidence);
                    if (confidence < this._confidence)
                    {
                        measure.ComputeQueryDocumentWeights(qdEstimates, qdsRanks, sqAbss);
                    }
                    else
                    {
                        needsNext = false;
                    }
                }

                Console.WriteLine(iteration + " : Conf=" + confidence + " Judged=" + judged);
                if (needsNext)
                {
                    /* Next */

                    int judgedBefore = judged;
                    var batches = NextCommand.GetBatches(qdEstimates, this._batchNum, this._batchSize);
                    // "Judge" all batches
                    foreach (var batch in batches)
                    {
                        Console.WriteLine(batch[0].Query + " : " + string.Join(" ", batch.Select(d => d.Document)));
                        foreach (var doc in batch)
                        {
                            // The store plays the role of the assessor: its value is taken as the judgment
                            this._estimator.Update(store.Estimate(doc.Query, doc.Document));
                            judged++;
                            // Remove from list of pending
                            var dEstimates = qdEstimates[doc.Query];
                            dEstimates.Remove(doc.Document);
                            if (dEstimates.Count == 0)
                            {
                                qdEstimates.Remove(doc.Query);
                            }
                        }
                    }

                    // No progress: nothing was judged, so the estimator is unchanged and the next
                    // iteration would evaluate exactly the same state. Stop instead of looping forever.
                    if (judged == judgedBefore)
                    {
                        Console.Error.WriteLine("No documents left to judge; stopping at confidence " + confidence + ".");
                        break;
                    }

                    /* Re-estimate */

                    // Re-estimate relevance of pending query-doc pairs
                    foreach (var dEstimates in qdEstimates)
                    {
                        string query = dEstimates.Key;
                        Dictionary<string, RelevanceEstimate> estimates = dEstimates.Value;
                        // Snapshot the keys: values are replaced while iterating
                        foreach (string doc in estimates.Keys.ToArray())
                        {
                            estimates[doc] = this._estimator.Estimate(query, doc);
                        }
                    }
                }

                iteration++;
            } while (needsNext);

            // TODO: output effectiveness estimates
        }
Esempio n. 3
0
        /// <summary>
        /// Evaluates the systems with the current relevance estimates and, when the mean
        /// confidence is below <c>this._confidence</c>, writes the next batches of
        /// query-document pairs to judge to <see cref="Console.Out"/>.
        /// </summary>
        /// <remarks>
        /// Known judgments are optional (<c>this._judgedPath</c> may be null). Pairs that already
        /// have a known judgment are never proposed. Nothing is written when the confidence
        /// target is already met.
        /// </remarks>
        public override void Run()
        {
            // Load the runs, then the known judgments (only if a path was given), then the estimates
            IEnumerable<Run> runs = AbstractCommand.ReadInputFile(this._inputPath);
            IEnumerable<RelevanceEstimate> known;
            if (this._judgedPath == null)
            {
                known = new RelevanceEstimate[0];
            }
            else
            {
                known = AbstractCommand.ReadKnownJudgments(this._judgedPath);
            }
            IEnumerable<RelevanceEstimate> estimated = AbstractCommand.ReadEstimatedJudgments(this._estimatedPath);

            // The store starts from the estimates and is then updated with the known judgments
            RelevanceEstimateStore store = new RelevanceEstimateStore(estimated);
            store.Update(known);
            IMeasure measure = new CG(100); //TODO: max relevance

            // query -> document -> system -> rank
            Dictionary<string, Dictionary<string, Dictionary<string, int>>> qdsRanks = AbstractCommand.ToQueryDocumentSystemRanks(runs);

            // Index the estimates as query -> document -> estimate
            var pending = new Dictionary<string, Dictionary<string, RelevanceEstimate>>();
            foreach (RelevanceEstimate estimate in estimated)
            {
                Dictionary<string, RelevanceEstimate> perDocument;
                if (!pending.TryGetValue(estimate.Query, out perDocument))
                {
                    perDocument = new Dictionary<string, RelevanceEstimate>();
                    pending[estimate.Query] = perDocument;
                }
                perDocument.Add(estimate.Document, estimate);
            }

            // Drop every pair that is already judged, and queries left without pending documents
            foreach (RelevanceEstimate judgment in known)
            {
                Dictionary<string, RelevanceEstimate> perDocument;
                if (!pending.TryGetValue(judgment.Query, out perDocument))
                {
                    continue;
                }
                perDocument.Remove(judgment.Document);
                if (perDocument.Count == 0)
                {
                    pending.Remove(judgment.Query);
                }
            }

            // system -> query -> run
            Dictionary<string, Dictionary<string, Run>> sqRuns = AbstractCommand.ToSystemQueryRuns(runs);

            if (this._target != EvaluationTargets.Relative)
            {
                // Absolute effectiveness per system and query, then averaged and sorted
                Dictionary<string, Dictionary<string, AbsoluteEffectivenessEstimate>> sqAbss =
                    EvaluateCommand.GetSystemQueryAbsolutes(sqRuns, measure, store, this._confEstimator);
                List<AbsoluteEffectivenessEstimate> meanAbsolutes = EvaluateCommand.GetSortedMeanAbsolutes(sqAbss, this._confEstimator);

                bool belowTarget = meanAbsolutes.Average(a => a.Confidence) < this._confidence;
                if (!belowTarget)
                {
                    // Confident enough: no further judgments requested
                    return;
                }
                measure.ComputeQueryDocumentWeights(pending, qdsRanks, sqAbss);
            }
            else
            {
                // Relative effectiveness per system pair and query, then averaged (already sorted)
                Dictionary<string, Dictionary<string, Dictionary<string, RelativeEffectivenessEstimate>>> ssqRels =
                    EvaluateCommand.GetSystemSystemQueryRelatives(sqRuns, measure, store, this._confEstimator);
                List<RelativeEffectivenessEstimate> meanRelatives = EvaluateCommand.GetSortedMeanRelatives(ssqRels, this._confEstimator);

                bool belowTarget = meanRelatives.Average(r => r.Confidence) < this._confidence;
                if (!belowTarget)
                {
                    // Confident enough: no further judgments requested
                    return;
                }
                measure.ComputeQueryDocumentWeights(pending, qdsRanks, ssqRels);
            }

            // Confidence target not met: print at most _batchNum batches to standard output
            var batches = NextCommand.GetBatches(pending, this._batchNum, this._batchSize);
            var writer = (IWriter<List<RelevanceEstimate>>)new TabSeparated(this._decimalDigits);
            writer.Write(Console.Out, batches.Take(this._batchNum));
        }