/// <summary>
/// Creates the simulate command: registers the command-line options it accepts and
/// initializes its settings (paths and estimators to <c>null</c>, the target to
/// <c>EvaluationTargets.Relative</c>, and the numeric settings to the <c>Allcea</c> defaults).
/// </summary>
public SimulateCommand() {
    // Help texts that embed default values are assembled first so the registrations stay short.
    string confidenceHelp = "optional target average confidence on the estimates (defaults to "
        + Allcea.DEFAULT_CONFIDENCE + ").";
    string sizeHelp = "optional target effect size to compute confidence (defaults to "
        + Allcea.DEFAULT_RELATIVE_SIZE + " for relative and " + Allcea.DEFAULT_ABSOLUTE_SIZE + " for absolute).";
    string digitsHelp = "optional number of fractional digits to output (defaults to "
        + Allcea.DEFAULT_DECIMAL_DIGITS + ")";

    base.Options = new Options();

    // Input files
    base.Options.AddOption(OptionBuilder.Factory
        .IsRequired().HasArg().WithArgName("file")
        .WithDescription("path to the file with system runs.")
        .Create("i"));
    base.Options.AddOption(OptionBuilder.Factory
        .IsRequired().HasArg().WithArgName("file")
        .WithDescription("path to file with known judgments.")
        .Create("j"));

    // Estimator and its optional name=value parameters
    base.Options.AddOption(OptionBuilder.Factory
        .IsRequired().HasArg().WithArgName("name")
        .WithDescription("name of the estimator to use.")
        .Create("e"));
    base.Options.AddOption(OptionBuilder.Factory
        .HasArgs().WithArgName("name=value")
        .WithDescription("optional parameter to the estimator.")
        .Create("p"));

    // Evaluation target and batching
    base.Options.AddOption(OptionBuilder.Factory
        .IsRequired().HasArg().WithArgName("target")
        .WithDescription("type of estimates to target ('rel' or 'abs').")
        .Create('t'));
    base.Options.AddOption(OptionBuilder.Factory
        .IsRequired().HasArg().WithArgName("num")
        .WithDescription("number of batches that will be judged.")
        .Create('b'));
    base.Options.AddOption(OptionBuilder.Factory
        .IsRequired().HasArg().WithArgName("num")
        .WithDescription("number of documents to judge per batch.")
        .Create('n'));

    // Optional tuning and output formatting
    base.Options.AddOption(OptionBuilder.Factory
        .HasArg().WithArgName("conf")
        .WithDescription(confidenceHelp)
        .Create("c"));
    base.Options.AddOption(OptionBuilder.Factory
        .HasArg().WithArgName("size")
        .WithDescription(sizeHelp)
        .Create("s"));
    base.Options.AddOption(OptionBuilder.Factory
        .HasArg().WithArgName("digits")
        .WithDescription(digitsHelp)
        .Create("d"));
    base.Options.AddOption(OptionBuilder.Factory
        .WithDescription("shows this help message.")
        .Create("h"));

    // Nothing has been parsed yet: reference settings start empty...
    _inputPath = null;
    _judgedPath = null;
    _estimator = null;
    _confEstimator = null;

    // ...and value settings start at their defaults.
    _target = EvaluationTargets.Relative;
    _confidence = Allcea.DEFAULT_CONFIDENCE;
    _decimalDigits = Allcea.DEFAULT_DECIMAL_DIGITS;
    _batchNum = Allcea.DEFAULT_NUMBER_OF_BATCHES;
    _batchSize = Allcea.DEFAULT_BATCH_SIZE;
}
/// <summary>
/// Reads the parsed command line, validates each supplied value through the
/// <c>AbstractCommand.Check*</c> helpers, and stores the results in this command.
/// </summary>
/// <param name="cmd">Parsed command line to read option values from.</param>
/// <exception cref="ArgumentException">
/// Option <c>s</c> was supplied with a number of values other than two.
/// </exception>
public override void CheckOptions(CommandLine cmd) {
    // Confidence estimator: defaults apply unless the matching option was supplied.
    double confidence = cmd.HasOption('c')
        ? AbstractCommand.CheckConfidence(cmd.GetOptionValue('c'))
        : Allcea.DEFAULT_CONFIDENCE;

    double relativeSize = Allcea.DEFAULT_RELATIVE_SIZE;
    double absoluteSize = Allcea.DEFAULT_ABSOLUTE_SIZE;
    if (cmd.HasOption('s')) {
        string[] sizes = cmd.GetOptionValues('s');
        if (sizes.Length != 2) {
            throw new ArgumentException("Must provide two target effect sizes: relative and absolute.");
        }
        // First value is the relative size, second the absolute one.
        relativeSize = AbstractCommand.CheckRelativeSize(sizes[0]);
        absoluteSize = AbstractCommand.CheckAbsoluteSize(sizes[1]);
    }
    _confEstimator = new NormalConfidenceEstimator(confidence, relativeSize, absoluteSize);

    // Double format
    if (cmd.HasOption('d')) {
        _decimalDigits = AbstractCommand.CheckDigits(cmd.GetOptionValue('d'));
    }

    // Files: the judged file is only checked when it was given on the command line.
    _inputPath = AbstractCommand.CheckInputFile(cmd.GetOptionValue('i'));
    if (cmd.HasOption('j')) {
        _judgedPath = AbstractCommand.CheckJudgedFile(cmd.GetOptionValue('j'));
    }
    _estimatedPath = AbstractCommand.CheckEstimatedFile(cmd.GetOptionValue('e'));
}
/// <summary>
/// Creates the evaluate command: registers the command-line options it accepts and
/// initializes its settings (paths and confidence estimator to <c>null</c>, the number of
/// fractional digits to <c>Allcea.DEFAULT_DECIMAL_DIGITS</c>).
/// </summary>
public EvaluateCommand() {
    // Help texts that embed default values are assembled first so the registrations stay short.
    string confidenceHelp = "optional confidence level for interval estimates (defaults to "
        + Allcea.DEFAULT_CONFIDENCE + ").";
    string sizeHelp = "optional target effect sizes to compute confidence (defaults to "
        + Allcea.DEFAULT_RELATIVE_SIZE + " and " + Allcea.DEFAULT_ABSOLUTE_SIZE + ").";
    string digitsHelp = "optional number of fractional digits to output (defaults to "
        + Allcea.DEFAULT_DECIMAL_DIGITS + ")";

    base.Options = new Options();

    // Files
    base.Options.AddOption(OptionBuilder.Factory
        .IsRequired().HasArg().WithArgName("file")
        .WithDescription("path to the file with system runs.")
        .Create("i"));
    base.Options.AddOption(OptionBuilder.Factory
        .HasArg().WithArgName("file")
        .WithDescription("optional path to file with known judgments.")
        .Create("j"));
    base.Options.AddOption(OptionBuilder.Factory
        .IsRequired().HasArg().WithArgName("file")
        .WithDescription("path to the file with estimated judgments.")
        .Create("e"));

    // Confidence settings; -s takes two values (relative, then absolute)
    base.Options.AddOption(OptionBuilder.Factory
        .HasArg().WithArgName("conf")
        .WithDescription(confidenceHelp)
        .Create("c"));
    base.Options.AddOption(OptionBuilder.Factory
        .HasArgs(2).WithArgName("rel> <abs")
        .WithDescription(sizeHelp)
        .Create("s"));

    // Output formatting and help
    base.Options.AddOption(OptionBuilder.Factory
        .HasArg().WithArgName("digits")
        .WithDescription(digitsHelp)
        .Create("d"));
    base.Options.AddOption(OptionBuilder.Factory
        .WithDescription("shows this help message.")
        .Create("h"));

    // Nothing has been parsed yet.
    _inputPath = null;
    _judgedPath = null;
    _estimatedPath = null;
    _confEstimator = null;
    _decimalDigits = Allcea.DEFAULT_DECIMAL_DIGITS;
}
/// <summary>
/// Computes the per-query relative effectiveness estimates for every pair of systems.
/// </summary>
/// <remarks>
/// Each pair is evaluated once: a system A is only compared against the systems that follow it
/// in the key order of <paramref name="sqRuns"/>, so the last system never appears as A.
/// The iterations over A run inside <c>Parallel.For</c>, which means <paramref name="measure"/> is
/// invoked from the parallel loop body; only the insertion into the shared result is locked.
/// Every query of A is looked up in B through the dictionary indexer, so B is expected to contain
/// a run for each of A's queries.
/// </remarks>
/// <param name="sqRuns">Runs indexed by system and then by query.</param>
/// <param name="measure">Measure used to estimate the difference between two runs.</param>
/// <param name="relEstimator">Relevance estimator handed to the measure.</param>
/// <param name="confEstimator">Confidence estimator handed to the measure.</param>
/// <returns>Estimates indexed as [system A][system B][query].</returns>
internal static Dictionary<string, Dictionary<string, Dictionary<string, RelativeEffectivenessEstimate>>> GetSystemSystemQueryRelatives(
    Dictionary<string, Dictionary<string, Run>> sqRuns, IMeasure measure,
    IRelevanceEstimator relEstimator, IConfidenceEstimator confEstimator) {
    var estimates = new Dictionary<string, Dictionary<string, Dictionary<string, RelativeEffectivenessEstimate>>>(); // [sysA [sysB [query rel]]]
    string[] systems = sqRuns.Keys.ToArray();

    Parallel.For(0, systems.Length - 1, first => {
        string nameA = systems[first];
        Dictionary<string, Run> queriesA = sqRuns[nameA];
        var againstA = new Dictionary<string, Dictionary<string, RelativeEffectivenessEstimate>>();

        for (int second = first + 1; second < systems.Length; second++) {
            string nameB = systems[second];
            Dictionary<string, Run> queriesB = sqRuns[nameB];
            // One estimate per query of A, paired with B's run for the same query.
            Dictionary<string, RelativeEffectivenessEstimate> perQuery = queriesA.ToDictionary(
                q => q.Key,
                q => measure.Estimate(q.Value, queriesB[q.Key], relEstimator, confEstimator));
            againstA.Add(nameB, perQuery);
        }

        // The outer dictionary is shared by all iterations; guard the write.
        lock (estimates) {
            estimates.Add(nameA, againstA);
        }
    });

    return estimates;
}
/// <summary>
/// Computes the absolute effectiveness estimate of every run, keeping the
/// [system][query] layout of the input.
/// </summary>
/// <param name="sqRuns">Runs indexed by system and then by query.</param>
/// <param name="measure">Measure used to estimate the effectiveness of each run.</param>
/// <param name="relEstimator">Relevance estimator handed to the measure.</param>
/// <param name="confEstimator">Confidence estimator handed to the measure.</param>
/// <returns>Estimates indexed as [system][query].</returns>
internal static Dictionary<string, Dictionary<string, AbsoluteEffectivenessEstimate>> GetSystemQueryAbsolutes(
    Dictionary<string, Dictionary<string, Run>> sqRuns, IMeasure measure,
    IRelevanceEstimator relEstimator, IConfidenceEstimator confEstimator) {
    // Map every run to its estimate, system by system and query by query.
    return sqRuns.ToDictionary(
        bySystem => bySystem.Key,
        bySystem => bySystem.Value.ToDictionary(
            byQuery => byQuery.Key,
            byQuery => measure.Estimate(byQuery.Value, relEstimator, confEstimator)));
}
/// <summary>
/// Averages the per-query relative estimates of every system pair and returns the means sorted.
/// </summary>
/// <remarks>
/// The mean expectation is the sum of expectations divided by the number of queries, and the
/// variance of the mean is the sum of variances divided by the squared number of queries.
/// When the mean is negative the pair is reported the other way round (systems swapped, sign flipped).
/// Results are grouped by their first system, groups with more entries come first, and inside a
/// group entries are ordered by increasing expectation.
/// </remarks>
/// <param name="ssqRels">Per-query relative estimates indexed as [system A][system B][query].</param>
/// <param name="confEstimator">Estimator used for the interval and confidence of each mean.</param>
/// <returns>One estimate per system pair, labelled with the query "[all]".</returns>
internal static List<RelativeEffectivenessEstimate> GetSortedMeanRelatives(
    Dictionary<string, Dictionary<string, Dictionary<string, RelativeEffectivenessEstimate>>> ssqRels,
    IConfidenceEstimator confEstimator) {
    // Compute means
    var means = new List<RelativeEffectivenessEstimate>();
    foreach (var bySystemA in ssqRels) {
        foreach (var bySystemB in bySystemA.Value) {
            var perQuery = bySystemB.Value.Values;
            int queryCount = perQuery.Count;
            double mean = perQuery.Sum(r => r.Expectation) / queryCount;
            double variance = perQuery.Sum(r => r.Variance) / (queryCount * queryCount);

            // Report the pair with the better system first.
            bool reversed = mean < 0;
            string better = reversed ? bySystemB.Key : bySystemA.Key;
            string worse = reversed ? bySystemA.Key : bySystemB.Key;
            if (reversed) {
                mean = -mean;
            }

            Estimate summary = new Estimate(mean, variance);
            means.Add(new RelativeEffectivenessEstimate(better, worse, "[all]", mean, variance,
                confEstimator.EstimateInterval(summary), confEstimator.EstimateRelativeConfidence(summary)));
        }
    }

    // and sort
    return means
        .GroupBy(r => r.SystemA)
        .OrderByDescending(g => g.Count())
        .SelectMany(g => g.OrderBy(r => r.Expectation))
        .ToList();
}
/// <summary>
/// Averages the per-query absolute estimates of every system and returns the means
/// ordered by decreasing expectation.
/// </summary>
/// <remarks>
/// The mean expectation is the sum of expectations divided by the number of queries, and the
/// variance of the mean is the sum of variances divided by the squared number of queries.
/// </remarks>
/// <param name="sqAbss">Per-query absolute estimates indexed as [system][query].</param>
/// <param name="confEstimator">Estimator used for the interval and confidence of each mean.</param>
/// <returns>One estimate per system, labelled with the query "[all]".</returns>
internal static List<AbsoluteEffectivenessEstimate> GetSortedMeanAbsolutes(
    Dictionary<string, Dictionary<string, AbsoluteEffectivenessEstimate>> sqAbss,
    IConfidenceEstimator confEstimator) {
    return sqAbss
        .Select(bySystem => {
            // Compute means
            var perQuery = bySystem.Value.Values;
            int queryCount = perQuery.Count;
            double mean = perQuery.Sum(q => q.Expectation) / queryCount;
            double variance = perQuery.Sum(q => q.Variance) / (queryCount * queryCount);

            Estimate summary = new Estimate(mean, variance);
            return new AbsoluteEffectivenessEstimate(bySystem.Key, "[all]", mean, variance,
                confEstimator.EstimateInterval(summary), confEstimator.EstimateAbsoluteConfidence(summary));
        })
        // and sort
        .OrderByDescending(estimate => estimate.Expectation)
        .ToList();
}
/// <summary>
/// Reads the parsed command line, validates each value through the
/// <c>AbstractCommand.Check*</c> helpers, and configures this command accordingly.
/// </summary>
/// <param name="cmd">Parsed command line to read option values from.</param>
public override void CheckOptions(CommandLine cmd) {
    // Target and confidence estimator
    if (cmd.HasOption('c')) {
        _confidence = AbstractCommand.CheckConfidence(cmd.GetOptionValue('c'));
    }
    _target = AbstractCommand.CheckTarget(cmd.GetOptionValue('t'));

    double relativeSize = Allcea.DEFAULT_RELATIVE_SIZE;
    double absoluteSize = Allcea.DEFAULT_ABSOLUTE_SIZE;
    if (cmd.HasOption('s')) {
        // A single size is supplied; it overrides the default of the selected target only.
        if (_target == EvaluationTargets.Relative) {
            relativeSize = AbstractCommand.CheckRelativeSize(cmd.GetOptionValue('s'));
        } else if (_target == EvaluationTargets.Absolute) {
            absoluteSize = AbstractCommand.CheckAbsoluteSize(cmd.GetOptionValue('s'));
        }
    }
    _confEstimator = new NormalConfidenceEstimator(_confidence, relativeSize, absoluteSize);

    // Double format
    if (cmd.HasOption('d')) {
        _decimalDigits = AbstractCommand.CheckDigits(cmd.GetOptionValue('d'));
    }

    // Batches
    _batchNum = AbstractCommand.CheckBatchNumber(cmd.GetOptionValue('b'));
    _batchSize = AbstractCommand.CheckBatchSize(cmd.GetOptionValue('n'));

    // Files
    _inputPath = AbstractCommand.CheckInputFile(cmd.GetOptionValue('i'));
    _judgedPath = AbstractCommand.CheckJudgedFile(cmd.GetOptionValue('j'));

    // Estimator, built from its name and the parsed name=value parameters
    Dictionary<string, string> estimatorParameters = Allcea.ParseNameValueParameters(cmd.GetOptionValues('p'));
    _estimator = new EstimatorWrapper(cmd.GetOptionValue('e'), estimatorParameters);
}
/// <summary>
/// Validates the values of the parsed command line (via the <c>AbstractCommand.Check*</c>
/// helpers) and stores them in this command's fields.
/// </summary>
/// <param name="cmd">Parsed command line to read option values from.</param>
public override void CheckOptions(CommandLine cmd) {
    // Target and confidence estimator
    if (cmd.HasOption('c')) {
        this._confidence = AbstractCommand.CheckConfidence(cmd.GetOptionValue('c'));
    }
    this._target = AbstractCommand.CheckTarget(cmd.GetOptionValue('t'));

    // Effect sizes keep their defaults unless -s was given; -s only replaces
    // the size that belongs to the chosen target.
    double sizeForRelative = Allcea.DEFAULT_RELATIVE_SIZE;
    double sizeForAbsolute = Allcea.DEFAULT_ABSOLUTE_SIZE;
    bool sizeSupplied = cmd.HasOption('s');
    if (sizeSupplied && this._target == EvaluationTargets.Relative) {
        sizeForRelative = AbstractCommand.CheckRelativeSize(cmd.GetOptionValue('s'));
    } else if (sizeSupplied && this._target == EvaluationTargets.Absolute) {
        sizeForAbsolute = AbstractCommand.CheckAbsoluteSize(cmd.GetOptionValue('s'));
    }
    this._confEstimator = new NormalConfidenceEstimator(this._confidence, sizeForRelative, sizeForAbsolute);

    // Double format
    if (cmd.HasOption('d')) {
        this._decimalDigits = AbstractCommand.CheckDigits(cmd.GetOptionValue('d'));
    }

    // Batches
    this._batchNum = AbstractCommand.CheckBatchNumber(cmd.GetOptionValue('b'));
    this._batchSize = AbstractCommand.CheckBatchSize(cmd.GetOptionValue('n'));

    // Files
    this._inputPath = AbstractCommand.CheckInputFile(cmd.GetOptionValue('i'));
    this._judgedPath = AbstractCommand.CheckJudgedFile(cmd.GetOptionValue('j'));

    // Estimator
    Dictionary<string, string> namedValues = Allcea.ParseNameValueParameters(cmd.GetOptionValues('p'));
    this._estimator = new EstimatorWrapper(cmd.GetOptionValue('e'), namedValues);
}
/// <summary>
/// Estimates the difference in effectiveness between two runs (A minus B).
/// </summary>
/// <remarks>
/// The expectation adds the estimated relevance of A's documents and subtracts that of B's.
/// The variance adds the variance of every document of A; for a document of B, the variance is
/// added when A did not retrieve it and subtracted otherwise, cancelling what A contributed.
/// Both totals are averaged over the number of distinct documents in A and scaled by
/// <c>MaxRelevance</c>.
/// </remarks>
/// <param name="runA">First run; supplies the system, query and document count of the result.</param>
/// <param name="runB">Second run, subtracted from the first.</param>
/// <param name="relEstimator">Provides the relevance estimate of each document.</param>
/// <param name="confEstimator">Provides the interval and confidence of the final estimate.</param>
/// <returns>The relative estimate for A versus B on A's query.</returns>
public RelativeEffectivenessEstimate Estimate(Run runA, Run runB, IRelevanceEstimator relEstimator, IConfidenceEstimator confEstimator) {
    double expectation = 0;
    double variance = 0;

    // Traverse docs retrieved by A, remembering them for the overlap test below
    HashSet<string> retrievedByA = new HashSet<string>();
    foreach (string doc in runA.Documents) {
        RelevanceEstimate relevance = relEstimator.Estimate(runA.Query, doc);
        expectation += relevance.Expectation;
        variance += relevance.Variance;
        retrievedByA.Add(doc);
    }

    // Traverse docs retrieved by B
    foreach (string doc in runB.Documents) {
        RelevanceEstimate relevance = relEstimator.Estimate(runB.Query, doc);
        expectation -= relevance.Expectation;
        // If retrieved in both runs, does not contribute to variance
        variance += retrievedByA.Contains(doc) ? -relevance.Variance : relevance.Variance;
    }

    // Compute average, then normalize between 0 and 1
    int distinctInA = retrievedByA.Count;
    expectation = expectation / distinctInA / this.MaxRelevance;
    variance = variance / (distinctInA * distinctInA) / (this.MaxRelevance * this.MaxRelevance);

    Estimate summary = new Estimate(expectation, variance);
    return new RelativeEffectivenessEstimate(runA.System, runB.System, runA.Query, expectation, variance,
        confEstimator.EstimateInterval(summary), confEstimator.EstimateRelativeConfidence(summary));
}
/// <summary>
/// Estimates the effectiveness of a single run as the average estimated relevance of its documents.
/// </summary>
/// <remarks>
/// Expectations and variances of the documents are summed, divided by the number of documents
/// (the variance by its square), and scaled by <c>MaxRelevance</c>. A run without documents
/// divides zero by zero and therefore yields NaN for both values.
/// </remarks>
/// <param name="run">Run to evaluate; supplies the system, query and documents.</param>
/// <param name="relEstimator">Provides the relevance estimate of each document.</param>
/// <param name="confEstimator">Provides the interval and confidence of the final estimate.</param>
/// <returns>The absolute estimate for the run's system on the run's query.</returns>
public AbsoluteEffectivenessEstimate Estimate(Run run, IRelevanceEstimator relEstimator, IConfidenceEstimator confEstimator) {
    double e = 0, var = 0;
    // Counted while traversing so that run.Documents is enumerated only once.
    int retrieved = 0;

    // Traverse docs retrieved
    foreach (string doc in run.Documents) {
        RelevanceEstimate docEst = relEstimator.Estimate(run.Query, doc);
        e += docEst.Expectation;
        var += docEst.Variance;
        retrieved++;
    }

    // Compute average; the squared count is formed in double precision so it cannot overflow int
    e /= retrieved;
    var /= (double)retrieved * retrieved;

    // Normalize between 0 and 1
    e /= this.MaxRelevance;
    var /= this.MaxRelevance * this.MaxRelevance;

    Estimate est = new Estimate(e, var);
    return new AbsoluteEffectivenessEstimate(run.System, run.Query, e, var,
        confEstimator.EstimateInterval(est), confEstimator.EstimateAbsoluteConfidence(est));
}
/// <summary>
/// Builds the table of per-query relative effectiveness estimates between all pairs of systems.
/// </summary>
/// <remarks>
/// For the system at position i of the key array, only systems at positions greater than i are
/// used as the second system, so each pair is estimated once. The outer iterations are dispatched
/// through <c>Parallel.For</c>; each one fills a private dictionary and takes a lock only to
/// publish it into the shared result. The second system's run is fetched with the dictionary
/// indexer using the first system's query key, so it is expected to contain that query.
/// </remarks>
/// <param name="sqRuns">Runs indexed by system and then by query.</param>
/// <param name="measure">Measure used to estimate the difference between two runs.</param>
/// <param name="relEstimator">Relevance estimator handed to the measure.</param>
/// <param name="confEstimator">Confidence estimator handed to the measure.</param>
/// <returns>Estimates indexed as [system A][system B][query].</returns>
internal static Dictionary<string, Dictionary<string, Dictionary<string, RelativeEffectivenessEstimate>>> GetSystemSystemQueryRelatives(
    Dictionary<string, Dictionary<string, Run>> sqRuns, IMeasure measure,
    IRelevanceEstimator relEstimator, IConfidenceEstimator confEstimator) {
    // [sysA [sysB [query rel]]]
    var table = new Dictionary<string, Dictionary<string, Dictionary<string, RelativeEffectivenessEstimate>>>();
    string[] names = sqRuns.Keys.ToArray();
    int total = names.Length;

    Parallel.For(0, total - 1, a => {
        Dictionary<string, Run> runsOfA = sqRuns[names[a]];
        var rowOfA = new Dictionary<string, Dictionary<string, RelativeEffectivenessEstimate>>();

        for (int b = a + 1; b < total; b++) {
            var cell = new Dictionary<string, RelativeEffectivenessEstimate>();
            Dictionary<string, Run> runsOfB = sqRuns[names[b]];
            foreach (KeyValuePair<string, Run> queryRun in runsOfA) {
                string query = queryRun.Key;
                cell.Add(query, measure.Estimate(queryRun.Value, runsOfB[query], relEstimator, confEstimator));
            }
            rowOfA.Add(names[b], cell);
        }

        // Publishing the finished row is the only access to shared state.
        lock (table) {
            table.Add(names[a], rowOfA);
        }
    });

    return table;
}
/// <summary>
/// Estimates the absolute effectiveness of each run and returns the results in the same
/// [system][query] arrangement as the input.
/// </summary>
/// <param name="sqRuns">Runs indexed by system and then by query.</param>
/// <param name="measure">Measure used to estimate the effectiveness of each run.</param>
/// <param name="relEstimator">Relevance estimator handed to the measure.</param>
/// <param name="confEstimator">Confidence estimator handed to the measure.</param>
/// <returns>Estimates indexed as [system][query].</returns>
internal static Dictionary<string, Dictionary<string, AbsoluteEffectivenessEstimate>> GetSystemQueryAbsolutes(
    Dictionary<string, Dictionary<string, Run>> sqRuns, IMeasure measure,
    IRelevanceEstimator relEstimator, IConfidenceEstimator confEstimator) {
    var bySystem = new Dictionary<string, Dictionary<string, AbsoluteEffectivenessEstimate>>();
    foreach (KeyValuePair<string, Dictionary<string, Run>> systemRuns in sqRuns) {
        // One estimate per query of the current system.
        Dictionary<string, AbsoluteEffectivenessEstimate> byQuery = systemRuns.Value.ToDictionary(
            queryRun => queryRun.Key,
            queryRun => measure.Estimate(queryRun.Value, relEstimator, confEstimator));
        bySystem.Add(systemRuns.Key, byQuery);
    }
    return bySystem;
}
/// <summary>
/// Collapses the per-query relative estimates of each system pair into a mean estimate and
/// returns the means in presentation order.
/// </summary>
/// <remarks>
/// Mean expectation = sum of expectations / number of queries; variance of the mean = sum of
/// variances / (number of queries squared). A pair whose mean is negative is emitted with its
/// two systems exchanged and the mean negated. The output is grouped by first system, larger
/// groups first, and each group is ordered by increasing expectation.
/// </remarks>
/// <param name="ssqRels">Per-query relative estimates indexed as [system A][system B][query].</param>
/// <param name="confEstimator">Estimator used for the interval and confidence of each mean.</param>
/// <returns>One estimate per system pair, labelled with the query "[all]".</returns>
internal static List<RelativeEffectivenessEstimate> GetSortedMeanRelatives(
    Dictionary<string, Dictionary<string, Dictionary<string, RelativeEffectivenessEstimate>>> ssqRels,
    IConfidenceEstimator confEstimator) {
    // Compute means
    var pairMeans = new List<RelativeEffectivenessEstimate>();
    foreach (var outer in ssqRels) {
        foreach (var inner in outer.Value) {
            var queryEstimates = inner.Value.Values;
            double meanDiff = queryEstimates.Sum(q => q.Expectation);
            double meanVar = queryEstimates.Sum(q => q.Variance);
            meanDiff /= queryEstimates.Count;
            meanVar /= queryEstimates.Count * queryEstimates.Count;

            string left, right;
            if (meanDiff < 0) {
                // Negative difference: swap the systems and report a positive value instead.
                meanDiff = -meanDiff;
                left = inner.Key;
                right = outer.Key;
            } else {
                left = outer.Key;
                right = inner.Key;
            }

            Estimate overall = new Estimate(meanDiff, meanVar);
            double[] unusedPlaceholder = null; // no extra state is needed; interval and confidence come from confEstimator
            pairMeans.Add(new RelativeEffectivenessEstimate(left, right, "[all]", meanDiff, meanVar,
                confEstimator.EstimateInterval(overall), confEstimator.EstimateRelativeConfidence(overall)));
        }
    }

    // and sort
    var ordered = new List<RelativeEffectivenessEstimate>();
    var bySize = pairMeans.GroupBy(m => m.SystemA).OrderByDescending(g => g.Count());
    foreach (var sameFirstSystem in bySize) {
        ordered.AddRange(sameFirstSystem.OrderBy(m => m.Expectation));
    }
    return ordered;
}
/// <summary>
/// Estimates the effectiveness of a single run as the average estimated relevance of its documents.
/// </summary>
/// <remarks>
/// Expectations and variances of the documents are summed, divided by the number of documents
/// (the variance by its square), and scaled by <c>MaxRelevance</c>. A run without documents
/// divides zero by zero and therefore yields NaN for both values.
/// </remarks>
/// <param name="run">Run to evaluate; supplies the system, query and documents.</param>
/// <param name="relEstimator">Provides the relevance estimate of each document.</param>
/// <param name="confEstimator">Provides the interval and confidence of the final estimate.</param>
/// <returns>The absolute estimate for the run's system on the run's query.</returns>
public AbsoluteEffectivenessEstimate Estimate(Run run, IRelevanceEstimator relEstimator, IConfidenceEstimator confEstimator) {
    double e = 0, var = 0;
    // Tallied during the traversal: avoids enumerating run.Documents again just to count it.
    int docCount = 0;

    // Traverse docs retrieved
    foreach (string doc in run.Documents) {
        RelevanceEstimate docEst = relEstimator.Estimate(run.Query, doc);
        e += docEst.Expectation;
        var += docEst.Variance;
        docCount++;
    }

    // Compute average; squaring in double precision keeps large counts from overflowing int
    e /= docCount;
    var /= (double)docCount * docCount;

    // Normalize between 0 and 1
    e /= this.MaxRelevance;
    var /= this.MaxRelevance * this.MaxRelevance;

    Estimate est = new Estimate(e, var);
    return new AbsoluteEffectivenessEstimate(run.System, run.Query, e, var,
        confEstimator.EstimateInterval(est), confEstimator.EstimateAbsoluteConfidence(est));
}
/// <summary>
/// Collapses the per-query absolute estimates of each system into a mean estimate and returns
/// the means from highest to lowest expectation.
/// </summary>
/// <remarks>
/// Mean expectation = sum of expectations / number of queries; variance of the mean = sum of
/// variances / (number of queries squared).
/// </remarks>
/// <param name="sqAbss">Per-query absolute estimates indexed as [system][query].</param>
/// <param name="confEstimator">Estimator used for the interval and confidence of each mean.</param>
/// <returns>One estimate per system, labelled with the query "[all]".</returns>
internal static List<AbsoluteEffectivenessEstimate> GetSortedMeanAbsolutes(
    Dictionary<string, Dictionary<string, AbsoluteEffectivenessEstimate>> sqAbss,
    IConfidenceEstimator confEstimator) {
    // Compute means
    var systemMeans = new List<AbsoluteEffectivenessEstimate>();
    foreach (KeyValuePair<string, Dictionary<string, AbsoluteEffectivenessEstimate>> system in sqAbss) {
        Dictionary<string, AbsoluteEffectivenessEstimate> queries = system.Value;
        double meanScore = queries.Sum(q => q.Value.Expectation);
        double meanVar = queries.Sum(q => q.Value.Variance);
        meanScore /= queries.Count;
        meanVar /= queries.Count * queries.Count;

        Estimate overall = new Estimate(meanScore, meanVar);
        systemMeans.Add(new AbsoluteEffectivenessEstimate(system.Key, "[all]", meanScore, meanVar,
            confEstimator.EstimateInterval(overall), confEstimator.EstimateAbsoluteConfidence(overall)));
    }

    // and sort
    return systemMeans.OrderByDescending(m => m.Expectation).ToList();
}
/// <summary>
/// Estimates how much more effective run A is than run B for the same query.
/// </summary>
/// <remarks>
/// Document relevance expectations are added for A and subtracted for B. Variances are added for
/// every document of A; for B they are added when the document is not in A and subtracted when it
/// is, which removes A's contribution for shared documents. The totals are divided by the number
/// of distinct documents in A (the variance by its square) and scaled by <c>MaxRelevance</c>.
/// </remarks>
/// <param name="runA">First run; supplies the system, query and document count of the result.</param>
/// <param name="runB">Second run, subtracted from the first.</param>
/// <param name="relEstimator">Provides the relevance estimate of each document.</param>
/// <param name="confEstimator">Provides the interval and confidence of the final estimate.</param>
/// <returns>The relative estimate for A versus B on A's query.</returns>
public RelativeEffectivenessEstimate Estimate(Run runA, Run runB, IRelevanceEstimator relEstimator, IConfidenceEstimator confEstimator) {
    double diff = 0;
    double spread = 0;
    HashSet<string> docsOfA = new HashSet<string>(); // retrieved by run A

    // Traverse docs retrieved by A
    foreach (string docA in runA.Documents) {
        RelevanceEstimate estA = relEstimator.Estimate(runA.Query, docA);
        diff += estA.Expectation;
        spread += estA.Variance;
        docsOfA.Add(docA);
    }

    // Traverse docs retrieved by B
    foreach (string docB in runB.Documents) {
        RelevanceEstimate estB = relEstimator.Estimate(runB.Query, docB);
        diff -= estB.Expectation;
        bool shared = docsOfA.Contains(docB);
        if (!shared) {
            spread += estB.Variance;
        } else {
            // If retrieved in both runs, does not contribute to variance
            spread -= estB.Variance;
        }
    }

    // Compute average
    int sizeOfA = docsOfA.Count;
    diff /= sizeOfA;
    spread /= sizeOfA * sizeOfA;

    // Normalize between 0 and 1
    diff /= this.MaxRelevance;
    spread /= this.MaxRelevance * this.MaxRelevance;

    Estimate overall = new Estimate(diff, spread);
    return new RelativeEffectivenessEstimate(runA.System, runB.System, runA.Query, diff, spread,
        confEstimator.EstimateInterval(overall), confEstimator.EstimateRelativeConfidence(overall));
}