/// <summary>
/// Indexes the known judgments by query/document id and instantiates the wrapped
/// estimator that matches the configured estimator name.
/// </summary>
/// <param name="runs">System runs handed to the "mout" and "mjud" estimators.</param>
/// <param name="judged">Known judgments; each one is stored under the id returned by <c>RelevanceEstimate.GetId</c>.</param>
public void Initialize(IEnumerable<Run> runs, IEnumerable<RelevanceEstimate> judged)
{
    // Index the known judgments; a later judgment with the same id overwrites the earlier one
    foreach (RelevanceEstimate known in judged)
    {
        this._judged[RelevanceEstimate.GetId(known.Query, known.Document)] = known;
    }

    // Create the wrapped estimator. Any other name leaves this._estimator untouched.
    if (this._name == "uniform")
    {
        // The uniform estimator needs neither runs nor metadata
        this._estimator = new UniformRelevanceEstimator(100);
    }
    else if (this._name == "mout")
    {
        // Metadata is read from the location given by the "meta" parameter
        IEnumerable<Metadata> moutMetadata = AbstractCommand.ReadMetadata(this._parameters["meta"]);
        this._estimator = new MoutRelevanceEstimator(runs, moutMetadata);
    }
    else if (this._name == "mjud")
    {
        IEnumerable<Metadata> mjudMetadata = AbstractCommand.ReadMetadata(this._parameters["meta"]);

        // The "judged" parameter is optional: without it the estimator starts with no judgments
        IEnumerable<RelevanceEstimate> judgedEst;
        if (this._parameters.ContainsKey("judged"))
        {
            judgedEst = AbstractCommand.ReadKnownJudgments(this._parameters["judged"]);
        }
        else
        {
            judgedEst = new RelevanceEstimate[0];
        }

        this._estimator = new MjudRelevanceEstimator(runs, mjudMetadata, judgedEst);
    }
}
/// <summary>
/// Returns the feature values stored for a query/document pair.
/// </summary>
/// <param name="query">Query identifier.</param>
/// <param name="doc">Document identifier.</param>
/// <returns>
/// The array { fSYS, OV, fART, sGEN, fGEN }. A feature with no stored value for the
/// pair is reported as <c>double.NaN</c>; sGEN is reported as 1 or 0 when it is stored.
/// </returns>
public double[] Features(string query, string doc)
{
    string key = RelevanceEstimate.GetId(query, doc);

    double systemFeature;
    if (!this._fSYS.TryGetValue(key, out systemFeature))
    {
        systemFeature = double.NaN;
    }

    double genreFeature;
    if (!this._fGEN.TryGetValue(key, out genreFeature))
    {
        genreFeature = double.NaN;
    }

    double artistFeature;
    if (!this._fART.TryGetValue(key, out artistFeature))
    {
        artistFeature = double.NaN;
    }

    // The stored flag is a bool; expose it numerically, or as NaN when it is missing
    bool sameGenre;
    double sameGenreFeature = double.NaN;
    if (this._sGEN.TryGetValue(key, out sameGenre))
    {
        sameGenreFeature = sameGenre ? 1 : 0;
    }

    return new double[] { systemFeature, this._OV, artistFeature, sameGenreFeature, genreFeature };
}
/// <summary>
/// Builds the estimator from the system runs, the document metadata and the
/// judgments that are already known.
/// </summary>
/// <param name="runs">System runs. They are materialized once because they are traversed several times here.</param>
/// <param name="metadata">Document metadata; the artist of each document is taken from it. Materialized once as well.</param>
/// <param name="judged">Known judgments; each one is passed to <c>Update</c>.</param>
/// <remarks>
/// With an empty <paramref name="runs"/> the OV computation fails, because the number of
/// runs is divided by a system count of zero using integer division.
/// </remarks>
public MjudRelevanceEstimator(IEnumerable<Run> runs, IEnumerable<Metadata> metadata, IEnumerable<RelevanceEstimate> judged)
{
    // Materialize the inputs: they are enumerated several times below (and again by the
    // default estimator), which would re-execute a lazily evaluated source every time.
    List<Run> runList = runs.ToList();
    List<Metadata> metadataList = metadata.ToList();

    // Instantiate model: fSYS, aSYS, aART
    this._model = new OrdinalLogisticRegression(MjudRelevanceEstimator.LABELS, MjudRelevanceEstimator.ALPHAS, MjudRelevanceEstimator.BETAS);
    this._defaultEstimator = new MoutRelevanceEstimator(runList, metadataList);

    // Number of systems and metadata
    int nSys = runList.Select(r => r.System).Distinct().Count();
    this._dArtists = new Dictionary<string, string>();
    foreach (var m in metadataList)
    {
        this._dArtists[m.Document] = m.Artist;
    }

    this._fSYS = new Dictionary<string, double>();
    this._aSYS = new Dictionary<string, double>();
    this._aART = new Dictionary<string, double>();
    this._sRels = new Dictionary<string, List<double>>();
    this._qaRels = new Dictionary<string, List<double>>();
    foreach (var run in runList)
    {
        string query = run.Query;
        foreach (string doc in run.Documents)
        {
            string id = RelevanceEstimate.GetId(query, doc);

            // fSYS: every run that contains the document adds 1/nSys
            double fSYS = 0;
            this._fSYS.TryGetValue(id, out fSYS);
            this._fSYS[id] = fSYS + 1.0 / nSys;
            this._aSYS[id] = 0;
            this._aART[id] = 0;

            // sRels: one (initially empty) list per system that retrieved at least one document
            if (!this._sRels.ContainsKey(run.System))
            {
                this._sRels[run.System] = new List<double>();
            }

            // qaRels: one (initially empty) list per query/artist pair with known metadata
            string artist = null;
            if (this._dArtists.TryGetValue(doc, out artist))
            {
                string queryArtist = query + "\t" + artist;
                if (!this._qaRels.ContainsKey(queryArtist))
                {
                    this._qaRels[queryArtist] = new List<double>();
                }
            }
        }
    }

    // OV: distinct query/document pairs over nSys * (runs per system) * (documents in the first run).
    // NOTE(review): runList.Count / nSys is an integer division, kept exactly as before.
    this._OV = ((double)this._fSYS.Count) / (nSys * (runList.Count / nSys) * runList.First().Documents.Count());

    this._qdsRanks = jurbano.Allcea.Cli.AbstractCommand.ToQueryDocumentSystemRanks(runList);

    // Incorporate known judgments
    foreach (var est in judged)
    {
        this.Update(est);
    }
    this._needsUpdate = true;
}
/// <summary>
/// Records a new judgment and forwards it to the wrapped estimator.
/// </summary>
/// <param name="estimate">Judgment to store under its query/document id.</param>
public void Update(RelevanceEstimate estimate)
{
    // Keep the judgment locally, replacing any previous one for the same query/document
    this._judged[RelevanceEstimate.GetId(estimate.Query, estimate.Document)] = estimate;

    // Let the wrapped estimator take the judgment into account too
    this._estimator.Update(estimate);
}
/// <summary>
/// Returns the estimate stored for a query/document pair.
/// </summary>
/// <param name="query">Query identifier.</param>
/// <param name="doc">Document identifier.</param>
/// <returns>The stored estimate.</returns>
/// <exception cref="ArgumentException">No estimate is stored for the pair.</exception>
public RelevanceEstimate Estimate(string query, string doc)
{
    RelevanceEstimate stored = null;
    bool found = this._estimates.TryGetValue(RelevanceEstimate.GetId(query, doc), out stored);
    if (!found)
    {
        throw new ArgumentException("No estimate available for document '" + doc + "' to query '" + query + "'.");
    }
    return stored;
}
/// <summary>
/// Recomputes the aSYS and aART features of every query/document pair in the rank
/// structure from the judgments collected so far, then clears the pending-update flag.
/// </summary>
protected void DoUpdate()
{
    foreach (var queryEntry in this._qdsRanks)
    {
        string query = queryEntry.Key;
        foreach (var docEntry in queryEntry.Value)
        {
            string doc = docEntry.Key;
            string id = RelevanceEstimate.GetId(query, doc);

            // aSYS: add up the average judgment of each system that retrieved the document
            // and already has judgments
            double sumOfAverages = 0;
            int systemsWithJudgments = 0;
            foreach (string sys in docEntry.Value.Keys)
            {
                var systemRels = this._sRels[sys];
                if (systemRels.Count == 0)
                {
                    continue;
                }
                systemsWithJudgments++;
                sumOfAverages += systemRels.Average();
            }

            if (systemsWithJudgments > 0)
            {
                // The divisor is the number of all systems that retrieved the document
                this._aSYS[id] = sumOfAverages / docEntry.Value.Count;
            }
            else
            {
                // Nothing judged yet for any of those systems: leave the feature undefined
                this._aSYS.Remove(id);
            }

            // aART: average judgment for the query and the document's artist
            string artist = null;
            bool artistIsUsable = this._dArtists.TryGetValue(doc, out artist) && artist != "VARIOUS ARTISTS";
            if (!artistIsUsable)
            {
                this._aART.Remove(id);
            }
            else
            {
                var artistRels = this._qaRels[query + "\t" + artist];
                if (artistRels.Count == 0)
                {
                    // No judgments yet for this query/artist: leave the feature undefined
                    this._aART.Remove(id);
                }
                else
                {
                    this._aART[id] = artistRels.Average();
                }
            }
        }
    }

    this._needsUpdate = false;
}
/// <summary>
/// Returns the known judgment for a query/document pair when there is one, and
/// otherwise asks the wrapped estimator.
/// </summary>
/// <param name="query">Query identifier.</param>
/// <param name="doc">Document identifier.</param>
/// <returns>The stored judgment, or the wrapped estimator's estimate.</returns>
public RelevanceEstimate Estimate(string query, string doc)
{
    RelevanceEstimate known = null;
    if (!this._judged.TryGetValue(RelevanceEstimate.GetId(query, doc), out known))
    {
        // Not judged: delegate to the wrapped estimator
        return this._estimator.Estimate(query, doc);
    }
    return known;
}
/// <summary>
/// Estimates the relevance of a document to a query by evaluating the model on the
/// stored features, falling back to the default estimator when any feature is missing.
/// </summary>
/// <param name="query">Query identifier.</param>
/// <param name="doc">Document identifier.</param>
/// <returns>
/// An estimate built from the first two values returned by the model, or the default
/// estimator's estimate.
/// </returns>
public RelevanceEstimate Estimate(string query, string doc)
{
    string key = RelevanceEstimate.GetId(query, doc);

    double systemFeature = 0;
    bool sameGenre = false;
    double genreFeature = 0;
    double artistFeature = 0;

    // All four features are required; the first missing one sends us to the default estimator
    if (!this._fSYS.TryGetValue(key, out systemFeature)
        || !this._sGEN.TryGetValue(key, out sameGenre)
        || !this._fGEN.TryGetValue(key, out genreFeature)
        || !this._fART.TryGetValue(key, out artistFeature))
    {
        return this._defaultEstimator.Estimate(query, doc);
    }

    // Model inputs in order: fSYS, OV, fSYS:OV, fART, sGEN, fGEN, sGEN:fGEN
    double[] thetas = new double[]
    {
        systemFeature,
        this._OV,
        systemFeature * this._OV,
        artistFeature,
        sameGenre ? 1 : 0,
        genreFeature,
        sameGenre ? genreFeature : 0
    };
    double[] eval = this._model.Evaluate(thetas);
    return new RelevanceEstimate(query, doc, eval[0], eval[1]);
}
/// <summary>
/// Estimates the relevance of a document to a query by evaluating the model on the
/// stored features, refreshing them first if an update is pending. Falls back to the
/// default estimator when any feature is missing.
/// </summary>
/// <param name="query">Query identifier.</param>
/// <param name="doc">Document identifier.</param>
/// <returns>
/// An estimate built from the first two values returned by the model, or the default
/// estimator's estimate.
/// </returns>
public RelevanceEstimate Estimate(string query, string doc)
{
    // Bring the judgment-based features up to date before reading them
    if (this._needsUpdate)
    {
        this.DoUpdate();
    }

    string key = RelevanceEstimate.GetId(query, doc);

    double systemFeature = 0;
    double systemJudgmentFeature = 0;
    double artistJudgmentFeature = 0;

    // All three features are required; the first missing one sends us to the default estimator
    if (!this._fSYS.TryGetValue(key, out systemFeature)
        || !this._aSYS.TryGetValue(key, out systemJudgmentFeature)
        || !this._aART.TryGetValue(key, out artistJudgmentFeature))
    {
        return this._defaultEstimator.Estimate(query, doc);
    }

    // Model inputs in order: fSYS, aSYS, aART
    double[] thetas = new double[] { systemFeature, systemJudgmentFeature, artistJudgmentFeature };
    double[] eval = this._model.Evaluate(thetas);
    return new RelevanceEstimate(query, doc, eval[0], eval[1]);
}
/// <summary>
/// Returns the feature values stored for a query/document pair, refreshing the
/// judgment-based features first if an update is pending.
/// </summary>
/// <param name="query">Query identifier.</param>
/// <param name="doc">Document identifier.</param>
/// <returns>
/// The array { fSYS, OV, aSYS, aART }. A feature with no stored value for the pair is
/// reported as <c>double.NaN</c>.
/// </returns>
public double[] Features(string query, string doc)
{
    if (this._needsUpdate)
    {
        this.DoUpdate();
    }

    string key = RelevanceEstimate.GetId(query, doc);

    double systemFeature;
    if (!this._fSYS.TryGetValue(key, out systemFeature))
    {
        systemFeature = double.NaN;
    }

    double systemJudgmentFeature;
    if (!this._aSYS.TryGetValue(key, out systemJudgmentFeature))
    {
        systemJudgmentFeature = double.NaN;
    }

    double artistJudgmentFeature;
    if (!this._aART.TryGetValue(key, out artistJudgmentFeature))
    {
        artistJudgmentFeature = double.NaN;
    }

    return new double[] { systemFeature, this._OV, systemJudgmentFeature, artistJudgmentFeature };
}
/// <summary>
/// Builds the estimator and precomputes its features (fSYS, OV, sGEN, fGEN and fART)
/// from the system runs and the document metadata.
/// </summary>
/// <param name="runs">System runs. They are materialized once because they are traversed several times here.</param>
/// <param name="metadata">Document metadata providing the artist and genre of each document.</param>
/// <remarks>
/// With an empty <paramref name="runs"/> the OV computation fails, because it reads the
/// first run.
/// </remarks>
public MoutRelevanceEstimator(IEnumerable<Run> runs, IEnumerable<Metadata> metadata)
{
    // Materialize the runs: they are enumerated three times below, which would
    // re-execute a lazily evaluated source every time.
    List<Run> runList = runs.ToList();

    // Instantiate model: fSYS, OV, fSYS:OV, fART, sGEN, fGEN, sGEN:fGEN
    this._model = new OrdinalLogisticRegression(MoutRelevanceEstimator.LABELS, MoutRelevanceEstimator.ALPHAS, MoutRelevanceEstimator.BETAS);
    this._defaultEstimator = new UniformRelevanceEstimator(100);

    // Number of systems and metadata
    int nSys = runList.Select(r => r.System).Distinct().Count();
    Dictionary<string, string> artists = new Dictionary<string, string>(); // [doc, artist]
    Dictionary<string, string> genres = new Dictionary<string, string>();  // [doc, genre]
    foreach (var m in metadata)
    {
        artists[m.Document] = m.Artist;
        genres[m.Document] = m.Genre;
    }

    // Documents retrieved for each query, used below to compute sGEN, fGEN and fART
    Dictionary<string, HashSet<string>> qDocs = new Dictionary<string, HashSet<string>>();

    // fSYS: every run that contains the document adds 1/nSys
    this._fSYS = new Dictionary<string, double>();
    foreach (var run in runList)
    {
        string query = run.Query;
        HashSet<string> docs = null;
        if (!qDocs.TryGetValue(query, out docs))
        {
            docs = new HashSet<string>();
            qDocs.Add(query, docs);
        }
        foreach (string doc in run.Documents)
        {
            string id = RelevanceEstimate.GetId(query, doc);
            double fSYS = 0;
            this._fSYS.TryGetValue(id, out fSYS);
            this._fSYS[id] = fSYS + 1.0 / nSys;
            docs.Add(doc);
        }
    }

    // OV: distinct query/document pairs over nSys * (number of queries) * (documents in the first run)
    this._OV = ((double)this._fSYS.Count) / (nSys * qDocs.Count * runList.First().Documents.Count());

    // sGEN, fGEN and fART, traverse qDocs
    this._sGEN = new Dictionary<string, bool>();
    this._fGEN = new Dictionary<string, double>();
    this._fART = new Dictionary<string, double>();
    foreach (var queryDocs in qDocs)
    {
        string query = queryDocs.Key;

        // The query identifier is itself looked up in the genre metadata; do it once per query
        string queryGenre = null;
        bool queryHasGenre = genres.TryGetValue(query, out queryGenre);

        // One counting pass per query instead of comparing every document against every other
        Dictionary<string, double> genreFractions = MoutRelevanceEstimator.SameValueFractions(queryDocs.Value, genres);
        Dictionary<string, double> artistFractions = MoutRelevanceEstimator.SameValueFractions(queryDocs.Value, artists);

        foreach (string doc in queryDocs.Value)
        {
            string id = RelevanceEstimate.GetId(query, doc);

            // sGEN and fGEN exist only for documents with genre metadata
            string docGenre = null;
            if (genres.TryGetValue(doc, out docGenre))
            {
                // sGEN additionally requires genre metadata for the query
                if (queryHasGenre)
                {
                    this._sGEN[id] = docGenre == queryGenre;
                }
                this._fGEN[id] = genreFractions[doc];
            }

            // fART exists only for documents with artist metadata
            double artistFraction;
            if (artistFractions.TryGetValue(doc, out artistFraction))
            {
                this._fART[id] = artistFraction;
            }
        }
    }
}

/// <summary>
/// For every document that has an entry in <paramref name="values"/>, computes the fraction
/// of such documents (within <paramref name="docs"/>) whose value equals its own.
/// </summary>
/// <param name="docs">Documents to consider.</param>
/// <param name="values">Metadata value of each document, e.g. [doc, genre].</param>
/// <returns>[doc, fraction] for the documents that have an entry in <paramref name="values"/>.</returns>
private static Dictionary<string, double> SameValueFractions(HashSet<string> docs, Dictionary<string, string> values)
{
    Dictionary<string, int> counts = new Dictionary<string, int>();
    int nullCount = 0; // null cannot be a dictionary key, so null values are tallied separately
    int withValue = 0; // documents that have an entry in values

    foreach (string doc in docs)
    {
        string value;
        if (!values.TryGetValue(doc, out value))
        {
            continue;
        }
        withValue++;
        if (value == null)
        {
            nullCount++;
        }
        else
        {
            int seen;
            counts.TryGetValue(value, out seen);
            counts[value] = seen + 1;
        }
    }

    Dictionary<string, double> fractions = new Dictionary<string, double>();
    foreach (string doc in docs)
    {
        string value;
        if (values.TryGetValue(doc, out value))
        {
            int same = value == null ? nullCount : counts[value];
            fractions[doc] = (double)same / withValue;
        }
    }
    return fractions;
}
/// <summary>
/// Stores an estimate under its query/document id, replacing any previous one.
/// </summary>
/// <param name="estimate">Estimate to store.</param>
public void Update(RelevanceEstimate estimate)
{
    this._estimates[RelevanceEstimate.GetId(estimate.Query, estimate.Document)] = estimate;
}