/// <summary>
/// Scores a phrase from the weights of the individual words it contains.
/// </summary>
/// <remarks>
/// A phrase with fewer than two whitespace-separated tokens is looked up in
/// <paramref name="weights"/> as a whole. Otherwise each token is scored on its own
/// (using <paramref name="defaultWt"/> for tokens absent from <paramref name="weights"/>)
/// and the result is either the mean or the minimum of the token scores, depending on
/// the <c>useAvgInsteadofMinPhraseScoring</c> flag.
/// </remarks>
/// <param name="weights">Counter mapping phrases to their weights.</param>
/// <param name="ph">The phrase to score.</param>
/// <param name="defaultWt">Score used for any phrase or word missing from <paramref name="weights"/>.</param>
/// <returns>The phrase's own weight, or the mean/minimum of its word weights.</returns>
public virtual double GetPhraseWeightFromWords(ICounter<CandidatePhrase> weights, CandidatePhrase ph, double defaultWt)
{
    // "\s+" is a regular expression; string.Split does not interpret regexes,
    // so the tokenization has to go through Regex.Split.
    string[] words = System.Text.RegularExpressions.Regex.Split(ph.GetPhrase(), "\\s+");

    if (words.Length < 2)
    {
        return weights.ContainsKey(ph) ? weights.GetCount(ph) : defaultWt;
    }

    double totalScore = 0;
    double minScore = double.MaxValue;
    foreach (string word in words)
    {
        // The counter is keyed by CandidatePhrase, so the same key object must be used
        // for both the membership test and the count lookup (not the raw string).
        CandidatePhrase wordPhrase = CandidatePhrase.CreateOrGet(word);
        double score = weights.ContainsKey(wordPhrase) ? weights.GetCount(wordPhrase) : defaultWt;

        if (score < minScore)
        {
            minScore = score;
        }
        totalScore += score;
    }

    if (useAvgInsteadofMinPhraseScoring)
    {
        // Average over the number of words, not the number of characters in the phrase.
        return totalScore / words.Length;
    }
    return minScore;
}
/// <summary>
/// Computes a score for a phrase from its Google n-gram count and its raw frequency in
/// <c>Data.rawFreq</c>.
/// </summary>
/// <remarks>
/// The n-gram count is the sum of the counts for the lower-cased phrase and for the phrase
/// as written. The method returns 0 when that sum equals -1, 1 when <c>Data.rawFreq</c> has
/// no entry for the phrase, and otherwise
/// <c>(1 + rawFreq * sqrt(Data.ratioGoogleNgramFreqWithDataFreq)) / count</c>.
/// </remarks>
/// <param name="g">The phrase to score.</param>
/// <returns>The score described above.</returns>
public static double GetGoogleNgramScore(CandidatePhrase g)
{
    string phrase = g.GetPhrase();
    double count = GoogleNGramsSQLBacked.GetCount(phrase.ToLower()) + GoogleNGramsSQLBacked.GetCount(phrase);

    // NOTE(review): -1 looks like a "not found" sentinel, but it is compared against the
    // SUM of two lookups — confirm against GoogleNGramsSQLBacked.GetCount that this is intended.
    if (count == -1)
    {
        return 0;
    }

    if (!Data.rawFreq.ContainsKey(g))
    {
        // Returning 1 because usually the lower this tf-idf score the better; if we don't
        // have raw frequency info, give the phrase a bad score.
        return 1;
    }

    // NOTE(review): a count of 0 makes this a floating-point division by zero
    // (yielding infinity rather than throwing) — verify callers tolerate that.
    return (1 + Data.rawFreq.GetCount(g) * Math.Sqrt(Data.ratioGoogleNgramFreqWithDataFreq)) / count;
}