/// <summary>
/// Scores a candidate phrase by relating its raw frequency in the data to its
/// Google n-gram count.
/// </summary>
/// <remarks>
/// The n-gram count is the sum of two lookups: one for the lower-cased phrase and
/// one for the phrase exactly as written. Lower-casing uses the invariant culture
/// so the lookup key does not change with the current thread culture.
/// </remarks>
/// <param name="g">The candidate phrase to score.</param>
/// <returns>
/// <c>0</c> when the summed n-gram count equals <c>-1</c>; <c>1</c> when
/// <c>Data.rawFreq</c> has no entry for <paramref name="g"/>; otherwise
/// <c>(1 + rawFreq(g) * sqrt(Data.ratioGoogleNgramFreqWithDataFreq)) / count</c>.
/// </returns>
public static double GetGoogleNgramScore(CandidatePhrase g) {
    var phrase = g.GetPhrase();

    // Culture-invariant lower-casing: the key sent to the n-gram store must not depend on the
    // machine's locale (culture-sensitive ToLower() maps e.g. 'I' differently under tr-TR).
    double count = GoogleNGramsSQLBacked.GetCount(phrase.ToLowerInvariant())
                   + GoogleNGramsSQLBacked.GetCount(phrase);

    // NOTE(review): the -1 check is applied to the SUM of two lookups. If GetCount uses -1 as a
    // per-lookup "not found" marker, two misses would add up to -2 and slip past this check —
    // confirm against GetCount's contract.
    if (count == -1) {
        return 0;
    }

    if (!Data.rawFreq.ContainsKey(g)) {
        // Usually the lower this tf-idf-like score, the better; without raw frequency
        // information for the phrase, give it a bad score.
        return 1;
    }

    // NOTE(review): a count of 0 is not special-cased, so this floating-point division
    // produces a non-finite result in that case — verify that callers tolerate it.
    return (1 + Data.rawFreq.GetCount(g) * Math.Sqrt(Data.ratioGoogleNgramFreqWithDataFreq)) / count;
}
/// <summary>
/// Recomputes <c>ratioGoogleNgramFreqWithDataFreq</c> as
/// <c>GoogleNGramsSQLBacked.GetTotalCount(1)</c> divided by
/// <c>Data.rawFreq.TotalCount()</c>, then logs the new value.
/// </summary>
public static void SetRatioGoogleNgramFreqWithDataFreq() {
    // Numerator first, then denominator, so the two calls happen in the same order as before.
    var googleNgramTotal = GoogleNGramsSQLBacked.GetTotalCount(1);
    var dataTotal = Data.rawFreq.TotalCount();
    ratioGoogleNgramFreqWithDataFreq = googleNgramTotal / dataTotal;

    // Report the stored value (read back after the assignment) at the minimal debug level.
    string message = "ratioGoogleNgramFreqWithDataFreq is " + ratioGoogleNgramFreqWithDataFreq;
    Redwood.Log(ConstantsAndVariables.minimaldebug, "Data", message);
}