// Possibility of a word = WC(word) / total_word_count.
//
// inWord    - the word to score; passed straight to GetWc.
// wordWcMap - supplies the total word count via GetTotalWc() and is passed to GetWc.
// caseFlag  - forwarded unchanged to GetWc (presumably a case-sensitivity switch; confirm against GetWc).
//
// Returns the word count divided by the total word count, as a double.
private static double GetWordPossOverTotalWc(string inWord, WordWcMap wordWcMap, bool caseFlag)
{
    // Multiplying by 1.0d promotes both counts to double so the division is floating-point.
    double wordCount = GetWc(inWord, wordWcMap, caseFlag) * 1.0d;
    double corpusTotal = wordWcMap.GetTotalWc() * 1.0d;

    return wordCount / corpusTotal;
}
// Log-ratio word score: log(wc / totalWc) / log(maxWc / totalWc).
//
// Notes carried over from the original author:
//   - should be the same as GetUnigramFreqScore
//   - score range is between 0.0 ~ 1.0 (NOTE(review): range not verified here)
//   - not used because it is no good
//
// inWord    - the word to score; passed to GetWc.
// maxWc     - the maximum word count used as the normalizing reference.
// wordWcMap - supplies the total word count via GetTotalWc() and is passed to GetWc.
private static double GetWordScore2(string inWord, double maxWc, WordWcMap wordWcMap)
{
    // Multiplying by 1.0d promotes the counts to double before dividing.
    double wordCount = GetWc(inWord, wordWcMap) * 1.0d;
    double corpusTotal = wordWcMap.GetTotalWc() * 1.0d;

    double numerator = Math.Log(wordCount / corpusTotal);
    double denominator = Math.Log(maxWc / corpusTotal);

    return numerator / denominator;
}
// Plain relative-frequency score: the count returned by GetWc for inWord
// divided by the total word count reported by wordWcMap.GetTotalWc().
//
// inWord    - the word to score; passed to GetWc.
// wordWcMap - supplies the total word count and is passed to GetWc.
private static double GetScoreByPeter(string inWord, WordWcMap wordWcMap)
{
    long corpusTotal = wordWcMap.GetTotalWc();
    double wordCount = GetWc(inWord, wordWcMap);

    // Promote the long total to double so the division is floating-point.
    double denominator = corpusTotal * 1.0;

    return wordCount / denominator;
}
// Unigram frequency score (original note: "org code from baseline, TBM, From Ensemble").
//
// score = log(freq / totalWc) / log(maxWc / totalWc)
//
// freq is the count stored for inWord in the word-count map. When the whole
// word has no count, the word is broken into pieces (on spaces, or a trailing
// "'s") and the smallest count among the non-empty pieces is used instead.
//
// inWord    - the word (or multiword) to score.
// wordWcMap - supplies the word-count dictionary, the max count and the total count.
//
// Returns 0.0 when no usable count is found (avoids log(0) = -infinity),
// 1.0 when freq equals the max count, otherwise the log ratio above.
public static double GetUnigramFreqScore(string inWord, WordWcMap wordWcMap)
{
    Dictionary<string, int> wWcMap = wordWcMap.GetWordWcMap();

    // Single lookup: TryGetValue leaves freq at 0 when the word is absent.
    int freq;
    wWcMap.TryGetValue(inWord, out freq);

    // Use the min. wc of the split words in the multiword's case.
    if (freq == 0)
    {
        freq = GetMinSplitFreq(inWord, wWcMap);
    }

    if (freq == 0)
    {
        return 0.0; // to avoid infinity
    }

    long maxWc = wordWcMap.GetMaxWc();
    long totalWc = wordWcMap.GetTotalWc();

    // When freq == maxWc the numerator and denominator are the same value, so
    // the ratio is 1.0 wherever it is defined. Returning it directly also
    // avoids 0/0 = NaN when maxWc == totalWc (log(1) == 0).
    if (freq == maxWc)
    {
        return 1.0;
    }

    return Math.Log(1.0 * freq / totalWc) / Math.Log(1.0 * maxWc / totalWc);
}

// Returns the smallest count among the non-empty pieces of inWord, or 0 when inWord has no pieces or none can be counted.
private static int GetMinSplitFreq(string inWord, Dictionary<string, int> wWcMap)
{
    // Split(string, bool) is a project helper, not the BCL overload
    // (presumably a regex split on a single space; confirm against the helper).
    IList<string> spls = inWord.Split("[ ]", true).ToList();

    if (spls.Count < 2)
    {
        // Not a multiword: fall back to separating a trailing possessive "'s".
        if (!inWord.EndsWith("'s", StringComparison.Ordinal))
        {
            return 0;
        }

        spls = new List<string>
        {
            inWord.Substring(0, inWord.Length - 2),
            "'s"
        };
    }

    int min = int.MaxValue;
    bool counted = false;

    foreach (string spl in spls)
    {
        if (String.IsNullOrEmpty(spl))
        {
            continue;
        }

        // A piece missing from the map counts as 0.
        int splFreq;
        wWcMap.TryGetValue(spl, out splFreq);

        // Negative counts are ignored, as in the original condition.
        if (splFreq < 0)
        {
            continue;
        }

        counted = true;
        if (splFreq < min)
        {
            min = splFreq;
        }
    }

    // Never leak the int.MaxValue sentinel as a frequency.
    return counted ? min : 0;
}