/// <summary>
/// Ranks two candidates by a weighted ensemble of their orthographic,
/// frequency and context scores (weights 0.6, 0.25 and 0.15), highest
/// ensemble score first. When neither ensemble score is greater, the
/// candidate strings are compared in descending ordinal order.
/// </summary>
/// <param name="o1"> first score to be compared </param>
/// <param name="o2"> second score to be compared </param>
/// <returns> a negative integer when o1 ranks before o2, a positive integer
/// when o1 ranks after o2, and 0 only when the tie-break is reached and the
/// candidate strings are ordinally equal </returns>
private int compareByEnsemble(CSpellScore o1, CSpellScore o2)
{
    // weights of the individual scores in the ensemble
    const double orthographicWeight = 0.6;
    const double frequencyWeight = 0.25;
    const double contextWeight = 0.15;

    OrthographicScore oScore1 = o1.GetOScore();
    OrthographicScore oScore2 = o2.GetOScore();
    FrequencyScore fScore1 = o1.GetFScore();
    FrequencyScore fScore2 = o2.GetFScore();
    ContextScore cScore1 = o1.GetCScore();
    ContextScore cScore2 = o2.GetCScore();

    double score1 = orthographicWeight * oScore1.GetScore() + frequencyWeight * fScore1.GetScore() + contextWeight * cScore1.GetScore();
    double score2 = orthographicWeight * oScore2.GetScore() + frequencyWeight * fScore2.GetScore() + contextWeight * cScore2.GetScore();

    // 1. compare by ensemble score, from high to low
    // SCR-2: use a fixed number to ensure result is not 0.
    if (score2 > score1)
    {
        return 1;
    }
    if (score2 < score1)
    {
        return -1;
    }

    // 2. alphabetic order. Ordinal comparison is used on purpose: a
    // culture-sensitive comparison depends on the current locale and can
    // report distinct strings as equal, which would let the result be 0
    // for two different candidates.
    return string.CompareOrdinal(o2.GetCandStr(), o1.GetCandStr());
}
// private methods
/// <summary>
/// Decides whether the top-ranked candidate may replace the original word,
/// based on context scores first and frequency scores second. All
/// thresholds are read from <paramref name="cSpellApi"/>.
/// </summary>
/// <param name="orgContextScore"> context score of the original word </param>
/// <param name="orgFreqScore"> frequency score of the original word </param>
/// <param name="topContextScore"> context score of the top candidate </param>
/// <param name="topCSpellScore"> CSpell score of the top candidate; only its frequency score is read </param>
/// <param name="cSpellApi"> source of the rank thresholds </param>
/// <returns> true only when both the context check and the frequency check pass </returns>
private static bool IsTopCandValidByScores(ContextScore orgContextScore, FrequencyScore orgFreqScore, ContextScore topContextScore, CSpellScore topCSpellScore, CSpellApi cSpellApi)
{
    // thresholds
    double csFactor = cSpellApi.GetRankRw1To1CandCsFac();
    double wordMinCs = cSpellApi.GetRankRw1To1WordMinCs();
    double candMinCs = cSpellApi.GetRankRw1To1CandMinCs();
    double csDist = cSpellApi.GetRankRw1To1CandCsDist();
    double fsFactor = cSpellApi.GetRankRw1To1CandFsFac();
    double candMinFs = cSpellApi.GetRankRw1To1CandMinFs();
    double fsDist = cSpellApi.GetRankRw1To1CandFsDist();

    double orgCs = orgContextScore.GetScore();
    double topCs = topContextScore.GetScore();

    // another rule for word2Vec on real-word
    // check context score:
    // 1. the top score is big enough to cover the original score
    //    (NOTE(review): the division by -orgCs suggests the original
    //    context score is expected to be negative here - confirm)
    // 2. the distance is > a value for confidence
    // original tuning note: 609|796|0.6920
    bool contextPasses = ((topCs / -orgCs) > csFactor)
        && (orgCs > wordMinCs)
        && (topCs > candMinCs)
        && ((topCs - orgCs) > csDist);
    if (!contextPasses)
    {
        return false;
    }

    // check frequency, all positive:
    // 1. cand has better frequency
    // 2. the difference is within a range
    double orgFs = orgFreqScore.GetScore();
    double topFs = topCSpellScore.GetFScore().GetScore();
    bool withinFreqRange = (topFs > orgFs) || ((orgFs - topFs) < fsDist);

    return ((topFs / orgFs) > fsFactor) && (topFs > candMinFs) && withinFreqRange;
}
/// <summary>
/// Compare two objects o1 and o2. Both objects o1 and o2 are
/// NoisyChannelScore. The criteria are applied in order, stopping at the
/// first one that separates the two objects:
/// 1. number of words in the candidate (fewer words ranks first),
/// 2. noisy channel score (higher score ranks first),
/// 3. orthographic score (delegated to OrthographicScoreComparator),
/// 4. frequency score (delegated to FrequencyScoreComparator),
/// 5. candidate string, in descending ordinal order.
/// </summary>
/// <param name="o1"> first object to be compared </param>
/// <param name="o2"> second object to be compared </param>
/// <returns> a negative integer, 0, or positive integer to represent that
/// object o1 ranks before, equal to, or after object o2 </returns>
public virtual int Compare(NoisyChannelScore o1, NoisyChannelScore o2)
{
    // 1. compare how many words for the candidates
    // for now, we assume less word is better,
    // i.e. whatever is better than "what ever"
    string cand1 = o1.GetCandStr();
    string cand2 = o2.GetCandStr();
    int wordNo1 = TermUtil.GetWordNo(cand1);
    int wordNo2 = TermUtil.GetWordNo(cand2);
    if (wordNo1 != wordNo2)
    {
        return wordNo1 - wordNo2; // less wordNo has higher rank
    }

    // 2. compare noisy channel score, from high to low
    // SCR-2: use a fixed number to ensure result is not 0.
    double score1 = o1.GetScore();
    double score2 = o2.GetScore();
    if (score2 > score1)
    {
        return 1;
    }
    if (score2 < score1)
    {
        return -1;
    }

    // 3. compare by orthographic score
    OrthographicScore oScore1 = o1.GetOScore();
    OrthographicScore oScore2 = o2.GetOScore();
    if (oScore1.GetScore() != oScore2.GetScore())
    {
        OrthographicScoreComparator <OrthographicScore> osc = new OrthographicScoreComparator <OrthographicScore>();
        return osc.Compare(oScore1, oScore2);
    }

    // 4. compare by frequency score
    FrequencyScore fScore1 = o1.GetFScore();
    FrequencyScore fScore2 = o2.GetFScore();
    if (fScore1.GetScore() != fScore2.GetScore())
    {
        FrequencyScoreComparator <FrequencyScore> fsc = new FrequencyScoreComparator <FrequencyScore>();
        return fsc.Compare(fScore1, fScore2);
    }

    // 5. alphabetic order. Ordinal comparison is used on purpose: a
    // culture-sensitive comparison depends on the current locale and can
    // report distinct strings as equal, so two different candidates could
    // otherwise compare as 0.
    return string.CompareOrdinal(cand2, cand1);
}
// public constructor
/// <summary>
/// Builds the noisy channel score of a candidate for a given word: the
/// product of the orthographic score (word vs. candidate) and the
/// frequency score of the candidate.
/// </summary>
/// <param name="wordStr"> the original word </param>
/// <param name="candStr"> the candidate for the word </param>
/// <param name="wordWcMap"> word count map passed to the frequency score </param>
/// <param name="wf1"> first factor passed to the orthographic score </param>
/// <param name="wf2"> second factor passed to the orthographic score </param>
/// <param name="wf3"> third factor passed to the orthographic score </param>
public NoisyChannelScore(string wordStr, string candStr, WordWcMap wordWcMap, double wf1, double wf2, double wf3)
{
    wordStr_ = wordStr;
    candStr_ = candStr;

    // component scores
    OrthographicScore orthographic = new OrthographicScore(wordStr, candStr, wf1, wf2, wf3);
    FrequencyScore frequency = new FrequencyScore(candStr, wordWcMap);
    oScore_ = orthographic;
    fScore_ = frequency;

    // noisy channel score = orthographic score * frequency score
    score_ = orthographic.GetScore() * frequency.GetScore();
}
/// <summary>
/// Returns the candidate with the highest frequency score. Only scores
/// strictly greater than 0.0 (and strictly greater than the best seen so
/// far) are accepted, so an empty string is returned when no candidate
/// qualifies.
/// </summary>
/// <param name="candidates"> candidate strings to rank </param>
/// <param name="wordWcMap"> word count map passed to the frequency score </param>
/// <returns> the best ranked candidate, or "" when none scores above 0.0 </returns>
public static string GetTopRankStrByScore(HashSet <string> candidates, WordWcMap wordWcMap)
{
    string best = "";
    double bestScore = 0.0;

    foreach (string candidate in candidates)
    {
        double candidateScore = new FrequencyScore(candidate, wordWcMap).GetScore();

        // keep the current best unless this candidate is strictly better
        if (!(candidateScore > bestScore))
        {
            continue;
        }

        best = candidate;
        bestScore = candidateScore;
    }

    return best;
}
// by combination, O, N, F, C
/// <summary>
/// Ranks two candidates by a cascade of scores. The first score type whose
/// values differ decides the result, using that score type's comparator:
/// orthographic, then noisy channel, then frequency, then context. When
/// all four are equal, the candidate strings are compared in descending
/// ordinal order.
/// </summary>
/// <param name="o1"> first score to be compared </param>
/// <param name="o2"> second score to be compared </param>
/// <returns> the result of the deciding comparator, or the ordinal
/// comparison of the candidate strings (o2 against o1) as the tie-break </returns>
private int compareByCombo(CSpellScore o1, CSpellScore o2)
{
    OrthographicScore oScore1 = o1.GetOScore();
    OrthographicScore oScore2 = o2.GetOScore();
    NoisyChannelScore nScore1 = o1.GetNScore();
    NoisyChannelScore nScore2 = o2.GetNScore();
    FrequencyScore fScore1 = o1.GetFScore();
    FrequencyScore fScore2 = o2.GetFScore();
    ContextScore cScore1 = o1.GetCScore();
    ContextScore cScore2 = o2.GetCScore();

    // 1. compared by orthographic score, best
    if (oScore1.GetScore() != oScore2.GetScore())
    {
        OrthographicScoreComparator <OrthographicScore> osc = new OrthographicScoreComparator <OrthographicScore>();
        return osc.Compare(oScore1, oScore2);
    }

    // 2. compared by noisy channel score, 2nd best
    if (nScore1.GetScore() != nScore2.GetScore())
    {
        NoisyChannelScoreComparator <NoisyChannelScore> nsc = new NoisyChannelScoreComparator <NoisyChannelScore>();
        return nsc.Compare(nScore1, nScore2);
    }

    // 3. compared by pure frequency score, 3rd best
    if (fScore1.GetScore() != fScore2.GetScore())
    {
        FrequencyScoreComparator <FrequencyScore> fsc = new FrequencyScoreComparator <FrequencyScore>();
        return fsc.Compare(fScore1, fScore2);
    }

    // 4. compared by context score, last
    if (cScore1.GetScore() != cScore2.GetScore())
    {
        ContextScoreComparator <ContextScore> csc = new ContextScoreComparator <ContextScore>();
        return csc.Compare(cScore1, cScore2);
    }

    // 5. alphabetic order. Ordinal comparison is used on purpose: a
    // culture-sensitive comparison depends on the current locale and can
    // report distinct strings as equal, so two different candidates could
    // otherwise compare as 0.
    return string.CompareOrdinal(o2.GetCandStr(), o1.GetCandStr());
}
/// <summary>
/// Ranks two candidates by frequency score using FrequencyScoreComparator.
/// When the frequency scores are equal, the candidate strings are compared
/// in descending ordinal order.
/// </summary>
/// <param name="o1"> first score to be compared </param>
/// <param name="o2"> second score to be compared </param>
/// <returns> the result of FrequencyScoreComparator when the frequency
/// scores differ, otherwise the ordinal comparison of the candidate
/// strings (o2 against o1) </returns>
private int compareByFrequency(CSpellScore o1, CSpellScore o2)
{
    FrequencyScore fScore1 = o1.GetFScore();
    FrequencyScore fScore2 = o2.GetFScore();

    // 1. compared by frequency score
    if (fScore1.GetScore() != fScore2.GetScore())
    {
        FrequencyScoreComparator <FrequencyScore> fsc = new FrequencyScoreComparator <FrequencyScore>();
        return fsc.Compare(fScore1, fScore2);
    }

    // 2. alphabetic order. Ordinal comparison is used on purpose: a
    // culture-sensitive comparison depends on the current locale and can
    // report distinct strings as equal, so two different candidates could
    // otherwise compare as 0.
    return string.CompareOrdinal(o2.GetCandStr(), o1.GetCandStr());
}