/// <summary>
/// Compares two NoisyChannelScore objects for ranking. The criteria below are
/// applied in order and the first one that differs decides the result:
/// 1. number of words in the candidate string (fewer words sorts first);
/// 2. noisy channel score (higher score sorts first);
/// 3. orthographic score, delegated to OrthographicScoreComparator;
/// 4. frequency score, delegated to FrequencyScoreComparator;
/// 5. candidate string, compared ordinally with the operands reversed.
/// </summary>
/// <param name="o1"> first object to be compared </param>
/// <param name="o2"> second object to be compared </param>
/// <returns> a negative integer, 0, or a positive integer when o1 sorts
/// before, equal to, or after o2, respectively. </returns>
public virtual int Compare(NoisyChannelScore o1, NoisyChannelScore o2)
{
    string cand1 = o1.GetCandStr();
    string cand2 = o2.GetCandStr();

    // 1. compare how many words the candidates have.
    // For now, we assume fewer words is better,
    // i.e. "whatever" is better than "what ever".
    int wordNo1 = TermUtil.GetWordNo(cand1);
    int wordNo2 = TermUtil.GetWordNo(cand2);
    if (wordNo1 != wordNo2)
    {
        return wordNo1 - wordNo2; // fewer words has higher rank
    }

    // 2. compare noisy channel score, from high to low.
    // SCR-2: return a fixed +1/-1 so that distinct scores never yield 0.
    double score1 = o1.GetScore();
    double score2 = o2.GetScore();
    if (score2 > score1)
    {
        return 1;
    }
    if (score2 < score1)
    {
        return -1;
    }

    // 3. compare by orthographic score
    OrthographicScore oScore1 = o1.GetOScore();
    OrthographicScore oScore2 = o2.GetOScore();
    if (oScore1.GetScore() != oScore2.GetScore())
    {
        return new OrthographicScoreComparator<OrthographicScore>().Compare(oScore1, oScore2);
    }

    // 4. compare by frequency score
    FrequencyScore fScore1 = o1.GetFScore();
    FrequencyScore fScore2 = o2.GetFScore();
    if (fScore1.GetScore() != fScore2.GetScore())
    {
        return new FrequencyScoreComparator<FrequencyScore>().Compare(fScore1, fScore2);
    }

    // 5. alphabetic order. Ordinal comparison keeps the ranking independent of
    // the current culture; string.CompareTo is culture-sensitive.
    return string.CompareOrdinal(cand2, cand1);
}
/// <summary>
/// Builds the frequency score objects for the given candidates and returns them
/// as a list sorted with FrequencyScoreComparator (intended order: higher score first).
/// </summary>
/// <param name="candidates"> candidate strings to be scored </param>
/// <param name="wordWcMap"> word count map passed through to GetCandidateScoreSet </param>
/// <returns> the sorted list of frequency score objects </returns>
public static List<FrequencyScore> GetCandidateScoreList(HashSet<string> candidates, WordWcMap wordWcMap)
{
    // Copy the score set into a list so it can be sorted in place.
    var rankedScores = new List<FrequencyScore>(GetCandidateScoreSet(candidates, wordWcMap));
    rankedScores.Sort(new FrequencyScoreComparator<FrequencyScore>());
    return rankedScores;
}
/// <summary>
/// Prints the frequency scores of the candidates through DebugPrint.PrintFScore,
/// ordered by FrequencyScoreComparator and limited to the first maxCandNo entries.
/// Nothing is computed or printed when debugFlag is false.
/// </summary>
/// <param name="candSet"> candidate strings to be scored </param>
/// <param name="wordWcMap"> word count map passed through to GetCandidateScoreSet </param>
/// <param name="maxCandNo"> maximum number of scores to print </param>
/// <param name="debugFlag"> enables the output; also forwarded to DebugPrint </param>
public static void PrintFrequencyScore(HashSet<string> candSet, WordWcMap wordWcMap, int maxCandNo, bool debugFlag)
{
    if (!debugFlag)
    {
        return;
    }

    var comparator = new FrequencyScoreComparator<FrequencyScore>();
    HashSet<FrequencyScore> scores = RankByFrequency.GetCandidateScoreSet(candSet, wordWcMap);

    // Materialize every line before printing the first one.
    List<string> lines = scores
        .OrderBy(score => score, comparator)
        .Take(maxCandNo)
        .Select(score => score.ToString())
        .ToList();

    foreach (string line in lines)
    {
        DebugPrint.PrintFScore(line, debugFlag);
    }
}
// private methods
/// <summary>
/// Interactive test driver: reads lines from standard input until end of input,
/// and for each line prints the number of candidates, the top ranked candidate
/// and, optionally, the ranked suggestion list.
/// </summary>
/// <param name="detailFlag"> when true, the ranked suggestion list is printed too </param>
/// <param name="limitNo"> maximum number of suggestions to print; values outside
/// the int range are clamped to it </param>
/// <returns> 0 on success, -1 if any exception was caught while reading or ranking </returns>
private static int RunTest(bool detailFlag, long limitNo)
{
    // init dic
    string configFile = "../data/Config/cSpell.properties";
    CSpellApi cSpellApi = new CSpellApi(configFile);
    cSpellApi.SetRankMode(CSpellApi.RANK_MODE_FREQUENCY);
    WordWcMap wordWcMap = cSpellApi.GetWordWcMap();

    FrequencyScoreComparator<FrequencyScore> fsc = new FrequencyScoreComparator<FrequencyScore>();

    // Clamp rather than cast blindly: a plain (int) cast silently truncates
    // a long that does not fit into an int.
    int maxSuggestions = (int)Math.Max(int.MinValue, Math.Min(int.MaxValue, limitNo));

    // provide cmdLine interface
    int returnValue = 0;
    try
    {
        // The using statement guarantees the reader is disposed on every path.
        using (StreamReader stdInput = new StreamReader(Console.OpenStandardInput()))
        {
            Console.WriteLine("- Please input a text (type \"Ctl-d\" to quit) > ");

            string inText;
            while ((inText = stdInput.ReadLine()) != null)
            {
                // ---------------------------------
                // Get spell correction on the input
                // ---------------------------------
                // get all possible candidates
                HashSet<string> candSet = NonWord1To1Candidates.GetCandidates(inText, cSpellApi);
                Console.WriteLine("-- canSet.size(): " + candSet.Count);

                // get final suggestion
                string topRankStr = GetTopRankStr(candSet, wordWcMap);
                Console.WriteLine("- top rank str: " + topRankStr);

                // print details
                if (detailFlag)
                {
                    HashSet<FrequencyScore> candScoreSet = GetCandidateScoreSet(candSet, wordWcMap);
                    Console.WriteLine("------ Suggestion List ------");
                    List<string> suggestions = candScoreSet
                        .OrderBy(score => score, fsc)
                        .Take(maxSuggestions)
                        .Select(score => score.ToString())
                        .ToList();
                    foreach (string suggestion in suggestions)
                    {
                        Console.WriteLine(suggestion);
                    }
                }
            }
        }
    }
    catch (Exception e)
    {
        // Any failure (opening stdin, reading, ranking) is reported and mapped to -1.
        Console.Error.WriteLine(e.Message);
        returnValue = -1;
    }

    return returnValue;
}
/// <summary>
/// Compares two CSpellScore objects by combination, in the order O, N, F, C:
/// orthographic, noisy channel, frequency, then context score. The first score
/// that differs decides the result through its dedicated comparator; if all four
/// are equal, the candidate strings are compared ordinally with the operands reversed.
/// </summary>
/// <param name="o1"> first object to be compared </param>
/// <param name="o2"> second object to be compared </param>
/// <returns> the result of the first deciding comparison </returns>
private int compareByCombo(CSpellScore o1, CSpellScore o2)
{
    // 1. compared by orthographic score, best
    OrthographicScore oScore1 = o1.GetOScore();
    OrthographicScore oScore2 = o2.GetOScore();
    if (oScore1.GetScore() != oScore2.GetScore())
    {
        return new OrthographicScoreComparator<OrthographicScore>().Compare(oScore1, oScore2);
    }

    // 2. compared by noisy channel score, 2nd best
    NoisyChannelScore nScore1 = o1.GetNScore();
    NoisyChannelScore nScore2 = o2.GetNScore();
    if (nScore1.GetScore() != nScore2.GetScore())
    {
        return new NoisyChannelScoreComparator<NoisyChannelScore>().Compare(nScore1, nScore2);
    }

    // 3. compared by pure frequency score, 3rd best
    FrequencyScore fScore1 = o1.GetFScore();
    FrequencyScore fScore2 = o2.GetFScore();
    if (fScore1.GetScore() != fScore2.GetScore())
    {
        return new FrequencyScoreComparator<FrequencyScore>().Compare(fScore1, fScore2);
    }

    // 4. compared by context score, last
    ContextScore cScore1 = o1.GetCScore();
    ContextScore cScore2 = o2.GetCScore();
    if (cScore1.GetScore() != cScore2.GetScore())
    {
        return new ContextScoreComparator<ContextScore>().Compare(cScore1, cScore2);
    }

    // 5. alphabetic order. Ordinal comparison keeps the ranking independent of
    // the current culture; string.CompareTo is culture-sensitive.
    return string.CompareOrdinal(o2.GetCandStr(), o1.GetCandStr());
}
/// <summary>
/// Compares two CSpellScore objects by their frequency score only. When the
/// frequency scores differ, FrequencyScoreComparator decides; otherwise the
/// candidate strings are compared ordinally with the operands reversed.
/// </summary>
/// <param name="o1"> first object to be compared </param>
/// <param name="o2"> second object to be compared </param>
/// <returns> the result of the first deciding comparison </returns>
private int compareByFrequency(CSpellScore o1, CSpellScore o2)
{
    // 1. compared by frequency score
    FrequencyScore fScore1 = o1.GetFScore();
    FrequencyScore fScore2 = o2.GetFScore();
    if (fScore1.GetScore() != fScore2.GetScore())
    {
        return new FrequencyScoreComparator<FrequencyScore>().Compare(fScore1, fScore2);
    }

    // 2. alphabetic order. Ordinal comparison keeps the ranking independent of
    // the current culture; string.CompareTo is culture-sensitive.
    return string.CompareOrdinal(o2.GetCandStr(), o1.GetCandStr());
}