/// <summary>
/// Scores how similar two strings are from the number of characters they
/// share at the start (lead overlap) and at the end (trail overlap).
/// </summary>
/// <param name="srcStr">Source string; must not be null.</param>
/// <param name="tarStr">Target string; must not be null.</param>
/// <param name="caseFlag">
/// True to compare case-sensitively; false to lower-case both strings first.
/// </param>
/// <returns>
/// 1.0 when the (optionally lower-cased) strings are equal; otherwise the
/// weighted overlap divided by the longer length plus the split penalty,
/// capped at 1.0.
/// </returns>
public static double GetScore(string srcStr, string tarStr, bool caseFlag)
{
    string src = srcStr;
    string tar = tarStr;
    if (!caseFlag)
    {
        // Invariant lower-casing keeps the score independent of the current
        // thread culture (culture-sensitive ToLower() maps 'I' differently
        // under, e.g., Turkish cultures).
        src = srcStr.ToLowerInvariant();
        tar = tarStr.ToLowerInvariant();
    }

    // Identical strings always get the maximum score.
    if (src.Equals(tar))
    {
        return 1.0;
    }

    int srcLen = src.Length;
    int tarLen = tar.Length;

    // Denominator: the longer length plus whatever GetSplitPenalty returns
    // for this pair (helper defined elsewhere).
    int maxLen = Math.Max(srcLen, tarLen);
    maxLen += OrthographicUtil.GetSplitPenalty(src, tar);

    int minLen = Math.Min(srcLen, tarLen);

    // Count matching characters from the front.
    int leadOverlap = 0;
    while (leadOverlap < minLen && src[leadOverlap] == tar[leadOverlap])
    {
        leadOverlap++;
    }

    // Count matching characters from the back.
    int trailOverlap = 0;
    while (trailOverlap < minLen
        && src[srcLen - 1 - trailOverlap] == tar[tarLen - 1 - trailOverlap])
    {
        trailOverlap++;
    }

    // When the shorter string is entirely a prefix (or suffix) of the longer
    // one, the opposite overlap is down-weighted to 0.1 so that, e.g., "123"
    // vs "123123" scores 0.55 rather than 1.0 (assuming no split penalty),
    // and "spel" ranks closer to "spell" than to "speil".
    double leadWeight = 1.0;
    double trailWeight = 1.0;
    if (leadOverlap == minLen)
    {
        trailWeight = 0.1;
    }
    else if (trailOverlap == minLen)
    {
        // "spell" should rank closer to "sspell" than to "nspell".
        leadWeight = 0.1;
    }

    // NOTE(review): lead and trail overlaps may count the same characters
    // twice (e.g. "aba" vs "abba"), which is why the result is capped below.
    double score = (leadWeight * leadOverlap + trailWeight * trailOverlap) / maxLen;

    // Only the upper bound needs enforcing here.
    return Math.Min(score, 1.0);
}
/// <summary>
/// Diagnostic helper: writes one pipe-delimited line to the console in the
/// form source|target|edit cost|split penalty|score.
/// </summary>
/// <param name="srcStr">Source string.</param>
/// <param name="tarStr">Target string.</param>
private static void TestEdSimScore(string srcStr, string tarStr)
{
    // Fixed costs for this diagnostic only. Names follow the argument order
    // used by the cost-based GetScore overload in this file.
    const int DeleteCost = 96;
    const int InsertCost = 90;
    const int ReplaceCost = 100;
    const int SwapCost = 94;
    const int CaseChangeCost = 10;
    const bool EnhancedFlag = false;
    const int SplitCost = 90;

    // Score comes from the two-argument GetScore overload.
    double simScore = GetScore(srcStr, tarStr);

    // Edit cost and split penalty are computed on the raw strings.
    int editCost = EditDistance.GetEditDistance(
        srcStr, tarStr,
        DeleteCost, InsertCost, ReplaceCost, SwapCost,
        CaseChangeCost, EnhancedFlag);
    int splitPenalty = OrthographicUtil.GetSplitPenalty(srcStr, tarStr, SplitCost);

    string line = string.Format(
        "{0}|{1}|{2}|{3}|{4}",
        srcStr, tarStr, editCost, splitPenalty, simScore);
    Console.WriteLine(line);
}
/// <summary>
/// Computes a similarity score from the edit distance between the two
/// strings plus a split penalty, using caller-supplied operation costs.
/// </summary>
/// <param name="srcStr">Source string.</param>
/// <param name="tarStr">Target string.</param>
/// <param name="deleteCost">Cost forwarded to the edit distance for a deletion.</param>
/// <param name="insertCost">Cost forwarded to the edit distance for an insertion.</param>
/// <param name="replaceCost">Cost forwarded to the edit distance for a replacement.</param>
/// <param name="swapCost">Cost forwarded to the edit distance for a swap.</param>
/// <param name="caseChangeCost">Cost forwarded to the edit distance for a case change.</param>
/// <param name="enhancedFlag">Flag forwarded unchanged to the edit distance.</param>
/// <param name="splitCost">Cost forwarded to the split-penalty calculation.</param>
/// <returns>
/// The value of OrthographicUtil.GetNormScore for the combined cost, with
/// 1000.0 passed as its second argument.
/// </returns>
public static double GetScore(
    string srcStr,
    string tarStr,
    int deleteCost,
    int insertCost,
    int replaceCost,
    int swapCost,
    int caseChangeCost,
    bool enhancedFlag,
    int splitCost)
{
    int editCost = EditDistance.GetEditDistance(
        srcStr, tarStr,
        deleteCost, insertCost, replaceCost, swapCost,
        caseChangeCost, enhancedFlag);

    int splitPenalty = OrthographicUtil.GetSplitPenalty(srcStr, tarStr, splitCost);

    // The edit cost and the split penalty are summed before normalisation.
    int totalCost = editCost + splitPenalty;
    return OrthographicUtil.GetNormScore(totalCost, 1000.0);
}
// TBD: read the values from config file
/// <summary>
/// Computes a phonetic similarity score: both strings are converted to
/// Metaphone2 codes, the edit distance between the codes is added to a split
/// penalty computed on the original strings, and the sum is normalised.
/// </summary>
/// <param name="srcStr">Source string.</param>
/// <param name="tarStr">Target string.</param>
/// <returns>
/// The value of OrthographicUtil.GetNormScore for the combined cost, with
/// 1000.0 passed as its second argument.
/// </returns>
public static double GetScore(string srcStr, string tarStr)
{
    // Current costs. The earlier ensemble values were
    // delete = 95, insert = 95, replace = 100, swap = 90.
    const int DeleteCost = 100;
    const int InsertCost = 100;
    const int ReplaceCost = 100;
    const int SwapCost = 100;
    const int CaseChangeCost = 10;
    const bool EnhancedFlag = false;

    // The split cost is tied to the insert cost.
    const int SplitCost = InsertCost;

    const int MaxCodeLength = 10;

    // Alternatives that were tried in place of Metaphone2:
    //   RefinedSoundex.GetCode(str)
    //   Caverphone2.GetCaverphone(str)
    //   Metaphone.GetMetaphone(str, maxCodeLength)
    //   Metaphone3 (SetKeyLength(maxCodeLength), then GetMetaphone(str))
    string srcCode = Metaphone2.GetCode(srcStr, MaxCodeLength);
    string tarCode = Metaphone2.GetCode(tarStr, MaxCodeLength);

    // Edit distance is measured on the phonetic codes ...
    int editCost = EditDistance.GetEditDistance(
        srcCode, tarCode,
        DeleteCost, InsertCost, ReplaceCost, SwapCost,
        CaseChangeCost, EnhancedFlag);

    // ... while the split penalty is measured on the original strings.
    int splitPenalty = OrthographicUtil.GetSplitPenalty(srcStr, tarStr, SplitCost);

    return OrthographicUtil.GetNormScore(editCost + splitPenalty, 1000.0);
}