Beispiel #1
0
        /// <summary>
        /// Scores how similar two strings are from the length of their common
        /// leading run and common trailing run of characters.
        /// </summary>
        /// <param name="srcStr">Source string.</param>
        /// <param name="tarStr">Target string.</param>
        /// <param name="caseFlag">
        /// <c>true</c> to compare the strings as given; <c>false</c> to lowercase
        /// both strings before comparing.
        /// </param>
        /// <returns>
        /// 1.0 when the (optionally lowercased) strings are equal; otherwise the
        /// weighted overlap divided by (longer length + split penalty), capped at 1.0.
        /// </returns>
        public static double GetScore(string srcStr, string tarStr, bool caseFlag)
        {
            string src = srcStr;
            string tar = tarStr;

            if (!caseFlag)
            {
                // Lowercase with the invariant culture so the score does not depend on
                // the current thread culture (e.g. Turkish 'I' -> dotless 'ı').
                src = srcStr.ToLowerInvariant();
                tar = tarStr.ToLowerInvariant();
            }

            // Identical strings always get the maximum score.
            if (src.Equals(tar))
            {
                return 1.0;
            }

            int srcLen = src.Length;
            int tarLen = tar.Length;

            // Denominator: the longer length plus the split penalty.
            int maxLen = Math.Max(srcLen, tarLen);
            maxLen += OrthographicUtil.GetSplitPenalty(src, tar);

            int minLen = Math.Min(srcLen, tarLen);

            // Number of matching characters counted from the start.
            int leadOverlap = 0;
            while ((leadOverlap < minLen) && (src[leadOverlap] == tar[leadOverlap]))
            {
                leadOverlap++;
            }

            // Number of matching characters counted from the end.
            int trailOverlap = 0;
            while ((trailOverlap < minLen)
                   && (src[srcLen - 1 - trailOverlap] == tar[tarLen - 1 - trailOverlap]))
            {
                trailOverlap++;
            }

            // When the shorter string is entirely a prefix (or suffix) of the longer
            // one, the opposite overlap is down-weighted to 0.1:
            //   "123" vs "123123" should be 0.55, not 1.0
            //   "spel" should score higher with "spell" than with "speil"
            //   "spell" should score higher with "sspell" than with "nspell"
            double leadWeight = 1.0;
            double trailWeight = 1.0;
            if (leadOverlap == minLen)
            {
                trailWeight = 0.1;
            }
            else if (trailOverlap == minLen)
            {
                leadWeight = 0.1;
            }

            double score = (leadWeight * leadOverlap + trailWeight * trailOverlap) / (1.0 * maxLen);

            // Cap the score at 1.0 (lead and trail runs may overlap each other).
            return (score > 1.0) ? 1.0 : score;
        }
        /// <summary>
        /// Writes one pipe-delimited line to the console for a string pair:
        /// source, target, edit-distance cost, split penalty and score.
        /// </summary>
        /// <param name="srcStr">Source string.</param>
        /// <param name="tarStr">Target string.</param>
        private static void TestEdSimScore(string srcStr, string tarStr)
        {
            const string delimiter = "|";

            // Fixed values used only by this test helper. The names follow the
            // argument order used by the parameterized GetScore overload's calls
            // to the same helpers — NOTE(review): confirm against EditDistance.
            const int  deleteCost     = 96;
            const int  insertCost     = 90;
            const int  replaceCost    = 100;
            const int  swapCost       = 94;
            const int  caseChangeCost = 10;
            const bool enhancedFlag   = false;
            const int  splitCost      = 90;

            double simScore     = GetScore(srcStr, tarStr);
            int    editCost     = EditDistance.GetEditDistance(
                srcStr, tarStr, deleteCost, insertCost, replaceCost, swapCost, caseChangeCost, enhancedFlag);
            int    splitPenalty = OrthographicUtil.GetSplitPenalty(srcStr, tarStr, splitCost);

            string line = srcStr + delimiter + tarStr + delimiter + editCost + delimiter + splitPenalty + delimiter + simScore;
            Console.WriteLine(line);
        }
        /// <summary>
        /// Computes a score for a string pair from its edit-distance cost plus
        /// its split penalty, using caller-supplied costs.
        /// </summary>
        /// <param name="srcStr">Source string.</param>
        /// <param name="tarStr">Target string.</param>
        /// <param name="deleteCost">Delete cost passed to the edit-distance computation.</param>
        /// <param name="insertCost">Insert cost passed to the edit-distance computation.</param>
        /// <param name="replaceCost">Replace cost passed to the edit-distance computation.</param>
        /// <param name="swapCost">Swap cost passed to the edit-distance computation.</param>
        /// <param name="caseChangeCost">Case-change cost passed to the edit-distance computation.</param>
        /// <param name="enhancedFlag">Enhanced-mode flag passed to the edit-distance computation.</param>
        /// <param name="splitCost">Cost passed to the split-penalty computation.</param>
        /// <returns>
        /// The result of <c>OrthographicUtil.GetNormScore</c> applied to
        /// (edit cost + split penalty) with a ceiling argument of 1000.0.
        /// </returns>
        public static double GetScore(
            string srcStr,
            string tarStr,
            int deleteCost,
            int insertCost,
            int replaceCost,
            int swapCost,
            int caseChangeCost,
            bool enhancedFlag,
            int splitCost)
        {
            int editCost = EditDistance.GetEditDistance(
                srcStr, tarStr, deleteCost, insertCost, replaceCost, swapCost, caseChangeCost, enhancedFlag);
            int splitPenalty = OrthographicUtil.GetSplitPenalty(srcStr, tarStr, splitCost);

            return OrthographicUtil.GetNormScore(editCost + splitPenalty, 1000.0);
        }
        // TBD: read the values from config file
        /// <summary>
        /// Computes a score for a string pair from the edit distance between
        /// their Metaphone2 codes plus a split penalty taken on the raw strings.
        /// </summary>
        /// <param name="srcStr">Source string.</param>
        /// <param name="tarStr">Target string.</param>
        /// <returns>
        /// The result of <c>OrthographicUtil.GetNormScore</c> applied to
        /// (edit cost + split penalty) with a ceiling argument of 1000.0.
        /// </returns>
        public static double GetScore(string srcStr, string tarStr)
        {
            // Current cost settings. The initial values taken from the ensemble
            // were: delete = 95, insert = 95, replace = 100, swap = 90.
            const int  deleteCost     = 100;
            const int  insertCost     = 100;
            const int  replaceCost    = 100;
            const int  swapCost       = 100;
            const int  caseChangeCost = 10;
            const bool enhancedFlag   = false;
            const int  splitCost      = insertCost;   // a split is charged like an insert
            const int  maxCodeLength  = 10;

            // Phonetic encoding: Metaphone2 is the method in use. Other methods
            // that were tested here: RefinedSoundex.GetCode, Caverphone2.GetCaverphone,
            // Metaphone.GetMetaphone (with maxCodeLength) and Metaphone3
            // (SetKeyLength(maxCodeLength) + GetMetaphone).
            string srcCode = Metaphone2.GetCode(srcStr, maxCodeLength);
            string tarCode = Metaphone2.GetCode(tarStr, maxCodeLength);

            // The edit distance is measured on the phonetic codes; the split
            // penalty is measured on the original strings.
            int editCost = EditDistance.GetEditDistance(
                srcCode, tarCode, deleteCost, insertCost, replaceCost, swapCost, caseChangeCost, enhancedFlag);
            int splitPenalty = OrthographicUtil.GetSplitPenalty(srcStr, tarStr, splitCost);

            return OrthographicUtil.GetNormScore(editCost + splitPenalty, 1000.0);
        }