/// <summary>
/// Scans a single bucket of <c>_IndexDictionary</c> for the entries that score highest against
/// <paramref name="word"/> using <c>JaccardDistance.GetDistance</c>.
/// </summary>
/// <param name="word">Word to look up; it is lower-cased before being compared.</param>
/// <param name="maxSimilarity">Best score found so far; only a strictly higher score replaces the collected words.</param>
/// <param name="index">Offset added to the word length; the bucket searched is <c>word.Length + index - _minWordLength</c>.</param>
/// <param name="equalMinDistanceDictWordList">
/// Output list. It is cleared and refilled when a strictly better score is found, and appended to
/// when a candidate ties the current best score.
/// </param>
/// <returns>
/// The highest score seen, or <paramref name="maxSimilarity"/> unchanged when the bucket index is
/// out of range, the bucket is null, or no candidate scores higher.
/// </returns>
public static double findSimilarDictionaryWord(string word, double maxSimilarity, int index, List<string> equalMinDistanceDictWordList)
        {
            word = word.ToLower();

            // Resolve the bucket once instead of re-indexing the array on every access.
            int bucketIndex = word.Length + index - _minWordLength;
            if (bucketIndex < 0 || bucketIndex >= _IndexDictionary.Length)
            {
                return maxSimilarity;
            }

            var bucket = _IndexDictionary[bucketIndex];
            if (bucket == null)
            {
                return maxSimilarity;
            }

            // One metric instance serves the whole scan.
            // NOTE(review): assumes JaccardDistance.GetDistance keeps no per-instance state - confirm.
            JaccardDistance jaccard = new JaccardDistance();

            for (int j = 0; j < bucket.Count; j++)
            {
                string candidate = bucket[j];
                double similarity = jaccard.GetDistance(word, candidate);

                if (similarity > maxSimilarity)
                {
                    // Strictly better: everything collected so far is obsolete.
                    equalMinDistanceDictWordList.Clear();
                    equalMinDistanceDictWordList.Add(candidate);
                    maxSimilarity = similarity;
                }
                else if (similarity == maxSimilarity)
                {
                    // Exact tie with the current best: keep it alongside the others.
                    equalMinDistanceDictWordList.Add(candidate);
                }
            }

            return maxSimilarity;
        }
Example #2
0
 /// <summary>
 /// Checks the scores produced by <c>new NGramDistance(2)</c> against fixed expected values and
 /// against a few relative orderings.
 /// </summary>
 public void TestGetDistance2()
 {
     StringDistance sd = new NGramDistance(2);

     // Assert.AreEqual takes (expected, actual, delta); the expected constant goes first so that
     // a failure message reports the two values the right way round.
     float d = sd.GetDistance("al", "al");
     Assert.AreEqual(1.0f, d, 0.001);
     d = sd.GetDistance("a", "a");
     Assert.AreEqual(1.0f, d, 0.001);
     d = sd.GetDistance("b", "a");
     Assert.AreEqual(0.0f, d, 0.001);
     d = sd.GetDistance("a", "aa");
     Assert.AreEqual(0.5f, d, 0.001);
     d = sd.GetDistance("martha", "marhta");
     Assert.AreEqual(0.6666, d, 0.001);
     d = sd.GetDistance("jones", "johnson");
     Assert.AreEqual(0.4285, d, 0.001);
     d = sd.GetDistance("natural", "contrary");
     Assert.AreEqual(0.25, d, 0.001);
     d = sd.GetDistance("abcvwxyz", "cabvwxyz");
     Assert.AreEqual(0.625, d, 0.001);
     d = sd.GetDistance("dwayne", "duane");
     Assert.AreEqual(0.5833, d, 0.001);
     d = sd.GetDistance("dixon", "dicksonx");
     Assert.AreEqual(0.5, d, 0.001);
     d = sd.GetDistance("six", "ten");
     Assert.AreEqual(0.0, d, 0.001);

     // Relative checks: the first pairing must score strictly higher than the second.
     float d1 = sd.GetDistance("zac ephron", "zac efron");
     float d2 = sd.GetDistance("zac ephron", "kai ephron");
     Assert.IsTrue(d1 > d2);
     d1 = sd.GetDistance("brittney spears", "britney spears");
     d2 = sd.GetDistance("brittney spears", "brittney startzman");
     Assert.IsTrue(d1 > d2);

     // These two pairings are expected to score the same (within the tolerance).
     d1 = sd.GetDistance("0012345678", "0012890678");
     d2 = sd.GetDistance("0012345678", "0072385698");
     Assert.AreEqual(d1, d2, 0.001);
 }
        /// <summary>
        /// Scans a single bucket of <c>_IndexDictionary</c> and collects, with their scores, the entries whose
        /// <c>JaccardDistance.GetDistance</c> score against <paramref name="word"/> lies within a fixed
        /// band (0.3) of the best score.
        /// </summary>
        /// <param name="word">Word to look up; it is compared as given (no case folding is applied here).</param>
        /// <param name="maxSimilarity">Best score found so far.</param>
        /// <param name="index">Offset added to the word length; the bucket searched is <c>word.Length - 1 + index - _minWordLength</c>.</param>
        /// <param name="equalMinDistanceDictWordList">
        /// Output map from candidate word to score. When a strictly better score is found, entries whose
        /// score is at least 0.3 below it are removed. Existing keys are never overwritten.
        /// </param>
        /// <returns>
        /// The highest score seen; <paramref name="maxSimilarity"/> unchanged when the bucket index is out
        /// of range; 0 when the bucket is null.
        /// </returns>
        /// <remarks>
        /// Each time a strictly better candidate is found it is also written to <c>tempReplacement</c>,
        /// which is declared outside this method.
        /// </remarks>
        public static double findSimilarDictionaryWord(string word, double maxSimilarity, int index, Dictionary<string, double> equalMinDistanceDictWordList)
        {
            const double distancethreshold = 0.3;
            // Lower clamp applied to every score before it is compared.
            const double similarityFloor = -1;

            int bucketIndex = word.Length - 1 + index - _minWordLength;

            // The lower bound is checked on the index that is actually used: previously the guard
            // tested bucketIndex + 1, so a value of exactly -1 slipped through and indexed the array
            // out of range.
            if (bucketIndex < 0)
            {
                return maxSimilarity;
            }

            // Upper bound kept exactly as before (it is tested on bucketIndex + 1).
            // NOTE(review): this means the last bucket of _IndexDictionary is never searched by
            // this overload - confirm whether that is intended.
            if (bucketIndex + 1 >= _IndexDictionary.Length)
            {
                return maxSimilarity;
            }

            var bucket = _IndexDictionary[bucketIndex];
            if (bucket == null)
            {
                // NOTE(review): the sibling overloads return maxSimilarity here; this one returns 0,
                // which discards the caller's running best. Left unchanged pending confirmation.
                return 0;
            }

            // One metric instance serves the whole scan.
            // NOTE(review): assumes JaccardDistance.GetDistance keeps no per-instance state - confirm.
            JaccardDistance jaccard = new JaccardDistance();

            for (int j = 0; j < bucket.Count; j++)
            {
                string candidate = bucket[j];

                double similarity = jaccard.GetDistance(word, candidate);
                if (similarity < similarityFloor)
                {
                    similarity = similarityFloor;
                }

                if (similarity > maxSimilarity)
                {
                    // New best: drop collected entries that now fall outside the band below it.
                    // Iterate over a snapshot because the dictionary is modified inside the loop.
                    foreach (var item in equalMinDistanceDictWordList.ToList())
                    {
                        if (item.Value <= similarity - distancethreshold)
                        {
                            equalMinDistanceDictWordList.Remove(item.Key);
                        }
                    }

                    tempReplacement = candidate;
                    if (!equalMinDistanceDictWordList.ContainsKey(candidate))
                    {
                        equalMinDistanceDictWordList.Add(candidate, similarity);
                    }

                    maxSimilarity = similarity;
                }
                else if (similarity <= maxSimilarity + distancethreshold
                         && similarity >= maxSimilarity - distancethreshold
                         && similarity > 0)
                {
                    // Not better than the best, but positive and within the band: keep it too.
                    if (!equalMinDistanceDictWordList.ContainsKey(candidate))
                    {
                        equalMinDistanceDictWordList.Add(candidate, similarity);
                    }
                }
            }

            return maxSimilarity;
        }
        /// <summary>
        /// Scans a single bucket of <c>_IndexDictionary</c> for entries that equal, contain, or score
        /// well against <paramref name="word"/> using <c>JaccardDistance.GetDistance</c>.
        /// </summary>
        /// <param name="word">
        /// Word to look up. Its length is taken before it is lower-cased and trimmed, and that
        /// untrimmed length selects the bucket.
        /// </param>
        /// <param name="maxSimilarity">Score carried in from the caller; returned unchanged when nothing matches.</param>
        /// <param name="index">Offset added to the word length; the bucket searched is <c>word.Length + index - _minWordLength</c>.</param>
        /// <param name="equalMinDistanceDictWordList">Output list of matching dictionary entries.</param>
        /// <param name="exact">
        /// When true, the first candidate that reaches the scoring step is compared as a whole and the
        /// method returns immediately afterwards.
        /// </param>
        /// <returns>
        /// 10 when a dictionary entry equals the (untrimmed-on-input) word; otherwise the last score
        /// assigned: 1 for a containing entry, the most recent score above 0.33 for a fuzzy match, or
        /// the incoming <paramref name="maxSimilarity"/>.
        /// </returns>
        public static double findSimilarDictionaryWord(string word, double maxSimilarity, int index, List<string> equalMinDistanceDictWordList, bool exact)
        {
            index = index - _minWordLength;
            int WordLength = word.Length;

            // index2 bounds how many leading / trailing characters are dropped from a candidate
            // before scoring. It is forced to 0 for a negative offset, or when the word starts with
            // an upper-case letter followed by a non-upper-case character.
            int index2 = index;
            if (index < 0 || (WordLength >= 2 && char.IsUpper(word[0]) && !char.IsUpper(word[1])))
            {
                index2 = 0;
            }

            word = word.ToLower();

            // noSpace records that the word had nothing to trim; only then is the equality
            // shortcut below allowed.
            bool noSpace = word.CompareTo(word.Trim()) == 0;
            if (!noSpace)
            {
                word = word.Trim();
            }

            int bucketIndex = WordLength + index;
            if (bucketIndex < 0 || bucketIndex >= _IndexDictionary.Length)
            {
                return maxSimilarity;
            }

            var bucket = _IndexDictionary[bucketIndex];
            if (bucket == null)
            {
                return maxSimilarity;
            }

            // One metric instance serves the whole scan.
            // NOTE(review): assumes JaccardDistance.GetDistance keeps no per-instance state - confirm.
            JaccardDistance jd = new JaccardDistance();

            for (int j = 0; j < bucket.Count; j++)
            {
                string temp = bucket[j];

                if (noSpace && temp.CompareTo(word) == 0)
                {
                    // Identical entry: report it alone with the sentinel score 10.
                    equalMinDistanceDictWordList.Clear();
                    equalMinDistanceDictWordList.Add(temp);
                    return 10;
                }
                else if (temp.Contains(word))
                {
                    equalMinDistanceDictWordList.Add(temp);
                    maxSimilarity = 1;
                }
                else if (index <= 2)
                {
                    for (int i = 0; i <= index2; i++)
                    {
                        // s drops the first i characters; s2 drops the last index2 characters.
                        string s = temp.Substring(i);
                        string s2 = temp.Substring(0, temp.Length - index2);

                        double NewSimilarity;
                        if (!exact)
                        {
                            NewSimilarity = Math.Max(jd.GetDistance(word, s), jd.GetDistance(word, s2));
                        }
                        else
                        {
                            NewSimilarity = jd.GetDistance(word, temp);
                            if (NewSimilarity == 1)
                            {
                                equalMinDistanceDictWordList.Clear();
                                equalMinDistanceDictWordList.Add(s);
                                maxSimilarity = NewSimilarity;
                            }

                            // NOTE(review): this returns after the first candidate that reaches
                            // this branch, so later bucket entries are never examined in exact
                            // mode - confirm that is intended.
                            return maxSimilarity;
                        }

                        if (NewSimilarity > .33)
                        {
                            // The score is overwritten with the latest qualifying value, not the maximum.
                            equalMinDistanceDictWordList.Add(temp);
                            maxSimilarity = NewSimilarity;
                            break;
                        }
                    }
                }
            }

            return maxSimilarity;
        }
        /// <summary>
        /// Scratch routine: scores "acht" against "yacht" and against "acha" with a default
        /// <c>NGramDistance</c>. The results are neither returned nor asserted.
        /// </summary>
        public static void TestQGram()
        {
            const string query = "acht";
            const string longerCandidate = "yacht";
            const string sameLengthCandidate = "acha";

            NGramDistance metric = new NGramDistance();

            double scoreVersusLonger = metric.GetDistance(query, longerCandidate);
            double scoreVersusSameLength = metric.GetDistance(query, sameLengthCandidate);

            // Kept as a local so the first score can be inspected in a debugger.
            double inspected = scoreVersusLonger;
        }
Example #6
0
 /// <summary>
 /// Checks that <c>new NGramDistance(1)</c> scores an empty string against a non-empty one as 0.
 /// </summary>
 public void TestEmpty()
 {
     StringDistance nsd = new NGramDistance(1);
     float d = nsd.GetDistance("", "al");

     // Assert.AreEqual takes (expected, actual, delta); the expected constant goes first so that
     // a failure message reports the two values the right way round.
     Assert.AreEqual(0.0f, d, 0.001);
 }