/// <summary>
/// Scans one bucket of <c>_IndexDictionary</c> and collects the entries that score highest
/// against <paramref name="word"/> according to <c>JaccardDistance.GetDistance</c>
/// (a larger value is treated as a better match).
/// </summary>
/// <param name="word">Word to look up; it is lower-cased before being compared.</param>
/// <param name="maxSimilarity">Best score found so far. Entries must score at least this much to be collected.</param>
/// <param name="index">Offset added to the word length; the bucket examined is <c>word.Length + index - _minWordLength</c>.</param>
/// <param name="equalMinDistanceDictWordList">
/// Receives the matching entries. It is cleared and restarted whenever an entry scores strictly
/// higher than the current best, and appended to when an entry ties the current best exactly.
/// </param>
/// <returns>
/// The best score after the scan. <paramref name="maxSimilarity"/> is returned unchanged when the
/// bucket index is out of range, the bucket is <c>null</c>, or no entry beats it.
/// </returns>
public static double findSimilarDictionaryWord(string word, double maxSimilarity, int index, List<string> equalMinDistanceDictWordList)
{
    index = index - _minWordLength;
    word = word.ToLower();

    int bucketIndex = word.Length + index;
    if (bucketIndex < 0 || bucketIndex >= _IndexDictionary.Length)
    {
        return maxSimilarity;
    }

    // Look the bucket up once instead of re-indexing _IndexDictionary on every access.
    var bucket = _IndexDictionary[bucketIndex];
    if (bucket == null)
    {
        return maxSimilarity;
    }

    // A single scorer serves the whole scan.
    // NOTE(review): assumes JaccardDistance.GetDistance keeps no state between calls — confirm.
    JaccardDistance jd = new JaccardDistance();

    for (int j = 0; j < bucket.Count; j++)
    {
        string candidate = bucket[j];
        double similarity = jd.GetDistance(word, candidate);

        if (similarity > maxSimilarity)
        {
            // Strictly better: everything collected so far is superseded.
            equalMinDistanceDictWordList.Clear();
            equalMinDistanceDictWordList.Add(candidate);
            maxSimilarity = similarity;
        }
        else if (similarity == maxSimilarity)
        {
            // Exact tie with the current best: keep both.
            equalMinDistanceDictWordList.Add(candidate);
        }
    }

    return maxSimilarity;
}
/// <summary>
/// Checks the scores returned by an <c>NGramDistance</c> constructed with <c>2</c> for a fixed
/// set of string pairs, plus a few relative-ordering checks between pairs.
/// </summary>
public void TestGetDistance2()
{
    StringDistance sd = new NGramDistance(2);

    // The expected value is passed first so that a failing assertion reports
    // "expected" and "actual" the right way round.
    float d = sd.GetDistance("al", "al");
    Assert.AreEqual(1.0f, d, 0.001);

    d = sd.GetDistance("a", "a");
    Assert.AreEqual(1.0f, d, 0.001);

    d = sd.GetDistance("b", "a");
    Assert.AreEqual(0.0f, d, 0.001);

    d = sd.GetDistance("a", "aa");
    Assert.AreEqual(0.5f, d, 0.001);

    d = sd.GetDistance("martha", "marhta");
    Assert.AreEqual(0.6666, d, 0.001);

    d = sd.GetDistance("jones", "johnson");
    Assert.AreEqual(0.4285, d, 0.001);

    d = sd.GetDistance("natural", "contrary");
    Assert.AreEqual(0.25, d, 0.001);

    d = sd.GetDistance("abcvwxyz", "cabvwxyz");
    Assert.AreEqual(0.625, d, 0.001);

    d = sd.GetDistance("dwayne", "duane");
    Assert.AreEqual(0.5833, d, 0.001);

    d = sd.GetDistance("dixon", "dicksonx");
    Assert.AreEqual(0.5, d, 0.001);

    d = sd.GetDistance("six", "ten");
    Assert.AreEqual(0.0f, d, 0.001);

    // Relative ordering: the closer spelling must score higher.
    float d1 = sd.GetDistance("zac ephron", "zac efron");
    float d2 = sd.GetDistance("zac ephron", "kai ephron");
    Assert.IsTrue(d1 > d2);

    d1 = sd.GetDistance("brittney spears", "britney spears");
    d2 = sd.GetDistance("brittney spears", "brittney startzman");
    Assert.IsTrue(d1 > d2);

    // Both values are computed results, so neither is "the expected one"; order is left as-is.
    d1 = sd.GetDistance("0012345678", "0012890678");
    d2 = sd.GetDistance("0012345678", "0072385698");
    Assert.AreEqual(d1, d2, 0.001);
}
/// <summary>
/// Scans one bucket of <c>_IndexDictionary</c> and records, together with their scores, the
/// entries whose <c>JaccardDistance.GetDistance</c> score against <paramref name="word"/> is
/// within 0.3 of the best score seen (a larger value is treated as a better match).
/// </summary>
/// <param name="word">Word to look up. It is compared as given (no case folding is applied here).</param>
/// <param name="maxSimilarity">Best score found so far.</param>
/// <param name="index">
/// Offset added to the word length. With <c>slot = word.Length + index - _minWordLength</c>,
/// the bucket examined is <c>slot - 1</c>.
/// </param>
/// <param name="equalMinDistanceDictWordList">
/// Receives candidate entries keyed by word with their score as the value. When a new best is
/// found, entries whose score is 0.3 or more below it are removed. Existing keys are never overwritten.
/// </param>
/// <returns>
/// The best score after the scan; <paramref name="maxSimilarity"/> unchanged when the slot is
/// out of range; <c>0</c> when the selected bucket is <c>null</c>.
/// </returns>
public static double findSimilarDictionaryWord(string word, double maxSimilarity, int index, Dictionary<string, double> equalMinDistanceDictWordList)
{
    const double distanceThreshold = 0.3;

    index = index - _minWordLength;

    int lengthSlot = word.Length + index;
    if (lengthSlot < 0 || lengthSlot >= _IndexDictionary.Length)
    {
        return maxSimilarity;
    }

    // The bucket actually read is one below the slot that was range-checked, so slot 0 would
    // index -1 and throw. Treat it like any other out-of-range request.
    // NOTE(review): the upper bound above means the last bucket is never examined — confirm
    // whether the "- 1" offset or the range check reflects the intended layout.
    int bucketIndex = lengthSlot - 1;
    if (bucketIndex < 0)
    {
        return maxSimilarity;
    }

    var bucket = _IndexDictionary[bucketIndex];
    if (bucket == null)
    {
        // NOTE(review): this returns 0 rather than maxSimilarity, unlike the other early exits
        // in this method. Kept as-is because callers may rely on it — confirm it is intended.
        return 0;
    }

    // A single scorer serves the whole scan.
    // NOTE(review): assumes JaccardDistance.GetDistance keeps no state between calls — confirm.
    JaccardDistance jd = new JaccardDistance();

    for (int j = 0; j < bucket.Count; j++)
    {
        string candidate = bucket[j];
        double score = jd.GetDistance(word, candidate);

        // Scores below -1 are raised to -1.
        if (score < -1)
        {
            score = -1;
        }

        if (score > maxSimilarity)
        {
            // New best: drop previously collected entries that are now too far behind.
            // Iterate over a snapshot because the dictionary is modified inside the loop.
            foreach (var item in equalMinDistanceDictWordList.ToList())
            {
                if (item.Value <= score - distanceThreshold)
                {
                    equalMinDistanceDictWordList.Remove(item.Key);
                }
            }

            tempReplacement = candidate;
            if (!equalMinDistanceDictWordList.ContainsKey(candidate))
            {
                equalMinDistanceDictWordList.Add(candidate, score);
            }

            maxSimilarity = score;
        }
        else if (score <= maxSimilarity + distanceThreshold
                 && score >= maxSimilarity - distanceThreshold
                 && score > 0)
        {
            // Not a new best, but close enough to it (and positive) to be kept as an alternative.
            if (!equalMinDistanceDictWordList.ContainsKey(candidate))
            {
                equalMinDistanceDictWordList.Add(candidate, score);
            }
        }
    }

    return maxSimilarity;
}
/// <summary>
/// Scans one bucket of <c>_IndexDictionary</c> for entries that equal, contain, or are similar to
/// <paramref name="word"/> (similarity via <c>JaccardDistance.GetDistance</c>).
/// </summary>
/// <param name="word">Word to look up. It is lower-cased, and trimmed if it has surrounding white space.</param>
/// <param name="maxSimilarity">Best score found so far.</param>
/// <param name="index">
/// Offset added to the word length (measured before trimming); the bucket examined is
/// <c>word.Length + index - _minWordLength</c>.
/// </param>
/// <param name="equalMinDistanceDictWordList">
/// Receives matching entries. It is cleared and set to the single match on an exact hit;
/// otherwise matches are appended.
/// </param>
/// <param name="exact">
/// When <c>true</c>, the similarity branch scores the first candidate that reaches it against the
/// whole entry and returns immediately; when <c>false</c>, it scores the word against the entry
/// with leading or trailing characters removed and accepts scores above 0.33.
/// </param>
/// <returns>
/// <c>10</c> when an entry equals the (untrimmed) word; otherwise the last score assigned
/// (<c>1</c> for a containing entry, or the similarity score of an accepted entry), or
/// <paramref name="maxSimilarity"/> unchanged when nothing matched or the bucket is out of range or <c>null</c>.
/// </returns>
public static double findSimilarDictionaryWord(string word, double maxSimilarity, int index, List<string> equalMinDistanceDictWordList, bool exact)
{
    index = index - _minWordLength;

    // Measured before lower-casing/trimming; the bucket is chosen from this length.
    int wordLength = word.Length;

    // Number of characters that may be stripped from a candidate in the similarity branch.
    // No stripping for a negative offset or for a word that starts with an upper-case letter
    // followed by a non-upper-case character.
    int trimWidth = index;
    if (index < 0 || (wordLength >= 2 && char.IsUpper(word[0]) && !char.IsUpper(word[1])))
    {
        trimWidth = 0;
    }

    word = word.ToLower();

    // An exact-match hit is only honoured when the word carried no surrounding white space.
    string trimmedWord = word.Trim();
    bool noSpace = word.CompareTo(trimmedWord) == 0;
    if (!noSpace)
    {
        word = trimmedWord;
    }

    int bucketIndex = wordLength + index;
    if (bucketIndex < 0 || bucketIndex >= _IndexDictionary.Length)
    {
        return maxSimilarity;
    }

    // Look the bucket up once instead of re-indexing _IndexDictionary on every access.
    var bucket = _IndexDictionary[bucketIndex];
    if (bucket == null)
    {
        return maxSimilarity;
    }

    // A single scorer serves the whole scan.
    // NOTE(review): assumes JaccardDistance.GetDistance keeps no state between calls — confirm.
    JaccardDistance jd = new JaccardDistance();

    for (int j = 0; j < bucket.Count; j++)
    {
        string candidate = bucket[j];

        if (noSpace && candidate.CompareTo(word) == 0)
        {
            // Exact hit: report only this entry, with the sentinel score 10.
            equalMinDistanceDictWordList.Clear();
            equalMinDistanceDictWordList.Add(candidate);
            return 10;
        }
        else if (candidate.Contains(word))
        {
            // The entry contains the word: keep it and continue scanning.
            // NOTE(review): this overwrites maxSimilarity with 1 even if it was higher — confirm.
            equalMinDistanceDictWordList.Add(candidate);
            maxSimilarity = 1;
        }
        else if (index <= 2)
        {
            for (int i = 0; i <= trimWidth; i++)
            {
                // Candidate without its first i characters / without its last trimWidth characters.
                string withoutPrefix = candidate.Substring(i);
                string withoutSuffix = candidate.Substring(0, candidate.Length - trimWidth);

                double similarity;
                if (!exact)
                {
                    similarity = Math.Max(jd.GetDistance(word, withoutPrefix), jd.GetDistance(word, withoutSuffix));
                }
                else
                {
                    similarity = jd.GetDistance(word, candidate);
                    if (similarity == 1)
                    {
                        equalMinDistanceDictWordList.Clear();
                        // This branch is only reached with i == 0, so withoutPrefix is the whole candidate.
                        equalMinDistanceDictWordList.Add(withoutPrefix);
                        maxSimilarity = similarity;
                    }

                    // NOTE(review): in exact mode the scan stops at the first candidate that reaches
                    // this branch, whether or not it matched; later bucket entries are never examined.
                    return maxSimilarity;
                }

                if (similarity > .33)
                {
                    // Good enough: keep the entry and move on to the next candidate.
                    equalMinDistanceDictWordList.Add(candidate);
                    maxSimilarity = similarity;
                    break;
                }
            }
        }
    }

    return maxSimilarity;
}
/// <summary>
/// Scratch routine: computes the <c>NGramDistance</c> score of "acht" against "yacht" and
/// against "acha". Nothing is asserted or returned; the values are only held in locals.
/// </summary>
public static void TestQGram()
{
    string baseWord = "acht";
    string longerWord = "yacht";
    string sameLengthWord = "acha";

    var metric = new NGramDistance();

    double scoreAgainstLonger = metric.GetDistance(baseWord, longerWord);
    double scoreAgainstSameLength = metric.GetDistance(baseWord, sameLengthWord);
    double copyOfFirstScore = scoreAgainstLonger;
}
/// <summary>
/// Checks that an <c>NGramDistance</c> constructed with <c>1</c> scores an empty string
/// against "al" as 0.
/// </summary>
public void TestEmpty()
{
    StringDistance nsd = new NGramDistance(1);

    float d = nsd.GetDistance("", "al");

    // Expected value first, so a failure message reports expected/actual correctly.
    Assert.AreEqual(0.0f, d, 0.001);
}