/// <summary>
/// Scores the lower-cased <paramref name="word"/> against every entry of one
/// bucket of <c>_IndexDictionary</c> and collects the best-scoring entries.
/// </summary>
/// <param name="word">Word to look up; it is lower-cased before comparison.</param>
/// <param name="maxSimilarity">Best score found so far; only entries scoring at least this are collected.</param>
/// <param name="index">Offset that, after subtracting <c>_minWordLength</c> and adding the word length, selects the bucket.</param>
/// <param name="equalMinDistanceDictWordList">
/// Receives the matches. It is cleared whenever a strictly better score is found,
/// and appended to when an entry ties the current best score.
/// </param>
/// <returns>
/// The highest score seen, or the unchanged <paramref name="maxSimilarity"/> when the
/// bucket is out of range, missing, or contains nothing better.
/// </returns>
public static double findSimilarDictionaryWord(string word, double maxSimilarity, int index, List<string> equalMinDistanceDictWordList)
{
    index = index - _minWordLength;
    word = word.ToLower();

    int slot = word.Length + index;
    if (slot < 0 || slot >= _IndexDictionary.Length)
    {
        return maxSimilarity;
    }

    var bucket = _IndexDictionary[slot];
    if (bucket == null)
    {
        return maxSimilarity;
    }

    // One scorer for the whole scan. The original also created a JaroWinklerDistance
    // and an NGramDistance on every iteration without ever using them.
    JaccardDistance jaccard = new JaccardDistance();

    for (int j = 0; j < bucket.Count; j++)
    {
        string candidate = bucket[j];
        double similarity = jaccard.GetDistance(word, candidate);

        if (similarity > maxSimilarity)
        {
            // Strictly better: previous matches are no longer the best ones.
            equalMinDistanceDictWordList.Clear();
            equalMinDistanceDictWordList.Add(candidate);
            maxSimilarity = similarity;
        }
        else if (similarity == maxSimilarity)
        {
            // Exact tie with the current best (deliberately an exact comparison,
            // as in the original).
            equalMinDistanceDictWordList.Add(candidate);
        }
    }

    return maxSimilarity;
}
/// <summary>
/// Scores <paramref name="word"/> against every entry of one bucket of
/// <c>_IndexDictionary</c> and keeps all entries whose score lies within a fixed
/// band (0.3) of the best score.
/// </summary>
/// <param name="word">Word to look up; used as given (no case folding in this overload).</param>
/// <param name="maxSimilarity">Best score found so far.</param>
/// <param name="index">Offset that, after subtracting <c>_minWordLength</c> and adding <c>word.Length - 1</c>, selects the bucket.</param>
/// <param name="equalMinDistanceDictWordList">
/// Receives candidate → score pairs. When a new best score is found, entries that fall
/// at or below (new best − 0.3) are removed. Existing keys are never overwritten.
/// </param>
/// <returns>
/// The highest score seen. Returns the unchanged <paramref name="maxSimilarity"/> when the
/// bucket index is out of range, and 0 when the selected bucket is <c>null</c>
/// (kept from the original; NOTE(review): the other overloads return maxSimilarity here — confirm intent).
/// </returns>
/// <remarks>
/// Also assigns the most recent best-scoring candidate to <c>tempReplacement</c>.
/// </remarks>
public static double findSimilarDictionaryWord(string word, double maxSimilarity, int index, Dictionary<string, double> equalMinDistanceDictWordList)
{
    const double distanceThreshold = 0.3;

    index = index - _minWordLength;

    // The bucket actually read is (length - 1 + index). The original range check was
    // made on (length + index), which let a value of 0 through and then indexed
    // slot -1. Guard the real slot, and keep the original upper limit (slot + 1).
    int slot = word.Length - 1 + index;
    if (slot < 0 || slot + 1 >= _IndexDictionary.Length)
    {
        return maxSimilarity;
    }

    var bucket = _IndexDictionary[slot];
    if (bucket == null)
    {
        return 0;
    }

    // One scorer for the whole scan. The original also created a JaroWinklerDistance
    // and an NGramDistance on every iteration without ever using them.
    JaccardDistance jaccard = new JaccardDistance();

    // No try/catch here: the original caught Exception only to "throw e;", which
    // discards the original stack trace. Letting exceptions propagate keeps it.
    for (int j = 0; j < bucket.Count; j++)
    {
        string candidate = bucket[j];
        double similarity = jaccard.GetDistance(word, candidate);

        // Floor carried over from the original code: scores below -1 are raised to -1.
        if (similarity < -1)
        {
            similarity = -1;
        }

        if (similarity > maxSimilarity)
        {
            // New best: drop everything that is now too far below it.
            // ToList() snapshots the entries so the dictionary can be modified while iterating.
            foreach (var item in equalMinDistanceDictWordList.ToList())
            {
                if (item.Value <= similarity - distanceThreshold)
                {
                    equalMinDistanceDictWordList.Remove(item.Key);
                }
            }

            tempReplacement = candidate;
            if (!equalMinDistanceDictWordList.ContainsKey(candidate))
            {
                equalMinDistanceDictWordList.Add(candidate, similarity);
            }

            maxSimilarity = similarity;
        }
        else if (similarity >= maxSimilarity - distanceThreshold && similarity > 0)
        {
            // Not better than the best, but inside the band below it. The original's
            // extra "similarity <= maxSimilarity + threshold" test is implied by this branch.
            if (!equalMinDistanceDictWordList.ContainsKey(candidate))
            {
                equalMinDistanceDictWordList.Add(candidate, similarity);
            }
        }
    }

    return maxSimilarity;
}
/// <summary>
/// Scores the lower-cased <paramref name="word"/> against every entry of one
/// bucket of <c>_IndexDictionary</c> and collects the best-scoring entries.
/// </summary>
/// <param name="word">Word to look up; it is lower-cased before comparison.</param>
/// <param name="maxSimilarity">Best score found so far; only entries scoring at least this are collected.</param>
/// <param name="index">Offset that, after subtracting <c>_minWordLength</c> and adding the word length, selects the bucket.</param>
/// <param name="equalMinDistanceDictWordList">
/// Receives the matches. It is cleared whenever a strictly better score is found,
/// and appended to when an entry ties the current best score.
/// </param>
/// <returns>
/// The highest score seen, or the unchanged <paramref name="maxSimilarity"/> when the
/// bucket is out of range, missing, or contains nothing better.
/// </returns>
public static double findSimilarDictionaryWord(string word, double maxSimilarity, int index, List<string> equalMinDistanceDictWordList)
{
    index = index - _minWordLength;
    word = word.ToLower();

    int slot = word.Length + index;
    if (slot < 0 || slot >= _IndexDictionary.Length)
    {
        return maxSimilarity;
    }

    var bucket = _IndexDictionary[slot];
    if (bucket == null)
    {
        return maxSimilarity;
    }

    // One scorer for the whole scan. The original also created a JaroWinklerDistance
    // and an NGramDistance on every iteration without ever using them.
    JaccardDistance jaccard = new JaccardDistance();

    for (int j = 0; j < bucket.Count; j++)
    {
        string candidate = bucket[j];
        double similarity = jaccard.GetDistance(word, candidate);

        if (similarity > maxSimilarity)
        {
            // Strictly better: previous matches are no longer the best ones.
            equalMinDistanceDictWordList.Clear();
            equalMinDistanceDictWordList.Add(candidate);
            maxSimilarity = similarity;
        }
        else if (similarity == maxSimilarity)
        {
            // Exact tie with the current best (deliberately an exact comparison,
            // as in the original).
            equalMinDistanceDictWordList.Add(candidate);
        }
    }

    return maxSimilarity;
}
/// <summary>
/// Scores <paramref name="word"/> against every entry of one bucket of
/// <c>_IndexDictionary</c> and keeps all entries whose score lies within a fixed
/// band (0.3) of the best score.
/// </summary>
/// <param name="word">Word to look up; used as given (no case folding in this overload).</param>
/// <param name="maxSimilarity">Best score found so far.</param>
/// <param name="index">Offset that, after subtracting <c>_minWordLength</c> and adding <c>word.Length - 1</c>, selects the bucket.</param>
/// <param name="equalMinDistanceDictWordList">
/// Receives candidate → score pairs. When a new best score is found, entries that fall
/// at or below (new best − 0.3) are removed. Existing keys are never overwritten.
/// </param>
/// <returns>
/// The highest score seen. Returns the unchanged <paramref name="maxSimilarity"/> when the
/// bucket index is out of range, and 0 when the selected bucket is <c>null</c>
/// (kept from the original; NOTE(review): the other overloads return maxSimilarity here — confirm intent).
/// </returns>
/// <remarks>
/// Also assigns the most recent best-scoring candidate to <c>tempReplacement</c>.
/// </remarks>
public static double findSimilarDictionaryWord(string word, double maxSimilarity, int index, Dictionary<string, double> equalMinDistanceDictWordList)
{
    const double distanceThreshold = 0.3;

    index = index - _minWordLength;

    // The bucket actually read is (length - 1 + index). The original range check was
    // made on (length + index), which let a value of 0 through and then indexed
    // slot -1. Guard the real slot, and keep the original upper limit (slot + 1).
    int slot = word.Length - 1 + index;
    if (slot < 0 || slot + 1 >= _IndexDictionary.Length)
    {
        return maxSimilarity;
    }

    var bucket = _IndexDictionary[slot];
    if (bucket == null)
    {
        return 0;
    }

    // One scorer for the whole scan. The original also created a JaroWinklerDistance
    // and an NGramDistance on every iteration without ever using them.
    JaccardDistance jaccard = new JaccardDistance();

    // No try/catch here: the original caught Exception only to "throw e;", which
    // discards the original stack trace. Letting exceptions propagate keeps it.
    for (int j = 0; j < bucket.Count; j++)
    {
        string candidate = bucket[j];
        double similarity = jaccard.GetDistance(word, candidate);

        // Floor carried over from the original code: scores below -1 are raised to -1.
        if (similarity < -1)
        {
            similarity = -1;
        }

        if (similarity > maxSimilarity)
        {
            // New best: drop everything that is now too far below it.
            // ToList() snapshots the entries so the dictionary can be modified while iterating.
            foreach (var item in equalMinDistanceDictWordList.ToList())
            {
                if (item.Value <= similarity - distanceThreshold)
                {
                    equalMinDistanceDictWordList.Remove(item.Key);
                }
            }

            tempReplacement = candidate;
            if (!equalMinDistanceDictWordList.ContainsKey(candidate))
            {
                equalMinDistanceDictWordList.Add(candidate, similarity);
            }

            maxSimilarity = similarity;
        }
        else if (similarity >= maxSimilarity - distanceThreshold && similarity > 0)
        {
            // Not better than the best, but inside the band below it. The original's
            // extra "similarity <= maxSimilarity + threshold" test is implied by this branch.
            if (!equalMinDistanceDictWordList.ContainsKey(candidate))
            {
                equalMinDistanceDictWordList.Add(candidate, similarity);
            }
        }
    }

    return maxSimilarity;
}
/// <summary>
/// Searches one bucket of <c>_IndexDictionary</c> for entries that equal, contain, or
/// score close to the lower-cased, trimmed <paramref name="word"/>.
/// </summary>
/// <param name="word">Word to look up. Its length is measured before lower-casing and trimming.</param>
/// <param name="maxSimilarity">Score returned when nothing is accepted; overwritten by each accepted candidate.</param>
/// <param name="index">
/// Offset that, after subtracting <c>_minWordLength</c> and adding the untrimmed word length,
/// selects the bucket. The adjusted value is also used as the number of characters that may be
/// dropped from a candidate (forced to 0 when negative or when the word starts with an upper-case
/// letter followed by a non-upper-case one). Fuzzy scoring only runs when the adjusted value is ≤ 2.
/// </param>
/// <param name="equalMinDistanceDictWordList">Receives the accepted dictionary entries.</param>
/// <param name="exact">
/// When true, the first candidate that reaches the scoring step is compared as a whole and the
/// method returns immediately afterwards, whatever the outcome.
/// </param>
/// <returns>
/// 10 when an entry equals the word (only checked if the word had no surrounding whitespace);
/// 1 after a containing entry; otherwise the score of the last accepted candidate, or the
/// incoming <paramref name="maxSimilarity"/>. The value is not necessarily the maximum score seen.
/// </returns>
public static double findSimilarDictionaryWord(string word, double maxSimilarity, int index, List<string> equalMinDistanceDictWordList, bool exact)
{
    index = index - _minWordLength;
    int wordLength = word.Length;

    // Number of leading/trailing characters that may be stripped from a candidate.
    int maxOffset = index;
    bool startsCapitalised = wordLength >= 2 && char.IsUpper(word[0]) && !char.IsUpper(word[1]);
    if (index < 0 || startsCapitalised)
    {
        maxOffset = 0;
    }

    word = word.ToLower();
    string trimmed = word.Trim();
    bool noSpace = word.CompareTo(trimmed) == 0;
    if (!noSpace)
    {
        word = trimmed;
    }

    int slot = wordLength + index;
    if (slot < 0 || slot >= _IndexDictionary.Length)
    {
        return maxSimilarity;
    }

    var bucket = _IndexDictionary[slot];
    if (bucket == null)
    {
        return maxSimilarity;
    }

    // One scorer for the whole scan. The original also created a JaroWinklerDistance
    // and an NGramDistance on every iteration without ever using them.
    JaccardDistance jaccard = new JaccardDistance();

    for (int j = 0; j < bucket.Count; j++)
    {
        string candidate = bucket[j];

        if (noSpace && candidate.CompareTo(word) == 0)
        {
            // Exact hit: report it alone, using 10 as the score.
            equalMinDistanceDictWordList.Clear();
            equalMinDistanceDictWordList.Add(candidate);
            return 10;
        }

        if (candidate.Contains(word))
        {
            equalMinDistanceDictWordList.Add(candidate);
            maxSimilarity = 1;
            continue;
        }

        if (index > 2)
        {
            continue;
        }

        if (exact)
        {
            // Whole-candidate comparison only. The original computed the trimmed
            // substrings first, although this branch never used them, so a candidate
            // shorter than the offset threw ArgumentOutOfRangeException here.
            double wholeSimilarity = jaccard.GetDistance(word, candidate);
            if (wholeSimilarity == 1)
            {
                equalMinDistanceDictWordList.Clear();
                equalMinDistanceDictWordList.Add(candidate);
                maxSimilarity = wholeSimilarity;
            }
            return maxSimilarity;
        }

        // A candidate shorter than the offset cannot be trimmed; skip it rather than
        // let Substring throw.
        if (candidate.Length < maxOffset)
        {
            continue;
        }

        // The tail-trimmed form does not depend on the leading offset, so score it once.
        string withoutTail = candidate.Substring(0, candidate.Length - maxOffset);
        double tailSimilarity = jaccard.GetDistance(word, withoutTail);

        for (int i = 0; i <= maxOffset; i++)
        {
            string withoutHead = candidate.Substring(i);
            double similarity = Math.Max(jaccard.GetDistance(word, withoutHead), tailSimilarity);

            if (similarity > .33)
            {
                // Accepted; earlier matches are kept and the returned score is overwritten.
                equalMinDistanceDictWordList.Add(candidate);
                maxSimilarity = similarity;
                break;
            }
        }
    }

    return maxSimilarity;
}
/// <summary>
/// Searches one bucket of <c>_IndexDictionary</c> for entries that equal, contain, or
/// score close to the lower-cased, trimmed <paramref name="word"/>.
/// </summary>
/// <param name="word">Word to look up. Its length is measured before lower-casing and trimming.</param>
/// <param name="maxSimilarity">Score returned when nothing is accepted; overwritten by each accepted candidate.</param>
/// <param name="index">
/// Offset that, after subtracting <c>_minWordLength</c> and adding the untrimmed word length,
/// selects the bucket. The adjusted value is also used as the number of characters that may be
/// dropped from a candidate (forced to 0 when negative or when the word starts with an upper-case
/// letter followed by a non-upper-case one). Fuzzy scoring only runs when the adjusted value is ≤ 2.
/// </param>
/// <param name="equalMinDistanceDictWordList">Receives the accepted dictionary entries.</param>
/// <param name="exact">
/// When true, the first candidate that reaches the scoring step is compared as a whole and the
/// method returns immediately afterwards, whatever the outcome.
/// </param>
/// <returns>
/// 10 when an entry equals the word (only checked if the word had no surrounding whitespace);
/// 1 after a containing entry; otherwise the score of the last accepted candidate, or the
/// incoming <paramref name="maxSimilarity"/>. The value is not necessarily the maximum score seen.
/// </returns>
public static double findSimilarDictionaryWord(string word, double maxSimilarity, int index, List<string> equalMinDistanceDictWordList, bool exact)
{
    index = index - _minWordLength;
    int wordLength = word.Length;

    // Number of leading/trailing characters that may be stripped from a candidate.
    int maxOffset = index;
    bool startsCapitalised = wordLength >= 2 && char.IsUpper(word[0]) && !char.IsUpper(word[1]);
    if (index < 0 || startsCapitalised)
    {
        maxOffset = 0;
    }

    word = word.ToLower();
    string trimmed = word.Trim();
    bool noSpace = word.CompareTo(trimmed) == 0;
    if (!noSpace)
    {
        word = trimmed;
    }

    int slot = wordLength + index;
    if (slot < 0 || slot >= _IndexDictionary.Length)
    {
        return maxSimilarity;
    }

    var bucket = _IndexDictionary[slot];
    if (bucket == null)
    {
        return maxSimilarity;
    }

    // One scorer for the whole scan. The original also created a JaroWinklerDistance
    // and an NGramDistance on every iteration without ever using them.
    JaccardDistance jaccard = new JaccardDistance();

    for (int j = 0; j < bucket.Count; j++)
    {
        string candidate = bucket[j];

        if (noSpace && candidate.CompareTo(word) == 0)
        {
            // Exact hit: report it alone, using 10 as the score.
            equalMinDistanceDictWordList.Clear();
            equalMinDistanceDictWordList.Add(candidate);
            return 10;
        }

        if (candidate.Contains(word))
        {
            equalMinDistanceDictWordList.Add(candidate);
            maxSimilarity = 1;
            continue;
        }

        if (index > 2)
        {
            continue;
        }

        if (exact)
        {
            // Whole-candidate comparison only. The original computed the trimmed
            // substrings first, although this branch never used them, so a candidate
            // shorter than the offset threw ArgumentOutOfRangeException here.
            double wholeSimilarity = jaccard.GetDistance(word, candidate);
            if (wholeSimilarity == 1)
            {
                equalMinDistanceDictWordList.Clear();
                equalMinDistanceDictWordList.Add(candidate);
                maxSimilarity = wholeSimilarity;
            }
            return maxSimilarity;
        }

        // A candidate shorter than the offset cannot be trimmed; skip it rather than
        // let Substring throw.
        if (candidate.Length < maxOffset)
        {
            continue;
        }

        // The tail-trimmed form does not depend on the leading offset, so score it once.
        string withoutTail = candidate.Substring(0, candidate.Length - maxOffset);
        double tailSimilarity = jaccard.GetDistance(word, withoutTail);

        for (int i = 0; i <= maxOffset; i++)
        {
            string withoutHead = candidate.Substring(i);
            double similarity = Math.Max(jaccard.GetDistance(word, withoutHead), tailSimilarity);

            if (similarity > .33)
            {
                // Accepted; earlier matches are kept and the returned score is overwritten.
                equalMinDistanceDictWordList.Add(candidate);
                maxSimilarity = similarity;
                break;
            }
        }
    }

    return maxSimilarity;
}
/// <summary>
/// Scratch routine: builds a <c>JaccardDistance</c> with argument 2 and calls
/// <c>GetDistanceFast</c> for "stree" against "street" and against "steere".
/// The results are stored in locals only; nothing is returned, asserted, or printed.
/// </summary>
public static void TestJaccard()
{
    const string probe = "stree";
    const string closeMatch = "street";
    const string shuffledMatch = "steere";

    JaccardDistance scorer = new JaccardDistance(2);

    double probeVsClose = scorer.GetDistanceFast(probe, closeMatch);
    double probeVsShuffled = scorer.GetDistanceFast(probe, shuffledMatch);

    // Unused value kept from the original; presumably a spot to stop in a debugger — TODO confirm.
    double placeholder = 1;
}