/// <summary>
/// Walks a BK-tree and gathers the stored strings whose edit distance to
/// <paramref name="s"/> is no greater than <paramref name="n"/>.
/// </summary>
/// <param name="s">The query string.</param>
/// <param name="bkTree">Root of the (sub)tree to search; <c>null</c> yields an empty list.</param>
/// <param name="n">Maximum edit distance to accept.</param>
/// <returns>A new list with the matching strings found in this subtree.</returns>
/// <remarks>
/// Assumes <c>Children[i]</c> is the subtree of strings at distance <c>i</c> from this
/// node's <c>StringValue</c> - TODO confirm against the tree builder.
/// </remarks>
public static List<string> EditDistanceAtMostN(string s, BKTree bkTree, int n)
{
    var matches = new List<string>();
    if (bkTree == null)
    {
        return matches;
    }

    int rootDistance = EditDistanceEngine.Compute(s, bkTree.StringValue);
    bool rootIsMatch = rootDistance <= n;

    if (rootIsMatch)
    {
        matches.Add(bkTree.StringValue);

        // Children whose index plus rootDistance stays within n are taken wholesale
        // via AllChildren (presumably every string in that subtree - verify).
        int wholesaleLimit = System.Math.Min(n - rootDistance, bkTree.Children.Length - 1);
        int child = 0;
        while (child <= wholesaleLimit)
        {
            matches.AddRange(AllChildren(bkTree.Children[child]));
            child++;
        }
    }

    // Remaining candidates: indices within n of rootDistance, skipping the ones
    // that were already taken wholesale above.
    int firstCandidate = System.Math.Max(System.Math.Max(0, rootDistance - n), n - rootDistance + 1);
    int lastCandidate = System.Math.Min(rootDistance + n, bkTree.Children.Length - 1);
    for (int child = firstCandidate; child <= lastCandidate; child++)
    {
        List<string> fromSubtree = EditDistanceAtMostN(s, bkTree.Children[child], n);
        matches.AddRange(fromSubtree);
    }

    return matches;
}
/// <summary>
/// Reports whether two integers are "close": either they differ numerically by
/// less than 2, or their default string forms are within an edit distance of 1.
/// </summary>
/// <param name="a">First value.</param>
/// <param name="b">Second value.</param>
/// <returns><c>true</c> when either closeness test passes; otherwise <c>false</c>.</returns>
public static bool OneOrOneDigit(int a, int b)
{
    // Subtract in 64-bit. The 32-bit difference can wrap (int.MaxValue - int.MinValue
    // wraps to -1, which would wrongly count as "close") or land exactly on
    // int.MinValue, for which Math.Abs throws OverflowException.
    long difference = (long)a - b;
    if (System.Math.Abs(difference) < 2)
    {
        return true;
    }

    return EditDistanceEngine.Compute(a.ToString(), b.ToString()) <= 1;
}
/// <summary>
/// Computes the edit distance between two strings, capped at <paramref name="max"/> + 1.
/// </summary>
/// <param name="a">First string.</param>
/// <param name="b">Second string.</param>
/// <param name="max">Largest distance reported exactly.</param>
/// <returns>
/// -1 when either string is empty; the edit distance when it is at most
/// <paramref name="max"/>; otherwise <paramref name="max"/> + 1.
/// </returns>
static int EditDistanceUpTo(string a, string b, int max)
{
    bool eitherEmpty = a == "" || b == "";
    if (eitherEmpty)
    {
        return -1;
    }

    int distance = EditDistanceEngine.Compute(a, b);
    if (distance > max)
    {
        // Anything beyond the cap collapses to a single sentinel value.
        return max + 1;
    }

    return distance;
}
/// <summary>
/// Recursively builds a BK-tree node from a list of strings: one randomly chosen
/// string becomes the node's value, and the remaining strings are grouped by their
/// edit distance to it, each group becoming the child subtree at that index.
/// </summary>
/// <param name="strings">Strings to place in this subtree.</param>
/// <param name="inserted">
/// Running count threaded through the recursion; it is only passed along here
/// (the progress output that used it is disabled).
/// </param>
/// <param name="originalStringCount">Passed unchanged to recursive calls.</param>
/// <returns>The subtree root, or <c>null</c> when <paramref name="strings"/> is empty.</returns>
private static BKTree CreateBKTree(List<string> strings, int inserted, int originalStringCount)
{
    if (!strings.Any())
    {
        return null;
    }

    // One bucket per possible distance value; sized from the longest input string.
    int longest = strings.Max(s => s.Length);
    var buckets = new List<string>[2 * longest + 1];
    for (int slot = 0; slot < buckets.Length; slot++)
    {
        buckets[slot] = new List<string>();
    }

    // Pick the node's own string at random.
    Random rng = new Random();
    int rootIndex = rng.Next(strings.Count);
    string rootString = strings[rootIndex];

    BKTree node = new BKTree();
    node.StringValue = rootString;

    // Group every other string by its distance to the chosen root string.
    for (int position = 0; position < strings.Count; position++)
    {
        if (position != rootIndex)
        {
            string candidate = strings[position];
            int distance = EditDistanceEngine.Compute(rootString, candidate);
            buckets[distance].Add(candidate);
        }
    }

    // Trim trailing empty buckets so the child array ends at the largest used distance.
    int highestUsed = buckets.Length - 1;
    while (highestUsed >= 0 && buckets[highestUsed].Count == 0)
    {
        highestUsed--;
    }

    node.Children = new BKTree[highestUsed + 1];
    for (int distance = 0; distance <= highestUsed; distance++)
    {
        List<string> bucket = buckets[distance];
        node.Children[distance] = CreateBKTree(bucket, inserted, originalStringCount);
        inserted += bucket.Count;
    }

    return node;
}
/// <summary>
/// Decides whether two rows have matching ADDRESS1 values, tolerating an edit
/// distance of at most 1.
/// </summary>
/// <param name="a">First row.</param>
/// <param name="b">Second row.</param>
/// <returns>
/// <c>false</c> when either ADDRESS1 is empty; <c>true</c> when the two values are
/// equal or within edit distance 1; otherwise <c>false</c>.
/// </returns>
public static bool FuzzyAddressMatchEditDistance(Row a, Row b)
{
    bool bothPresent = a.ADDRESS1 != "" && b.ADDRESS1 != "";
    if (!bothPresent)
    {
        return false;
    }

    var left = a.ADDRESS1;
    var right = b.ADDRESS1;

    // Exact equality is checked first so the distance computation is skipped for identical values.
    return left == right || EditDistanceEngine.Compute(left, right) <= 1;
}
/// <summary>
/// Counts how many of seven fields (LAST, FIRST, SSN, ADDRESS1, DOB, PHONE, MRN)
/// agree between two rows under lenient matching rules.
/// </summary>
/// <param name="ri">First row.</param>
/// <param name="rj">Second row.</param>
/// <returns>The number of agreeing fields, from 0 to 7.</returns>
public static int EasiestAgreementCount(Row ri, Row rj)
{
    // Text fields agree when both are non-empty and within edit distance 2.
    bool lastAgrees = ri.LAST != "" && rj.LAST != ""
        && EditDistanceEngine.Compute(ri.LAST, rj.LAST) <= 2;

    bool firstAgrees = ri.FIRST != "" && rj.FIRST != ""
        && EditDistanceEngine.Compute(ri.FIRST, rj.FIRST) <= 2;

    // The original source flags this matcher as changed.
    bool ssnAgrees = MatchingManager.FuzzierSSNMatch(ri.SSN, rj.SSN);

    bool addressAgrees = ri.ADDRESS1 != "" && rj.ADDRESS1 != ""
        && EditDistanceEngine.Compute(ri.ADDRESS1, rj.ADDRESS1) <= 2;

    // The original source flags this matcher as changed.
    bool dobAgrees = MatchingManager.FuzzyDateEquals(ri.DOB, rj.DOB);

    bool phoneAgrees = MatchingManager.FuzzyPhoneMatch(ri.PHONE, rj.PHONE);

    // NOTE(review): if MRN is a 32-bit int, this subtraction can overflow for
    // extreme values - confirm MRN's type and range.
    bool mrnAgrees = System.Math.Abs(ri.MRN - rj.MRN) < 500;

    bool[] outcomes =
    {
        lastAgrees, firstAgrees, ssnAgrees, addressAgrees, dobAgrees, phoneAgrees, mrnAgrees
    };

    int agreements = 0;
    foreach (bool agrees in outcomes)
    {
        if (agrees)
        {
            agreements++;
        }
    }

    return agreements;
}
/// <summary>
/// For every string in <paramref name="T"/>, finds the strings of <paramref name="S"/>
/// that the BK-tree query returns for edit distance at most <paramref name="n"/>, and
/// records each pair as a directed match together with its edit distance.
/// </summary>
/// <param name="S">Strings that are indexed in the BK-tree.</param>
/// <param name="T">Query strings; processed in parallel.</param>
/// <param name="n">Maximum edit distance passed to the BK-tree query.</param>
/// <returns>The matches object created for <c>S.Length</c> entries and filled by the queries.</returns>
/// <remarks>
/// NOTE(review): the lookup table keeps only the last index for each distinct string
/// in <paramref name="S"/>, so if S can contain duplicates, earlier duplicates never
/// receive a match - confirm callers de-duplicate S.
/// </remarks>
public static Matches EditDistanceAtMostN(string[] S, string[] T, int n)
{
    Matches toReturn = MatchesEngine.NewMatches(S.Length);

    // Create BKTree
    var bkTree = BKTreeEngine.CreateBKTree(S.ToList());

    // Create lookup table from string back to its index in S
    Dictionary<string, int> stringToInt = new Dictionary<string, int>();
    for (int i = 0; i < S.Length; i++)
    {
        stringToInt[S[i]] = i;
    }

    int c = 0;
    object cLock = new object();      // guards the progress counter and console output
    object matchLock = new object();  // guards mutation of the shared matches object

    Parallel.For(0, T.Length, j =>
    {
        lock (cLock)
        {
            Console.Write($"\r{c++}/{T.Length}");
        }

        var neighbors = BKTreeEngine.EditDistanceAtMostN(T[j], bkTree, n);
        foreach (var neighbor in neighbors)
        {
            int i = stringToInt[neighbor];

            // Compute the distance outside the critical section so that workers only
            // serialize on the actual insertion, and use the dedicated match lock
            // instead of contending with the progress output.
            int distance = EditDistanceEngine.Compute(T[j], neighbor);
            lock (matchLock)
            {
                toReturn.AddDirectedMatch(i, j, distance);
            }
        }
    });

    return toReturn;
}
/// <summary>
/// Searches a BK-tree for the stored strings closest to <paramref name="s"/>,
/// tightening <paramref name="bound"/> as closer strings are found.
/// </summary>
/// <param name="s">The query string.</param>
/// <param name="bkTree">Root of the (sub)tree to search; <c>null</c> yields an empty list.</param>
/// <param name="bound">
/// On entry, the largest distance still of interest; on exit, lowered to the smallest
/// distance found in this subtree if that was not greater than the incoming value.
/// </param>
/// <returns>
/// The strings collected from this subtree at the bound in force when the call returns;
/// empty when nothing in the subtree was within the incoming bound.
/// </returns>
public static List<string> LeastEditDistance(string s, BKTree bkTree, ref int bound)
{
    var best = new List<string>();
    if (bkTree == null)
    {
        return best;
    }

    int rootDistance = EditDistanceEngine.Compute(s, bkTree.StringValue);
    if (rootDistance <= bound)
    {
        bound = rootDistance;
        best.Add(bkTree.StringValue);
    }

    // Only child indices within the current bound of rootDistance are visited.
    int child = System.Math.Max(0, rootDistance - bound);
    int lastChild = System.Math.Min(rootDistance + bound, bkTree.Children.Length - 1);

    while (child <= lastChild)
    {
        int boundBefore = bound;
        List<string> found = LeastEditDistance(s, bkTree.Children[child], ref bound);

        if (bound != boundBefore)
        {
            // The subtree changed the bound: results gathered so far are replaced
            // by the subtree's results and the search window is recomputed.
            best = found;
            child = System.Math.Max(child, rootDistance - bound);
            lastChild = System.Math.Min(rootDistance + bound, bkTree.Children.Length - 1);
        }
        else
        {
            best.AddRange(found);
        }

        child++;
    }

    return best;
}
/// <summary>
/// Lenient SSN comparison: both values must pass <c>IsSSNValid</c>, and their
/// default string forms must be within an edit distance of 2.
/// </summary>
/// <param name="a">First SSN, as an integer.</param>
/// <param name="b">Second SSN, as an integer.</param>
/// <returns><c>true</c> when both are valid and within edit distance 2; otherwise <c>false</c>.</returns>
/// <remarks>
/// NOTE(review): integers drop leading zeros, so the compared strings may have
/// different lengths - confirm this is the intended comparison.
/// </remarks>
public static bool FuzzierSSNMatch(int a, int b)
{
    if (!IsSSNValid(a) || !IsSSNValid(b))
    {
        return false;
    }

    string left = a.ToString();
    string right = b.ToString();
    return EditDistanceEngine.Compute(left, right) <= 2;
}