/// <summary>
/// Builds the field-by-field comparison profile for a pair of rows.
/// </summary>
/// <param name="a">First row of the pair.</param>
/// <param name="b">Second row of the pair.</param>
/// <returns>
/// Sixteen values, in this order: LAST, FIRST, MIDDLE, SUFFIX, DOB, GENDER, SSN,
/// ADDRESS1, ADDRESS2, ZIP, MOTHERS_MAIDEN_NAME, MRN, CITY, STATE, PHONE, EMAIL.
/// The DOB slot is -1 when either row has no DOB (default(DateTime)), 0 when the
/// dates are equal, 1 when MatchingManager.FuzzyDateEquals accepts them, and 2 otherwise.
/// </returns>
public static int[] CreateProfile(Row a, Row b)
{
    // The array initializer evaluates its elements left to right, so the helper
    // calls run in the same order as the original sequence of Add calls.
    return new int[]
    {
        EditDistanceUpTo(a.LAST, b.LAST, 2),
        EditDistanceUpTo(a.FIRST, b.FIRST, 2),
        EditDistanceUpTo(a.MIDDLE, b.MIDDLE, 2),
        EditDistanceUpTo(a.SUFFIX, b.SUFFIX, 2),

        // A DOB missing on EITHER side yields -1. Previously only a.DOB was checked,
        // so an unset b.DOB was scored as a mismatch and
        // CreateProfile(a, b) could differ from CreateProfile(b, a).
        (a.DOB == default(DateTime) || b.DOB == default(DateTime))
            ? -1
            : a.DOB == b.DOB
                ? 0
                : MatchingManager.FuzzyDateEquals(a.DOB, b.DOB) ? 1 : 2,

        Equals(a.GENDER, b.GENDER),
        NumericEditDistanceUpTo(a.SSN, b.SSN, 2),
        EditDistanceUpTo(a.ADDRESS1, b.ADDRESS1, 2),
        EditDistanceUpTo(a.ADDRESS2, b.ADDRESS2, 2),
        NumericEditDistanceUpTo(a.ZIP, b.ZIP, 2),
        EditDistanceUpTo(a.MOTHERS_MAIDEN_NAME, b.MOTHERS_MAIDEN_NAME, 2),
        MRNDistance(a.MRN, b.MRN),
        Equals(a.CITY, b.CITY),
        Equals(a.STATE, b.STATE),
        NumericEditDistanceUpTo(a.PHONE, b.PHONE, 1),
        EditDistanceUpTo(a.EMAIL, b.EMAIL, 2)

        // Skipping PHONE2. Might come back to this later, probably by making copies
        // of this person and putting PHONE2 into PHONE.
        // Skipping ALIAS. Might come back to this later, probably by making copies
        // of this person with ALIAS substituted in (the original note does not make
        // the target field clear).
    };
}
/// <summary>
/// Counts how many of seven fields (LAST, FIRST, SSN, ADDRESS1, DOB, PHONE, MRN)
/// agree between two rows under lenient matching rules.
/// </summary>
/// <param name="ri">First row of the pair.</param>
/// <param name="rj">Second row of the pair.</param>
/// <returns>The number of agreeing fields, from 0 to 7.</returns>
public static int EasiestAgreementCount(Row ri, Row rj)
{
    int fieldAgreement = 0;

    if (BothPresentWithinEditDistance(ri.LAST, rj.LAST, 2))
    {
        fieldAgreement++;
    }

    if (BothPresentWithinEditDistance(ri.FIRST, rj.FIRST, 2))
    {
        fieldAgreement++;
    }

    // Flagged in the original source as a changed rule ("This changed").
    if (MatchingManager.FuzzierSSNMatch(ri.SSN, rj.SSN))
    {
        fieldAgreement++;
    }

    if (BothPresentWithinEditDistance(ri.ADDRESS1, rj.ADDRESS1, 2))
    {
        fieldAgreement++;
    }

    // Flagged in the original source as a changed rule ("This changed").
    // NOTE(review): no missing-DOB check happens here; whether two unset DOBs count
    // as agreement depends on FuzzyDateEquals - confirm.
    if (MatchingManager.FuzzyDateEquals(ri.DOB, rj.DOB))
    {
        fieldAgreement++;
    }

    if (MatchingManager.FuzzyPhoneMatch(ri.PHONE, rj.PHONE))
    {
        fieldAgreement++;
    }

    // NOTE(review): if MRN is a 32-bit int, this subtraction can wrap and
    // System.Math.Abs throws OverflowException for int.MinValue - confirm the MRN
    // type and range. Two rows sharing the same placeholder MRN also count as agreeing.
    if (System.Math.Abs(ri.MRN - rj.MRN) < 500)
    {
        fieldAgreement++;
    }

    return fieldAgreement;
}

/// <summary>
/// True when both strings are non-null and non-empty and their edit distance, as
/// computed by EditDistanceEngine.Compute, is at most <paramref name="maxDistance"/>.
/// A null value is treated as missing instead of being passed to the engine.
/// </summary>
private static bool BothPresentWithinEditDistance(string left, string right, int maxDistance)
{
    return !string.IsNullOrEmpty(left)
        && !string.IsNullOrEmpty(right)
        && EditDistanceEngine.Compute(left, right) <= maxDistance;
}
/// <summary>
/// Parses every input string with DateTime.Parse and records pairs of dates that are
/// identical (distance 0) or differ by one of the near-miss rules below (distance 1).
/// </summary>
/// <param name="strings">Date strings; position in this array is the index reported in the matches.</param>
/// <param name="n">NOTE(review): never read by this method - every recorded distance is 0 or 1 regardless of its value.</param>
/// <returns>The Matches object created by MatchesEngine.NewMatches, after Clean() has been called on it.</returns>
public override Matches DistanceAtMostN(string[] strings, int n)
{
    DateTime[] parsed = strings.Select(text => DateTime.Parse(text)).ToArray();
    Matches result = MatchesEngine.NewMatches(parsed.Length);
    DateIndex[] entries = parsed
        .Select((date, position) => new DateIndex { Date = date, Index = position })
        .ToArray();

    // Pass 1: day/month transpositions. Dates sharing a year and the same unordered
    // {day, month} pair land in one bucket. Every entry is also paired with itself
    // (the inner loop starts at the outer index), giving a distance-0 self match.
    var swappedBuckets = entries.GroupBy(e => new
    {
        Low = System.Math.Min(e.Date.Day, e.Date.Month),
        High = System.Math.Max(e.Date.Day, e.Date.Month),
        e.Date.Year
    });
    foreach (var bucket in swappedBuckets)
    {
        DateIndex[] members = bucket.ToArray();
        for (int first = 0; first < members.Length; first++)
        {
            for (int second = first; second < members.Length; second++)
            {
                int distance = members[first].Date == members[second].Date ? 0 : 1;
                result.AddMatch(members[first].Index, members[second].Index, distance);
            }
        }
    }

    // Pass 2: same month and year; day transposed, off by one, or off by one digit.
    AddNearMissPairs(
        result,
        entries.GroupBy(e => new { e.Date.Month, e.Date.Year }),
        date => date.Day,
        (x, y) => MatchingManager.OneOrOneDigit(x, y) || MatchingManager.TransposedDigit(x, y));

    // Pass 3: same day and year; month transposed, off by one, or off by one digit.
    AddNearMissPairs(
        result,
        entries.GroupBy(e => new { e.Date.Day, e.Date.Year }),
        date => date.Month,
        (x, y) => MatchingManager.OneOrOneDigit(x, y) || MatchingManager.TransposedDigit(x, y));

    // Pass 4: same day and month; year has a transposed digit, is off by one or
    // one digit, or is off by 100.
    AddNearMissPairs(
        result,
        entries.GroupBy(e => new { e.Date.Day, e.Date.Month }),
        date => date.Year,
        (x, y) => MatchingManager.OneOrOneDigit(x, y)
                  || MatchingManager.TransposedDigit(x, y)
                  || MatchingManager.OffBy100(x, y));

    result.Clean();
    return result;
}

/// <summary>
/// For each bucket, visits every unordered pair of distinct members and adds a
/// distance-1 match when <paramref name="isNearMiss"/> accepts the selected date component.
/// </summary>
private static void AddNearMissPairs<TKey>(
    Matches target,
    IEnumerable<IGrouping<TKey, DateIndex>> buckets,
    Func<DateTime, int> component,
    Func<int, int, bool> isNearMiss)
{
    foreach (IGrouping<TKey, DateIndex> bucket in buckets)
    {
        DateIndex[] members = bucket.ToArray();
        for (int first = 0; first < members.Length; first++)
        {
            for (int second = first + 1; second < members.Length; second++)
            {
                int left = component(members[first].Date);
                int right = component(members[second].Date);
                if (isNearMiss(left, right))
                {
                    target.AddMatch(members[first].Index, members[second].Index, 1);
                }
            }
        }
    }
}