/// <summary>
/// Announces "Matching GENDER" on the console and groups the rows by their
/// <c>GENDER</c> field using <c>FastEditDistanceGrouper.DistanceAtMostN</c>.
/// </summary>
/// <param name="allData">Rows handed to the grouper unchanged.</param>
/// <returns>Whatever <c>DistanceAtMostN</c> returns for the GENDER key.</returns>
public static RowMatchObject FuzzyGENDERMatches(Row[] allData)
{
    // Presumably the maximum edit distance (0 = exact match only) — confirm against DistanceAtMostN.
    const int distanceLimit = 0;

    Console.WriteLine("Matching GENDER");
    var grouper = new FastEditDistanceGrouper();
    return grouper.DistanceAtMostN(allData, row => row.GENDER, distanceLimit);
}
/// <summary>
/// Announces "Matching ADDRESS2" on the console and groups the rows by their
/// <c>ADDRESS2</c> field using <c>FastEditDistanceGrouper.DistanceAtMostN</c>.
/// </summary>
/// <param name="allData">Rows handed to the grouper unchanged.</param>
/// <returns>Whatever <c>DistanceAtMostN</c> returns for the ADDRESS2 key.</returns>
public static RowMatchObject FuzzyADDRESS2Matches(Row[] allData)
{
    // Presumably the maximum edit distance — confirm against DistanceAtMostN.
    const int distanceLimit = 2;

    Console.WriteLine("Matching ADDRESS2");
    var grouper = new FastEditDistanceGrouper();
    return grouper.DistanceAtMostN(allData, row => row.ADDRESS2, distanceLimit);
}
/// <summary>
/// Announces "Matching Middle Names" on the console and groups the rows by their
/// <c>MIDDLE</c> field using <c>FastEditDistanceGrouper.DistanceAtMostN</c>.
/// </summary>
/// <param name="allData">Rows handed to the grouper unchanged.</param>
/// <returns>Whatever <c>DistanceAtMostN</c> returns for the MIDDLE key.</returns>
public static RowMatchObject FuzzyMIDDLEMatches(Row[] allData)
{
    // Presumably the maximum edit distance — confirm against DistanceAtMostN.
    const int distanceLimit = 2;

    Console.WriteLine("Matching Middle Names");
    var grouper = new FastEditDistanceGrouper();
    return grouper.DistanceAtMostN(allData, row => row.MIDDLE, distanceLimit);
}
/// <summary>
/// Announces "Matching SUFFIXES" on the console and groups the rows by their
/// <c>SUFFIX</c> field using <c>FastEditDistanceGrouper.DistanceAtMostN</c>.
/// </summary>
/// <param name="allData">Rows handed to the grouper unchanged.</param>
/// <returns>Whatever <c>DistanceAtMostN</c> returns for the SUFFIX key.</returns>
public static RowMatchObject FuzzySUFFIXMatches(Row[] allData)
{
    // Presumably the maximum edit distance — confirm against DistanceAtMostN.
    const int distanceLimit = 2;

    Console.WriteLine("Matching SUFFIXES");
    var grouper = new FastEditDistanceGrouper();
    return grouper.DistanceAtMostN(allData, row => row.SUFFIX, distanceLimit);
}
/// <summary>
/// Announces "Matching SSN" on the console and groups the rows by the string form
/// of their <c>SSN</c> using <c>FastEditDistanceGrouper.DistanceAtMostN</c>.
/// </summary>
/// <remarks>
/// A row whose <c>SSN</c> is zero or negative is keyed by the empty string
/// instead of its numeric text.
/// </remarks>
/// <param name="allData">Rows handed to the grouper unchanged.</param>
/// <returns>Whatever <c>DistanceAtMostN</c> returns for the SSN key.</returns>
public static RowMatchObject FuzzySSNMatches(Row[] allData)
{
    // Presumably the maximum edit distance — confirm against DistanceAtMostN.
    const int distanceLimit = 2;

    Console.WriteLine("Matching SSN");
    var grouper = new FastEditDistanceGrouper();
    return grouper.DistanceAtMostN(
        allData,
        row => row.SSN <= 0 ? "" : row.SSN.ToString(),
        distanceLimit);
}
/// <summary>
/// Announces "Matching Phone" on the console and groups the rows by the string form
/// of their <c>PHONE</c> using <c>FastEditDistanceGrouper.DistanceAtMostN</c>.
/// </summary>
/// <remarks>
/// A row whose <c>PHONE</c> is zero or negative is keyed by the empty string
/// instead of its numeric text.
/// </remarks>
/// <param name="allData">Rows handed to the grouper unchanged.</param>
/// <returns>Whatever <c>DistanceAtMostN</c> returns for the PHONE key.</returns>
public static RowMatchObject FuzzyPhoneMatches(Row[] allData)
{
    // Presumably the maximum edit distance; note it is tighter (1) than most sibling fields — confirm against DistanceAtMostN.
    const int distanceLimit = 1;

    Console.WriteLine("Matching Phone");
    var grouper = new FastEditDistanceGrouper();
    return grouper.DistanceAtMostN(
        allData,
        row => row.PHONE <= 0 ? "" : row.PHONE.ToString(),
        distanceLimit);
}
/// <summary>
/// Announces "Matching First Names" on the console and groups the rows by their
/// <c>FIRST</c> field using <c>FastEditDistanceGrouper.DistanceAtMostN</c>.
/// </summary>
/// <param name="allData">Rows handed to the grouper unchanged.</param>
/// <returns>Whatever <c>DistanceAtMostN</c> returns for the FIRST key.</returns>
public static RowMatchObject FuzzyFirstNameMatches(Row[] allData)
{
    // Presumably the maximum edit distance — confirm against DistanceAtMostN.
    const int distanceLimit = 2;

    Console.WriteLine("Matching First Names");
    var grouper = new FastEditDistanceGrouper();
    return grouper.DistanceAtMostN(allData, row => row.FIRST, distanceLimit);
}
/// <summary>
/// Announces "Matching EMAIL" on the console and groups the rows by the part of
/// their <c>EMAIL</c> that precedes the first '@', using
/// <c>FastEditDistanceGrouper.DistanceAtMostN</c>.
/// </summary>
/// <remarks>
/// A null or empty <c>EMAIL</c> is keyed by the empty string. The null case
/// previously threw a <see cref="NullReferenceException"/> inside the selector.
/// An address without '@' is keyed by its full text, since <c>Split</c> then
/// returns the whole string as its only element.
/// </remarks>
/// <param name="allData">Rows handed to the grouper unchanged.</param>
/// <returns>Whatever <c>DistanceAtMostN</c> returns for the e-mail local-part key.</returns>
public static RowMatchObject FuzzyEMAILMatches(Row[] allData)
{
    // Presumably the maximum edit distance — confirm against DistanceAtMostN.
    const int distanceLimit = 2;

    Console.WriteLine("Matching EMAIL");
    var grouper = new FastEditDistanceGrouper();
    return grouper.DistanceAtMostN(
        allData,
        // Split always yields at least one element, so [0] is safe for any non-null string.
        row => string.IsNullOrEmpty(row.EMAIL) ? "" : row.EMAIL.Split('@')[0],
        distanceLimit);
}
/// <summary>
/// Announces "Matching MOTHERS_MAIDEN_NAME" on the console and groups the rows by their
/// <c>MOTHERS_MAIDEN_NAME</c> field using <c>FastEditDistanceGrouper.DistanceAtMostN</c>.
/// </summary>
/// <param name="allData">Rows handed to the grouper unchanged.</param>
/// <returns>Whatever <c>DistanceAtMostN</c> returns for the MOTHERS_MAIDEN_NAME key.</returns>
public static RowMatchObject FuzzyMOTHERS_MAIDEN_NAMEMatches(Row[] allData)
{
    // Presumably the maximum edit distance — confirm against DistanceAtMostN.
    const int distanceLimit = 2;

    Console.WriteLine("Matching MOTHERS_MAIDEN_NAME");
    var grouper = new FastEditDistanceGrouper();
    return grouper.DistanceAtMostN(allData, row => row.MOTHERS_MAIDEN_NAME, distanceLimit);
}
/// <summary>
/// Announces "Matching ZIP" on the console and groups the rows by the string form
/// of their <c>ZIP</c> using <c>FastEditDistanceGrouper.DistanceAtMostN</c>.
/// </summary>
/// <remarks>
/// Only a strictly positive <c>ZIP</c> is converted to text; any other value is
/// keyed by the empty string.
/// </remarks>
/// <param name="allData">Rows handed to the grouper unchanged.</param>
/// <returns>Whatever <c>DistanceAtMostN</c> returns for the ZIP key.</returns>
public static RowMatchObject FuzzyZIPMatches(Row[] allData)
{
    // Presumably the maximum edit distance — confirm against DistanceAtMostN.
    const int distanceLimit = 2;

    Console.WriteLine("Matching ZIP");
    var grouper = new FastEditDistanceGrouper();
    return grouper.DistanceAtMostN(
        allData,
        row => row.ZIP > 0 ? row.ZIP.ToString() : "",
        distanceLimit);
}
/// <summary>
/// For every EID in <paramref name="eids"/>, counts how many times each neighbor EID
/// (restricted to neighbors whose EID is not smaller) is reached through the supplied
/// per-field match objects, and records a match whenever that count is at least
/// <paramref name="n"/>.
/// </summary>
/// <remarks>
/// Progress is written to the console on every iteration. The count passed to
/// <c>AddMatch</c> is the number of hits; presumably one hit per agreeing field,
/// provided a neighbor EID appears at most once per match object — confirm against
/// <c>RowMatchObject</c>.
/// </remarks>
/// <param name="eids">EIDs to process; also determines the size of the scratch array and of the result.</param>
/// <param name="matchObjectsForFields">One match object per field taking part in the vote.</param>
/// <param name="n">Minimum hit count required for a pair to be recorded.</param>
/// <param name="allData">Not read by this method; kept so existing callers continue to compile.</param>
/// <returns>The <c>Matches</c> object after <c>Clean()</c> has been called on it.</returns>
/// <exception cref="InvalidOperationException">
/// <paramref name="eids"/> is empty (thrown by <c>Enumerable.Max</c>).
/// </exception>
public static Matches FuzzyMatchOnNImportantFields(int[] eids, List<RowMatchObject> matchObjectsForFields, int n, Row[] allData)
{
    int maxEid = eids.Max();
    Matches toReturn = MatchesEngine.NewMatches(maxEid + 1);

    // Scratch counter indexed by EID; every touched slot is reset to 0 before the next outer iteration.
    // NOTE(review): assumes no neighbor EID exceeds the maximum of eids — otherwise this indexer throws.
    int[] eidToMatchCount = new int[maxEid + 1];

    // Distinct neighbor EIDs touched while processing the current EID.
    List<int> usedEids = new List<int>();

    int processed = 0;
    foreach (int eid in eids)
    {
        usedEids.Clear();
        Console.Write($"\r{processed++}/{eids.Length} Final Row Matches");

        foreach (var matchObject in matchObjectsForFields)
        {
            int index = matchObject.EidToIndex[eid];
            if (index == -1)
            {
                // -1 is treated as "this EID has no entry in this match object".
                continue;
            }

            foreach (var neighbor in matchObject.Matches.Neighbors(index))
            {
                foreach (var neighborEid in matchObject.IndexToEids[neighbor.Index])
                {
                    if (eid > neighborEid)
                    {
                        // The same pair is handled when the smaller EID takes its turn in the outer loop.
                        continue;
                    }

                    // Record the EID only on its first hit so usedEids holds no duplicates.
                    // Each candidate is then examined exactly once below.
                    if (eidToMatchCount[neighborEid]++ == 0)
                    {
                        usedEids.Add(neighborEid);
                    }
                }
            }
        }

        foreach (int usedEid in usedEids)
        {
            int hits = eidToMatchCount[usedEid];
            if (hits >= n)
            {
                // The hit count is stored alongside the pair as one way of recording the number of fuzzy matches.
                toReturn.AddMatch(eid, usedEid, hits);
            }

            eidToMatchCount[usedEid] = 0;
        }
    }

    Console.WriteLine("\nCleaning Two Field Fuzzy Match Object");
    // The original author suspected this Clean() is unnecessary given how pairs are staged; kept to preserve behavior.
    toReturn.Clean();
    Console.WriteLine("Done Cleaning");
    return toReturn;
}