예제 #1
0
        /// <summary>
        /// Announces the GENDER pass on the console, then hands <paramref name="allData"/> to
        /// <c>FastEditDistanceGrouper.DistanceAtMostN</c>, keyed on each row's <c>GENDER</c> value with a
        /// distance argument of 0.
        /// </summary>
        /// <param name="allData">Rows forwarded unchanged to the grouper.</param>
        /// <returns>The result produced by <c>DistanceAtMostN</c>.</returns>
        public static RowMatchObject FuzzyGENDERMatches(Row[] allData)
        {
            Console.WriteLine("Matching GENDER");

            // NOTE(review): a distance argument of 0 presumably means exact matches only — confirm in DistanceAtMostN.
            var grouper       = new FastEditDistanceGrouper();
            var genderMatches = grouper.DistanceAtMostN(allData, row => row.GENDER, 0);
            return genderMatches;
        }
예제 #2
0
        /// <summary>
        /// Announces the ADDRESS2 pass on the console, then hands <paramref name="allData"/> to
        /// <c>FastEditDistanceGrouper.DistanceAtMostN</c>, keyed on each row's <c>ADDRESS2</c> value with a
        /// distance argument of 2.
        /// </summary>
        /// <param name="allData">Rows forwarded unchanged to the grouper.</param>
        /// <returns>The result produced by <c>DistanceAtMostN</c>.</returns>
        public static RowMatchObject FuzzyADDRESS2Matches(Row[] allData)
        {
            Console.WriteLine("Matching ADDRESS2");

            var grouper         = new FastEditDistanceGrouper();
            var address2Matches = grouper.DistanceAtMostN(allData, row => row.ADDRESS2, 2);
            return address2Matches;
        }
예제 #3
0
        /// <summary>
        /// Announces the middle-name pass on the console, then hands <paramref name="allData"/> to
        /// <c>FastEditDistanceGrouper.DistanceAtMostN</c>, keyed on each row's <c>MIDDLE</c> value with a
        /// distance argument of 2.
        /// </summary>
        /// <param name="allData">Rows forwarded unchanged to the grouper.</param>
        /// <returns>The result produced by <c>DistanceAtMostN</c>.</returns>
        public static RowMatchObject FuzzyMIDDLEMatches(Row[] allData)
        {
            Console.WriteLine("Matching Middle Names");

            var grouper       = new FastEditDistanceGrouper();
            var middleMatches = grouper.DistanceAtMostN(allData, row => row.MIDDLE, 2);
            return middleMatches;
        }
예제 #4
0
        /// <summary>
        /// Announces the suffix pass on the console, then hands <paramref name="allData"/> to
        /// <c>FastEditDistanceGrouper.DistanceAtMostN</c>, keyed on each row's <c>SUFFIX</c> value with a
        /// distance argument of 2.
        /// </summary>
        /// <param name="allData">Rows forwarded unchanged to the grouper.</param>
        /// <returns>The result produced by <c>DistanceAtMostN</c>.</returns>
        public static RowMatchObject FuzzySUFFIXMatches(Row[] allData)
        {
            Console.WriteLine("Matching SUFFIXES");

            var grouper       = new FastEditDistanceGrouper();
            var suffixMatches = grouper.DistanceAtMostN(allData, row => row.SUFFIX, 2);
            return suffixMatches;
        }
예제 #5
0
        /// <summary>
        /// Announces the SSN pass on the console, then hands <paramref name="allData"/> to
        /// <c>FastEditDistanceGrouper.DistanceAtMostN</c> with a distance argument of 2. The key is the
        /// <c>SSN</c> value rendered with <c>ToString()</c>; rows whose <c>SSN</c> is <c>&lt;= 0</c> are keyed
        /// by the empty string instead.
        /// </summary>
        /// <param name="allData">Rows forwarded unchanged to the grouper.</param>
        /// <returns>The result produced by <c>DistanceAtMostN</c>.</returns>
        public static RowMatchObject FuzzySSNMatches(Row[] allData)
        {
            Console.WriteLine("Matching SSN");

            // NOTE(review): SSN <= 0 looks like the "no value" sentinel — confirm against how Row is populated.
            var grouper    = new FastEditDistanceGrouper();
            var ssnMatches = grouper.DistanceAtMostN(
                allData,
                row => row.SSN <= 0 ? "" : row.SSN.ToString(),
                2);
            return ssnMatches;
        }
예제 #6
0
        /// <summary>
        /// Announces the phone pass on the console, then hands <paramref name="allData"/> to
        /// <c>FastEditDistanceGrouper.DistanceAtMostN</c> with a distance argument of 1. The key is the
        /// <c>PHONE</c> value rendered with <c>ToString()</c>; rows whose <c>PHONE</c> is <c>&lt;= 0</c> are
        /// keyed by the empty string instead.
        /// </summary>
        /// <param name="allData">Rows forwarded unchanged to the grouper.</param>
        /// <returns>The result produced by <c>DistanceAtMostN</c>.</returns>
        public static RowMatchObject FuzzyPhoneMatches(Row[] allData)
        {
            Console.WriteLine("Matching Phone");

            // NOTE(review): PHONE <= 0 looks like the "no value" sentinel — confirm against how Row is populated.
            var grouper      = new FastEditDistanceGrouper();
            var phoneMatches = grouper.DistanceAtMostN(
                allData,
                row => row.PHONE <= 0 ? "" : row.PHONE.ToString(),
                1);
            return phoneMatches;
        }
예제 #7
0
        /// <summary>
        /// Announces the first-name pass on the console, then hands <paramref name="allData"/> to
        /// <c>FastEditDistanceGrouper.DistanceAtMostN</c>, keyed on each row's <c>FIRST</c> value with a
        /// distance argument of 2.
        /// </summary>
        /// <param name="allData">Rows forwarded unchanged to the grouper.</param>
        /// <returns>The result produced by <c>DistanceAtMostN</c>.</returns>
        public static RowMatchObject FuzzyFirstNameMatches(Row[] allData)
        {
            Console.WriteLine("Matching First Names");

            var grouper          = new FastEditDistanceGrouper();
            var firstNameMatches = grouper.DistanceAtMostN(allData, row => row.FIRST, 2);
            return firstNameMatches;
        }
예제 #8
0
        /// <summary>
        /// Announces the EMAIL pass on the console, then hands <paramref name="allData"/> to
        /// <c>FastEditDistanceGrouper.DistanceAtMostN</c> with a distance argument of 2. The key is the part
        /// of <c>EMAIL</c> that precedes the first <c>'@'</c>; a null or empty <c>EMAIL</c> is keyed by the
        /// empty string.
        /// </summary>
        /// <param name="allData">Rows forwarded unchanged to the grouper.</param>
        /// <returns>The result produced by <c>DistanceAtMostN</c>.</returns>
        public static RowMatchObject FuzzyEMAILMatches(Row[] allData)
        {
            Console.WriteLine("Matching EMAIL");
            FastEditDistanceGrouper fastEditDistanceGrouper = new FastEditDistanceGrouper();

            // IsNullOrEmpty also covers a null EMAIL, which would otherwise throw on Split.
            // Split always yields at least one element, so [0] is safe and needs no LINQ.
            return fastEditDistanceGrouper.DistanceAtMostN(
                allData,
                d => string.IsNullOrEmpty(d.EMAIL) ? "" : d.EMAIL.Split('@')[0],
                2);
        }
예제 #9
0
        /// <summary>
        /// Announces the MOTHERS_MAIDEN_NAME pass on the console, then hands <paramref name="allData"/> to
        /// <c>FastEditDistanceGrouper.DistanceAtMostN</c>, keyed on each row's <c>MOTHERS_MAIDEN_NAME</c>
        /// value with a distance argument of 2.
        /// </summary>
        /// <param name="allData">Rows forwarded unchanged to the grouper.</param>
        /// <returns>The result produced by <c>DistanceAtMostN</c>.</returns>
        public static RowMatchObject FuzzyMOTHERS_MAIDEN_NAMEMatches(Row[] allData)
        {
            Console.WriteLine("Matching MOTHERS_MAIDEN_NAME");

            var grouper           = new FastEditDistanceGrouper();
            var maidenNameMatches = grouper.DistanceAtMostN(allData, row => row.MOTHERS_MAIDEN_NAME, 2);
            return maidenNameMatches;
        }
예제 #10
0
        /// <summary>
        /// Announces the ZIP pass on the console, then hands <paramref name="allData"/> to
        /// <c>FastEditDistanceGrouper.DistanceAtMostN</c> with a distance argument of 2. The key is the
        /// <c>ZIP</c> value rendered with <c>ToString()</c> when it is <c>&gt; 0</c>, and the empty string
        /// otherwise.
        /// </summary>
        /// <param name="allData">Rows forwarded unchanged to the grouper.</param>
        /// <returns>The result produced by <c>DistanceAtMostN</c>.</returns>
        public static RowMatchObject FuzzyZIPMatches(Row[] allData)
        {
            Console.WriteLine("Matching ZIP");

            // NOTE(review): a non-positive ZIP looks like the "no value" sentinel — confirm against how Row is populated.
            var grouper    = new FastEditDistanceGrouper();
            var zipMatches = grouper.DistanceAtMostN(
                allData,
                row => row.ZIP > 0 ? row.ZIP.ToString() : "",
                2);
            return zipMatches;
        }
예제 #11
0
        /// <summary>
        /// Combines several per-field match objects into one <see cref="Matches"/>. For every EID in
        /// <paramref name="eids"/> it tallies how often each other EID is reached through the neighbors
        /// reported by the match objects, and records a match for every pair whose tally is at least
        /// <paramref name="n"/>.
        /// </summary>
        /// <param name="eids">EIDs to process; must be non-empty because its maximum sizes the result and the tally array.</param>
        /// <param name="matchObjectsForFields">
        /// Match objects to consult. Each is read through <c>EidToIndex</c>, <c>Matches.Neighbors</c> and
        /// <c>IndexToEids</c>. NOTE(review): presumably one object per field, with each EID listed under a
        /// single index, so the tally equals the number of agreeing fields — confirm.
        /// </param>
        /// <param name="n">Minimum tally a neighbor needs for the pair to be recorded.</param>
        /// <param name="allData">Not read by this method; kept so existing callers keep compiling.</param>
        /// <returns>The populated match object after <c>Clean()</c> has been called on it.</returns>
        /// <exception cref="InvalidOperationException">Thrown by <c>eids.Max()</c> when <paramref name="eids"/> is empty.</exception>
        public static Matches FuzzyMatchOnNImportantFields(int[] eids, List <RowMatchObject> matchObjectsForFields, int n, Row[] allData)
        {
            int     maxEid   = eids.Max();
            Matches toReturn = MatchesEngine.NewMatches(maxEid + 1);

            int processed = 0;

            // Scratch tally indexed by EID. Only the touched entries are zeroed after each outer
            // iteration, so the array is allocated once.
            int[]      eidToMatchCount = new int[maxEid + 1];
            List <int> usedEids        = new List <int>();

            foreach (int eid in eids)
            {
                usedEids.Clear();
                Console.Write($"\r{processed++}/{eids.Length} Final Row Matches");
                foreach (var matchObject in matchObjectsForFields)
                {
                    int index = matchObject.EidToIndex[eid];
                    if (index == -1)  // -1 means this match object has no entry for the EID
                    {
                        continue;
                    }
                    var neighborIndices = matchObject.Matches.Neighbors(index);
                    foreach (var neighborIndex in neighborIndices)
                    {
                        var neighborEids = matchObject.IndexToEids[neighborIndex.Index];
                        foreach (var neighborEid in neighborEids)
                        {
                            if (eid > neighborEid)  //We will do the exact same computation when we find all the matches for neighborEID.
                            {
                                continue;
                            }
                            usedEids.Add(neighborEid);
                            eidToMatchCount[neighborEid]++;
                        }
                    }
                }

                foreach (var usedEid in usedEids)
                {
                    // usedEids holds one entry per hit, so an EID can appear several times. The first
                    // visit consumes and zeroes its tally; a zero tally therefore marks a duplicate.
                    int matchCount = eidToMatchCount[usedEid];
                    if (matchCount == 0)
                    {
                        continue;
                    }

                    if (matchCount >= n)
                    {
                        toReturn.AddMatch(eid, usedEid, matchCount);  //One way of recording the NUMBER of fuzzy matches
                    }

                    eidToMatchCount[usedEid] = 0;
                }
            }

            Console.WriteLine("\nCleaning Two Field Fuzzy Match Object");
            toReturn.Clean();  //I think I've actually staged things in a way that makes this unnecessary
            Console.WriteLine("Done Cleaning");
            return toReturn;
        }