Exemplo n.º 1
0
        /// <summary>
        /// Entry point of the CSV demo: loads the "dirty" and "clean" person records,
        /// matches them against each other, prints every data set to the console and
        /// writes the matched result to the output file.
        /// </summary>
        /// <param name="args">
        /// Optional positional arguments: [0] path of the dirty CSV, [1] path of the
        /// clean reference CSV, [2] path of the output CSV. Any argument that is
        /// missing falls back to the default shown in the body.
        /// </param>
        static void Main(string[] args)
        {
            Console.WriteLine("Welcome to the CSV Demo for FuzzyNameMatch!");

            // Resolve file locations; a missing argument yields null, which selects the default.
            string dirtyPath  = args.ElementAtOrDefault(0) ?? "TestData/dirty.csv";
            string cleanPath  = args.ElementAtOrDefault(1) ?? "TestData/clean.csv";
            string outputPath = args.ElementAtOrDefault(2) ?? "out.csv";

            // Load both inputs up front: the "dirty" records first, then the "clean" reference records.
            var dirtyPeople = GetRecords<DirtyPerson>(dirtyPath);
            var cleanPeople = GetRecords<CleanPerson>(cleanPath);

            PrintData(dirtyPeople, "Dirty Data");
            PrintData(cleanPeople, "Clean Data");

            // Alternative matching strategy (nickname lookup), kept for reference:
            //var nicknameDictionary = FuzzyNameMatch.Data.GetNicknames().ToDictionary(x => x.Item1, x => x.Item2, StringComparer.InvariantCultureIgnoreCase);
            //var matcher = new FuzzyNameMatch.Matching.Algorithms.NameIsNickname(nicknameDictionary);

            // Active strategy: the Soundex matcher selects candidates, the
            // Damerau-Levenshtein metric generator scores them.
            var soundexMatcher = new FuzzyNameMatch.Matching.Algorithms.NameMatchesSoundex();
            var distanceMetric = new FuzzyNameMatch.Matching.MetricGenerator.DamerauLevenshteinDistance();

            // Materialize once so the same results are printed and written.
            var results = MatchedPeople(dirtyPeople, cleanPeople, soundexMatcher, distanceMetric).ToList();

            PrintData(results, "Output Data");

            WriteRecords(results, outputPath);
            Console.WriteLine("Records Written to file...");
        }
Exemplo n.º 2
0
        /// <summary>
        /// Lazily produces one <see cref="OutputPerson"/> per clean record. Each clean
        /// record is matched by name against all dirty records; the match result with
        /// the lowest metric supplies <c>NewData</c>, and <c>NewData</c> is null when
        /// the matcher yields no results.
        /// </summary>
        /// <param name="dirtyDataPeople">Dirty records used as the candidate pool for every clean record.</param>
        /// <param name="cleanDataPeople">Clean reference records; the output follows their order.</param>
        /// <param name="matcher">Matcher that selects the candidates for a given clean record.</param>
        /// <param name="metricGenerator">Metric generator used to score the selected candidates.</param>
        /// <returns>A deferred sequence with exactly one output record per clean record.</returns>
        private static IEnumerable<OutputPerson> MatchedPeople(List<DirtyPerson> dirtyDataPeople, List<CleanPerson> cleanDataPeople, FuzzyNameMatch.Matching.Algorithms.NameMatchesSoundex matcher, FuzzyNameMatch.Matching.MetricGenerator.DamerauLevenshteinDistance metricGenerator)
        {
            foreach (var reference in cleanDataPeople)
            {
                // Both sides are matched on their Name value.
                var target     = reference.AsMatchableOn(reference.Name);
                var candidates = dirtyDataPeople.Select(dirty => dirty.AsMatchableOn(dirty.Name));

                // Step 1: let the matcher pick candidates. Step 2: score them against the target.
                var preselected = matcher.Match(target, candidates);
                var scored      = preselected.GetMatchResults(target, metricGenerator).ToList();

                // No candidate at all: still emit the clean record, just without new data.
                if (!scored.Any())
                {
                    yield return new OutputPerson
                    {
                        Name = reference.Name,
                        ExistingData = reference.ExistingData,
                        NewData = null
                    };
                    continue;
                }

                // Ascending order puts the lowest metric first; OrderBy is stable,
                // so ties resolve to the earliest result.
                var closest = scored.OrderBy(result => result.Metric).First().Match.Value;

                yield return new OutputPerson
                {
                    Name = reference.Name,
                    ExistingData = reference.ExistingData,
                    NewData = closest.NewData
                };
            }
        }