/// <summary>
/// For each matched group, finds the pair of rows with the largest distance as reported by
/// <c>EditDistanceEngine.ComputeDistanceForRecordPair</c>.
/// </summary>
/// <typeparam name="T">Key type of the groupings.</typeparam>
/// <param name="matches">Groups of rows to inspect.</param>
/// <returns>
/// One tuple per group, in input order: (largest distance, EnterpriseID of the first row,
/// EnterpriseID of the second row). A group in which no compared pair scores above -1
/// (for example a single-row group) yields (-1, -1, -1).
/// </returns>
public static List<Tuple<double, int, int>> ReturnMaxErrorForMatchedGroups<T>(List<IGrouping<T, Row>> matches)
{
    var results = new List<Tuple<double, int, int>>(matches.Count);

    foreach (IGrouping<T, Row> group in matches)
    {
        Row[] members = group.ToArray();

        // Sentinels reported when no pair in the group beats the initial distance.
        double worstDistance = -1.0;
        int firstId = -1;
        int secondId = -1;

        // Every ordered pair is evaluated (both (a, b) and (b, a)); the first pair that
        // reaches the maximum is the one kept, because the comparison below is strict.
        foreach (Row left in members)
        {
            foreach (Row right in members)
            {
                if (left != right)
                {
                    double pairDistance = EditDistanceEngine.ComputeDistanceForRecordPair(left, right);
                    if (pairDistance > worstDistance)
                    {
                        worstDistance = pairDistance;
                        firstId = left.EnterpriseID;
                        secondId = right.EnterpriseID;
                    }
                }
            }
        }

        results.Add(Tuple.Create(worstDistance, firstId, secondId));
    }

    return results;
}
/// <summary>
/// Looks up street names close to <c>address.StreetName</c> via
/// <c>BKTreeEngine.LeastEditDistance</c> and, when the first candidate is close enough,
/// generates an alternate address line for every city/zip combination recorded for the
/// matching streets.
/// </summary>
/// <param name="address">Address whose <c>StreetName</c> is matched.</param>
/// <param name="data">Reference data supplying the street-name tree, the street records and the street/city-to-zips lookup.</param>
/// <param name="alternateLines">Shared output list; generated lines are appended while holding a lock on it.</param>
/// <returns>
/// <c>true</c> when at least one alternate line was generated; <c>false</c> otherwise,
/// including when the address has no street name (in which case the list is not touched).
/// </returns>
public static bool IsSoftAddressMatch(Address address, Data data, List<string> alternateLines)
{
    // The first candidate must score strictly below this normalized distance.
    const float maxNormalizedDistance = .5f;

    if (string.IsNullOrEmpty(address.StreetName))
    {
        return false;
    }

    List<string> innerAlternateLines = new List<string>();
    List<string> softMatchedStreets = BKTreeEngine.LeastEditDistance(address.StreetName, data.StreetNameBKTree).Distinct().ToList();

    // Only the first candidate is tested against the threshold; the street records are
    // filtered afterwards so that no work is done for a match that is rejected anyway.
    if (softMatchedStreets.Count > 0 && EditDistanceEngine.ComputeNormalized(softMatchedStreets[0], address.StreetName) < maxNormalizedDistance)
    {
        // Hash-based membership instead of a linear List.Contains per street record.
        HashSet<string> matchedNames = new HashSet<string>(softMatchedStreets);

        foreach (StreetName name in data.StreetData)
        {
            if (string.IsNullOrEmpty(name.Name) || !matchedNames.Contains(name.Name))
            {
                continue;
            }

            string[] possibleCities = name.Cities.ToArray();
            foreach (string city in possibleCities)
            {
                StreetNameAndCity key = new StreetNameAndCity
                {
                    City = city,
                    FullStreetName = name.FullStreetName,
                };

                int[] possibleZips = data.StreetNameCity2Zips[key].ToArray();
                foreach (int possibleZip in possibleZips)
                {
                    innerAlternateLines.Add(AddressUtility.CreateLineFromAddress(address, name.FullStreetName, possibleZip, city));
                }
            }
        }
    }

    // Results are collected locally and published in one step under the caller's lock.
    lock (alternateLines)
    {
        alternateLines.AddRange(innerAlternateLines);
    }

    return innerAlternateLines.Count > 0;
}
/// <summary>
/// Among the streets recorded for the address's zip code, finds the one(s) whose name has
/// the lowest <c>EditDistanceEngine.ComputeNormalized</c> distance to
/// <c>address.StreetName</c> and generates an alternate address line for each city
/// recorded for that street/zip combination.
/// </summary>
/// <param name="address">Address to match; it must have both a street name and a zip for any line to be generated.</param>
/// <param name="data">Reference data supplying the street records and the street/zip-to-cities lookup.</param>
/// <param name="alternateLines">Shared output list; generated lines are appended while holding a lock on it.</param>
/// <returns><c>true</c> when at least one alternate line was generated; otherwise <c>false</c>.</returns>
public static bool IsSoftAddressAndHardZipMatch(Address address, Data data, List<string> alternateLines)
{
    List<string> innerAlternateLines = new List<string>();

    if (!string.IsNullOrEmpty(address.StreetName) && address.Zip.HasValue)
    {
        var zip = address.Zip.Value;
        double lowestEditDistance = double.MaxValue;

        foreach (StreetName candidate in data.StreetData.Where(n => n.ZipCodes.Contains(zip)))
        {
            double editDistance = EditDistanceEngine.ComputeNormalized(candidate.Name, address.StreetName);

            if (editDistance < lowestEditDistance)
            {
                // A strictly better street replaces everything collected so far.
                innerAlternateLines.Clear();
                lowestEditDistance = editDistance;
            }
            else if (editDistance != lowestEditDistance)
            {
                // Worse than the current best (or not comparable): contributes nothing.
                continue;
            }

            // Reached for a new best and for exact ties with the current best. The city
            // lookup is done only here, for streets that actually contribute lines.
            StreetNameAndZip key = new StreetNameAndZip
            {
                FullStreetName = candidate.FullStreetName,
                Zip = zip
            };

            foreach (string city in data.StreetNameZip2Cities[key])
            {
                innerAlternateLines.Add(AddressUtility.CreateLineFromAddress(address, candidate.FullStreetName, zip, city));
            }
        }
    }

    // Results are collected locally and published in one step under the caller's lock.
    // The lock is taken even when nothing was generated.
    lock (alternateLines)
    {
        alternateLines.AddRange(innerAlternateLines);
    }

    return innerAlternateLines.Count > 0;
}