/// <summary>
/// Proposes alternate address lines by taking the street-name candidates returned by
/// <c>BKTreeEngine.LeastEditDistance</c> and expanding every street record that carries one of
/// those names into all city/zip combinations recorded for it.
/// </summary>
/// <param name="address">Address being matched; its <c>StreetName</c> drives the lookup.</param>
/// <param name="data">Reference data: the street-name tree, the street records and the street+city to zips lookup.</param>
/// <param name="alternateLines">Output list; results are appended while holding a lock on the list itself.</param>
/// <returns><c>true</c> when at least one alternate line was produced; otherwise <c>false</c>.</returns>
public static bool IsSoftAddressMatch(Address address, Data data, List<string> alternateLines)
{
    if (string.IsNullOrEmpty(address.StreetName))
    {
        return false;
    }

    List<string> innerAlternateLines = new List<string>();
    List<string> softMatchedStreets = BKTreeEngine.LeastEditDistance(address.StreetName, data.StreetNameBKTree).Distinct().ToList();

    // Only the first candidate is scored; candidates are accepted only when that score is below 0.5.
    // The street records are scanned after this check so the scan is skipped when the check fails.
    if (softMatchedStreets.Count > 0 && EditDistanceEngine.ComputeNormalized(softMatchedStreets[0], address.StreetName) < .5f)
    {
        // Set lookup replaces the previous List.Contains scan plus the inner loop over the
        // (already distinct) candidates, which could only ever match once per street record.
        HashSet<string> candidateNames = new HashSet<string>(softMatchedStreets);

        foreach (StreetName name in data.StreetData)
        {
            if (string.IsNullOrEmpty(name.Name) || !candidateNames.Contains(name.Name))
            {
                continue;
            }

            string[] possibleCities = name.Cities.ToArray();
            foreach (string city in possibleCities)
            {
                // NOTE(review): the indexer is used as in the original; presumably every
                // street/city pair present in StreetData has an entry - confirm.
                int[] possibleZips = data.StreetNameCity2Zips[new StreetNameAndCity
                {
                    City = city,
                    FullStreetName = name.FullStreetName,
                }].ToArray();

                foreach (int possibleZip in possibleZips)
                {
                    innerAlternateLines.Add(AddressUtility.CreateLineFromAddress(address, name.FullStreetName, possibleZip, city));
                }
            }
        }
    }

    lock (alternateLines)
    {
        alternateLines.AddRange(innerAlternateLines);
    }

    return innerAlternateLines.Count > 0;
}
/// <summary>
/// Proposes alternate address lines that keep the address's zip as-is while taking the street
/// name and the city from the candidates returned by <c>BKTreeEngine.LeastEditDistance</c>.
/// </summary>
/// <param name="address">Address being matched; needs a street name, a zip and a city.</param>
/// <param name="data">Reference data: city/street trees, street records and the street+zip to cities lookup.</param>
/// <param name="alternateLines">Output list; results are appended while holding a lock on the list itself.</param>
/// <returns>
/// <c>true</c> when at least one alternate line was produced; <c>false</c> otherwise, including
/// when any of the required address fields is missing (the output list is not touched then).
/// </returns>
public static bool IsSoftAddressAndSoftCityHardZipMatch(Address address, Data data, List<string> alternateLines)
{
    bool hasRequiredFields = !string.IsNullOrEmpty(address.StreetName)
        && address.Zip != null
        && !string.IsNullOrEmpty(address.City);
    if (!hasRequiredFields)
    {
        return false;
    }

    List<string> found = new List<string>();
    List<string> cityCandidates = BKTreeEngine.LeastEditDistance(address.City, data.CityNameBKTree).Distinct().ToList();
    List<string> streetCandidates = BKTreeEngine.LeastEditDistance(address.StreetName, data.StreetNameBKTree).Distinct().ToList();

    foreach (StreetName street in data.StreetData)
    {
        // The street record must be named and must exist in the address's zip.
        if (string.IsNullOrEmpty(street.Name) || !street.ZipCodes.Contains(address.Zip.Value))
        {
            continue;
        }

        foreach (string city in cityCandidates)
        {
            StreetNameAndZip key = new StreetNameAndZip
            {
                Zip = address.Zip.Value,
                FullStreetName = street.FullStreetName,
            };

            // The candidate city must be recorded both for this street+zip and for the street itself.
            if (!data.StreetNameZip2Cities[key].Contains(city))
            {
                continue;
            }

            if (!street.Cities.Contains(city))
            {
                continue;
            }

            foreach (string candidate in streetCandidates)
            {
                if (candidate == street.Name)
                {
                    found.Add(AddressUtility.CreateLineFromAddress(address, street.FullStreetName, address.Zip, city));
                }
            }
        }
    }

    lock (alternateLines)
    {
        alternateLines.AddRange(found);
    }

    return found.Count > 0;
}
/// <summary>
/// Proposes alternate address lines where both the street name and the city come from the
/// candidates returned by <c>BKTreeEngine.LeastEditDistance</c>; every zip recorded for a
/// matching street/city pair yields one line.
/// </summary>
/// <param name="address">Address being matched; needs a street name and a city.</param>
/// <param name="data">Reference data: city/street trees, street records and the street+city to zips lookup.</param>
/// <param name="alternateLines">Output list; the lock and append happen on every call, even when nothing was found.</param>
/// <returns><c>true</c> when at least one alternate line was produced; otherwise <c>false</c>.</returns>
public static bool IsSoftAddressAndSoftCityMatch(Address address, Data data, List<string> alternateLines)
{
    List<string> found = new List<string>();

    bool canSearch = !string.IsNullOrEmpty(address.StreetName) && !string.IsNullOrEmpty(address.City);
    if (canSearch)
    {
        List<string> cityCandidates = BKTreeEngine.LeastEditDistance(address.City, data.CityNameBKTree).Distinct().ToList();
        List<string> streetCandidates = BKTreeEngine.LeastEditDistance(address.StreetName, data.StreetNameBKTree).Distinct().ToList();

        foreach (StreetName street in data.StreetData)
        {
            if (string.IsNullOrEmpty(street.Name))
            {
                continue;
            }

            foreach (string city in cityCandidates)
            {
                if (!street.Cities.Contains(city))
                {
                    continue;
                }

                foreach (string candidate in streetCandidates)
                {
                    if (candidate != street.Name)
                    {
                        continue;
                    }

                    StreetNameAndCity key = new StreetNameAndCity
                    {
                        City = city,
                        FullStreetName = street.FullStreetName,
                    };

                    // One alternate line per zip recorded for this street/city pair.
                    foreach (int zip in data.StreetNameCity2Zips[key].ToArray())
                    {
                        found.Add(AddressUtility.CreateLineFromAddress(address, street.FullStreetName, zip, city));
                    }
                }
            }
        }
    }

    lock (alternateLines)
    {
        alternateLines.AddRange(found);
    }

    return found.Count > 0;
}
/// <summary>
/// Looks for the single street record that best fits an address whose parts may be out of
/// order: the record's name must occur in the address's full street name and the record must
/// be listed for one of the city candidates. Records are then scored by how many of their
/// pre-direction, pre-type and suffix parts are found in the address; the first record with
/// the highest non-zero score wins.
/// </summary>
/// <param name="address">Address being matched; needs a street name and a city.</param>
/// <param name="data">Reference data: city tree, street records and the street+city to zips lookup.</param>
/// <param name="alternateLines">Output list; one line per zip of the winning street/city is added under a lock on the list.</param>
/// <returns><c>true</c> when a record scored above zero; otherwise <c>false</c>.</returns>
public static bool IsRearrangedAddressAndSoftCityMatch(Address address, Data data, List<string> alternateLines)
{
    if (string.IsNullOrEmpty(address.StreetName) || string.IsNullOrEmpty(address.City))
    {
        return false;
    }

    List<string> cityCandidates = BKTreeEngine.LeastEditDistance(address.City, data.CityNameBKTree).Distinct().ToList();

    StreetName winner = null;
    string winnerCity = null;
    int topScore = 0;

    foreach (StreetName street in data.StreetData)
    {
        foreach (string city in cityCandidates)
        {
            // Demand that the stem and the city both match.
            bool isCandidate = !string.IsNullOrEmpty(street.Name)
                && street.Cities.Contains(city)
                && StringUtility.Contains(address.FullStreetName, street.Name);
            if (!isCandidate)
            {
                continue;
            }

            int score = 0;

            // Pre-direction: the record's one-letter code must correspond to the address's spelled-out direction.
            string expectedDirection = null;
            switch (street.PreDirection)
            {
                case "E":
                    expectedDirection = "EAST";
                    break;
                case "W":
                    expectedDirection = "WEST";
                    break;
                case "S":
                    expectedDirection = "SOUTH";
                    break;
                case "N":
                    expectedDirection = "NORTH";
                    break;
            }

            if (expectedDirection != null && address.CardinalDirection == expectedDirection)
            {
                score++;
            }

            // Pre-type and suffix (when the record has them) may appear anywhere in the address text.
            string fullName = string.Join(" ", address.StreetNumber, address.StreetName, address.Suffix);

            if (!string.IsNullOrEmpty(street.PreType) && StringUtility.Contains(fullName, street.PreType))
            {
                score++;
            }

            if (!string.IsNullOrEmpty(street.Suffix) && StringUtility.Contains(fullName, street.Suffix))
            {
                score++;
            }

            // Strictly greater: on a tie the earlier record/city is kept.
            if (score > topScore)
            {
                topScore = score;
                winner = street;
                winnerCity = city;
            }
        }
    }

    if (topScore <= 0)
    {
        return false;
    }

    // Given the city + street, what are the available zips?
    StreetNameAndCity key = new StreetNameAndCity { City = winnerCity, FullStreetName = winner.FullStreetName };
    int[] zips = data.StreetNameCity2Zips[key].ToArray();

    lock (alternateLines)
    {
        foreach (int zip in zips)
        {
            alternateLines.Add(AddressUtility.CreateLineFromAddress(address, winner.FullStreetName, zip, winnerCity));
        }
    }

    return true;
}
/// <summary>
/// Parses <paramref name="line"/> into an <c>Address</c> and tries a sequence of increasingly
/// loose matching steps, stopping at the first that succeeds:
/// 1. whatever <c>AddressUtility.InitializeAddress</c> already decided;
/// 2. <c>TestAddress</c> with 0 as its last argument on the full street name, then with " ST", " AVE", " BLVD" appended;
/// 3. a search restricted to streets whose number range and zip fit the address;
/// 4. the same <c>TestAddress</c> attempts as step 2 with 2 as the last argument.
/// The step that succeeded is recorded in the corresponding <c>levelNMatch</c> collection, and
/// the address is finally filed under the collection matching its <c>MatchQuality</c>.
/// </summary>
/// <param name="line">Raw input line to parse.</param>
/// <param name="data">Reference data used by the parsing and matching helpers.</param>
/// <returns>The parsed (and possibly corrected) address.</returns>
private static Address LucasAddressMatch(string line, Data data)
{
    // NOTE(review): the levelNMatch collections (declared outside this method) are updated
    // without a lock, while the MatchQuality collections below are locked - confirm the level
    // collections are safe for concurrent Add if this method runs in parallel.
    string[] suffixesToTry = { " ST", " AVE", " BLVD" };

    Address address = AddressUtility.InitializeAddress(line, data);

    bool matchFound = (address.MatchQuality != MatchQuality.MatchNotYetDetermined);
    if (matchFound)
    {
        level1Match.Add(address.FullStreetName);
    }

    // TestAddress with 0 as last argument: the name as parsed, then with each common suffix appended.
    if (!matchFound)
    {
        matchFound = (TestAddress(data, ref address, address.FullStreetName, 0) != null);
        if (matchFound)
        {
            level2Match.Add(address.FullStreetName);
        }
    }

    foreach (string suffix in suffixesToTry)
    {
        if (matchFound)
        {
            break;
        }

        matchFound = (TestAddress(data, ref address, address.FullStreetName + suffix, 0) != null);
        if (matchFound)
        {
            level3Match.Add(address.FullStreetName);
        }
    }

    // Search by ZIP: only streets whose number range contains the street number and whose zip equals the address's zip.
    if (!matchFound
        && address.Zip != null
        && !string.IsNullOrEmpty(address.StreetName)
        && !string.IsNullOrEmpty(address.StreetNumber))
    {
        StateOfNewYorkAddressRange[] streetsWithZip = data.NYCityStreets
            .Where(n => n.StreetNumber.IsInRange(address.StreetNumber) && n.ZipCode == address.Zip.Value)
            .ToArray();
        List<string> streetsWithZipStrings = streetsWithZip.Select(s => s.FullStreetName).Distinct().ToList();
        BKTree bkTreeLocal = BKTreeEngine.CreateBKTree(streetsWithZipStrings);

        int distance;
        List<string> closestNeighbors = BKTreeEngine.LeastEditDistanceWithDistance(address.FullStreetName, bkTreeLocal, out distance);

        // Accept only an unambiguous (single) neighbor at distance 2 or less;
        // distance <= 1 is recorded as level 5, distance 2 as level 6.
        if (closestNeighbors.Count == 1 && distance <= 2)
        {
            CorrectAddress(data, ref address, closestNeighbors[0]);
            matchFound = true;
            if (distance <= 1)
            {
                level5Match.Add(address.FullStreetName);
            }
            else
            {
                level6Match.Add(address.FullStreetName);
            }
        }
    }

    // TestAddress with 2 as last argument: same sequence as above.
    if (!matchFound)
    {
        matchFound = (TestAddress(data, ref address, address.FullStreetName, 2) != null);
        if (matchFound)
        {
            level7Match.Add(address.FullStreetName);
        }
    }

    foreach (string suffix in suffixesToTry)
    {
        if (matchFound)
        {
            break;
        }

        matchFound = (TestAddress(data, ref address, address.FullStreetName + suffix, 2) != null);
        if (matchFound)
        {
            level8Match.Add(address.FullStreetName);
        }
    }

    // File the address under the collection for its final match quality.
    if (address.MatchQuality == MatchQuality.Unknown)
    {
        lock (unknown)
        {
            unknown.Add(AddressUtility.CreateLineFromAddress(address, "UNKNOWN"));
        }
    }
    else if (address.MatchQuality == MatchQuality.Homeless)
    {
        lock (homeless)
        {
            homeless.Add(AddressUtility.CreateLineFromAddress(address, "HOMELESS"));
        }
    }
    else if (address.MatchQuality == MatchQuality.Alternate)
    {
        lock (alternate)
        {
            alternate.Add(address.RawAddress1);
        }
    }
    else if (address.MatchQuality == MatchQuality.LeaveAlone)
    {
        lock (leaveAlone)
        {
            leaveAlone.Add(address.RawAddress1);
        }
    }
    else if (address.MatchQuality == MatchQuality.MatchNotYetDetermined)
    {
        lock (matchNotYetDetermined)
        {
            matchNotYetDetermined.Add($"{address.RawAddress1}=>{address.FullStreetName}");
        }
    }

    return address;
}