/// <summary>
/// Reads "state_of_new_york_revised.csv" (resolved against the current working directory)
/// and returns its distinct lines.
/// </summary>
/// <remarks>
/// Earlier revisions followed the load with a normalisation pass (CleanSpacesAndPunctuation,
/// CleanAddressFormat, NormalizeSuffix) that rewrote the CSV in place. That pass sat behind an
/// unconditional return and could never execute, so it has been removed; the observable
/// behaviour of this method is unchanged.
/// </remarks>
/// <param name="data">Not read by the current implementation; kept so existing callers still compile.</param>
/// <returns>The distinct lines of the file.</returns>
private static string[] LoadNYStateStreets(Data data)
{
    string[] streets = File.ReadAllLines("state_of_new_york_revised.csv");
    return streets.Distinct().ToArray();
}
/// <summary>
/// Replaces the street name and suffix of <paramref name="address"/> with the values that
/// <c>AddressUtility.NormalizeSuffix</c> derives from <paramref name="correctedString"/>, and marks
/// the address as <c>MatchQuality.Alternate</c>.
/// </summary>
/// <param name="data">Reference data forwarded to <c>AddressUtility.NormalizeSuffix</c>.</param>
/// <param name="address">The address to update.</param>
/// <param name="correctedString">The corrected street text to normalise.</param>
/// <returns>The updated street name and suffix separated by a single space.</returns>
private static string CorrectAddress(Data data, ref Address address, string correctedString)
{
    Address normalized = AddressUtility.NormalizeSuffix(correctedString, data);

    address.StreetName = normalized.StreetName;
    address.Suffix = normalized.Suffix;
    address.MatchQuality = MatchQuality.Alternate;

    var updatedName = address.StreetName;
    var updatedSuffix = address.Suffix;
    return $"{updatedName} {updatedSuffix}";
}
/// <summary>
/// Fuzzy-matches the address's street name against the street-name BK-tree and, when the first
/// returned candidate is close enough, emits one alternate line for every city/zip combination of
/// every street whose name is among the candidates.
/// </summary>
/// <remarks>
/// "Close enough" means <c>EditDistanceEngine.ComputeNormalized</c> of the first candidate and the
/// address street name is below 0.5. The street table is only scanned once that threshold has
/// passed, and candidate membership is tested through a hash set rather than a linear list search.
/// </remarks>
/// <param name="address">The parsed address; only a non-empty <c>StreetName</c> is required.</param>
/// <param name="data">Reference data (street list, BK-tree, street+city to zips lookup).</param>
/// <param name="alternateLines">Receives the generated lines; appended to under a lock on the list itself.</param>
/// <returns>true when at least one alternate line was generated; otherwise false.</returns>
public static bool IsSoftAddressMatch(Address address, Data data, List<string> alternateLines)
{
    if (string.IsNullOrEmpty(address.StreetName))
    {
        return false;
    }

    List<string> innerAlternateLines = new List<string>();
    List<string> softMatchedStreets = BKTreeEngine.LeastEditDistance(address.StreetName, data.StreetNameBKTree).Distinct().ToList();

    if (softMatchedStreets.Count > 0 &&
        EditDistanceEngine.ComputeNormalized(softMatchedStreets[0], address.StreetName) < .5f)
    {
        // The candidates are already distinct, so a set lookup is equivalent to the
        // former "filter by Contains, then loop again comparing names" sequence.
        HashSet<string> candidateNames = new HashSet<string>(softMatchedStreets);

        foreach (StreetName name in data.StreetData)
        {
            if (string.IsNullOrEmpty(name.Name) || !candidateNames.Contains(name.Name))
            {
                continue;
            }

            foreach (string city in name.Cities.ToArray())
            {
                StreetNameAndCity key = new StreetNameAndCity
                {
                    City = city,
                    FullStreetName = name.FullStreetName,
                };

                foreach (int possibleZip in data.StreetNameCity2Zips[key].ToArray())
                {
                    innerAlternateLines.Add(AddressUtility.CreateLineFromAddress(address, name.FullStreetName, possibleZip, city));
                }
            }
        }
    }

    lock (alternateLines)
    {
        alternateLines.AddRange(innerAlternateLines);
    }

    return innerAlternateLines.Count > 0;
}
/// <summary>
/// Trusts the address's zip code and looks for the street(s) in that zip whose name is closest
/// (by normalised edit distance) to the address's street name.
/// </summary>
/// <remarks>
/// All streets that tie for the lowest distance are kept. For each of them one alternate line is
/// produced per city listed for that street + zip in <c>data.StreetNameZip2Cities</c>.
/// </remarks>
/// <param name="address">The parsed address; needs a non-empty <c>StreetName</c> and a zip.</param>
/// <param name="data">Reference data (street list and street+zip to cities lookup).</param>
/// <param name="alternateLines">Receives the generated lines; appended to under a lock on the list itself.</param>
/// <returns>true when at least one alternate line was generated; otherwise false.</returns>
public static bool IsSoftAddressAndHardZipMatch(Address address, Data data, List<string> alternateLines)
{
    List<string> innerAlternateLines = new List<string>();

    if (!string.IsNullOrEmpty(address.StreetName) && address.Zip.HasValue)
    {
        StreetName[] streetsWithThisZip = data.StreetData.Where(n => n.ZipCodes.Contains(address.Zip.Value)).ToArray();
        double lowestEditDistance = double.MaxValue;

        foreach (StreetName streetWithThisZip in streetsWithThisZip)
        {
            string[] citiesWithThisStreetAndZip = data.StreetNameZip2Cities[new StreetNameAndZip
            {
                FullStreetName = streetWithThisZip.FullStreetName,
                Zip = address.Zip.Value
            }].ToArray();

            double editDistance = EditDistanceEngine.ComputeNormalized(streetWithThisZip.Name, address.StreetName);

            if (editDistance < lowestEditDistance)
            {
                // Strictly better: everything collected so far belongs to a worse match.
                innerAlternateLines.Clear();
                lowestEditDistance = editDistance;
            }
            else if (editDistance != lowestEditDistance)
            {
                // Worse than the current best (or not comparable, e.g. NaN): ignore.
                continue;
            }

            // Reached for a new best and for exact ties with the current best.
            foreach (string city in citiesWithThisStreetAndZip)
            {
                innerAlternateLines.Add(AddressUtility.CreateLineFromAddress(address, streetWithThisZip.FullStreetName, address.Zip.Value, city));
            }
        }
    }

    lock (alternateLines)
    {
        alternateLines.AddRange(innerAlternateLines);
    }

    return innerAlternateLines.Count > 0;
}
/// <summary>
/// Trusts the zip code, fuzzy-matches both the city and the street name, and emits an alternate
/// line for every street in that zip whose name is a street candidate and which is listed under
/// one of the city candidates.
/// </summary>
/// <param name="address">The parsed address; needs a non-empty <c>StreetName</c>, a zip and a non-empty <c>City</c>.</param>
/// <param name="data">Reference data (street list, city and street BK-trees, street+zip to cities lookup).</param>
/// <param name="alternateLines">Receives the generated lines; appended to under a lock on the list itself.</param>
/// <returns>true when at least one alternate line was generated; false otherwise, including when a required part is missing.</returns>
public static bool IsSoftAddressAndSoftCityHardZipMatch(Address address, Data data, List<string> alternateLines)
{
    bool hasAllParts = !string.IsNullOrEmpty(address.StreetName)
                       && address.Zip != null
                       && !string.IsNullOrEmpty(address.City);
    if (!hasAllParts)
    {
        return false;
    }

    List<string> collected = new List<string>();
    List<string> cityCandidates = BKTreeEngine.LeastEditDistance(address.City, data.CityNameBKTree).Distinct().ToList();
    List<string> streetCandidates = BKTreeEngine.LeastEditDistance(address.StreetName, data.StreetNameBKTree).Distinct().ToList();

    foreach (StreetName street in data.StreetData)
    {
        if (string.IsNullOrEmpty(street.Name) || !street.ZipCodes.Contains(address.Zip.Value))
        {
            continue;
        }

        foreach (string cityCandidate in cityCandidates)
        {
            StreetNameAndZip key = new StreetNameAndZip
            {
                Zip = address.Zip.Value,
                FullStreetName = street.FullStreetName,
            };

            // The city must be recorded both for this street+zip pair and on the street itself.
            if (!data.StreetNameZip2Cities[key].Contains(cityCandidate) || !street.Cities.Contains(cityCandidate))
            {
                continue;
            }

            foreach (string streetCandidate in streetCandidates)
            {
                if (streetCandidate != street.Name)
                {
                    continue;
                }

                collected.Add(AddressUtility.CreateLineFromAddress(address, street.FullStreetName, address.Zip, cityCandidate));
            }
        }
    }

    lock (alternateLines)
    {
        alternateLines.AddRange(collected);
    }

    return collected.Count > 0;
}
/// <summary>
/// Ignores the zip code, fuzzy-matches both the city and the street name, and emits an alternate
/// line for every zip recorded for each (street, city) pair that survives both fuzzy matches.
/// </summary>
/// <param name="address">The parsed address; needs a non-empty <c>StreetName</c> and <c>City</c>.</param>
/// <param name="data">Reference data (street list, city and street BK-trees, street+city to zips lookup).</param>
/// <param name="alternateLines">Receives the generated lines; appended to under a lock on the list itself.</param>
/// <returns>true when at least one alternate line was generated; otherwise false.</returns>
public static bool IsSoftAddressAndSoftCityMatch(Address address, Data data, List<string> alternateLines)
{
    List<string> collected = new List<string>();

    bool canSearch = !string.IsNullOrEmpty(address.StreetName) && !string.IsNullOrEmpty(address.City);
    if (canSearch)
    {
        List<string> cityCandidates = BKTreeEngine.LeastEditDistance(address.City, data.CityNameBKTree).Distinct().ToList();
        List<string> streetCandidates = BKTreeEngine.LeastEditDistance(address.StreetName, data.StreetNameBKTree).Distinct().ToList();

        foreach (StreetName street in data.StreetData)
        {
            if (string.IsNullOrEmpty(street.Name))
            {
                continue;
            }

            foreach (string cityCandidate in cityCandidates)
            {
                if (!street.Cities.Contains(cityCandidate))
                {
                    continue;
                }

                foreach (string streetCandidate in streetCandidates)
                {
                    if (streetCandidate != street.Name)
                    {
                        continue;
                    }

                    StreetNameAndCity key = new StreetNameAndCity
                    {
                        City = cityCandidate,
                        FullStreetName = street.FullStreetName,
                    };

                    foreach (int zip in data.StreetNameCity2Zips[key].ToArray())
                    {
                        collected.Add(AddressUtility.CreateLineFromAddress(address, street.FullStreetName, zip, cityCandidate));
                    }
                }
            }
        }
    }

    lock (alternateLines)
    {
        alternateLines.AddRange(collected);
    }

    return collected.Count > 0;
}
/// <summary>
/// Looks for streets whose <c>FullStreetName</c> is exactly "street-name suffix" of the address and
/// emits one alternate line for every city/zip combination recorded for each such street.
/// The address's own city and zip are not consulted.
/// </summary>
/// <param name="address">The parsed address; needs a non-empty <c>StreetName</c>.</param>
/// <param name="data">Reference data (street list and street+city to zips lookup).</param>
/// <param name="alternateLines">Receives the generated lines; appended to under a lock on the list itself.</param>
/// <returns>true when at least one alternate line was generated; otherwise false.</returns>
internal static bool IsSolidStreetMatchOnly(Address address, Data data, List<string> alternateLines)
{
    if (string.IsNullOrEmpty(address.StreetName))
    {
        return false;
    }

    List<string> collected = new List<string>();
    string wantedFullName = string.Join(" ", address.StreetName, address.Suffix);

    foreach (StreetName street in data.StreetData.Where(n => n.FullStreetName == wantedFullName).ToArray())
    {
        foreach (string city in street.Cities.ToArray())
        {
            StreetNameAndCity key = new StreetNameAndCity
            {
                City = city,
                FullStreetName = street.FullStreetName,
            };

            foreach (int zip in data.StreetNameCity2Zips[key].ToArray())
            {
                collected.Add(AddressUtility.CreateLineFromAddress(address, street.FullStreetName, zip, city));
            }
        }
    }

    lock (alternateLines)
    {
        alternateLines.AddRange(collected);
    }

    return collected.Count > 0;
}
/// <summary>
/// Handles addresses that carry a street name but neither city nor zip: looks for streets whose
/// <c>FullStreetName</c> is exactly "street-name suffix" of the address and emits one alternate line
/// for every city/zip combination recorded for each such street.
/// </summary>
/// <remarks>
/// Two defects of the previous implementation are fixed here:
/// it reported success even when no street matched and nothing was emitted, and it re-emitted a
/// line for every matching street inside the per-street loop, which produced duplicate identical
/// lines whenever more than one street entry shared the full name.
/// </remarks>
/// <param name="address">The parsed address; must have a street name and no city or zip.</param>
/// <param name="data">Reference data (street list and street+city to zips lookup).</param>
/// <param name="alternateLines">Receives the generated lines; appended to under a lock on the list itself.</param>
/// <returns>true when at least one alternate line was generated; otherwise false.</returns>
internal static bool IsSolidAddressMatchWithNoZipCityAvailable(Address address, Data data, List<string> alternateLines)
{
    if (string.IsNullOrEmpty(address.StreetName) || !string.IsNullOrEmpty(address.City) || address.Zip.HasValue)
    {
        return false;
    }

    List<string> innerAlternateLines = new List<string>();
    string streetToMatch = string.Join(" ", address.StreetName, address.Suffix);
    StreetName[] matches = data.StreetData.Where(n => n.FullStreetName == streetToMatch).ToArray();

    foreach (StreetName match in matches)
    {
        foreach (string possibleCity in match.Cities.ToArray())
        {
            StreetNameAndCity key = new StreetNameAndCity
            {
                City = possibleCity,
                FullStreetName = match.FullStreetName,
            };

            foreach (int possibleZip in data.StreetNameCity2Zips[key].ToArray())
            {
                innerAlternateLines.Add(AddressUtility.CreateLineFromAddress(address, match.FullStreetName, possibleZip, possibleCity));
            }
        }
    }

    lock (alternateLines)
    {
        alternateLines.AddRange(innerAlternateLines);
    }

    // Report a match only when something was actually produced, so that callers can
    // fall through to their remaining strategies otherwise.
    return innerAlternateLines.Count > 0;
}
/// <summary>
/// Handles addresses whose street stem, city and zip all agree with the reference data but whose
/// remaining parts (direction, pre-type, suffix) may be misplaced or out of order.
/// </summary>
/// <remarks>
/// Candidates are the streets whose <c>Name</c> equals the address street name and which list both
/// the address city and the address zip. A candidate earns one point when its pre-direction letter
/// (E/W/S/N) agrees with the address's cardinal direction (EAST/WEST/SOUTH/NORTH), and one point for
/// each of its non-empty pre-type and suffix that <c>StringUtility.Contains</c> finds in the address's
/// "number name suffix" text. The first candidate with the highest score wins, provided that score
/// is at least one.
/// </remarks>
/// <param name="address">The parsed address; needs a non-empty <c>StreetName</c>, a zip and a non-empty <c>City</c>.</param>
/// <param name="data">Reference data (street list).</param>
/// <param name="alternateLines">Receives the corrected line; appended to under a lock on the list itself.</param>
/// <returns>true when a candidate scored and a line was appended; otherwise false.</returns>
public static bool IsRearrangedAddressAndCityZipMatch(Address address, Data data, List<string> alternateLines)
{
    if (string.IsNullOrEmpty(address.StreetName) || address.Zip == null || string.IsNullOrEmpty(address.City))
    {
        return false;
    }

    StreetName[] candidates = data.StreetData
        .Where(n => n.Name == address.StreetName && n.Cities.Contains(address.City) && n.ZipCodes.Contains(address.Zip.Value))
        .ToArray();

    StreetName winner = null;
    int topScore = 0;

    foreach (StreetName candidate in candidates)
    {
        int score = 0;

        bool directionAgrees =
            (candidate.PreDirection == "E" && address.CardinalDirection == "EAST") ||
            (candidate.PreDirection == "W" && address.CardinalDirection == "WEST") ||
            (candidate.PreDirection == "S" && address.CardinalDirection == "SOUTH") ||
            (candidate.PreDirection == "N" && address.CardinalDirection == "NORTH");
        if (directionAgrees)
        {
            score++;
        }

        string addressText = string.Join(" ", address.StreetNumber, address.StreetName, address.Suffix);

        if (!string.IsNullOrEmpty(candidate.PreType) && StringUtility.Contains(addressText, candidate.PreType))
        {
            score++;
        }

        if (!string.IsNullOrEmpty(candidate.Suffix) && StringUtility.Contains(addressText, candidate.Suffix))
        {
            score++;
        }

        // Strict comparison: on ties the earlier candidate is kept.
        if (score > topScore)
        {
            topScore = score;
            winner = candidate;
        }
    }

    if (topScore <= 0)
    {
        return false;
    }

    // Only the street name is corrected; the address's own zip and city are kept.
    string correctedLine = AddressUtility.CreateLineFromAddress(address, winner.FullStreetName, address.Zip.Value, address.City);
    lock (alternateLines)
    {
        alternateLines.Add(correctedLine);
    }

    return true;
}
/// <summary>
/// Handles addresses whose parts may be rearranged and whose city may be misspelled: the city is
/// fuzzy-matched, and every street listed under a city candidate whose stem
/// <c>StringUtility.Contains</c> finds in the address's full street name is scored.
/// </summary>
/// <remarks>
/// A (street, city) pair earns one point when the street's pre-direction letter (E/W/S/N) agrees
/// with the address's cardinal direction, and one point for each of the street's non-empty
/// pre-type and suffix found in the address's "number name suffix" text. The first pair with the
/// highest score wins, provided that score is at least one; one alternate line is then emitted per
/// zip recorded for the winning street + city.
/// </remarks>
/// <param name="address">The parsed address; needs a non-empty <c>StreetName</c> and <c>City</c>.</param>
/// <param name="data">Reference data (street list, city BK-tree, street+city to zips lookup).</param>
/// <param name="alternateLines">Receives the generated lines; appended to under a lock on the list itself.</param>
/// <returns>true when a pair scored at least one point; otherwise false.</returns>
public static bool IsRearrangedAddressAndSoftCityMatch(Address address, Data data, List<string> alternateLines)
{
    if (string.IsNullOrEmpty(address.StreetName) || string.IsNullOrEmpty(address.City))
    {
        return false;
    }

    List<string> cityCandidates = BKTreeEngine.LeastEditDistance(address.City, data.CityNameBKTree).Distinct().ToList();

    StreetName winner = null;
    string winningCity = null;
    int topScore = 0;

    foreach (StreetName candidate in data.StreetData)
    {
        foreach (string cityCandidate in cityCandidates)
        {
            bool eligible = !string.IsNullOrEmpty(candidate.Name)
                            && candidate.Cities.Contains(cityCandidate)
                            && StringUtility.Contains(address.FullStreetName, candidate.Name);
            if (!eligible)
            {
                continue;
            }

            int score = 0;

            bool directionAgrees =
                (candidate.PreDirection == "E" && address.CardinalDirection == "EAST") ||
                (candidate.PreDirection == "W" && address.CardinalDirection == "WEST") ||
                (candidate.PreDirection == "S" && address.CardinalDirection == "SOUTH") ||
                (candidate.PreDirection == "N" && address.CardinalDirection == "NORTH");
            if (directionAgrees)
            {
                score++;
            }

            string addressText = string.Join(" ", address.StreetNumber, address.StreetName, address.Suffix);

            if (!string.IsNullOrEmpty(candidate.PreType) && StringUtility.Contains(addressText, candidate.PreType))
            {
                score++;
            }

            if (!string.IsNullOrEmpty(candidate.Suffix) && StringUtility.Contains(addressText, candidate.Suffix))
            {
                score++;
            }

            // Strict comparison: on ties the earlier pair is kept.
            if (score > topScore)
            {
                topScore = score;
                winner = candidate;
                winningCity = cityCandidate;
            }
        }
    }

    if (topScore <= 0)
    {
        return false;
    }

    // With street and city settled, every zip recorded for that pair is a possibility.
    StreetNameAndCity key = new StreetNameAndCity
    {
        City = winningCity,
        FullStreetName = winner.FullStreetName
    };
    int[] zipsForWinner = data.StreetNameCity2Zips[key].ToArray();

    lock (alternateLines)
    {
        foreach (int zip in zipsForWinner)
        {
            alternateLines.Add(AddressUtility.CreateLineFromAddress(address, winner.FullStreetName, zip, winningCity));
        }
    }

    return true;
}
/// <summary>
/// Handles addresses whose street stem agrees with the reference data and for which at least one
/// of city or zip also agrees, while the remaining parts (direction, pre-type, suffix) may be
/// misplaced or out of order.
/// </summary>
/// <remarks>
/// Candidates are the streets whose <c>Name</c> equals the address street name and which list the
/// address city or, when a zip is present, the address zip. A candidate earns one point when its
/// pre-direction letter (E/W/S/N) agrees with the address's cardinal direction, and one point for
/// each of its non-empty pre-type and suffix that <c>StringUtility.Contains</c> finds in the
/// address's "number name suffix" text. The first candidate with the highest score wins, provided
/// that score is at least one. If the winner lists the address city, one line per zip of that
/// street + city is emitted; otherwise, if it lists the address zip, one line per city of that
/// street + zip is emitted.
/// </remarks>
/// <param name="address">The parsed address; needs a non-empty <c>StreetName</c> and a zip or a non-empty <c>City</c>.</param>
/// <param name="data">Reference data (street list and both street lookups).</param>
/// <param name="alternateLines">Receives the generated lines; appended to under a lock on the list itself.</param>
/// <returns>true when a candidate scored and lines were emitted for it; otherwise false.</returns>
public static bool IsRearrangedAddressAndCityOrZipMatch(Address address, Data data, List<string> alternateLines)
{
    if (string.IsNullOrEmpty(address.StreetName) || (address.Zip == null && string.IsNullOrEmpty(address.City)))
    {
        return false;
    }

    StreetName winner = null;
    int topScore = 0;

    StreetName[] candidates = data.StreetData
        .Where(n => n.Name == address.StreetName &&
                    (n.Cities.Contains(address.City) ||
                     (address.Zip != null && n.ZipCodes.Contains(address.Zip.Value))))
        .ToArray();

    foreach (StreetName candidate in candidates)
    {
        int score = 0;

        bool directionAgrees =
            (candidate.PreDirection == "E" && address.CardinalDirection == "EAST") ||
            (candidate.PreDirection == "W" && address.CardinalDirection == "WEST") ||
            (candidate.PreDirection == "S" && address.CardinalDirection == "SOUTH") ||
            (candidate.PreDirection == "N" && address.CardinalDirection == "NORTH");
        if (directionAgrees)
        {
            score++;
        }

        string addressText = string.Join(" ", address.StreetNumber, address.StreetName, address.Suffix);

        if (!string.IsNullOrEmpty(candidate.PreType) && StringUtility.Contains(addressText, candidate.PreType))
        {
            score++;
        }

        if (!string.IsNullOrEmpty(candidate.Suffix) && StringUtility.Contains(addressText, candidate.Suffix))
        {
            score++;
        }

        // Strict comparison: on ties the earlier candidate is kept.
        if (score > topScore)
        {
            topScore = score;
            winner = candidate;
        }
    }

    if (topScore <= 0)
    {
        return false;
    }

    if (!string.IsNullOrEmpty(address.City) && winner.Cities.Contains(address.City))
    {
        // Matched on city: list every zip recorded for this street in that city.
        StreetNameAndCity cityKey = new StreetNameAndCity
        {
            City = address.City,
            FullStreetName = winner.FullStreetName
        };
        int[] zipsForCity = data.StreetNameCity2Zips[cityKey].ToArray();

        lock (alternateLines)
        {
            foreach (int zip in zipsForCity)
            {
                alternateLines.Add(AddressUtility.CreateLineFromAddress(address, winner.FullStreetName, zip, address.City));
            }
        }

        return true;
    }

    if (address.Zip != null && winner.ZipCodes.Contains(address.Zip.Value))
    {
        // Matched on zip: list every city recorded for this street in that zip.
        StreetNameAndZip zipKey = new StreetNameAndZip
        {
            Zip = address.Zip.Value,
            FullStreetName = winner.FullStreetName
        };
        string[] citiesForZip = data.StreetNameZip2Cities[zipKey].ToArray();

        lock (alternateLines)
        {
            foreach (string city in citiesForZip)
            {
                alternateLines.Add(AddressUtility.CreateLineFromAddress(address, winner.FullStreetName, address.Zip.Value, city));
            }
        }

        return true;
    }

    return false;
}
/// <summary>
/// Parses <paramref name="line"/> into an address and runs it through a fixed cascade of matching
/// attempts, recording the outcome in the shared level/bucket collections.
/// </summary>
/// <remarks>
/// The cascade stops at the first success:
/// <list type="number">
/// <item><description>The parser already settled the match quality (recorded in level 1).</description></item>
/// <item><description><c>TestAddress</c> with last argument 0 on the full street name (level 2), then on the
/// name with " ST", " AVE" and " BLVD" appended in turn (level 3).</description></item>
/// <item><description>When zip, street name and street number are all present: a BK-tree built from the
/// NYC streets in that zip whose number range contains the street number. A single closest
/// neighbour at distance 1 or less is level 5, at distance 2 is level 6.</description></item>
/// <item><description><c>TestAddress</c> with last argument 2 on the full street name (level 7), then with the
/// same three suffixes (level 8).</description></item>
/// </list>
/// NOTE(review): the last argument of <c>TestAddress</c> looks like an edit-distance tolerance - confirm.
/// Every write to a shared collection is made under a lock on that collection; the level buckets
/// were previously written without one although the other buckets here were already locked.
/// </remarks>
/// <param name="line">The raw input line.</param>
/// <param name="data">Reference data used by the parser and the matchers.</param>
/// <returns>The parsed address, updated by whichever attempt succeeded.</returns>
private static Address LucasAddressMatch(string line, Data data)
{
    string[] fallbackSuffixes = { " ST", " AVE", " BLVD" };
    string matched;

    Address address = AddressUtility.InitializeAddress(line, data);
    bool matchFound = (address.MatchQuality != MatchQuality.MatchNotYetDetermined);
    if (matchFound)
    {
        lock (level1Match)
        {
            level1Match.Add(address.FullStreetName);
        }
    }

    // First round of TestAddress attempts (last argument 0).
    if (!matchFound)
    {
        matched = TestAddress(data, ref address, address.FullStreetName, 0);
        matchFound = (matched != null);
        if (matchFound)
        {
            lock (level2Match)
            {
                level2Match.Add(address.FullStreetName);
            }
        }
    }

    foreach (string suffix in fallbackSuffixes)
    {
        if (matchFound)
        {
            break;
        }

        matched = TestAddress(data, ref address, address.FullStreetName + suffix, 0);
        matchFound = (matched != null);
        if (matchFound)
        {
            lock (level3Match)
            {
                level3Match.Add(address.FullStreetName);
            }
        }
    }

    // Search by ZIP: restrict the candidates to streets in the same zip whose number
    // range contains the street number, then take the closest neighbour if unambiguous.
    if (!matchFound &&
        address.Zip != null &&
        !string.IsNullOrEmpty(address.StreetName) &&
        !string.IsNullOrEmpty(address.StreetNumber))
    {
        StateOfNewYorkAddressRange[] streetsWithZip = data.NYCityStreets
            .Where(n => n.StreetNumber.IsInRange(address.StreetNumber) && n.ZipCode == address.Zip.Value)
            .ToArray();
        List<string> streetsWithZipStrings = streetsWithZip.Select(s => s.FullStreetName).Distinct().ToList();
        BKTree bkTreeLocal = BKTreeEngine.CreateBKTree(streetsWithZipStrings);

        int distance;
        List<string> closestNeighbors = BKTreeEngine.LeastEditDistanceWithDistance(address.FullStreetName, bkTreeLocal, out distance);

        if (closestNeighbors.Count == 1 && distance <= 1)
        {
            matched = CorrectAddress(data, ref address, closestNeighbors[0]);
            matchFound = true;
            lock (level5Match)
            {
                level5Match.Add(address.FullStreetName);
            }
        }
        else if (closestNeighbors.Count == 1 && distance <= 2)
        {
            matched = CorrectAddress(data, ref address, closestNeighbors[0]);
            matchFound = true;
            lock (level6Match)
            {
                level6Match.Add(address.FullStreetName);
            }
        }
    }

    // Second round of TestAddress attempts (last argument 2).
    if (!matchFound)
    {
        matched = TestAddress(data, ref address, address.FullStreetName, 2);
        matchFound = (matched != null);
        if (matchFound)
        {
            lock (level7Match)
            {
                level7Match.Add(address.FullStreetName);
            }
        }
    }

    foreach (string suffix in fallbackSuffixes)
    {
        if (matchFound)
        {
            break;
        }

        matched = TestAddress(data, ref address, address.FullStreetName + suffix, 2);
        matchFound = (matched != null);
        if (matchFound)
        {
            lock (level8Match)
            {
                level8Match.Add(address.FullStreetName);
            }
        }
    }

    // File the address into the bucket that corresponds to its final match quality.
    if (address.MatchQuality == MatchQuality.Unknown)
    {
        lock (unknown)
        {
            unknown.Add(AddressUtility.CreateLineFromAddress(address, "UNKNOWN"));
        }
    }
    else if (address.MatchQuality == MatchQuality.Homeless)
    {
        lock (homeless)
        {
            homeless.Add(AddressUtility.CreateLineFromAddress(address, "HOMELESS"));
        }
    }
    else if (address.MatchQuality == MatchQuality.Alternate)
    {
        lock (alternate)
        {
            alternate.Add(address.RawAddress1);
        }
    }
    else if (address.MatchQuality == MatchQuality.LeaveAlone)
    {
        lock (leaveAlone)
        {
            leaveAlone.Add(address.RawAddress1);
        }
    }
    else if (address.MatchQuality == MatchQuality.MatchNotYetDetermined)
    {
        lock (matchNotYetDetermined)
        {
            matchNotYetDetermined.Add($"{address.RawAddress1}=>{address.FullStreetName}");
        }
    }

    return address;
}
/// <summary>
/// Parses <paramref name="line"/> into an address and, when the parser could not settle it, tries
/// the <c>MatchEngine</c> strategies from strictest to loosest, stopping at the first that succeeds.
/// </summary>
/// <remarks>
/// The original line is recorded in the level bucket of the strategy that succeeded (PO boxes and
/// perfect matches share level 1), or in <c>failed</c> when none did. Addresses the parser classified
/// as unknown or homeless get a single "UNKNOWN"/"HOMELESS" line and are recorded in the matching
/// bucket. Any other match quality produces no lines and is not recorded.
/// Every write to a shared bucket is made under a lock on that bucket. Previously the unknown and
/// homeless buckets were written while holding a lock on the method-local result list, which gave
/// no mutual exclusion, and the PO-box write to level 1 was not locked at all.
/// </remarks>
/// <param name="line">The raw input line.</param>
/// <param name="data">Reference data used by the parser and the matchers.</param>
/// <returns>The alternate lines produced for this input; may be empty.</returns>
private static List<string> BenAddressMatch(string line, Data data)
{
    // Clean the address and do what we can with pure parsing first.
    Address address = AddressUtility.InitializeAddressBen(line, data);
    List<string> alternateLines = new List<string>();

    if (address.MatchQuality == MatchQuality.MatchNotYetDetermined)
    {
        if (address.POBoxNumber != 0)
        {
            lock (level1Match)
            {
                level1Match.Add(address.OriginalLine);
            }

            string alternateLine = AddressUtility.CreateLineFromAddress(address, "PO BOX " + address.POBoxNumber.ToString());
            alternateLines.Add(alternateLine);
        }
        else if (MatchEngine.IsPerfectMatchIncludingZipAndCity(address, data))
        {
            lock (level1Match)
            {
                level1Match.Add(address.OriginalLine);
            }
        }
        else if (MatchEngine.IsRearrangedAddressAndCityZipMatch(address, data, alternateLines))
        {
            lock (level2Match)
            {
                level2Match.Add(address.OriginalLine);
            }
        }
        else if (MatchEngine.IsRearrangedAddressAndCityOrZipMatch(address, data, alternateLines))
        {
            lock (level3Match)
            {
                level3Match.Add(address.OriginalLine);
            }
        }
        else if (MatchEngine.IsRearrangedAddressAndSoftCityMatch(address, data, alternateLines))
        {
            lock (level4Match)
            {
                level4Match.Add(address.OriginalLine);
            }
        }
        else if (MatchEngine.IsSolidAddressMatchWithNoZipCityAvailable(address, data, alternateLines))
        {
            lock (level5Match)
            {
                level5Match.Add(address.OriginalLine);
            }
        }
        else if (MatchEngine.IsSoftAddressAndSoftCityHardZipMatch(address, data, alternateLines))
        {
            lock (level6Match)
            {
                level6Match.Add(address.OriginalLine);
            }
        }
        else if (MatchEngine.IsSoftAddressAndHardZipMatch(address, data, alternateLines))
        {
            lock (level7Match)
            {
                level7Match.Add(address.OriginalLine);
            }
        }
        else if (MatchEngine.IsSoftAddressAndSoftCityMatch(address, data, alternateLines))
        {
            lock (level8Match)
            {
                level8Match.Add(address.OriginalLine);
            }
        }
        else if (MatchEngine.IsSolidStreetMatchOnly(address, data, alternateLines))
        {
            lock (level9Match)
            {
                level9Match.Add(address.OriginalLine);
            }
        }
        else if (MatchEngine.IsSoftAddressMatch(address, data, alternateLines))
        {
            lock (level10Match)
            {
                level10Match.Add(address.OriginalLine);
            }
        }
        else
        {
            lock (failed)
            {
                failed.Add(address.OriginalLine);
            }
        }
    }
    else if (address.MatchQuality == MatchQuality.Unknown)
    {
        // alternateLines is local to this call, so only the shared bucket needs a lock.
        alternateLines.Add(AddressUtility.CreateLineFromAddress(address, "UNKNOWN"));
        lock (unknown)
        {
            unknown.Add(address.OriginalLine);
        }
    }
    else if (address.MatchQuality == MatchQuality.Homeless)
    {
        alternateLines.Add(AddressUtility.CreateLineFromAddress(address, "HOMELESS"));
        lock (homeless)
        {
            homeless.Add(address.OriginalLine);
        }
    }

    return alternateLines;
}
/// <summary>
/// Loads "city_of_new_york_revised.csv" (resolved against the current working directory) into a
/// list of address ranges, one per data row.
/// </summary>
/// <remarks>
/// The first line of the file is skipped (presumably a header row - confirm). Each remaining line is
/// split on ',' and every field trimmed; the fields at zero-based positions 2, 3, 5 and 8 are read
/// as house number, street, city and zip, so a row with fewer than nine fields throws. The split is
/// a plain comma split and does not understand quoted fields.
/// The house number is interpreted as follows: "a-b" (digits only) becomes the range from the
/// smaller to the larger value, a plain run of digits becomes a one-number range, any other
/// non-empty text is passed through as-is, and an empty field becomes the range (-1, -1).
/// A zip that does not parse as an integer is stored as -1. Previously it was silently stored as 0,
/// because <c>int.TryParse</c> overwrites its out argument on failure.
/// </remarks>
/// <param name="data">Reference data forwarded to <c>AddressUtility.NormalizeSuffix</c>.</param>
/// <returns>The parsed address ranges, in file order.</returns>
private static List<StateOfNewYorkAddressRange> LoadNYCityAddresses(Data data)
{
    string[] addresses = File.ReadAllLines("city_of_new_york_revised.csv").Skip(1).ToArray();
    List<StateOfNewYorkAddressRange> ret = new List<StateOfNewYorkAddressRange>(addresses.Length);

    foreach (string address in addresses)
    {
        string[] bits = address.Split(',').Select(n => n.Trim()).ToArray();
        string addressBit = bits[3];
        string numberBit = bits[2];
        string city = bits[5];

        int zip;
        if (!int.TryParse(bits[8], out zip))
        {
            // TryParse writes 0 on failure; restore the "no zip" sentinel explicitly.
            zip = -1;
        }

        Address cleanAddress = AddressUtility.NormalizeSuffix(addressBit, data);

        StreetNumberRange streetNumber;
        if (Regex.IsMatch(numberBit, @"^\d+-\d+$"))
        {
            int[] bounds = numberBit.Split('-').Select(n => int.Parse(n)).ToArray();
            int low = bounds[0] > bounds[1] ? bounds[1] : bounds[0];
            int high = bounds[0] > bounds[1] ? bounds[0] : bounds[1];
            streetNumber = new StreetNumberRange(low, high);
        }
        else if (Regex.IsMatch(numberBit, @"^\d+$"))
        {
            int single = int.Parse(numberBit);
            streetNumber = new StreetNumberRange(single, single);
        }
        else if (numberBit.Length > 0)
        {
            // Not purely numeric (letters, fractions, ...): keep the raw text.
            streetNumber = new StreetNumberRange(numberBit);
        }
        else
        {
            streetNumber = new StreetNumberRange(-1, -1);
        }

        ret.Add(new StateOfNewYorkAddressRange
        {
            StreetNumber = streetNumber,
            City = city,
            StreetName = cleanAddress.StreetName,
            ZipCode = zip,
            Suffix = cleanAddress.Suffix,
            FullStreetName = cleanAddress.FullStreetName,
        });
    }

    return ret;
}