コード例 #1
0
        /// <summary>
        /// Reads "state_of_new_york_revised.csv" and returns its distinct lines.
        /// </summary>
        /// <param name="data">Not used by this method; kept so existing callers continue to compile.</param>
        /// <returns>The distinct lines of the file.</returns>
        private static string[] LoadNYStateStreets(Data data)
        {
            // File.ReadAllLines already returns a string[], so no additional copy is needed.
            string[] streets = File.ReadAllLines("state_of_new_york_revised.csv");

            // The clean-and-rewrite pass that used to follow this return statement could never
            // execute (it sat after an unconditional return), so it has been removed. The lines
            // are returned exactly as they appear in the file.
            return streets.Distinct().ToArray();
        }
コード例 #2
0
ファイル: Program.cs プロジェクト: kwende/MitchMatch
        /// <summary>
        /// Replaces the street name and suffix of <paramref name="address"/> with the ones produced by
        /// AddressUtility.NormalizeSuffix for <paramref name="correctedString"/>, and sets the match
        /// quality of the address to MatchQuality.Alternate.
        /// </summary>
        /// <param name="data">Reference data handed through to AddressUtility.NormalizeSuffix.</param>
        /// <param name="address">Address to update.</param>
        /// <param name="correctedString">Street text to normalize and copy from.</param>
        /// <returns>The updated street name and suffix separated by a single space.</returns>
        private static string CorrectAddress(Data data, ref Address address, string correctedString)
        {
            Address normalized = AddressUtility.NormalizeSuffix(correctedString, data);

            // Copy only the street parts; every other field of the address is left untouched.
            address.StreetName = normalized.StreetName;
            address.Suffix = normalized.Suffix;
            address.MatchQuality = MatchQuality.Alternate;

            return string.Format("{0} {1}", address.StreetName, address.Suffix);
        }
コード例 #3
0
ファイル: MatchEngine.cs プロジェクト: kwende/MitchMatch
        /// <summary>
        /// Looks up candidate street names for the street name of <paramref name="address"/> via
        /// BKTreeEngine.LeastEditDistance and, when the first candidate is close enough, produces one
        /// alternate line for every (street, city, zip) combination on record for the candidate streets.
        /// </summary>
        /// <param name="address">Address being matched; a null or empty StreetName yields no match.</param>
        /// <param name="data">Reference data: StreetNameBKTree, StreetData and StreetNameCity2Zips are read.</param>
        /// <param name="alternateLines">Shared output list; it is locked while the generated lines are appended.</param>
        /// <returns>True when at least one alternate line was appended.</returns>
        public static bool IsSoftAddressMatch(Address address, Data data, List<string> alternateLines)
        {
            if (string.IsNullOrEmpty(address.StreetName))
            {
                return false;
            }

            List<string> innerAlternateLines = new List<string>();
            List<string> softMatchedStreets = BKTreeEngine.LeastEditDistance(address.StreetName, data.StreetNameBKTree).Distinct().ToList();

            // Only the first candidate is tested against the 0.5 threshold, exactly as before.
            if (softMatchedStreets.Count > 0 &&
                EditDistanceEngine.ComputeNormalized(softMatchedStreets[0], address.StreetName) < .5f)
            {
                // Set membership replaces the former list scan plus nested equality loop. The
                // candidate list is already de-duplicated, so every street record still yields
                // its lines exactly once and in StreetData order.
                HashSet<string> candidateNames = new HashSet<string>(softMatchedStreets);

                foreach (StreetName name in data.StreetData)
                {
                    if (string.IsNullOrEmpty(name.Name) || !candidateNames.Contains(name.Name))
                    {
                        continue;
                    }

                    foreach (string city in name.Cities)
                    {
                        StreetNameAndCity key = new StreetNameAndCity
                        {
                            City = city,
                            FullStreetName = name.FullStreetName,
                        };

                        foreach (int possibleZip in data.StreetNameCity2Zips[key])
                        {
                            innerAlternateLines.Add(AddressUtility.CreateLineFromAddress(address, name.FullStreetName, possibleZip, city));
                        }
                    }
                }
            }

            // Lines are collected locally first so the shared list is locked only for the append.
            lock (alternateLines)
            {
                alternateLines.AddRange(innerAlternateLines);
            }

            return innerAlternateLines.Count > 0;
        }
コード例 #4
0
ファイル: MatchEngine.cs プロジェクト: kwende/MitchMatch
        /// <summary>
        /// Among the street records whose ZipCodes contain the zip of <paramref name="address"/>, finds the
        /// ones whose Name has the lowest EditDistanceEngine.ComputeNormalized value against the address
        /// street name (exact ties included) and produces one alternate line per city on record for each
        /// such street and the zip.
        /// </summary>
        /// <param name="address">Address being matched; it needs a non-empty StreetName and a Zip value.</param>
        /// <param name="data">Reference data: StreetData and StreetNameZip2Cities are read.</param>
        /// <param name="alternateLines">Shared output list; it is locked while the generated lines are appended.</param>
        /// <returns>True when at least one alternate line was appended.</returns>
        public static bool IsSoftAddressAndHardZipMatch(Address address, Data data, List<string> alternateLines)
        {
            List<string> innerAlternateLines = new List<string>();

            if (!string.IsNullOrEmpty(address.StreetName) &&
                address.Zip.HasValue)
            {
                var zip = address.Zip.Value;
                double lowestEditDistance = double.MaxValue;

                foreach (StreetName streetWithThisZip in data.StreetData.Where(n => n.ZipCodes.Contains(zip)))
                {
                    double editDistance = EditDistanceEngine.ComputeNormalized(streetWithThisZip.Name, address.StreetName);

                    if (editDistance < lowestEditDistance)
                    {
                        // Strictly better candidate: lines gathered for worse candidates are dropped.
                        innerAlternateLines.Clear();
                        lowestEditDistance = editDistance;
                    }
                    else if (editDistance != lowestEditDistance)
                    {
                        // Worse than the current best (or not comparable): contributes nothing.
                        // The exact floating-point comparison is deliberate; only true ties are kept.
                        continue;
                    }

                    // The city lookup is performed only for candidates that actually contribute lines.
                    StreetNameAndZip key = new StreetNameAndZip
                    {
                        FullStreetName = streetWithThisZip.FullStreetName,
                        Zip = zip
                    };

                    foreach (string city in data.StreetNameZip2Cities[key])
                    {
                        innerAlternateLines.Add(AddressUtility.CreateLineFromAddress(address, streetWithThisZip.FullStreetName, zip, city));
                    }
                }
            }

            lock (alternateLines)
            {
                alternateLines.AddRange(innerAlternateLines);
            }

            return innerAlternateLines.Count > 0;
        }
コード例 #5
0
ファイル: MatchEngine.cs プロジェクト: kwende/MitchMatch
        /// <summary>
        /// Combines candidate city names and candidate street names (both obtained from
        /// BKTreeEngine.LeastEditDistance) with the zip of <paramref name="address"/>: for every street
        /// record that carries the zip, a line is produced for each candidate city that is listed both
        /// in StreetNameZip2Cities for that street and zip and in the Cities of the record, provided
        /// the record's Name is one of the candidate street names.
        /// </summary>
        /// <param name="address">Address being matched; it needs a non-empty StreetName, a Zip and a non-empty City.</param>
        /// <param name="data">Reference data: both BK-trees, StreetData and StreetNameZip2Cities are read.</param>
        /// <param name="alternateLines">Shared output list; it is locked while the generated lines are appended.</param>
        /// <returns>True when at least one alternate line was appended; false when a required part is missing.</returns>
        public static bool IsSoftAddressAndSoftCityHardZipMatch(Address address, Data data, List<string> alternateLines)
        {
            bool hasAllParts = !string.IsNullOrEmpty(address.StreetName) &&
                               address.Zip != null &&
                               !string.IsNullOrEmpty(address.City);
            if (!hasAllParts)
            {
                return false;
            }

            List<string> generated = new List<string>();

            List<string> cityCandidates = BKTreeEngine.LeastEditDistance(address.City, data.CityNameBKTree).Distinct().ToList();
            List<string> streetCandidates = BKTreeEngine.LeastEditDistance(address.StreetName, data.StreetNameBKTree).Distinct().ToList();

            foreach (StreetName street in data.StreetData)
            {
                if (string.IsNullOrEmpty(street.Name) || !street.ZipCodes.Contains(address.Zip.Value))
                {
                    continue;
                }

                foreach (string cityCandidate in cityCandidates)
                {
                    StreetNameAndZip key = new StreetNameAndZip
                    {
                        Zip = address.Zip.Value,
                        FullStreetName = street.FullStreetName,
                    };

                    // The city has to be on record for this street and zip, and for the street itself.
                    if (!data.StreetNameZip2Cities[key].Contains(cityCandidate))
                    {
                        continue;
                    }

                    if (!street.Cities.Contains(cityCandidate))
                    {
                        continue;
                    }

                    // The street candidates are distinct, so a membership test adds at most one
                    // line here, just as comparing against each candidate in turn would.
                    if (streetCandidates.Contains(street.Name))
                    {
                        generated.Add(AddressUtility.CreateLineFromAddress(address, street.FullStreetName, address.Zip, cityCandidate));
                    }
                }
            }

            lock (alternateLines)
            {
                alternateLines.AddRange(generated);
            }

            return generated.Count > 0;
        }
コード例 #6
0
ファイル: MatchEngine.cs プロジェクト: kwende/MitchMatch
        /// <summary>
        /// Combines candidate city names and candidate street names (both obtained from
        /// BKTreeEngine.LeastEditDistance): for every street record whose Cities contain a candidate
        /// city and whose Name is a candidate street name, one line is produced per zip listed in
        /// StreetNameCity2Zips for that street and city.
        /// </summary>
        /// <param name="address">Address being matched; it needs a non-empty StreetName and a non-empty City.</param>
        /// <param name="data">Reference data: both BK-trees, StreetData and StreetNameCity2Zips are read.</param>
        /// <param name="alternateLines">Shared output list; it is locked while the generated lines are appended.</param>
        /// <returns>True when at least one alternate line was appended.</returns>
        public static bool IsSoftAddressAndSoftCityMatch(Address address, Data data, List<string> alternateLines)
        {
            List<string> generated = new List<string>();

            bool canSearch = !string.IsNullOrEmpty(address.StreetName) &&
                             !string.IsNullOrEmpty(address.City);

            if (canSearch)
            {
                List<string> cityCandidates = BKTreeEngine.LeastEditDistance(address.City, data.CityNameBKTree).Distinct().ToList();
                List<string> streetCandidates = BKTreeEngine.LeastEditDistance(address.StreetName, data.StreetNameBKTree).Distinct().ToList();

                foreach (StreetName street in data.StreetData)
                {
                    if (string.IsNullOrEmpty(street.Name))
                    {
                        continue;
                    }

                    foreach (string cityCandidate in cityCandidates)
                    {
                        if (!street.Cities.Contains(cityCandidate))
                        {
                            continue;
                        }

                        // The street candidates are distinct, so this membership test lets the
                        // record through at most once per city.
                        if (!streetCandidates.Contains(street.Name))
                        {
                            continue;
                        }

                        StreetNameAndCity key = new StreetNameAndCity
                        {
                            City = cityCandidate,
                            FullStreetName = street.FullStreetName,
                        };

                        foreach (int zip in data.StreetNameCity2Zips[key].ToArray())
                        {
                            generated.Add(AddressUtility.CreateLineFromAddress(address, street.FullStreetName, zip, cityCandidate));
                        }
                    }
                }
            }

            // The append happens even when nothing was generated, which then adds nothing.
            lock (alternateLines)
            {
                alternateLines.AddRange(generated);
            }

            return generated.Count > 0;
        }
コード例 #7
0
ファイル: MatchEngine.cs プロジェクト: kwende/MitchMatch
        /// <summary>
        /// Finds the street records whose FullStreetName equals the address street name and suffix joined
        /// by a space, and produces one line for every (city, zip) pair on record for each of them.
        /// </summary>
        /// <param name="address">Address being matched; a null or empty StreetName yields no match.</param>
        /// <param name="data">Reference data: StreetData and StreetNameCity2Zips are read.</param>
        /// <param name="alternateLines">Shared output list; it is locked while the generated lines are appended.</param>
        /// <returns>True when at least one alternate line was appended.</returns>
        internal static bool IsSolidStreetMatchOnly(Address address, Data data, List<string> alternateLines)
        {
            if (string.IsNullOrEmpty(address.StreetName))
            {
                return false;
            }

            string wantedStreet = string.Join(" ", address.StreetName, address.Suffix);

            // Streets, then their cities, then the zips of each street and city pair, in that nesting order.
            List<string> generated =
                (from street in data.StreetData
                 where street.FullStreetName == wantedStreet
                 from city in street.Cities
                 from zip in data.StreetNameCity2Zips[new StreetNameAndCity
                                                      {
                                                          City = city,
                                                          FullStreetName = street.FullStreetName,
                                                      }]
                 select AddressUtility.CreateLineFromAddress(address, street.FullStreetName, zip, city)).ToList();

            lock (alternateLines)
            {
                alternateLines.AddRange(generated);
            }

            return generated.Count > 0;
        }
コード例 #8
0
ファイル: MatchEngine.cs プロジェクト: kwende/MitchMatch
        /// <summary>
        /// For an address that has a street name but neither city nor zip, finds the street records whose
        /// FullStreetName equals the address street name and suffix joined by a space, and produces one
        /// line for every (city, zip) pair on record for each of them.
        /// </summary>
        /// <param name="address">Address being matched; it needs a non-empty StreetName, an empty City and no Zip.</param>
        /// <param name="data">Reference data: StreetData and StreetNameCity2Zips are read.</param>
        /// <param name="alternateLines">Shared output list; it is locked while the generated lines are appended.</param>
        /// <returns>
        /// True whenever the address satisfies the precondition above, including when no street record
        /// matched; false otherwise.
        /// </returns>
        internal static bool IsSolidAddressMatchWithNoZipCityAvailable(Address address, Data data, List<string> alternateLines)
        {
            if (string.IsNullOrEmpty(address.StreetName) ||
                !string.IsNullOrEmpty(address.City) ||
                address.Zip.HasValue)
            {
                return false;
            }

            string streetToMatch = string.Join(" ", address.StreetName, address.Suffix);
            List<string> innerAlternateLines = new List<string>();

            foreach (StreetName match in data.StreetData.Where(n => n.FullStreetName == streetToMatch))
            {
                foreach (string possibleCity in match.Cities)
                {
                    StreetNameAndCity key = new StreetNameAndCity
                    {
                        City = possibleCity,
                        FullStreetName = match.FullStreetName,
                    };

                    foreach (int possibleZip in data.StreetNameCity2Zips[key])
                    {
                        // One line per (match, city, zip). The previous code projected over every
                        // match again at this point, which repeated each line once per matching record.
                        innerAlternateLines.Add(AddressUtility.CreateLineFromAddress(address, match.FullStreetName, possibleZip, possibleCity));
                    }
                }
            }

            // Collected locally so the shared list is locked once, for the append only.
            lock (alternateLines)
            {
                alternateLines.AddRange(innerAlternateLines);
            }

            // NOTE(review): unlike IsSolidStreetMatchOnly, this reports true even when no line was
            // produced. Kept as-is for callers; confirm whether "Count > 0" was intended.
            return true;
        }
コード例 #9
0
ファイル: MatchEngine.cs プロジェクト: kwende/MitchMatch
        /// <summary>
        /// Handles addresses whose parts may have been rearranged (a direction at the end, a suffix in an
        /// odd position, and so on). Street records with the same Name, city and zip as the address are
        /// scored by how many of their parts are found in the address: one point when the PreDirection
        /// agrees with the address CardinalDirection, and one point each for a PreType and a Suffix that
        /// StringUtility.Contains finds in the joined street number, name and suffix of the address.
        /// A corrected line is produced for the highest-scoring record; the first one wins on ties.
        /// </summary>
        /// <param name="address">Address being matched; it needs a non-empty StreetName, a Zip and a non-empty City.</param>
        /// <param name="data">Reference data: StreetData is read.</param>
        /// <param name="alternateLines">Shared output list; it is locked while the line is appended.</param>
        /// <returns>True when a record scored above zero and a line was appended; false otherwise.</returns>
        public static bool IsRearrangedAddressAndCityZipMatch(Address address, Data data, List<string> alternateLines)
        {
            bool hasAllParts = !string.IsNullOrEmpty(address.StreetName) &&
                               address.Zip != null &&
                               !string.IsNullOrEmpty(address.City);
            if (!hasAllParts)
            {
                return false;
            }

            // The stem, the city and the zip all have to agree with the record.
            StreetName[] candidates = data.StreetData.Where(n =>
                                                            n.Name == address.StreetName &&
                                                            n.Cities.Contains(address.City) &&
                                                            n.ZipCodes.Contains(address.Zip.Value)).ToArray();

            StreetName winner = null;
            int topScore = 0;

            foreach (StreetName candidate in candidates)
            {
                int score = 0;

                bool directionAgrees =
                    !string.IsNullOrEmpty(candidate.PreDirection) &&
                    ((candidate.PreDirection == "E" && address.CardinalDirection == "EAST") ||
                     (candidate.PreDirection == "W" && address.CardinalDirection == "WEST") ||
                     (candidate.PreDirection == "S" && address.CardinalDirection == "SOUTH") ||
                     (candidate.PreDirection == "N" && address.CardinalDirection == "NORTH"));
                if (directionAgrees)
                {
                    score++;
                }

                string joinedAddress = string.Join(" ", address.StreetNumber, address.StreetName, address.Suffix);

                // The pre-type and the suffix each count when present on the record and found in the address.
                foreach (string part in new[] { candidate.PreType, candidate.Suffix })
                {
                    if (!string.IsNullOrEmpty(part) && StringUtility.Contains(joinedAddress, part))
                    {
                        score++;
                    }
                }

                // Strictly greater: an earlier record keeps the lead on equal scores.
                if (score > topScore)
                {
                    topScore = score;
                    winner = candidate;
                }
            }

            if (topScore <= 0)
            {
                return false;
            }

            // Street name correction only; the zip and the city of the address are kept.
            string correctedLine = AddressUtility.CreateLineFromAddress(address, winner.FullStreetName, address.Zip.Value, address.City);
            lock (alternateLines)
            {
                alternateLines.Add(correctedLine);
            }

            return true;
        }
コード例 #10
0
ファイル: MatchEngine.cs プロジェクト: kwende/MitchMatch
        /// <summary>
        /// Variant of the rearranged-address match that uses candidate city names obtained from
        /// BKTreeEngine.LeastEditDistance. Every (street record, candidate city) pair where the record
        /// lists the city and StringUtility.Contains finds the record's Name in the address
        /// FullStreetName is scored: one point when the PreDirection agrees with the address
        /// CardinalDirection, and one point each for a PreType and a Suffix found in the joined street
        /// number, name and suffix of the address. For the highest-scoring pair (first one wins on
        /// ties) one line is produced per zip listed in StreetNameCity2Zips.
        /// </summary>
        /// <param name="address">Address being matched; it needs a non-empty StreetName and a non-empty City.</param>
        /// <param name="data">Reference data: CityNameBKTree, StreetData and StreetNameCity2Zips are read.</param>
        /// <param name="alternateLines">Shared output list; it is locked while the lines are appended.</param>
        /// <returns>True when a pair scored above zero; false otherwise.</returns>
        public static bool IsRearrangedAddressAndSoftCityMatch(Address address, Data data, List<string> alternateLines)
        {
            if (string.IsNullOrEmpty(address.StreetName) || string.IsNullOrEmpty(address.City))
            {
                return false;
            }

            List<string> cityCandidates = BKTreeEngine.LeastEditDistance(address.City, data.CityNameBKTree).Distinct().ToList();

            StreetName winner = null;
            string winnerCity = null;
            int topScore = 0;

            foreach (StreetName candidate in data.StreetData)
            {
                foreach (string cityCandidate in cityCandidates)
                {
                    // The stem has to appear in the address and the record has to list the city.
                    bool eligible = !string.IsNullOrEmpty(candidate.Name) &&
                                    candidate.Cities.Contains(cityCandidate) &&
                                    StringUtility.Contains(address.FullStreetName, candidate.Name);
                    if (!eligible)
                    {
                        continue;
                    }

                    int score = 0;

                    bool directionAgrees =
                        !string.IsNullOrEmpty(candidate.PreDirection) &&
                        ((candidate.PreDirection == "E" && address.CardinalDirection == "EAST") ||
                         (candidate.PreDirection == "W" && address.CardinalDirection == "WEST") ||
                         (candidate.PreDirection == "S" && address.CardinalDirection == "SOUTH") ||
                         (candidate.PreDirection == "N" && address.CardinalDirection == "NORTH"));
                    if (directionAgrees)
                    {
                        score++;
                    }

                    string joinedAddress = string.Join(" ", address.StreetNumber, address.StreetName, address.Suffix);

                    // The pre-type and the suffix each count when present on the record and found in the address.
                    foreach (string part in new[] { candidate.PreType, candidate.Suffix })
                    {
                        if (!string.IsNullOrEmpty(part) && StringUtility.Contains(joinedAddress, part))
                        {
                            score++;
                        }
                    }

                    // Strictly greater: an earlier pair keeps the lead on equal scores.
                    if (score > topScore)
                    {
                        topScore = score;
                        winner = candidate;
                        winnerCity = cityCandidate;
                    }
                }
            }

            if (topScore <= 0)
            {
                return false;
            }

            // Given the winning city and street, which zips are on record?
            StreetNameAndCity key = new StreetNameAndCity
            {
                City = winnerCity,
                FullStreetName = winner.FullStreetName
            };
            int[] zipsOnRecord = data.StreetNameCity2Zips[key].ToArray();

            lock (alternateLines)
            {
                foreach (int zip in zipsOnRecord)
                {
                    alternateLines.Add(AddressUtility.CreateLineFromAddress(address, winner.FullStreetName, zip, winnerCity));
                }
            }

            return true;
        }
コード例 #11
0
ファイル: MatchEngine.cs プロジェクト: kwende/MitchMatch
        /// <summary>
        /// Handles addresses whose parts may have been rearranged when only one of city and zip agrees
        /// with the reference data. Street records with the same Name as the address that list either
        /// the address city or the address zip are scored: one point when the PreDirection agrees with
        /// the address CardinalDirection, and one point each for a PreType and a Suffix that
        /// StringUtility.Contains finds in the joined street number, name and suffix of the address.
        /// For the highest-scoring record (first one wins on ties) the city is tried first, producing
        /// one line per zip on record for street and city; otherwise the zip is used, producing one
        /// line per city on record for street and zip.
        /// </summary>
        /// <param name="address">Address being matched; it needs a non-empty StreetName and a Zip or a non-empty City.</param>
        /// <param name="data">Reference data: StreetData, StreetNameCity2Zips and StreetNameZip2Cities are read.</param>
        /// <param name="alternateLines">Shared output list; it is locked while the lines are appended.</param>
        /// <returns>True when a record scored above zero and matched on city or zip; false otherwise.</returns>
        public static bool IsRearrangedAddressAndCityOrZipMatch(Address address, Data data, List<string> alternateLines)
        {
            bool hasCityOrZip = address.Zip != null || !string.IsNullOrEmpty(address.City);
            if (string.IsNullOrEmpty(address.StreetName) || !hasCityOrZip)
            {
                return false;
            }

            StreetName winner = null;
            int topScore = 0;

            // The stem has to agree, together with the city or the zip.
            StreetName[] candidates = data.StreetData.Where(n =>
                                                            n.Name == address.StreetName &&
                                                            (n.Cities.Contains(address.City) ||
                                                             (address.Zip != null && n.ZipCodes.Contains(address.Zip.Value)))).ToArray();

            foreach (StreetName candidate in candidates)
            {
                int score = 0;

                bool directionAgrees =
                    !string.IsNullOrEmpty(candidate.PreDirection) &&
                    ((candidate.PreDirection == "E" && address.CardinalDirection == "EAST") ||
                     (candidate.PreDirection == "W" && address.CardinalDirection == "WEST") ||
                     (candidate.PreDirection == "S" && address.CardinalDirection == "SOUTH") ||
                     (candidate.PreDirection == "N" && address.CardinalDirection == "NORTH"));
                if (directionAgrees)
                {
                    score++;
                }

                string joinedAddress = string.Join(" ", address.StreetNumber, address.StreetName, address.Suffix);

                // The pre-type and the suffix each count when present on the record and found in the address.
                foreach (string part in new[] { candidate.PreType, candidate.Suffix })
                {
                    if (!string.IsNullOrEmpty(part) && StringUtility.Contains(joinedAddress, part))
                    {
                        score++;
                    }
                }

                // Strictly greater: an earlier record keeps the lead on equal scores.
                if (score > topScore)
                {
                    topScore = score;
                    winner = candidate;
                }
            }

            if (topScore <= 0)
            {
                return false;
            }

            // Either the city or the zip got the winner into the candidate set; the city is tried first.
            if (!string.IsNullOrEmpty(address.City) &&
                winner.Cities.Contains(address.City))
            {
                // Given the street and the city, which zips are on record?
                StreetNameAndCity cityKey = new StreetNameAndCity
                {
                    City = address.City,
                    FullStreetName = winner.FullStreetName
                };
                int[] zipsOnRecord = data.StreetNameCity2Zips[cityKey].ToArray();

                lock (alternateLines)
                {
                    foreach (int zip in zipsOnRecord)
                    {
                        alternateLines.Add(AddressUtility.CreateLineFromAddress(address, winner.FullStreetName, zip, address.City));
                    }
                }

                return true;
            }

            if (address.Zip != null &&
                winner.ZipCodes.Contains(address.Zip.Value))
            {
                // Given the street and the zip, which cities are on record?
                StreetNameAndZip zipKey = new StreetNameAndZip
                {
                    Zip = address.Zip.Value,
                    FullStreetName = winner.FullStreetName
                };
                string[] citiesOnRecord = data.StreetNameZip2Cities[zipKey].ToArray();

                lock (alternateLines)
                {
                    foreach (string city in citiesOnRecord)
                    {
                        alternateLines.Add(AddressUtility.CreateLineFromAddress(address, winner.FullStreetName, address.Zip.Value, city));
                    }
                }

                return true;
            }

            return false;
        }
コード例 #12
0
ファイル: Program.cs プロジェクト: kwende/MitchMatch
        /// <summary>
        /// Resolves a raw address line by trying a fixed sequence of street-name lookups,
        /// stopping at the first one that succeeds. The street name of each success is
        /// recorded in the shared level list for that step, and the address is finally
        /// filed into the shared list that corresponds to its <c>MatchQuality</c>.
        /// </summary>
        /// <param name="line">Raw input line passed to <c>AddressUtility.InitializeAddress</c>.</param>
        /// <param name="data">Reference data handed to every lookup helper.</param>
        /// <returns>The address as left by the last lookup that ran.</returns>
        /// <remarks>
        /// Every write to a shared list is done under a lock on that list, matching how
        /// <c>BenAddressMatch</c> treats the same level lists.
        /// </remarks>
        private static Address LucasAddressMatch(string line, Data data)
        {
            Address address = AddressUtility.InitializeAddress(line, data);

            // Suffixes tried, in this order, when the street name alone does not match.
            string[] fallbackSuffixes = { " ST", " AVE", " BLVD" };

            // Level 1: initialization already decided the match quality.
            bool matchFound = (address.MatchQuality != MatchQuality.MatchNotYetDetermined);
            if (matchFound)
            {
                lock (level1Match)
                {
                    level1Match.Add(address.FullStreetName);
                }
            }

            // Level 2: street name as parsed; TestAddress's last argument is 0
            // (presumably the allowed edit distance - confirm against TestAddress).
            if (!matchFound && TestAddress(data, ref address, address.FullStreetName, 0) != null)
            {
                matchFound = true;
                lock (level2Match)
                {
                    level2Match.Add(address.FullStreetName);
                }
            }

            // Level 3: street name with each fallback suffix appended, same last argument.
            for (int i = 0; !matchFound && i < fallbackSuffixes.Length; i++)
            {
                if (TestAddress(data, ref address, address.FullStreetName + fallbackSuffixes[i], 0) != null)
                {
                    matchFound = true;
                    lock (level3Match)
                    {
                        level3Match.Add(address.FullStreetName);
                    }
                }
            }

            // Levels 5 and 6: restrict candidates to streets whose number range contains
            // this street number in this ZIP, then accept a unique nearest neighbor.
            if (!matchFound &&
                address.Zip != null &&
                !string.IsNullOrEmpty(address.StreetName) &&
                !string.IsNullOrEmpty(address.StreetNumber))
            {
                List <string> streetsWithZipStrings = data.NYCityStreets
                                                      .Where(n => n.StreetNumber.IsInRange(address.StreetNumber) && n.ZipCode == address.Zip.Value)
                                                      .Select(s => s.FullStreetName)
                                                      .Distinct()
                                                      .ToList();
                BKTree bkTreeLocal = BKTreeEngine.CreateBKTree(streetsWithZipStrings);

                int           distance;
                List <string> closestNeighbors = BKTreeEngine.LeastEditDistanceWithDistance(address.FullStreetName, bkTreeLocal, out distance);

                if (closestNeighbors.Count == 1 && distance <= 2)
                {
                    CorrectAddress(data, ref address, closestNeighbors[0]);
                    matchFound = true;

                    // Distance 0 or 1 is recorded as level 5, distance 2 as level 6.
                    if (distance <= 1)
                    {
                        lock (level5Match)
                        {
                            level5Match.Add(address.FullStreetName);
                        }
                    }
                    else
                    {
                        lock (level6Match)
                        {
                            level6Match.Add(address.FullStreetName);
                        }
                    }
                }
            }

            // Level 7: street name as parsed, TestAddress's last argument raised to 2.
            if (!matchFound && TestAddress(data, ref address, address.FullStreetName, 2) != null)
            {
                matchFound = true;
                lock (level7Match)
                {
                    level7Match.Add(address.FullStreetName);
                }
            }

            // Level 8: fallback suffixes again, with the last argument at 2.
            for (int i = 0; !matchFound && i < fallbackSuffixes.Length; i++)
            {
                if (TestAddress(data, ref address, address.FullStreetName + fallbackSuffixes[i], 2) != null)
                {
                    matchFound = true;
                    lock (level8Match)
                    {
                        level8Match.Add(address.FullStreetName);
                    }
                }
            }

            // File the address into the shared list for its final match quality.
            if (address.MatchQuality == MatchQuality.Unknown)
            {
                lock (unknown)
                {
                    unknown.Add(AddressUtility.CreateLineFromAddress(address, "UNKNOWN"));
                }
            }
            else if (address.MatchQuality == MatchQuality.Homeless)
            {
                lock (homeless)
                {
                    homeless.Add(AddressUtility.CreateLineFromAddress(address, "HOMELESS"));
                }
            }
            else if (address.MatchQuality == MatchQuality.Alternate)
            {
                lock (alternate)
                {
                    alternate.Add(address.RawAddress1);
                }
            }
            else if (address.MatchQuality == MatchQuality.LeaveAlone)
            {
                lock (leaveAlone)
                {
                    leaveAlone.Add(address.RawAddress1);
                }
            }
            else if (address.MatchQuality == MatchQuality.MatchNotYetDetermined)
            {
                lock (matchNotYetDetermined)
                {
                    matchNotYetDetermined.Add($"{address.RawAddress1}=>{address.FullStreetName}");
                }
            }

            return(address);
        }
コード例 #13
0
ファイル: Program.cs プロジェクト: kwende/MitchMatch
        /// <summary>
        /// Parses a raw address line and runs it through the <c>MatchEngine</c> checks in
        /// order, stopping at the first check that returns true. The original line is
        /// recorded in the shared list for the check that succeeded, or in <c>failed</c>
        /// when none did.
        /// </summary>
        /// <param name="line">Raw input line passed to <c>AddressUtility.InitializeAddressBen</c>.</param>
        /// <param name="data">Reference data handed to every <c>MatchEngine</c> check.</param>
        /// <returns>
        /// The alternate lines produced for this address: filled in by the matching
        /// check, or a single "PO BOX n", "UNKNOWN" or "HOMELESS" line. May be empty.
        /// </returns>
        /// <remarks>
        /// Every write to a shared list is done under a lock on that same list. The
        /// returned list is local to this call and needs no lock here.
        /// </remarks>
        private static List <string> BenAddressMatch(string line, Data data)
        {
            // Clean the address and do what we can with pure NLP.
            Address address = AddressUtility.InitializeAddressBen(line, data);

            List <string> alternateLines = new List <string>();

            if (address.MatchQuality == MatchQuality.MatchNotYetDetermined)
            {
                if (address.POBoxNumber != 0)
                {
                    // PO boxes bypass street matching and are recorded as level 1.
                    lock (level1Match)
                    {
                        level1Match.Add(address.OriginalLine);
                    }

                    alternateLines.Add(
                        AddressUtility.CreateLineFromAddress(address, "PO BOX " + address.POBoxNumber.ToString()));
                }
                else if (MatchEngine.IsPerfectMatchIncludingZipAndCity(address, data))
                {
                    lock (level1Match)
                    {
                        level1Match.Add(address.OriginalLine);
                    }
                }
                else if (MatchEngine.IsRearrangedAddressAndCityZipMatch(address, data, alternateLines))
                {
                    lock (level2Match)
                    {
                        level2Match.Add(address.OriginalLine);
                    }
                }
                else if (MatchEngine.IsRearrangedAddressAndCityOrZipMatch(address, data, alternateLines))
                {
                    lock (level3Match)
                    {
                        level3Match.Add(address.OriginalLine);
                    }
                }
                else if (MatchEngine.IsRearrangedAddressAndSoftCityMatch(address, data, alternateLines))
                {
                    lock (level4Match)
                    {
                        level4Match.Add(address.OriginalLine);
                    }
                }
                else if (MatchEngine.IsSolidAddressMatchWithNoZipCityAvailable(address, data, alternateLines))
                {
                    lock (level5Match)
                    {
                        level5Match.Add(address.OriginalLine);
                    }
                }
                else if (MatchEngine.IsSoftAddressAndSoftCityHardZipMatch(address, data, alternateLines))
                {
                    lock (level6Match)
                    {
                        level6Match.Add(address.OriginalLine);
                    }
                }
                else if (MatchEngine.IsSoftAddressAndHardZipMatch(address, data, alternateLines))
                {
                    lock (level7Match)
                    {
                        level7Match.Add(address.OriginalLine);
                    }
                }
                else if (MatchEngine.IsSoftAddressAndSoftCityMatch(address, data, alternateLines))
                {
                    lock (level8Match)
                    {
                        level8Match.Add(address.OriginalLine);
                    }
                }
                else if (MatchEngine.IsSolidStreetMatchOnly(address, data, alternateLines))
                {
                    lock (level9Match)
                    {
                        level9Match.Add(address.OriginalLine);
                    }
                }
                else if (MatchEngine.IsSoftAddressMatch(address, data, alternateLines))
                {
                    lock (level10Match)
                    {
                        level10Match.Add(address.OriginalLine);
                    }
                }
                else
                {
                    lock (failed)
                    {
                        failed.Add(address.OriginalLine);
                    }
                }
            }
            else if (address.MatchQuality == MatchQuality.Unknown)
            {
                alternateLines.Add(AddressUtility.CreateLineFromAddress(address, "UNKNOWN"));

                // Lock the shared list itself; locking the local alternateLines would not
                // serialize writers, since each call has its own alternateLines instance.
                lock (unknown)
                {
                    unknown.Add(address.OriginalLine);
                }
            }
            else if (address.MatchQuality == MatchQuality.Homeless)
            {
                alternateLines.Add(AddressUtility.CreateLineFromAddress(address, "HOMELESS"));

                lock (homeless)
                {
                    homeless.Add(address.OriginalLine);
                }
            }

            return(alternateLines);
        }
コード例 #14
0
        /// <summary>
        /// Reads "city_of_new_york_revised.csv" and turns every line after the first into
        /// a <c>StateOfNewYorkAddressRange</c>.
        /// </summary>
        /// <param name="data">Reference data passed to <c>AddressUtility.NormalizeSuffix</c>.</param>
        /// <returns>One entry per line read, in file order.</returns>
        /// <remarks>
        /// Fields are read by position after splitting on ',' and trimming: index 2 is the
        /// street number, 3 the street name, 5 the city and 8 the ZIP code. A ZIP that does
        /// not parse as an integer is stored as -1. A line with fewer than nine fields
        /// throws <c>IndexOutOfRangeException</c>.
        /// </remarks>
        private static List <StateOfNewYorkAddressRange> LoadNYCityAddresses(Data data)
        {
            List <StateOfNewYorkAddressRange> ret = new List <StateOfNewYorkAddressRange>();

            // Skip(1) drops the first line of the file (presumably a header row - confirm).
            foreach (string address in File.ReadAllLines("city_of_new_york_revised.csv").Skip(1))
            {
                string[] bits = address.Split(',').Select(n => n.Trim()).ToArray();

                string numberBit  = bits[2];
                string addressBit = bits[3];
                string city       = bits[5];

                // int.TryParse writes 0 to its out argument on failure, so the -1
                // sentinel has to be restored explicitly.
                int zip;
                if (!int.TryParse(bits[8], out zip))
                {
                    zip = -1;
                }

                Address cleanAddress = AddressUtility.NormalizeSuffix(addressBit, data);

                StreetNumberRange streetNumber;

                if (Regex.IsMatch(numberBit, @"^\d+-\d+$"))
                {
                    // "a-b": store the bounds in ascending order whichever way they were written.
                    int[] bounds = numberBit.Split('-').Select(part => int.Parse(part)).ToArray();
                    int   low    = bounds[0] > bounds[1] ? bounds[1] : bounds[0];
                    int   high   = bounds[0] > bounds[1] ? bounds[0] : bounds[1];

                    streetNumber = new StreetNumberRange(low, high);
                }
                else if (Regex.IsMatch(numberBit, @"^\d+$"))
                {
                    // A single number is a range that starts and ends on itself.
                    int single = int.Parse(numberBit);

                    streetNumber = new StreetNumberRange(single, single);
                }
                else if (numberBit.Length > 0)
                {
                    // Anything else that is non-empty is kept verbatim.
                    streetNumber = new StreetNumberRange(numberBit);
                }
                else
                {
                    // No street number at all.
                    streetNumber = new StreetNumberRange(-1, -1);
                }

                ret.Add(new StateOfNewYorkAddressRange
                {
                    StreetNumber   = streetNumber,
                    City           = city,
                    StreetName     = cleanAddress.StreetName,
                    ZipCode        = zip,
                    Suffix         = cleanAddress.Suffix,
                    FullStreetName = cleanAddress.FullStreetName,
                });
            }

            return(ret);
        }