Ejemplo n.º 1
0
        /// <summary>
        /// For every group in <paramref name="matches"/>, finds the pair of rows with the largest distance as reported by
        /// <c>EditDistanceEngine.ComputeDistanceForRecordPair</c> and records that distance with both rows' EnterpriseID.
        /// </summary>
        /// <typeparam name="T">Key type of the groupings.</typeparam>
        /// <param name="matches">Groups of rows to inspect; each group is materialized to an array once.</param>
        /// <returns>
        /// One tuple per group, in input order: (largest distance, EnterpriseID of the first row, EnterpriseID of the
        /// second row). A group in which no pair scores above -1 yields (-1, -1, -1).
        /// </returns>
        public static List <Tuple <double, int, int> > ReturnMaxErrorForMatchedGroups <T>(List <IGrouping <T, Row> > matches)
        {
            var worstPairs = new List<Tuple<double, int, int>>();

            foreach (IGrouping<T, Row> group in matches)
            {
                Row[] members = group.ToArray();

                // Sentinels reported when the group has no pair that beats -1.
                double largest  = -1.0;
                int    firstId  = -1;
                int    secondId = -1;

                // Every ordered pair is visited, so each unordered pair is scored in both directions.
                for (int i = 0; i < members.Length; i++)
                {
                    Row left = members[i];

                    for (int j = 0; j < members.Length; j++)
                    {
                        Row right = members[j];

                        if (left != right)
                        {
                            double distance = EditDistanceEngine.ComputeDistanceForRecordPair(left, right);

                            // Strictly greater: on ties the first pair encountered is kept.
                            if (distance > largest)
                            {
                                largest  = distance;
                                firstId  = left.EnterpriseID;
                                secondId = right.EnterpriseID;
                            }
                        }
                    }
                }

                worstPairs.Add(Tuple.Create(largest, firstId, secondId));
            }

            return worstPairs;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Looks up street names that are close (by edit distance) to the street name of <paramref name="address"/> and,
        /// when the first candidate is close enough, generates one alternate address line for every
        /// street / city / zip combination found in <paramref name="data"/>.
        /// </summary>
        /// <param name="address">Address being matched. Nothing is generated when its StreetName is null or empty.</param>
        /// <param name="data">
        /// Reference data: <c>StreetNameBKTree</c> for the soft lookup, <c>StreetData</c> for the street records and
        /// <c>StreetNameCity2Zips</c> for the zips of a (full street name, city) pair.
        /// </param>
        /// <param name="alternateLines">
        /// Receives the generated lines. The list is locked while it is appended to.
        /// </param>
        /// <returns><c>true</c> when at least one alternate line was generated; otherwise <c>false</c>.</returns>
        public static bool IsSoftAddressMatch(Address address, Data data, List <string> alternateLines)
        {
            if (string.IsNullOrEmpty(address.StreetName))
            {
                return false;
            }

            List<string> innerAlternateLines = new List<string>();

            List<string> softMatchedStreets = BKTreeEngine.LeastEditDistance(address.StreetName, data.StreetNameBKTree).Distinct().ToList();

            // Only proceed when the first candidate is within the 0.5 normalized edit-distance threshold.
            if (softMatchedStreets.Count > 0 &&
                EditDistanceEngine.ComputeNormalized(softMatchedStreets[0], address.StreetName) < .5f)
            {
                // Hash-based membership replaces a linear List.Contains scan per street record. The candidate list is
                // already distinct, so a street record can match at most one candidate and no inner loop is needed.
                HashSet<string> softMatchedSet = new HashSet<string>(softMatchedStreets);

                StreetName[] possibleStreets = data.StreetData
                                               .Where(n => !string.IsNullOrEmpty(n.Name) && softMatchedSet.Contains(n.Name))
                                               .ToArray();

                foreach (StreetName name in possibleStreets)
                {
                    string[] possibleCities = name.Cities.ToArray();
                    foreach (string city in possibleCities)
                    {
                        int[] possibleZips = data.StreetNameCity2Zips[
                            new StreetNameAndCity
                            {
                                City = city,
                                FullStreetName = name.FullStreetName,
                            }].ToArray();

                        foreach (int possibleZip in possibleZips)
                        {
                            innerAlternateLines.Add(AddressUtility.CreateLineFromAddress(address, name.FullStreetName, possibleZip, city));
                        }
                    }
                }
            }

            // Lines are collected locally first so the caller's list is locked only for the final append.
            lock (alternateLines)
            {
                alternateLines.AddRange(innerAlternateLines);
            }

            return innerAlternateLines.Count > 0;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Among the streets in <paramref name="data"/> whose zip codes contain the zip of <paramref name="address"/>,
        /// finds those whose name has the lowest normalized edit distance to the address's street name and generates
        /// one alternate address line per city listed for each such street and zip.
        /// </summary>
        /// <param name="address">
        /// Address being matched. Nothing is generated when its StreetName is null or empty or its Zip has no value.
        /// </param>
        /// <param name="data">
        /// Reference data: <c>StreetData</c> for the street records and <c>StreetNameZip2Cities</c> for the cities of a
        /// (full street name, zip) pair.
        /// </param>
        /// <param name="alternateLines">
        /// Receives the generated lines. The list is locked while it is appended to.
        /// </param>
        /// <returns><c>true</c> when at least one alternate line was generated; otherwise <c>false</c>.</returns>
        public static bool IsSoftAddressAndHardZipMatch(Address address, Data data, List <string> alternateLines)
        {
            List<string> innerAlternateLines = new List<string>();

            if (!string.IsNullOrEmpty(address.StreetName) &&
                address.Zip.HasValue)
            {
                var zip = address.Zip.Value;

                StreetName[] streetsWithThisZip = data.StreetData.Where(n => n.ZipCodes.Contains(zip)).ToArray();

                double lowestEditDistance = double.MaxValue;
                foreach (StreetName streetWithThisZip in streetsWithThisZip)
                {
                    double editDistance = EditDistanceEngine.ComputeNormalized(streetWithThisZip.Name, address.StreetName);

                    bool isNewBest = editDistance < lowestEditDistance;

                    // Skip anything that is neither a new best nor an exact tie with the current best
                    // (a NaN distance satisfies neither comparison and is skipped as well).
                    if (!isNewBest && editDistance != lowestEditDistance)
                    {
                        continue;
                    }

                    if (isNewBest)
                    {
                        // A strictly better street invalidates every line collected so far.
                        innerAlternateLines.Clear();
                        lowestEditDistance = editDistance;
                    }

                    // The city lookup is deferred until the street is known to be kept.
                    string[] citiesWithThisStreetAndZip = data.StreetNameZip2Cities[new StreetNameAndZip
                                                                                    {
                                                                                        FullStreetName = streetWithThisZip.FullStreetName,
                                                                                        Zip = zip
                                                                                    }].ToArray();

                    foreach (string city in citiesWithThisStreetAndZip)
                    {
                        innerAlternateLines.Add(AddressUtility.CreateLineFromAddress(address, streetWithThisZip.FullStreetName, zip, city));
                    }
                }
            }

            // Lines are collected locally first so the caller's list is locked only for the final append.
            lock (alternateLines)
            {
                alternateLines.AddRange(innerAlternateLines);
            }

            return innerAlternateLines.Count > 0;
        }