Example #1
0
        /// <summary>
        /// Builds the set of Double Metaphone keys (primary and alternate) produced
        /// by the words of the supplied string.
        /// </summary>
        /// <param name="value">The text to encode; may be null or empty.</param>
        /// <returns>
        /// The distinct, non-empty keys for every word of <paramref name="value"/>;
        /// an empty set when <paramref name="value"/> is null or empty.
        /// </returns>
        private HashSet<string> GetPhoneticKeys(string value)
        {
            HashSet<string> keys = new HashSet<string>();

            if (!string.IsNullOrEmpty(value))
            {
                foreach (string token in value.SplitWords())
                {
                    // Fresh buffers for each word so nothing carries over from the previous one
                    string primaryKey   = string.Empty;
                    string alternateKey = string.Empty;

                    DoubleMetaphone.doubleMetaphone(token, ref primaryKey, ref alternateKey);

                    // Primary first, then alternate; blank keys are skipped
                    foreach (string candidate in new[] { primaryKey, alternateKey })
                    {
                        if (!string.IsNullOrEmpty(candidate))
                        {
                            keys.Add(candidate);
                        }
                    }
                }
            }

            return keys;
        }
Example #2
0
        /// <summary>
        /// Calculates distance using a double metaphone (soundex) to compare
        /// each word in a string.
        /// </summary>
        /// <param name="sourceValue">The source value</param>
        /// <param name="targetValue">The target value</param>
        /// <returns>
        /// <c>1 - (2 * matches / totalWordCount)</c>, where a source word counts as a match
        /// when its primary or alternate key is among the target's keys and
        /// <c>totalWordCount</c> is the number of words in both values combined.
        /// Returns <c>null</c> when neither value contains any words, because the ratio
        /// is undefined in that case (it previously evaluated to NaN).
        /// </returns>
        public override double? CalculateDistance(string sourceValue, string targetValue)
        {
            HashSet<string> targetKeys = GetPhoneticKeys(targetValue);

            // Split the source once and reuse it for both the count and the loop
            var sourceWords    = sourceValue.SplitWords();
            int totalWordCount = sourceWords.Length + targetValue.SplitWords().Length;

            // 0.0 / 0 is NaN; report "no distance" instead of leaking NaN to callers
            if (totalWordCount == 0)
            {
                return null;
            }

            double matches = 0;

            // Loop over all the words in the source string
            foreach (string word in sourceWords)
            {
                string primary   = string.Empty;
                string alternate = string.Empty;

                DoubleMetaphone.doubleMetaphone(word, ref primary, ref alternate);

                bool primaryHit   = !string.IsNullOrEmpty(primary) && targetKeys.Contains(primary);
                bool alternateHit = !string.IsNullOrEmpty(alternate) && targetKeys.Contains(alternate);

                // A word is counted at most once, even if both of its keys are present
                if (primaryHit || alternateHit)
                {
                    matches++;
                }
            }

            // NOTE(review): repeated source words that all match can push the result below 0
            // (e.g. source "a a a" vs target "a"); left unchanged here - confirm whether clamping is wanted.
            return 1 - (2 * matches / totalWordCount);
        }
Example #3
0
        /// <summary>
        /// Reads the "NAMES" and "GENDER" columns of a worksheet and returns the rows whose
        /// lower-cased name has the same Double Metaphone primary key as <paramref name="name"/>.
        /// </summary>
        /// <param name="name">The name whose primary key is used as the reference.</param>
        /// <param name="path">Path handed to <c>ExcelReader</c>.</param>
        /// <param name="worksheet">Name of the worksheet to load.</param>
        /// <returns>One <c>{ name, gender }</c> pair per matching row, in worksheet order.</returns>
        private static List<string[]> DoubleMetaphoneGenerateLists(string name, string path, string worksheet)
        {
            DoubleMetaphone encoder   = new DoubleMetaphone(name);
            string          targetKey = encoder.PrimaryKey;
            List<string[]>  matches   = new List<string[]>();

            DataTable sheet = new ExcelReader(path).GetWorksheet(worksheet);
            var       rows  = sheet.ToJagged<string>("NAMES", "GENDER");

            foreach (var row in rows)
            {
                string candidate = row[0].ToLower();
                string gender    = row[1];

                // The encoder is reused: computeKeys replaces its keys with those of the candidate
                encoder.computeKeys(candidate);
                if (encoder.PrimaryKey == targetKey)
                {
                    matches.Add(new string[] { candidate, gender });
                }
            }

            return matches;
        }
 /// <summary>
 /// Gets the Double Metaphone secondary key of a string.
 /// </summary>
 /// <param name="myString">The text to encode; may be null.</param>
 /// <returns>
 /// The <c>Secondary</c> value of the string's Double Metaphone, or null when
 /// <paramref name="myString"/> is null.
 /// </returns>
 public static string DoubleMetaphoneSecondaryKey(this string myString)
 {
     if (myString != null)
     {
         return DoubleMetaphone.GetDoubleMetaphone(myString).Secondary;
     }

     // Null in, null out
     return null;
 }
Example #5
0
        /// <summary>
        /// Builds a gazetteer entry from one colon-delimited line of gazetteer data.
        /// </summary>
        /// <remarks>
        /// Diacritics are stripped before the line is split on ':'. Latitude and longitude are each
        /// assembled from a whole-number field plus a second field divided by 60, and the longitude
        /// is negated when field 10 is "W". The fuzzy-match keys are only populated when
        /// <c>Regions.OS_GetCounty</c> recognises the county code.
        /// </remarks>
        /// <param name="line">
        /// A colon-delimited record. Fields 0, 2, 4-7, 10, 11, 13, 14 and 20 are read, so the line
        /// must contain at least 21 fields.
        /// </param>
        public OS50kGazetteer(string line)
        {
            line = EnhancedTextInfo.RemoveDiacritics(line); // remove any special characters for Gazetteer processing
            string[] values = line.Split(':');

            // Parse the fractional fields with the invariant culture: under a culture that uses '.'
            // as a group separator the default overload would read "12.5" as 125.
            const System.Globalization.NumberStyles minuteStyle =
                System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            // A failed TryParse leaves 0 in its out variable; that lenient behaviour is kept
            int.TryParse(values[0], out int intval);
            SequenceNumber = intval;
            DefinitiveName = values[2];
            int.TryParse(values[4], out intval);
            double.TryParse(values[5], minuteStyle, invariant, out double latitude);
            Latitude = intval + latitude / 60;
            int.TryParse(values[6], out intval);
            double.TryParse(values[7], minuteStyle, invariant, out double longitude);
            Longitude = intval + longitude / 60;
            if (values[10] == "W")
            {
                Longitude = -1 * Longitude; // West Longitudes are negative
            }
            Coordinate c = new Coordinate(Longitude, Latitude);

            c     = MapTransforms.TransformCoordinate(c);
            Point = GeometryFactory.Default.CreatePoint(c);

            CountyCode  = values[11];
            CountyName  = values[13];
            FeatureCode = values[14];
            ParishName  = values[20];

            // Strip the " CP" / " Community" suffixes from the parish name
            if (ParishName.EndsWith(" CP"))
            {
                ParishName = ParishName.Substring(0, ParishName.Length - 3);
            }
            if (ParishName.EndsWith(" Community"))
            {
                ParishName = ParishName.Substring(0, ParishName.Length - 10);
            }
            FixCommas();
            FixAbbreviations();
            ModernCounty county = Regions.OS_GetCounty(CountyCode);

            if (county == null)
            {
                CountryName = string.Empty;
            }
            else
            {
                CountryName = county.CountryName;

                string nameKey    = new DoubleMetaphone(DefinitiveName).PrimaryKey;
                string parishKey  = new DoubleMetaphone(ParishName).PrimaryKey;
                string countyKey  = new DoubleMetaphone(CountyName).PrimaryKey;
                string countryKey = new DoubleMetaphone(county.CountryName).PrimaryKey;

                // name:parish:county:country
                FuzzyMatch = nameKey + ":" + parishKey + ":" + countyKey + ":" + countryKey;

                // name:county:country. The previous code used '=' instead of '+=' for the county and
                // country parts, so this key ended up holding only the country key.
                // NOTE(review): format mirrors FuzzyMatch minus the parish - confirm against whatever compares it.
                FuzzyNoParishMatch = nameKey + ":" + countyKey + ":" + countryKey;
            }
        }
        /// <summary>
        /// Scores how alike two strings sound.
        /// </summary>
        /// <param name="p_ValueA">First string; must not be null.</param>
        /// <param name="p_ValueB">Second string.</param>
        /// <returns>
        /// <c>Similarity.MAX_POSSIBLE_SIMILARITY</c> when the strings are equal ignoring case
        /// (ordinal); otherwise the score the Double Metaphone similarity comparer gives
        /// to the two encoded values.
        /// </returns>
        public float CalcSimilarity(string p_ValueA, string p_ValueB)
        {
            bool sameText = p_ValueA.Equals(p_ValueB, StringComparison.OrdinalIgnoreCase);

            if (!sameText)
            {
                DoubleMetaphone encodedA = new DoubleMetaphone(p_ValueA);
                DoubleMetaphone encodedB = new DoubleMetaphone(p_ValueB);

                return _DoubleMetaphoneSimilarityComparer.CalcSimilarity(encodedA, encodedB);
            }

            // Identical text needs no phonetic comparison
            return Similarity.MAX_POSSIBLE_SIMILARITY;
        }
        // TODO:  Make a password strength method that takes requirements as arguments.

        /// <summary>
        /// Determines whether two strings sound alike by comparing their Double Metaphone primary keys.
        /// </summary>
        /// <param name="myString">The first string; may be null.</param>
        /// <param name="target">The string to compare against; may be null.</param>
        /// <returns>
        /// True when both strings are non-null and their primary keys are equal; otherwise false.
        /// </returns>
        public static bool IsHomophone(this string myString, string target)
        {
            if (myString != null && target != null)
            {
                var ownKey    = DoubleMetaphone.GetDoubleMetaphone(myString).Primary;
                var targetKey = DoubleMetaphone.GetDoubleMetaphone(target).Primary;

                return ownKey == targetKey;
            }

            // A missing string is never a homophone of anything
            return false;
        }
        /// <summary>
        /// Collects every stored name whose key is Double-Metaphone-equal to the key of a nickname.
        /// </summary>
        /// <param name="nickname">The nickname to encode and look up.</param>
        /// <returns>
        /// A completed task holding the values of every <c>Keys</c> entry whose key matches,
        /// in enumeration order; an empty list when nothing matches.
        /// </returns>
        public Task<List<string>> GetPhoneticMatches(string nickname)
        {
            var          encoder = new DoubleMetaphone();
            var          wanted  = encoder.GetDoubleMetaphone(nickname);
            List<string> matches = new();

            foreach (var entry in Keys)
            {
                if (!encoder.IsDoubleMetaphoneEqual(entry.Key, wanted))
                {
                    continue;
                }

                matches.AddRange(entry.Value);
            }

            // The work is synchronous; the result is wrapped to satisfy the Task-returning signature
            return Task.FromResult(matches);
        }
Example #9
0
        /// <summary>
        /// Returns the records whose <c>Name</c> has the same Double Metaphone primary key
        /// as <paramref name="name"/>.
        /// </summary>
        /// <param name="name">The name whose primary key is used as the reference.</param>
        /// <param name="DbNames">Records exposing <c>Name</c> and <c>gender</c> members (resolved dynamically).</param>
        /// <returns>One <c>{ Name, gender }</c> string pair per matching record, in input order.</returns>
        private static List<string[]> DoubleMetaphoneGenerateLists(string name, IEnumerable<dynamic> DbNames)
        {
            DoubleMetaphone encoder      = new DoubleMetaphone(name);
            string          targetKey    = encoder.PrimaryKey;
            List<string[]>  sameSounding = new List<string[]>();

            foreach (var record in DbNames)
            {
                // The encoder is reused: computeKeys replaces its keys with those of this record's name
                encoder.computeKeys(record.Name.ToString());

                if (encoder.PrimaryKey == targetKey)
                {
                    sameSounding.Add(new string[] { record.Name.ToString(), record.gender.ToString() });
                }
            }

            return sameSounding;
        }
        /// <summary>
        /// Fills <c>Keys</c> by grouping every user's nickname under its Double Metaphone key.
        /// </summary>
        /// <remarks>
        /// Users are read without change tracking. Nicknames for which the encoder returns
        /// null are skipped.
        /// </remarks>
        /// <returns>A task that completes once every user has been visited.</returns>
        public async Task InitalizeAsync()
        {
            var metaphone = new DoubleMetaphone();

            await _dbContext.Users.AsNoTracking().ForEachAsync(user =>
            {
                var phoneticKey = metaphone.GetDoubleMetaphone(user.NickName);

                if (phoneticKey is null)
                {
                    return;
                }

                if (!Keys.TryGetValue(phoneticKey, out var nicknames))
                {
                    // First nickname seen for this key: start a new bucket
                    Keys[phoneticKey] = new() { user.NickName };
                    return;
                }

                nicknames.Add(user.NickName);
            });
        }
Example #11
0
        /// <summary>
        /// Checks that Double Metaphone primary keys are produced, that known sound-alikes share
        /// a key, that known non-matches differ, and that single letters can be encoded.
        /// </summary>
        public void TestCreation()
        {
            var seward     = new DoubleMetaphone("Seward");
            var soundAlike = new DoubleMetaphone("Soowerred");

            Assert.That(seward.PrimaryKey, Is.Not.Empty);
            Assert.That(seward.PrimaryKey, Is.EqualTo(soundAlike.PrimaryKey));

            // Different words must not collapse onto the same key
            var action = new DoubleMetaphone("ACTION");
            var acton  = new DoubleMetaphone("ACTON");
            Assert.That(action.PrimaryKey, Is.Not.EqualTo(acton.PrimaryKey));

            // Spelling variants should share a key
            var soPaulo  = new DoubleMetaphone("SO PAULO");
            var saoPaulo = new DoubleMetaphone("SAO PAULO");
            Assert.That(soPaulo.PrimaryKey, Is.EqualTo(saoPaulo.PrimaryKey));

            var barthelemy = new DoubleMetaphone("ST BARTHELEMY");
            var barthalemy = new DoubleMetaphone("ST BARTHALEMY");
            Assert.That(barthelemy.PrimaryKey, Is.EqualTo(barthalemy.PrimaryKey));

            Assert.That("West Rutland".SoundsLike("Westford"), Is.False);

            /* JM - the following are known egregious false positives of double metaphone,
             * kept disabled:
             * Assert.That("Caledonia".SoundsLike("Golden Eagle"), Is.False);
             * Assert.That("Pickstown".SoundsLike("Big Stone City"), Is.False);
             * Assert.That("Bentleyville".SoundsLike("Penndel"), Is.False);*/

            // Every single lower-case letter must be encodable without an exception
            for (char letter = 'a'; letter <= 'z'; letter++)
            {
                string single = letter.ToString();
                Assert.DoesNotThrow(() => new DoubleMetaphone(single));
            }
        }
Example #12
0
        /// <summary>
        /// Regroups <c>Addresses</c> into buckets of likely duplicates and runs duplicate
        /// elimination on each bucket.
        /// </summary>
        /// <remarks>
        /// A bucket key is the first three characters of <c>Zip5</c> (or fewer if it is shorter),
        /// a "|" separator, and the phonetic encoding of the city. Buckets are processed in key
        /// order. Once <c>StopPresentingPossibleDuplicates</c> is set, the remaining buckets are
        /// copied through untouched. <c>Addresses</c> is replaced by the concatenated buckets.
        /// </remarks>
        public void EliminateFuzzyDuplicates()
        {
            var buckets = Addresses
                          .GroupBy(address =>
                                   address.Zip5.Substring(0, Math.Min(address.Zip5.Length, 3)) + "|" +
                                   DoubleMetaphone.EncodePhrase(address.City))
                          .OrderBy(bucket => bucket.Key)
                          .ToList();

            var regrouped = new List<ExtractedAddress>();

            foreach (var bucket in buckets)
            {
                var members = bucket.ToList();

                // The flag is re-read for every bucket because it may be set while a bucket is processed
                bool keepPresenting = !StopPresentingPossibleDuplicates;
                if (keepPresenting)
                {
                    EliminateFuzzyDuplicatesInOneGroup(members);
                }

                regrouped.AddRange(members);
            }

            Addresses = regrouped;
        }
Example #13
0
        /// <summary>
        /// Applies a stream of add ("A") and delete ("D") records to the ZipCitiesDownloaded
        /// table and reports the resulting counts through <c>AppendStatusText</c>.
        /// </summary>
        /// <remarks>
        /// When the suppress-update check box is checked, no rows are written or deleted; the
        /// method only counts which operations would have succeeded or failed, using
        /// <c>phantomDeletes</c> to remember keys that an earlier "D" record would have removed.
        /// </remarks>
        /// <param name="textReader">
        /// Source of the records, read through a <c>CsvReader</c> constructed with <c>true</c> and a
        /// tab character (presumably "has headers" and the delimiter - confirm against CsvReader).
        /// The "Type", "ZipCode" and "CityAliasName" fields are read from every record.
        /// </param>
        private void ApplyUpdates(TextReader textReader)
        {
            // In suppressUpdate mode, this keeps track of rows that would have been deleted
            // (only the keys are used; every value stored is null)
            Dictionary <string, object> phantomDeletes = new Dictionary <string, object>();

            int  adds           = 0;
            int  deletes        = 0;
            int  couldNotAdd    = 0;
            int  couldNotDelete = 0;
            int  rowCount       = 0;
            // Read the check box through Invoke (presumably to marshal onto the UI thread - confirm)
            bool suppressUpdate = this.Invoke(() => SuppressUpdateCheckBox.Checked);

            // Name of the column holding the action code; it is not copied into new rows
            string actionField = "Type";

            using (var csvReader = new CsvReader(textReader, true, '\t'))
            {
                // NOTE(review): fieldCount is never read in this method
                int      fieldCount = csvReader.FieldCount;
                string[] headers    = csvReader.GetFieldHeaders();

                while (csvReader.ReadNextRecord())
                {
                    string action        = csvReader[actionField].ToString();
                    string zipCode       = csvReader["ZipCode"].ToString();
                    string cityAliasName = csvReader["CityAliasName"].ToString();
                    // Key for phantomDeletes: zip code and alias concatenated without a separator
                    string key           = zipCode + cityAliasName;

                    switch (action)
                    {
                    case "A":
                        if (suppressUpdate)
                        {
                            // Dry run: the add would fail if the row exists and no earlier
                            // (phantom) delete in this run would have removed it
                            if (!phantomDeletes.ContainsKey(key) &&
                                DB.VoteZipNew.ZipCitiesDownloaded.PrimaryKeyExists(zipCode, cityAliasName))
                            {
                                couldNotAdd++;
                            }
                            else
                            {
                                adds++;
                            }
                        }
                        else
                        {
                            try
                            {
                                var table = new DB.VoteZipNew.ZipCitiesDownloadedTable();
                                var row   = table.NewRow();
                                // Copy every field except the action column into the new row
                                foreach (string field in headers)
                                {
                                    if (field != actionField)
                                    {
                                        row[field] = csvReader[field];
                                    }
                                }
                                // Phonetic keys are truncated to the maximum lengths exposed by ZipCitiesDownloaded
                                string metaphoneAliasName = DoubleMetaphone.EncodePhrase(row.CityAliasName);
                                if (metaphoneAliasName.Length > DB.VoteZipNew.ZipCitiesDownloaded.MetaphoneAliasNameMaxLength)
                                {
                                    metaphoneAliasName =
                                        metaphoneAliasName.Substring(0, DB.VoteZipNew.ZipCitiesDownloaded.MetaphoneAliasNameMaxLength);
                                }
                                string metaphoneAliasAbbreviation = DoubleMetaphone.EncodePhrase(row.CityAliasAbbreviation);
                                if (metaphoneAliasAbbreviation.Length > DB.VoteZipNew.ZipCitiesDownloaded.MetaphoneAliasAbbreviationMaxLength)
                                {
                                    metaphoneAliasAbbreviation =
                                        metaphoneAliasAbbreviation.Substring(0, DB.VoteZipNew.ZipCitiesDownloaded.MetaphoneAliasAbbreviationMaxLength);
                                }
                                row.MetaphoneAliasName         = metaphoneAliasName;
                                row.MetaphoneAliasAbbreviation = metaphoneAliasAbbreviation;
                                table.AddRow(row);
                                // One single-row table is sent to UpdateTable per record
                                DB.VoteZipNew.ZipCitiesDownloaded.UpdateTable(table);
                                adds++;
                            }
                            catch
                            {
                                // Bare catch: any exception from the block above is counted as a
                                // failed add and otherwise discarded
                                couldNotAdd++;
                            }
                        }
                        break;

                    case "D":
                        if (suppressUpdate)
                        {
                            // Dry run: the delete would succeed only if the row currently exists
                            if (DB.VoteZipNew.ZipCitiesDownloaded.PrimaryKeyExists(zipCode, cityAliasName))
                            {
                                deletes++;
                                // Remember the key so a later "A" for the same row is counted as an add
                                if (!phantomDeletes.ContainsKey(key))
                                {
                                    phantomDeletes.Add(key, null);
                                }
                            }
                            else
                            {
                                couldNotDelete++;
                            }
                        }
                        else
                        {
                            try
                            {
                                int deleted = DB.VoteZipNew.ZipCitiesDownloaded.DeleteByPrimaryKey(zipCode, cityAliasName);
                                // Anything other than exactly one deleted row is treated as a failure;
                                // the exception is caught immediately below
                                if (deleted != 1)
                                {
                                    throw new VoteException();
                                }
                                deletes++;
                            }
                            catch
                            {
                                // Bare catch: any exception (including the one thrown above) counts as a failed delete
                                couldNotDelete++;
                            }
                        }
                        break;

                    default:
                        AppendStatusText("Invalid action: {0}", csvReader[actionField]);
                        break;
                    }
                    // Counted for every record, including those with an invalid action
                    rowCount++;
                }
            }

            AppendStatusText("Processed data,  {0} rows", rowCount);
            AppendStatusText("Adds: {0}", adds);
            AppendStatusText("Deletes: {0}", deletes);
            AppendStatusText("Could not add: {0}", couldNotAdd);
            AppendStatusText("Could not delete: {0}", couldNotDelete);
        }
Example #14
0
        /// <summary>
        /// Writes, for each phonetic key builder in turn, a heading followed by the key that
        /// builder produces for every test case, to the debug output.
        /// </summary>
        /// <param name="testCases">The names to encode with every builder.</param>
        private void StringMatchKeyTest(string[] testCases)
        {
            // Each heading is written before its builder is constructed
            Debug.WriteLine("EditexKey");
            DumpPhoneticKeys(new EditexKey(), testCases);

            Debug.WriteLine("DaitchMokotoff");
            DumpPhoneticKeys(new DaitchMokotoff(), testCases);

            Debug.WriteLine("Phonix");
            DumpPhoneticKeys(new Phonix(), testCases);

            Debug.WriteLine("SoundEx");
            DumpPhoneticKeys(new SoundEx(), testCases);

            Debug.WriteLine("SimpleTextKey");
            DumpPhoneticKeys(new SimpleTextKey(), testCases);

            Debug.WriteLine("Metaphone");
            DumpPhoneticKeys(new Metaphone(), testCases);

            Debug.WriteLine("DoubleMetaphone");
            DumpPhoneticKeys(new DoubleMetaphone(), testCases);
        }

        /// <summary>
        /// Writes one tab-indented "key for name" line per test case using the given builder.
        /// </summary>
        /// <param name="keyBuilder">The builder that produces the keys.</param>
        /// <param name="testCases">The names to encode.</param>
        private static void DumpPhoneticKeys(StringPhoneticKeyBuilder keyBuilder, string[] testCases)
        {
            foreach (var name in testCases)
            {
                string key = keyBuilder.BuildKey(name);
                Debug.WriteLine("\t{0} for {1}", key, name);
            }
        }
Example #15
0
        /// <summary>
        /// Adds a word to the word list and to the metaphone dictionaries unless it is already present.
        /// </summary>
        /// <param name="word">The word to add.</param>
        /// <returns>True when the word was added; false when it was already contained.</returns>
        private bool AddWordInternal(string word)
        {
            if (ContainsWord(word))
            {
                return false;
            }

            var encoder = new DoubleMetaphone();

            // The second argument selects the alternate encoding when true
            var primary   = encoder.Encode(word, false);
            var alternate = encoder.Encode(word, true);

            WordsList.Add(word);
            AddWordToMetaphoneDictionaries(word, primary, alternate);

            return true;
        }
Example #16
0
        /// <summary>
        /// Creates a result holder with empty primary and alternate code buffers.
        /// </summary>
        /// <param name="maxLength">Stored as this result's maximum length.</param>
        /// <param name="owner">
        /// The encoder this result belongs to; its <c>GetMaxCodeLen()</c> value is used as the
        /// initial capacity of both buffers.
        /// </param>
        internal DoubleMetaphoneResult(int maxLength, DoubleMetaphone owner)
        {
            // Buffers are sized from the owner, not from maxLength
            primary   = new StringBuilder(owner.GetMaxCodeLen());
            alternate = new StringBuilder(owner.GetMaxCodeLen());

            this.owner     = owner;
            this.maxLength = maxLength;
        }
Example #17
0
        /// <summary>
        /// Reads every record from a CSV source, computes the phonetic (Double Metaphone) keys for
        /// the city alias name and abbreviation, and either writes the extended record to
        /// <c>OutputFilePath</c> (when <c>ToCsv</c> is set) or inserts it into the
        /// ZipCitiesDownloaded table.
        /// </summary>
        /// <param name="textReader">
        /// Source of the records, read through a <c>CsvReader</c> constructed with <c>true</c>
        /// (presumably "has headers" - confirm against CsvReader). The "CityAliasName" and
        /// "CityAliasAbbreviation" fields are read from every record.
        /// </param>
        /// <returns>The number of records read.</returns>
        private int LoadData(TextReader textReader)
        {
            TextWriter writer   = null;
            int        rowCount = 0;

            try
            {
                // The output file is only opened in CSV mode; it is closed in the finally block
                if (ToCsv)
                {
                    writer = new StreamWriter(OutputFilePath);
                }
                using (var csvReader = new CsvReader(textReader, true))
                {
                    // NOTE(review): fieldCount is never read in this method
                    int      fieldCount = csvReader.FieldCount;
                    string[] headers    = csvReader.GetFieldHeaders();

                    // The table is only needed when writing to the database
                    DB.VoteZipNew.ZipCitiesDownloadedTable table = null;
                    if (!ToCsv)
                    {
                        table = new DB.VoteZipNew.ZipCitiesDownloadedTable();
                    }
                    DB.VoteZipNew.ZipCitiesDownloadedRow row;
                    //List<string> fields = new List<string>();
                    SimpleCsvWriter csvWriter = new SimpleCsvWriter();
                    while (csvReader.ReadNextRecord())
                    {
                        string cityAliasName         = csvReader["CityAliasName"];
                        string cityAliasAbbreviation = csvReader["CityAliasAbbreviation"];
                        // Phonetic keys are truncated to the maximum lengths exposed by ZipCitiesDownloaded
                        string metaphoneAliasName    = DoubleMetaphone.EncodePhrase(cityAliasName);
                        if (metaphoneAliasName.Length > DB.VoteZipNew.ZipCitiesDownloaded.MetaphoneAliasNameMaxLength)
                        {
                            metaphoneAliasName =
                                metaphoneAliasName.Substring(0, DB.VoteZipNew.ZipCitiesDownloaded.MetaphoneAliasNameMaxLength);
                        }
                        string metaphoneAliasAbbreviation = DoubleMetaphone.EncodePhrase(cityAliasAbbreviation);
                        if (metaphoneAliasAbbreviation.Length > DB.VoteZipNew.ZipCitiesDownloaded.MetaphoneAliasAbbreviationMaxLength)
                        {
                            metaphoneAliasAbbreviation =
                                metaphoneAliasAbbreviation.Substring(0, DB.VoteZipNew.ZipCitiesDownloaded.MetaphoneAliasAbbreviationMaxLength);
                        }
                        rowCount++;
                        if (ToCsv)
                        {
                            // CSV mode: the original fields in header order, then the two phonetic keys
                            //fields.Clear();
                            foreach (string field in headers)
                            {
                                //AddField(fields, csvReader[field]);
                                csvWriter.AddField(csvReader[field]);
                            }
                            //AddField(fields, metaphoneAliasName);
                            //AddField(fields, metaphoneAliasAbbreviation);
                            csvWriter.AddField(metaphoneAliasName);
                            csvWriter.AddField(metaphoneAliasAbbreviation);
                            //writer.WriteLine(string.Join(",", fields));
                            csvWriter.Write(writer);
                        }
                        else
                        {
                            // Database mode: copy every field into a new row and attach the phonetic keys
                            row = table.NewRow();
                            foreach (string field in headers)
                            {
                                row[field] = csvReader[field];
                            }
                            row.MetaphoneAliasName         = metaphoneAliasName;
                            row.MetaphoneAliasAbbreviation = metaphoneAliasAbbreviation;
                            table.AddRow(row);
                            if (rowCount % 1000 == 0) // flush every 1000 rows
                            {
                                // Send the batch and start a fresh table so flushed rows are not sent again
                                // NOTE(review): the meaning of the second argument (0) is not visible here
                                DB.VoteZipNew.ZipCitiesDownloaded.UpdateTable(table, 0);
                                table = new DB.VoteZipNew.ZipCitiesDownloadedTable();
                            }
                        }
                    }
                    // Send whatever is left after the last full batch (the table may be empty)
                    if (!ToCsv)
                    {
                        DB.VoteZipNew.ZipCitiesDownloaded.UpdateTable(table);
                    }
                }
            }
            finally
            {
                // Close the output file even when reading or writing failed
                if (writer != null)
                {
                    writer.Close();
                }
            }
            return(rowCount);
        }
        /// <summary>Static convenience wrapper: computes both metaphone keys of a word
        ///     without the caller having to create an instance.</summary>
        ///
        /// <param name="word">Word whose metaphone keys are to be computed</param>
        /// <param name="primaryKey">Receives the primary metaphone key</param>
        /// <param name="alternateKey">Receives the alternate metaphone key, or null if the
        ///     word has no alternate key by double metaphone</param>
        public static void doubleMetaphone(String word, out String primaryKey, out String alternateKey)
        {
            // A temporary encoder does the work; only its two keys are handed back
            DoubleMetaphone encoder = new DoubleMetaphone(word);

            primaryKey   = encoder.PrimaryKey;
            alternateKey = encoder.AlternateKey;
        }
        /// <summary>Static convenience wrapper: computes both metaphone keys of a word
        ///     without the caller having to create an instance.</summary>
        ///
        /// <param name="word">Word whose metaphone keys are to be computed</param>
        /// <param name="primaryKey">Variable overwritten with the primary metaphone key</param>
        /// <param name="alternateKey">Variable overwritten with the alternate metaphone key, or with
        ///     null if the word has no alternate key by double metaphone</param>
        public static void CalcDoubleMetaphone(String word, ref String primaryKey, ref String alternateKey)
        {
            // The incoming values of both ref arguments are ignored and replaced
            var encoder = new DoubleMetaphone(word);

            primaryKey   = encoder.PrimaryKey;
            alternateKey = encoder.AlternateKey;
        }