/// <summary>
/// Collects the double metaphone keys (primary and alternate) produced
/// for every word of the supplied string.
/// </summary>
/// <param name="value">The text whose words are encoded</param>
/// <returns>
/// The distinct non-empty keys found; an empty set when
/// <paramref name="value"/> is null or empty.
/// </returns>
private HashSet<string> GetPhoneticKeys(string value)
{
    var keys = new HashSet<string>();

    if (!string.IsNullOrEmpty(value))
    {
        foreach (string token in value.SplitWords())
        {
            // Each word starts from blank keys before it is encoded
            string primaryKey = string.Empty;
            string alternateKey = string.Empty;

            DoubleMetaphone.doubleMetaphone(token, ref primaryKey, ref alternateKey);

            if (!string.IsNullOrEmpty(primaryKey))
            {
                keys.Add(primaryKey);
            }

            if (!string.IsNullOrEmpty(alternateKey))
            {
                keys.Add(alternateKey);
            }
        }
    }

    return keys;
}
/// <summary>
/// Calculates distance using a double metaphone (soundex) to compare
/// each word in a string.
/// </summary>
/// <param name="sourceValue">The source value</param>
/// <param name="targetValue">The target value</param>
/// <returns>
/// <c>1 - (2 * matches / totalWords)</c>, where <c>matches</c> is the number
/// of source words whose primary (or, failing that, alternate) key is among
/// the target's keys and <c>totalWords</c> is the combined word count of both
/// values. Returns null when neither value contains any words, because the
/// ratio is undefined in that case.
/// </returns>
public override double? CalculateDistance(string sourceValue, string targetValue)
{
    HashSet<string> targetKeys = GetPhoneticKeys(targetValue);

    // Split the source once and reuse it for both the count and the loop
    var sourceWords = sourceValue.SplitWords();
    int totalWordCount = sourceWords.Length + targetValue.SplitWords().Length;

    // Without this guard the expression below evaluates 0 / 0 and the
    // method would hand NaN back to the caller.
    if (totalWordCount == 0)
    {
        return null;
    }

    double matches = 0;

    // Loop over all the words in the source string
    foreach (string word in sourceWords)
    {
        string primary = string.Empty;
        string alternate = string.Empty;

        DoubleMetaphone.doubleMetaphone(word, ref primary, ref alternate);

        // A word counts at most once: the alternate key is only consulted
        // when the primary key did not match.
        if (!string.IsNullOrEmpty(primary) && targetKeys.Contains(primary))
        {
            matches++;
        }
        else if (!string.IsNullOrEmpty(alternate) && targetKeys.Contains(alternate))
        {
            matches++;
        }
    }

    return 1 - (2 * matches / totalWordCount);
}
/// <summary>
/// Reads the "NAMES" and "GENDER" columns of a worksheet and returns the
/// rows whose (lower-cased) name has the same primary metaphone key as
/// <paramref name="name"/>.
/// </summary>
/// <param name="name">Name whose primary key is the reference</param>
/// <param name="path">Path handed to the Excel reader</param>
/// <param name="worksheet">Worksheet to load from the workbook</param>
/// <returns>One { name, gender } pair per matching row</returns>
private static List<string[]> DoubleMetaphoneGenerateLists(string name, string path, string worksheet)
{
    var encoder = new DoubleMetaphone(name);
    string referenceKey = encoder.PrimaryKey;
    var matches = new List<string[]>();

    DataTable sheet = new ExcelReader(path).GetWorksheet(worksheet);
    var rows = sheet.ToJagged<string>("NAMES", "GENDER");

    foreach (var row in rows)
    {
        string candidate = row[0].ToLower();
        string gender = row[1];

        // The encoder instance is reused: recompute its keys for this row
        encoder.computeKeys(candidate);

        if (encoder.PrimaryKey == referenceKey)
        {
            matches.Add(new[] { candidate, gender });
        }
    }

    return matches;
}
/// <summary>
/// Returns the Double Metaphone secondary key of a string.
/// </summary>
/// <param name="myString">The string to encode; may be null.</param>
/// <returns>
/// The secondary key, or null when <paramref name="myString"/> is null.
/// </returns>
public static string DoubleMetaphoneSecondaryKey(this string myString)
{
    if (myString != null)
    {
        return DoubleMetaphone.GetDoubleMetaphone(myString).Secondary;
    }

    return null;
}
/// <summary>
/// Builds a gazetteer entry from one colon-delimited record. Fields are
/// read by position after splitting on ':'; the highest index used is 20.
/// </summary>
/// <param name="line">The raw record text.</param>
public OS50kGazetteer(string line)
{
    line = EnhancedTextInfo.RemoveDiacritics(line); // remove any special characters for Gazetteer processing
    string[] values = line.Split(':');

    int.TryParse(values[0], out int intval);
    SequenceNumber = intval;
    DefinitiveName = values[2];

    // The fractional parts are parsed with the invariant culture: under a
    // culture that uses ',' as the decimal separator the default overload
    // would misread a value such as "34.5".
    var numberStyles = System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Each coordinate is a whole part plus a second value divided by 60
    int.TryParse(values[4], out intval);
    double.TryParse(values[5], numberStyles, invariant, out double latitude);
    Latitude = intval + latitude / 60;

    int.TryParse(values[6], out intval);
    double.TryParse(values[7], numberStyles, invariant, out double longitude);
    Longitude = intval + longitude / 60;
    if (values[10] == "W")
    {
        Longitude = -1 * Longitude; // West Longitudes are negative
    }

    Coordinate c = new Coordinate(Longitude, Latitude);
    c = MapTransforms.TransformCoordinate(c);
    Point = GeometryFactory.Default.CreatePoint(c);

    CountyCode = values[11];
    CountyName = values[13];
    FeatureCode = values[14];
    ParishName = values[20];

    // Strip the trailing " CP" / " Community" suffixes from the parish name
    if (ParishName.EndsWith(" CP"))
    {
        ParishName = ParishName.Substring(0, ParishName.Length - 3);
    }
    if (ParishName.EndsWith(" Community"))
    {
        ParishName = ParishName.Substring(0, ParishName.Length - 10);
    }

    FixCommas();
    FixAbbreviations();

    ModernCounty county = Regions.OS_GetCounty(CountyCode);
    if (county == null)
    {
        // Unknown county code: no country and no fuzzy keys are set
        CountryName = string.Empty;
    }
    else
    {
        CountryName = county.CountryName;

        string definitiveKey = new DoubleMetaphone(DefinitiveName).PrimaryKey;
        string parishKey = new DoubleMetaphone(ParishName).PrimaryKey;
        string countyKey = new DoubleMetaphone(CountyName).PrimaryKey;
        string countryKey = new DoubleMetaphone(county.CountryName).PrimaryKey;

        FuzzyMatch = definitiveKey + ":" + parishKey + ":" + countyKey + ":" + countryKey;

        // Previously each part was assigned with '=' rather than appended,
        // so only the country key (plus a trailing ':') survived. The key is
        // now the same as FuzzyMatch with the parish part left out.
        // NOTE(review): confirm that whatever compares against
        // FuzzyNoParishMatch builds its key in this same shape.
        FuzzyNoParishMatch = definitiveKey + ":" + countyKey + ":" + countryKey;
    }
}
/// <summary>
/// Scores how alike two strings are. Strings that are equal under an
/// ordinal, case-insensitive comparison get the maximum score; otherwise
/// both are encoded and the score comes from the metaphone comparer.
/// </summary>
/// <param name="p_ValueA">First value</param>
/// <param name="p_ValueB">Second value</param>
/// <returns>The similarity score</returns>
public float CalcSimilarity(string p_ValueA, string p_ValueB)
{
    bool sameText = p_ValueA.Equals(p_ValueB, StringComparison.OrdinalIgnoreCase);
    if (!sameText)
    {
        var encodedA = new DoubleMetaphone(p_ValueA);
        var encodedB = new DoubleMetaphone(p_ValueB);
        return _DoubleMetaphoneSimilarityComparer.CalcSimilarity(encodedA, encodedB);
    }

    return Similarity.MAX_POSSIBLE_SIMILARITY;
}
// TODO: Make a password strength method that takes requirements as arguments.

/// <summary>
/// Tells whether two strings sound alike by comparing their Double
/// Metaphone primary keys.
/// </summary>
/// <param name="myString">The first string; may be null.</param>
/// <param name="target">The second string; may be null.</param>
/// <returns>
/// True when both strings are non-null and their primary keys are equal;
/// otherwise false.
/// </returns>
public static bool IsHomophone(this string myString, string target)
{
    if (myString != null && target != null)
    {
        var sourceKey = DoubleMetaphone.GetDoubleMetaphone(myString).Primary;
        var targetKey = DoubleMetaphone.GetDoubleMetaphone(target).Primary;
        return sourceKey == targetKey;
    }

    return false;
}
/// <summary>
/// Gathers every stored name whose key is reported equal to the double
/// metaphone encoding of <paramref name="nickname"/>.
/// </summary>
/// <param name="nickname">The nickname to look up</param>
/// <returns>An already-completed task holding the matching names</returns>
public Task<List<string>> GetPhoneticMatches(string nickname)
{
    var encoder = new DoubleMetaphone();
    var wanted = encoder.GetDoubleMetaphone(nickname);
    var matches = new List<string>();

    foreach (var entry in Keys)
    {
        if (!encoder.IsDoubleMetaphoneEqual(entry.Key, wanted))
        {
            continue;
        }

        matches.AddRange(entry.Value);
    }

    return Task.FromResult(matches);
}
/// <summary>
/// Returns the records whose <c>Name</c> has the same primary metaphone
/// key as <paramref name="name"/>.
/// </summary>
/// <param name="name">Name whose primary key is the reference</param>
/// <param name="DbNames">Records exposing <c>Name</c> and <c>gender</c> members</param>
/// <returns>One { name, gender } pair per matching record</returns>
private static List<string[]> DoubleMetaphoneGenerateLists(string name, IEnumerable<dynamic> DbNames)
{
    var encoder = new DoubleMetaphone(name);
    string referenceKey = encoder.PrimaryKey;
    var matches = new List<string[]>();

    foreach (var record in DbNames)
    {
        // The encoder instance is reused: recompute its keys for this record
        encoder.computeKeys(record.Name.ToString());

        if (encoder.PrimaryKey != referenceKey)
        {
            continue;
        }

        matches.Add(new string[] { record.Name.ToString(), record.gender.ToString() });
    }

    return matches;
}
/// <summary>
/// Walks all users (untracked) and files each nickname in <c>Keys</c>
/// under its double metaphone encoding. Users whose nickname yields a
/// null key are skipped.
/// </summary>
public async Task InitalizeAsync()
{
    var encoder = new DoubleMetaphone();

    await _dbContext.Users.AsNoTracking().ForEachAsync(user =>
    {
        var key = encoder.GetDoubleMetaphone(user.NickName);
        if (key is not null)
        {
            // Create the bucket on first sight of a key, then append
            if (!Keys.TryGetValue(key, out var bucket))
            {
                bucket = new();
                Keys[key] = bucket;
            }

            bucket.Add(user.NickName);
        }
    });
}
/// <summary>
/// Checks primary-key equality and inequality for a handful of name pairs
/// and that every single-letter input can be encoded without throwing.
/// </summary>
public void TestCreation()
{
    var seward = new DoubleMetaphone("Seward");
    var soowerred = new DoubleMetaphone("Soowerred");
    Assert.That(seward.PrimaryKey, Is.Not.Empty);
    Assert.That(seward.PrimaryKey, Is.EqualTo(soowerred.PrimaryKey));

    var action = new DoubleMetaphone("ACTION").PrimaryKey;
    var acton = new DoubleMetaphone("ACTON").PrimaryKey;
    Assert.That(action, Is.Not.EqualTo(acton));

    var soPaulo = new DoubleMetaphone("SO PAULO").PrimaryKey;
    var saoPaulo = new DoubleMetaphone("SAO PAULO").PrimaryKey;
    Assert.That(soPaulo, Is.EqualTo(saoPaulo));

    var barthelemy = new DoubleMetaphone("ST BARTHELEMY").PrimaryKey;
    var barthalemy = new DoubleMetaphone("ST BARTHALEMY").PrimaryKey;
    Assert.That(barthelemy, Is.EqualTo(barthalemy));

    Assert.That("West Rutland".SoundsLike("Westford"), Is.False);

    /* JM - these egregious false positives makes me want to abandon double metaphone all together
     * Assert.That("Caledonia".SoundsLike("Golden Eagle"), Is.False);
     * Assert.That("Pickstown".SoundsLike("Big Stone City"), Is.False);
     * Assert.That("Bentleyville".SoundsLike("Penndel"), Is.False);*/

    // Single-character input must not blow up the encoder
    for (char letter = 'a'; letter <= 'z'; letter++)
    {
        string single = letter.ToString();
        Assert.DoesNotThrow(() => new DoubleMetaphone(single));
    }
}
/// <summary>
/// Groups the addresses by the first (up to) three characters of the zip
/// plus the metaphone encoding of the city, runs duplicate elimination on
/// each group, and replaces <c>Addresses</c> with the groups' contents in
/// key order.
/// </summary>
public void EliminateFuzzyDuplicates()
{
    // Group by 1st 3 digits of zip and city metaphone
    var buckets =
        (from addr in Addresses
         group addr by addr.Zip5.Substring(0, Math.Min(addr.Zip5.Length, 3)) + "|" +
                       DoubleMetaphone.EncodePhrase(addr.City)
         into cityGroup
         orderby cityGroup.Key
         select cityGroup).ToList();

    var survivors = new List<ExtractedAddress>();
    foreach (var bucket in buckets)
    {
        var members = bucket.ToList();

        // The flag is re-read for every group, so it can switch the
        // elimination off part-way through the run.
        if (!StopPresentingPossibleDuplicates)
        {
            EliminateFuzzyDuplicatesInOneGroup(members);
        }

        survivors.AddRange(members);
    }

    Addresses = survivors;
}
/// <summary>
/// Applies a tab-delimited update file to the ZipCitiesDownloaded table.
/// Each record's "Type" column selects the action: "A" adds the row (with
/// its metaphone columns computed here), "D" deletes it by primary key.
/// When the suppress-update box is checked nothing is written; the method
/// only counts what would have happened. A summary is appended to the
/// status text at the end.
/// </summary>
/// <param name="textReader">Source of the tab-delimited records, with a header row.</param>
private void ApplyUpdates(TextReader textReader)
{
    // In suppressUpdate mode, this keeps track of rows that would have been deleted
    var phantomDeletes = new HashSet<string>();
    int adds = 0;
    int deletes = 0;
    int couldNotAdd = 0;
    int couldNotDelete = 0;
    int rowCount = 0;
    bool suppressUpdate = this.Invoke(() => SuppressUpdateCheckBox.Checked);
    string actionField = "Type";

    using (var csvReader = new CsvReader(textReader, true, '\t'))
    {
        string[] headers = csvReader.GetFieldHeaders();

        while (csvReader.ReadNextRecord())
        {
            string action = csvReader[actionField].ToString();
            string zipCode = csvReader["ZipCode"].ToString();
            string cityAliasName = csvReader["CityAliasName"].ToString();
            string key = zipCode + cityAliasName;

            switch (action)
            {
                case "A":
                    if (suppressUpdate)
                    {
                        // An add would fail only if the row exists and was not
                        // (virtually) deleted earlier in this same file.
                        if (!phantomDeletes.Contains(key) &&
                            DB.VoteZipNew.ZipCitiesDownloaded.PrimaryKeyExists(zipCode, cityAliasName))
                        {
                            couldNotAdd++;
                        }
                        else
                        {
                            adds++;
                        }
                    }
                    else
                    {
                        try
                        {
                            var table = new DB.VoteZipNew.ZipCitiesDownloadedTable();
                            var row = table.NewRow();

                            // Copy every column except the action marker
                            foreach (string field in headers)
                            {
                                if (field != actionField)
                                {
                                    row[field] = csvReader[field];
                                }
                            }

                            // Metaphone columns are clipped to their maximum lengths
                            string metaphoneAliasName = DoubleMetaphone.EncodePhrase(row.CityAliasName);
                            if (metaphoneAliasName.Length > DB.VoteZipNew.ZipCitiesDownloaded.MetaphoneAliasNameMaxLength)
                            {
                                metaphoneAliasName = metaphoneAliasName.Substring(0,
                                    DB.VoteZipNew.ZipCitiesDownloaded.MetaphoneAliasNameMaxLength);
                            }

                            string metaphoneAliasAbbreviation = DoubleMetaphone.EncodePhrase(row.CityAliasAbbreviation);
                            if (metaphoneAliasAbbreviation.Length > DB.VoteZipNew.ZipCitiesDownloaded.MetaphoneAliasAbbreviationMaxLength)
                            {
                                metaphoneAliasAbbreviation = metaphoneAliasAbbreviation.Substring(0,
                                    DB.VoteZipNew.ZipCitiesDownloaded.MetaphoneAliasAbbreviationMaxLength);
                            }

                            row.MetaphoneAliasName = metaphoneAliasName;
                            row.MetaphoneAliasAbbreviation = metaphoneAliasAbbreviation;
                            table.AddRow(row);
                            DB.VoteZipNew.ZipCitiesDownloaded.UpdateTable(table);
                            adds++;
                        }
                        catch
                        {
                            // Best effort: any failure is tallied and reported in the summary
                            couldNotAdd++;
                        }
                    }
                    break;

                case "D":
                    if (suppressUpdate)
                    {
                        if (DB.VoteZipNew.ZipCitiesDownloaded.PrimaryKeyExists(zipCode, cityAliasName))
                        {
                            deletes++;
                            phantomDeletes.Add(key); // no-op when the key is already recorded
                        }
                        else
                        {
                            couldNotDelete++;
                        }
                    }
                    else
                    {
                        try
                        {
                            int deleted = DB.VoteZipNew.ZipCitiesDownloaded.DeleteByPrimaryKey(zipCode, cityAliasName);

                            // Anything other than exactly one deleted row counts as a failure
                            if (deleted == 1)
                            {
                                deletes++;
                            }
                            else
                            {
                                couldNotDelete++;
                            }
                        }
                        catch
                        {
                            // Best effort: any failure is tallied and reported in the summary
                            couldNotDelete++;
                        }
                    }
                    break;

                default:
                    AppendStatusText("Invalid action: {0}", csvReader[actionField]);
                    break;
            }

            rowCount++;
        }
    }

    AppendStatusText("Processed data, {0} rows", rowCount);
    AppendStatusText("Adds: {0}", adds);
    AppendStatusText("Deletes: {0}", deletes);
    AppendStatusText("Could not add: {0}", couldNotAdd);
    AppendStatusText("Could not delete: {0}", couldNotDelete);
}
/// <summary>
/// Writes, for each phonetic key builder in turn, a heading followed by
/// the key it builds for every test case, to the debug output.
/// </summary>
/// <param name="testCases">Names to build keys for</param>
private void StringMatchKeyTest(string[] testCases)
{
    Debug.WriteLine("EditexKey");
    WritePhoneticKeys(new EditexKey(), testCases);

    Debug.WriteLine("DaitchMokotoff");
    WritePhoneticKeys(new DaitchMokotoff(), testCases);

    Debug.WriteLine("Phonix");
    WritePhoneticKeys(new Phonix(), testCases);

    Debug.WriteLine("SoundEx");
    WritePhoneticKeys(new SoundEx(), testCases);

    Debug.WriteLine("SimpleTextKey");
    WritePhoneticKeys(new SimpleTextKey(), testCases);

    Debug.WriteLine("Metaphone");
    WritePhoneticKeys(new Metaphone(), testCases);

    Debug.WriteLine("DoubleMetaphone");
    WritePhoneticKeys(new DoubleMetaphone(), testCases);
}

/// <summary>
/// Writes one "key for name" line per test case using the given builder.
/// </summary>
private static void WritePhoneticKeys(StringPhoneticKeyBuilder keyBuilder, string[] testCases)
{
    foreach (var name in testCases)
    {
        string key = keyBuilder.BuildKey(name);
        Debug.WriteLine("\t{0} for {1}", key, name);
    }
}
/// <summary>
/// Adds a word to the word list and the metaphone dictionaries unless it
/// is already present.
/// </summary>
/// <param name="word">The word to add</param>
/// <returns>True when the word was added; false when it was already contained.</returns>
private bool AddWordInternal(string word)
{
    if (ContainsWord(word))
    {
        return false;
    }

    var encoder = new DoubleMetaphone();
    var primaryKey = encoder.Encode(word, false);
    var alternateKey = encoder.Encode(word, true);

    WordsList.Add(word);
    AddWordToMetaphoneDictionaries(word, primaryKey, alternateKey);
    return true;
}
/// <summary>
/// Creates an empty result holder bound to the encoder that owns it.
/// </summary>
/// <param name="maxLength">Maximum length stored for this result</param>
/// <param name="owner">
/// The owning encoder; its <c>GetMaxCodeLen()</c> value (not
/// <paramref name="maxLength"/>) is used as the initial capacity of both
/// builders.
/// </param>
internal DoubleMetaphoneResult(int maxLength, DoubleMetaphone owner)
{
    this.owner = owner;
    this.maxLength = maxLength;

    var capacity = owner.GetMaxCodeLen();
    primary = new StringBuilder(capacity);
    alternate = new StringBuilder(capacity);
}
/// <summary>
/// Reads every record from a CSV source (with header row), computes the
/// two metaphone columns for it, and either writes the extended record to
/// the CSV output file (when <c>ToCsv</c> is set) or inserts it into the
/// ZipCitiesDownloaded table, flushing to the database every 1000 rows.
/// </summary>
/// <param name="textReader">Source of the CSV records</param>
/// <returns>The number of records read</returns>
private int LoadData(TextReader textReader)
{
    int rowCount = 0;

    // The writer only exists in CSV mode; a using statement accepts null
    // and guarantees the file is closed on every exit path.
    using (TextWriter writer = ToCsv ? new StreamWriter(OutputFilePath) : null)
    using (var csvReader = new CsvReader(textReader, true))
    {
        string[] headers = csvReader.GetFieldHeaders();

        DB.VoteZipNew.ZipCitiesDownloadedTable table = null;
        if (!ToCsv)
        {
            table = new DB.VoteZipNew.ZipCitiesDownloadedTable();
        }

        SimpleCsvWriter csvWriter = new SimpleCsvWriter();

        while (csvReader.ReadNextRecord())
        {
            string cityAliasName = csvReader["CityAliasName"];
            string cityAliasAbbreviation = csvReader["CityAliasAbbreviation"];

            // Metaphone columns are clipped to their maximum lengths
            string metaphoneAliasName = DoubleMetaphone.EncodePhrase(cityAliasName);
            if (metaphoneAliasName.Length > DB.VoteZipNew.ZipCitiesDownloaded.MetaphoneAliasNameMaxLength)
            {
                metaphoneAliasName = metaphoneAliasName.Substring(0,
                    DB.VoteZipNew.ZipCitiesDownloaded.MetaphoneAliasNameMaxLength);
            }

            string metaphoneAliasAbbreviation = DoubleMetaphone.EncodePhrase(cityAliasAbbreviation);
            if (metaphoneAliasAbbreviation.Length > DB.VoteZipNew.ZipCitiesDownloaded.MetaphoneAliasAbbreviationMaxLength)
            {
                metaphoneAliasAbbreviation = metaphoneAliasAbbreviation.Substring(0,
                    DB.VoteZipNew.ZipCitiesDownloaded.MetaphoneAliasAbbreviationMaxLength);
            }

            rowCount++;

            if (ToCsv)
            {
                // Original columns first, then the two computed ones
                foreach (string field in headers)
                {
                    csvWriter.AddField(csvReader[field]);
                }
                csvWriter.AddField(metaphoneAliasName);
                csvWriter.AddField(metaphoneAliasAbbreviation);
                csvWriter.Write(writer);
            }
            else
            {
                DB.VoteZipNew.ZipCitiesDownloadedRow row = table.NewRow();
                foreach (string field in headers)
                {
                    row[field] = csvReader[field];
                }
                row.MetaphoneAliasName = metaphoneAliasName;
                row.MetaphoneAliasAbbreviation = metaphoneAliasAbbreviation;
                table.AddRow(row);

                if (rowCount % 1000 == 0) // flush every 1000 rows
                {
                    DB.VoteZipNew.ZipCitiesDownloaded.UpdateTable(table, 0);
                    table = new DB.VoteZipNew.ZipCitiesDownloadedTable();
                }
            }
        }

        // Push whatever is left since the last flush
        if (!ToCsv)
        {
            DB.VoteZipNew.ZipCitiesDownloaded.UpdateTable(table);
        }
    }

    return rowCount;
}
/// <summary>Static convenience wrapper: computes the metaphone keys of a word
/// without the caller having to create an encoder instance.</summary>
///
/// <param name="word">Word whose metaphone keys are to be computed</param>
/// <param name="primaryKey">Receives the encoder's primary key</param>
/// <param name="alternateKey">Receives the encoder's alternate key as reported
/// by its <c>AlternateKey</c> property</param>
public static void doubleMetaphone(String word, out String primaryKey, out String alternateKey)
{
    DoubleMetaphone encoder = new DoubleMetaphone(word);

    primaryKey = encoder.PrimaryKey;
    alternateKey = encoder.AlternateKey;
}
/// <summary>Static convenience wrapper: computes the metaphone keys of a word
/// without the caller having to create an encoder instance.</summary>
///
/// <param name="word">Word whose metaphone keys are to be computed</param>
/// <param name="primaryKey">Overwritten with the encoder's primary key</param>
/// <param name="alternateKey">Overwritten with the encoder's alternate key as
/// reported by its <c>AlternateKey</c> property</param>
public static void CalcDoubleMetaphone(String word, ref String primaryKey, ref String alternateKey)
{
    var encoder = new DoubleMetaphone(word);

    primaryKey = encoder.PrimaryKey;
    alternateKey = encoder.AlternateKey;
}