/// <summary>
/// Encodes the string with the Double Metaphone phonetic rules, producing a
/// primary and an alternate (secondary) key via <c>MetaphoneData</c>.
/// </summary>
/// <param name="self">The word or name to encode. Letter case is ignored.</param>
/// <returns>
/// <paramref name="self"/> unchanged when it is null or empty; otherwise the value of
/// <c>MetaphoneData.ToString()</c> after encoding (its exact format is defined by that type).
/// </returns>
/// <remarks>
/// Encoding stops as soon as the input is exhausted or both
/// <c>PrimaryLength</c> and <c>SecondaryLength</c> have reached 4.
/// </remarks>
public static string GenerateDoubleMetaphone(this string self)
{
    MetaphoneData metaphoneData = new MetaphoneData();
    int current = 0;

    if (string.IsNullOrEmpty(self))
    {
        return self;
    }

    int last = self.Length - 1; //zero based index

    // The trailing space is a sentinel so that look-ahead reads of
    // workingString[current + 1] never run past the end of the buffer.
    string workingString = self.ToUpperInvariant() + " ";

    // Test the upper-cased copy: char-based IndexOf is case-sensitive, so checking
    // the raw input made the result depend on the caller's letter casing.
    bool isSlavoGermanic = (workingString.IndexOf('W') > -1)
        || (workingString.IndexOf('K') > -1)
        || (workingString.IndexOf("CZ", StringComparison.Ordinal) > -1)
        || (workingString.IndexOf("WITZ", StringComparison.Ordinal) > -1);

    //skip these when at start of word
    if (workingString.StartsWith(StringComparison.OrdinalIgnoreCase, "GN", "KN", "PN", "WR", "PS"))
    {
        current += 1;
    }

    //Initial 'X' is pronounced 'Z' e.g. 'Xavier'
    if (workingString[0] == 'X')
    {
        metaphoneData.Add("S"); //'Z' maps to 'S'
        current += 1;
    }

    while ((metaphoneData.PrimaryLength < 4) || (metaphoneData.SecondaryLength < 4))
    {
        if (current >= self.Length)
        {
            break;
        }

        switch (workingString[current])
        {
            case 'A':
            case 'E':
            case 'I':
            case 'O':
            case 'U':
            case 'Y':
                if (current == 0)
                {
                    //all init vowels now map to 'A'
                    metaphoneData.Add("A");
                }
                current += 1;
                break;

            case 'B':
                //"-mb", e.g. "dumb", already skipped over...
                metaphoneData.Add("P");
                current += (workingString[current + 1] == 'B') ? 2 : 1;
                break;

            // NOTE(review): the reference algorithm maps C-cedilla to "S" here; this literal
            // may be an encoding artifact - confirm against the original source file.
            case 'Ã':
                metaphoneData.Add("S");
                current += 1;
                break;

            case 'C':
                //various germanic
                if ((current > 1)
                    && !IsVowel(workingString[current - 2])
                    && StringAt(workingString, (current - 1), "ACH")
                    && ((workingString[current + 2] != 'I')
                        && ((workingString[current + 2] != 'E')
                            || StringAt(workingString, (current - 2), "BACHER", "MACHER"))))
                {
                    metaphoneData.Add("K");
                    current += 2;
                    break;
                }

                //special case 'caesar'
                if ((current == 0) && StringAt(workingString, current, "CAESAR"))
                {
                    metaphoneData.Add("S");
                    current += 2;
                    break;
                }

                //italian 'chianti'
                if (StringAt(workingString, current, "CHIA"))
                {
                    metaphoneData.Add("K");
                    current += 2;
                    break;
                }

                if (StringAt(workingString, current, "CH"))
                {
                    //find 'michael'
                    if ((current > 0) && StringAt(workingString, current, "CHAE"))
                    {
                        metaphoneData.Add("K", "X");
                        current += 2;
                        break;
                    }

                    //greek roots e.g. 'chemistry', 'chorus'
                    if ((current == 0)
                        && (StringAt(workingString, (current + 1), "HARAC", "HARIS")
                            || StringAt(workingString, (current + 1), "HOR", "HYM", "HIA", "HEM"))
                        && !StringAt(workingString, 0, "CHORE"))
                    {
                        metaphoneData.Add("K");
                        current += 2;
                        break;
                    }

                    //germanic, greek, or otherwise 'ch' for 'kh' sound
                    if ((StringAt(workingString, 0, "VAN ", "VON ") || StringAt(workingString, 0, "SCH"))
                        // 'architect but not 'arch', 'orchestra', 'orchid'
                        || StringAt(workingString, (current - 2), "ORCHES", "ARCHIT", "ORCHID")
                        || StringAt(workingString, (current + 2), "T", "S")
                        || ((StringAt(workingString, (current - 1), "A", "O", "U", "E") || (current == 0))
                            //e.g., 'wachtler', 'wechsler', but not 'tichner'
                            && StringAt(workingString, (current + 2), "L", "R", "N", "M", "B", "H", "F", "V", "W", " ")))
                    {
                        metaphoneData.Add("K");
                    }
                    else
                    {
                        if (current > 0)
                        {
                            if (StringAt(workingString, 0, "MC"))
                            {
                                //e.g., "McHugh"
                                metaphoneData.Add("K");
                            }
                            else
                            {
                                metaphoneData.Add("X", "K");
                            }
                        }
                        else
                        {
                            metaphoneData.Add("X");
                        }
                    }
                    current += 2;
                    break;
                }

                //e.g, 'czerny'
                if (StringAt(workingString, current, "CZ") && !StringAt(workingString, (current - 2), "WICZ"))
                {
                    metaphoneData.Add("S", "X");
                    current += 2;
                    break;
                }

                //e.g., 'focaccia'
                if (StringAt(workingString, (current + 1), "CIA"))
                {
                    metaphoneData.Add("X");
                    current += 3;
                    break;
                }

                //double 'C', but not if e.g. 'McClellan'
                if (StringAt(workingString, current, "CC") && !((current == 1) && (workingString[0] == 'M')))
                {
                    //'bellocchio' but not 'bacchus'
                    if (StringAt(workingString, (current + 2), "I", "E", "H")
                        && !StringAt(workingString, (current + 2), "HU"))
                    {
                        //'accident', 'accede' 'succeed'
                        if (((current == 1) && (workingString[current - 1] == 'A'))
                            || StringAt(workingString, (current - 1), "UCCEE", "UCCES"))
                        {
                            metaphoneData.Add("KS");
                        }
                        else
                        {
                            //'bacci', 'bertucci', other italian
                            metaphoneData.Add("X");
                        }
                        current += 3;
                        break;
                    }
                    else
                    {
                        //Pierce's rule
                        metaphoneData.Add("K");
                        current += 2;
                        break;
                    }
                }

                if (StringAt(workingString, current, "CK", "CG", "CQ"))
                {
                    metaphoneData.Add("K");
                    current += 2;
                    break;
                }

                if (StringAt(workingString, current, "CI", "CE", "CY"))
                {
                    //italian vs. english
                    if (StringAt(workingString, current, "CIO", "CIE", "CIA"))
                    {
                        metaphoneData.Add("S", "X");
                    }
                    else
                    {
                        metaphoneData.Add("S");
                    }
                    current += 2;
                    break;
                }

                //else
                metaphoneData.Add("K");

                //name sent in 'mac caffrey', 'mac gregor
                if (StringAt(workingString, (current + 1), " C", " Q", " G"))
                {
                    current += 3;
                }
                else if (StringAt(workingString, (current + 1), "C", "K", "Q")
                    && !StringAt(workingString, (current + 1), "CE", "CI"))
                {
                    current += 2;
                }
                else
                {
                    current += 1;
                }
                break;

            case 'D':
                if (StringAt(workingString, current, "DG"))
                {
                    if (StringAt(workingString, (current + 2), "I", "E", "Y"))
                    {
                        //e.g. 'edge'
                        metaphoneData.Add("J");
                        current += 3;
                        break;
                    }
                    else
                    {
                        //e.g. 'edgar'
                        metaphoneData.Add("TK");
                        current += 2;
                        break;
                    }
                }

                if (StringAt(workingString, current, "DT", "DD"))
                {
                    metaphoneData.Add("T");
                    current += 2;
                    break;
                }

                //else
                metaphoneData.Add("T");
                current += 1;
                break;

            case 'F':
                current += (workingString[current + 1] == 'F') ? 2 : 1;
                metaphoneData.Add("F");
                break;

            case 'G':
                if (workingString[current + 1] == 'H')
                {
                    if ((current > 0) && !IsVowel(workingString[current - 1]))
                    {
                        metaphoneData.Add("K");
                        current += 2;
                        break;
                    }

                    if (current < 3)
                    {
                        //'ghislane', ghiradelli
                        if (current == 0)
                        {
                            if (workingString[current + 2] == 'I')
                            {
                                metaphoneData.Add("J");
                            }
                            else
                            {
                                metaphoneData.Add("K");
                            }
                            current += 2;
                            break;
                        }
                    }

                    //Parker's rule (with some further refinements) - e.g., 'hugh'
                    if (((current > 1) && StringAt(workingString, (current - 2), "B", "H", "D"))
                        //e.g., 'bough'
                        || ((current > 2) && StringAt(workingString, (current - 3), "B", "H", "D"))
                        //e.g., 'broughton'
                        || ((current > 3) && StringAt(workingString, (current - 4), "B", "H")))
                    {
                        current += 2;
                        break;
                    }
                    else
                    {
                        //e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
                        if ((current > 2)
                            && (workingString[current - 1] == 'U')
                            && StringAt(workingString, (current - 3), "C", "G", "L", "R", "T"))
                        {
                            metaphoneData.Add("F");
                        }
                        else if ((current > 0) && workingString[current - 1] != 'I')
                        {
                            metaphoneData.Add("K");
                        }
                        current += 2;
                        break;
                    }
                }

                if (workingString[current + 1] == 'N')
                {
                    if ((current == 1) && IsVowel(workingString[0]) && !isSlavoGermanic)
                    {
                        metaphoneData.Add("KN", "N");
                    }
                    else if (!StringAt(workingString, (current + 2), "EY")
                        && (workingString[current + 1] != 'Y')
                        && !isSlavoGermanic)
                    {
                        //not e.g. 'cagney'
                        metaphoneData.Add("N", "KN");
                    }
                    else
                    {
                        metaphoneData.Add("KN");
                    }
                    current += 2;
                    break;
                }

                //'tagliaro'
                if (StringAt(workingString, (current + 1), "LI") && !isSlavoGermanic)
                {
                    metaphoneData.Add("KL", "L");
                    current += 2;
                    break;
                }

                //-ges-,-gep-,-gel-, -gie- at beginning
                if ((current == 0)
                    && ((workingString[current + 1] == 'Y')
                        || StringAt(workingString, (current + 1), "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER")))
                {
                    metaphoneData.Add("K", "J");
                    current += 2;
                    break;
                }

                // -ger-, -gy-
                if ((StringAt(workingString, (current + 1), "ER") || (workingString[current + 1] == 'Y'))
                    && !StringAt(workingString, 0, "DANGER", "RANGER", "MANGER")
                    && !StringAt(workingString, (current - 1), "E", "I")
                    && !StringAt(workingString, (current - 1), "RGY", "OGY"))
                {
                    metaphoneData.Add("K", "J");
                    current += 2;
                    break;
                }

                // italian e.g, 'biaggi'
                if (StringAt(workingString, (current + 1), "E", "I", "Y")
                    || StringAt(workingString, (current - 1), "AGGI", "OGGI"))
                {
                    //obvious germanic
                    if ((StringAt(workingString, 0, "VAN ", "VON ") || StringAt(workingString, 0, "SCH"))
                        || StringAt(workingString, (current + 1), "ET"))
                    {
                        metaphoneData.Add("K");
                    }
                    else if (StringAt(workingString, (current + 1), "IER "))
                    {
                        //always soft if french ending
                        metaphoneData.Add("J");
                    }
                    else
                    {
                        metaphoneData.Add("J", "K");
                    }
                    current += 2;
                    break;
                }

                current += (workingString[current + 1] == 'G') ? 2 : 1;
                metaphoneData.Add("K");
                break;

            case 'H':
                //only keep if first & before vowel or btw. 2 vowels
                if (((current == 0) || IsVowel(workingString[current - 1]))
                    && IsVowel(workingString[current + 1]))
                {
                    metaphoneData.Add("H");
                    current += 2;
                }
                else
                {
                    //also takes care of 'HH'
                    current += 1;
                }
                break;

            case 'J':
                //obvious spanish, 'jose', 'san jacinto'
                if (StringAt(workingString, current, "JOSE") || StringAt(workingString, 0, "SAN "))
                {
                    if (((current == 0) && (workingString[current + 4] == ' '))
                        || StringAt(workingString, 0, "SAN "))
                    {
                        metaphoneData.Add("H");
                    }
                    else
                    {
                        metaphoneData.Add("J", "H");
                    }
                    current += 1;
                    break;
                }

                if ((current == 0) && !StringAt(workingString, current, "JOSE"))
                {
                    metaphoneData.Add("J", "A"); //Yankelovich/Jankelowicz
                }
                else if (IsVowel(workingString[current - 1])
                    && !isSlavoGermanic
                    && ((workingString[current + 1] == 'A') || (workingString[current + 1] == 'O')))
                {
                    //spanish pron. of e.g. 'bajador'
                    metaphoneData.Add("J", "H");
                }
                else if (current == last)
                {
                    metaphoneData.Add("J", " ");
                }
                else if (!StringAt(workingString, (current + 1), "L", "T", "K", "S", "N", "M", "B", "Z")
                    && !StringAt(workingString, (current - 1), "S", "K", "L"))
                {
                    metaphoneData.Add("J");
                }

                //it could happen!
                current += (workingString[current + 1] == 'J') ? 2 : 1;
                break;

            case 'K':
                current += (workingString[current + 1] == 'K') ? 2 : 1;
                metaphoneData.Add("K");
                break;

            case 'L':
                if (workingString[current + 1] == 'L')
                {
                    //spanish e.g. 'cabrillo', 'gallegos'
                    if (((current == (self.Length - 3))
                            && StringAt(workingString, (current - 1), "ILLO", "ILLA", "ALLE"))
                        || ((StringAt(workingString, (last - 1), "AS", "OS") || StringAt(workingString, last, "A", "O"))
                            && StringAt(workingString, (current - 1), "ALLE")))
                    {
                        metaphoneData.Add("L", " ");
                        current += 2;
                        break;
                    }
                    current += 2;
                }
                else
                {
                    current += 1;
                }
                metaphoneData.Add("L");
                break;

            case 'M':
                if ((StringAt(workingString, (current - 1), "UMB")
                        && (((current + 1) == last) || StringAt(workingString, (current + 2), "ER")))
                    //'dumb','thumb'
                    || (workingString[current + 1] == 'M'))
                {
                    current += 2;
                }
                else
                {
                    current += 1;
                }
                metaphoneData.Add("M");
                break;

            case 'N':
                current += (workingString[current + 1] == 'N') ? 2 : 1;
                metaphoneData.Add("N");
                break;

            // NOTE(review): the reference algorithm maps N-tilde to "N" here; this literal
            // may be an encoding artifact - confirm against the original source file.
            case 'Ð':
                current += 1;
                metaphoneData.Add("N");
                break;

            case 'P':
                if (workingString[current + 1] == 'H')
                {
                    metaphoneData.Add("F");
                    current += 2;
                    break;
                }

                //also account for "campbell", "raspberry"
                current += StringAt(workingString, (current + 1), "P", "B") ? 2 : 1;
                metaphoneData.Add("P");
                break;

            case 'Q':
                current += (workingString[current + 1] == 'Q') ? 2 : 1;
                metaphoneData.Add("K");
                break;

            case 'R':
                //french e.g. 'rogier', but exclude 'hochmeier'
                if ((current == last)
                    && !isSlavoGermanic
                    && StringAt(workingString, (current - 2), "IE")
                    && !StringAt(workingString, (current - 4), "ME", "MA"))
                {
                    metaphoneData.Add("", "R");
                }
                else
                {
                    metaphoneData.Add("R");
                }
                current += (workingString[current + 1] == 'R') ? 2 : 1;
                break;

            case 'S':
                //special cases 'island', 'isle', 'carlisle', 'carlysle'
                if (StringAt(workingString, (current - 1), "ISL", "YSL"))
                {
                    current += 1;
                    break;
                }

                //special case 'sugar-'
                if ((current == 0) && StringAt(workingString, current, "SUGAR"))
                {
                    metaphoneData.Add("X", "S");
                    current += 1;
                    break;
                }

                if (StringAt(workingString, current, "SH"))
                {
                    //germanic
                    if (StringAt(workingString, (current + 1), "HEIM", "HOEK", "HOLM", "HOLZ"))
                    {
                        metaphoneData.Add("S");
                    }
                    else
                    {
                        metaphoneData.Add("X");
                    }
                    current += 2;
                    break;
                }

                //italian & armenian
                if (StringAt(workingString, current, "SIO", "SIA") || StringAt(workingString, current, "SIAN"))
                {
                    if (!isSlavoGermanic)
                    {
                        metaphoneData.Add("S", "X");
                    }
                    else
                    {
                        metaphoneData.Add("S");
                    }
                    current += 3;
                    break;
                }

                //german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
                //also, -sz- in slavic language altho in hungarian it is pronounced 's'
                if (((current == 0) && StringAt(workingString, (current + 1), "M", "N", "L", "W"))
                    || StringAt(workingString, (current + 1), "Z"))
                {
                    metaphoneData.Add("S", "X");
                    current += StringAt(workingString, (current + 1), "Z") ? 2 : 1;
                    break;
                }

                if (StringAt(workingString, current, "SC"))
                {
                    //Schlesinger's rule
                    if (workingString[current + 2] == 'H')
                    {
                        //dutch origin, e.g. 'school', 'schooner'
                        if (StringAt(workingString, (current + 3), "OO", "ER", "EN", "UY", "ED", "EM"))
                        {
                            //'schermerhorn', 'schenker'
                            if (StringAt(workingString, (current + 3), "ER", "EN"))
                            {
                                metaphoneData.Add("X", "SK");
                            }
                            else
                            {
                                metaphoneData.Add("SK");
                            }
                            current += 3;
                            break;
                        }
                        else
                        {
                            if ((current == 0) && !IsVowel(workingString[3]) && (workingString[3] != 'W'))
                            {
                                metaphoneData.Add("X", "S");
                            }
                            else
                            {
                                metaphoneData.Add("X");
                            }
                            current += 3;
                            break;
                        }
                    }

                    if (StringAt(workingString, (current + 2), "I", "E", "Y"))
                    {
                        metaphoneData.Add("S");
                        current += 3;
                        break;
                    }

                    //else
                    metaphoneData.Add("SK");
                    current += 3;
                    break;
                }

                //french e.g. 'resnais', 'artois'
                if ((current == last) && StringAt(workingString, (current - 2), "AI", "OI"))
                {
                    metaphoneData.Add("", "S");
                }
                else
                {
                    metaphoneData.Add("S");
                }
                current += StringAt(workingString, (current + 1), "S", "Z") ? 2 : 1;
                break;

            case 'T':
                if (StringAt(workingString, current, "TION"))
                {
                    metaphoneData.Add("X");
                    current += 3;
                    break;
                }

                if (StringAt(workingString, current, "TIA", "TCH"))
                {
                    metaphoneData.Add("X");
                    current += 3;
                    break;
                }

                if (StringAt(workingString, current, "TH") || StringAt(workingString, current, "TTH"))
                {
                    //special case 'thomas', 'thames' or germanic
                    if (StringAt(workingString, (current + 2), "OM", "AM")
                        || StringAt(workingString, 0, "VAN ", "VON ")
                        || StringAt(workingString, 0, "SCH"))
                    {
                        metaphoneData.Add("T");
                    }
                    else
                    {
                        metaphoneData.Add("O", "T");
                    }
                    current += 2;
                    break;
                }

                current += StringAt(workingString, (current + 1), "T", "D") ? 2 : 1;
                metaphoneData.Add("T");
                break;

            case 'V':
                current += (workingString[current + 1] == 'V') ? 2 : 1;
                metaphoneData.Add("F");
                break;

            case 'W':
                //can also be in middle of word
                if (StringAt(workingString, current, "WR"))
                {
                    metaphoneData.Add("R");
                    current += 2;
                    break;
                }

                if ((current == 0)
                    && (IsVowel(workingString[current + 1]) || StringAt(workingString, current, "WH")))
                {
                    //Wasserman should match Vasserman
                    if (IsVowel(workingString[current + 1]))
                    {
                        metaphoneData.Add("A", "F");
                    }
                    else
                    {
                        //need Uomo to match Womo
                        metaphoneData.Add("A");
                    }
                }

                //Arnow should match Arnoff
                // The current > 0 guard keeps a one-letter input ("W") from reading index -1.
                if (((current == last) && (current > 0) && IsVowel(workingString[current - 1]))
                    || StringAt(workingString, (current - 1), "EWSKI", "EWSKY", "OWSKI", "OWSKY")
                    || StringAt(workingString, 0, "SCH"))
                {
                    metaphoneData.Add("", "F");
                    current += 1;
                    break;
                }

                //polish e.g. 'filipowicz'
                if (StringAt(workingString, current, "WICZ", "WITZ"))
                {
                    metaphoneData.Add("TS", "FX");
                    current += 4;
                    break;
                }

                //else skip it
                current += 1;
                break;

            case 'X':
                //french e.g. breaux
                if (!((current == last)
                    && (StringAt(workingString, (current - 3), "IAU", "EAU")
                        || StringAt(workingString, (current - 2), "AU", "OU"))))
                {
                    metaphoneData.Add("KS");
                }
                current += StringAt(workingString, (current + 1), "C", "X") ? 2 : 1;
                break;

            case 'Z':
                //chinese pinyin e.g. 'zhao'
                if (workingString[current + 1] == 'H')
                {
                    metaphoneData.Add("J");
                    current += 2;
                    break;
                }
                else if (StringAt(workingString, (current + 1), "ZO", "ZI", "ZA")
                    || (isSlavoGermanic && ((current > 0) && workingString[current - 1] != 'T')))
                {
                    metaphoneData.Add("S", "TS");
                }
                else
                {
                    metaphoneData.Add("S");
                }
                current += (workingString[current + 1] == 'Z') ? 2 : 1;
                break;

            default:
                current += 1;
                break;
        }
    }

    return metaphoneData.ToString();
}
/// <summary>
/// Encodes the string with the Double Metaphone phonetic rules, producing a
/// primary and an alternate (secondary) key via <c>MetaphoneData</c>.
/// </summary>
/// <param name="self">The word or name to encode. Letter case is ignored.</param>
/// <returns>
/// <paramref name="self"/> unchanged when it is null or empty; otherwise the value of
/// <c>MetaphoneData.ToString()</c> after encoding (its exact format is defined by that type).
/// </returns>
/// <remarks>
/// Encoding stops as soon as the input is exhausted or both
/// <c>PrimaryLength</c> and <c>SecondaryLength</c> have reached 4.
/// </remarks>
public static string GenerateDoubleMetaphone(this string self)
{
    MetaphoneData metaphoneData = new MetaphoneData();
    int current = 0;

    // Null-safe guard: a bare Length check would throw on a null receiver.
    if (string.IsNullOrEmpty(self))
    {
        return self;
    }

    int last = self.Length - 1; //zero based index

    // The trailing space is a sentinel so that look-ahead reads of
    // workingString[current + 1] never run past the end of the buffer.
    string workingString = self.ToUpperInvariant() + " ";

    // Test the upper-cased copy: char-based IndexOf is case-sensitive, so checking
    // the raw input made the result depend on the caller's letter casing.
    bool isSlavoGermanic = (workingString.IndexOf('W') > -1)
        || (workingString.IndexOf('K') > -1)
        || (workingString.IndexOf("CZ", StringComparison.Ordinal) > -1)
        || (workingString.IndexOf("WITZ", StringComparison.Ordinal) > -1);

    //skip these when at start of word
    if (workingString.StartsWith(StringComparison.OrdinalIgnoreCase, "GN", "KN", "PN", "WR", "PS"))
    {
        current += 1;
    }

    //Initial 'X' is pronounced 'Z' e.g. 'Xavier'
    if (workingString[0] == 'X')
    {
        metaphoneData.Add("S"); //'Z' maps to 'S'
        current += 1;
    }

    while ((metaphoneData.PrimaryLength < 4) || (metaphoneData.SecondaryLength < 4))
    {
        if (current >= self.Length)
        {
            break;
        }

        switch (workingString[current])
        {
            case 'A':
            case 'E':
            case 'I':
            case 'O':
            case 'U':
            case 'Y':
                if (current == 0)
                {
                    //all init vowels now map to 'A'
                    metaphoneData.Add("A");
                }
                current += 1;
                break;

            case 'B':
                //"-mb", e.g. "dumb", already skipped over...
                metaphoneData.Add("P");
                current += (workingString[current + 1] == 'B') ? 2 : 1;
                break;

            // NOTE(review): the reference algorithm maps C-cedilla to "S" here; this literal
            // may be an encoding artifact - confirm against the original source file.
            case 'Ã':
                metaphoneData.Add("S");
                current += 1;
                break;

            case 'C':
                //various germanic
                if ((current > 1)
                    && !IsVowel(workingString[current - 2])
                    && StringAt(workingString, (current - 1), "ACH")
                    && ((workingString[current + 2] != 'I')
                        && ((workingString[current + 2] != 'E')
                            || StringAt(workingString, (current - 2), "BACHER", "MACHER"))))
                {
                    metaphoneData.Add("K");
                    current += 2;
                    break;
                }

                //special case 'caesar'
                if ((current == 0) && StringAt(workingString, current, "CAESAR"))
                {
                    metaphoneData.Add("S");
                    current += 2;
                    break;
                }

                //italian 'chianti'
                if (StringAt(workingString, current, "CHIA"))
                {
                    metaphoneData.Add("K");
                    current += 2;
                    break;
                }

                if (StringAt(workingString, current, "CH"))
                {
                    //find 'michael'
                    if ((current > 0) && StringAt(workingString, current, "CHAE"))
                    {
                        metaphoneData.Add("K", "X");
                        current += 2;
                        break;
                    }

                    //greek roots e.g. 'chemistry', 'chorus'
                    if ((current == 0)
                        && (StringAt(workingString, (current + 1), "HARAC", "HARIS")
                            || StringAt(workingString, (current + 1), "HOR", "HYM", "HIA", "HEM"))
                        && !StringAt(workingString, 0, "CHORE"))
                    {
                        metaphoneData.Add("K");
                        current += 2;
                        break;
                    }

                    //germanic, greek, or otherwise 'ch' for 'kh' sound
                    if ((StringAt(workingString, 0, "VAN ", "VON ") || StringAt(workingString, 0, "SCH"))
                        // 'architect but not 'arch', 'orchestra', 'orchid'
                        || StringAt(workingString, (current - 2), "ORCHES", "ARCHIT", "ORCHID")
                        || StringAt(workingString, (current + 2), "T", "S")
                        || ((StringAt(workingString, (current - 1), "A", "O", "U", "E") || (current == 0))
                            //e.g., 'wachtler', 'wechsler', but not 'tichner'
                            && StringAt(workingString, (current + 2), "L", "R", "N", "M", "B", "H", "F", "V", "W", " ")))
                    {
                        metaphoneData.Add("K");
                    }
                    else
                    {
                        if (current > 0)
                        {
                            if (StringAt(workingString, 0, "MC"))
                            {
                                //e.g., "McHugh"
                                metaphoneData.Add("K");
                            }
                            else
                            {
                                metaphoneData.Add("X", "K");
                            }
                        }
                        else
                        {
                            metaphoneData.Add("X");
                        }
                    }
                    current += 2;
                    break;
                }

                //e.g, 'czerny'
                if (StringAt(workingString, current, "CZ") && !StringAt(workingString, (current - 2), "WICZ"))
                {
                    metaphoneData.Add("S", "X");
                    current += 2;
                    break;
                }

                //e.g., 'focaccia'
                if (StringAt(workingString, (current + 1), "CIA"))
                {
                    metaphoneData.Add("X");
                    current += 3;
                    break;
                }

                //double 'C', but not if e.g. 'McClellan'
                if (StringAt(workingString, current, "CC") && !((current == 1) && (workingString[0] == 'M')))
                {
                    //'bellocchio' but not 'bacchus'
                    if (StringAt(workingString, (current + 2), "I", "E", "H")
                        && !StringAt(workingString, (current + 2), "HU"))
                    {
                        //'accident', 'accede' 'succeed'
                        if (((current == 1) && (workingString[current - 1] == 'A'))
                            || StringAt(workingString, (current - 1), "UCCEE", "UCCES"))
                        {
                            metaphoneData.Add("KS");
                        }
                        else
                        {
                            //'bacci', 'bertucci', other italian
                            metaphoneData.Add("X");
                        }
                        current += 3;
                        break;
                    }
                    else
                    {
                        //Pierce's rule
                        metaphoneData.Add("K");
                        current += 2;
                        break;
                    }
                }

                if (StringAt(workingString, current, "CK", "CG", "CQ"))
                {
                    metaphoneData.Add("K");
                    current += 2;
                    break;
                }

                if (StringAt(workingString, current, "CI", "CE", "CY"))
                {
                    //italian vs. english
                    if (StringAt(workingString, current, "CIO", "CIE", "CIA"))
                    {
                        metaphoneData.Add("S", "X");
                    }
                    else
                    {
                        metaphoneData.Add("S");
                    }
                    current += 2;
                    break;
                }

                //else
                metaphoneData.Add("K");

                //name sent in 'mac caffrey', 'mac gregor
                if (StringAt(workingString, (current + 1), " C", " Q", " G"))
                {
                    current += 3;
                }
                else if (StringAt(workingString, (current + 1), "C", "K", "Q")
                    && !StringAt(workingString, (current + 1), "CE", "CI"))
                {
                    current += 2;
                }
                else
                {
                    current += 1;
                }
                break;

            case 'D':
                if (StringAt(workingString, current, "DG"))
                {
                    if (StringAt(workingString, (current + 2), "I", "E", "Y"))
                    {
                        //e.g. 'edge'
                        metaphoneData.Add("J");
                        current += 3;
                        break;
                    }
                    else
                    {
                        //e.g. 'edgar'
                        metaphoneData.Add("TK");
                        current += 2;
                        break;
                    }
                }

                if (StringAt(workingString, current, "DT", "DD"))
                {
                    metaphoneData.Add("T");
                    current += 2;
                    break;
                }

                //else
                metaphoneData.Add("T");
                current += 1;
                break;

            case 'F':
                current += (workingString[current + 1] == 'F') ? 2 : 1;
                metaphoneData.Add("F");
                break;

            case 'G':
                if (workingString[current + 1] == 'H')
                {
                    if ((current > 0) && !IsVowel(workingString[current - 1]))
                    {
                        metaphoneData.Add("K");
                        current += 2;
                        break;
                    }

                    if (current < 3)
                    {
                        //'ghislane', ghiradelli
                        if (current == 0)
                        {
                            if (workingString[current + 2] == 'I')
                            {
                                metaphoneData.Add("J");
                            }
                            else
                            {
                                metaphoneData.Add("K");
                            }
                            current += 2;
                            break;
                        }
                    }

                    //Parker's rule (with some further refinements) - e.g., 'hugh'
                    if (((current > 1) && StringAt(workingString, (current - 2), "B", "H", "D"))
                        //e.g., 'bough'
                        || ((current > 2) && StringAt(workingString, (current - 3), "B", "H", "D"))
                        //e.g., 'broughton'
                        || ((current > 3) && StringAt(workingString, (current - 4), "B", "H")))
                    {
                        current += 2;
                        break;
                    }
                    else
                    {
                        //e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
                        if ((current > 2)
                            && (workingString[current - 1] == 'U')
                            && StringAt(workingString, (current - 3), "C", "G", "L", "R", "T"))
                        {
                            metaphoneData.Add("F");
                        }
                        else if ((current > 0) && workingString[current - 1] != 'I')
                        {
                            metaphoneData.Add("K");
                        }
                        current += 2;
                        break;
                    }
                }

                if (workingString[current + 1] == 'N')
                {
                    if ((current == 1) && IsVowel(workingString[0]) && !isSlavoGermanic)
                    {
                        metaphoneData.Add("KN", "N");
                    }
                    else if (!StringAt(workingString, (current + 2), "EY")
                        && (workingString[current + 1] != 'Y')
                        && !isSlavoGermanic)
                    {
                        //not e.g. 'cagney'
                        metaphoneData.Add("N", "KN");
                    }
                    else
                    {
                        metaphoneData.Add("KN");
                    }
                    current += 2;
                    break;
                }

                //'tagliaro'
                if (StringAt(workingString, (current + 1), "LI") && !isSlavoGermanic)
                {
                    metaphoneData.Add("KL", "L");
                    current += 2;
                    break;
                }

                //-ges-,-gep-,-gel-, -gie- at beginning
                if ((current == 0)
                    && ((workingString[current + 1] == 'Y')
                        || StringAt(workingString, (current + 1), "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER")))
                {
                    metaphoneData.Add("K", "J");
                    current += 2;
                    break;
                }

                // -ger-, -gy-
                if ((StringAt(workingString, (current + 1), "ER") || (workingString[current + 1] == 'Y'))
                    && !StringAt(workingString, 0, "DANGER", "RANGER", "MANGER")
                    && !StringAt(workingString, (current - 1), "E", "I")
                    && !StringAt(workingString, (current - 1), "RGY", "OGY"))
                {
                    metaphoneData.Add("K", "J");
                    current += 2;
                    break;
                }

                // italian e.g, 'biaggi'
                if (StringAt(workingString, (current + 1), "E", "I", "Y")
                    || StringAt(workingString, (current - 1), "AGGI", "OGGI"))
                {
                    //obvious germanic
                    if ((StringAt(workingString, 0, "VAN ", "VON ") || StringAt(workingString, 0, "SCH"))
                        || StringAt(workingString, (current + 1), "ET"))
                    {
                        metaphoneData.Add("K");
                    }
                    else if (StringAt(workingString, (current + 1), "IER "))
                    {
                        //always soft if french ending
                        metaphoneData.Add("J");
                    }
                    else
                    {
                        metaphoneData.Add("J", "K");
                    }
                    current += 2;
                    break;
                }

                current += (workingString[current + 1] == 'G') ? 2 : 1;
                metaphoneData.Add("K");
                break;

            case 'H':
                //only keep if first & before vowel or btw. 2 vowels
                if (((current == 0) || IsVowel(workingString[current - 1]))
                    && IsVowel(workingString[current + 1]))
                {
                    metaphoneData.Add("H");
                    current += 2;
                }
                else
                {
                    //also takes care of 'HH'
                    current += 1;
                }
                break;

            case 'J':
                //obvious spanish, 'jose', 'san jacinto'
                if (StringAt(workingString, current, "JOSE") || StringAt(workingString, 0, "SAN "))
                {
                    if (((current == 0) && (workingString[current + 4] == ' '))
                        || StringAt(workingString, 0, "SAN "))
                    {
                        metaphoneData.Add("H");
                    }
                    else
                    {
                        metaphoneData.Add("J", "H");
                    }
                    current += 1;
                    break;
                }

                if ((current == 0) && !StringAt(workingString, current, "JOSE"))
                {
                    metaphoneData.Add("J", "A"); //Yankelovich/Jankelowicz
                }
                else if (IsVowel(workingString[current - 1])
                    && !isSlavoGermanic
                    && ((workingString[current + 1] == 'A') || (workingString[current + 1] == 'O')))
                {
                    //spanish pron. of e.g. 'bajador'
                    metaphoneData.Add("J", "H");
                }
                else if (current == last)
                {
                    metaphoneData.Add("J", " ");
                }
                else if (!StringAt(workingString, (current + 1), "L", "T", "K", "S", "N", "M", "B", "Z")
                    && !StringAt(workingString, (current - 1), "S", "K", "L"))
                {
                    metaphoneData.Add("J");
                }

                //it could happen!
                current += (workingString[current + 1] == 'J') ? 2 : 1;
                break;

            case 'K':
                current += (workingString[current + 1] == 'K') ? 2 : 1;
                metaphoneData.Add("K");
                break;

            case 'L':
                if (workingString[current + 1] == 'L')
                {
                    //spanish e.g. 'cabrillo', 'gallegos'
                    if (((current == (self.Length - 3))
                            && StringAt(workingString, (current - 1), "ILLO", "ILLA", "ALLE"))
                        || ((StringAt(workingString, (last - 1), "AS", "OS") || StringAt(workingString, last, "A", "O"))
                            && StringAt(workingString, (current - 1), "ALLE")))
                    {
                        metaphoneData.Add("L", " ");
                        current += 2;
                        break;
                    }
                    current += 2;
                }
                else
                {
                    current += 1;
                }
                metaphoneData.Add("L");
                break;

            case 'M':
                if ((StringAt(workingString, (current - 1), "UMB")
                        && (((current + 1) == last) || StringAt(workingString, (current + 2), "ER")))
                    //'dumb','thumb'
                    || (workingString[current + 1] == 'M'))
                {
                    current += 2;
                }
                else
                {
                    current += 1;
                }
                metaphoneData.Add("M");
                break;

            case 'N':
                current += (workingString[current + 1] == 'N') ? 2 : 1;
                metaphoneData.Add("N");
                break;

            // NOTE(review): the reference algorithm maps N-tilde to "N" here; this literal
            // may be an encoding artifact - confirm against the original source file.
            case 'Ð':
                current += 1;
                metaphoneData.Add("N");
                break;

            case 'P':
                if (workingString[current + 1] == 'H')
                {
                    metaphoneData.Add("F");
                    current += 2;
                    break;
                }

                //also account for "campbell", "raspberry"
                current += StringAt(workingString, (current + 1), "P", "B") ? 2 : 1;
                metaphoneData.Add("P");
                break;

            case 'Q':
                current += (workingString[current + 1] == 'Q') ? 2 : 1;
                metaphoneData.Add("K");
                break;

            case 'R':
                //french e.g. 'rogier', but exclude 'hochmeier'
                if ((current == last)
                    && !isSlavoGermanic
                    && StringAt(workingString, (current - 2), "IE")
                    && !StringAt(workingString, (current - 4), "ME", "MA"))
                {
                    metaphoneData.Add("", "R");
                }
                else
                {
                    metaphoneData.Add("R");
                }
                current += (workingString[current + 1] == 'R') ? 2 : 1;
                break;

            case 'S':
                //special cases 'island', 'isle', 'carlisle', 'carlysle'
                if (StringAt(workingString, (current - 1), "ISL", "YSL"))
                {
                    current += 1;
                    break;
                }

                //special case 'sugar-'
                if ((current == 0) && StringAt(workingString, current, "SUGAR"))
                {
                    metaphoneData.Add("X", "S");
                    current += 1;
                    break;
                }

                if (StringAt(workingString, current, "SH"))
                {
                    //germanic
                    if (StringAt(workingString, (current + 1), "HEIM", "HOEK", "HOLM", "HOLZ"))
                    {
                        metaphoneData.Add("S");
                    }
                    else
                    {
                        metaphoneData.Add("X");
                    }
                    current += 2;
                    break;
                }

                //italian & armenian
                if (StringAt(workingString, current, "SIO", "SIA") || StringAt(workingString, current, "SIAN"))
                {
                    if (!isSlavoGermanic)
                    {
                        metaphoneData.Add("S", "X");
                    }
                    else
                    {
                        metaphoneData.Add("S");
                    }
                    current += 3;
                    break;
                }

                //german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
                //also, -sz- in slavic language altho in hungarian it is pronounced 's'
                if (((current == 0) && StringAt(workingString, (current + 1), "M", "N", "L", "W"))
                    || StringAt(workingString, (current + 1), "Z"))
                {
                    metaphoneData.Add("S", "X");
                    current += StringAt(workingString, (current + 1), "Z") ? 2 : 1;
                    break;
                }

                if (StringAt(workingString, current, "SC"))
                {
                    //Schlesinger's rule
                    if (workingString[current + 2] == 'H')
                    {
                        //dutch origin, e.g. 'school', 'schooner'
                        if (StringAt(workingString, (current + 3), "OO", "ER", "EN", "UY", "ED", "EM"))
                        {
                            //'schermerhorn', 'schenker'
                            if (StringAt(workingString, (current + 3), "ER", "EN"))
                            {
                                metaphoneData.Add("X", "SK");
                            }
                            else
                            {
                                metaphoneData.Add("SK");
                            }
                            current += 3;
                            break;
                        }
                        else
                        {
                            if ((current == 0) && !IsVowel(workingString[3]) && (workingString[3] != 'W'))
                            {
                                metaphoneData.Add("X", "S");
                            }
                            else
                            {
                                metaphoneData.Add("X");
                            }
                            current += 3;
                            break;
                        }
                    }

                    if (StringAt(workingString, (current + 2), "I", "E", "Y"))
                    {
                        metaphoneData.Add("S");
                        current += 3;
                        break;
                    }

                    //else
                    metaphoneData.Add("SK");
                    current += 3;
                    break;
                }

                //french e.g. 'resnais', 'artois'
                if ((current == last) && StringAt(workingString, (current - 2), "AI", "OI"))
                {
                    metaphoneData.Add("", "S");
                }
                else
                {
                    metaphoneData.Add("S");
                }
                current += StringAt(workingString, (current + 1), "S", "Z") ? 2 : 1;
                break;

            case 'T':
                if (StringAt(workingString, current, "TION"))
                {
                    metaphoneData.Add("X");
                    current += 3;
                    break;
                }

                if (StringAt(workingString, current, "TIA", "TCH"))
                {
                    metaphoneData.Add("X");
                    current += 3;
                    break;
                }

                if (StringAt(workingString, current, "TH") || StringAt(workingString, current, "TTH"))
                {
                    //special case 'thomas', 'thames' or germanic
                    if (StringAt(workingString, (current + 2), "OM", "AM")
                        || StringAt(workingString, 0, "VAN ", "VON ")
                        || StringAt(workingString, 0, "SCH"))
                    {
                        metaphoneData.Add("T");
                    }
                    else
                    {
                        metaphoneData.Add("O", "T");
                    }
                    current += 2;
                    break;
                }

                current += StringAt(workingString, (current + 1), "T", "D") ? 2 : 1;
                metaphoneData.Add("T");
                break;

            case 'V':
                current += (workingString[current + 1] == 'V') ? 2 : 1;
                metaphoneData.Add("F");
                break;

            case 'W':
                //can also be in middle of word
                if (StringAt(workingString, current, "WR"))
                {
                    metaphoneData.Add("R");
                    current += 2;
                    break;
                }

                if ((current == 0)
                    && (IsVowel(workingString[current + 1]) || StringAt(workingString, current, "WH")))
                {
                    //Wasserman should match Vasserman
                    if (IsVowel(workingString[current + 1]))
                    {
                        metaphoneData.Add("A", "F");
                    }
                    else
                    {
                        //need Uomo to match Womo
                        metaphoneData.Add("A");
                    }
                }

                //Arnow should match Arnoff
                // The current > 0 guard keeps a one-letter input ("W") from reading index -1.
                if (((current == last) && (current > 0) && IsVowel(workingString[current - 1]))
                    || StringAt(workingString, (current - 1), "EWSKI", "EWSKY", "OWSKI", "OWSKY")
                    || StringAt(workingString, 0, "SCH"))
                {
                    metaphoneData.Add("", "F");
                    current += 1;
                    break;
                }

                //polish e.g. 'filipowicz'
                if (StringAt(workingString, current, "WICZ", "WITZ"))
                {
                    metaphoneData.Add("TS", "FX");
                    current += 4;
                    break;
                }

                //else skip it
                current += 1;
                break;

            case 'X':
                //french e.g. breaux
                if (!((current == last)
                    && (StringAt(workingString, (current - 3), "IAU", "EAU")
                        || StringAt(workingString, (current - 2), "AU", "OU"))))
                {
                    metaphoneData.Add("KS");
                }
                current += StringAt(workingString, (current + 1), "C", "X") ? 2 : 1;
                break;

            case 'Z':
                //chinese pinyin e.g. 'zhao'
                if (workingString[current + 1] == 'H')
                {
                    metaphoneData.Add("J");
                    current += 2;
                    break;
                }
                else if (StringAt(workingString, (current + 1), "ZO", "ZI", "ZA")
                    || (isSlavoGermanic && ((current > 0) && workingString[current - 1] != 'T')))
                {
                    metaphoneData.Add("S", "TS");
                }
                else
                {
                    metaphoneData.Add("S");
                }
                current += (workingString[current + 1] == 'Z') ? 2 : 1;
                break;

            default:
                current += 1;
                break;
        }
    }

    return metaphoneData.ToString();
}
/// <summary>
/// Encodes <paramref name="input"/> with the Double Metaphone phonetic algorithm.
/// </summary>
/// <remarks>
/// The input is upper-cased with the invariant culture and padded with one trailing
/// space, so looking one character ahead of <c>current</c> never leaves the buffer.
/// Characters are consumed left to right: each rule appends to the primary and/or
/// secondary key held by <c>MetaphoneData</c> and advances <c>current</c> past the
/// letters it has handled. Encoding stops when the input is exhausted or once both
/// keys have reached four characters.
/// </remarks>
/// <param name="input">The word or name to encode.</param>
/// <returns>
/// <paramref name="input"/> itself when it is <c>null</c> or empty; otherwise the
/// value of <c>MetaphoneData.ToString()</c> after all rules have been applied.
/// </returns>
public static string ToDoubleMetaphone(this string input)
{
    MetaphoneData metaphoneData = new MetaphoneData();
    int current = 0;

    // An extension method can be invoked on a null reference, so null is treated
    // like the empty string instead of failing on input.Length.
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    int last = input.Length - 1; //zero based index
    string workingString = input.ToUpperInvariant() + " ";

    // 'W' and 'K' are looked up in the upper-cased buffer so the flag does not depend
    // on the letter case of the caller's input; the two string probes are already
    // case-insensitive.
    bool isSlavoGermanic = (workingString.IndexOf(charW) > -1)
        || (workingString.IndexOf(charK) > -1)
        || (input.IndexOf(strCZ, StringComparison.OrdinalIgnoreCase) > -1)
        || (input.IndexOf(strWITZ, StringComparison.OrdinalIgnoreCase) > -1);

    //skip these when at start of word
    if (workingString.StartsWith(StringComparison.OrdinalIgnoreCase, strGN, strKN, strPN, strWR, strPS))
    {
        current += 1;
    }

    //Initial 'X' is pronounced 'Z' e.g. 'Xavier'
    if (workingString[0] == charX)
    {
        metaphoneData.Add(strS); //'Z' maps to 'S'
        current += 1;
    }

    while ((metaphoneData.PrimaryLength < 4) || (metaphoneData.SecondaryLength < 4))
    {
        if (current >= input.Length)
        {
            break;
        }

        switch (workingString[current])
        {
            case charA:
            case charE:
            case charI:
            case charO:
            case charU:
            case charY:
                if (current == 0)
                {
                    //all init vowels now map to 'A'
                    metaphoneData.Add("A");
                }
                current += 1;
                break;

            case charB:
                //"-mb", e.g", "dumb", already skipped over...
                metaphoneData.Add("P");
                current += (workingString[current + 1] == charB) ? 2 : 1;
                break;

            case charAdash:
                metaphoneData.Add(strS);
                current += 1;
                break;

            case charC:
                //various germanic
                if ((current > 1)
                    && !IsVowel(workingString[current - 2])
                    && StringAt(workingString, (current - 1), strACH)
                    && ((workingString[current + 2] != charI)
                        && ((workingString[current + 2] != charE)
                            || StringAt(workingString, (current - 2), strBACHER, strMACHER))))
                {
                    metaphoneData.Add(strK);
                    current += 2;
                    break;
                }

                //special case 'caesar'
                if ((current == 0) && StringAt(workingString, current, strCAESAR))
                {
                    metaphoneData.Add(strS);
                    current += 2;
                    break;
                }

                //italian 'chianti'
                if (StringAt(workingString, current, strCHIA))
                {
                    metaphoneData.Add(strK);
                    current += 2;
                    break;
                }

                if (StringAt(workingString, current, strCH))
                {
                    //find 'michael'
                    if ((current > 0) && StringAt(workingString, current, strCHAE))
                    {
                        metaphoneData.Add(strK, strX);
                        current += 2;
                        break;
                    }

                    //greek roots e.g. 'chemistry', 'chorus'
                    if ((current == 0)
                        && (StringAt(workingString, (current + 1), strHARAC, strHARIS)
                            || StringAt(workingString, (current + 1), strHOR, strHYM, strHIA, strHEM))
                        && !StringAt(workingString, 0, strCHORE))
                    {
                        metaphoneData.Add(strK);
                        current += 2;
                        break;
                    }

                    //germanic, greek, or otherwise 'ch' for 'kh' sound
                    if ((StringAt(workingString, 0, strVANsp, strVONsp) || StringAt(workingString, 0, strSCH))
                        // 'architect but not 'arch', 'orchestra', 'orchid'
                        || StringAt(workingString, (current - 2), strORCHES, strARCHIT, strORCHID)
                        || StringAt(workingString, (current + 2), strT, strS)
                        || ((StringAt(workingString, (current - 1), strA, strO, strU, strE) || (current == 0))
                            //e.g., 'wachtler', 'wechsler', but not 'tichner'
                            && StringAt(workingString, (current + 2), strL, strR, strN, strM, strB, strH, strF, strV, strW, sp)))
                    {
                        metaphoneData.Add(strK);
                    }
                    else
                    {
                        if (current > 0)
                        {
                            if (StringAt(workingString, 0, strMC))
                            {
                                //e.g., "McHugh"
                                metaphoneData.Add(strK);
                            }
                            else
                            {
                                metaphoneData.Add(strX, strK);
                            }
                        }
                        else
                        {
                            metaphoneData.Add(strX);
                        }
                    }
                    current += 2;
                    break;
                }

                //e.g, 'czerny'
                if (StringAt(workingString, current, strCZ) && !StringAt(workingString, (current - 2), strWICZ))
                {
                    metaphoneData.Add(strS, strX);
                    current += 2;
                    break;
                }

                //e.g., 'focaccia'
                if (StringAt(workingString, (current + 1), strCIA))
                {
                    metaphoneData.Add(strX);
                    current += 3;
                    break;
                }

                //double 'C', but not if e.g. 'McClellan'
                if (StringAt(workingString, current, strCC) && !((current == 1) && (workingString[0] == charM)))
                {
                    //'bellocchio' but not 'bacchus'
                    if (StringAt(workingString, (current + 2), strI, strE, strH)
                        && !StringAt(workingString, (current + 2), strHU))
                    {
                        //'accident', 'accede' 'succeed'
                        if (((current == 1) && (workingString[current - 1] == charA))
                            || StringAt(workingString, (current - 1), strUCCEE, strUCCES))
                        {
                            metaphoneData.Add(strKS);
                        }
                        //'bacci', 'bertucci', other italian
                        else
                        {
                            metaphoneData.Add(strX);
                        }
                        current += 3;
                        break;
                    }
                    else
                    {
                        //Pierce's rule
                        metaphoneData.Add(strK);
                        current += 2;
                        break;
                    }
                }

                if (StringAt(workingString, current, strCK, strCG, strCQ))
                {
                    metaphoneData.Add(strK);
                    current += 2;
                    break;
                }

                if (StringAt(workingString, current, strCI, strCE, strCY))
                {
                    //italian vs. english
                    if (StringAt(workingString, current, strCIO, strCIE, strCIA))
                    {
                        metaphoneData.Add(strS, strX);
                    }
                    else
                    {
                        metaphoneData.Add(strS);
                    }
                    current += 2;
                    break;
                }

                //else
                metaphoneData.Add(strK);

                //name sent in 'mac caffrey', 'mac gregor
                if (StringAt(workingString, (current + 1), strspC, strspQ, strspG))
                {
                    current += 3;
                }
                else if (StringAt(workingString, (current + 1), strC, strK, strQ)
                    && !StringAt(workingString, (current + 1), strCE, strCI))
                {
                    current += 2;
                }
                else
                {
                    current += 1;
                }
                break;

            case charD:
                if (StringAt(workingString, current, strDG))
                {
                    if (StringAt(workingString, (current + 2), strI, strE, strY))
                    {
                        //e.g. 'edge'
                        metaphoneData.Add(strJ);
                        current += 3;
                        break;
                    }
                    else
                    {
                        //e.g. 'edgar'
                        metaphoneData.Add(strTK);
                        current += 2;
                        break;
                    }
                }

                if (StringAt(workingString, current, strDT, strDD))
                {
                    metaphoneData.Add(strT);
                    current += 2;
                    break;
                }

                //else
                metaphoneData.Add(strT);
                current += 1;
                break;

            case charF:
                current += (workingString[current + 1] == charF) ? 2 : 1;
                metaphoneData.Add(strF);
                break;

            case charG:
                if (workingString[current + 1] == charH)
                {
                    if ((current > 0) && !IsVowel(workingString[current - 1]))
                    {
                        metaphoneData.Add(strK);
                        current += 2;
                        break;
                    }

                    if (current < 3)
                    {
                        //'ghislane', ghiradelli
                        if (current == 0)
                        {
                            if (workingString[current + 2] == charI)
                            {
                                metaphoneData.Add(strJ);
                            }
                            else
                            {
                                metaphoneData.Add(strK);
                            }
                            current += 2;
                            break;
                        }
                    }

                    //Parker's rule (with some further refinements) - e.g., 'hugh'
                    if (((current > 1) && StringAt(workingString, (current - 2), strB, strH, strD))
                        //e.g., 'bough'
                        || ((current > 2) && StringAt(workingString, (current - 3), strB, strH, strD))
                        //e.g., 'broughton'
                        || ((current > 3) && StringAt(workingString, (current - 4), strB, strH)))
                    {
                        current += 2;
                        break;
                    }
                    else
                    {
                        //e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
                        if ((current > 2)
                            && (workingString[current - 1] == charU)
                            && StringAt(workingString, (current - 3), strC, strG, strL, strR, strT))
                        {
                            metaphoneData.Add(strF);
                        }
                        else if ((current > 0) && workingString[current - 1] != charI)
                        {
                            metaphoneData.Add(strK);
                        }
                        current += 2;
                        break;
                    }
                }

                if (workingString[current + 1] == charN)
                {
                    if ((current == 1) && IsVowel(workingString[0]) && !isSlavoGermanic)
                    {
                        metaphoneData.Add(strKN, strN);
                    }
                    //not e.g. 'cagney'
                    else if (!StringAt(workingString, (current + 2), strEY)
                        && (workingString[current + 1] != charY)
                        && !isSlavoGermanic)
                    {
                        metaphoneData.Add(strN, strKN);
                    }
                    else
                    {
                        metaphoneData.Add(strKN);
                    }
                    current += 2;
                    break;
                }

                //'tagliaro'
                if (StringAt(workingString, (current + 1), strLI) && !isSlavoGermanic)
                {
                    metaphoneData.Add(strKL, strL);
                    current += 2;
                    break;
                }

                //-ges-,-gep-,-gel-, -gie- at beginning
                if ((current == 0)
                    && ((workingString[current + 1] == charY)
                        || StringAt(workingString, (current + 1), strES, strEP, strEB, strEL, strEY, strIB, strIL, strIN, strIE, strEI, strER)))
                {
                    metaphoneData.Add(strK, strJ);
                    current += 2;
                    break;
                }

                // -ger-, -gy-
                if ((StringAt(workingString, (current + 1), strER) || (workingString[current + 1] == charY))
                    && !StringAt(workingString, 0, strDANGER, strRANGER, strMANGER)
                    && !StringAt(workingString, (current - 1), strE, strI)
                    && !StringAt(workingString, (current - 1), strRGY, strOGY))
                {
                    metaphoneData.Add(strK, strJ);
                    current += 2;
                    break;
                }

                // italian e.g, 'biaggi'
                if (StringAt(workingString, (current + 1), strE, strI, strY)
                    || StringAt(workingString, (current - 1), strAGGI, strOGGI))
                {
                    //obvious germanic
                    if ((StringAt(workingString, 0, strVANsp, strVONsp) || StringAt(workingString, 0, strSCH))
                        || StringAt(workingString, (current + 1), strET))
                    {
                        metaphoneData.Add(strK);
                    }
                    //always soft if french ending
                    else if (StringAt(workingString, (current + 1), strIERsp))
                    {
                        metaphoneData.Add(strJ);
                    }
                    else
                    {
                        metaphoneData.Add(strJ, strK);
                    }
                    current += 2;
                    break;
                }

                current += (workingString[current + 1] == charG) ? 2 : 1;
                metaphoneData.Add(strK);
                break;

            case 'H':
                //only keep if first & before vowel or btw. 2 vowels
                if (((current == 0) || IsVowel(workingString[current - 1]))
                    && IsVowel(workingString[current + 1]))
                {
                    metaphoneData.Add(strH);
                    current += 2;
                }
                else
                {
                    //also takes care of 'HH'
                    current += 1;
                }
                break;

            case 'J':
                //obvious spanish, 'jose', 'san jacinto'
                if (StringAt(workingString, current, strJOSE) || StringAt(workingString, 0, strSANsp))
                {
                    if (((current == 0) && (workingString[current + 4] == ' '))
                        || StringAt(workingString, 0, strSANsp))
                    {
                        metaphoneData.Add(strH);
                    }
                    else
                    {
                        metaphoneData.Add(strJ, strH);
                    }
                    current += 1;
                    break;
                }

                if ((current == 0) && !StringAt(workingString, current, strJOSE))
                {
                    metaphoneData.Add(strJ, strA); //Yankelovich/Jankelowicz
                }
                //spanish pron. of e.g. 'bajador'
                else if (current > 0
                    && IsVowel(workingString[current - 1])
                    && !isSlavoGermanic
                    && ((workingString[current + 1] == charA) || (workingString[current + 1] == charO)))
                {
                    metaphoneData.Add(strJ, strH);
                }
                else if (current == last)
                {
                    metaphoneData.Add(strJ, sp);
                }
                else if (!StringAt(workingString, (current + 1), strL, strT, strK, strS, strN, strM, strB, strZ)
                    && !StringAt(workingString, (current - 1), strS, strK, strL))
                {
                    metaphoneData.Add(strJ);
                }

                //it could happen!
                current += (workingString[current + 1] == charJ) ? 2 : 1;
                break;

            case charK:
                current += (workingString[current + 1] == charK) ? 2 : 1;
                metaphoneData.Add(strK);
                break;

            case charL:
                if (workingString[current + 1] == charL)
                {
                    //spanish e.g. 'cabrillo', 'gallegos'
                    if (((current == (input.Length - 3))
                            && StringAt(workingString, (current - 1), strILLO, strILLA, strALLE))
                        || ((StringAt(workingString, (last - 1), strAS, strOS)
                                || StringAt(workingString, last, strA, strO))
                            && StringAt(workingString, (current - 1), strALLE)))
                    {
                        metaphoneData.Add(strL, sp);
                        current += 2;
                        break;
                    }
                    current += 2;
                }
                else
                {
                    current += 1;
                }
                metaphoneData.Add("L");
                break;

            case charM:
                if ((StringAt(workingString, (current - 1), strUMB)
                        && (((current + 1) == last) || StringAt(workingString, (current + 2), strER)))
                    //'dumb','thumb'
                    || (workingString[current + 1] == charM))
                {
                    current += 2;
                }
                else
                {
                    current += 1;
                }
                metaphoneData.Add("M");
                break;

            case charN:
                current += (workingString[current + 1] == charN) ? 2 : 1;
                metaphoneData.Add(strN);
                break;

            case charOdash:
                current += 1;
                metaphoneData.Add(strN);
                break;

            case charP:
                if (workingString[current + 1] == charH)
                {
                    metaphoneData.Add(strF);
                    current += 2;
                    break;
                }

                //also account for "campbell", "raspberry"
                current += StringAt(workingString, (current + 1), strP, strB) ? 2 : 1;
                metaphoneData.Add(strP);
                break;

            case charQ:
                current += (workingString[current + 1] == charQ) ? 2 : 1;
                metaphoneData.Add(strK);
                break;

            case charR:
                //french e.g. 'rogier', but exclude 'hochmeier'
                if ((current == last)
                    && !isSlavoGermanic
                    && StringAt(workingString, (current - 2), strIE)
                    && !StringAt(workingString, (current - 4), strME, strMA))
                {
                    metaphoneData.Add(string.Empty, strR);
                }
                else
                {
                    metaphoneData.Add(strR);
                }
                current += (workingString[current + 1] == charR) ? 2 : 1;
                break;

            case charS:
                //special cases 'island', 'isle', 'carlisle', 'carlysle'
                if (StringAt(workingString, (current - 1), strISL, strYSL))
                {
                    current += 1;
                    break;
                }

                //special case 'sugar-'
                if ((current == 0) && StringAt(workingString, current, strSUGAR))
                {
                    metaphoneData.Add(strX, strS);
                    current += 1;
                    break;
                }

                if (StringAt(workingString, current, strSH))
                {
                    //germanic
                    if (StringAt(workingString, (current + 1), strHEIM, strHOEK, strHOLM, strHOLZ))
                    {
                        metaphoneData.Add(strS);
                    }
                    else
                    {
                        metaphoneData.Add(strX);
                    }
                    current += 2;
                    break;
                }

                //italian & armenian
                if (StringAt(workingString, current, strSIO, strSIA) || StringAt(workingString, current, strSIAN))
                {
                    if (!isSlavoGermanic)
                    {
                        metaphoneData.Add(strS, strX);
                    }
                    else
                    {
                        metaphoneData.Add(strS);
                    }
                    current += 3;
                    break;
                }

                //german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
                //also, -sz- in slavic language altho in hungarian it is pronounced 's'
                if (((current == 0) && StringAt(workingString, (current + 1), strM, strN, strL, strW))
                    || StringAt(workingString, (current + 1), strZ))
                {
                    metaphoneData.Add(strS, strX);
                    current += StringAt(workingString, (current + 1), strZ) ? 2 : 1;
                    break;
                }

                if (StringAt(workingString, current, strSC))
                {
                    //Schlesinger's rule
                    if (workingString[current + 2] == charH)
                    {
                        //dutch origin, e.g. 'school', 'schooner'
                        if (StringAt(workingString, (current + 3), strOO, strER, strEN, strUY, strED, strEM))
                        {
                            //'schermerhorn', 'schenker'
                            if (StringAt(workingString, (current + 3), strER, strEN))
                            {
                                metaphoneData.Add(strX, strSK);
                            }
                            else
                            {
                                metaphoneData.Add(strSK);
                            }
                            current += 3;
                            break;
                        }
                        else
                        {
                            if ((current == 0) && !IsVowel(workingString[3]) && (workingString[3] != charW))
                            {
                                metaphoneData.Add(strX, strS);
                            }
                            else
                            {
                                metaphoneData.Add(strX);
                            }
                            current += 3;
                            break;
                        }
                    }

                    if (StringAt(workingString, (current + 2), strI, strE, strY))
                    {
                        metaphoneData.Add(strS);
                        current += 3;
                        break;
                    }

                    //else
                    metaphoneData.Add(strSK);
                    current += 3;
                    break;
                }

                //french e.g. 'resnais', 'artois'
                if ((current == last) && StringAt(workingString, (current - 2), strAI, strOI))
                {
                    metaphoneData.Add(string.Empty, strS);
                }
                else
                {
                    metaphoneData.Add(strS);
                }
                current += StringAt(workingString, (current + 1), strS, strZ) ? 2 : 1;
                break;

            case charT:
                if (StringAt(workingString, current, strTION))
                {
                    metaphoneData.Add(strX);
                    current += 3;
                    break;
                }

                if (StringAt(workingString, current, strTIA, strTCH))
                {
                    metaphoneData.Add(strX);
                    current += 3;
                    break;
                }

                if (StringAt(workingString, current, strTH) || StringAt(workingString, current, strTTH))
                {
                    //special case 'thomas', 'thames' or germanic
                    if (StringAt(workingString, (current + 2), strOM, strAM)
                        || StringAt(workingString, 0, strVANsp, strVONsp)
                        || StringAt(workingString, 0, strSCH))
                    {
                        metaphoneData.Add(strT);
                    }
                    else
                    {
                        metaphoneData.Add(strO, strT);
                    }
                    current += 2;
                    break;
                }

                current += StringAt(workingString, (current + 1), strT, strD) ? 2 : 1;
                metaphoneData.Add(strT);
                break;

            case charV:
                current += (workingString[current + 1] == charV) ? 2 : 1;
                metaphoneData.Add(strF);
                break;

            case charW:
                //can also be in middle of word
                if (StringAt(workingString, current, strWR))
                {
                    metaphoneData.Add(strR);
                    current += 2;
                    break;
                }

                if ((current == 0)
                    && (IsVowel(workingString[current + 1]) || StringAt(workingString, current, strWH)))
                {
                    //Wasserman should match Vasserman
                    if (IsVowel(workingString[current + 1]))
                    {
                        metaphoneData.Add(strA, strF);
                    }
                    else
                    {
                        //need Uomo to match Womo
                        metaphoneData.Add(strA);
                    }
                }

                //Arnow should match Arnoff
                if ((current == last && current > 0 && IsVowel(workingString[current - 1]))
                    || StringAt(workingString, (current - 1), strEWSKI, strEWSKY, strOWSKI, strOWSKY)
                    || StringAt(workingString, 0, strSCH))
                {
                    metaphoneData.Add(string.Empty, strF);
                    current += 1;
                    break;
                }

                //polish e.g. 'filipowicz'
                if (StringAt(workingString, current, strWICZ, strWITZ))
                {
                    metaphoneData.Add(strTS, strFX);
                    current += 4;
                    break;
                }

                //else skip it
                current += 1;
                break;

            case charX:
                //french e.g. breaux
                if (!((current == last)
                    && (StringAt(workingString, (current - 3), strIAU, strEAU)
                        || StringAt(workingString, (current - 2), strAU, strOU))))
                {
                    metaphoneData.Add(strKS);
                }
                current += StringAt(workingString, (current + 1), strC, strX) ? 2 : 1;
                break;

            case charZ:
                //chinese pinyin e.g. 'zhao'
                if (workingString[current + 1] == charH)
                {
                    metaphoneData.Add(strJ);
                    current += 2;
                    break;
                }
                else if (StringAt(workingString, (current + 1), strZO, strZI, strZA)
                    || (isSlavoGermanic && ((current > 0) && workingString[current - 1] != charT)))
                {
                    metaphoneData.Add(strS, strTS);
                }
                else
                {
                    metaphoneData.Add(strS);
                }
                current += (workingString[current + 1] == charZ) ? 2 : 1;
                break;

            default:
                current += 1;
                break;
        }
    }

    return metaphoneData.ToString();
}
/// <summary>
/// Encodes <paramref name="input"/> with the Double Metaphone phonetic algorithm.
/// </summary>
/// <remarks>
/// The input is upper-cased with the invariant culture and padded with one trailing
/// space, so looking one character ahead of <c>current</c> never leaves the buffer.
/// Look-behind (<c>current - 1</c>) through the string indexer is only performed
/// when <c>current</c> is known to be greater than zero. Characters are consumed left
/// to right: each rule appends to the primary and/or secondary key held by
/// <c>MetaphoneData</c> and advances <c>current</c> past the letters it has handled.
/// Encoding stops when the input is exhausted or once both keys have reached four
/// characters.
/// </remarks>
/// <param name="input">The word or name to encode.</param>
/// <returns>
/// <paramref name="input"/> itself when it is <c>null</c> or empty; otherwise the
/// value of <c>MetaphoneData.ToString()</c> after all rules have been applied.
/// </returns>
public static string ToDoubleMetaphone(this string input)
{
    MetaphoneData metaphoneData = new MetaphoneData();
    int current = 0;

    // An extension method can be invoked on a null reference, so null is treated
    // like the empty string instead of failing on input.Length.
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    int last = input.Length - 1; //zero based index
    string workingString = input.ToUpperInvariant() + " ";

    // 'W' and 'K' are looked up in the upper-cased buffer so the flag does not depend
    // on the letter case of the caller's input; the two string probes are already
    // case-insensitive.
    bool isSlavoGermanic = (workingString.IndexOf(charW) > -1)
        || (workingString.IndexOf(charK) > -1)
        || (input.IndexOf(strCZ, StringComparison.OrdinalIgnoreCase) > -1)
        || (input.IndexOf(strWITZ, StringComparison.OrdinalIgnoreCase) > -1);

    //skip these when at start of word
    if (workingString.StartsWith(StringComparison.OrdinalIgnoreCase, strGN, strKN, strPN, strWR, strPS))
    {
        current += 1;
    }

    //Initial 'X' is pronounced 'Z' e.g. 'Xavier'
    if (workingString[0] == charX)
    {
        metaphoneData.Add(strS); //'Z' maps to 'S'
        current += 1;
    }

    while ((metaphoneData.PrimaryLength < 4) || (metaphoneData.SecondaryLength < 4))
    {
        if (current >= input.Length)
        {
            break;
        }

        switch (workingString[current])
        {
            case charA:
            case charE:
            case charI:
            case charO:
            case charU:
            case charY:
                if (current == 0)
                {
                    //all init vowels now map to 'A'
                    metaphoneData.Add("A");
                }
                current += 1;
                break;

            case charB:
                //"-mb", e.g", "dumb", already skipped over...
                metaphoneData.Add("P");
                current += (workingString[current + 1] == charB) ? 2 : 1;
                break;

            case charAdash:
                metaphoneData.Add(strS);
                current += 1;
                break;

            case charC:
                //various germanic
                if ((current > 1)
                    && !IsVowel(workingString[current - 2])
                    && StringAt(workingString, (current - 1), strACH)
                    && ((workingString[current + 2] != charI)
                        && ((workingString[current + 2] != charE)
                            || StringAt(workingString, (current - 2), strBACHER, strMACHER))))
                {
                    metaphoneData.Add(strK);
                    current += 2;
                    break;
                }

                //special case 'caesar'
                if ((current == 0) && StringAt(workingString, current, strCAESAR))
                {
                    metaphoneData.Add(strS);
                    current += 2;
                    break;
                }

                //italian 'chianti'
                if (StringAt(workingString, current, strCHIA))
                {
                    metaphoneData.Add(strK);
                    current += 2;
                    break;
                }

                if (StringAt(workingString, current, strCH))
                {
                    //find 'michael'
                    if ((current > 0) && StringAt(workingString, current, strCHAE))
                    {
                        metaphoneData.Add(strK, strX);
                        current += 2;
                        break;
                    }

                    //greek roots e.g. 'chemistry', 'chorus'
                    if ((current == 0)
                        && (StringAt(workingString, (current + 1), strHARAC, strHARIS)
                            || StringAt(workingString, (current + 1), strHOR, strHYM, strHIA, strHEM))
                        && !StringAt(workingString, 0, strCHORE))
                    {
                        metaphoneData.Add(strK);
                        current += 2;
                        break;
                    }

                    //germanic, greek, or otherwise 'ch' for 'kh' sound
                    if ((StringAt(workingString, 0, strVANsp, strVONsp) || StringAt(workingString, 0, strSCH))
                        // 'architect but not 'arch', 'orchestra', 'orchid'
                        || StringAt(workingString, (current - 2), strORCHES, strARCHIT, strORCHID)
                        || StringAt(workingString, (current + 2), strT, strS)
                        || ((StringAt(workingString, (current - 1), strA, strO, strU, strE) || (current == 0))
                            //e.g., 'wachtler', 'wechsler', but not 'tichner'
                            && StringAt(workingString, (current + 2), strL, strR, strN, strM, strB, strH, strF, strV, strW, sp)))
                    {
                        metaphoneData.Add(strK);
                    }
                    else
                    {
                        if (current > 0)
                        {
                            if (StringAt(workingString, 0, strMC))
                            {
                                //e.g., "McHugh"
                                metaphoneData.Add(strK);
                            }
                            else
                            {
                                metaphoneData.Add(strX, strK);
                            }
                        }
                        else
                        {
                            metaphoneData.Add(strX);
                        }
                    }
                    current += 2;
                    break;
                }

                //e.g, 'czerny'
                if (StringAt(workingString, current, strCZ) && !StringAt(workingString, (current - 2), strWICZ))
                {
                    metaphoneData.Add(strS, strX);
                    current += 2;
                    break;
                }

                //e.g., 'focaccia'
                if (StringAt(workingString, (current + 1), strCIA))
                {
                    metaphoneData.Add(strX);
                    current += 3;
                    break;
                }

                //double 'C', but not if e.g. 'McClellan'
                if (StringAt(workingString, current, strCC) && !((current == 1) && (workingString[0] == charM)))
                {
                    //'bellocchio' but not 'bacchus'
                    if (StringAt(workingString, (current + 2), strI, strE, strH)
                        && !StringAt(workingString, (current + 2), strHU))
                    {
                        //'accident', 'accede' 'succeed'
                        if (((current == 1) && (workingString[current - 1] == charA))
                            || StringAt(workingString, (current - 1), strUCCEE, strUCCES))
                        {
                            metaphoneData.Add(strKS);
                        }
                        //'bacci', 'bertucci', other italian
                        else
                        {
                            metaphoneData.Add(strX);
                        }
                        current += 3;
                        break;
                    }
                    else
                    {
                        //Pierce's rule
                        metaphoneData.Add(strK);
                        current += 2;
                        break;
                    }
                }

                if (StringAt(workingString, current, strCK, strCG, strCQ))
                {
                    metaphoneData.Add(strK);
                    current += 2;
                    break;
                }

                if (StringAt(workingString, current, strCI, strCE, strCY))
                {
                    //italian vs. english
                    if (StringAt(workingString, current, strCIO, strCIE, strCIA))
                    {
                        metaphoneData.Add(strS, strX);
                    }
                    else
                    {
                        metaphoneData.Add(strS);
                    }
                    current += 2;
                    break;
                }

                //else
                metaphoneData.Add(strK);

                //name sent in 'mac caffrey', 'mac gregor
                if (StringAt(workingString, (current + 1), strspC, strspQ, strspG))
                {
                    current += 3;
                }
                else if (StringAt(workingString, (current + 1), strC, strK, strQ)
                    && !StringAt(workingString, (current + 1), strCE, strCI))
                {
                    current += 2;
                }
                else
                {
                    current += 1;
                }
                break;

            case charD:
                if (StringAt(workingString, current, strDG))
                {
                    if (StringAt(workingString, (current + 2), strI, strE, strY))
                    {
                        //e.g. 'edge'
                        metaphoneData.Add(strJ);
                        current += 3;
                        break;
                    }
                    else
                    {
                        //e.g. 'edgar'
                        metaphoneData.Add(strTK);
                        current += 2;
                        break;
                    }
                }

                if (StringAt(workingString, current, strDT, strDD))
                {
                    metaphoneData.Add(strT);
                    current += 2;
                    break;
                }

                //else
                metaphoneData.Add(strT);
                current += 1;
                break;

            case charF:
                current += (workingString[current + 1] == charF) ? 2 : 1;
                metaphoneData.Add(strF);
                break;

            case charG:
                if (workingString[current + 1] == charH)
                {
                    if ((current > 0) && !IsVowel(workingString[current - 1]))
                    {
                        metaphoneData.Add(strK);
                        current += 2;
                        break;
                    }

                    if (current < 3)
                    {
                        //'ghislane', ghiradelli
                        if (current == 0)
                        {
                            if (workingString[current + 2] == charI)
                            {
                                metaphoneData.Add(strJ);
                            }
                            else
                            {
                                metaphoneData.Add(strK);
                            }
                            current += 2;
                            break;
                        }
                    }

                    //Parker's rule (with some further refinements) - e.g., 'hugh'
                    if (((current > 1) && StringAt(workingString, (current - 2), strB, strH, strD))
                        //e.g., 'bough'
                        || ((current > 2) && StringAt(workingString, (current - 3), strB, strH, strD))
                        //e.g., 'broughton'
                        || ((current > 3) && StringAt(workingString, (current - 4), strB, strH)))
                    {
                        current += 2;
                        break;
                    }
                    else
                    {
                        //e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
                        if ((current > 2)
                            && (workingString[current - 1] == charU)
                            && StringAt(workingString, (current - 3), strC, strG, strL, strR, strT))
                        {
                            metaphoneData.Add(strF);
                        }
                        else if ((current > 0) && workingString[current - 1] != charI)
                        {
                            metaphoneData.Add(strK);
                        }
                        current += 2;
                        break;
                    }
                }

                if (workingString[current + 1] == charN)
                {
                    if ((current == 1) && IsVowel(workingString[0]) && !isSlavoGermanic)
                    {
                        metaphoneData.Add(strKN, strN);
                    }
                    //not e.g. 'cagney'
                    else if (!StringAt(workingString, (current + 2), strEY)
                        && (workingString[current + 1] != charY)
                        && !isSlavoGermanic)
                    {
                        metaphoneData.Add(strN, strKN);
                    }
                    else
                    {
                        metaphoneData.Add(strKN);
                    }
                    current += 2;
                    break;
                }

                //'tagliaro'
                if (StringAt(workingString, (current + 1), strLI) && !isSlavoGermanic)
                {
                    metaphoneData.Add(strKL, strL);
                    current += 2;
                    break;
                }

                //-ges-,-gep-,-gel-, -gie- at beginning
                if ((current == 0)
                    && ((workingString[current + 1] == charY)
                        || StringAt(workingString, (current + 1), strES, strEP, strEB, strEL, strEY, strIB, strIL, strIN, strIE, strEI, strER)))
                {
                    metaphoneData.Add(strK, strJ);
                    current += 2;
                    break;
                }

                // -ger-, -gy-
                if ((StringAt(workingString, (current + 1), strER) || (workingString[current + 1] == charY))
                    && !StringAt(workingString, 0, strDANGER, strRANGER, strMANGER)
                    && !StringAt(workingString, (current - 1), strE, strI)
                    && !StringAt(workingString, (current - 1), strRGY, strOGY))
                {
                    metaphoneData.Add(strK, strJ);
                    current += 2;
                    break;
                }

                // italian e.g, 'biaggi'
                if (StringAt(workingString, (current + 1), strE, strI, strY)
                    || StringAt(workingString, (current - 1), strAGGI, strOGGI))
                {
                    //obvious germanic
                    if ((StringAt(workingString, 0, strVANsp, strVONsp) || StringAt(workingString, 0, strSCH))
                        || StringAt(workingString, (current + 1), strET))
                    {
                        metaphoneData.Add(strK);
                    }
                    //always soft if french ending
                    else if (StringAt(workingString, (current + 1), strIERsp))
                    {
                        metaphoneData.Add(strJ);
                    }
                    else
                    {
                        metaphoneData.Add(strJ, strK);
                    }
                    current += 2;
                    break;
                }

                current += (workingString[current + 1] == charG) ? 2 : 1;
                metaphoneData.Add(strK);
                break;

            case 'H':
                //only keep if first & before vowel or btw. 2 vowels
                if (((current == 0) || IsVowel(workingString[current - 1]))
                    && IsVowel(workingString[current + 1]))
                {
                    metaphoneData.Add(strH);
                    current += 2;
                }
                else
                {
                    //also takes care of 'HH'
                    current += 1;
                }
                break;

            case 'J':
                //obvious spanish, 'jose', 'san jacinto'
                if (StringAt(workingString, current, strJOSE) || StringAt(workingString, 0, strSANsp))
                {
                    if (((current == 0) && (workingString[current + 4] == ' '))
                        || StringAt(workingString, 0, strSANsp))
                    {
                        metaphoneData.Add(strH);
                    }
                    else
                    {
                        metaphoneData.Add(strJ, strH);
                    }
                    current += 1;
                    break;
                }

                if ((current == 0) && !StringAt(workingString, current, strJOSE))
                {
                    metaphoneData.Add(strJ, strA); //Yankelovich/Jankelowicz
                }
                //spanish pron. of e.g. 'bajador'
                // (index 0 is fully handled by the branches above; the explicit
                // current > 0 test keeps the look-behind visibly in range)
                else if (current > 0
                    && IsVowel(workingString[current - 1])
                    && !isSlavoGermanic
                    && ((workingString[current + 1] == charA) || (workingString[current + 1] == charO)))
                {
                    metaphoneData.Add(strJ, strH);
                }
                else if (current == last)
                {
                    metaphoneData.Add(strJ, sp);
                }
                else if (!StringAt(workingString, (current + 1), strL, strT, strK, strS, strN, strM, strB, strZ)
                    && !StringAt(workingString, (current - 1), strS, strK, strL))
                {
                    metaphoneData.Add(strJ);
                }

                //it could happen!
                current += (workingString[current + 1] == charJ) ? 2 : 1;
                break;

            case charK:
                current += (workingString[current + 1] == charK) ? 2 : 1;
                metaphoneData.Add(strK);
                break;

            case charL:
                if (workingString[current + 1] == charL)
                {
                    //spanish e.g. 'cabrillo', 'gallegos'
                    if (((current == (input.Length - 3))
                            && StringAt(workingString, (current - 1), strILLO, strILLA, strALLE))
                        || ((StringAt(workingString, (last - 1), strAS, strOS)
                                || StringAt(workingString, last, strA, strO))
                            && StringAt(workingString, (current - 1), strALLE)))
                    {
                        metaphoneData.Add(strL, sp);
                        current += 2;
                        break;
                    }
                    current += 2;
                }
                else
                {
                    current += 1;
                }
                metaphoneData.Add("L");
                break;

            case charM:
                if ((StringAt(workingString, (current - 1), strUMB)
                        && (((current + 1) == last) || StringAt(workingString, (current + 2), strER)))
                    //'dumb','thumb'
                    || (workingString[current + 1] == charM))
                {
                    current += 2;
                }
                else
                {
                    current += 1;
                }
                metaphoneData.Add("M");
                break;

            case charN:
                current += (workingString[current + 1] == charN) ? 2 : 1;
                metaphoneData.Add(strN);
                break;

            case charOdash:
                current += 1;
                metaphoneData.Add(strN);
                break;

            case charP:
                if (workingString[current + 1] == charH)
                {
                    metaphoneData.Add(strF);
                    current += 2;
                    break;
                }

                //also account for "campbell", "raspberry"
                current += StringAt(workingString, (current + 1), strP, strB) ? 2 : 1;
                metaphoneData.Add(strP);
                break;

            case charQ:
                current += (workingString[current + 1] == charQ) ? 2 : 1;
                metaphoneData.Add(strK);
                break;

            case charR:
                //french e.g. 'rogier', but exclude 'hochmeier'
                if ((current == last)
                    && !isSlavoGermanic
                    && StringAt(workingString, (current - 2), strIE)
                    && !StringAt(workingString, (current - 4), strME, strMA))
                {
                    metaphoneData.Add(string.Empty, strR);
                }
                else
                {
                    metaphoneData.Add(strR);
                }
                current += (workingString[current + 1] == charR) ? 2 : 1;
                break;

            case charS:
                //special cases 'island', 'isle', 'carlisle', 'carlysle'
                if (StringAt(workingString, (current - 1), strISL, strYSL))
                {
                    current += 1;
                    break;
                }

                //special case 'sugar-'
                if ((current == 0) && StringAt(workingString, current, strSUGAR))
                {
                    metaphoneData.Add(strX, strS);
                    current += 1;
                    break;
                }

                if (StringAt(workingString, current, strSH))
                {
                    //germanic
                    if (StringAt(workingString, (current + 1), strHEIM, strHOEK, strHOLM, strHOLZ))
                    {
                        metaphoneData.Add(strS);
                    }
                    else
                    {
                        metaphoneData.Add(strX);
                    }
                    current += 2;
                    break;
                }

                //italian & armenian
                if (StringAt(workingString, current, strSIO, strSIA) || StringAt(workingString, current, strSIAN))
                {
                    if (!isSlavoGermanic)
                    {
                        metaphoneData.Add(strS, strX);
                    }
                    else
                    {
                        metaphoneData.Add(strS);
                    }
                    current += 3;
                    break;
                }

                //german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
                //also, -sz- in slavic language altho in hungarian it is pronounced 's'
                if (((current == 0) && StringAt(workingString, (current + 1), strM, strN, strL, strW))
                    || StringAt(workingString, (current + 1), strZ))
                {
                    metaphoneData.Add(strS, strX);
                    current += StringAt(workingString, (current + 1), strZ) ? 2 : 1;
                    break;
                }

                if (StringAt(workingString, current, strSC))
                {
                    //Schlesinger's rule
                    if (workingString[current + 2] == charH)
                    {
                        //dutch origin, e.g. 'school', 'schooner'
                        if (StringAt(workingString, (current + 3), strOO, strER, strEN, strUY, strED, strEM))
                        {
                            //'schermerhorn', 'schenker'
                            if (StringAt(workingString, (current + 3), strER, strEN))
                            {
                                metaphoneData.Add(strX, strSK);
                            }
                            else
                            {
                                metaphoneData.Add(strSK);
                            }
                            current += 3;
                            break;
                        }
                        else
                        {
                            if ((current == 0) && !IsVowel(workingString[3]) && (workingString[3] != charW))
                            {
                                metaphoneData.Add(strX, strS);
                            }
                            else
                            {
                                metaphoneData.Add(strX);
                            }
                            current += 3;
                            break;
                        }
                    }

                    if (StringAt(workingString, (current + 2), strI, strE, strY))
                    {
                        metaphoneData.Add(strS);
                        current += 3;
                        break;
                    }

                    //else
                    metaphoneData.Add(strSK);
                    current += 3;
                    break;
                }

                //french e.g. 'resnais', 'artois'
                if ((current == last) && StringAt(workingString, (current - 2), strAI, strOI))
                {
                    metaphoneData.Add(string.Empty, strS);
                }
                else
                {
                    metaphoneData.Add(strS);
                }
                current += StringAt(workingString, (current + 1), strS, strZ) ? 2 : 1;
                break;

            case charT:
                if (StringAt(workingString, current, strTION))
                {
                    metaphoneData.Add(strX);
                    current += 3;
                    break;
                }

                if (StringAt(workingString, current, strTIA, strTCH))
                {
                    metaphoneData.Add(strX);
                    current += 3;
                    break;
                }

                if (StringAt(workingString, current, strTH) || StringAt(workingString, current, strTTH))
                {
                    //special case 'thomas', 'thames' or germanic
                    if (StringAt(workingString, (current + 2), strOM, strAM)
                        || StringAt(workingString, 0, strVANsp, strVONsp)
                        || StringAt(workingString, 0, strSCH))
                    {
                        metaphoneData.Add(strT);
                    }
                    else
                    {
                        metaphoneData.Add(strO, strT);
                    }
                    current += 2;
                    break;
                }

                current += StringAt(workingString, (current + 1), strT, strD) ? 2 : 1;
                metaphoneData.Add(strT);
                break;

            case charV:
                current += (workingString[current + 1] == charV) ? 2 : 1;
                metaphoneData.Add(strF);
                break;

            case charW:
                //can also be in middle of word
                if (StringAt(workingString, current, strWR))
                {
                    metaphoneData.Add(strR);
                    current += 2;
                    break;
                }

                if ((current == 0)
                    && (IsVowel(workingString[current + 1]) || StringAt(workingString, current, strWH)))
                {
                    //Wasserman should match Vasserman
                    if (IsVowel(workingString[current + 1]))
                    {
                        metaphoneData.Add(strA, strF);
                    }
                    else
                    {
                        //need Uomo to match Womo
                        metaphoneData.Add(strA);
                    }
                }

                //Arnow should match Arnoff
                // current > 0 is required before the look-behind: for the one-letter
                // input "W" the W is both first and last, and index -1 does not exist.
                if (((current == last) && (current > 0) && IsVowel(workingString[current - 1]))
                    || StringAt(workingString, (current - 1), strEWSKI, strEWSKY, strOWSKI, strOWSKY)
                    || StringAt(workingString, 0, strSCH))
                {
                    metaphoneData.Add(string.Empty, strF);
                    current += 1;
                    break;
                }

                //polish e.g. 'filipowicz'
                if (StringAt(workingString, current, strWICZ, strWITZ))
                {
                    metaphoneData.Add(strTS, strFX);
                    current += 4;
                    break;
                }

                //else skip it
                current += 1;
                break;

            case charX:
                //french e.g. breaux
                if (!((current == last)
                    && (StringAt(workingString, (current - 3), strIAU, strEAU)
                        || StringAt(workingString, (current - 2), strAU, strOU))))
                {
                    metaphoneData.Add(strKS);
                }
                current += StringAt(workingString, (current + 1), strC, strX) ? 2 : 1;
                break;

            case charZ:
                //chinese pinyin e.g. 'zhao'
                if (workingString[current + 1] == charH)
                {
                    metaphoneData.Add(strJ);
                    current += 2;
                    break;
                }
                else if (StringAt(workingString, (current + 1), strZO, strZI, strZA)
                    || (isSlavoGermanic && ((current > 0) && workingString[current - 1] != charT)))
                {
                    metaphoneData.Add(strS, strTS);
                }
                else
                {
                    metaphoneData.Add(strS);
                }
                current += (workingString[current + 1] == charZ) ? 2 : 1;
                break;

            default:
                current += 1;
                break;
        }
    }

    return metaphoneData.ToString();
}
/// <summary>
/// Builds a Double Metaphone style phonetic encoding of <paramref name="input"/>.
/// The input is upper-cased and padded with one trailing space, then scanned left to
/// right; each letter rule appends a primary (and optionally a different secondary)
/// code to a <c>MetaphoneData</c> accumulator.
/// </summary>
/// <param name="input">The word or name to encode. May be <c>null</c> or empty.</param>
/// <returns>
/// <paramref name="input"/> itself when it is <c>null</c> or empty; otherwise the
/// result of <c>MetaphoneData.ToString()</c> for the accumulated codes.
/// </returns>
/// <remarks>
/// Scanning stops once the whole input has been consumed or both the primary and the
/// secondary code have reached four characters.
/// </remarks>
public static string ToDoubleMetaphone(this string input)
{
    // Nothing to encode: hand null/empty straight back instead of dereferencing null.
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    MetaphoneData metaphoneData = new MetaphoneData();
    int current = 0;
    int last = input.Length - 1; //zero based index

    // The single trailing space lets the rules peek one character past the end safely.
    string workingString = input.ToUpperInvariant() + " ";

    // Evaluated on the upper-cased text so that lower-case input is classified the
    // same way as upper-case input (IndexOf(char) is case-sensitive).
    bool isSlavoGermanic = (workingString.IndexOf(CharW) > -1)
        || (workingString.IndexOf(CharK) > -1)
        || (workingString.IndexOf(StrCz, StringComparison.OrdinalIgnoreCase) > -1)
        || (workingString.IndexOf(StrWitz, StringComparison.OrdinalIgnoreCase) > -1);

    //skip these when at start of word
    if (workingString.StartsWith(StringComparison.OrdinalIgnoreCase, StrGn, StrKn, StrPn, StrWr, StrPs))
    {
        current += 1;
    }

    //Initial 'X' is pronounced 'Z' e.g. 'Xavier'
    if (workingString[0] == CharX)
    {
        metaphoneData.Add(StrS); //'Z' maps to 'S'
        current += 1;
    }

    while ((metaphoneData.PrimaryLength < 4) || (metaphoneData.SecondaryLength < 4))
    {
        if (current >= input.Length)
        {
            break;
        }

        switch (workingString[current])
        {
            case CharA:
            case CharE:
            case CharI:
            case CharO:
            case CharU:
            case CharY:
                if (current == 0)
                {
                    //all init vowels now map to 'A'
                    metaphoneData.Add("A");
                }
                current += 1;
                break;

            case CharB:
                //"-mb", e.g. "dumb", already skipped over...
                metaphoneData.Add("P");
                current += (workingString[current + 1] == CharB) ? 2 : 1;
                break;

            case CharAdash:
                metaphoneData.Add(StrS);
                current += 1;
                break;

            case CharC:
                //various germanic
                if ((current > 1)
                    && !IsVowel(workingString[current - 2])
                    && StringAt(workingString, (current - 1), StrAch)
                    && ((workingString[current + 2] != CharI)
                        && ((workingString[current + 2] != CharE)
                            || StringAt(workingString, (current - 2), StrBacher, StrMacher))))
                {
                    metaphoneData.Add(StrK);
                    current += 2;
                    break;
                }

                //special case 'caesar'
                if ((current == 0) && StringAt(workingString, current, StrCaesar))
                {
                    metaphoneData.Add(StrS);
                    current += 2;
                    break;
                }

                //italian 'chianti'
                if (StringAt(workingString, current, StrChia))
                {
                    metaphoneData.Add(StrK);
                    current += 2;
                    break;
                }

                if (StringAt(workingString, current, StrCh))
                {
                    //find 'michael'
                    if ((current > 0) && StringAt(workingString, current, StrChae))
                    {
                        metaphoneData.Add(StrK, StrX);
                        current += 2;
                        break;
                    }

                    //greek roots e.g. 'chemistry', 'chorus'
                    if ((current == 0)
                        && (StringAt(workingString, (current + 1), StrHarac, StrHaris)
                            || StringAt(workingString, (current + 1), StrHor, StrHym, StrHia, StrHem))
                        && !StringAt(workingString, 0, StrChore))
                    {
                        metaphoneData.Add(StrK);
                        current += 2;
                        break;
                    }

                    //germanic, greek, or otherwise 'ch' for 'kh' sound
                    if ((StringAt(workingString, 0, StrVaNsp, StrVoNsp) || StringAt(workingString, 0, StrSch))
                        // 'architect' but not 'arch', 'orchestra', 'orchid'
                        || StringAt(workingString, (current - 2), StrOrches, StrArchit, StrOrchid)
                        || StringAt(workingString, (current + 2), StrT, StrS)
                        || ((StringAt(workingString, (current - 1), StrA, StrO, StrU, StrE) || (current == 0))
                            //e.g., 'wachtler', 'wechsler', but not 'tichner'
                            && StringAt(workingString, (current + 2), StrL, StrR, StrN, StrM, StrB, StrH, StrF, StrV, StrW, Sp)))
                    {
                        metaphoneData.Add(StrK);
                    }
                    else if (current > 0)
                    {
                        if (StringAt(workingString, 0, StrMc))
                        {
                            //e.g., "McHugh"
                            metaphoneData.Add(StrK);
                        }
                        else
                        {
                            metaphoneData.Add(StrX, StrK);
                        }
                    }
                    else
                    {
                        metaphoneData.Add(StrX);
                    }

                    current += 2;
                    break;
                }

                //e.g, 'czerny'
                if (StringAt(workingString, current, StrCz)
                    && !StringAt(workingString, (current - 2), StrWicz))
                {
                    metaphoneData.Add(StrS, StrX);
                    current += 2;
                    break;
                }

                //e.g., 'focaccia'
                if (StringAt(workingString, (current + 1), StrCia))
                {
                    metaphoneData.Add(StrX);
                    current += 3;
                    break;
                }

                //double 'C', but not if e.g. 'McClellan'
                if (StringAt(workingString, current, StrCc)
                    && !((current == 1) && (workingString[0] == CharM)))
                {
                    //'bellocchio' but not 'bacchus'
                    if (StringAt(workingString, (current + 2), StrI, StrE, StrH)
                        && !StringAt(workingString, (current + 2), StrHu))
                    {
                        //'accident', 'accede' 'succeed'
                        if (((current == 1) && (workingString[current - 1] == CharA))
                            || StringAt(workingString, (current - 1), StrUccee, StrUcces))
                        {
                            metaphoneData.Add(StrKs);
                        }
                        //'bacci', 'bertucci', other italian
                        else
                        {
                            metaphoneData.Add(StrX);
                        }

                        current += 3;
                        break;
                    }

                    //Pierce's rule
                    metaphoneData.Add(StrK);
                    current += 2;
                    break;
                }

                if (StringAt(workingString, current, StrCk, StrCg, StrCq))
                {
                    metaphoneData.Add(StrK);
                    current += 2;
                    break;
                }

                if (StringAt(workingString, current, StrCi, StrCe, StrCy))
                {
                    //italian vs. english
                    if (StringAt(workingString, current, StrCio, StrCie, StrCia))
                    {
                        metaphoneData.Add(StrS, StrX);
                    }
                    else
                    {
                        metaphoneData.Add(StrS);
                    }

                    current += 2;
                    break;
                }

                //else
                metaphoneData.Add(StrK);

                //name sent in 'mac caffrey', 'mac gregor'
                if (StringAt(workingString, (current + 1), StrspC, StrspQ, StrspG))
                {
                    current += 3;
                }
                else if (StringAt(workingString, (current + 1), StrC, StrK, StrQ)
                    && !StringAt(workingString, (current + 1), StrCe, StrCi))
                {
                    current += 2;
                }
                else
                {
                    current += 1;
                }
                break;

            case CharD:
                if (StringAt(workingString, current, StrDg))
                {
                    if (StringAt(workingString, (current + 2), StrI, StrE, StrY))
                    {
                        //e.g. 'edge'
                        metaphoneData.Add(StrJ);
                        current += 3;
                        break;
                    }

                    //e.g. 'edgar'
                    metaphoneData.Add(StrTk);
                    current += 2;
                    break;
                }

                if (StringAt(workingString, current, StrDt, StrDd))
                {
                    metaphoneData.Add(StrT);
                    current += 2;
                    break;
                }

                //else
                metaphoneData.Add(StrT);
                current += 1;
                break;

            case CharF:
                current += (workingString[current + 1] == CharF) ? 2 : 1;
                metaphoneData.Add(StrF);
                break;

            case CharG:
                if (workingString[current + 1] == CharH)
                {
                    if ((current > 0) && !IsVowel(workingString[current - 1]))
                    {
                        metaphoneData.Add(StrK);
                        current += 2;
                        break;
                    }

                    //'ghislane', ghiradelli
                    if (current == 0)
                    {
                        if (workingString[current + 2] == CharI)
                        {
                            metaphoneData.Add(StrJ);
                        }
                        else
                        {
                            metaphoneData.Add(StrK);
                        }

                        current += 2;
                        break;
                    }

                    //Parker's rule (with some further refinements) - e.g., 'hugh'
                    if (((current > 1) && StringAt(workingString, (current - 2), StrB, StrH, StrD))
                        //e.g., 'bough'
                        || ((current > 2) && StringAt(workingString, (current - 3), StrB, StrH, StrD))
                        //e.g., 'broughton'
                        || ((current > 3) && StringAt(workingString, (current - 4), StrB, StrH)))
                    {
                        // silent 'GH': emit nothing
                        current += 2;
                        break;
                    }

                    //e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
                    if ((current > 2)
                        && (workingString[current - 1] == CharU)
                        && StringAt(workingString, (current - 3), StrC, StrG, StrL, StrR, StrT))
                    {
                        metaphoneData.Add(StrF);
                    }
                    else if ((current > 0) && workingString[current - 1] != CharI)
                    {
                        metaphoneData.Add(StrK);
                    }

                    current += 2;
                    break;
                }

                if (workingString[current + 1] == CharN)
                {
                    if ((current == 1) && IsVowel(workingString[0]) && !isSlavoGermanic)
                    {
                        metaphoneData.Add(StrKn, StrN);
                    }
                    //not e.g. 'cagney'
                    else if (!StringAt(workingString, (current + 2), StrEy)
                        && (workingString[current + 1] != CharY)
                        && !isSlavoGermanic)
                    {
                        metaphoneData.Add(StrN, StrKn);
                    }
                    else
                    {
                        metaphoneData.Add(StrKn);
                    }

                    current += 2;
                    break;
                }

                //'tagliaro'
                if (StringAt(workingString, (current + 1), StrLi) && !isSlavoGermanic)
                {
                    metaphoneData.Add(StrKl, StrL);
                    current += 2;
                    break;
                }

                //-ges-,-gep-,-gel-, -gie- at beginning
                if ((current == 0)
                    && ((workingString[current + 1] == CharY)
                        || StringAt(workingString, (current + 1), StrEs, StrEp, StrEb, StrEl, StrEy, StrIb, StrIl, StrIn, StrIe, StrEi, StrEr)))
                {
                    metaphoneData.Add(StrK, StrJ);
                    current += 2;
                    break;
                }

                // -ger-, -gy-
                if ((StringAt(workingString, (current + 1), StrEr) || (workingString[current + 1] == CharY))
                    && !StringAt(workingString, 0, StrDanger, StrRanger, StrManger)
                    && !StringAt(workingString, (current - 1), StrE, StrI)
                    && !StringAt(workingString, (current - 1), StrRgy, StrOgy))
                {
                    metaphoneData.Add(StrK, StrJ);
                    current += 2;
                    break;
                }

                // italian e.g, 'biaggi'
                if (StringAt(workingString, (current + 1), StrE, StrI, StrY)
                    || StringAt(workingString, (current - 1), StrAggi, StrOggi))
                {
                    //obvious germanic
                    if ((StringAt(workingString, 0, StrVaNsp, StrVoNsp) || StringAt(workingString, 0, StrSch))
                        || StringAt(workingString, (current + 1), StrEt))
                    {
                        metaphoneData.Add(StrK);
                    }
                    //always soft if french ending
                    else if (StringAt(workingString, (current + 1), StrIeRsp))
                    {
                        metaphoneData.Add(StrJ);
                    }
                    else
                    {
                        metaphoneData.Add(StrJ, StrK);
                    }

                    current += 2;
                    break;
                }

                current += (workingString[current + 1] == CharG) ? 2 : 1;
                metaphoneData.Add(StrK);
                break;

            case 'H':
                //only keep if first & before vowel or btw. 2 vowels
                if (((current == 0) || IsVowel(workingString[current - 1]))
                    && IsVowel(workingString[current + 1]))
                {
                    metaphoneData.Add(StrH);
                    current += 2;
                }
                else //also takes care of 'HH'
                {
                    current += 1;
                }
                break;

            case 'J':
                //obvious spanish, 'jose', 'san jacinto'
                if (StringAt(workingString, current, StrJose) || StringAt(workingString, 0, StrSaNsp))
                {
                    if (((current == 0) && (workingString[current + 4] == ' '))
                        || StringAt(workingString, 0, StrSaNsp))
                    {
                        metaphoneData.Add(StrH);
                    }
                    else
                    {
                        metaphoneData.Add(StrJ, StrH);
                    }

                    current += 1;
                    break;
                }

                if ((current == 0) && !StringAt(workingString, current, StrJose))
                {
                    metaphoneData.Add(StrJ, StrA); //Yankelovich/Jankelowicz
                }
                //spanish pron. of e.g. 'bajador'
                else if (current > 0
                    && IsVowel(workingString[current - 1])
                    && !isSlavoGermanic
                    && ((workingString[current + 1] == CharA) || (workingString[current + 1] == CharO)))
                {
                    metaphoneData.Add(StrJ, StrH);
                }
                else if (current == last)
                {
                    metaphoneData.Add(StrJ, Sp);
                }
                else if (!StringAt(workingString, (current + 1), StrL, StrT, StrK, StrS, StrN, StrM, StrB, StrZ)
                    && !StringAt(workingString, (current - 1), StrS, StrK, StrL))
                {
                    metaphoneData.Add(StrJ);
                }

                //it could happen!
                current += (workingString[current + 1] == CharJ) ? 2 : 1;
                break;

            case CharK:
                current += (workingString[current + 1] == CharK) ? 2 : 1;
                metaphoneData.Add(StrK);
                break;

            case CharL:
                if (workingString[current + 1] == CharL)
                {
                    //spanish e.g. 'cabrillo', 'gallegos'
                    if (((current == (input.Length - 3))
                            && StringAt(workingString, (current - 1), StrIllo, StrIlla, StrAlle))
                        || ((StringAt(workingString, (last - 1), StrAs, StrOs)
                                || StringAt(workingString, last, StrA, StrO))
                            && StringAt(workingString, (current - 1), StrAlle)))
                    {
                        metaphoneData.Add(StrL, Sp);
                        current += 2;
                        break;
                    }

                    current += 2;
                }
                else
                {
                    current += 1;
                }

                metaphoneData.Add("L");
                break;

            case CharM:
                if ((StringAt(workingString, (current - 1), StrUmb)
                        && (((current + 1) == last) || StringAt(workingString, (current + 2), StrEr)))
                    //'dumb','thumb'
                    || (workingString[current + 1] == CharM))
                {
                    current += 2;
                }
                else
                {
                    current += 1;
                }

                metaphoneData.Add("M");
                break;

            case CharN:
                current += (workingString[current + 1] == CharN) ? 2 : 1;
                metaphoneData.Add(StrN);
                break;

            case CharOdash:
                current += 1;
                metaphoneData.Add(StrN);
                break;

            case CharP:
                if (workingString[current + 1] == CharH)
                {
                    metaphoneData.Add(StrF);
                    current += 2;
                    break;
                }

                //also account for "campbell", "raspberry"
                current += StringAt(workingString, (current + 1), StrP, StrB) ? 2 : 1;
                metaphoneData.Add(StrP);
                break;

            case CharQ:
                current += (workingString[current + 1] == CharQ) ? 2 : 1;
                metaphoneData.Add(StrK);
                break;

            case CharR:
                //french e.g. 'rogier', but exclude 'hochmeier'
                if ((current == last)
                    && !isSlavoGermanic
                    && StringAt(workingString, (current - 2), StrIe)
                    && !StringAt(workingString, (current - 4), StrMe, StrMa))
                {
                    metaphoneData.Add(string.Empty, StrR);
                }
                else
                {
                    metaphoneData.Add(StrR);
                }

                current += (workingString[current + 1] == CharR) ? 2 : 1;
                break;

            case CharS:
                //special cases 'island', 'isle', 'carlisle', 'carlysle'
                if (StringAt(workingString, (current - 1), StrIsl, StrYsl))
                {
                    current += 1;
                    break;
                }

                //special case 'sugar-'
                if ((current == 0) && StringAt(workingString, current, StrSugar))
                {
                    metaphoneData.Add(StrX, StrS);
                    current += 1;
                    break;
                }

                if (StringAt(workingString, current, StrSh))
                {
                    //germanic
                    if (StringAt(workingString, (current + 1), StrHeim, StrHoek, StrHolm, StrHolz))
                    {
                        metaphoneData.Add(StrS);
                    }
                    else
                    {
                        metaphoneData.Add(StrX);
                    }

                    current += 2;
                    break;
                }

                //italian & armenian
                if (StringAt(workingString, current, StrSio, StrSia)
                    || StringAt(workingString, current, StrSian))
                {
                    if (!isSlavoGermanic)
                    {
                        metaphoneData.Add(StrS, StrX);
                    }
                    else
                    {
                        metaphoneData.Add(StrS);
                    }

                    current += 3;
                    break;
                }

                //german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
                //also, -sz- in slavic language altho in hungarian it is pronounced 's'
                if (((current == 0) && StringAt(workingString, (current + 1), StrM, StrN, StrL, StrW))
                    || StringAt(workingString, (current + 1), StrZ))
                {
                    metaphoneData.Add(StrS, StrX);
                    current += StringAt(workingString, (current + 1), StrZ) ? 2 : 1;
                    break;
                }

                if (StringAt(workingString, current, StrSc))
                {
                    //Schlesinger's rule
                    if (workingString[current + 2] == CharH)
                    {
                        //dutch origin, e.g. 'school', 'schooner'
                        if (StringAt(workingString, (current + 3), StrOo, StrEr, StrEn, StrUy, StrEd, StrEm))
                        {
                            //'schermerhorn', 'schenker'
                            if (StringAt(workingString, (current + 3), StrEr, StrEn))
                            {
                                metaphoneData.Add(StrX, StrSk);
                            }
                            else
                            {
                                metaphoneData.Add(StrSk);
                            }

                            current += 3;
                            break;
                        }

                        if ((current == 0) && !IsVowel(workingString[3]) && (workingString[3] != CharW))
                        {
                            metaphoneData.Add(StrX, StrS);
                        }
                        else
                        {
                            metaphoneData.Add(StrX);
                        }

                        current += 3;
                        break;
                    }

                    if (StringAt(workingString, (current + 2), StrI, StrE, StrY))
                    {
                        metaphoneData.Add(StrS);
                        current += 3;
                        break;
                    }

                    //else
                    metaphoneData.Add(StrSk);
                    current += 3;
                    break;
                }

                //french e.g. 'resnais', 'artois'
                if ((current == last) && StringAt(workingString, (current - 2), StrAi, StrOi))
                {
                    metaphoneData.Add(string.Empty, StrS);
                }
                else
                {
                    metaphoneData.Add(StrS);
                }

                current += StringAt(workingString, (current + 1), StrS, StrZ) ? 2 : 1;
                break;

            case CharT:
                if (StringAt(workingString, current, StrTion))
                {
                    metaphoneData.Add(StrX);
                    current += 3;
                    break;
                }

                if (StringAt(workingString, current, StrTia, StrTch))
                {
                    metaphoneData.Add(StrX);
                    current += 3;
                    break;
                }

                if (StringAt(workingString, current, StrTh) || StringAt(workingString, current, StrTth))
                {
                    //special case 'thomas', 'thames' or germanic
                    if (StringAt(workingString, (current + 2), StrOm, StrAm)
                        || StringAt(workingString, 0, StrVaNsp, StrVoNsp)
                        || StringAt(workingString, 0, StrSch))
                    {
                        metaphoneData.Add(StrT);
                    }
                    else
                    {
                        metaphoneData.Add(StrO, StrT);
                    }

                    current += 2;
                    break;
                }

                current += StringAt(workingString, (current + 1), StrT, StrD) ? 2 : 1;
                metaphoneData.Add(StrT);
                break;

            case CharV:
                current += (workingString[current + 1] == CharV) ? 2 : 1;
                metaphoneData.Add(StrF);
                break;

            case CharW:
                //can also be in middle of word
                if (StringAt(workingString, current, StrWr))
                {
                    metaphoneData.Add(StrR);
                    current += 2;
                    break;
                }

                if ((current == 0)
                    && (IsVowel(workingString[current + 1]) || StringAt(workingString, current, StrWh)))
                {
                    //Wasserman should match Vasserman
                    if (IsVowel(workingString[current + 1]))
                    {
                        metaphoneData.Add(StrA, StrF);
                    }
                    else
                    {
                        //need Uomo to match Womo
                        metaphoneData.Add(StrA);
                    }
                }

                //Arnow should match Arnoff
                if ((current == last && current > 0 && IsVowel(workingString[current - 1]))
                    || StringAt(workingString, (current - 1), StrEwski, StrEwsky, StrOwski, StrOwsky)
                    || StringAt(workingString, 0, StrSch))
                {
                    metaphoneData.Add(string.Empty, StrF);
                    current += 1;
                    break;
                }

                //polish e.g. 'filipowicz'
                if (StringAt(workingString, current, StrWicz, StrWitz))
                {
                    metaphoneData.Add(StrTs, StrFx);
                    current += 4;
                    break;
                }

                //else skip it
                current += 1;
                break;

            case CharX:
                //french e.g. breaux
                if (!((current == last)
                    && (StringAt(workingString, (current - 3), StrIau, StrEau)
                        || StringAt(workingString, (current - 2), StrAu, StrOu))))
                {
                    metaphoneData.Add(StrKs);
                }

                current += StringAt(workingString, (current + 1), StrC, StrX) ? 2 : 1;
                break;

            case CharZ:
                //chinese pinyin e.g. 'zhao'
                if (workingString[current + 1] == CharH)
                {
                    metaphoneData.Add(StrJ);
                    current += 2;
                    break;
                }

                if (StringAt(workingString, (current + 1), StrZo, StrZi, StrZa)
                    || (isSlavoGermanic && ((current > 0) && workingString[current - 1] != CharT)))
                {
                    metaphoneData.Add(StrS, StrTs);
                }
                else
                {
                    metaphoneData.Add(StrS);
                }

                current += (workingString[current + 1] == CharZ) ? 2 : 1;
                break;

            default:
                // Any other character contributes nothing to the code.
                current += 1;
                break;
        }
    }

    return metaphoneData.ToString();
}
/// <summary>
/// Builds a Double Metaphone style phonetic encoding of <paramref name="input"/>.
/// The input is upper-cased and padded with one trailing space, then scanned left to
/// right; each letter rule appends a primary (and optionally a different secondary)
/// code to a <c>MetaphoneData</c> accumulator.
/// </summary>
/// <param name="input">The word or name to encode. May be <c>null</c> or empty.</param>
/// <returns>
/// <paramref name="input"/> itself when it is <c>null</c> or empty; otherwise the
/// result of <c>MetaphoneData.ToString()</c> for the accumulated codes.
/// </returns>
/// <remarks>
/// Scanning stops once the whole input has been consumed or both the primary and the
/// secondary code have reached four characters.
/// </remarks>
public static string ToDoubleMetaphone(this string input)
{
    // Nothing to encode: hand null/empty straight back instead of dereferencing null.
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    MetaphoneData metaphoneData = new MetaphoneData();
    int current = 0;
    int last = input.Length - 1; //zero based index

    // The single trailing space lets the rules peek one character past the end safely.
    string workingString = input.ToUpperInvariant() + " ";

    // Evaluated on the upper-cased text so that lower-case input is classified the
    // same way as upper-case input (IndexOf(char) is case-sensitive).
    bool isSlavoGermanic = (workingString.IndexOf(CharW) > -1)
        || (workingString.IndexOf(CharK) > -1)
        || (workingString.IndexOf(StrCz, StringComparison.OrdinalIgnoreCase) > -1)
        || (workingString.IndexOf(StrWitz, StringComparison.OrdinalIgnoreCase) > -1);

    //skip these when at start of word
    if (workingString.StartsWith(StringComparison.OrdinalIgnoreCase, StrGn, StrKn, StrPn, StrWr, StrPs))
    {
        current += 1;
    }

    //Initial 'X' is pronounced 'Z' e.g. 'Xavier'
    if (workingString[0] == CharX)
    {
        metaphoneData.Add(StrS); //'Z' maps to 'S'
        current += 1;
    }

    while ((metaphoneData.PrimaryLength < 4) || (metaphoneData.SecondaryLength < 4))
    {
        if (current >= input.Length)
        {
            break;
        }

        switch (workingString[current])
        {
            case CharA:
            case CharE:
            case CharI:
            case CharO:
            case CharU:
            case CharY:
                if (current == 0)
                {
                    //all init vowels now map to 'A'
                    metaphoneData.Add("A");
                }
                current += 1;
                break;

            case CharB:
                //"-mb", e.g. "dumb", already skipped over...
                metaphoneData.Add("P");
                current += (workingString[current + 1] == CharB) ? 2 : 1;
                break;

            case CharAdash:
                metaphoneData.Add(StrS);
                current += 1;
                break;

            case CharC:
                //various germanic
                if ((current > 1)
                    && !IsVowel(workingString[current - 2])
                    && StringAt(workingString, (current - 1), StrAch)
                    && ((workingString[current + 2] != CharI)
                        && ((workingString[current + 2] != CharE)
                            || StringAt(workingString, (current - 2), StrBacher, StrMacher))))
                {
                    metaphoneData.Add(StrK);
                    current += 2;
                    break;
                }

                //special case 'caesar'
                if ((current == 0) && StringAt(workingString, current, StrCaesar))
                {
                    metaphoneData.Add(StrS);
                    current += 2;
                    break;
                }

                //italian 'chianti'
                if (StringAt(workingString, current, StrChia))
                {
                    metaphoneData.Add(StrK);
                    current += 2;
                    break;
                }

                if (StringAt(workingString, current, StrCh))
                {
                    //find 'michael'
                    if ((current > 0) && StringAt(workingString, current, StrChae))
                    {
                        metaphoneData.Add(StrK, StrX);
                        current += 2;
                        break;
                    }

                    //greek roots e.g. 'chemistry', 'chorus'
                    if ((current == 0)
                        && (StringAt(workingString, (current + 1), StrHarac, StrHaris)
                            || StringAt(workingString, (current + 1), StrHor, StrHym, StrHia, StrHem))
                        && !StringAt(workingString, 0, StrChore))
                    {
                        metaphoneData.Add(StrK);
                        current += 2;
                        break;
                    }

                    //germanic, greek, or otherwise 'ch' for 'kh' sound
                    if ((StringAt(workingString, 0, StrVaNsp, StrVoNsp) || StringAt(workingString, 0, StrSch))
                        // 'architect' but not 'arch', 'orchestra', 'orchid'
                        || StringAt(workingString, (current - 2), StrOrches, StrArchit, StrOrchid)
                        || StringAt(workingString, (current + 2), StrT, StrS)
                        || ((StringAt(workingString, (current - 1), StrA, StrO, StrU, StrE) || (current == 0))
                            //e.g., 'wachtler', 'wechsler', but not 'tichner'
                            && StringAt(workingString, (current + 2), StrL, StrR, StrN, StrM, StrB, StrH, StrF, StrV, StrW, Sp)))
                    {
                        metaphoneData.Add(StrK);
                    }
                    else if (current > 0)
                    {
                        if (StringAt(workingString, 0, StrMc))
                        {
                            //e.g., "McHugh"
                            metaphoneData.Add(StrK);
                        }
                        else
                        {
                            metaphoneData.Add(StrX, StrK);
                        }
                    }
                    else
                    {
                        metaphoneData.Add(StrX);
                    }

                    current += 2;
                    break;
                }

                //e.g, 'czerny'
                if (StringAt(workingString, current, StrCz)
                    && !StringAt(workingString, (current - 2), StrWicz))
                {
                    metaphoneData.Add(StrS, StrX);
                    current += 2;
                    break;
                }

                //e.g., 'focaccia'
                if (StringAt(workingString, (current + 1), StrCia))
                {
                    metaphoneData.Add(StrX);
                    current += 3;
                    break;
                }

                //double 'C', but not if e.g. 'McClellan'
                if (StringAt(workingString, current, StrCc)
                    && !((current == 1) && (workingString[0] == CharM)))
                {
                    //'bellocchio' but not 'bacchus'
                    if (StringAt(workingString, (current + 2), StrI, StrE, StrH)
                        && !StringAt(workingString, (current + 2), StrHu))
                    {
                        //'accident', 'accede' 'succeed'
                        if (((current == 1) && (workingString[current - 1] == CharA))
                            || StringAt(workingString, (current - 1), StrUccee, StrUcces))
                        {
                            metaphoneData.Add(StrKs);
                        }
                        //'bacci', 'bertucci', other italian
                        else
                        {
                            metaphoneData.Add(StrX);
                        }

                        current += 3;
                        break;
                    }

                    //Pierce's rule
                    metaphoneData.Add(StrK);
                    current += 2;
                    break;
                }

                if (StringAt(workingString, current, StrCk, StrCg, StrCq))
                {
                    metaphoneData.Add(StrK);
                    current += 2;
                    break;
                }

                if (StringAt(workingString, current, StrCi, StrCe, StrCy))
                {
                    //italian vs. english
                    if (StringAt(workingString, current, StrCio, StrCie, StrCia))
                    {
                        metaphoneData.Add(StrS, StrX);
                    }
                    else
                    {
                        metaphoneData.Add(StrS);
                    }

                    current += 2;
                    break;
                }

                //else
                metaphoneData.Add(StrK);

                //name sent in 'mac caffrey', 'mac gregor'
                if (StringAt(workingString, (current + 1), StrspC, StrspQ, StrspG))
                {
                    current += 3;
                }
                else if (StringAt(workingString, (current + 1), StrC, StrK, StrQ)
                    && !StringAt(workingString, (current + 1), StrCe, StrCi))
                {
                    current += 2;
                }
                else
                {
                    current += 1;
                }
                break;

            case CharD:
                if (StringAt(workingString, current, StrDg))
                {
                    if (StringAt(workingString, (current + 2), StrI, StrE, StrY))
                    {
                        //e.g. 'edge'
                        metaphoneData.Add(StrJ);
                        current += 3;
                        break;
                    }

                    //e.g. 'edgar'
                    metaphoneData.Add(StrTk);
                    current += 2;
                    break;
                }

                if (StringAt(workingString, current, StrDt, StrDd))
                {
                    metaphoneData.Add(StrT);
                    current += 2;
                    break;
                }

                //else
                metaphoneData.Add(StrT);
                current += 1;
                break;

            case CharF:
                current += (workingString[current + 1] == CharF) ? 2 : 1;
                metaphoneData.Add(StrF);
                break;

            case CharG:
                if (workingString[current + 1] == CharH)
                {
                    if ((current > 0) && !IsVowel(workingString[current - 1]))
                    {
                        metaphoneData.Add(StrK);
                        current += 2;
                        break;
                    }

                    //'ghislane', ghiradelli
                    if (current == 0)
                    {
                        if (workingString[current + 2] == CharI)
                        {
                            metaphoneData.Add(StrJ);
                        }
                        else
                        {
                            metaphoneData.Add(StrK);
                        }

                        current += 2;
                        break;
                    }

                    //Parker's rule (with some further refinements) - e.g., 'hugh'
                    if (((current > 1) && StringAt(workingString, (current - 2), StrB, StrH, StrD))
                        //e.g., 'bough'
                        || ((current > 2) && StringAt(workingString, (current - 3), StrB, StrH, StrD))
                        //e.g., 'broughton'
                        || ((current > 3) && StringAt(workingString, (current - 4), StrB, StrH)))
                    {
                        // silent 'GH': emit nothing
                        current += 2;
                        break;
                    }

                    //e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough'
                    if ((current > 2)
                        && (workingString[current - 1] == CharU)
                        && StringAt(workingString, (current - 3), StrC, StrG, StrL, StrR, StrT))
                    {
                        metaphoneData.Add(StrF);
                    }
                    else if ((current > 0) && workingString[current - 1] != CharI)
                    {
                        metaphoneData.Add(StrK);
                    }

                    current += 2;
                    break;
                }

                if (workingString[current + 1] == CharN)
                {
                    if ((current == 1) && IsVowel(workingString[0]) && !isSlavoGermanic)
                    {
                        metaphoneData.Add(StrKn, StrN);
                    }
                    //not e.g. 'cagney'
                    else if (!StringAt(workingString, (current + 2), StrEy)
                        && (workingString[current + 1] != CharY)
                        && !isSlavoGermanic)
                    {
                        metaphoneData.Add(StrN, StrKn);
                    }
                    else
                    {
                        metaphoneData.Add(StrKn);
                    }

                    current += 2;
                    break;
                }

                //'tagliaro'
                if (StringAt(workingString, (current + 1), StrLi) && !isSlavoGermanic)
                {
                    metaphoneData.Add(StrKl, StrL);
                    current += 2;
                    break;
                }

                //-ges-,-gep-,-gel-, -gie- at beginning
                if ((current == 0)
                    && ((workingString[current + 1] == CharY)
                        || StringAt(workingString, (current + 1), StrEs, StrEp, StrEb, StrEl, StrEy, StrIb, StrIl, StrIn, StrIe, StrEi, StrEr)))
                {
                    metaphoneData.Add(StrK, StrJ);
                    current += 2;
                    break;
                }

                // -ger-, -gy-
                if ((StringAt(workingString, (current + 1), StrEr) || (workingString[current + 1] == CharY))
                    && !StringAt(workingString, 0, StrDanger, StrRanger, StrManger)
                    && !StringAt(workingString, (current - 1), StrE, StrI)
                    && !StringAt(workingString, (current - 1), StrRgy, StrOgy))
                {
                    metaphoneData.Add(StrK, StrJ);
                    current += 2;
                    break;
                }

                // italian e.g, 'biaggi'
                if (StringAt(workingString, (current + 1), StrE, StrI, StrY)
                    || StringAt(workingString, (current - 1), StrAggi, StrOggi))
                {
                    //obvious germanic
                    if ((StringAt(workingString, 0, StrVaNsp, StrVoNsp) || StringAt(workingString, 0, StrSch))
                        || StringAt(workingString, (current + 1), StrEt))
                    {
                        metaphoneData.Add(StrK);
                    }
                    //always soft if french ending
                    else if (StringAt(workingString, (current + 1), StrIeRsp))
                    {
                        metaphoneData.Add(StrJ);
                    }
                    else
                    {
                        metaphoneData.Add(StrJ, StrK);
                    }

                    current += 2;
                    break;
                }

                current += (workingString[current + 1] == CharG) ? 2 : 1;
                metaphoneData.Add(StrK);
                break;

            case 'H':
                //only keep if first & before vowel or btw. 2 vowels
                if (((current == 0) || IsVowel(workingString[current - 1]))
                    && IsVowel(workingString[current + 1]))
                {
                    metaphoneData.Add(StrH);
                    current += 2;
                }
                else //also takes care of 'HH'
                {
                    current += 1;
                }
                break;

            case 'J':
                //obvious spanish, 'jose', 'san jacinto'
                if (StringAt(workingString, current, StrJose) || StringAt(workingString, 0, StrSaNsp))
                {
                    if (((current == 0) && (workingString[current + 4] == ' '))
                        || StringAt(workingString, 0, StrSaNsp))
                    {
                        metaphoneData.Add(StrH);
                    }
                    else
                    {
                        metaphoneData.Add(StrJ, StrH);
                    }

                    current += 1;
                    break;
                }

                if ((current == 0) && !StringAt(workingString, current, StrJose))
                {
                    metaphoneData.Add(StrJ, StrA); //Yankelovich/Jankelowicz
                }
                //spanish pron. of e.g. 'bajador'
                else if (current > 0
                    && IsVowel(workingString[current - 1])
                    && !isSlavoGermanic
                    && ((workingString[current + 1] == CharA) || (workingString[current + 1] == CharO)))
                {
                    metaphoneData.Add(StrJ, StrH);
                }
                else if (current == last)
                {
                    metaphoneData.Add(StrJ, Sp);
                }
                else if (!StringAt(workingString, (current + 1), StrL, StrT, StrK, StrS, StrN, StrM, StrB, StrZ)
                    && !StringAt(workingString, (current - 1), StrS, StrK, StrL))
                {
                    metaphoneData.Add(StrJ);
                }

                //it could happen!
                current += (workingString[current + 1] == CharJ) ? 2 : 1;
                break;

            case CharK:
                current += (workingString[current + 1] == CharK) ? 2 : 1;
                metaphoneData.Add(StrK);
                break;

            case CharL:
                if (workingString[current + 1] == CharL)
                {
                    //spanish e.g. 'cabrillo', 'gallegos'
                    if (((current == (input.Length - 3))
                            && StringAt(workingString, (current - 1), StrIllo, StrIlla, StrAlle))
                        || ((StringAt(workingString, (last - 1), StrAs, StrOs)
                                || StringAt(workingString, last, StrA, StrO))
                            && StringAt(workingString, (current - 1), StrAlle)))
                    {
                        metaphoneData.Add(StrL, Sp);
                        current += 2;
                        break;
                    }

                    current += 2;
                }
                else
                {
                    current += 1;
                }

                metaphoneData.Add("L");
                break;

            case CharM:
                if ((StringAt(workingString, (current - 1), StrUmb)
                        && (((current + 1) == last) || StringAt(workingString, (current + 2), StrEr)))
                    //'dumb','thumb'
                    || (workingString[current + 1] == CharM))
                {
                    current += 2;
                }
                else
                {
                    current += 1;
                }

                metaphoneData.Add("M");
                break;

            case CharN:
                current += (workingString[current + 1] == CharN) ? 2 : 1;
                metaphoneData.Add(StrN);
                break;

            case CharOdash:
                current += 1;
                metaphoneData.Add(StrN);
                break;

            case CharP:
                if (workingString[current + 1] == CharH)
                {
                    metaphoneData.Add(StrF);
                    current += 2;
                    break;
                }

                //also account for "campbell", "raspberry"
                current += StringAt(workingString, (current + 1), StrP, StrB) ? 2 : 1;
                metaphoneData.Add(StrP);
                break;

            case CharQ:
                current += (workingString[current + 1] == CharQ) ? 2 : 1;
                metaphoneData.Add(StrK);
                break;

            case CharR:
                //french e.g. 'rogier', but exclude 'hochmeier'
                if ((current == last)
                    && !isSlavoGermanic
                    && StringAt(workingString, (current - 2), StrIe)
                    && !StringAt(workingString, (current - 4), StrMe, StrMa))
                {
                    metaphoneData.Add(string.Empty, StrR);
                }
                else
                {
                    metaphoneData.Add(StrR);
                }

                current += (workingString[current + 1] == CharR) ? 2 : 1;
                break;

            case CharS:
                //special cases 'island', 'isle', 'carlisle', 'carlysle'
                if (StringAt(workingString, (current - 1), StrIsl, StrYsl))
                {
                    current += 1;
                    break;
                }

                //special case 'sugar-'
                if ((current == 0) && StringAt(workingString, current, StrSugar))
                {
                    metaphoneData.Add(StrX, StrS);
                    current += 1;
                    break;
                }

                if (StringAt(workingString, current, StrSh))
                {
                    //germanic
                    if (StringAt(workingString, (current + 1), StrHeim, StrHoek, StrHolm, StrHolz))
                    {
                        metaphoneData.Add(StrS);
                    }
                    else
                    {
                        metaphoneData.Add(StrX);
                    }

                    current += 2;
                    break;
                }

                //italian & armenian
                if (StringAt(workingString, current, StrSio, StrSia)
                    || StringAt(workingString, current, StrSian))
                {
                    if (!isSlavoGermanic)
                    {
                        metaphoneData.Add(StrS, StrX);
                    }
                    else
                    {
                        metaphoneData.Add(StrS);
                    }

                    current += 3;
                    break;
                }

                //german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider'
                //also, -sz- in slavic language altho in hungarian it is pronounced 's'
                if (((current == 0) && StringAt(workingString, (current + 1), StrM, StrN, StrL, StrW))
                    || StringAt(workingString, (current + 1), StrZ))
                {
                    metaphoneData.Add(StrS, StrX);
                    current += StringAt(workingString, (current + 1), StrZ) ? 2 : 1;
                    break;
                }

                if (StringAt(workingString, current, StrSc))
                {
                    //Schlesinger's rule
                    if (workingString[current + 2] == CharH)
                    {
                        //dutch origin, e.g. 'school', 'schooner'
                        if (StringAt(workingString, (current + 3), StrOo, StrEr, StrEn, StrUy, StrEd, StrEm))
                        {
                            //'schermerhorn', 'schenker'
                            if (StringAt(workingString, (current + 3), StrEr, StrEn))
                            {
                                metaphoneData.Add(StrX, StrSk);
                            }
                            else
                            {
                                metaphoneData.Add(StrSk);
                            }

                            current += 3;
                            break;
                        }

                        if ((current == 0) && !IsVowel(workingString[3]) && (workingString[3] != CharW))
                        {
                            metaphoneData.Add(StrX, StrS);
                        }
                        else
                        {
                            metaphoneData.Add(StrX);
                        }

                        current += 3;
                        break;
                    }

                    if (StringAt(workingString, (current + 2), StrI, StrE, StrY))
                    {
                        metaphoneData.Add(StrS);
                        current += 3;
                        break;
                    }

                    //else
                    metaphoneData.Add(StrSk);
                    current += 3;
                    break;
                }

                //french e.g. 'resnais', 'artois'
                if ((current == last) && StringAt(workingString, (current - 2), StrAi, StrOi))
                {
                    metaphoneData.Add(string.Empty, StrS);
                }
                else
                {
                    metaphoneData.Add(StrS);
                }

                current += StringAt(workingString, (current + 1), StrS, StrZ) ? 2 : 1;
                break;

            case CharT:
                if (StringAt(workingString, current, StrTion))
                {
                    metaphoneData.Add(StrX);
                    current += 3;
                    break;
                }

                if (StringAt(workingString, current, StrTia, StrTch))
                {
                    metaphoneData.Add(StrX);
                    current += 3;
                    break;
                }

                if (StringAt(workingString, current, StrTh) || StringAt(workingString, current, StrTth))
                {
                    //special case 'thomas', 'thames' or germanic
                    if (StringAt(workingString, (current + 2), StrOm, StrAm)
                        || StringAt(workingString, 0, StrVaNsp, StrVoNsp)
                        || StringAt(workingString, 0, StrSch))
                    {
                        metaphoneData.Add(StrT);
                    }
                    else
                    {
                        metaphoneData.Add(StrO, StrT);
                    }

                    current += 2;
                    break;
                }

                current += StringAt(workingString, (current + 1), StrT, StrD) ? 2 : 1;
                metaphoneData.Add(StrT);
                break;

            case CharV:
                current += (workingString[current + 1] == CharV) ? 2 : 1;
                metaphoneData.Add(StrF);
                break;

            case CharW:
                //can also be in middle of word
                if (StringAt(workingString, current, StrWr))
                {
                    metaphoneData.Add(StrR);
                    current += 2;
                    break;
                }

                if ((current == 0)
                    && (IsVowel(workingString[current + 1]) || StringAt(workingString, current, StrWh)))
                {
                    //Wasserman should match Vasserman
                    if (IsVowel(workingString[current + 1]))
                    {
                        metaphoneData.Add(StrA, StrF);
                    }
                    else
                    {
                        //need Uomo to match Womo
                        metaphoneData.Add(StrA);
                    }
                }

                //Arnow should match Arnoff
                if ((current == last && current > 0 && IsVowel(workingString[current - 1]))
                    || StringAt(workingString, (current - 1), StrEwski, StrEwsky, StrOwski, StrOwsky)
                    || StringAt(workingString, 0, StrSch))
                {
                    metaphoneData.Add(string.Empty, StrF);
                    current += 1;
                    break;
                }

                //polish e.g. 'filipowicz'
                if (StringAt(workingString, current, StrWicz, StrWitz))
                {
                    metaphoneData.Add(StrTs, StrFx);
                    current += 4;
                    break;
                }

                //else skip it
                current += 1;
                break;

            case CharX:
                //french e.g. breaux
                if (!((current == last)
                    && (StringAt(workingString, (current - 3), StrIau, StrEau)
                        || StringAt(workingString, (current - 2), StrAu, StrOu))))
                {
                    metaphoneData.Add(StrKs);
                }

                current += StringAt(workingString, (current + 1), StrC, StrX) ? 2 : 1;
                break;

            case CharZ:
                //chinese pinyin e.g. 'zhao'
                if (workingString[current + 1] == CharH)
                {
                    metaphoneData.Add(StrJ);
                    current += 2;
                    break;
                }

                if (StringAt(workingString, (current + 1), StrZo, StrZi, StrZa)
                    || (isSlavoGermanic && ((current > 0) && workingString[current - 1] != CharT)))
                {
                    metaphoneData.Add(StrS, StrTs);
                }
                else
                {
                    metaphoneData.Add(StrS);
                }

                current += (workingString[current + 1] == CharZ) ? 2 : 1;
                break;

            default:
                // Any other character contributes nothing to the code.
                current += 1;
                break;
        }
    }

    return metaphoneData.ToString();
}