Exemple #1
0
        /**
         * Handles 'CC' cases.
         *
         * If the character after the pair is I, E or H (and the text there is not
         * "HU"), three characters are consumed and either "KS" or 'X' is appended;
         * otherwise 'K' is appended and two characters are consumed.
         *
         * @param value  the string being encoded
         * @param result receives the code produced for this pair
         * @param index  position in value of the first 'C'
         * @return the index just past the consumed characters
         */
        private int handleCC(String value,
                             DoubleMetaphoneResult result,
                             int index)
        {
            //-- "bellocchio" but not "bacchus" --//
            if (!contains(value, index + 2, 1, "I", "E", "H") ||
                contains(value, index + 2, 2, "HU"))
            {
                // Pierce's rule
                result.append('K');
                return(index + 2);
            }

            if ((index == 1 && charAt(value, index - 1) == 'A') ||
                contains(value, index - 1, 5, "UCCEE", "UCCES"))
            {
                //-- "accident", "accede", "succeed" --//
                result.append("KS");
            }
            else
            {
                //-- "bacci", "bertucci", other Italian --//
                result.append('X');
            }
            return(index + 3);
        }
Exemple #2
0
 /**
  * Handles 'D' cases.
  *
  * "DG" followed by I, E or Y appends 'J' and consumes three characters; any
  * other "DG" appends "TK" and consumes two. "DT" and "DD" append a single 'T'
  * and consume two characters; every other 'D' appends 'T' and consumes one.
  *
  * @param value  the string being encoded
  * @param result receives the code produced for this letter
  * @param index  position in value of the 'D'
  * @return the index just past the consumed characters
  */
 private int handleD(String value,
                     DoubleMetaphoneResult result,
                     int index)
 {
     if (!contains(value, index, 2, "DG"))
     {
         // "DT"/"DD" collapse into one 'T'; the width is decided before appending
         int width = contains(value, index, 2, "DT", "DD") ? 2 : 1;
         result.append('T');
         return(index + width);
     }

     if (contains(value, index + 2, 1, "I", "E", "Y"))
     {
         //-- "Edge" --//
         result.append('J');
         return(index + 3);
     }

     //-- "Edgar" --//
     result.append("TK");
     return(index + 2);
 }
Exemple #3
0
 /**
  * Handles 'W' cases.
  *
  * The rules are tried in order: "WR", a leading 'W' before a vowel or 'H',
  * a 'W' that only contributes 'F' to the alternate code, the Polish
  * "WICZ"/"WITZ" endings, and finally a 'W' that is skipped without output.
  *
  * @param value  the string being encoded
  * @param result receives the code produced for this letter
  * @param index  position in value of the 'W'
  * @return the index just past the consumed characters
  */
 private int handleW(String value,
                     DoubleMetaphoneResult result,
                     int index)
 {
     if (contains(value, index, 2, "WR"))
     {
         //-- can also be in middle of word --//
         result.append('R');
         return(index + 2);
     }

     if (index == 0 && (isVowel(charAt(value, index + 1)) ||
                        contains(value, index, 2, "WH")))
     {
         if (isVowel(charAt(value, index + 1)))
         {
             //-- Wasserman should match Vasserman --//
             result.append('A', 'F');
         }
         else
         {
             //-- need Uomo to match Womo --//
             result.append('A');
         }
         return(index + 1);
     }

     if ((index == value.length() - 1 && isVowel(charAt(value, index - 1))) ||
         contains(value, index - 1,
                  5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") ||
         contains(value, 0, 3, "SCH"))
     {
         //-- Arnow should match Arnoff --//
         result.appendAlternate('F');
         return(index + 1);
     }

     if (contains(value, index, 4, "WICZ", "WITZ"))
     {
         //-- Polish e.g. "filipowicz" --//
         result.append("TS", "FX");
         return(index + 4);
     }

     // no rule matched: the 'W' is skipped without producing a code
     return(index + 1);
 }
Exemple #4
0
 /**
  * Handles 'P' cases.
  *
  * "PH" appends 'F' and consumes two characters. Any other 'P' appends 'P'
  * and consumes a directly following 'P' or 'B' along with it.
  *
  * @param value  the string being encoded
  * @param result receives the code produced for this letter
  * @param index  position in value of the 'P'
  * @return the index just past the consumed characters
  */
 private int handleP(String value,
                     DoubleMetaphoneResult result,
                     int index)
 {
     if (charAt(value, index + 1) == 'H')
     {
         result.append('F');
         return(index + 2);
     }

     result.append('P');
     if (contains(value, index + 1, 1, "P", "B"))
     {
         return(index + 2);
     }
     return(index + 1);
 }
Exemple #5
0
        //-- BEGIN HANDLERS --//

        /**
         * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases.
         *
         * Only a vowel at index 0 produces output (an 'A'); at any other
         * position nothing is appended. Exactly one character is consumed.
         *
         * @param value  the string being encoded (not read by this handler)
         * @param result receives 'A' when index is 0
         * @param index  position in value of the vowel
         * @return index + 1
         */
        private int handleAEIOUY(String value, DoubleMetaphoneResult result, int index)
        {
            int next = index + 1;
            if (index != 0)
            {
                return(next);
            }
            result.append('A');
            return(next);
        }
Exemple #6
0
 /**
  * Handles 'GH' cases.
  *
  * Every branch consumes exactly two characters; the branches differ only in
  * whether 'K', 'J', 'F' or nothing at all is appended.
  *
  * @param value  the string being encoded
  * @param result receives the code produced for this pair, if any
  * @param index  position in value of the 'G'
  * @return index + 2
  */
 private int handleGH(String value,
                      DoubleMetaphoneResult result,
                      int index)
 {
     int next = index + 2;

     if (index > 0 && !isVowel(charAt(value, index - 1)))
     {
         // "GH" after a non-vowel
         result.append('K');
         return(next);
     }

     if (index == 0)
     {
         // leading "GH": 'J' when followed by 'I', otherwise 'K'
         result.append(charAt(value, index + 2) == 'I' ? 'J' : 'K');
         return(next);
     }

     if ((index > 1 && contains(value, index - 2, 1, "B", "H", "D")) ||
         (index > 2 && contains(value, index - 3, 1, "B", "H", "D")) ||
         (index > 3 && contains(value, index - 4, 1, "B", "H")))
     {
         //-- Parker's rule (with some further refinements) - "hugh"
         // nothing is appended in this case
         return(next);
     }

     if (index > 2 && charAt(value, index - 1) == 'U' &&
         contains(value, index - 3, 1, "C", "G", "L", "R", "T"))
     {
         //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough"
         result.append('F');
     }
     else if (index > 0 && charAt(value, index - 1) != 'I')
     {
         result.append('K');
     }
     return(next);
 }
Exemple #7
0
 /**
  * Handles 'X' cases.
  *
  * A leading 'X' appends 'S'. Elsewhere "KS" is appended, except for a final
  * 'X' preceded by "IAU", "EAU", "AU" or "OU", which appends nothing. A
  * directly following 'C' or 'X' is consumed together with a non-leading 'X'.
  *
  * @param value  the string being encoded
  * @param result receives the code produced for this letter, if any
  * @param index  position in value of the 'X'
  * @return the index just past the consumed characters
  */
 private int handleX(String value,
                     DoubleMetaphoneResult result,
                     int index)
 {
     if (index == 0)
     {
         result.append('S');
         return(index + 1);
     }

     //-- French e.g. breaux: the final X after these vowel groups gets no code --//
     if (index != value.length() - 1 ||
         (!contains(value, index - 3, 3, "IAU", "EAU") &&
          !contains(value, index - 2, 2, "AU", "OU")))
     {
         result.append("KS");
     }

     if (contains(value, index + 1, 1, "C", "X"))
     {
         return(index + 2);
     }
     return(index + 1);
 }
Exemple #8
0
 /**
  * Handles 'L' cases.
  *
  * A single 'L' appends 'L' and consumes one character. For "LL", two
  * characters are consumed and 'L' goes through appendPrimary when
  * conditionL0 holds, or through the plain append otherwise.
  *
  * @param value  the string being encoded
  * @param result receives the code produced for this letter
  * @param index  position in value of the 'L'
  * @return the index just past the consumed characters
  */
 private int handleL(String value,
                     DoubleMetaphoneResult result,
                     int index)
 {
     if (charAt(value, index + 1) != 'L')
     {
         result.append('L');
         return(index + 1);
     }

     if (conditionL0(value, index))
     {
         result.appendPrimary('L');
     }
     else
     {
         result.append('L');
     }
     return(index + 2);
 }
Exemple #9
0
 /**
  * Handles 'Z' cases.
  *
  * "ZH" appends 'J' and consumes two characters. Otherwise "S"/"TS" is
  * appended when "ZO", "ZI" or "ZA" follows, or when slavoGermanic is set and
  * the 'Z' is neither first nor preceded by 'T'; in every remaining case a
  * plain 'S' is appended. A doubled 'Z' is consumed as one.
  *
  * @param value         the string being encoded
  * @param result        receives the code produced for this letter
  * @param index         position in value of the 'Z'
  * @param slavoGermanic Slavo-Germanic flag supplied by the caller
  * @return the index just past the consumed characters
  */
 private int handleZ(String value, DoubleMetaphoneResult result, int index,
                     bool slavoGermanic)
 {
     if (charAt(value, index + 1) == 'H')
     {
         //-- Chinese pinyin e.g. "zhao" or Angelina "Zhang" --//
         result.append('J');
         return(index + 2);
     }

     if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") ||
         (slavoGermanic && index > 0 && charAt(value, index - 1) != 'T'))
     {
         result.append("S", "TS");
     }
     else
     {
         result.append('S');
     }

     if (charAt(value, index + 1) == 'Z')
     {
         return(index + 2);
     }
     return(index + 1);
 }
Exemple #10
0
 /**
  * Handles 'T' cases.
  *
  * "TION", "TIA" and "TCH" append 'X' and consume three characters. "TH" and
  * "TTH" consume two characters and append either 'T' or the '0'/'T' pair.
  * Any other 'T' appends 'T' and swallows a directly following 'T' or 'D'.
  *
  * @param value  the string being encoded
  * @param result receives the code produced for this letter
  * @param index  position in value of the 'T'
  * @return the index just past the consumed characters
  */
 private int handleT(String value,
                     DoubleMetaphoneResult result,
                     int index)
 {
     if (contains(value, index, 4, "TION") ||
         contains(value, index, 3, "TIA", "TCH"))
     {
         result.append('X');
         return(index + 3);
     }

     if (contains(value, index, 2, "TH") || contains(value, index, 3, "TTH"))
     {
         //-- special case "thomas", "thames" or germanic --//
         if (contains(value, index + 2, 2, "OM", "AM") ||
             contains(value, 0, 4, "VAN ", "VON ") ||
             contains(value, 0, 3, "SCH"))
         {
             result.append('T');
         }
         else
         {
             result.append('0', 'T');
         }
         return(index + 2);
     }

     result.append('T');
     if (contains(value, index + 1, 1, "T", "D"))
     {
         return(index + 2);
     }
     return(index + 1);
 }
Exemple #11
0
 /**
  * Handles 'R' cases.
  *
  * A final 'R' preceded by "IE" (but not by "ME"/"MA" two characters earlier)
  * in input that is not flagged slavoGermanic goes through appendAlternate
  * only; every other 'R' goes through the plain append. A doubled 'R' is
  * consumed as one.
  *
  * @param value         the string being encoded
  * @param result        receives the code produced for this letter
  * @param index         position in value of the 'R'
  * @param slavoGermanic Slavo-Germanic flag supplied by the caller
  * @return the index just past the consumed characters
  */
 private int handleR(String value,
                     DoubleMetaphoneResult result,
                     int index,
                     bool slavoGermanic)
 {
     if (index != value.length() - 1 || slavoGermanic ||
         !contains(value, index - 2, 2, "IE") ||
         contains(value, index - 4, 2, "ME", "MA"))
     {
         result.append('R');
     }
     else
     {
         result.appendAlternate('R');
     }

     if (charAt(value, index + 1) == 'R')
     {
         return(index + 2);
     }
     return(index + 1);
 }
Exemple #12
0
 /**
  * Handles 'H' cases.
  *
  * 'H' is kept only when it is first and before a vowel, or between two
  * vowels; then 'H' is appended and two characters are consumed. Otherwise
  * nothing is appended and one character is consumed.
  *
  * @param value  the string being encoded
  * @param result receives 'H' when the letter is kept
  * @param index  position in value of the 'H'
  * @return the index just past the consumed characters
  */
 private int handleH(String value,
                     DoubleMetaphoneResult result,
                     int index)
 {
     if ((index != 0 && !isVowel(charAt(value, index - 1))) ||
         !isVowel(charAt(value, index + 1)))
     {
         // H is dropped here
         return(index + 1);
     }

     result.append('H');
     //-- also takes care of "HH" --//
     return(index + 2);
 }
Exemple #13
0
        /**
         * Handles 'J' cases.
         *
         * The Spanish-looking inputs ("JOSE" at index, or a value starting with
         * "SAN ") consume one character and append 'H' or the 'J'/'H' pair.
         * All other inputs pick at most one code from a chain of positional
         * rules and then consume one character, or two for a doubled 'J'.
         *
         * @param value         the string being encoded
         * @param result        receives the code produced for this letter, if any
         * @param index         position in value of the 'J'
         * @param slavoGermanic Slavo-Germanic flag supplied by the caller
         * @return the index just past the consumed characters
         */
        private int handleJ(String value, DoubleMetaphoneResult result, int index,
                            bool slavoGermanic)
        {
            if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN "))
            {
                //-- obvious Spanish, "Jose", "San Jacinto" --//
                // grouping spelled out: && binds tighter than ||
                if ((index == 0 && charAt(value, index + 4) == ' ') ||
                    value.length() == 4 ||
                    contains(value, 0, 4, "SAN "))
                {
                    result.append('H');
                }
                else
                {
                    result.append('J', 'H');
                }
                return(index + 1);
            }

            if (index == 0 && !contains(value, index, 4, "JOSE"))
            {
                result.append('J', 'A');
            }
            else if (isVowel(charAt(value, index - 1)) && !slavoGermanic &&
                     (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O'))
            {
                result.append('J', 'H');
            }
            else if (index == value.length() - 1)
            {
                result.append('J', ' ');
            }
            else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) &&
                     !contains(value, index - 1, 1, "S", "K", "L"))
            {
                result.append('J');
            }

            // a doubled 'J' is consumed as one
            return(charAt(value, index + 1) == 'J' ? index + 2 : index + 1);
        }
Exemple #14
0
 /**
  * Handles 'SC' cases.
  *
  * Three characters are always consumed. The appended code depends on the
  * third character: 'H' selects the "SCH" rules, I/E/Y gives 'S', and
  * anything else gives "SK".
  *
  * @param value  the string being encoded
  * @param result receives the code produced for this group
  * @param index  position in value of the 'S'
  * @return index + 3
  */
 private int handleSC(String value,
                      DoubleMetaphoneResult result,
                      int index)
 {
     int next = index + 3;

     if (charAt(value, index + 2) != 'H')
     {
         if (contains(value, index + 2, 1, "I", "E", "Y"))
         {
             result.append('S');
         }
         else
         {
             result.append("SK");
         }
         return(next);
     }

     //-- Schlesinger's rule --//
     if (!contains(value, index + 3,
                   2, "OO", "ER", "EN", "UY", "ED", "EM"))
     {
         if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W')
         {
             result.append('X', 'S');
         }
         else
         {
             result.append('X');
         }
         return(next);
     }

     //-- Dutch origin, e.g. "school", "schooner" --//
     if (contains(value, index + 3, 2, "ER", "EN"))
     {
         //-- "schermerhorn", "schenker" --//
         result.append("X", "SK");
     }
     else
     {
         result.append("SK");
     }
     return(next);
 }
Exemple #15
0
 /**
  * Handles 'CH' cases
  */
 private int handleCH(String value,
                      DoubleMetaphoneResult result,
                      int index)
 {
     // Every rule consumes exactly two characters; the rules differ only in
     // the code that is appended.
     if (index > 0 && contains(value, index, 4, "CHAE"))
     {
         // Michael
         result.append('K', 'X');
     }
     else if (conditionCH0(value, index) || conditionCH1(value, index))
     {
         //-- Greek roots ("chemistry", "chorus", etc.) --//
         //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --//
         result.append('K');
     }
     else if (index <= 0)
     {
         // leading "CH" that matched none of the rules above
         result.append('X');
     }
     else if (contains(value, 0, 2, "MC"))
     {
         // value starts with "MC"
         result.append('K');
     }
     else
     {
         result.append('X', 'K');
     }
     return(index + 2);
 }
Exemple #16
0
        /**
         * Encode a value with Double Metaphone, optionally using the alternate
         * encoding.
         *
         * The input is first passed through cleanInput; a null result from that
         * step is returned as null. The cleaned value is then scanned left to
         * right, each character being dispatched to its handler, until the end
         * of the value is reached or the result reports that it is complete.
         *
         * @param value String to encode
         * @param alternate use alternate encode
         * @return an encoded string, or null when the cleaned input is null
         */
        public String doubleMetaphone(String value, bool alternate)
        {
            value = cleanInput(value);
            if (value == null)
            {
                return(null);
            }

            bool slavoGermanic = isSlavoGermanic(value);

            // scanning starts at 1 instead of 0 when isSilentStart reports true
            int index = 0;
            if (isSilentStart(value))
            {
                index = 1;
            }

            DoubleMetaphoneResult result = new DoubleMetaphoneResult(this.getMaxCodeLen(), this);
            int length = value.length();

            while (!result.isComplete() && index < length)
            {
                char current = value.charAt(index);
                switch (current)
                {
                case 'A':
                case 'E':
                case 'I':
                case 'O':
                case 'U':
                case 'Y':
                    index = handleAEIOUY(value, result, index);
                    break;

                case 'B':
                    result.append('P');
                    index += charAt(value, index + 1) == 'B' ? 2 : 1;
                    break;

                case '\u00C7':
                    // A C with a Cedilla
                    result.append('S');
                    index++;
                    break;

                case 'C':
                    index = handleC(value, result, index);
                    break;

                case 'D':
                    index = handleD(value, result, index);
                    break;

                case 'F':
                case 'K':
                case 'N':
                    // these letters code as themselves; a doubled letter counts once
                    result.append(current);
                    index += charAt(value, index + 1) == current ? 2 : 1;
                    break;

                case 'G':
                    index = handleG(value, result, index, slavoGermanic);
                    break;

                case 'H':
                    index = handleH(value, result, index);
                    break;

                case 'J':
                    index = handleJ(value, result, index, slavoGermanic);
                    break;

                case 'L':
                    index = handleL(value, result, index);
                    break;

                case 'M':
                    result.append('M');
                    index += conditionM0(value, index) ? 2 : 1;
                    break;

                case '\u00D1':
                    // N with a tilde (spanish ene)
                    result.append('N');
                    index++;
                    break;

                case 'P':
                    index = handleP(value, result, index);
                    break;

                case 'Q':
                    result.append('K');
                    index += charAt(value, index + 1) == 'Q' ? 2 : 1;
                    break;

                case 'R':
                    index = handleR(value, result, index, slavoGermanic);
                    break;

                case 'S':
                    index = handleS(value, result, index, slavoGermanic);
                    break;

                case 'T':
                    index = handleT(value, result, index);
                    break;

                case 'V':
                    result.append('F');
                    index += charAt(value, index + 1) == 'V' ? 2 : 1;
                    break;

                case 'W':
                    index = handleW(value, result, index);
                    break;

                case 'X':
                    index = handleX(value, result, index);
                    break;

                case 'Z':
                    index = handleZ(value, result, index, slavoGermanic);
                    break;

                default:
                    // any other character is skipped without output
                    index++;
                    break;
                }
            }

            if (alternate)
            {
                return(result.getAlternate());
            }
            return(result.getPrimary());
        }
Exemple #17
0
 /**
  * Handles 'S' cases.
  *
  * The rules are tried in order and the first match decides both the
  * appended code and the number of characters consumed: silent 'S' in
  * "ISL"/"YSL", leading "SUGAR", "SH", "SIO"/"SIA"/"SIAN", a leading 'S'
  * before M/N/L/W or any 'S' before 'Z', "SC" (delegated to handleSC), and
  * finally the default case.
  *
  * @param value         the string being encoded
  * @param result        receives the code produced for this letter, if any
  * @param index         position in value of the 'S'
  * @param slavoGermanic Slavo-Germanic flag supplied by the caller
  * @return the index just past the consumed characters
  */
 private int handleS(String value,
                     DoubleMetaphoneResult result,
                     int index,
                     bool slavoGermanic)
 {
     if (contains(value, index - 1, 3, "ISL", "YSL"))
     {
         //-- special cases "island", "isle", "carlisle", "carlysle" --//
         return(index + 1);
     }

     if (index == 0 && contains(value, index, 5, "SUGAR"))
     {
         //-- special case "sugar-" --//
         result.append('X', 'S');
         return(index + 1);
     }

     if (contains(value, index, 2, "SH"))
     {
         if (contains(value, index + 1, 4,
                      "HEIM", "HOEK", "HOLM", "HOLZ"))
         {
             //-- germanic --//
             result.append('S');
         }
         else
         {
             result.append('X');
         }
         return(index + 2);
     }

     if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN"))
     {
         //-- Italian and Armenian --//
         if (slavoGermanic)
         {
             result.append('S');
         }
         else
         {
             result.append('S', 'X');
         }
         return(index + 3);
     }

     if ((index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W")) ||
         contains(value, index + 1, 1, "Z"))
     {
         //-- german & anglicisations, e.g. "smith" match "schmidt" //
         // "snider" match "schneider" --//
         //-- also, -sz- in slavic language altho in hungarian it //
         //   is pronounced "s" --//
         result.append('S', 'X');
         if (contains(value, index + 1, 1, "Z"))
         {
             return(index + 2);
         }
         return(index + 1);
     }

     if (contains(value, index, 2, "SC"))
     {
         return(handleSC(value, result, index));
     }

     if (index == value.length() - 1 && contains(value, index - 2,
                                                 2, "AI", "OI"))
     {
         //-- french e.g. "resnais", "artois" --//
         result.appendAlternate('S');
     }
     else
     {
         result.append('S');
     }

     if (contains(value, index + 1, 1, "S", "Z"))
     {
         return(index + 2);
     }
     return(index + 1);
 }
Exemple #18
0
        /**
         * Handles 'C' cases.
         *
         * The rules are tried in order and the first match decides the result.
         * "CH" and "CC" (the latter unless the value starts with "MC") are
         * delegated to handleCH and handleCC; the remaining rules append their
         * code here and advance by one to three characters.
         *
         * @param value  the string being encoded
         * @param result receives the code produced for this letter
         * @param index  position in value of the 'C'
         * @return the index just past the consumed characters
         */
        private int handleC(String value,
                            DoubleMetaphoneResult result,
                            int index)
        {
            if (conditionC0(value, index))
            {
                // very confusing, moved out
                result.append('K');
                return(index + 2);
            }

            if (index == 0 && contains(value, index, 6, "CAESAR"))
            {
                result.append('S');
                return(index + 2);
            }

            if (contains(value, index, 2, "CH"))
            {
                return(handleCH(value, result, index));
            }

            if (contains(value, index, 2, "CZ") &&
                !contains(value, index - 2, 4, "WICZ"))
            {
                //-- "Czerny" --//
                result.append('S', 'X');
                return(index + 2);
            }

            if (contains(value, index + 1, 3, "CIA"))
            {
                //-- "focaccia" --//
                result.append('X');
                return(index + 3);
            }

            if (contains(value, index, 2, "CC") &&
                !(index == 1 && charAt(value, 0) == 'M'))
            {
                //-- double "cc" but not "McClelland" --//
                return(handleCC(value, result, index));
            }

            if (contains(value, index, 2, "CK", "CG", "CQ"))
            {
                result.append('K');
                return(index + 2);
            }

            if (contains(value, index, 2, "CI", "CE", "CY"))
            {
                //-- Italian vs. English --//
                if (contains(value, index, 3, "CIO", "CIE", "CIA"))
                {
                    result.append('S', 'X');
                }
                else
                {
                    result.append('S');
                }
                return(index + 2);
            }

            // default: hard 'C'; only the number of consumed characters varies
            result.append('K');
            if (contains(value, index + 1, 2, " C", " Q", " G"))
            {
                //-- Mac Caffrey, Mac Gregor --//
                return(index + 3);
            }
            if (contains(value, index + 1, 1, "C", "K", "Q") &&
                !contains(value, index + 1, 2, "CE", "CI"))
            {
                return(index + 2);
            }
            return(index + 1);
        }
Exemple #19
0
 /**
  * Handles 'G' cases.
  *
  * "GH" is delegated to handleGH. Every other rule is tried in order; each
  * one appends its code here and consumes two characters, except the final
  * default, which appends 'K' and consumes a single 'G' (or two for "GG").
  *
  * @param value         the string being encoded
  * @param result        receives the code produced for this letter
  * @param index         position in value of the 'G'
  * @param slavoGermanic Slavo-Germanic flag supplied by the caller
  * @return the index just past the consumed characters
  */
 private int handleG(String value,
                     DoubleMetaphoneResult result,
                     int index,
                     bool slavoGermanic)
 {
     if (charAt(value, index + 1) == 'H')
     {
         return(handleGH(value, result, index));
     }

     if (charAt(value, index + 1) == 'N')
     {
         if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic)
         {
             result.append("KN", "N");
         }
         else if (!contains(value, index + 2, 2, "EY") &&
                  charAt(value, index + 1) != 'Y' && !slavoGermanic)
         {
             result.append("N", "KN");
         }
         else
         {
             result.append("KN");
         }
         return(index + 2);
     }

     if (contains(value, index + 1, 2, "LI") && !slavoGermanic)
     {
         result.append("KL", "L");
         return(index + 2);
     }

     if (index == 0 && (charAt(value, index + 1) == 'Y' ||
                        contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER)))
     {
         //-- -ges-, -gep-, -gel-, -gie- at beginning --//
         result.append('K', 'J');
         return(index + 2);
     }

     if ((contains(value, index + 1, 2, "ER") ||
          charAt(value, index + 1) == 'Y') &&
         !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
         !contains(value, index - 1, 1, "E", "I") &&
         !contains(value, index - 1, 3, "RGY", "OGY"))
     {
         //-- -ger-, -gy- --//
         result.append('K', 'J');
         return(index + 2);
     }

     if (contains(value, index + 1, 1, "E", "I", "Y") ||
         contains(value, index - 1, 4, "AGGI", "OGGI"))
     {
         //-- Italian "biaggi" --//
         if (contains(value, 0, 4, "VAN ", "VON ") ||
             contains(value, 0, 3, "SCH") ||
             contains(value, index + 1, 2, "ET"))
         {
             //-- obvious germanic --//
             result.append('K');
         }
         else if (contains(value, index + 1, 3, "IER"))
         {
             result.append('J');
         }
         else
         {
             result.append('J', 'K');
         }
         return(index + 2);
     }

     // default: 'K'; a doubled 'G' is consumed as one
     int width = charAt(value, index + 1) == 'G' ? 2 : 1;
     result.append('K');
     return(index + width);
 }