/// <summary>
/// Transforms input into a phonetic transform that is similar to analyzed data.
/// The term is run through <c>PhoneticTransform</c>, then through <c>Accent</c>,
/// and the result is lower-cased.
/// </summary>
/// <param name="source">source term</param>
/// <returns>Analyzed transformation</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="source"/> is null.</exception>
public string Transform(string source)
{
    if (source == null)
    {
        throw new System.ArgumentNullException("source");
    }

    // Both scratch buffers are sized from the same worst-case estimate.
    int maxSizeNeeded = 4 * source.Length;

    // Pass 1: phonetic folding. The helper returns the number of chars it wrote.
    char[] poutput = new char[ArrayUtil.GetNextSize(maxSizeNeeded)];
    int len = PhoneticTransform(poutput, source.ToCharArray(), source.Length);

    // Pass 2: accent handling over the first `len` chars produced by pass 1.
    char[] aoutput = new char[ArrayUtil.GetNextSize(maxSizeNeeded)];
    len = Accent(aoutput, poutput, len);

    // Lower-case with the invariant culture so the result does not change with
    // the current thread culture (e.g. Turkish 'I' -> dotless 'ı').
    return new string(aoutput, 0, len).ToLowerInvariant();
}
/// <summary>
/// Strips the tonos and dialytika marks from Greek vowels, keeping the letter's case.
/// Every other character is copied through unchanged. The folded text is written
/// to <c>output</c>, and <c>outputPos</c> ends up holding the number of chars written.
/// </summary>
/// <param name="input">The characters to fold</param>
/// <param name="length">The number of characters of <paramref name="input"/> to process</param>
public void Transform(char[] input, int length)
{
    // Grow the shared buffer when it cannot hold the worst-case estimate.
    int required = 4 * length;
    if (output.Length < required)
    {
        output = new char[ArrayUtil.GetNextSize(required)];
    }

    outputPos = 0;

    int index = 0;
    while (index < length)
    {
        char current = input[index];
        char folded = current;

        // Everything below 'Ά' can never be one of the marked vowels, so skip the switch.
        if (current >= 'Ά')
        {
            switch (current)
            {
                // lower-case vowels
                case 'ά':
                    folded = 'α';
                    break;
                case 'έ':
                    folded = 'ε';
                    break;
                case 'ή':
                    folded = 'η';
                    break;
                case 'ί':
                case 'ΐ':
                case 'ϊ':
                    folded = 'ι';
                    break;
                case 'ό':
                    folded = 'ο';
                    break;
                case 'ύ':
                case 'ΰ':
                case 'ϋ':
                    folded = 'υ';
                    break;
                case 'ώ':
                    folded = 'ω';
                    break;

                // upper-case vowels
                case 'Ά':
                    folded = 'Α';
                    break;
                case 'Έ':
                    folded = 'Ε';
                    break;
                case 'Ή':
                    folded = 'Η';
                    break;
                case 'Ί':
                case 'Ϊ':
                    folded = 'Ι';
                    break;
                case 'Ό':
                    folded = 'Ο';
                    break;
                case 'Ύ':
                case 'Ϋ':
                    folded = 'Υ';
                    break;
                case 'Ώ':
                    folded = 'Ω';
                    break;
            }
        }

        output[outputPos++] = folded;
        index++;
    }
}
/// <summary>
/// Folds Greek vowels and vowel digraphs into a simpler, similar-sounding spelling.
/// Rules applied by this method:
/// αυ / ευ keep the first vowel and replace the upsilon with β or φ (chosen by <c>IsLoud</c>
/// on the two characters that follow); αι becomes ε; ει, οι and υι become ι;
/// η and υ become ι; ω becomes ο. Accented forms map to the accented replacement.
/// Every other character is copied through unchanged. The result is written to
/// <c>output</c>, and <c>outputPos</c> ends up holding the number of chars written.
/// </summary>
/// <param name="input">The characters to fold</param>
/// <param name="length">The number of characters of <paramref name="input"/> to process</param>
public void Transform(char[] input, int length)
{
    // Worst-case length required:
    int maxSizeNeeded = 4 * length;
    if (output.Length < maxSizeNeeded)
    {
        output = new char[ArrayUtil.GetNextSize(maxSizeNeeded)];
    }

    outputPos = 0;

    for (int pos = 0; pos < length; ++pos)
    {
        char c = input[pos];

        // Up to three characters of look-ahead; (char)0 stands for "past the end".
        char next = (char)0;
        char fnext = (char)0;
        char fnext2 = (char)0;
        if (pos + 1 < length)
        {
            next = input[pos + 1];
        }
        if (pos + 2 < length)
        {
            fnext = input[pos + 2];
        }
        if (pos + 3 < length)
        {
            fnext2 = input[pos + 3];
        }

        // Quick test: if it's not in range then just keep current character
        if (c < 'Ά')
        {
            output[outputPos++] = c;
            continue;
        }

        switch (c)
        {
            // αυ -> αβ / αφ, αι -> ε
            case 'α':
            case 'Α':
                if (next == 'υ' || next == 'ύ')
                {
                    output[outputPos++] = c;
                    pos++; // consume the upsilon
                    // NOTE(review): IsLoud is defined elsewhere; presumably it reports
                    // whether the following sound is voiced - confirm.
                    output[outputPos++] = IsLoud(fnext, fnext2) ? 'β' : 'φ';
                }
                // '\u03A5' is GREEK CAPITAL LETTER UPSILON. The original test only matched
                // the look-alike Latin 'Y', so upper-case "ΑΥ" never took this branch.
                // Latin 'Y' is still accepted so existing behaviour for it is unchanged.
                else if (next == '\u03A5' || next == 'Y' || next == 'Ύ')
                {
                    output[outputPos++] = c;
                    pos++; // consume the upsilon
                    output[outputPos++] = IsLoud(fnext, fnext2) ? 'Β' : 'Φ';
                }
                else if (next == 'ι')
                {
                    output[outputPos++] = 'ε';
                    pos++;
                }
                else if (next == 'ί')
                {
                    output[outputPos++] = 'έ';
                    pos++;
                }
                else if (next == 'Ι')
                {
                    output[outputPos++] = 'Ε';
                    pos++;
                }
                else if (next == 'Ί')
                {
                    output[outputPos++] = 'Έ';
                    pos++;
                }
                else
                {
                    output[outputPos++] = c;
                }
                break;

            // ευ -> εβ / εφ, ει -> ι
            case 'ε':
            case 'Ε':
                if (next == 'υ' || next == 'ύ')
                {
                    output[outputPos++] = c;
                    pos++; // consume the upsilon
                    output[outputPos++] = IsLoud(fnext, fnext2) ? 'β' : 'φ';
                }
                // Same Greek-vs-Latin upsilon fix as in the alpha case above.
                else if (next == '\u03A5' || next == 'Y' || next == 'Ύ')
                {
                    output[outputPos++] = c;
                    pos++; // consume the upsilon
                    output[outputPos++] = IsLoud(fnext, fnext2) ? 'Β' : 'Φ';
                }
                else if (next == 'ι')
                {
                    output[outputPos++] = 'ι';
                    pos++;
                }
                else if (next == 'ί')
                {
                    output[outputPos++] = 'ί';
                    pos++;
                }
                else if (next == 'Ι')
                {
                    output[outputPos++] = 'Ι';
                    pos++;
                }
                else if (next == 'Ί')
                {
                    output[outputPos++] = 'Ί';
                    pos++;
                }
                else
                {
                    output[outputPos++] = c;
                }
                break;

            // η -> ι
            case 'η':
                output[outputPos++] = 'ι';
                break;
            case 'ή':
                output[outputPos++] = 'ί';
                break;
            case 'Η':
                output[outputPos++] = 'Ι';
                break;
            case 'Ή':
                output[outputPos++] = 'Ί';
                break;

            // οι -> ι
            case 'ο':
            case 'Ο':
                if (next == 'ι')
                {
                    output[outputPos++] = 'ι';
                    pos++;
                }
                else if (next == 'ί')
                {
                    output[outputPos++] = 'ί';
                    pos++;
                }
                else if (next == 'Ι')
                {
                    output[outputPos++] = 'Ι';
                    pos++;
                }
                else if (next == 'Ί')
                {
                    output[outputPos++] = 'Ί';
                    pos++;
                }
                else
                {
                    output[outputPos++] = c;
                }
                break;

            // υι -> ι, lone υ -> ι
            case 'υ':
                if (next == 'ι')
                {
                    output[outputPos++] = 'ι';
                    pos++;
                }
                else if (next == 'ί')
                {
                    output[outputPos++] = 'ί';
                    pos++;
                }
                else if (next == 'Ι')
                {
                    output[outputPos++] = 'Ι';
                    pos++;
                }
                else if (next == 'Ί')
                {
                    output[outputPos++] = 'Ί';
                    pos++;
                }
                else
                {
                    output[outputPos++] = 'ι';
                }
                break;
            case 'ύ':
                output[outputPos++] = 'ί';
                break;

            // Υι -> ι, lone Υ -> Ι
            case 'Υ':
                if (next == 'ι')
                {
                    output[outputPos++] = 'ι';
                    pos++;
                }
                else if (next == 'ί')
                {
                    output[outputPos++] = 'ί';
                    pos++;
                }
                else if (next == 'Ι')
                {
                    output[outputPos++] = 'Ι';
                    pos++;
                }
                else if (next == 'Ί')
                {
                    output[outputPos++] = 'Ί';
                    pos++;
                }
                else
                {
                    output[outputPos++] = 'Ι';
                }
                break;
            case 'Ύ':
                output[outputPos++] = 'Ί';
                break;

            // ω -> ο
            case 'ω':
                output[outputPos++] = 'ο';
                break;
            case 'ώ':
                output[outputPos++] = 'ό';
                break;
            case 'Ω':
                output[outputPos++] = 'Ο';
                break;
            case 'Ώ':
                output[outputPos++] = 'Ό';
                break;

            default:
                output[outputPos++] = c;
                break;
        }
    }
}