/// <summary>
        /// Produces the transformed form of a term: the characters are first passed
        /// through <c>PhoneticTransform</c>, that result is passed through
        /// <c>Accent</c>, and the outcome is lower-cased. Intended to yield a form
        /// similar to analyzed data.
        /// </summary>
        /// <param name="source">Source term; must not be null.</param>
        /// <returns>The lower-cased, transformed term.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="source"/> is null.</exception>
        public string Transform(string source)
        {
            if (source == null)
            {
                throw new System.ArgumentNullException("source");
            }

            // Both stages get the same capacity budget: four output chars per input char.
            int maxSizeNeeded = 4 * source.Length;

            char[] poutput = new char[ArrayUtil.GetNextSize(maxSizeNeeded)];
            int len = PhoneticTransform(poutput, source.ToCharArray(), source.Length);

            char[] aoutput = new char[ArrayUtil.GetNextSize(maxSizeNeeded)];
            len = Accent(aoutput, poutput, len);

            // Invariant lower-casing keeps the result independent of the current
            // thread culture, so the same term always yields the same output.
            return new string(aoutput, 0, len).ToLowerInvariant();
        }
예제 #2
0
        /// <summary>
        /// Removes the tone mark (and dialytika) from accented Greek vowels, leaving
        /// the plain vowel of the same case. Every other character is copied through
        /// unchanged. The result is written to <c>output</c>; <c>outputPos</c> ends up
        /// holding the number of characters produced.
        /// </summary>
        /// <param name="input">The characters to fold.</param>
        /// <param name="length">The number of characters of <paramref name="input"/> to process.</param>
        public void Transform(char[] input, int length)
        {
            // Capacity budget: four output chars per input char.
            int requiredCapacity = 4 * length;
            if (output.Length < requiredCapacity)
            {
                output = new char[ArrayUtil.GetNextSize(requiredCapacity)];
            }

            outputPos = 0;

            for (int i = 0; i < length; i++)
            {
                char current = input[i];

                // Unless a case below says otherwise, the character is kept as-is.
                char folded = current;

                // Everything below 'Ά' can never be an accented Greek vowel.
                if (current >= 'Ά')
                {
                    switch (current)
                    {
                        case 'ά':
                            folded = 'α';
                            break;
                        case 'Ά':
                            folded = 'Α';
                            break;
                        case 'έ':
                            folded = 'ε';
                            break;
                        case 'Έ':
                            folded = 'Ε';
                            break;
                        case 'ή':
                            folded = 'η';
                            break;
                        case 'Ή':
                            folded = 'Η';
                            break;
                        case 'ί':
                        case 'ΐ':
                        case 'ϊ':
                            folded = 'ι';
                            break;
                        case 'Ί':
                        case 'Ϊ':
                            folded = 'Ι';
                            break;
                        case 'ό':
                            folded = 'ο';
                            break;
                        case 'Ό':
                            folded = 'Ο';
                            break;
                        case 'Ύ':
                        case 'Ϋ':
                            folded = 'Υ';
                            break;
                        case 'ύ':
                        case 'ΰ':
                        case 'ϋ':
                            folded = 'υ';
                            break;
                        case 'ώ':
                            folded = 'ω';
                            break;
                        case 'Ώ':
                            folded = 'Ω';
                            break;
                    }
                }

                output[outputPos++] = folded;
            }
        }
        /// <summary>
        /// Rewrites Greek vowels and vowel digraphs into a common spelling so that
        /// similar-sounding words produce the same output. For example, αι becomes ε;
        /// ει, οι, υι, η and υ become ι; ω becomes ο; and αυ/ευ become the vowel
        /// followed by β when <c>IsLoud</c> returns true for the two characters after
        /// the digraph, or by φ otherwise. Characters not listed are copied unchanged.
        /// The result is written to <c>output</c>; <c>outputPos</c> ends up holding the
        /// number of characters produced.
        /// </summary>
        /// <param name="input">The characters to transform.</param>
        /// <param name="length">The number of characters of <paramref name="input"/> to process.</param>
        public void Transform(char[] input, int length)
        {
            // Capacity budget: four output chars per input char.
            int maxSizeNeeded = 4 * length;

            if (output.Length < maxSizeNeeded)
            {
                output = new char[ArrayUtil.GetNextSize(maxSizeNeeded)];
            }

            outputPos = 0;

            for (int pos = 0; pos < length; ++pos)
            {
                char c = input[pos];

                // Look ahead up to three characters; (char)0 stands for "past the end".
                char next   = pos + 1 < length ? input[pos + 1] : (char)0;
                char fnext  = pos + 2 < length ? input[pos + 2] : (char)0;
                char fnext2 = pos + 3 < length ? input[pos + 3] : (char)0;

                // Quick test: if it's not in range then just keep current character
                if (c < 'Ά')
                {
                    output[outputPos++] = c;
                    continue;
                }

                switch (c)
                {
                    /*
                     * αυ -> αβ / αφ (depending on IsLoud), αι -> ε
                     */
                    case 'α':
                    case 'Α':
                        if (next == 'υ' || next == 'ύ')
                        {
                            output[outputPos++] = c;
                            pos++; // the upsilon is consumed as part of the digraph
                            bool loud = IsLoud(fnext, fnext2);
                            output[outputPos++] = loud ? 'β' : 'φ';
                        }
                        // '\u03A5' is GREEK CAPITAL LETTER UPSILON. The previous code
                        // compared against the look-alike Latin 'Y', so an upper-case
                        // ΑΥ was never recognised as a digraph.
                        else if (next == '\u03A5' || next == 'Ύ')
                        {
                            output[outputPos++] = c;
                            pos++;
                            bool loud = IsLoud(fnext, fnext2);
                            output[outputPos++] = loud ? 'Β' : 'Φ';
                        }
                        else if (next == 'ι')
                        {
                            output[outputPos++] = 'ε';
                            pos++;
                        }
                        else if (next == 'ί')
                        {
                            output[outputPos++] = 'έ';
                            pos++;
                        }
                        else if (next == 'Ι')
                        {
                            output[outputPos++] = 'Ε';
                            pos++;
                        }
                        else if (next == 'Ί')
                        {
                            output[outputPos++] = 'Έ';
                            pos++;
                        }
                        else
                        {
                            output[outputPos++] = c;
                        }
                        break;

                    /*
                     * ευ -> εβ / εφ (depending on IsLoud), ει -> ι
                     */
                    case 'ε':
                    case 'Ε':
                        if (next == 'υ' || next == 'ύ')
                        {
                            output[outputPos++] = c;
                            pos++;
                            bool loud = IsLoud(fnext, fnext2);
                            output[outputPos++] = loud ? 'β' : 'φ';
                        }
                        // Greek capital upsilon, not Latin 'Y' (see the alpha case).
                        else if (next == '\u03A5' || next == 'Ύ')
                        {
                            output[outputPos++] = c;
                            pos++;
                            bool loud = IsLoud(fnext, fnext2);
                            output[outputPos++] = loud ? 'Β' : 'Φ';
                        }
                        else if (next == 'ι')
                        {
                            output[outputPos++] = 'ι';
                            pos++;
                        }
                        else if (next == 'ί')
                        {
                            output[outputPos++] = 'ί';
                            pos++;
                        }
                        else if (next == 'Ι')
                        {
                            output[outputPos++] = 'Ι';
                            pos++;
                        }
                        else if (next == 'Ί')
                        {
                            output[outputPos++] = 'Ί';
                            pos++;
                        }
                        else
                        {
                            output[outputPos++] = c;
                        }
                        break;

                    // η -> ι (accent and case preserved)
                    case 'η':
                        output[outputPos++] = 'ι';
                        break;

                    case 'ή':
                        output[outputPos++] = 'ί';
                        break;

                    case 'Η':
                        output[outputPos++] = 'Ι';
                        break;

                    case 'Ή':
                        output[outputPos++] = 'Ί';
                        break;

                    // οι -> ι; a lone omicron is kept
                    case 'ο':
                    case 'Ο':
                        if (next == 'ι')
                        {
                            output[outputPos++] = 'ι';
                            pos++;
                        }
                        else if (next == 'ί')
                        {
                            output[outputPos++] = 'ί';
                            pos++;
                        }
                        else if (next == 'Ι')
                        {
                            output[outputPos++] = 'Ι';
                            pos++;
                        }
                        else if (next == 'Ί')
                        {
                            output[outputPos++] = 'Ί';
                            pos++;
                        }
                        else
                        {
                            output[outputPos++] = c;
                        }
                        break;

                    // υι -> ι; a lone upsilon also becomes ι
                    case 'υ':
                        if (next == 'ι')
                        {
                            output[outputPos++] = 'ι';
                            pos++;
                        }
                        else if (next == 'ί')
                        {
                            output[outputPos++] = 'ί';
                            pos++;
                        }
                        else if (next == 'Ι')
                        {
                            output[outputPos++] = 'Ι';
                            pos++;
                        }
                        else if (next == 'Ί')
                        {
                            output[outputPos++] = 'Ί';
                            pos++;
                        }
                        else
                        {
                            output[outputPos++] = 'ι';
                        }
                        break;

                    case 'ύ':
                        output[outputPos++] = 'ί';
                        break;

                    // Upper-case upsilon: same rules, but a lone Υ becomes Ι
                    case 'Υ':
                        if (next == 'ι')
                        {
                            output[outputPos++] = 'ι';
                            pos++;
                        }
                        else if (next == 'ί')
                        {
                            output[outputPos++] = 'ί';
                            pos++;
                        }
                        else if (next == 'Ι')
                        {
                            output[outputPos++] = 'Ι';
                            pos++;
                        }
                        else if (next == 'Ί')
                        {
                            output[outputPos++] = 'Ί';
                            pos++;
                        }
                        else
                        {
                            output[outputPos++] = 'Ι';
                        }
                        break;

                    case 'Ύ':
                        output[outputPos++] = 'Ί';
                        break;

                    // ω -> ο (accent and case preserved)
                    case 'ω':
                        output[outputPos++] = 'ο';
                        break;

                    case 'ώ':
                        output[outputPos++] = 'ό';
                        break;

                    case 'Ω':
                        output[outputPos++] = 'Ο';
                        break;

                    case 'Ώ':
                        output[outputPos++] = 'Ό';
                        break;

                    default:
                        output[outputPos++] = c;
                        break;
                }
            }
        }