Exemple #1
0
        /// <summary>
        ///     Replaces each Shadda that is immediately followed by a character found in
        ///     <c>ShaddaCombinationMap</c> with the single combined character from that map.
        ///     The buffer is compacted in place and its length is shrunk to the number of
        ///     characters written.
        /// </summary>
        /// <param name="input">Buffer to rewrite in place.</param>
        public static void FixShaddaCombinations(FastStringBuilder input)
        {
            /*
             * Fix of https://github.com/mnarimani/RTLTMPro/issues/13
             */
            int j = 0; // write index
            int i = 0; // read index

            while (i < input.Length)
            {
                int curChar = input.Get(i);

                // Only look ahead when there actually is a following character.
                if ((TashkeelCharacters)curChar == TashkeelCharacters.Shadda && i < input.Length - 1)
                {
                    int nextChar = input.Get(i + 1);

                    // The map is looked up by 16-bit char. Values outside the char range must not be
                    // cast: truncation could make an unrelated code point match a map key.
                    if (nextChar >= 0 && nextChar <= char.MaxValue &&
                        ShaddaCombinationMap.TryGetValue((char)nextChar, out var combined))
                    {
                        // Two source characters collapse into one combined character.
                        input.Set(j, combined);
                        j++;
                        i += 2;
                        continue;
                    }
                }

                // No combination applies: copy the character through unchanged.
                input.Set(j, curChar);
                j++;
                i++;
            }

            input.Length = j;
        }
Exemple #2
0
 /// <summary>
 ///     Normalizes the Ya letter in place. When <paramref name="farsi" /> is <see langword="true" />,
 ///     every <c>ArabicGeneralLetters.Ya</c> becomes <c>ArabicGeneralLetters.PersianYa</c>;
 ///     otherwise every <c>ArabicGeneralLetters.PersianYa</c> becomes <c>ArabicGeneralLetters.Ya</c>.
 /// </summary>
 /// <param name="text">Buffer to modify in place.</param>
 /// <param name="farsi">Selects which of the two Ya variants is kept.</param>
 public static void FixYah(FastStringBuilder text, bool farsi)
 {
     // The letter to look for and its replacement depend only on the flag, so pick them once.
     int unwanted = farsi ? (int)ArabicGeneralLetters.Ya : (int)ArabicGeneralLetters.PersianYa;
     char wanted = farsi ? (char)ArabicGeneralLetters.PersianYa : (char)ArabicGeneralLetters.Ya;

     for (int index = 0; index < text.Length; index++)
     {
         if (text.Get(index) == unwanted)
         {
             text.Set(index, wanted);
         }
     }
 }
Exemple #3
0
        /// <summary>
        ///     Handles the special Lam-Alef connection in the text.
        /// </summary>
        /// <param name="input">Buffer whose character at <paramref name="i" /> + 1 is inspected.</param>
        /// <param name="output">
        ///     Buffer that receives the result; positions <paramref name="i" /> and
        ///     <paramref name="i" /> + 1 are overwritten when a connection is made.
        /// </param>
        /// <param name="i">
        ///     Index of Lam letter. Position <paramref name="i" /> + 1 is read without a bounds check here.
        /// </param>
        /// <returns><see langword="true" /> if special connection has been made.</returns>
        private static bool HandleSpecialLam(FastStringBuilder input, FastStringBuilder output, int i)
        {
            char replacement;

            // Choose the replacement for the Lam based on the letter that follows it.
            switch (input.Get(i + 1))
            {
                case (char)ArabicGeneralLetters.AlefMaksoor:
                    replacement = (char)0xFEF7;
                    break;

                case (char)ArabicGeneralLetters.Alef:
                    replacement = (char)0xFEF9;
                    break;

                case (char)ArabicGeneralLetters.AlefHamza:
                    replacement = (char)0xFEF5;
                    break;

                case (char)ArabicGeneralLetters.AlefMad:
                    replacement = (char)0xFEF3;
                    break;

                default:
                    // The following letter does not take part in a special connection;
                    // output is left untouched.
                    return false;
            }

            output.Set(i, replacement);

            // The following position is overwritten with 0xFFFF
            // (presumably a placeholder for the merged letter - confirm against callers).
            output.Set(i + 1, (char)0xFFFF);

            return true;
        }
Exemple #4
0
        /// <summary>
        ///     Removes tashkeel from text.
        /// </summary>
        /// <remarks>
        ///     <c>TashkeelLocations</c> is cleared first; every removed character is then recorded
        ///     there together with its index in the buffer as it was before removal. The remaining
        ///     characters are compacted in place and the buffer length is reduced.
        /// </remarks>
        /// <param name="input">Buffer to strip in place.</param>
        public static void RemoveTashkeel(FastStringBuilder input)
        {
            TashkeelLocations.Clear();

            int writeIndex = 0;
            for (int readIndex = 0; readIndex < input.Length; readIndex++)
            {
                int codePoint = input.Get(readIndex);

                // The set holds chars, so the cast is only attempted after the 16-bit check passes.
                bool isTashkeel = Char32Utils.IsUnicode16Char(codePoint) &&
                                  TashkeelCharactersSet.Contains((char)codePoint);

                if (!isTashkeel)
                {
                    // Keep the character, shifting it left over any removed ones.
                    input.Set(writeIndex++, codePoint);
                    continue;
                }

                TashkeelLocations.Add(new TashkeelLocation((TashkeelCharacters)codePoint, readIndex));
            }

            input.Length = writeIndex;
        }
Exemple #5
0
        /// <summary>
        ///     Converts English numbers that are outside tags to Persian or Arabic numbers, in place.
        /// </summary>
        /// <remarks>
        ///     A tag is an opening angle bracket followed, before any other opening angle bracket,
        ///     by a closing angle bracket, where the character directly after the opening bracket
        ///     is not a space. Everything up to and including the closing bracket is skipped.
        /// </remarks>
        /// <param name="text">Buffer to modify in place.</param>
        /// <param name="farsi">
        ///     <see langword="true" /> to use <c>EnglishToFarsiNumberMap</c>;
        ///     <see langword="false" /> to use <c>EnglishToHinduNumberMap</c>.
        /// </param>
        public static void FixNumbersOutsideOfTags(FastStringBuilder text, bool farsi)
        {
            for (int i = 0; i < text.Length; i++)
            {
                int iChar = text.Get(i);

                // skip valid tags
                if (iChar == '<')
                {
                    bool sawValidTag = false;
                    for (int j = i + 1; j < text.Length; j++)
                    {
                        int jChar = text.Get(j);

                        // "< " and a nested '<' both mean this is not a tag.
                        if ((j == i + 1 && jChar == ' ') || jChar == '<')
                        {
                            break;
                        }

                        if (jChar == '>')
                        {
                            // Resume after the closing bracket.
                            i = j;
                            sawValidTag = true;
                            break;
                        }
                    }

                    if (sawValidTag)
                    {
                        continue;
                    }
                }

                // The digit maps are looked up by 16-bit char. A larger value must not be cast:
                // truncation could make an unrelated code point look like a digit.
                if (iChar < 0 || iChar > char.MaxValue)
                {
                    continue;
                }

                char candidate = (char)iChar;

                // The keys of the Farsi map decide which characters count as English digits.
                if (EnglishToFarsiNumberMap.TryGetValue(candidate, out var farsiDigit))
                {
                    text.Set(i, farsi ? farsiDigit : EnglishToHinduNumberMap[candidate]);
                }
            }
        }
Exemple #6
0
        /// <summary>
        ///     Fixes the shape of letters based on their position.
        /// </summary>
        /// <remarks>
        ///     <paramref name="input" /> is first passed through <c>FixYah</c> (so it is modified too),
        ///     then copied into <paramref name="output" />, where the shaped characters are written.
        /// </remarks>
        /// <param name="input">Text to shape.</param>
        /// <param name="output">Buffer that receives the shaped text.</param>
        /// <param name="preserveNumbers">When <see langword="true" />, numbers are left as they are.</param>
        /// <param name="farsi">Forwarded to <c>FixYah</c> and to the number conversion.</param>
        /// <param name="fixTextTags">
        ///     When numbers are converted, selects <c>FixNumbersOutsideOfTags</c>
        ///     (<see langword="true" />) or <c>FixNumbers</c> (<see langword="false" />).
        /// </param>
        public static void Fix(FastStringBuilder input, FastStringBuilder output, bool preserveNumbers, bool farsi, bool fixTextTags)
        {
            FixYah(input, farsi);

            output.SetValue(input);

            for (int i = 0; i < input.Length; i++)
            {
                int current = input.Get(i);

                // For special Lam Letter connections. Short-circuiting keeps HandleSpecialLam from
                // being called unless this is a Lam that has a following character.
                bool lamAlefMerged = current == (int)ArabicGeneralLetters.Lam &&
                                     i < input.Length - 1 &&
                                     HandleSpecialLam(input, output, i);
                if (lamAlefMerged)
                {
                    // Continue with the character HandleSpecialLam wrote for this position.
                    current = output.Get(i);
                }

                // We don't want to fix tatweel or zwnj character
                bool isTatweel = current == (int)ArabicGeneralLetters.ArabicTatweel;
                if (isTatweel || current == (int)ArabicGeneralLetters.ZeroWidthNoJoiner)
                {
                    continue;
                }

                if (current < 0xFFFF && TextUtils.IsGlyphFixedArabicCharacter((char)current))
                {
                    char converted = GlyphTable.Convert((char)current);

                    // Offset added to the converted character; 0 means none of the three
                    // positional checks matched and output is left as it is.
                    int offset = 0;
                    if (IsMiddleLetter(input, i))
                    {
                        offset = 3;
                    }
                    else if (IsFinishingLetter(input, i))
                    {
                        offset = 1;
                    }
                    else if (IsLeadingLetter(input, i))
                    {
                        offset = 2;
                    }

                    if (offset != 0)
                    {
                        output.Set(i, (char)(converted + offset));
                    }
                }

                // If this letter as Lam and special Lam-Alef connection was made, We want to skip the Alef
                // (Lam-Alef occupies 1 space)
                if (lamAlefMerged)
                {
                    i++;
                }
            }

            if (preserveNumbers)
            {
                return;
            }

            if (fixTextTags)
            {
                FixNumbersOutsideOfTags(output, farsi);
            }
            else
            {
                FixNumbers(output, farsi);
            }
        }