/// <summary>
/// Replaces each Shadda that is immediately followed by a character present in
/// <c>ShaddaCombinationMap</c> with the single combined character stored in that map.
/// The buffer is compacted in place and its <c>Length</c> is reduced to the number of
/// characters written.
/// </summary>
/// <param name="input">Buffer that is rewritten in place.</param>
public static void FixShaddaCombinations(FastStringBuilder input)
{
    /*
     * Fix of https://github.com/mnarimani/RTLTMPro/issues/13
     */
    int j = 0; // write index
    int i = 0; // read index
    while (i < input.Length)
    {
        int curChar = input.Get(i);
        // When there is no following character, 0 is used as a stand-in value.
        int nextChar = i < input.Length - 1 ? input.Get(i + 1) : (char)0;

        // The map is keyed by char. Narrowing a value above 0xFFFF to char would drop
        // its high bits and could alias an unrelated key, so only values that fit in a
        // single 16-bit unit are looked up.
        bool nextFitsInChar = nextChar >= 0 && nextChar <= 0xFFFF;

        if ((TashkeelCharacters)curChar == TashkeelCharacters.Shadda
            && nextFitsInChar
            && ShaddaCombinationMap.ContainsKey((char)nextChar))
        {
            // Two source characters collapse into one combined character.
            input.Set(j, ShaddaCombinationMap[(char)nextChar]);
            j++;
            i += 2;
        }
        else
        {
            input.Set(j, curChar);
            j++;
            i++;
        }
    }

    input.Length = j;
}
/// <summary>
/// Normalizes the Ya letter in place. When <paramref name="farsi"/> is true every
/// <c>ArabicGeneralLetters.Ya</c> becomes <c>ArabicGeneralLetters.PersianYa</c>;
/// otherwise every <c>PersianYa</c> becomes <c>Ya</c>. All other characters are left untouched.
/// </summary>
/// <param name="text">Buffer that is modified in place.</param>
/// <param name="farsi">Selects the direction of the replacement.</param>
public static void FixYah(FastStringBuilder text, bool farsi)
{
    // Decide once which letter is being replaced and what it is replaced with.
    int unwanted = farsi ? (int)ArabicGeneralLetters.Ya : (int)ArabicGeneralLetters.PersianYa;
    char wanted = farsi ? (char)ArabicGeneralLetters.PersianYa : (char)ArabicGeneralLetters.Ya;

    for (int index = 0; index < text.Length; index++)
    {
        if (text.Get(index) == unwanted)
        {
            text.Set(index, wanted);
        }
    }
}
/// <summary>
/// Handles the special Lam-Alef connection. Looks at the character that follows the Lam
/// in <paramref name="input"/>; when it is one of the four handled Alef variants, the
/// matching replacement code is written to <paramref name="output"/> at the Lam position
/// and the following position is overwritten with 0xFFFF.
/// </summary>
/// <param name="input">Buffer that is read. The caller must guarantee that index <c>i + 1</c> exists.</param>
/// <param name="output">Buffer that receives the replacement code and the 0xFFFF marker.</param>
/// <param name="i">Index of Lam letter</param>
/// <returns><see langword="true" /> if special connection has been made.</returns>
private static bool HandleSpecialLam(FastStringBuilder input, FastStringBuilder output, int i)
{
    char replacement;

    switch (input.Get(i + 1))
    {
        case (char)ArabicGeneralLetters.AlefMaksoor:
            replacement = (char)0xFEF7;
            break;
        case (char)ArabicGeneralLetters.Alef:
            replacement = (char)0xFEF9;
            break;
        case (char)ArabicGeneralLetters.AlefHamza:
            replacement = (char)0xFEF5;
            break;
        case (char)ArabicGeneralLetters.AlefMad:
            replacement = (char)0xFEF3;
            break;
        default:
            // Not followed by a handled Alef variant: nothing is written.
            return false;
    }

    output.Set(i, replacement);
    // The slot after the Lam is overwritten with 0xFFFF once the pair has been merged.
    output.Set(i + 1, (char)0xFFFF);
    return true;
}
/// <summary>
/// Strips tashkeel from the buffer in place. <c>TashkeelLocations</c> is cleared first and
/// then receives one entry (mark and original index) for every removed character. The
/// remaining characters are shifted left and <c>Length</c> is reduced to match.
/// </summary>
/// <param name="input">Buffer that is compacted in place.</param>
public static void RemoveTashkeel(FastStringBuilder input)
{
    TashkeelLocations.Clear();

    int writePos = 0;
    int readPos = 0;
    while (readPos < input.Length)
    {
        int codePoint = input.Get(readPos);

        // Narrow to char only after confirming the value is a 16-bit character.
        bool isTashkeel = Char32Utils.IsUnicode16Char(codePoint)
                          && TashkeelCharactersSet.Contains((char)codePoint);

        if (isTashkeel)
        {
            // Record the mark together with its index in the original text.
            TashkeelLocations.Add(new TashkeelLocation((TashkeelCharacters)codePoint, readPos));
        }
        else
        {
            input.Set(writePos, codePoint);
            writePos++;
        }

        readPos++;
    }

    input.Length = writePos;
}
/// <summary>
/// Converts English digits that are outside tags to Persian or Arabic digits, in place.
/// A tag is a run that starts with '&lt;', is not immediately followed by a space,
/// contains no further '&lt;', and ends at the first '&gt;'. Everything inside such a run
/// is left untouched.
/// </summary>
/// <param name="text">Buffer that is modified in place.</param>
/// <param name="farsi">
/// When true digits are taken from <c>EnglishToFarsiNumberMap</c>, otherwise from
/// <c>EnglishToHinduNumberMap</c>.
/// </param>
public static void FixNumbersOutsideOfTags(FastStringBuilder text, bool farsi)
{
    for (int i = 0; i < text.Length; i++)
    {
        int iChar = text.Get(i);

        // skip valid tags
        if (iChar == '<')
        {
            bool sawValidTag = false;
            for (int j = i + 1; j < text.Length; j++)
            {
                int jChar = text.Get(j);
                if ((j == i + 1 && jChar == ' ') || jChar == '<')
                {
                    // "< " or a nested '<' means this is not a tag.
                    break;
                }

                if (jChar == '>')
                {
                    // Jump to the closing bracket; the loop increment moves past it.
                    i = j;
                    sawValidTag = true;
                    break;
                }
            }

            if (sawValidTag)
            {
                continue;
            }
        }

        // The digit maps are keyed by char. A value above 0xFFFF would lose its high
        // bits when narrowed and could be mistaken for an ASCII digit, so skip it.
        if (iChar < 0 || iChar > 0xFFFF)
        {
            continue;
        }

        char candidate = (char)iChar;

        // Membership is tested directly on the map instead of building a key set per call.
        if (EnglishToFarsiNumberMap.ContainsKey(candidate))
        {
            text.Set(i, farsi ? EnglishToFarsiNumberMap[candidate] : EnglishToHinduNumberMap[candidate]);
        }
    }
}
/// <summary>
/// Fixes the shape of letters based on their position. <paramref name="input"/> is first
/// passed through <c>FixYah</c> and copied into <paramref name="output"/>; shaped glyphs
/// are then written to <paramref name="output"/> while positions are evaluated against
/// <paramref name="input"/>. Finally, unless numbers are preserved, digits in
/// <paramref name="output"/> are converted.
/// </summary>
/// <param name="input">Source text; modified by the Ya normalization.</param>
/// <param name="output">Receives the shaped text.</param>
/// <param name="preserveNumbers">When true, no digit conversion is performed.</param>
/// <param name="farsi">Forwarded to the Ya normalization and to the digit conversion.</param>
/// <param name="fixTextTags">
/// When true digits are converted with <c>FixNumbersOutsideOfTags</c>, otherwise with
/// <c>FixNumbers</c>.
/// </param>
public static void Fix(FastStringBuilder input, FastStringBuilder output, bool preserveNumbers, bool farsi, bool fixTextTags)
{
    FixYah(input, farsi);
    output.SetValue(input);

    for (int pos = 0; pos < input.Length; pos++)
    {
        int current = input.Get(pos);
        bool mergedWithNext = false;

        // For special Lam Letter connections (only possible when a next character exists).
        if (current == (int)ArabicGeneralLetters.Lam && pos < input.Length - 1)
        {
            mergedWithNext = HandleSpecialLam(input, output, pos);
            if (mergedWithNext)
            {
                // Continue shaping with the replacement code that was written to output.
                current = output.Get(pos);
            }
        }

        // We don't want to fix tatweel or zwnj character
        bool isTatweel = current == (int)ArabicGeneralLetters.ArabicTatweel;
        bool isZwnj = current == (int)ArabicGeneralLetters.ZeroWidthNoJoiner;
        if (isTatweel || isZwnj)
        {
            continue;
        }

        if (current < 0xFFFF && TextUtils.IsGlyphFixedArabicCharacter((char)current))
        {
            char baseGlyph = GlyphTable.Convert((char)current);

            // Offset from the converted glyph, chosen by the first position test that succeeds.
            int shapeOffset = 0;
            if (IsMiddleLetter(input, pos))
            {
                shapeOffset = 3;
            }
            else if (IsFinishingLetter(input, pos))
            {
                shapeOffset = 1;
            }
            else if (IsLeadingLetter(input, pos))
            {
                shapeOffset = 2;
            }

            // When no position test succeeds the output is left as it was.
            if (shapeOffset != 0)
            {
                output.Set(pos, (char)(baseGlyph + shapeOffset));
            }
        }

        // If this letter was Lam and the special Lam-Alef connection was made, skip the Alef
        // (Lam-Alef occupies 1 space)
        if (mergedWithNext)
        {
            pos++;
        }
    }

    if (preserveNumbers)
    {
        return;
    }

    if (fixTextTags)
    {
        FixNumbersOutsideOfTags(output, farsi);
    }
    else
    {
        FixNumbers(output, farsi);
    }
}