/// <summary>
/// Runs the full fixing pipeline on <paramref name="input"/> and writes the result into <paramref name="output"/>.
/// The method itself returns nothing; the fixed text is delivered through <paramref name="output"/>.
/// </summary>
/// <param name="input">Text to fix</param>
/// <param name="output">Builder that receives the fixed text</param>
/// <param name="farsi">Forwarded to the glyph fixer and the ligature fixer</param>
/// <param name="fixTextTags">Forwarded to the ligature fixer; when true the rich text fixer also runs on the output</param>
/// <param name="preserveNumbers">Forwarded to the glyph fixer and the ligature fixer</param>
public static void FixRTL(
    string input,
    FastStringBuilder output,
    bool farsi = true,
    bool fixTextTags = true,
    bool preserveNumbers = false)
{
    // Load the text into the shared scratch builder.
    inputBuilder.SetValue(input);

    // Take tashkeel out before glyph fixing; it is put back right after.
    TashkeelFixer.RemoveTashkeel(inputBuilder);

    // The shape of the letters in glyphFixerOutput is fixed according to their position in word.
    // The flow of the text is not fixed yet.
    GlyphFixer.Fix(inputBuilder, glyphFixerOutput, preserveNumbers, farsi);

    // Restore tashkeel to their places.
    TashkeelFixer.RestoreTashkeel(glyphFixerOutput);

    // Fix flow of the text and put the result in output.
    LigatureFixer.Fix(glyphFixerOutput, output, farsi, fixTextTags, preserveNumbers);

    if (fixTextTags)
    {
        RichTextFixer.Fix(output);
    }

    // Empty the shared scratch builder once the call is done.
    inputBuilder.Clear();
}
/// <summary>
/// Replaces every Shadda that is immediately followed by another tashkeel found in
/// <c>ShaddaCombinationMap</c> with the single combined character from that map.
/// The text is compacted in place.
/// </summary>
/// <param name="input">Text to rewrite; its length shrinks by one for every pair that is combined.</param>
public static void FixShaddaCombinations(FastStringBuilder input)
{
    /*
     * Fix of https://github.com/mnarimani/RTLTMPro/issues/13
     */
    int j = 0; // write index
    int i = 0; // read index
    while (i < input.Length)
    {
        int curChar = input.Get(i);
        int nextChar = i < input.Length - 1 ? input.Get(i + 1) : 0;

        // Narrowing a value above 0xFFFF to char silently drops the high bits, which could make an
        // unrelated supplementary character look like a tashkeel key. Only look up values that fit.
        bool nextFitsInChar = nextChar >= 0 && nextChar <= char.MaxValue;

        if ((TashkeelCharacters)curChar == TashkeelCharacters.Shadda
            && nextFitsInChar
            && ShaddaCombinationMap.TryGetValue((char)nextChar, out var combined))
        {
            // Two source characters collapse into one combined character.
            input.Set(j, combined);
            i += 2;
        }
        else
        {
            input.Set(j, curChar);
            i++;
        }

        j++;
    }

    // Drop the tail left over after compaction.
    input.Length = j;
}
/// <summary>
/// Re-inserts every entry of <c>TashkeelLocations</c> into <paramref name="letters"/>
/// at the position stored with that entry.
/// </summary>
/// <param name="letters">Text that receives the tashkeel.</param>
public static void RestoreTashkeel(FastStringBuilder letters)
{
    foreach (TashkeelLocation mark in TashkeelLocations)
    {
        var insertAt = mark.Position;
        letters.Insert(insertAt, mark.Tashkeel);
    }
}
/// <summary>
/// Restores removed tashkeel: re-inserts every entry of <c>TashkeelLocations</c> into
/// <paramref name="letters"/>, merges Shadda pairs into their combined forms, and then
/// empties <c>TashkeelLocations</c>.
/// </summary>
/// <param name="letters">Text that receives the tashkeel.</param>
public static void RestoreTashkeel(FastStringBuilder letters)
{
    // Each entry is inserted at exactly the position stored with it; no running offset is applied.
    foreach (TashkeelLocation location in TashkeelLocations)
    {
        letters.Insert(location.Position, location.Tashkeel);
    }

    /*
     * Fix of https://github.com/mnarimani/RTLTMPro/issues/13
     * The workaround is to replace Shadda + Another Tashkeel with combined form
     */
    letters.Replace(ShaddaFatha, ShaddaWithFathaIsolatedForm);
    letters.Replace(ShaddaDamma, ShaddaWithDammaIsolatedForm);
    letters.Replace(ShaddaKasra, ShaddaWithKasraIsolatedForm);
    letters.Replace(ShaddaDammatan, ShaddaWithDammatanIsolatedForm);
    letters.Replace(ShaddaKasratan, ShaddaWithKasratanIsolatedForm);
    letters.Replace(ShaddaSuperscriptAlef, ShaddaWithSuperscriptAlefIsolatedForm);

    // The recorded locations have been consumed; clear them so they are not inserted again.
    TashkeelLocations.Clear();
}
/// <summary>
/// Overwrites <paramref name="output"/> with <paramref name="length"/> elements of this builder,
/// starting at index <paramref name="start"/>.
/// </summary>
/// <param name="output">Builder that is reset and then filled.</param>
/// <param name="start">Index of the first element to copy.</param>
/// <param name="length">Number of elements to copy.</param>
public void Substring(FastStringBuilder output, int start, int length)
{
    // Discard whatever the target held before.
    output.Length = 0;

    int copied = 0;
    while (copied < length)
    {
        output.Append(array[start + copied]);
        copied++;
    }
}
/// <summary>
/// Appends the contents of <paramref name="buffer"/> to <paramref name="output"/> from the last
/// element to the first, then empties the buffer.
/// </summary>
/// <param name="buffer">Held characters; cleared on return.</param>
/// <param name="output">Builder that receives the characters.</param>
private static void FlushBufferToOutput(List<char> buffer, FastStringBuilder output)
{
    for (int index = buffer.Count - 1; index >= 0; index--)
    {
        output.Append(buffer[index]);
    }

    buffer.Clear();
}
/// <summary>
/// Replaces every English digit in <paramref name="text"/>, in place, with its Farsi digit
/// (when <paramref name="farsi"/> is true) or its Hindu digit (otherwise).
/// </summary>
/// <param name="text">Text to modify in place.</param>
/// <param name="farsi">Selects the Farsi digit set when true, the Hindu digit set when false.</param>
public static void FixNumbers(FastStringBuilder text, bool farsi)
{
    // Swaps one English digit for the digit of the selected target set.
    void Swap(char english, char farsiDigit, char hinduDigit)
    {
        text.Replace(english, farsi ? farsiDigit : hinduDigit);
    }

    Swap((char)EnglishNumbers.Zero, (char)FarsiNumbers.Zero, (char)HinduNumbers.Zero);
    Swap((char)EnglishNumbers.One, (char)FarsiNumbers.One, (char)HinduNumbers.One);
    Swap((char)EnglishNumbers.Two, (char)FarsiNumbers.Two, (char)HinduNumbers.Two);
    Swap((char)EnglishNumbers.Three, (char)FarsiNumbers.Three, (char)HinduNumbers.Three);
    Swap((char)EnglishNumbers.Four, (char)FarsiNumbers.Four, (char)HinduNumbers.Four);
    Swap((char)EnglishNumbers.Five, (char)FarsiNumbers.Five, (char)HinduNumbers.Five);
    Swap((char)EnglishNumbers.Six, (char)FarsiNumbers.Six, (char)HinduNumbers.Six);
    Swap((char)EnglishNumbers.Seven, (char)FarsiNumbers.Seven, (char)HinduNumbers.Seven);
    Swap((char)EnglishNumbers.Eight, (char)FarsiNumbers.Eight, (char)HinduNumbers.Eight);
    Swap((char)EnglishNumbers.Nine, (char)FarsiNumbers.Nine, (char)HinduNumbers.Nine);
}
/// <summary>
/// Normalizes the Ya letter in place: with <paramref name="farsi"/> set, every
/// <c>ArabicGeneralLetters.Ya</c> becomes <c>ArabicGeneralLetters.PersianYa</c>;
/// otherwise every <c>PersianYa</c> becomes <c>Ya</c>.
/// </summary>
/// <param name="text">Text to modify in place.</param>
/// <param name="farsi">Direction of the conversion.</param>
public static void FixYah(FastStringBuilder text, bool farsi)
{
    // Decide once which variant is searched for and which one replaces it.
    int unwanted;
    char replacement;
    if (farsi)
    {
        unwanted = (int)ArabicGeneralLetters.Ya;
        replacement = (char)ArabicGeneralLetters.PersianYa;
    }
    else
    {
        unwanted = (int)ArabicGeneralLetters.PersianYa;
        replacement = (char)ArabicGeneralLetters.Ya;
    }

    for (int position = 0; position < text.Length; position++)
    {
        if (text.Get(position) == unwanted)
        {
            text.Set(position, replacement);
        }
    }
}
/// <summary>
/// Is the letter at provided index a finishing letter?
/// A letter qualifies when the letter before it can connect to it and the letter itself is
/// not a space, a zero-width non-joiner or a Hamza.
/// </summary>
/// <param name="letters">Text being inspected.</param>
/// <param name="index">Index of the letter to test.</param>
/// <returns><see langword="true" /> if the letter is a finishing letter</returns>
private static bool IsFinishingLetter(FastStringBuilder letters, int index)
{
    int current = letters.Get(index);

    // The first letter has nothing before it to connect to.
    if (index == 0)
    {
        return false;
    }

    int previous = letters.Get(index - 1);

    // Characters that never connect to the letter that follows them.
    bool previousNeverConnectsForward =
        previous == ' '
        || previous == (int)ArabicGeneralLetters.Dal
        || previous == (int)ArabicGeneralLetters.Thal
        || previous == (int)ArabicGeneralLetters.Ra2
        || previous == (int)ArabicGeneralLetters.Zeen
        || previous == (int)ArabicGeneralLetters.PersianZe
        || previous == (int)ArabicGeneralLetters.Waw
        || previous == (int)ArabicGeneralLetters.Alef
        || previous == (int)ArabicGeneralLetters.AlefMad
        || previous == (int)ArabicGeneralLetters.AlefHamza
        || previous == (int)ArabicGeneralLetters.AlefMaksoor
        || previous == (int)ArabicGeneralLetters.WawHamza
        || previous == (int)ArabicGeneralLetters.Hamza
        || previous == (int)ArabicGeneralLetters.ZeroWidthNoJoiner
        || previous == (int)ArabicIsolatedLetters.Dal
        || previous == (int)ArabicIsolatedLetters.Thal
        || previous == (int)ArabicIsolatedLetters.Ra2
        || previous == (int)ArabicIsolatedLetters.Zeen
        || previous == (int)ArabicIsolatedLetters.PersianZe
        || previous == (int)ArabicIsolatedLetters.Waw
        || previous == (int)ArabicIsolatedLetters.Alef
        || previous == (int)ArabicIsolatedLetters.AlefMad
        || previous == (int)ArabicIsolatedLetters.AlefHamza
        || previous == (int)ArabicIsolatedLetters.AlefMaksoor
        || previous == (int)ArabicIsolatedLetters.WawHamza
        || previous == (int)ArabicIsolatedLetters.Hamza;

    if (previousNeverConnectsForward)
    {
        return false;
    }

    // The previous character must also be one the glyph fixer handles.
    bool previousIsGlyphFixed =
        previous < 0xFFFF && TextUtils.IsGlyphFixedArabicCharacter((char)previous);

    if (!previousIsGlyphFixed)
    {
        return false;
    }

    bool currentCannotFinish =
        current == ' '
        || current == (int)ArabicGeneralLetters.ZeroWidthNoJoiner
        || current == (int)ArabicGeneralLetters.Hamza;

    return !currentCannotFinish;
}
/// <summary>
/// Strips tashkeel from <paramref name="input"/> in place. Each removed mark is recorded in
/// <c>TashkeelLocations</c> together with the index it had before removal.
/// </summary>
/// <param name="input">Text to strip; it is compacted and shortened in place.</param>
public static void RemoveTashkeel(FastStringBuilder input)
{
    // Start from a clean slate so earlier calls do not leak into this one.
    TashkeelLocations.Clear();

    int kept = 0; // number of characters written back so far
    int read = 0; // index of the character being examined

    while (read < input.Length)
    {
        int codePoint = input.Get(read);

        bool isTashkeel =
            Char32Utils.IsUnicode16Char(codePoint)
            && TashkeelCharactersSet.Contains((char)codePoint);

        if (!isTashkeel)
        {
            // Ordinary character: slide it down over any removed marks.
            input.Set(kept, codePoint);
            kept++;
        }
        else
        {
            TashkeelLocations.Add(new TashkeelLocation((TashkeelCharacters)codePoint, read));
        }

        read++;
    }

    input.Length = kept;
}
/// <summary>
/// Handles the special Lam-Alef connection in the text. When the letter after the Lam is one of
/// the recognized Alef variants, the Lam slot in <paramref name="output"/> is overwritten with a
/// single code for the pair and the following slot is overwritten with 0xFFFF.
/// </summary>
/// <param name="input">Text that is inspected.</param>
/// <param name="output">Text that is modified when a connection is made.</param>
/// <param name="i">Index of Lam letter</param>
/// <returns><see langword="true" /> if special connection has been made.</returns>
private static bool HandleSpecialLam(FastStringBuilder input, FastStringBuilder output, int i)
{
    char combined;

    switch (input.Get(i + 1))
    {
        case (char)ArabicGeneralLetters.AlefMaksoor:
            combined = (char)0xFEF7;
            break;
        case (char)ArabicGeneralLetters.Alef:
            combined = (char)0xFEF9;
            break;
        case (char)ArabicGeneralLetters.AlefHamza:
            combined = (char)0xFEF5;
            break;
        case (char)ArabicGeneralLetters.AlefMad:
            combined = (char)0xFEF3;
            break;
        default:
            // Not followed by a recognized Alef variant: nothing to do.
            return false;
    }

    output.Set(i, combined);
    // Overwrite the slot after the Lam with 0xFFFF.
    output.Set(i + 1, (char)0xFFFF);
    return true;
}
/// <summary>
/// Converts English digits that are outside tags to Persian or Arabic digits, in place.
/// A tag is a '&lt;' that is not immediately followed by a space and that reaches a '&gt;'
/// before any other '&lt;'; everything from that '&lt;' through its '&gt;' is left untouched.
/// </summary>
/// <param name="text">Text to modify in place.</param>
/// <param name="farsi">Uses <c>EnglishToFarsiNumberMap</c> when true, <c>EnglishToHinduNumberMap</c> otherwise.</param>
public static void FixNumbersOutsideOfTags(FastStringBuilder text, bool farsi)
{
    // Pick the target map once. Looking digits up directly in it avoids building a key set on
    // every call and avoids indexing one map with keys taken from the other.
    var digitMap = farsi ? EnglishToFarsiNumberMap : EnglishToHinduNumberMap;

    for (int i = 0; i < text.Length; i++)
    {
        int iChar = text.Get(i);

        // skip valid tags
        if (iChar == '<')
        {
            bool sawValidTag = false;

            for (int j = i + 1; j < text.Length; j++)
            {
                int jChar = text.Get(j);

                if ((j == i + 1 && jChar == ' ') || jChar == '<')
                {
                    // "< ..." or a nested '<' means this was not a tag after all.
                    break;
                }

                if (jChar == '>')
                {
                    // Jump to the closing bracket; the loop increment moves past it.
                    i = j;
                    sawValidTag = true;
                    break;
                }
            }

            if (sawValidTag)
            {
                continue;
            }
        }

        // Values above 0xFFFF must not be narrowed to char: the truncated value could collide
        // with an ASCII digit and be converted by mistake.
        if (iChar >= 0
            && iChar <= char.MaxValue
            && digitMap.TryGetValue((char)iChar, out var localizedDigit))
        {
            text.Set(i, localizedDigit);
        }
    }
}
/// <summary>
/// Inserts <paramref name="count"/> elements of <paramref name="str"/>, starting at
/// <paramref name="offset"/>, into this builder at index <paramref name="pos"/>.
/// </summary>
/// <param name="pos">Index in this builder where the first inserted element goes.</param>
/// <param name="str">Source builder; must be a different instance.</param>
/// <param name="offset">Index of the first element to take from <paramref name="str"/>.</param>
/// <param name="count">Number of elements to insert; zero is a no-op.</param>
/// <exception cref="InvalidOperationException"><paramref name="str"/> is this builder.</exception>
public void Insert(int pos, FastStringBuilder str, int offset, int count)
{
    if (str == this)
    {
        throw new InvalidOperationException("You cannot pass the same string builder to insert");
    }

    if (count == 0)
    {
        return;
    }

    Length += count;
    EnsureCapacity(Length, true);

    // Open a gap of `count` slots at `pos`, walking from the end so nothing is overwritten early.
    int target = Length - 1;
    while (target - count >= pos)
    {
        array[target] = array[target - count];
        target--;
    }

    // Fill the gap from the source builder.
    int written = 0;
    while (written < count)
    {
        array[pos + written] = str.array[offset + written];
        written++;
    }
}
/// <summary>
/// Inserts the whole content of <paramref name="str"/> into this builder at index <paramref name="pos"/>.
/// </summary>
/// <param name="pos">Index in this builder where the first inserted element goes.</param>
/// <param name="str">Source builder.</param>
public void Insert(int pos, FastStringBuilder str) => Insert(pos, str, 0, str.Length);
/// <summary>
/// Replaces the content of this builder with the content of <paramref name="other"/>.
/// </summary>
/// <param name="other">Builder whose backing array and length are copied.</param>
public void SetValue(FastStringBuilder other)
{
    // Make room for other.Length elements first.
    // NOTE(review): the `false` argument presumably means "existing values need not be kept" — confirm against EnsureCapacity.
    EnsureCapacity(other.Length, false);

    // Copy the source backing array into ours (argument order is source, destination as written here).
    Copy(other.array, array);

    // Adopt the source length last, once the data is in place.
    Length = other.Length;
}
/// <summary>
/// Fixes the flow of the text. <paramref name="input"/> is walked from its last character to its
/// first; right-to-left characters are appended to <paramref name="output"/> directly, while
/// characters whose order must be preserved (English letters, numbers, some punctuation) are
/// held in <c>LtrTextHolder</c> and streamed out in their original order.
/// </summary>
/// <param name="input">Text to read.</param>
/// <param name="output">Builder that receives the re-ordered text.</param>
/// <param name="farsi">Forwarded to <c>TextUtils.IsNumber</c>.</param>
/// <param name="fixTextTags">When true, the held run is flushed at the '&gt;' and '&lt;' of a valid tag.</param>
/// <param name="preserveNumbers">Forwarded to <c>TextUtils.IsNumber</c>.</param>
public static void Fix(FastStringBuilder input, FastStringBuilder output, bool farsi, bool fixTextTags, bool preserveNumbers)
{
    // Streams the held characters into the output, last added first, and empties the holder.
    // Characters are added while walking backwards, so this restores their original order.
    void FlushLtrText()
    {
        for (int k = LtrTextHolder.Count - 1; k >= 0; k--)
        {
            output.Append(LtrTextHolder[k]);
        }

        LtrTextHolder.Clear();
    }

    // Some texts like tags, English words and numbers need to be displayed in their original order.
    // This list keeps the characters that their order should be reserved and streams reserved texts into final letters.
    LtrTextHolder.Clear();

    for (int i = input.Length - 1; i >= 0; i--)
    {
        bool isInMiddle = i > 0 && i < input.Length - 1;
        bool isAtBeginning = i == 0;
        bool isAtEnd = i == input.Length - 1;

        char characterAtThisIndex = input.Get(i);

        char nextCharacter = default;
        if (!isAtEnd)
        {
            nextCharacter = input.Get(i + 1);
        }

        char previousCharacter = default;
        if (!isAtBeginning)
        {
            previousCharacter = input.Get(i - 1);
        }

        if (char.IsPunctuation(characterAtThisIndex) || char.IsSymbol(characterAtThisIndex))
        {
            if (fixTextTags && characterAtThisIndex == '>')
            {
                // We need to check if it is actually the beginning of a tag.
                bool isValidTag = false;

                // A '>' that is the last character of the text is not treated as a tag.
                if (!isAtEnd)
                {
                    for (int j = i - 1; j >= 0; j--)
                    {
                        char tagCharacter = input.Get(j);

                        // Tags do not have space inside
                        if (tagCharacter == ' ')
                        {
                            break;
                        }

                        // Tags do not have RTL characters inside
                        if (TextUtils.IsRTLCharacter(tagCharacter))
                        {
                            break;
                        }

                        if (tagCharacter == '<')
                        {
                            isValidTag = true;
                            break;
                        }
                    }
                }

                if (isValidTag)
                {
                    FlushLtrText();
                }
            }

            // Mirror paired punctuation that touches an RTL character.
            char mirrored;
            switch (characterAtThisIndex)
            {
                case ')':
                    mirrored = '(';
                    break;
                case '(':
                    mirrored = ')';
                    break;
                case '«':
                    mirrored = '»';
                    break;
                case '»':
                    mirrored = '«';
                    break;
                default:
                    mirrored = characterAtThisIndex;
                    break;
            }

            if (mirrored != characterAtThisIndex)
            {
                // Only neighbours that actually exist are consulted: the last character has no
                // next neighbour and the first has no previous one. The guillemets previously
                // consulted the missing neighbour at the text edges and so were never mirrored there.
                bool touchesRtl;
                if (isInMiddle)
                {
                    touchesRtl = TextUtils.IsRTLCharacter(previousCharacter)
                                 || TextUtils.IsRTLCharacter(nextCharacter);
                }
                else if (isAtEnd)
                {
                    touchesRtl = TextUtils.IsRTLCharacter(previousCharacter);
                }
                else
                {
                    touchesRtl = TextUtils.IsRTLCharacter(nextCharacter);
                }

                if (touchesRtl)
                {
                    characterAtThisIndex = mirrored;
                }
            }

            if (isInMiddle)
            {
                bool isAfterRTLCharacter = TextUtils.IsRTLCharacter(previousCharacter);
                bool isBeforeRTLCharacter = TextUtils.IsRTLCharacter(nextCharacter);
                bool isBeforeWhiteSpace = char.IsWhiteSpace(nextCharacter);
                bool isAfterWhiteSpace = char.IsWhiteSpace(previousCharacter);
                bool isUnderline = characterAtThisIndex == '_';
                bool isSpecialPunctuation = characterAtThisIndex == '.'
                                            || characterAtThisIndex == '،'
                                            || characterAtThisIndex == '؛';

                if (isBeforeRTLCharacter && isAfterRTLCharacter
                    || isAfterWhiteSpace && isSpecialPunctuation
                    || isBeforeWhiteSpace && isAfterRTLCharacter
                    || isBeforeRTLCharacter && isAfterWhiteSpace
                    || (isBeforeRTLCharacter || isAfterRTLCharacter) && isUnderline)
                {
                    // This punctuation belongs to the RTL flow: emit the held run, then the character.
                    FlushLtrText();
                    output.Append(characterAtThisIndex);
                }
                else
                {
                    LtrTextHolder.Add(characterAtThisIndex);
                }
            }
            else if (isAtEnd)
            {
                LtrTextHolder.Add(characterAtThisIndex);
            }
            else if (isAtBeginning)
            {
                output.Append(characterAtThisIndex);
            }

            if (fixTextTags && characterAtThisIndex == '<')
            {
                bool valid = false;

                // A '<' that is the first character of the text is not treated as a tag.
                if (!isAtBeginning)
                {
                    for (int j = i + 1; j < input.Length; j++)
                    {
                        char tagCharacter = input.Get(j);

                        // Tags do not have space inside
                        if (tagCharacter == ' ')
                        {
                            break;
                        }

                        // Tags do not have RTL characters inside
                        if (TextUtils.IsRTLCharacter(tagCharacter))
                        {
                            break;
                        }

                        if (tagCharacter == '>')
                        {
                            valid = true;
                            break;
                        }
                    }
                }

                if (valid)
                {
                    FlushLtrText();
                }
            }

            continue;
        }

        if (isInMiddle)
        {
            bool isAfterEnglishChar = TextUtils.IsEnglishLetter(previousCharacter);
            bool isBeforeEnglishChar = TextUtils.IsEnglishLetter(nextCharacter);
            bool isAfterNumber = TextUtils.IsNumber(previousCharacter, preserveNumbers, farsi);
            bool isBeforeNumber = TextUtils.IsNumber(nextCharacter, preserveNumbers, farsi);
            bool isAfterSymbol = char.IsSymbol(previousCharacter);
            bool isBeforeSymbol = char.IsSymbol(nextCharacter);

            // For cases where english words and farsi/arabic are mixed. This allows for using farsi/arabic, english and numbers in one sentence.
            // If the space is between numbers,symbols or English words, keep the order
            if (characterAtThisIndex == ' '
                && (isBeforeEnglishChar || isBeforeNumber || isBeforeSymbol)
                && (isAfterEnglishChar || isAfterNumber || isAfterSymbol))
            {
                LtrTextHolder.Add(characterAtThisIndex);
                continue;
            }
        }

        if (TextUtils.IsEnglishLetter(characterAtThisIndex)
            || TextUtils.IsNumber(characterAtThisIndex, preserveNumbers, farsi))
        {
            LtrTextHolder.Add(characterAtThisIndex);
            continue;
        }

        // Surrogate halves are held with the LTR run so that a pair keeps its original order.
        if (characterAtThisIndex >= (char)0xD800 && characterAtThisIndex <= (char)0xDBFF
            || characterAtThisIndex >= (char)0xDC00 && characterAtThisIndex <= (char)0xDFFF)
        {
            LtrTextHolder.Add(characterAtThisIndex);
            continue;
        }

        FlushLtrText();

        // 0xFFFF and the zero-width non-joiner are dropped from the output.
        if (characterAtThisIndex != 0xFFFF
            && characterAtThisIndex != (int)GeneralLetters.ZeroWidthNoJoiner)
        {
            output.Append(characterAtThisIndex);
        }
    }

    FlushLtrText();
}
/// <summary>
/// Searches <paramref name="str"/>, from index <paramref name="start"/> onwards, for the first
/// '&lt;' ... '&gt;' span that contains no RTL character.
/// </summary>
/// <param name="str">Text to search.</param>
/// <param name="start">Index at which the search begins.</param>
/// <param name="tagStart">Index of the '&lt;'; 0 when nothing is found.</param>
/// <param name="tagEnd">Index just past the '&gt;'; 0 when nothing is found.</param>
/// <param name="tagType">0 = no tag found, 1 = opening, 2 = closing ('/' right after '&lt;'), 3 = self contained ('/' right before '&gt;').</param>
/// <param name="hashCode">Hash built from the first run of letters after the '&lt;'; 0 when nothing is found.</param>
public static void FindTag(
    FastStringBuilder str,
    int start,
    out int tagStart,
    out int tagEnd,
    out int tagType,
    out int hashCode)
{
    int open = start;

    while (open < str.Length)
    {
        if (str.Get(open) != '<')
        {
            open++;
            continue;
        }

        bool stillHashing = true;
        hashCode = 0;

        for (int close = open + 1; close < str.Length; close++)
        {
            char current = str.Get(close);

            if (stillHashing)
            {
                if (char.IsLetter(current))
                {
                    unchecked
                    {
                        hashCode = hashCode == 0
                            ? current.GetHashCode()
                            : (hashCode * 397) ^ current.GetHashCode();
                    }
                }
                else if (hashCode != 0)
                {
                    // We have computed the hash code. Now we reached a non letter character. We need to stop
                    stillHashing = false;
                }
            }

            // Rich text tag cannot contain RTL chars
            if (TextUtils.IsRTLCharacter(current))
            {
                break;
            }

            if (current != '>')
            {
                continue;
            }

            // Check if the tag is closing, opening or self contained
            tagStart = open;
            tagEnd = close + 1;

            if (str.Get(close - 1) == '/')
            {
                // This is self contained.
                tagType = 3;
            }
            else if (str.Get(open + 1) == '/')
            {
                // This is closing
                tagType = 2;
            }
            else
            {
                tagType = 1;
            }

            return;
        }

        // This '<' did not lead to a tag; try the next character.
        open++;
    }

    tagStart = 0;
    tagEnd = 0;
    tagType = 0;
    hashCode = 0;
}
/// <summary>
/// Fixes rich text tags in <paramref name="text"/> in place by reversing the characters of each tag.
/// Opening and self contained tags are reversed when they are found. A closing tag is remembered
/// and reversed only once a later opening tag with the same hash code is found.
/// </summary>
/// <param name="text">Text to modify in place.</param>
public static void Fix(FastStringBuilder text)
{
    // The pending lists are shared state. Unmatched closing tags left by an earlier call hold
    // positions that are meaningless for this text, so start from empty lists.
    ClosedTags.Clear();
    ClosedTagsHash.Clear();

    for (int i = 0; i < text.Length; i++)
    {
        FindTag(text, i, out int tagStart, out int tagEnd, out int tagType, out int hashCode);

        // If we couldn't find a tag, end the process
        if (tagType == 0)
        {
            break;
        }

        switch (tagType)
        {
            case 1: // Opening tag
            {
                // The opening tag is always reversed, whether or not a closing tag is pending.
                text.Reverse(tagStart, tagEnd - tagStart);

                // Search the pending closing tags, most recent first, for the same tag name.
                for (int j = ClosedTagsHash.Count - 1; j >= 0; j--)
                {
                    if (ClosedTagsHash[j] != hashCode)
                    {
                        continue;
                    }

                    Tag closingTag = ClosedTags[j];
                    ClosedTags.RemoveAt(j);
                    ClosedTagsHash.RemoveAt(j);

                    text.Reverse(closingTag.Start, closingTag.End - closingTag.Start);
                    break;
                }

                break;
            }
            case 2: // Closing tag
            {
                ClosedTags.Add(new Tag(tagStart, tagEnd));
                ClosedTagsHash.Add(hashCode);
                break;
            }
            case 3: // Self contained tag
            {
                text.Reverse(tagStart, tagEnd - tagStart);
                break;
            }
        }

        // tagEnd is the index just past '>'. The loop increment adds one, so step back by one to
        // resume exactly at tagEnd; otherwise a tag that starts right after this one is skipped.
        i = tagEnd - 1;
    }
}
/// <summary>
/// Creates the two shared scratch builders, each sized with <c>DefaultBufferSize</c>.
/// </summary>
static RTLSupport()
{
    // Receives the glyph fixer's result.
    glyphFixerOutput = new FastStringBuilder(DefaultBufferSize);

    // Holds the caller's text while it is being processed.
    inputBuilder = new FastStringBuilder(DefaultBufferSize);
}
/// <summary>
/// Replaces every non-overlapping occurrence of <paramref name="oldStr"/> in this builder with
/// <paramref name="newStr"/>, scanning left to right. Replaced text is not scanned again.
/// </summary>
/// <param name="oldStr">Sequence to search for. An empty sequence leaves the builder unchanged.</param>
/// <param name="newStr">Sequence written in place of each occurrence; may be shorter, equal or longer.</param>
public void Replace(FastStringBuilder oldStr, FastStringBuilder newStr)
{
    int oldLength = oldStr.Length;
    int newLength = newStr.Length;

    // An empty pattern would match at every index; treat it as "nothing to replace".
    if (oldLength == 0)
    {
        return;
    }

    // Stop where a full occurrence no longer fits, so comparisons never read past `length`.
    for (int i = 0; i <= length - oldLength; i++)
    {
        bool match = true;
        for (int j = 0; j < oldLength; j++)
        {
            if (array[i + j] != oldStr.Get(j))
            {
                match = false;
                break;
            }
        }

        if (!match)
        {
            continue;
        }

        if (newLength > oldLength)
        {
            // We need to expand capacity
            int diff = newLength - oldLength;
            length += diff;
            EnsureCapacity(length, true);

            // Move everything after the occurrence forward by the difference, back to front.
            for (int k = length - diff - 1; k >= i + oldLength; k--)
            {
                array[k + diff] = array[k];
            }
        }
        else if (newLength < oldLength)
        {
            // We need to shrink
            int diff = oldLength - newLength;

            // Pull the tail back so it starts right after the replacement text, i.e. at
            // i + newLength. Starting anywhere else drops or duplicates characters.
            for (int k = i + newLength; k < length - diff; k++)
            {
                array[k] = array[k + diff];
            }

            length -= diff;
        }

        // Write the new string over the gap that is now exactly newLength wide.
        for (int k = 0; k < newLength; k++)
        {
            array[i + k] = newStr.Get(k);
        }

        // Continue right after the replacement; the loop increment supplies the final +1.
        i += newLength - 1;
    }
}
/// <summary>
/// Fixes the shape of letters based on their position. <paramref name="output"/> starts as a
/// copy of <paramref name="input"/>; letters that need a different form are then overwritten in it.
/// </summary>
/// <param name="input">Text to read; its Ya letters are normalized in place first.</param>
/// <param name="output">Builder that receives the shaped text.</param>
/// <param name="preserveNumbers">When true, digits are left as they are.</param>
/// <param name="farsi">Selects Farsi or Arabic conventions for Ya and digits.</param>
/// <param name="fixTextTags">When true, digits inside tags are not converted.</param>
public static void Fix(FastStringBuilder input, FastStringBuilder output, bool preserveNumbers, bool farsi, bool fixTextTags)
{
    FixYah(input, farsi);
    output.SetValue(input);

    for (int i = 0; i < input.Length; i++)
    {
        int letter = input.Get(i);
        bool mergedLamAlef = false;

        // For special Lam Letter connections.
        if (letter == (int)ArabicGeneralLetters.Lam && i < input.Length - 1)
        {
            mergedLamAlef = HandleSpecialLam(input, output, i);
            if (mergedLamAlef)
            {
                // Continue with the combined character that was written to the output.
                letter = output.Get(i);
            }
        }

        // We don't want to fix tatweel or zwnj character
        bool leaveUntouched =
            letter == (int)ArabicGeneralLetters.ArabicTatweel
            || letter == (int)ArabicGeneralLetters.ZeroWidthNoJoiner;
        if (leaveUntouched)
        {
            continue;
        }

        if (letter < 0xFFFF && TextUtils.IsGlyphFixedArabicCharacter((char)letter))
        {
            char converted = GlyphTable.Convert((char)letter);

            // Offset added to the converted character for each positional form; 0 leaves the output as is.
            int formOffset = 0;
            if (IsMiddleLetter(input, i))
            {
                formOffset = 3;
            }
            else if (IsFinishingLetter(input, i))
            {
                formOffset = 1;
            }
            else if (IsLeadingLetter(input, i))
            {
                formOffset = 2;
            }

            if (formOffset != 0)
            {
                output.Set(i, (char)(converted + formOffset));
            }
        }

        // If this letter as Lam and special Lam-Alef connection was made, We want to skip the Alef
        // (Lam-Alef occupies 1 space)
        if (mergedLamAlef)
        {
            i++;
        }
    }

    if (preserveNumbers)
    {
        return;
    }

    if (fixTextTags)
    {
        FixNumbersOutsideOfTags(output, farsi);
    }
    else
    {
        FixNumbers(output, farsi);
    }
}
/// <summary>
/// Removes tashkeel from text. The position of each mark listed in the switch below is first
/// recorded in <c>TashkeelLocations</c>; then all occurrences of the marks are removed from
/// <paramref name="input"/>. The three combined Shadda isolated forms are removed without being recorded.
/// </summary>
/// <param name="input">Text to strip in place.</param>
public static void RemoveTashkeel(FastStringBuilder input)
{
    // Pass 1: remember where every recordable mark sits.
    for (int i = 0; i < input.Length; i++)
    {
        var mark = (TashkeelCharacters)input.Get(i);

        switch (mark)
        {
            case TashkeelCharacters.Fathan:
            case TashkeelCharacters.Dammatan:
            case TashkeelCharacters.Kasratan:
            case TashkeelCharacters.Fatha:
            case TashkeelCharacters.Damma:
            case TashkeelCharacters.Kasra:
            case TashkeelCharacters.Shadda:
            case TashkeelCharacters.Sukun:
            case TashkeelCharacters.MaddahAbove:
            case TashkeelCharacters.SuperscriptAlef:
                TashkeelLocations.Add(new TashkeelLocation(mark, i));
                break;
        }
    }

    // Pass 2: strip the marks, one kind at a time.
    void Strip(TashkeelCharacters kind)
    {
        input.RemoveAll((char)kind);
    }

    Strip(TashkeelCharacters.Fathan);
    Strip(TashkeelCharacters.Dammatan);
    Strip(TashkeelCharacters.Kasratan);
    Strip(TashkeelCharacters.Fatha);
    Strip(TashkeelCharacters.Damma);
    Strip(TashkeelCharacters.Kasra);
    Strip(TashkeelCharacters.Shadda);
    Strip(TashkeelCharacters.Sukun);
    Strip(TashkeelCharacters.MaddahAbove);
    Strip(TashkeelCharacters.ShaddaWithFathaIsolatedForm);
    Strip(TashkeelCharacters.ShaddaWithDammaIsolatedForm);
    Strip(TashkeelCharacters.ShaddaWithKasraIsolatedForm);
    Strip(TashkeelCharacters.SuperscriptAlef);
}
/// <summary>
/// Is the letter at provided index a leading letter?
/// A letter qualifies when the letter before it does not connect to it, the letter itself can
/// connect to what follows, and the letter after it can be connected to.
/// </summary>
/// <param name="letters">Text being inspected.</param>
/// <param name="index">Index of the letter to test.</param>
/// <returns><see langword="true" /> if the letter is a leading letter</returns>
private static bool IsLeadingLetter(FastStringBuilder letters, int index)
{
    var currentIndexLetter = letters.Get(index);

    int previousIndexLetter = default;
    if (index != 0)
    {
        previousIndexLetter = letters.Get(index - 1);
    }

    bool hasNextLetter = index < letters.Length - 1;
    int nextIndexLetter = default;
    if (hasNextLetter)
    {
        nextIndexLetter = letters.Get(index + 1);
    }

    // The previous side is "open" when there is no previous letter, when it is not a glyph-fixed
    // Arabic character, or when it is one of the letters that never connect forward.
    // ArabicIsolatedLetters.WawHamza is included so the isolated list matches the general list
    // and the lists used by IsMiddleLetter and IsFinishingLetter.
    bool isPreviousLetterNonConnectable =
        index == 0
        || (previousIndexLetter < 0xFFFF && !TextUtils.IsGlyphFixedArabicCharacter((char)previousIndexLetter))
        || previousIndexLetter == (int)ArabicGeneralLetters.Alef
        || previousIndexLetter == (int)ArabicGeneralLetters.Dal
        || previousIndexLetter == (int)ArabicGeneralLetters.Thal
        || previousIndexLetter == (int)ArabicGeneralLetters.Ra2
        || previousIndexLetter == (int)ArabicGeneralLetters.Zeen
        || previousIndexLetter == (int)ArabicGeneralLetters.PersianZe
        || previousIndexLetter == (int)ArabicGeneralLetters.Waw
        || previousIndexLetter == (int)ArabicGeneralLetters.AlefMad
        || previousIndexLetter == (int)ArabicGeneralLetters.AlefHamza
        || previousIndexLetter == (int)ArabicGeneralLetters.Hamza
        || previousIndexLetter == (int)ArabicGeneralLetters.AlefMaksoor
        || previousIndexLetter == (int)ArabicGeneralLetters.ZeroWidthNoJoiner
        || previousIndexLetter == (int)ArabicGeneralLetters.WawHamza
        || previousIndexLetter == (int)ArabicIsolatedLetters.Alef
        || previousIndexLetter == (int)ArabicIsolatedLetters.Dal
        || previousIndexLetter == (int)ArabicIsolatedLetters.Thal
        || previousIndexLetter == (int)ArabicIsolatedLetters.Ra2
        || previousIndexLetter == (int)ArabicIsolatedLetters.Zeen
        || previousIndexLetter == (int)ArabicIsolatedLetters.PersianZe
        || previousIndexLetter == (int)ArabicIsolatedLetters.Waw
        || previousIndexLetter == (int)ArabicIsolatedLetters.AlefMad
        || previousIndexLetter == (int)ArabicIsolatedLetters.AlefHamza
        || previousIndexLetter == (int)ArabicIsolatedLetters.Hamza
        || previousIndexLetter == (int)ArabicIsolatedLetters.AlefMaksoor
        || previousIndexLetter == (int)ArabicIsolatedLetters.WawHamza;

    if (!isPreviousLetterNonConnectable)
    {
        return false;
    }

    // Letters that never connect to the letter after them cannot lead.
    bool canThisLetterBeLeading =
        currentIndexLetter != ' '
        && currentIndexLetter != (int)ArabicGeneralLetters.Dal
        && currentIndexLetter != (int)ArabicGeneralLetters.Thal
        && currentIndexLetter != (int)ArabicGeneralLetters.Ra2
        && currentIndexLetter != (int)ArabicGeneralLetters.Zeen
        && currentIndexLetter != (int)ArabicGeneralLetters.PersianZe
        && currentIndexLetter != (int)ArabicGeneralLetters.Alef
        && currentIndexLetter != (int)ArabicGeneralLetters.AlefHamza
        && currentIndexLetter != (int)ArabicGeneralLetters.AlefMaksoor
        && currentIndexLetter != (int)ArabicGeneralLetters.AlefMad
        && currentIndexLetter != (int)ArabicGeneralLetters.WawHamza
        && currentIndexLetter != (int)ArabicGeneralLetters.Waw
        && currentIndexLetter != (int)ArabicGeneralLetters.ZeroWidthNoJoiner
        && currentIndexLetter != (int)ArabicGeneralLetters.Hamza;

    if (!canThisLetterBeLeading)
    {
        return false;
    }

    // There must be a following glyph-fixed Arabic character that accepts a connection.
    bool isNextLetterConnectable =
        hasNextLetter
        && (nextIndexLetter < 0xFFFF && TextUtils.IsGlyphFixedArabicCharacter((char)nextIndexLetter))
        && nextIndexLetter != (int)ArabicGeneralLetters.Hamza
        && nextIndexLetter != (int)ArabicGeneralLetters.ZeroWidthNoJoiner;

    return isNextLetterConnectable;
}
/// <summary>
/// Fixes the flow of the text. <paramref name="input"/> is walked from its last character to its
/// first. Right-to-left characters go straight to <paramref name="output"/>; characters whose
/// order must be kept (English letters, numbers, some punctuation, surrogates) wait in
/// <c>LtrTextHolder</c>; whole tags are collected in <c>TagTextHolder</c>. Both holders are
/// emitted through <c>FlushBufferToOutput</c>.
/// </summary>
/// <param name="input">Text to read.</param>
/// <param name="output">Builder that receives the re-ordered text.</param>
/// <param name="farsi">Forwarded to <c>TextUtils.IsNumber</c>.</param>
/// <param name="fixTextTags">When true, valid tags are detected and emitted as a unit.</param>
/// <param name="preserveNumbers">Forwarded to <c>TextUtils.IsNumber</c>.</param>
public static void Fix(FastStringBuilder input, FastStringBuilder output, bool farsi, bool fixTextTags, bool preserveNumbers)
{
    // Some texts like tags, English words and numbers need to be displayed in their original order.
    // These lists keep the characters whose order should be reserved and stream them into the output.
    LtrTextHolder.Clear();
    TagTextHolder.Clear();

    for (int i = input.Length - 1; i >= 0; i--)
    {
        bool isAtBeginning = i == 0;
        bool isAtEnd = i == input.Length - 1;
        bool isInMiddle = i > 0 && i < input.Length - 1;

        char current = input.Get(i);
        char next = isAtEnd ? default(char) : input.Get(i + 1);
        char previous = isAtBeginning ? default(char) : input.Get(i - 1);

        if (fixTextTags && current == '>')
        {
            // We need to check if it is actually the end of a tag: walk back looking for '<'.
            TagTextHolder.Add(current);
            int tagOpenIndex = -1;

            for (int j = i - 1; j >= 0; j--)
            {
                var candidate = input.Get(j);

                // Tags do not have space inside
                if (candidate == ' ')
                {
                    break;
                }

                // Tags do not have RTL characters inside
                if (TextUtils.IsRTLCharacter(candidate))
                {
                    break;
                }

                TagTextHolder.Add(candidate);

                if (candidate == '<')
                {
                    tagOpenIndex = j;
                    break;
                }
            }

            if (tagOpenIndex >= 0)
            {
                // Emit pending LTR text, then the tag, and resume before the tag.
                FlushBufferToOutput(LtrTextHolder, output);
                FlushBufferToOutput(TagTextHolder, output);
                i = tagOpenIndex;
                continue;
            }

            // Not a tag after all: discard what was collected and treat '>' normally.
            TagTextHolder.Clear();
        }

        if (char.IsPunctuation(current) || char.IsSymbol(current))
        {
            if (MirroredCharsSet.Contains(current))
            {
                // IsRTLCharacter returns false for null
                bool followsRtl = TextUtils.IsRTLCharacter(previous);
                bool precedesRtl = TextUtils.IsRTLCharacter(next);

                if (followsRtl || precedesRtl)
                {
                    current = MirroredCharsMap[current];
                }
            }

            if (!isInMiddle)
            {
                if (isAtEnd)
                {
                    LtrTextHolder.Add(current);
                }
                else if (isAtBeginning)
                {
                    output.Append(current);
                }

                continue;
            }

            bool afterRtl = TextUtils.IsRTLCharacter(previous);
            bool beforeRtl = TextUtils.IsRTLCharacter(next);
            bool beforeWhiteSpace = char.IsWhiteSpace(next);
            bool afterWhiteSpace = char.IsWhiteSpace(previous);
            bool isUnderline = current == '_';
            bool isSpecialPunctuation = current == '.' || current == '،' || current == '؛';

            bool belongsToRtlFlow =
                (beforeRtl && afterRtl)
                || (afterWhiteSpace && isSpecialPunctuation)
                || (beforeWhiteSpace && afterRtl)
                || (beforeRtl && afterWhiteSpace)
                || ((beforeRtl || afterRtl) && isUnderline);

            if (belongsToRtlFlow)
            {
                FlushBufferToOutput(LtrTextHolder, output);
                output.Append(current);
            }
            else
            {
                LtrTextHolder.Add(current);
            }

            continue;
        }

        if (isInMiddle)
        {
            bool afterEnglish = TextUtils.IsEnglishLetter(previous);
            bool beforeEnglish = TextUtils.IsEnglishLetter(next);
            bool afterNumber = TextUtils.IsNumber(previous, preserveNumbers, farsi);
            bool beforeNumber = TextUtils.IsNumber(next, preserveNumbers, farsi);
            bool afterSymbol = char.IsSymbol(previous);
            bool beforeSymbol = char.IsSymbol(next);

            bool ltrOnTheRight = beforeEnglish || beforeNumber || beforeSymbol;
            bool ltrOnTheLeft = afterEnglish || afterNumber || afterSymbol;

            // For cases where english words and farsi/arabic are mixed. This allows for using farsi/arabic, english and numbers in one sentence.
            // If the space is between numbers,symbols or English words, keep the order
            if (current == ' ' && ltrOnTheRight && ltrOnTheLeft)
            {
                LtrTextHolder.Add(current);
                continue;
            }
        }

        if (TextUtils.IsEnglishLetter(current) || TextUtils.IsNumber(current, preserveNumbers, farsi))
        {
            LtrTextHolder.Add(current);
            continue;
        }

        // High (0xD800-0xDBFF) and low (0xDC00-0xDFFF) surrogates are held with the LTR run.
        if (char.IsSurrogate(current))
        {
            LtrTextHolder.Add(current);
            continue;
        }

        FlushBufferToOutput(LtrTextHolder, output);

        // 0xFFFF and the zero-width non-joiner are dropped from the output.
        bool isDropped = current == 0xFFFF || current == (int)GeneralLetters.ZeroWidthNoJoiner;
        if (!isDropped)
        {
            output.Append(current);
        }
    }

    FlushBufferToOutput(LtrTextHolder, output);
}
/// <summary>
/// Is the letter at provided index a middle letter?
/// A letter qualifies when the letter before it connects to it, the letter itself can connect
/// onwards, and the letter after it can be connected to.
/// </summary>
/// <param name="letters">Text being inspected.</param>
/// <param name="index">Index of the letter to test.</param>
/// <returns><see langword="true" /> if the letter is a middle letter</returns>
private static bool IsMiddleLetter(FastStringBuilder letters, int index)
{
    var current = letters.Get(index);

    bool hasPrevious = index != 0;
    int previous = default;
    if (hasPrevious)
    {
        previous = letters.Get(index - 1);
    }

    bool hasNext = index < letters.Length - 1;
    int next = default;
    if (hasNext)
    {
        next = letters.Get(index + 1);
    }

    // Letters that never connect to the letter after them cannot sit in the middle.
    bool currentNeverConnectsForward =
        current == (int)ArabicGeneralLetters.Alef
        || current == (int)ArabicGeneralLetters.Dal
        || current == (int)ArabicGeneralLetters.Thal
        || current == (int)ArabicGeneralLetters.Ra2
        || current == (int)ArabicGeneralLetters.Zeen
        || current == (int)ArabicGeneralLetters.PersianZe
        || current == (int)ArabicGeneralLetters.Waw
        || current == (int)ArabicGeneralLetters.AlefMad
        || current == (int)ArabicGeneralLetters.AlefHamza
        || current == (int)ArabicGeneralLetters.AlefMaksoor
        || current == (int)ArabicGeneralLetters.WawHamza
        || current == (int)ArabicGeneralLetters.ZeroWidthNoJoiner
        || current == (int)ArabicGeneralLetters.Hamza;
    bool currentCanBeMiddle = hasPrevious && !currentNeverConnectsForward;

    // The same kind of letter before this one breaks the connection from the left.
    bool previousNeverConnectsForward =
        previous == (int)ArabicGeneralLetters.Alef
        || previous == (int)ArabicGeneralLetters.Dal
        || previous == (int)ArabicGeneralLetters.Thal
        || previous == (int)ArabicGeneralLetters.Ra2
        || previous == (int)ArabicGeneralLetters.Zeen
        || previous == (int)ArabicGeneralLetters.PersianZe
        || previous == (int)ArabicGeneralLetters.Waw
        || previous == (int)ArabicGeneralLetters.AlefMad
        || previous == (int)ArabicGeneralLetters.AlefHamza
        || previous == (int)ArabicGeneralLetters.AlefMaksoor
        || previous == (int)ArabicGeneralLetters.WawHamza
        || previous == (int)ArabicGeneralLetters.Hamza
        || previous == (int)ArabicGeneralLetters.ZeroWidthNoJoiner
        || previous == (int)ArabicIsolatedLetters.Alef
        || previous == (int)ArabicIsolatedLetters.Dal
        || previous == (int)ArabicIsolatedLetters.Thal
        || previous == (int)ArabicIsolatedLetters.Ra2
        || previous == (int)ArabicIsolatedLetters.Zeen
        || previous == (int)ArabicIsolatedLetters.PersianZe
        || previous == (int)ArabicIsolatedLetters.Waw
        || previous == (int)ArabicIsolatedLetters.AlefMad
        || previous == (int)ArabicIsolatedLetters.AlefHamza
        || previous == (int)ArabicIsolatedLetters.AlefMaksoor
        || previous == (int)ArabicIsolatedLetters.WawHamza
        || previous == (int)ArabicIsolatedLetters.Hamza;
    bool previousConnects =
        hasPrevious
        && !previousNeverConnectsForward
        && (previous < 0xFFFF && TextUtils.IsGlyphFixedArabicCharacter((char)previous));

    // The following character must be a glyph-fixed Arabic character that accepts a connection.
    bool nextRefusesConnection =
        next == (int)ArabicGeneralLetters.ZeroWidthNoJoiner
        || next == (int)ArabicGeneralLetters.Hamza
        || next == (int)ArabicIsolatedLetters.Hamza;
    bool nextConnects =
        hasNext
        && (next < 0xFFFF && TextUtils.IsGlyphFixedArabicCharacter((char)next))
        && !nextRefusesConnection;

    return nextConnects && previousConnects && currentCanBeMiddle;
}