/// <summary>
/// Converts English digits that are outside tags to Persian or Arabic digits, in place.
/// </summary>
/// <param name="text">Buffer whose digits are rewritten in place.</param>
/// <param name="farsi">
/// <see langword="true" /> to map through <c>EnglishToFarsiNumberMap</c>,
/// <see langword="false" /> to map through <c>EnglishToHinduNumberMap</c>.
/// </param>
public static void FixNumbersOutsideOfTags(FastStringBuilder text, bool farsi)
{
    for (int i = 0; i < text.Length; i++)
    {
        var iChar = text.Get(i);

        // Skip over anything that looks like a complete tag: '<' ... '>' where the
        // character right after '<' is not a space and no second '<' appears before '>'.
        if (iChar == '<')
        {
            int tagEnd = -1;
            for (int j = i + 1; j < text.Length; j++)
            {
                char jChar = text.Get(j);
                if ((j == i + 1 && jChar == ' ') || jChar == '<')
                {
                    break;
                }

                if (jChar == '>')
                {
                    tagEnd = j;
                    break;
                }
            }

            if (tagEnd >= 0)
            {
                // Resume right after the closing '>' (the loop increment moves past it).
                i = tagEnd;
                continue;
            }
        }

        // The key set of the Farsi map decides what counts as an English digit.
        // Looking it up directly avoids allocating a fresh HashSet on every call.
        if (EnglishToFarsiNumberMap.ContainsKey(iChar))
        {
            text.Set(i, farsi ? EnglishToFarsiNumberMap[iChar] : EnglishToHinduNumberMap[iChar]);
        }
    }
}
/// <summary>
/// Strips tashkeel characters from <paramref name="input" /> in place and records
/// each removed mark, with its original index, in <c>TashkeelLocations</c>.
/// </summary>
/// <param name="input">Buffer to compact; its <c>Length</c> is reduced by the number of removed marks.</param>
public static void RemoveTashkeel(FastStringBuilder input)
{
    TashkeelLocations.Clear();

    // Kept characters are copied down to writeIndex, which never gets ahead of readIndex.
    int writeIndex = 0;
    for (int readIndex = 0; readIndex < input.Length; readIndex++)
    {
        int codePoint = input.Get(readIndex);

        bool isTashkeel = Char32Utils.IsUnicode16Char(codePoint)
                          && TashkeelCharactersSet.Contains((char)codePoint);
        if (isTashkeel)
        {
            // Remember where the mark was, using the index in the original text.
            TashkeelLocations.Add(new TashkeelLocation((TashkeelCharacters)codePoint, readIndex));
            continue;
        }

        input.Set(writeIndex, codePoint);
        writeIndex++;
    }

    input.Length = writeIndex;
}
/// <summary>
/// Handles the special Lam-Alef connection: when the letter after the Lam at
/// <paramref name="i" /> is one of the Alef variants, the Lam slot in
/// <paramref name="output" /> is replaced by a combined glyph and the following slot
/// is overwritten with the 0xFFFF placeholder.
/// </summary>
/// <param name="input">Text that is inspected; it is not modified here.</param>
/// <param name="output">Text that receives the combined glyph and the placeholder.</param>
/// <param name="i">Index of Lam letter. The caller must guarantee that <c>i + 1</c> is a valid index.</param>
/// <returns><see langword="true" /> if special connection has been made.</returns>
private static bool HandleSpecialLam(FastStringBuilder input, FastStringBuilder output, int i)
{
    char combinedGlyph;
    switch (input.Get(i + 1))
    {
        case (char)ArabicGeneralLetters.AlefMaksoor:
            combinedGlyph = (char)0xFEF7;
            break;
        case (char)ArabicGeneralLetters.Alef:
            combinedGlyph = (char)0xFEF9;
            break;
        case (char)ArabicGeneralLetters.AlefHamza:
            combinedGlyph = (char)0xFEF5;
            break;
        case (char)ArabicGeneralLetters.AlefMad:
            combinedGlyph = (char)0xFEF3;
            break;
        default:
            // Next letter is not an Alef variant: nothing to combine.
            return false;
    }

    output.Set(i, combinedGlyph);

    // The Alef has been absorbed into the combined glyph; mark its slot as a placeholder.
    output.Set(i + 1, (char)0xFFFF);
    return true;
}
/// <summary>
/// Fixes the flow of the text: walks <paramref name="input" /> from its last character
/// to its first and appends to <paramref name="output" />, while keeping tags, English
/// words and numbers in their original left-to-right order.
/// </summary>
/// <param name="input">Source text; read only.</param>
/// <param name="output">Buffer that receives the reordered text (appended to).</param>
/// <param name="farsi">Forwarded to <c>TextUtils.IsNumber</c>.</param>
/// <param name="fixTextTags">When set, <c>&lt;...&gt;</c> runs without spaces or RTL characters are emitted as intact tags.</param>
/// <param name="preserveNumbers">Forwarded to <c>TextUtils.IsNumber</c>.</param>
public static void Fix(FastStringBuilder input, FastStringBuilder output, bool farsi, bool fixTextTags, bool preserveNumbers)
{
    // LtrTextHolder buffers characters whose order must be kept (English, numbers, some
    // punctuation); TagTextHolder buffers the characters of the tag currently being scanned.
    LtrTextHolder.Clear();
    TagTextHolder.Clear();

    for (int pos = input.Length - 1; pos >= 0; pos--)
    {
        bool first = pos == 0;
        bool last = pos == input.Length - 1;
        bool interior = !first && pos < input.Length - 1;

        char current = input.Get(pos);
        char following = last ? default(char) : input.Get(pos + 1);
        char preceding = first ? default(char) : input.Get(pos - 1);

        if (fixTextTags && current == '>')
        {
            // Scan backwards for the matching '<' to decide whether this really closes a tag.
            int tagOpenIndex = -1;
            TagTextHolder.Add(current);

            for (int scan = pos - 1; scan >= 0; scan--)
            {
                var candidate = input.Get(scan);

                // Tags contain neither spaces nor RTL characters.
                if (candidate == ' ' || TextUtils.IsRTLCharacter(candidate))
                {
                    break;
                }

                TagTextHolder.Add(candidate);

                if (candidate == '<')
                {
                    tagOpenIndex = scan;
                    break;
                }
            }

            if (tagOpenIndex >= 0)
            {
                FlushBufferToOutput(LtrTextHolder, output);
                FlushBufferToOutput(TagTextHolder, output);

                // Continue with the character before the tag.
                pos = tagOpenIndex;
                continue;
            }

            // Not a tag after all: discard what was collected and treat '>' normally.
            TagTextHolder.Clear();
        }

        if (char.IsPunctuation(current) || char.IsSymbol(current))
        {
            // The default (null) neighbour at either edge is expected to be non-RTL.
            bool followsRtl = TextUtils.IsRTLCharacter(preceding);
            bool precedesRtl = TextUtils.IsRTLCharacter(following);

            // Mirror brackets and similar characters when they touch RTL text.
            if (MirroredCharsSet.Contains(current) && (followsRtl || precedesRtl))
            {
                current = MirroredCharsMap[current];
            }

            if (interior)
            {
                bool precedesSpace = char.IsWhiteSpace(following);
                bool followsSpace = char.IsWhiteSpace(preceding);
                bool isUnderscore = current == '_';
                bool isSentenceMark = current == '.' || current == '،' || current == '؛';

                bool belongsToRtlFlow =
                    (precedesRtl && followsRtl)
                    || (followsSpace && isSentenceMark)
                    || (precedesSpace && followsRtl)
                    || (precedesRtl && followsSpace)
                    || ((precedesRtl || followsRtl) && isUnderscore);

                if (belongsToRtlFlow)
                {
                    FlushBufferToOutput(LtrTextHolder, output);
                    output.Append(current);
                }
                else
                {
                    LtrTextHolder.Add(current);
                }
            }
            else if (last)
            {
                LtrTextHolder.Add(current);
            }
            else if (first)
            {
                output.Append(current);
            }

            continue;
        }

        // Mixed English / Farsi-Arabic sentences: a space that sits between numbers,
        // symbols or English words stays inside the left-to-right run.
        if (interior && current == ' ')
        {
            bool ltrAfterSpace = TextUtils.IsEnglishLetter(following)
                                 || TextUtils.IsNumber(following, preserveNumbers, farsi)
                                 || char.IsSymbol(following);
            bool ltrBeforeSpace = TextUtils.IsEnglishLetter(preceding)
                                  || TextUtils.IsNumber(preceding, preserveNumbers, farsi)
                                  || char.IsSymbol(preceding);

            if (ltrAfterSpace && ltrBeforeSpace)
            {
                LtrTextHolder.Add(current);
                continue;
            }
        }

        bool keepsLtrOrder = TextUtils.IsEnglishLetter(current)
                             || TextUtils.IsNumber(current, preserveNumbers, farsi);
        if (keepsLtrOrder)
        {
            LtrTextHolder.Add(current);
            continue;
        }

        // Surrogate halves (0xD800-0xDFFF) must keep their relative order.
        if (char.IsSurrogate(current))
        {
            LtrTextHolder.Add(current);
            continue;
        }

        FlushBufferToOutput(LtrTextHolder, output);

        // 0xFFFF placeholders and zero-width non-joiners are dropped from the output.
        if (current != 0xFFFF && current != (int)GeneralLetters.ZeroWidthNoJoiner)
        {
            output.Append(current);
        }
    }

    FlushBufferToOutput(LtrTextHolder, output);
}
/// <summary>
/// Finds the first tag (<c>&lt;...&gt;</c>) at or after <paramref name="start" />.
/// </summary>
/// <param name="str">Text to search.</param>
/// <param name="start">Index at which the search begins.</param>
/// <param name="tagStart">Index of the tag's <c>&lt;</c>; 0 when no tag is found.</param>
/// <param name="tagEnd">Index just past the tag's <c>&gt;</c>; 0 when no tag is found.</param>
/// <param name="tagType">
/// 1 for an opening tag, 2 for a closing tag (<c>/</c> right after <c>&lt;</c>),
/// 3 for a self-contained tag (<c>/</c> right before <c>&gt;</c>), 0 when no tag is found.
/// </param>
/// <param name="hashCode">
/// Hash folded over the first run of letters inside the tag; 0 when no tag is found.
/// </param>
public static void FindTag(
    FastStringBuilder str,
    int start,
    out int tagStart,
    out int tagEnd,
    out int tagType,
    out int hashCode)
{
    for (int open = start; open < str.Length; open++)
    {
        if (str.Get(open) != '<')
        {
            continue;
        }

        bool hashing = true;
        hashCode = 0;

        for (int cursor = open + 1; cursor < str.Length; cursor++)
        {
            char c = str.Get(cursor);

            if (hashing)
            {
                if (char.IsLetter(c))
                {
                    unchecked
                    {
                        hashCode = hashCode == 0
                            ? c.GetHashCode()
                            : (hashCode * 397) ^ c.GetHashCode();
                    }
                }
                else if (hashCode != 0)
                {
                    // The letter run has ended; the hash is final.
                    hashing = false;
                }
            }

            // Rich text tag cannot contain RTL chars: give up on this '<'.
            if (TextUtils.IsRTLCharacter(c))
            {
                break;
            }

            if (c != '>')
            {
                continue;
            }

            tagStart = open;
            tagEnd = cursor + 1;

            // The self-contained check takes precedence over the closing check.
            if (str.Get(cursor - 1) == '/')
            {
                tagType = 3;
            }
            else if (str.Get(open + 1) == '/')
            {
                tagType = 2;
            }
            else
            {
                tagType = 1;
            }

            return;
        }
    }

    // No tag found.
    tagStart = 0;
    tagEnd = 0;
    tagType = 0;
    hashCode = 0;
}
/// <summary>
/// Replaces every non-overlapping occurrence of <paramref name="oldStr" /> in this
/// builder with <paramref name="newStr" />, in place, scanning left to right.
/// Text written by a replacement is not rescanned.
/// </summary>
/// <param name="oldStr">Sequence to search for. An empty sequence leaves the builder unchanged.</param>
/// <param name="newStr">Sequence written in place of each match; may be empty.</param>
public void Replace(FastStringBuilder oldStr, FastStringBuilder newStr)
{
    int oldLength = oldStr.Length;
    int newLength = newStr.Length;

    // An empty pattern matches at every position and would never terminate.
    if (oldLength == 0)
    {
        return;
    }

    // Stop as soon as the pattern no longer fits, so the scan never reads past 'length'.
    for (int i = 0; i + oldLength <= length; i++)
    {
        bool match = true;
        for (int j = 0; j < oldLength; j++)
        {
            if (array[i + j] != oldStr.Get(j))
            {
                match = false;
                break;
            }
        }

        if (!match)
        {
            continue;
        }

        if (oldLength < newLength)
        {
            // We need to expand capacity
            int diff = newLength - oldLength;
            length += diff;
            EnsureCapacity(length, true);

            // Move the tail forward by diff, back to front so nothing is overwritten early.
            for (int k = length - diff - 1; k >= i + oldLength; k--)
            {
                array[k + diff] = array[k];
            }
        }
        else if (oldLength > newLength)
        {
            // We need to shrink: pull the tail (everything after the match) back by diff.
            int diff = oldLength - newLength;
            for (int k = i + oldLength; k < length; k++)
            {
                array[k - diff] = array[k];
            }

            length -= diff;
        }

        for (int k = 0; k < newLength; k++)
        {
            array[i + k] = newStr.Get(k);
        }

        // Land on the last written character; the loop increment then moves to the
        // first character after the replacement, so adjacent matches are not skipped.
        i += newLength - 1;
    }
}
/// <summary>
/// Fixes the shape of letters based on their position in the word, then optionally
/// converts digits.
/// </summary>
/// <param name="input">Source text. It is passed to <c>FixYah</c> first and then copied into <paramref name="output" />.</param>
/// <param name="output">Receives the shaped text.</param>
/// <param name="preserveNumbers">When <see langword="true" />, digits are left untouched.</param>
/// <param name="farsi">Forwarded to <c>FixYah</c> and to the digit conversion.</param>
/// <param name="fixTextTags">When <see langword="true" />, digits inside tags are not converted.</param>
public static void Fix(FastStringBuilder input, FastStringBuilder output, bool preserveNumbers, bool farsi, bool fixTextTags)
{
    FixYah(input, farsi);
    output.SetValue(input);

    for (int index = 0; index < input.Length; index++)
    {
        int letter = input.Get(index);

        // A Lam followed by an Alef variant collapses into one combined glyph.
        bool mergedWithNext = letter == (int)ArabicGeneralLetters.Lam
                              && index < input.Length - 1
                              && HandleSpecialLam(input, output, index);
        if (mergedWithNext)
        {
            letter = output.Get(index);
        }

        // Tatweel and zero-width non-joiner are never reshaped.
        bool isTatweel = letter == (int)ArabicGeneralLetters.ArabicTatweel;
        bool isZwnj = letter == (int)ArabicGeneralLetters.ZeroWidthNoJoiner;
        if (isTatweel || isZwnj)
        {
            continue;
        }

        if (letter < 0xFFFF && TextUtils.IsGlyphFixedArabicCharacter((char)letter))
        {
            char baseGlyph = GlyphTable.Convert((char)letter);

            // Offset from the converted glyph: +3 middle, +1 finishing, +2 leading.
            // The checks are tried in that order; 0 means the slot is left as is.
            int formOffset = IsMiddleLetter(input, index) ? 3
                : IsFinishingLetter(input, index) ? 1
                : IsLeadingLetter(input, index) ? 2
                : 0;

            if (formOffset != 0)
            {
                output.Set(index, (char)(baseGlyph + formOffset));
            }
        }

        // The Alef was absorbed by the Lam-Alef glyph (which occupies one slot), so skip it.
        if (mergedWithNext)
        {
            index++;
        }
    }

    if (preserveNumbers)
    {
        return;
    }

    if (fixTextTags)
    {
        FixNumbersOutsideOfTags(output, farsi);
    }
    else
    {
        FixNumbers(output, farsi);
    }
}
/// <summary>
/// Is the letter at provided index a middle letter? That requires a previous letter
/// that connects forward, a current letter that connects forward, and a next letter
/// that can be connected to.
/// </summary>
/// <param name="letters">Text being shaped.</param>
/// <param name="index">Index of the letter to test.</param>
/// <returns><see langword="true" /> if the letter is a middle letter</returns>
private static bool IsMiddleLetter(FastStringBuilder letters, int index)
{
    var current = letters.Get(index);

    // A middle letter needs a neighbour on both sides.
    if (index == 0 || index >= letters.Length - 1)
    {
        return false;
    }

    int before = letters.Get(index - 1);
    int after = letters.Get(index + 1);

    // Letters that never connect to the letter after them cannot be in the middle.
    bool currentBreaksConnection =
        current == (int)ArabicGeneralLetters.Alef
        || current == (int)ArabicGeneralLetters.Dal
        || current == (int)ArabicGeneralLetters.Thal
        || current == (int)ArabicGeneralLetters.Ra2
        || current == (int)ArabicGeneralLetters.Zeen
        || current == (int)ArabicGeneralLetters.PersianZe
        || current == (int)ArabicGeneralLetters.Waw
        || current == (int)ArabicGeneralLetters.AlefMad
        || current == (int)ArabicGeneralLetters.AlefHamza
        || current == (int)ArabicGeneralLetters.AlefMaksoor
        || current == (int)ArabicGeneralLetters.WawHamza
        || current == (int)ArabicGeneralLetters.ZeroWidthNoJoiner
        || current == (int)ArabicGeneralLetters.Hamza;
    if (currentBreaksConnection)
    {
        return false;
    }

    // Same idea for the previous letter, in both its general and isolated forms.
    bool beforeBreaksConnection =
        before == (int)ArabicGeneralLetters.Alef
        || before == (int)ArabicGeneralLetters.Dal
        || before == (int)ArabicGeneralLetters.Thal
        || before == (int)ArabicGeneralLetters.Ra2
        || before == (int)ArabicGeneralLetters.Zeen
        || before == (int)ArabicGeneralLetters.PersianZe
        || before == (int)ArabicGeneralLetters.Waw
        || before == (int)ArabicGeneralLetters.AlefMad
        || before == (int)ArabicGeneralLetters.AlefHamza
        || before == (int)ArabicGeneralLetters.AlefMaksoor
        || before == (int)ArabicGeneralLetters.WawHamza
        || before == (int)ArabicGeneralLetters.Hamza
        || before == (int)ArabicGeneralLetters.ZeroWidthNoJoiner
        || before == (int)ArabicIsolatedLetters.Alef
        || before == (int)ArabicIsolatedLetters.Dal
        || before == (int)ArabicIsolatedLetters.Thal
        || before == (int)ArabicIsolatedLetters.Ra2
        || before == (int)ArabicIsolatedLetters.Zeen
        || before == (int)ArabicIsolatedLetters.PersianZe
        || before == (int)ArabicIsolatedLetters.Waw
        || before == (int)ArabicIsolatedLetters.AlefMad
        || before == (int)ArabicIsolatedLetters.AlefHamza
        || before == (int)ArabicIsolatedLetters.AlefMaksoor
        || before == (int)ArabicIsolatedLetters.WawHamza
        || before == (int)ArabicIsolatedLetters.Hamza;
    if (beforeBreaksConnection)
    {
        return false;
    }

    // The previous character must itself be a shapeable Arabic letter.
    bool beforeIsShapeable = before < 0xFFFF && TextUtils.IsGlyphFixedArabicCharacter((char)before);
    if (!beforeIsShapeable)
    {
        return false;
    }

    // The next character must be a shapeable letter that accepts a connection.
    return after < 0xFFFF
           && TextUtils.IsGlyphFixedArabicCharacter((char)after)
           && after != (int)ArabicGeneralLetters.ZeroWidthNoJoiner
           && after != (int)ArabicGeneralLetters.Hamza
           && after != (int)ArabicIsolatedLetters.Hamza;
}
/// <summary>
/// Is the letter at provided index a leading letter? That requires a letter that can
/// connect forward, a next letter that can be connected to, and no connection coming
/// from the previous character.
/// </summary>
/// <param name="letters">Text being shaped.</param>
/// <param name="index">Index of the letter to test.</param>
/// <returns><see langword="true" /> if the letter is a leading letter</returns>
private static bool IsLeadingLetter(FastStringBuilder letters, int index)
{
    var currentIndexLetter = letters.Get(index);

    // A leading letter needs something after it to connect to.
    if (index >= letters.Length - 1)
    {
        return false;
    }

    bool canThisLetterBeLeading =
        currentIndexLetter != ' '
        && currentIndexLetter != (int)ArabicGeneralLetters.Dal
        && currentIndexLetter != (int)ArabicGeneralLetters.Thal
        && currentIndexLetter != (int)ArabicGeneralLetters.Ra2
        && currentIndexLetter != (int)ArabicGeneralLetters.Zeen
        && currentIndexLetter != (int)ArabicGeneralLetters.PersianZe
        && currentIndexLetter != (int)ArabicGeneralLetters.Alef
        && currentIndexLetter != (int)ArabicGeneralLetters.AlefHamza
        && currentIndexLetter != (int)ArabicGeneralLetters.AlefMaksoor
        && currentIndexLetter != (int)ArabicGeneralLetters.AlefMad
        && currentIndexLetter != (int)ArabicGeneralLetters.WawHamza
        && currentIndexLetter != (int)ArabicGeneralLetters.Waw
        && currentIndexLetter != (int)ArabicGeneralLetters.ZeroWidthNoJoiner
        && currentIndexLetter != (int)ArabicGeneralLetters.Hamza;
    if (!canThisLetterBeLeading)
    {
        return false;
    }

    int nextIndexLetter = letters.Get(index + 1);
    bool isNextLetterConnectable =
        nextIndexLetter < 0xFFFF
        && TextUtils.IsGlyphFixedArabicCharacter((char)nextIndexLetter)
        && nextIndexLetter != (int)ArabicGeneralLetters.Hamza
        && nextIndexLetter != (int)ArabicGeneralLetters.ZeroWidthNoJoiner;
    if (!isNextLetterConnectable)
    {
        return false;
    }

    // Nothing before the first letter can connect to it.
    if (index == 0)
    {
        return true;
    }

    int previousIndexLetter = letters.Get(index - 1);

    // The previous character does not connect forward when it is not a shapeable Arabic
    // letter, or when it is one of the non-connecting letters (general or isolated form).
    // ArabicIsolatedLetters.WawHamza is included so this list agrees with the one used
    // by IsMiddleLetter.
    return (previousIndexLetter < 0xFFFF && !TextUtils.IsGlyphFixedArabicCharacter((char)previousIndexLetter))
           || previousIndexLetter == (int)ArabicGeneralLetters.Alef
           || previousIndexLetter == (int)ArabicGeneralLetters.Dal
           || previousIndexLetter == (int)ArabicGeneralLetters.Thal
           || previousIndexLetter == (int)ArabicGeneralLetters.Ra2
           || previousIndexLetter == (int)ArabicGeneralLetters.Zeen
           || previousIndexLetter == (int)ArabicGeneralLetters.PersianZe
           || previousIndexLetter == (int)ArabicGeneralLetters.Waw
           || previousIndexLetter == (int)ArabicGeneralLetters.AlefMad
           || previousIndexLetter == (int)ArabicGeneralLetters.AlefHamza
           || previousIndexLetter == (int)ArabicGeneralLetters.Hamza
           || previousIndexLetter == (int)ArabicGeneralLetters.AlefMaksoor
           || previousIndexLetter == (int)ArabicGeneralLetters.ZeroWidthNoJoiner
           || previousIndexLetter == (int)ArabicGeneralLetters.WawHamza
           || previousIndexLetter == (int)ArabicIsolatedLetters.Alef
           || previousIndexLetter == (int)ArabicIsolatedLetters.Dal
           || previousIndexLetter == (int)ArabicIsolatedLetters.Thal
           || previousIndexLetter == (int)ArabicIsolatedLetters.Ra2
           || previousIndexLetter == (int)ArabicIsolatedLetters.Zeen
           || previousIndexLetter == (int)ArabicIsolatedLetters.PersianZe
           || previousIndexLetter == (int)ArabicIsolatedLetters.Waw
           || previousIndexLetter == (int)ArabicIsolatedLetters.AlefMad
           || previousIndexLetter == (int)ArabicIsolatedLetters.AlefHamza
           || previousIndexLetter == (int)ArabicIsolatedLetters.Hamza
           || previousIndexLetter == (int)ArabicIsolatedLetters.AlefMaksoor
           || previousIndexLetter == (int)ArabicIsolatedLetters.WawHamza;
}
/// <summary>
/// Fixes the flow of the text: walks <paramref name="input" /> from its last character
/// to its first and appends to <paramref name="output" />, while keeping tags, English
/// words and numbers in their original left-to-right order.
/// </summary>
/// <param name="input">Source text; read only.</param>
/// <param name="output">Buffer that receives the reordered text (appended to).</param>
/// <param name="farsi">Forwarded to <c>TextUtils.IsNumber</c>.</param>
/// <param name="fixTextTags">When set, the buffered left-to-right text is flushed at the boundaries of valid tags.</param>
/// <param name="preserveNumbers">Forwarded to <c>TextUtils.IsNumber</c>.</param>
public static void Fix(FastStringBuilder input, FastStringBuilder output, bool farsi, bool fixTextTags, bool preserveNumbers)
{
    // Some texts like tags, English words and numbers need to be displayed in their original order.
    // LtrTextHolder keeps the characters whose order should be preserved until they are flushed.
    LtrTextHolder.Clear();

    for (int i = input.Length - 1; i >= 0; i--)
    {
        bool isInMiddle = i > 0 && i < input.Length - 1;
        bool isAtBeginning = i == 0;
        bool isAtEnd = i == input.Length - 1;

        char characterAtThisIndex = input.Get(i);

        char nextCharacter = default;
        if (!isAtEnd)
        {
            nextCharacter = input.Get(i + 1);
        }

        char previousCharacter = default;
        if (!isAtBeginning)
        {
            previousCharacter = input.Get(i - 1);
        }

        if (char.IsPunctuation(characterAtThisIndex) || char.IsSymbol(characterAtThisIndex))
        {
            // A '>' that really closes a tag ends the buffered left-to-right run.
            // A '>' at the very end of the text is never treated as a tag.
            if (fixTextTags
                && characterAtThisIndex == '>'
                && LtrTextHolder.Count > 0
                && !isAtEnd
                && HasTagOpeningBefore(i))
            {
                FlushLtrText();
            }

            // Brackets and guillemets are mirrored when they touch an RTL character.
            char mirrored;
            switch (characterAtThisIndex)
            {
                case ')':
                    mirrored = '(';
                    break;
                case '(':
                    mirrored = ')';
                    break;
                case '«':
                    mirrored = '»';
                    break;
                case '»':
                    mirrored = '«';
                    break;
                default:
                    mirrored = characterAtThisIndex;
                    break;
            }

            if (mirrored != characterAtThisIndex)
            {
                // Only look at neighbours that exist, so guillemets at either edge of the
                // text are mirrored exactly like parentheses are.
                bool isAfterRTLCharacter = !isAtBeginning && TextUtils.IsRTLCharacter(previousCharacter);
                bool isBeforeRTLCharacter = !isAtEnd && TextUtils.IsRTLCharacter(nextCharacter);

                if (isAfterRTLCharacter || isBeforeRTLCharacter)
                {
                    characterAtThisIndex = mirrored;
                }
            }

            if (isInMiddle)
            {
                bool isAfterRTLCharacter = TextUtils.IsRTLCharacter(previousCharacter);
                bool isBeforeRTLCharacter = TextUtils.IsRTLCharacter(nextCharacter);
                bool isBeforeWhiteSpace = char.IsWhiteSpace(nextCharacter);
                bool isAfterWhiteSpace = char.IsWhiteSpace(previousCharacter);
                bool isUnderline = characterAtThisIndex == '_';
                bool isSpecialPunctuation = characterAtThisIndex == '.' ||
                                            characterAtThisIndex == '،' ||
                                            characterAtThisIndex == '؛';

                if (isBeforeRTLCharacter && isAfterRTLCharacter ||
                    isAfterWhiteSpace && isSpecialPunctuation ||
                    isBeforeWhiteSpace && isAfterRTLCharacter ||
                    isBeforeRTLCharacter && isAfterWhiteSpace ||
                    (isBeforeRTLCharacter || isAfterRTLCharacter) && isUnderline)
                {
                    FlushLtrText();
                    output.Append(characterAtThisIndex);
                }
                else
                {
                    LtrTextHolder.Add(characterAtThisIndex);
                }
            }
            else if (isAtEnd)
            {
                LtrTextHolder.Add(characterAtThisIndex);
            }
            else if (isAtBeginning)
            {
                output.Append(characterAtThisIndex);
            }

            // A '<' that really opens a tag also ends the buffered left-to-right run.
            // A '<' at the very beginning of the text is never treated as a tag.
            if (fixTextTags
                && characterAtThisIndex == '<'
                && LtrTextHolder.Count > 0
                && !isAtBeginning
                && HasTagClosingAfter(i))
            {
                FlushLtrText();
            }

            continue;
        }

        if (isInMiddle)
        {
            bool isAfterEnglishChar = TextUtils.IsEnglishLetter(previousCharacter);
            bool isBeforeEnglishChar = TextUtils.IsEnglishLetter(nextCharacter);
            bool isAfterNumber = TextUtils.IsNumber(previousCharacter, preserveNumbers, farsi);
            bool isBeforeNumber = TextUtils.IsNumber(nextCharacter, preserveNumbers, farsi);
            bool isAfterSymbol = char.IsSymbol(previousCharacter);
            bool isBeforeSymbol = char.IsSymbol(nextCharacter);

            // For cases where english words and farsi/arabic are mixed. This allows for using
            // farsi/arabic, english and numbers in one sentence.
            // If the space is between numbers, symbols or English words, keep the order.
            if (characterAtThisIndex == ' ' &&
                (isBeforeEnglishChar || isBeforeNumber || isBeforeSymbol) &&
                (isAfterEnglishChar || isAfterNumber || isAfterSymbol))
            {
                LtrTextHolder.Add(characterAtThisIndex);
                continue;
            }
        }

        if (TextUtils.IsEnglishLetter(characterAtThisIndex) ||
            TextUtils.IsNumber(characterAtThisIndex, preserveNumbers, farsi))
        {
            LtrTextHolder.Add(characterAtThisIndex);
            continue;
        }

        // Surrogate halves must keep their relative order.
        if (characterAtThisIndex >= (char)0xD800 && characterAtThisIndex <= (char)0xDBFF ||
            characterAtThisIndex >= (char)0xDC00 && characterAtThisIndex <= (char)0xDFFF)
        {
            LtrTextHolder.Add(characterAtThisIndex);
            continue;
        }

        FlushLtrText();

        // 0xFFFF placeholders and zero-width non-joiners are dropped from the output.
        if (characterAtThisIndex != 0xFFFF &&
            characterAtThisIndex != (int)GeneralLetters.ZeroWidthNoJoiner)
        {
            output.Append(characterAtThisIndex);
        }
    }

    FlushLtrText();

    // Appends the buffered left-to-right characters, last added first, then empties the buffer.
    void FlushLtrText()
    {
        for (int j = LtrTextHolder.Count - 1; j >= 0; j--)
        {
            output.Append(LtrTextHolder[j]);
        }

        LtrTextHolder.Clear();
    }

    // True when a '<' is found before closeIndex without crossing a space or an RTL character.
    bool HasTagOpeningBefore(int closeIndex)
    {
        for (int j = closeIndex - 1; j >= 0; j--)
        {
            char c = input.Get(j);

            // Tags do not have spaces or RTL characters inside.
            if (c == ' ' || TextUtils.IsRTLCharacter(c))
            {
                return false;
            }

            if (c == '<')
            {
                return true;
            }
        }

        return false;
    }

    // True when a '>' is found after openIndex without crossing a space or an RTL character.
    bool HasTagClosingAfter(int openIndex)
    {
        for (int j = openIndex + 1; j < input.Length; j++)
        {
            char c = input.Get(j);

            // Tags do not have spaces or RTL characters inside.
            if (c == ' ' || TextUtils.IsRTLCharacter(c))
            {
                return false;
            }

            if (c == '>')
            {
                return true;
            }
        }

        return false;
    }
}
/// <summary>
/// Removes tashkeel from text. Each recorded mark is appended to
/// <c>TashkeelLocations</c> with its index in the original text, then every tashkeel
/// character is removed from <paramref name="input" />.
/// </summary>
/// <param name="input">Buffer that is scanned and then stripped in place.</param>
public static void RemoveTashkeel(FastStringBuilder input)
{
    // Pass 1: remember where each mark sits before anything is removed.
    for (int position = 0; position < input.Length; position++)
    {
        var mark = (TashkeelCharacters)input.Get(position);
        switch (mark)
        {
            case TashkeelCharacters.Fathan:
            case TashkeelCharacters.Dammatan:
            case TashkeelCharacters.Kasratan:
            case TashkeelCharacters.Fatha:
            case TashkeelCharacters.Damma:
            case TashkeelCharacters.Kasra:
            case TashkeelCharacters.Shadda:
            case TashkeelCharacters.Sukun:
            case TashkeelCharacters.MaddahAbove:
            case TashkeelCharacters.SuperscriptAlef:
                TashkeelLocations.Add(new TashkeelLocation(mark, position));
                break;
        }
    }

    // Pass 2: strip the marks. The three Shadda-with-vowel isolated forms are removed
    // here as well, although their locations are not recorded above.
    input.RemoveAll((char)TashkeelCharacters.Fathan);
    input.RemoveAll((char)TashkeelCharacters.Dammatan);
    input.RemoveAll((char)TashkeelCharacters.Kasratan);
    input.RemoveAll((char)TashkeelCharacters.Fatha);
    input.RemoveAll((char)TashkeelCharacters.Damma);
    input.RemoveAll((char)TashkeelCharacters.Kasra);
    input.RemoveAll((char)TashkeelCharacters.Shadda);
    input.RemoveAll((char)TashkeelCharacters.Sukun);
    input.RemoveAll((char)TashkeelCharacters.MaddahAbove);
    input.RemoveAll((char)TashkeelCharacters.ShaddaWithFathaIsolatedForm);
    input.RemoveAll((char)TashkeelCharacters.ShaddaWithDammaIsolatedForm);
    input.RemoveAll((char)TashkeelCharacters.ShaddaWithKasraIsolatedForm);
    input.RemoveAll((char)TashkeelCharacters.SuperscriptAlef);
}