/// <summary> /// Removes tashkeel from text. /// </summary> public static void RemoveTashkeel(FastStringBuilder input) { TashkeelLocations.Clear(); int j = 0; // write index for (int i = 0; i < input.Length; i++) { int curChar = input.Get(i); if (Char32Utils.IsUnicode16Char(curChar) && TashkeelCharactersSet.Contains((char)curChar)) { TashkeelLocations.Add(new TashkeelLocation((TashkeelCharacters)curChar, i)); } else { input.Set(j, curChar); j++; } } input.Length = j; }
/// <summary> /// Fixes the flow of the text. /// </summary> public static void Fix(FastStringBuilder input, FastStringBuilder output, bool farsi, bool fixTextTags, bool preserveNumbers) { // Some texts like tags, English words and numbers need to be displayed in their original order. // This list keeps the characters that their order should be reserved and streams reserved texts into final letters. LtrTextHolder.Clear(); TagTextHolder.Clear(); for (int i = input.Length - 1; i >= 0; i--) { bool isInMiddle = i > 0 && i < input.Length - 1; bool isAtBeginning = i == 0; bool isAtEnd = i == input.Length - 1; int characterAtThisIndex = input.Get(i); int nextCharacter = default; if (!isAtEnd) { nextCharacter = input.Get(i + 1); } int previousCharacter = default; if (!isAtBeginning) { previousCharacter = input.Get(i - 1); } if (fixTextTags) { if (characterAtThisIndex == '>') { // We need to check if it is actually the beginning of a tag. bool isValidTag = false; int nextI = i; TagTextHolder.Add(characterAtThisIndex); for (int j = i - 1; j >= 0; j--) { var jChar = input.Get(j); TagTextHolder.Add(jChar); if (jChar == '<') { var jPlus1Char = input.Get(j + 1); // Tags do not start with space if (jPlus1Char == ' ') { break; } isValidTag = true; nextI = j; break; } } if (isValidTag) { FlushBufferToOutput(LtrTextHolder, output); FlushBufferToOutput(TagTextHolder, output); i = nextI; continue; } else { TagTextHolder.Clear(); } } } if (Char32Utils.IsPunctuation(characterAtThisIndex) || Char32Utils.IsSymbol(characterAtThisIndex)) { if (MirroredCharsSet.Contains((char)characterAtThisIndex)) { // IsRTLCharacter returns false for null bool isAfterRTLCharacter = Char32Utils.IsRTLCharacter(previousCharacter); bool isBeforeRTLCharacter = Char32Utils.IsRTLCharacter(nextCharacter); if (isAfterRTLCharacter || isBeforeRTLCharacter) { characterAtThisIndex = MirroredCharsMap[(char)characterAtThisIndex]; } } if (isInMiddle) { bool isAfterRTLCharacter = Char32Utils.IsRTLCharacter(previousCharacter); bool isBeforeRTLCharacter = Char32Utils.IsRTLCharacter(nextCharacter); bool isBeforeWhiteSpace = Char32Utils.IsWhiteSpace(nextCharacter); bool isAfterWhiteSpace = Char32Utils.IsWhiteSpace(previousCharacter); bool isUnderline = characterAtThisIndex == '_'; bool isSpecialPunctuation = characterAtThisIndex == '.' || characterAtThisIndex == '،' || characterAtThisIndex == '؛'; if (isBeforeRTLCharacter && isAfterRTLCharacter || isAfterWhiteSpace && isSpecialPunctuation || isBeforeWhiteSpace && isAfterRTLCharacter || isBeforeRTLCharacter && isAfterWhiteSpace || (isBeforeRTLCharacter || isAfterRTLCharacter) && isUnderline) { FlushBufferToOutput(LtrTextHolder, output); output.Append(characterAtThisIndex); } else { LtrTextHolder.Add(characterAtThisIndex); } } else if (isAtEnd) { LtrTextHolder.Add(characterAtThisIndex); } else if (isAtBeginning) { output.Append(characterAtThisIndex); } continue; } if (isInMiddle) { bool isAfterEnglishChar = Char32Utils.IsEnglishLetter(previousCharacter); bool isBeforeEnglishChar = Char32Utils.IsEnglishLetter(nextCharacter); bool isAfterNumber = Char32Utils.IsNumber(previousCharacter, preserveNumbers, farsi); bool isBeforeNumber = Char32Utils.IsNumber(nextCharacter, preserveNumbers, farsi); bool isAfterSymbol = Char32Utils.IsSymbol(previousCharacter); bool isBeforeSymbol = Char32Utils.IsSymbol(nextCharacter); // For cases where english words and farsi/arabic are mixed. This allows for using farsi/arabic, english and numbers in one sentence. // If the space is between numbers,symbols or English words, keep the order if (characterAtThisIndex == ' ' && (isBeforeEnglishChar || isBeforeNumber || isBeforeSymbol) && (isAfterEnglishChar || isAfterNumber || isAfterSymbol)) { LtrTextHolder.Add(characterAtThisIndex); continue; } } if (Char32Utils.IsEnglishLetter(characterAtThisIndex) || Char32Utils.IsNumber(characterAtThisIndex, preserveNumbers, farsi)) { LtrTextHolder.Add(characterAtThisIndex); continue; } // handle surrogates if (characterAtThisIndex >= (char)0xD800 && characterAtThisIndex <= (char)0xDBFF || characterAtThisIndex >= (char)0xDC00 && characterAtThisIndex <= (char)0xDFFF) { LtrTextHolder.Add(characterAtThisIndex); continue; } FlushBufferToOutput(LtrTextHolder, output); if (characterAtThisIndex != 0xFFFF && characterAtThisIndex != (int)SpecialCharacters.ZeroWidthNoJoiner) { output.Append(characterAtThisIndex); } } FlushBufferToOutput(LtrTextHolder, output); }