// 3.3.1.P1 - Split the text into separate paragraphs.
// A paragraph separator (bidi type B) is kept with the paragraph it terminates:
// it is stored in that paragraph's ParagraphSeparator and is not part of its text.
// Within each paragraph, apply all the other rules of this algorithm.
//
// logicalString: the text to split, in logical order.
// Returns one Paragraph per separator encountered, plus a final Paragraph for
// any trailing text that is not followed by a separator.
public static Paragraph[] SplitStringToParagraphs(string logicalString)
{
    ArrayList paragraphs = new ArrayList();
    StringBuilder pending = new StringBuilder();

    foreach (char current in logicalString)
    {
        bool isSeparator =
            UnicodeCharacterDataResolver.GetBidiCharacterType(current) == BidiCharacterType.B;

        if (!isSeparator)
        {
            pending.Append(current);
            continue;
        }

        // Close the paragraph collected so far (possibly empty) and start a new one.
        Paragraph closed = new Paragraph(pending.ToString());
        closed.ParagraphSeparator = current;
        paragraphs.Add(closed);
        pending.Length = 0;
    }

    // The string ended without a paragraph separator.
    if (pending.Length > 0)
    {
        paragraphs.Add(new Paragraph(pending.ToString()));
    }

    Paragraph[] result = new Paragraph[paragraphs.Count];
    paragraphs.CopyTo(result);
    return result;
}
// Asks UnicodeCharacterDataResolver.Compose for the composition of the
// two-character sequence "first second".
//
// first:  the leading character of the pair (the current starter in InternalCompose).
// second: the character that may combine with it.
// Returns whatever Compose returns for the pair. InternalCompose compares the
// result against BidiChars.NotAChar, so presumably that is the "no composition"
// value -- confirm against the resolver.
private char GetPairwiseComposition(char first, char second)
{
    // The previous range guard (value < 0 || value > 0xFFFF) was removed: a C# char
    // is an unsigned 16-bit value, so the guard could never be true and its
    // early "return NotAChar" branch was unreachable.
    return UnicodeCharacterDataResolver.Compose(new string(new char[] { first, second }));
}
// Appends the decomposition of ch to builder, expanding each mapped character
// recursively until only characters that are kept as-is remain.
//
// canonical: when true, a character whose decomposition type is anything other
//            than UnicodeDecompositionType.None is not expanded.
// ch:        the character to decompose.
// builder:   receives the resulting characters, in order.
private void GetRecursiveDecomposition(bool canonical, char ch, StringBuilder builder)
{
    string mapping = UnicodeCharacterDataResolver.GetUnicodeDecompositionMapping(ch);

    // Keep the character untouched when it has no mapping, or when only canonical
    // decompositions were requested and this mapping carries a decomposition type.
    bool keepAsIs =
        mapping == null ||
        (canonical &&
         UnicodeCharacterDataResolver.GetUnicodeDecompositionType(ch) != UnicodeDecompositionType.None);

    if (keepAsIs)
    {
        builder.Append(ch);
        return;
    }

    foreach (char part in mapping)
    {
        GetRecursiveDecomposition(canonical, part, builder);
    }
}
// Decomposes _text character by character and returns the result with combining
// marks ordered by canonical class.
//
// Side effects:
//  - _hasArabic is set when any character of _text has bidi type AL or AN.
//  - _hasNSMs is set when any character of _text has bidi type NSM.
//  - char_lengths receives one entry per character of _text, equal to
//    1 minus the length of that character's decomposition.
//
// char_lengths: list to which the per-character entries are appended.
// Returns a new StringBuilder holding the decomposed text.
private StringBuilder InternalDecompose(ArrayList char_lengths)
{
    StringBuilder decomposed = new StringBuilder();
    StringBuilder scratch = new StringBuilder();

    _hasArabic = false;
    _hasNSMs = false;

    foreach (char source in _text)
    {
        BidiCharacterType bidiType = UnicodeCharacterDataResolver.GetBidiCharacterType(source);
        if (bidiType == BidiCharacterType.AL || bidiType == BidiCharacterType.AN)
        {
            _hasArabic = true;
        }
        if (bidiType == BidiCharacterType.NSM)
        {
            _hasNSMs = true;
        }

        // Expand this character into the scratch buffer (it may come back unchanged
        // when there is no decomposition mapping).
        scratch.Length = 0;
        GetRecursiveDecomposition(false, source, scratch);
        char_lengths.Add(1 - scratch.Length);

        for (int n = 0; n < scratch.Length; ++n)
        {
            char piece = scratch[n];
            UnicodeCanonicalClass pieceClass = UnicodeCharacterDataResolver.GetUnicodeCanonicalClass(piece);

            // Default insertion point is the end of the output. A character whose class
            // is not NR moves backwards past every preceding character with a strictly
            // higher canonical class.
            int insertAt = decomposed.Length;
            if (pieceClass != UnicodeCanonicalClass.NR)
            {
                while (insertAt > 0 &&
                       UnicodeCharacterDataResolver.GetUnicodeCanonicalClass(decomposed[insertAt - 1]) > pieceClass)
                {
                    --insertAt;
                }
            }

            decomposed.Insert(insertAt, piece);
        }
    }

    return decomposed;
}
// 3.3.1 The Paragraph Level
// P2 - In each paragraph, find the first character of type L, AL, or R.
// P3 - If a character is found in P2 and it is of type AL or R, then
//      set the paragraph embedding level to one; otherwise, set it to zero.
//
// Scans _text and stores the result in embedding_level: 1 when the first strong
// character is R or AL, 0 when it is L or when the text has no strong character.
public void RecalculateParagraphEmbeddingLevel()
{
    // Start from the P3 "otherwise" case. The previous code started from 1, which
    // made every paragraph right-to-left regardless of its content.
    embedding_level = 0;

    foreach (char c in _text)
    {
        BidiCharacterType cType = UnicodeCharacterDataResolver.GetBidiCharacterType(c);

        if (cType == BidiCharacterType.R || cType == BidiCharacterType.AL)
        {
            embedding_level = 1;
            break;
        }

        if (cType == BidiCharacterType.L)
        {
            // First strong character is left-to-right: keep level 0.
            break;
        }
    }
}
// Recombines the decomposed characters held in target, in place, and keeps
// char_lengths in step with the result.
//
// target:       decomposed text; overwritten with the composed text and truncated
//               to the composed length.
// char_lengths: per-character entries (InternalDecompose adds "1 - decomposition
//               length" for each source character). Entries are incremented as
//               characters are consumed, and the list is truncated to the composed
//               length at the end.
//
// Does nothing when target is empty.
private void InternalCompose(StringBuilder target, ArrayList char_lengths)
{
    if (target.Length == 0)
    {
        return;
    }

    int starterPos = 0;   // index in target of the current starter
    int compPos = 1;      // next write position in the composed output
    int text_idx = 0;     // incremented below but never read in this method
    char starterCh = target[0];
    char_lengths[starterPos] = (int)char_lengths[starterPos] + 1;
    UnicodeCanonicalClass lastClass = UnicodeCharacterDataResolver.GetUnicodeCanonicalClass(starterCh);
    if (lastClass != UnicodeCanonicalClass.NR)
    {
        // NOTE(review): 256 is presumably larger than every defined canonical class, so
        // "lastClass < chClass" below cannot hold until lastClass is reassigned -- confirm.
        lastClass = (UnicodeCanonicalClass)256; // fix for strings starting with a combining mark
    }

    int oldLen = target.Length;

    // Loop on the decomposed characters, combining where possible
    char ch;
    for (int decompPos = compPos; decompPos < target.Length; ++decompPos)
    {
        ch = target[decompPos];
        UnicodeCanonicalClass chClass = UnicodeCharacterDataResolver.GetUnicodeCanonicalClass(ch);
        char composite = GetPairwiseComposition(starterCh, ch);
        UnicodeDecompositionType composeType = UnicodeCharacterDataResolver.GetUnicodeDecompositionType(composite);

        // Combine only when a composite exists, its decomposition type is None, and the
        // previous character does not block the combination (lower class, or class NR).
        if (composeType == UnicodeDecompositionType.None && composite != BidiChars.NotAChar && (lastClass < chClass || lastClass == UnicodeCanonicalClass.NR))
        {
            target[starterPos] = composite;
            char_lengths[starterPos] = (int)char_lengths[starterPos] + 1;
            // we know that we will only be replacing non-supplementaries by non-supplementaries
            // so we don't have to adjust the decompPos
            starterCh = composite;
        }
        else
        {
            // No combination: the character is copied to the output as-is. A character of
            // class NR becomes the new starter.
            if (chClass == UnicodeCanonicalClass.NR)
            {
                starterPos = compPos;
                starterCh = ch;
                text_idx++;
            }
            lastClass = chClass;
            target[compPos] = ch;
            //char_lengths[compPos] = (int)char_lengths[compPos] + 1;
            int chkPos = compPos;
            if ((int)char_lengths[chkPos] < 0)
            {
                // While the entry at chkPos is negative: increment it, insert a 0 entry
                // at compPos (shifting later entries right), and advance chkPos.
                while ((int)char_lengths[chkPos] < 0)
                {
                    char_lengths[chkPos] = (int)char_lengths[chkPos] + 1;
                    char_lengths.Insert(compPos, 0);
                    chkPos++;
                }
            }
            else
            {
                char_lengths[chkPos] = (int)char_lengths[chkPos] + 1;
            }

            // NOTE(review): nothing visible in this loop resizes target (only indexed
            // assignments are made), so this adjustment looks like a leftover -- confirm.
            if (target.Length != oldLen) // MAY HAVE TO ADJUST!
            {
                decompPos += target.Length - oldLen;
                oldLen = target.Length;
            }
            ++compPos;
        }
    }

    // Drop everything past the composed output from both the text and the entry list.
    target.Length = compPos;
    char_lengths.RemoveRange(compPos, char_lengths.Count - compPos);
}
// 3.3.2 Explicit Levels and Directions
//
// Resolves per-character embedding levels for _text, runs the weak / neutral /
// implicit resolution steps on each level run, reorders the text, and stores the
// result in _bidi_text together with the matching _bidi_indexes.
//
// Reads:  _hasArabic, _text, _char_lengths, EmbeddingLevel.
// Writes: _text (when _hasArabic is set it is replaced by PerformArabicShaping's
//         result), _text_data, _bidi_text, _bidi_indexes.
public void RecalculateCharactersEmbeddingLevels()
{
    // This method is implemented in such a way it handles the string in logical order,
    // rather than visual order, so it is easier to handle complex layouts. That is why
    // it is placed BEFORE ReorderString rather than AFTER it, as its number suggests.
    if (_hasArabic)
    {
        _text = PerformArabicShaping(_text);
    }

    _text_data = new CharData[_text.Length];

    #region rules X1 - X9
    // X1. Start from the paragraph embedding level with a neutral override status.
    byte embeddingLevel = EmbeddingLevel;
    DirectionalOverrideStatus dos = DirectionalOverrideStatus.Neutral;
    Stack dosStack = new Stack();   // saved override statuses, pushed by X2-X5, popped by X7
    Stack elStack = new Stack();    // saved embedding levels, pushed/popped alongside dosStack
    int idx = 0;                    // running sum of the _char_lengths entries seen so far
    for (int i = 0; i < _text.Length; ++i)
    {
        // Set for RLE/RLO/LRE/LRO/PDF; such characters receive the current level below.
        bool x9Char = false;
        char c = _text[i];
        _text_data[i]._ct = UnicodeCharacterDataResolver.GetBidiCharacterType(c);
        _text_data[i]._char = c;
        _text_data[i]._idx = idx;
        idx += _char_lengths[i];

        #region rules X2 - X5
        // X2. With each RLE, compute the least greater odd embedding level.
        // X4. With each RLO, compute the least greater odd embedding level.
        if (c == BidiChars.RLE || c == BidiChars.RLO)
        {
            x9Char = true;
            // NOTE(review): the guard rejects a push from level 60 even though the next
            // odd level would be 61 -- confirm the intended maximum depth.
            if (embeddingLevel < 60)
            {
                elStack.Push(embeddingLevel);
                dosStack.Push(dos);
                // Increment, then force the low bit: yields the next odd level.
                ++embeddingLevel;
                embeddingLevel |= 1;
                if (c == BidiChars.RLE)
                {
                    dos = DirectionalOverrideStatus.Neutral;
                }
                else
                {
                    dos = DirectionalOverrideStatus.RTL;
                }
            }
        }
        // X3. With each LRE, compute the least greater even embedding level.
        // X5. With each LRO, compute the least greater even embedding level.
        else if (c == BidiChars.LRE || c == BidiChars.LRO)
        {
            x9Char = true;
            // NOTE(review): the guard rejects a push from level 59 even though the next
            // even level would be 60 -- confirm the intended maximum depth.
            if (embeddingLevel < 59)
            {
                elStack.Push(embeddingLevel);
                dosStack.Push(dos);
                // Force the low bit, then increment: yields the next even level.
                embeddingLevel |= 1;
                ++embeddingLevel;
                if (c == BidiChars.LRE)
                {
                    dos = DirectionalOverrideStatus.Neutral;
                }
                else
                {
                    dos = DirectionalOverrideStatus.LTR;
                }
            }
        }
        #endregion

        #region rule X6
        // X6. For all types besides RLE, LRE, RLO, LRO, and PDF: (...)
        else if (c != BidiChars.PDF)
        {
            // a. Set the level of the current character to the current embedding level.
            _text_data[i]._el = embeddingLevel;
            // b. Whenever the directional override status is not neutral,
            // reset the current character type to the directional override status.
            if (dos == DirectionalOverrideStatus.LTR)
            {
                _text_data[i]._ct = BidiCharacterType.L;
            }
            else if (dos == DirectionalOverrideStatus.RTL)
            {
                _text_data[i]._ct = BidiCharacterType.R;
            }
        }
        #endregion

        #region rule X7
        // Terminating Embeddings and Overrides
        // X7. With each PDF, determine the matching embedding or override code.
        // If there was a valid matching code, restore (pop) the last remembered (pushed)
        // embedding level and directional override.
        else if (c == BidiChars.PDF)
        {
            x9Char = true;
            if (elStack.Count > 0)
            {
                embeddingLevel = (byte)(elStack.Pop());
                dos = (DirectionalOverrideStatus)(dosStack.Pop());
            }
        }
        #endregion

        // X8. All explicit directional embeddings and overrides are completely
        // terminated at the end of each paragraph. Paragraph separators are not
        // included in the embedding.
        // The explicit formatting characters (and BN characters) are kept in the text
        // and given the embedding level in effect after the push/pop above.
        if (x9Char || _text_data[i]._ct == BidiCharacterType.BN)
        {
            _text_data[i]._el = embeddingLevel;
        }
    }
    #endregion

    // X10. The remaining rules are applied to each run of characters at the same level.
    int prevLevel = EmbeddingLevel;
    int start = 0;
    while (start < _text.Length)
    {
        #region rule X10 - run level setup
        // A run is [start, limit): consecutive characters sharing one level. sor / eor
        // are derived from the higher of the run's level and the level on that side
        // (the paragraph level at either end of the text).
        byte level = _text_data[start]._el;
        BidiCharacterType sor = TypeForLevel(Math.Max(prevLevel, level));
        int limit = start + 1;
        while (limit < _text.Length && _text_data[limit]._el == level)
        {
            ++limit;
        }
        byte nextLevel = limit < _text.Length ? _text_data[limit]._el : EmbeddingLevel;
        BidiCharacterType eor = TypeForLevel(Math.Max(nextLevel, level));
        #endregion

        ResolveWeakTypes(start, limit, sor, eor);
        ResolveNeutralTypes(start, limit, sor, eor, level);
        ResolveImplicitTypes(start, limit, level);
        prevLevel = level;
        start = limit;
    }

    // Wrap lines
    ReorderString(0, _text.Length);
    FixMirroredCharacters();

    // Publish the resulting characters and their _idx values, in _text_data order.
    ArrayList indexes = new ArrayList();
    // NOTE(review): lengths is filled below but never read in this method.
    ArrayList lengths = new ArrayList();
    StringBuilder sb = new StringBuilder();
    foreach (CharData cd in _text_data)
    {
        sb.Append(cd._char);
        indexes.Add(cd._idx);
        lengths.Add(1);
    }
    _bidi_text = sb.ToString();
    _bidi_indexes = (int[])indexes.ToArray(typeof(int));
}
// Returns the Arabic shaping joining type of c.
//
// Characters in the explicit lists below map to U, C, R or D. The lists are
// mutually disjoint, so the order in which they are consulted does not matter.
// Any other character is T when its general category is Mn, Me or Cf, and U otherwise.
public static ArabicShapeJoiningType GetArabicShapeJoiningType(char c)
{
    if (IsListedNonJoining(c))
    {
        return ArabicShapeJoiningType.U;
    }
    if (IsListedJoinCausing(c))
    {
        return ArabicShapeJoiningType.C;
    }
    if (IsListedRightJoining(c))
    {
        return ArabicShapeJoiningType.R;
    }
    if (IsListedDualJoining(c))
    {
        return ArabicShapeJoiningType.D;
    }

    // Not listed: the general category decides.
    UnicodeGeneralCategory category = UnicodeCharacterDataResolver.GetUnicodeGeneralCategory(c);
    bool isTransparent =
        category == UnicodeGeneralCategory.Mn ||
        category == UnicodeGeneralCategory.Me ||
        category == UnicodeGeneralCategory.Cf;

    return isTransparent ? ArabicShapeJoiningType.T : ArabicShapeJoiningType.U;
}

// True for the characters explicitly listed as joining type U.
private static bool IsListedNonJoining(char c)
{
    return (c >= '\u0600' && c <= '\u0603')
        || c == '\u0608'
        || c == '\u060B'
        || c == '\u0621'
        || c == '\u0674'
        || c == '\u06DD';
}

// True for the characters explicitly listed as joining type C.
private static bool IsListedJoinCausing(char c)
{
    return c == '\u0640'
        || c == '\u07FA'
        || c == '\u200D';
}

// True for the characters explicitly listed as joining type R.
private static bool IsListedRightJoining(char c)
{
    return (c >= '\u0622' && c <= '\u0625')
        || c == '\u0627'
        || c == '\u0629'
        || (c >= '\u062F' && c <= '\u0632')
        || c == '\u0648'
        || (c >= '\u0671' && c <= '\u0673')
        || (c >= '\u0675' && c <= '\u0677')
        || (c >= '\u0688' && c <= '\u0699')
        || c == '\u06C0'
        || (c >= '\u06C3' && c <= '\u06CB')
        || c == '\u06CD'
        || c == '\u06CF'
        || (c >= '\u06D2' && c <= '\u06D3')
        || c == '\u06D5'
        || (c >= '\u06EE' && c <= '\u06EF')
        || c == '\u0710'
        || (c >= '\u0715' && c <= '\u0719')
        || c == '\u071E'
        || c == '\u0728'
        || c == '\u072A'
        || c == '\u072C'
        || c == '\u072F'
        || c == '\u074D'
        || (c >= '\u0759' && c <= '\u075B')
        || (c >= '\u076B' && c <= '\u076C')
        || c == '\u0771'
        || (c >= '\u0773' && c <= '\u0774')
        || (c >= '\u0778' && c <= '\u0779');
}

// True for the characters explicitly listed as joining type D.
private static bool IsListedDualJoining(char c)
{
    return c == '\u0626'
        || c == '\u0628'
        || (c >= '\u062A' && c <= '\u062E')
        || (c >= '\u0633' && c <= '\u063F')
        || (c >= '\u0641' && c <= '\u0647')
        || (c >= '\u0649' && c <= '\u064A')
        || (c >= '\u066E' && c <= '\u066F')
        || (c >= '\u0678' && c <= '\u0687')
        || (c >= '\u069A' && c <= '\u06BF')
        || (c >= '\u06C1' && c <= '\u06C2')
        || c == '\u06CC'
        || c == '\u06CE'
        || (c >= '\u06D0' && c <= '\u06D1')
        || (c >= '\u06FA' && c <= '\u06FC')
        || c == '\u06FF'
        || (c >= '\u0712' && c <= '\u0714')
        || (c >= '\u071A' && c <= '\u071D')
        || (c >= '\u071F' && c <= '\u0727')
        || c == '\u0729'
        || c == '\u072B'
        || (c >= '\u072D' && c <= '\u072E')
        || (c >= '\u074E' && c <= '\u0758')
        || (c >= '\u075C' && c <= '\u076A')
        || (c >= '\u076D' && c <= '\u0770')
        || c == '\u0772'
        || (c >= '\u0775' && c <= '\u0777')
        || (c >= '\u077A' && c <= '\u077F')
        || (c >= '\u07CA' && c <= '\u07EA');
}