Beispiel #1
0
        // 3.3.1.P1 - Split the text into separate paragraphs.
        // A paragraph separator is kept with the previous paragraph.
        // Within each paragraph, apply all the other rules of this algorithm.
        // Cuts logicalString into Paragraph objects at every character whose bidi type is B.
        // The separator itself is not part of the paragraph text; it is recorded on the
        // paragraph it closes through ParagraphSeparator. Trailing text that is not followed
        // by a separator becomes a final paragraph with no separator assigned.
        // Returns the paragraphs in the order they occur (an empty array for an empty string).
        public static Paragraph[] SplitStringToParagraphs(string logicalString)
        {
            ArrayList     paragraphs = new ArrayList();
            StringBuilder pending    = new StringBuilder();

            foreach (char current in logicalString)
            {
                if (UnicodeCharacterDataResolver.GetBidiCharacterType(current) != BidiCharacterType.B)
                {
                    // Ordinary character: keep collecting text for the current paragraph.
                    pending.Append(current);
                    continue;
                }

                // Separator: close the paragraph collected so far (its text may be empty).
                Paragraph closed = new Paragraph(pending.ToString());
                closed.ParagraphSeparator = current;
                paragraphs.Add(closed);
                pending.Length = 0;
            }

            // Leftover text means the string did not end with a paragraph separator.
            if (pending.Length != 0)
            {
                paragraphs.Add(new Paragraph(pending.ToString()));
            }

            Array result = paragraphs.ToArray(typeof(Paragraph));
            return((Paragraph[])result);
        }
Beispiel #2
0
 // Looks up the single character that the pair (first, second) composes to, by passing
 // the two-character string to UnicodeCharacterDataResolver.Compose.
 // Returns whatever Compose returns for that pair; callers in this file compare the result
 // against BidiChars.NotAChar (presumably the "no composition" marker - confirm in Compose).
 private char GetPairwiseComposition(char first, char second)
 {
     // No range check is needed: a C# char is an unsigned 16-bit value, so it can never be
     // below 0 or above 0xFFFF. The former guard on those bounds was unreachable.
     return(UnicodeCharacterDataResolver.Compose(first.ToString() + second.ToString()));
 }
Beispiel #3
0
            // Appends the full decomposition of ch to builder.
            //   canonical - when true, a mapping is only followed if the character's
            //               decomposition type is None; any other type is left as-is.
            //   ch        - character to decompose.
            //   builder   - receives ch itself when it is not expanded, otherwise the
            //               recursively decomposed characters of its mapping, in order.
            private void GetRecursiveDecomposition(bool canonical, char ch, StringBuilder builder)
            {
                string mapping = UnicodeCharacterDataResolver.GetUnicodeDecompositionMapping(ch);

                // The decomposition type is only queried when a mapping exists and a
                // canonical decomposition was requested.
                bool expand = mapping != null &&
                              (!canonical ||
                               UnicodeCharacterDataResolver.GetUnicodeDecompositionType(ch) == UnicodeDecompositionType.None);

                if (!expand)
                {
                    builder.Append(ch);
                    return;
                }

                // Each character of the mapping may itself have a mapping.
                foreach (char part in mapping)
                {
                    GetRecursiveDecomposition(canonical, part, builder);
                }
            }
Beispiel #4
0
            // Decomposes every character of _text (with canonical == false) and returns the
            // decomposed characters with non-NR characters moved back past any preceding
            // characters of a higher canonical class.
            // Side effects:
            //   - _hasArabic is set when any character has bidi type AL or AN,
            //     _hasNSMs when any character has bidi type NSM (both are reset first);
            //   - char_lengths receives one entry per source character:
            //     1 minus the number of characters it decomposed into (so 0 or negative).
            private StringBuilder InternalDecompose(ArrayList char_lengths)
            {
                StringBuilder result  = new StringBuilder();
                StringBuilder scratch = new StringBuilder();

                _hasArabic = false;
                _hasNSMs   = false;

                for (int srcPos = 0; srcPos < _text.Length; ++srcPos)
                {
                    char source = _text[srcPos];

                    BidiCharacterType bidiType = UnicodeCharacterDataResolver.GetBidiCharacterType(source);
                    if (bidiType == BidiCharacterType.AL || bidiType == BidiCharacterType.AN)
                    {
                        _hasArabic = true;
                    }
                    if (bidiType == BidiCharacterType.NSM)
                    {
                        _hasNSMs = true;
                    }

                    // Decompose into the scratch buffer; it holds just the source character
                    // when there is nothing to expand.
                    scratch.Length = 0;
                    GetRecursiveDecomposition(false, source, scratch);
                    char_lengths.Add(1 - scratch.Length);

                    for (int n = 0; n < scratch.Length; ++n)
                    {
                        char piece = scratch[n];
                        UnicodeCanonicalClass pieceClass = UnicodeCharacterDataResolver.GetUnicodeCanonicalClass(piece);

                        // Default is to append. A character whose class is not NR walks
                        // backwards while the character before the insertion point has a
                        // strictly higher class.
                        int insertAt = result.Length;
                        if (pieceClass != UnicodeCanonicalClass.NR)
                        {
                            while (insertAt > 0 &&
                                   UnicodeCharacterDataResolver.GetUnicodeCanonicalClass(result[insertAt - 1]) > pieceClass)
                            {
                                --insertAt;
                            }
                        }
                        result.Insert(insertAt, piece);
                    }
                }

                return(result);
            }
Beispiel #5
0
            // 3.3.1 The Paragraph Level
            // P2 - In each paragraph, find the first character of type L, AL, or R.
            // P3 - If a character is found in P2 and it is of type AL or R, then
            // set the paragraph embedding level to one; otherwise, set it to zero.
            // Sets embedding_level from the first strong character of _text:
            // 1 when that character has bidi type R or AL, 0 when it has type L or when the
            // text contains no character of type L, R or AL.
            public void RecalculateParagraphEmbeddingLevel()
            {
                // Rule P3 default. Starting from 1 would make every paragraph level 1,
                // because the loop below can only ever raise the level to 1.
                embedding_level = 0;

                foreach (char c in _text)
                {
                    BidiCharacterType cType = UnicodeCharacterDataResolver.GetBidiCharacterType(c);
                    if (cType == BidiCharacterType.R || cType == BidiCharacterType.AL)
                    {
                        // First strong character is right-to-left.
                        embedding_level = 1;
                        break;
                    }
                    else if (cType == BidiCharacterType.L)
                    {
                        // First strong character is left-to-right: keep level 0.
                        break;
                    }
                }
            }
Beispiel #6
0
            // Recomposes the decomposed characters held in target, in place, and keeps the
            // counters in char_lengths in step with the composed text.
            //   target       - decomposed characters; overwritten with the composed result
            //                  and truncated to the composed length.
            //   char_lengths - ArrayList of boxed ints; entries are incremented or inserted as
            //                  characters are merged or kept, and the list is trimmed to the
            //                  composed length before returning.
            // Does nothing when target is empty.
            private void InternalCompose(StringBuilder target, ArrayList char_lengths)
            {
                if (target.Length == 0)
                {
                    return;
                }
                int  starterPos = 0;         // position in target of the character currently being composed onto
                int  compPos    = 1;         // next write position in the composed output
                int  text_idx   = 0;         // only incremented below; never read in this method
                char starterCh  = target[0]; // current value of the character at starterPos

                // Account for the first character, which is always kept.
                char_lengths[starterPos] = (int)char_lengths[starterPos] + 1;

                UnicodeCanonicalClass lastClass = UnicodeCharacterDataResolver.GetUnicodeCanonicalClass(starterCh);

                if (lastClass != UnicodeCanonicalClass.NR)
                {
                    // presumably 256 is above every real canonical class, so the
                    // "lastClass < chClass" test below cannot succeed - TODO confirm
                    lastClass = (UnicodeCanonicalClass)256; // fix for strings starting with a combining mark
                }
                int oldLen = target.Length;

                // Loop on the decomposed characters, combining where possible
                char ch;

                for (int decompPos = compPos; decompPos < target.Length; ++decompPos)
                {
                    ch = target[decompPos];
                    UnicodeCanonicalClass chClass = UnicodeCharacterDataResolver.GetUnicodeCanonicalClass(ch);
                    char composite = GetPairwiseComposition(starterCh, ch);
                    UnicodeDecompositionType composeType = UnicodeCharacterDataResolver.GetUnicodeDecompositionType(composite);

                    // Merge ch into the starter only when a composite exists, the composite's
                    // decomposition type is None, and either the previous kept character has
                    // a lower class than ch or its class is NR.
                    if (composeType == UnicodeDecompositionType.None &&
                        composite != BidiChars.NotAChar &&
                        (lastClass < chClass || lastClass == UnicodeCanonicalClass.NR))
                    {
                        target[starterPos]       = composite;
                        // One more source character is now represented by the starter slot.
                        char_lengths[starterPos] = (int)char_lengths[starterPos] + 1;
                        // we know that we will only be replacing non-supplementaries by non-supplementaries
                        // so we don't have to adjust the decompPos
                        starterCh = composite;
                    }
                    else
                    {
                        // ch is kept as its own output character.
                        if (chClass == UnicodeCanonicalClass.NR)
                        {
                            // A character of class NR becomes the new starter.
                            starterPos = compPos;
                            starterCh  = ch;
                            text_idx++;
                        }
                        lastClass       = chClass;
                        target[compPos] = ch;
                        //char_lengths[compPos] = (int)char_lengths[compPos] + 1;
                        int chkPos = compPos;
                        if ((int)char_lengths[chkPos] < 0)
                        {
                            // Each pass increments the entry and inserts a zero at compPos.
                            // The Insert shifts the entry just incremented up by one slot, so
                            // chkPos++ lands on that same entry again; the loop therefore runs
                            // until the entry reaches zero, adding one zero slot per step.
                            while ((int)char_lengths[chkPos] < 0)
                            {
                                char_lengths[chkPos] = (int)char_lengths[chkPos] + 1;
                                char_lengths.Insert(compPos, 0);
                                chkPos++;
                            }
                        }
                        else
                        {
                            char_lengths[chkPos] = (int)char_lengths[chkPos] + 1;
                        }

                        // NOTE(review): nothing visible in this loop changes target.Length,
                        // so this branch looks like it never runs - confirm before relying on it.
                        if (target.Length != oldLen) // MAY HAVE TO ADJUST!
                        {
                            decompPos += target.Length - oldLen;
                            oldLen     = target.Length;
                        }
                        ++compPos;
                    }
                }
                // Drop everything beyond the composed output, in both the text and the counters.
                target.Length = compPos;
                char_lengths.RemoveRange(compPos, char_lengths.Count - compPos);
            }
Beispiel #7
0
            // 3.3.2 Explicit Levels and Directions
            // Resolves the embedding level of every character of _text and produces the
            // reordered output.
            // Steps, in order:
            //   1. optional Arabic shaping of _text (only when _hasArabic is set);
            //   2. rules X1-X9: explicit embeddings/overrides, filling _text_data;
            //   3. rule X10: for each run of equal level, weak, neutral and implicit resolution;
            //   4. ReorderString over the whole text, then FixMirroredCharacters;
            //   5. publication of the result into _bidi_text (characters of _text_data in
            //      their final order) and _bidi_indexes (the _idx stored on each of them).
            public void RecalculateCharactersEmbeddingLevels()
            {
                // This method is implemented in such a way it handles the string in logical order,
                // rather than visual order, so it is easier to handle complex layouts. That is why
                // it is placed BEFORE ReorderString rather than AFTER it, as its number suggests.
                if (_hasArabic)
                {
                    _text = PerformArabicShaping(_text);
                }

                _text_data = new CharData[_text.Length];

                #region rules X1 - X9
                // X1
                byte embeddingLevel           = EmbeddingLevel;
                DirectionalOverrideStatus dos = DirectionalOverrideStatus.Neutral;
                Stack dosStack = new Stack(); // override statuses saved by RLE/RLO/LRE/LRO, restored by PDF
                Stack elStack  = new Stack(); // embedding levels saved alongside them
                int   idx      = 0;           // running sum of _char_lengths, stored per character as _idx
                for (int i = 0; i < _text.Length; ++i)
                {
                    bool x9Char = false; // true for the explicit formatting codes handled below
                    char c      = _text[i];
                    _text_data[i]._ct   = UnicodeCharacterDataResolver.GetBidiCharacterType(c);
                    _text_data[i]._char = c;
                    _text_data[i]._idx  = idx;
                    idx += _char_lengths[i];

                    #region rules X2 - X5
                    // X2. With each RLE, compute the least greater odd embedding level.
                    // X4. With each RLO, compute the least greater odd embedding level.
                    if (c == BidiChars.RLE || c == BidiChars.RLO)
                    {
                        x9Char = true;
                        // Codes met at level 60 or above are ignored (nothing is pushed).
                        if (embeddingLevel < 60)
                        {
                            elStack.Push(embeddingLevel);
                            dosStack.Push(dos);

                            // Next odd level strictly above the current one.
                            ++embeddingLevel;
                            embeddingLevel |= 1;

                            if (c == BidiChars.RLE)
                            {
                                dos = DirectionalOverrideStatus.Neutral;
                            }
                            else
                            {
                                dos = DirectionalOverrideStatus.RTL;
                            }
                        }
                    }
                    // X3. With each LRE, compute the least greater even embedding level.
                    // X5. With each LRO, compute the least greater even embedding level.
                    else if (c == BidiChars.LRE || c == BidiChars.LRO)
                    {
                        x9Char = true;
                        // Codes met at level 59 or above are ignored (nothing is pushed).
                        if (embeddingLevel < 59)
                        {
                            elStack.Push(embeddingLevel);
                            dosStack.Push(dos);

                            // Next even level strictly above the current one.
                            embeddingLevel |= 1;
                            ++embeddingLevel;

                            if (c == BidiChars.LRE)
                            {
                                dos = DirectionalOverrideStatus.Neutral;
                            }
                            else
                            {
                                dos = DirectionalOverrideStatus.LTR;
                            }
                        }
                    }
                    #endregion

                    #region rule X6
                    // X6. For all types besides RLE, LRE, RLO, LRO, and PDF: (...)
                    else if (c != BidiChars.PDF)
                    {
                        // a. Set the level of the current character to the current embedding level.
                        _text_data[i]._el = embeddingLevel;

                        //b. Whenever the directional override status is not neutral,
                        //reset the current character type to the directional override status.
                        if (dos == DirectionalOverrideStatus.LTR)
                        {
                            _text_data[i]._ct = BidiCharacterType.L;
                        }
                        else if (dos == DirectionalOverrideStatus.RTL)
                        {
                            _text_data[i]._ct = BidiCharacterType.R;
                        }
                    }
                    #endregion

                    #region rule X7
                    //Terminating Embeddings and Overrides
                    // X7. With each PDF, determine the matching embedding or override code.
                    // If there was a valid matching code, restore (pop) the last remembered (pushed)
                    // embedding level and directional override.
                    else if (c == BidiChars.PDF)
                    {
                        x9Char = true;
                        // A PDF with nothing pushed is ignored.
                        if (elStack.Count > 0)
                        {
                            embeddingLevel = (byte)(elStack.Pop());
                            dos            = (DirectionalOverrideStatus)(dosStack.Pop());
                        }
                    }
                    #endregion

                    // X8. All explicit directional embeddings and overrides are completely
                    // terminated at the end of each paragraph. Paragraph separators are not
                    // included in the embedding.

                    // Formatting codes and BN characters are kept in the text; they take the
                    // embedding level in effect after the code has been processed.
                    if (x9Char || _text_data[i]._ct == BidiCharacterType.BN)
                    {
                        _text_data[i]._el = embeddingLevel;
                    }
                }
                #endregion

                // X10. The remaining rules are applied to each run of characters at the same level.
                int prevLevel = EmbeddingLevel;
                int start     = 0;
                while (start < _text.Length)
                {
                    #region rule X10 - run level setup
                    byte level            = _text_data[start]._el;
                    // sor: type for the higher of this run's level and the previous run's level
                    // (the paragraph level before the first run).
                    BidiCharacterType sor = TypeForLevel(Math.Max(prevLevel, level));

                    // The run extends while the level stays the same; limit is exclusive.
                    int limit = start + 1;
                    while (limit < _text.Length && _text_data[limit]._el == level)
                    {
                        ++limit;
                    }

                    // eor: type for the higher of this run's level and the next run's level
                    // (the paragraph level after the last run).
                    byte nextLevel        = limit < _text.Length ? _text_data[limit]._el : EmbeddingLevel;
                    BidiCharacterType eor = TypeForLevel(Math.Max(nextLevel, level));
                    #endregion

                    ResolveWeakTypes(start, limit, sor, eor);
                    ResolveNeutralTypes(start, limit, sor, eor, level);
                    ResolveImplicitTypes(start, limit, level);

                    prevLevel = level;
                    start     = limit;
                }

                // Reorder the whole text as a single span.
                ReorderString(0, _text.Length);

                FixMirroredCharacters();

                // Publish the result. The index array is filled directly, with no boxing
                // and no throw-away list.
                StringBuilder sb      = new StringBuilder(_text_data.Length);
                int[]         indexes = new int[_text_data.Length];
                int           outPos  = 0;
                foreach (CharData cd in _text_data)
                {
                    sb.Append(cd._char);
                    indexes[outPos] = cd._idx;
                    ++outPos;
                }

                _bidi_text    = sb.ToString();
                _bidi_indexes = indexes;
            }
        // Returns the Arabic joining type of c.
        // Characters in the hard-coded table below get their listed type. The table is
        // grouped by result; the listed ranges do not overlap, so group order is irrelevant.
        // Any other character is classified from its Unicode general category:
        // Mn, Me and Cf give T, everything else gives U.
        public static ArabicShapeJoiningType GetArabicShapeJoiningType(char c)
        {
            // Type C
            if (c == '\u0640' || c == '\u07FA' || c == '\u200D')
            {
                return(ArabicShapeJoiningType.C);
            }

            // Type U (explicitly listed)
            if ((c >= '\u0600' && c <= '\u0603') ||
                c == '\u0608' || c == '\u060B' || c == '\u0621' ||
                c == '\u0674' || c == '\u06DD')
            {
                return(ArabicShapeJoiningType.U);
            }

            // Type R
            if ((c >= '\u0622' && c <= '\u0625') ||
                c == '\u0627' ||
                c == '\u0629' ||
                (c >= '\u062F' && c <= '\u0632') ||
                c == '\u0648' ||
                (c >= '\u0671' && c <= '\u0673') ||
                (c >= '\u0675' && c <= '\u0677') ||
                (c >= '\u0688' && c <= '\u0699') ||
                c == '\u06C0' ||
                (c >= '\u06C3' && c <= '\u06CB') ||
                c == '\u06CD' ||
                c == '\u06CF' ||
                (c >= '\u06D2' && c <= '\u06D3') ||
                c == '\u06D5' ||
                (c >= '\u06EE' && c <= '\u06EF') ||
                c == '\u0710' ||
                (c >= '\u0715' && c <= '\u0719') ||
                c == '\u071E' ||
                c == '\u0728' ||
                c == '\u072A' ||
                c == '\u072C' ||
                c == '\u072F' ||
                c == '\u074D' ||
                (c >= '\u0759' && c <= '\u075B') ||
                (c >= '\u076B' && c <= '\u076C') ||
                c == '\u0771' ||
                (c >= '\u0773' && c <= '\u0774') ||
                (c >= '\u0778' && c <= '\u0779'))
            {
                return(ArabicShapeJoiningType.R);
            }

            // Type D
            if (c == '\u0626' ||
                c == '\u0628' ||
                (c >= '\u062A' && c <= '\u062E') ||
                (c >= '\u0633' && c <= '\u063F') ||
                (c >= '\u0641' && c <= '\u0647') ||
                (c >= '\u0649' && c <= '\u064A') ||
                (c >= '\u066E' && c <= '\u066F') ||
                (c >= '\u0678' && c <= '\u0687') ||
                (c >= '\u069A' && c <= '\u06BF') ||
                (c >= '\u06C1' && c <= '\u06C2') ||
                c == '\u06CC' ||
                c == '\u06CE' ||
                (c >= '\u06D0' && c <= '\u06D1') ||
                (c >= '\u06FA' && c <= '\u06FC') ||
                c == '\u06FF' ||
                (c >= '\u0712' && c <= '\u0714') ||
                (c >= '\u071A' && c <= '\u071D') ||
                (c >= '\u071F' && c <= '\u0727') ||
                c == '\u0729' ||
                c == '\u072B' ||
                (c >= '\u072D' && c <= '\u072E') ||
                (c >= '\u074E' && c <= '\u0758') ||
                (c >= '\u075C' && c <= '\u076A') ||
                (c >= '\u076D' && c <= '\u0770') ||
                c == '\u0772' ||
                (c >= '\u0775' && c <= '\u0777') ||
                (c >= '\u077A' && c <= '\u077F') ||
                (c >= '\u07CA' && c <= '\u07EA'))
            {
                return(ArabicShapeJoiningType.D);
            }

            // Not in the table: decide from the general category.
            UnicodeGeneralCategory category = UnicodeCharacterDataResolver.GetUnicodeGeneralCategory(c);
            bool transparent = category == UnicodeGeneralCategory.Mn ||
                               category == UnicodeGeneralCategory.Me ||
                               category == UnicodeGeneralCategory.Cf;

            if (transparent)
            {
                return(ArabicShapeJoiningType.T);
            }
            return(ArabicShapeJoiningType.U);
        }