Exemplo n.º 1
0
            /// <summary>
            /// 3.3.5 Resolving Implicit Levels.
            /// Raises the embedding level of the characters in [start, limit) according to
            /// their resolved type and the parity of the run's level (rules I1 and I2).
            /// </summary>
            /// <param name="start">Index of the first character to process.</param>
            /// <param name="limit">Index one past the last character to process.</param>
            /// <param name="level">Embedding level of the run; only its parity is examined.</param>
            private void ResolveImplicitTypes(int start, int limit, int level)
            {
                bool runIsEven = (level & 1) == 0;

                for (int pos = start; pos < limit; ++pos)
                {
                    BidiCharacterType type = _text_data[pos]._ct;
                    bool isNumber = type == BidiCharacterType.AN || type == BidiCharacterType.EN;

                    if (runIsEven)
                    {
                        // I1. Even (left-to-right) level: R goes up one level, AN and EN go up two.
                        if (type == BidiCharacterType.R)
                        {
                            _text_data[pos]._el += 1;
                        }
                        else if (isNumber)
                        {
                            _text_data[pos]._el += 2;
                        }
                    }
                    else if (isNumber || type == BidiCharacterType.L)
                    {
                        // I2. Odd (right-to-left) level: L, EN and AN go up one level.
                        _text_data[pos]._el += 1;
                    }
                }
            }
Exemplo n.º 2
0
 /// <summary>
 /// Overwrites the character type of every entry of <c>_text_data</c> in the
 /// half-open range [start, limit) with <paramref name="newType"/>.
 /// </summary>
 /// <param name="start">Index of the first entry to overwrite.</param>
 /// <param name="limit">Index one past the last entry to overwrite.</param>
 /// <param name="newType">Type assigned to each entry in the range.</param>
 private void SetTypes(int start, int limit, BidiCharacterType newType)
 {
     int pos = start;
     while (pos < limit)
     {
         _text_data[pos]._ct = newType;
         ++pos;
     }
 }
Exemplo n.º 3
0
        // 3.3.1.P1 - Split the text into separate paragraphs.
        // Every character whose bidi type is B closes the paragraph collected so far and is
        // stored on that paragraph as its ParagraphSeparator (it is not part of the text).
        // Text left over after the last separator becomes a final paragraph without one.
        public static Paragraph[] SplitStringToParagraphs(string logicalString)
        {
            ArrayList     paragraphs = new ArrayList();
            StringBuilder pending    = new StringBuilder();

            foreach (char ch in logicalString)
            {
                if (UnicodeCharacterDataResolver.GetBidiCharacterType(ch) != BidiCharacterType.B)
                {
                    pending.Append(ch);
                    continue;
                }

                // Paragraph separator: emit what has been collected and start over.
                Paragraph finished = new Paragraph(pending.ToString());
                finished.ParagraphSeparator = ch;
                paragraphs.Add(finished);
                pending.Length = 0;
            }

            // The string ended without a paragraph separator.
            if (pending.Length > 0)
            {
                paragraphs.Add(new Paragraph(pending.ToString()));
            }

            return (Paragraph[])paragraphs.ToArray(typeof(Paragraph));
        }
Exemplo n.º 4
0
 /// <summary>
 /// Creates a run by storing the four arguments in the members of the same name.
 /// </summary>
 /// <param name="direction">Value stored in <c>Direction</c>.</param>
 /// <param name="level">Value stored in <c>Level</c>.</param>
 /// <param name="start">Value stored in <c>Start</c>.</param>
 /// <param name="length">Value stored in <c>Length</c>.</param>
 public BidiRun(BidiCharacterType direction, int level, int start, int length)
 {
     Direction = direction;
     Level = level;
     Start = start;
     Length = length;
 }
Exemplo n.º 5
0
            /// <summary>
            /// Decomposes every character of <c>_text</c> through GetRecursiveDecomposition and
            /// returns the concatenated result, with each character that has a canonical class
            /// other than NR moved back past preceding characters of a higher canonical class.
            /// Also recomputes <c>_hasArabic</c> and <c>_hasNSMs</c> from the source characters.
            /// </summary>
            /// <param name="char_lengths">
            /// Receives one entry per source character: 1 minus the number of characters its
            /// decomposition produced.
            /// </param>
            /// <returns>The builder holding the decomposed, reordered text.</returns>
            private StringBuilder InternalDecompose(ArrayList char_lengths)
            {
                StringBuilder result  = new StringBuilder();
                StringBuilder scratch = new StringBuilder();

                _hasArabic = false;
                _hasNSMs   = false;

                for (int srcIndex = 0; srcIndex < _text.Length; ++srcIndex)
                {
                    BidiCharacterType bidiType = UnicodeCharacterDataResolver.GetBidiCharacterType(_text[srcIndex]);
                    if (bidiType == BidiCharacterType.AL || bidiType == BidiCharacterType.AN)
                    {
                        _hasArabic = true;
                    }
                    if (bidiType == BidiCharacterType.NSM)
                    {
                        _hasNSMs = true;
                    }

                    scratch.Length = 0;
                    GetRecursiveDecomposition(false, _text[srcIndex], scratch);
                    char_lengths.Add(1 - scratch.Length);

                    // Append every character of the decomposition (just the original
                    // character when there was no decomposition mapping).
                    for (int n = 0; n < scratch.Length; ++n)
                    {
                        char piece = scratch[n];
                        UnicodeCanonicalClass pieceClass = UnicodeCharacterDataResolver.GetUnicodeCanonicalClass(piece);
                        int insertAt = result.Length;

                        if (pieceClass != UnicodeCanonicalClass.NR)
                        {
                            // Walk the insertion point back while the character before it
                            // has a strictly higher canonical class.
                            while (insertAt > 0 &&
                                   UnicodeCharacterDataResolver.GetUnicodeCanonicalClass(result[insertAt - 1]) > pieceClass)
                            {
                                --insertAt;
                            }
                        }

                        result.Insert(insertAt, piece);
                    }
                }

                return result;
            }
Exemplo n.º 6
0
            // 3.3.1 The Paragraph Level
            // P2 - In each paragraph, find the first character of type L, AL, or R.
            // P3 - If a character is found in P2 and it is of type AL or R, then
            // set the paragraph embedding level to one; otherwise, set it to zero.
            //
            // Sets embedding_level from the first strong character of _text: 1 when it is
            // R or AL, 0 when it is L or when the text contains no strong character.
            public void RecalculateParagraphEmbeddingLevel()
            {
                // P3 default. This must start at zero: starting at one would make the
                // result independent of the text and the scan below pointless.
                embedding_level = 0;

                foreach (char c in _text)
                {
                    BidiCharacterType cType = UnicodeCharacterDataResolver.GetBidiCharacterType(c);
                    if (cType == BidiCharacterType.R || cType == BidiCharacterType.AL)
                    {
                        embedding_level = 1;
                        break;
                    }

                    if (cType == BidiCharacterType.L)
                    {
                        // First strong character is left-to-right: keep level zero.
                        break;
                    }
                }
            }
Exemplo n.º 7
0
        /// <summary>
        /// Builds one <c>CharData</c> entry per UTF-16 code unit of <paramref name="unicode_str"/>.
        /// Each code unit is mapped to a byte with UnicodeCharToSamanChar; when that returns 0
        /// the first byte of the character's code page 1256 encoding is used instead. The entry
        /// receives the type found in SamanCharType for that byte, the character obtained by
        /// decoding the byte with code page 1256, and the position of the code unit as its index.
        /// </summary>
        /// <param name="unicode_str">Text to convert.</param>
        /// <returns>An array with as many entries as <paramref name="unicode_str"/> has code units.</returns>
        public static CharData[] UnicodeStringToSaman(string unicode_str)
        {
            int _length = unicode_str.Length;
            CharData[] txt_data = new CharData[_length];

            // Look the encoding up once instead of once (or twice) per character.
            System.Text.Encoding windows1256 = System.Text.Encoding.GetEncoding(1256);
            byte[] single = new byte[1];

            for (int i = 0; i < _length; ++i)
            {
                byte saman = UnicodeCharToSamanChar((ushort)unicode_str[i]);
                if (saman == 0)
                {
                    // No dedicated mapping: fall back to the code page 1256 byte.
                    saman = windows1256.GetBytes(unicode_str[i].ToString())[0];
                }

                txt_data[i]._ct = SamanCharType[saman];

                // Code page 1256 is a single-byte encoding, so decoding this one byte gives
                // the same character as decoding the whole buffer and picking element i,
                // without re-decoding everything on each iteration.
                single[0] = saman;
                txt_data[i]._char = windows1256.GetChars(single)[0];
                txt_data[i]._idx  = i;
            }

            return txt_data;
        }
Exemplo n.º 8
0
        /// <summary>
        /// Builds one <c>CharData</c> entry per character of <paramref name="saman_str"/>.
        /// Each character is reduced to the first byte of its code page 1256 encoding; that
        /// byte selects the entry's type from SamanCharType and is passed to
        /// SamanCharToUnicodeChar. A non-zero result becomes the entry's character, otherwise
        /// the original character is kept. The entry's index is the character's position.
        /// </summary>
        /// <param name="saman_str">Text to convert.</param>
        /// <returns>An array with as many entries as <paramref name="saman_str"/> has characters.</returns>
        public static CharData[] SamanStringToUnicode(string saman_str)
        {
            int _length = saman_str.Length;
            CharData[] txt_data = new CharData[_length];

            if (_length == 0)
            {
                // Nothing to convert; also avoids touching the encoding for empty input.
                return txt_data;
            }

            // Look the encoding up once instead of once per character.
            System.Text.Encoding windows1256 = System.Text.Encoding.GetEncoding(1256);

            for (int i = 0; i < _length; ++i)
            {
                byte saman = windows1256.GetBytes(saman_str[i].ToString())[0];
                txt_data[i]._ct = SamanCharType[saman];

                ushort mapped = SamanCharToUnicodeChar(saman);

                // Zero means "no mapping": keep the input character unchanged.
                txt_data[i]._char = mapped != 0 ? (char)mapped : saman_str[i];
                txt_data[i]._idx  = i;
            }

            return txt_data;
        }
Exemplo n.º 9
0
            /// <summary>
            /// Scans <c>_text_data</c> forward from <paramref name="index"/> and returns the position
            /// of the first character whose type is not listed in <paramref name="validSet"/>.
            /// The character at <paramref name="index"/> itself is checked, so
            /// <paramref name="index"/> is returned when its type is not in the set.
            /// </summary>
            /// <param name="index">Position at which the scan starts.</param>
            /// <param name="limit">Position at which the scan stops (exclusive).</param>
            /// <param name="validSet">Types allowed inside the run.</param>
            /// <returns>
            /// The position of the first non-matching character, or <paramref name="limit"/>
            /// when every character up to it matches.
            /// </returns>
            private int FindRunLimit(int index, int limit, BidiCharacterType[] validSet)
            {
                for (int pos = index; pos < limit; ++pos)
                {
                    BidiCharacterType current = _text_data[pos]._ct;

                    bool accepted = false;
                    foreach (BidiCharacterType candidate in validSet)
                    {
                        if (candidate == current)
                        {
                            accepted = true;
                            break;
                        }
                    }

                    if (!accepted)
                    {
                        return pos; // the run ends just before this character
                    }
                }

                return limit;
            }
Exemplo n.º 10
0
            /// <summary>
            /// 3.3.4 Resolving Neutral Types.
            /// Replaces the type of every run of neutral characters (B, S, WS, ON) inside
            /// [start, limit) according to rules N1 and N2.
            /// </summary>
            /// <param name="start">Index of the first character of the level run.</param>
            /// <param name="limit">Index one past the last character of the level run.</param>
            /// <param name="sor">Type used as the leading context for a neutral run that begins the level run.</param>
            /// <param name="eor">Type used as the trailing context for a neutral run that ends the level run.</param>
            /// <param name="level">Embedding level of the run, passed to TypeForLevel for rule N2.</param>
            private void ResolveNeutralTypes(int start, int limit, BidiCharacterType sor, BidiCharacterType eor, int level)
            {
                // N1. A sequence of neutrals takes the direction of the surrounding strong text if the text on both sides has the same direction.
                //     European and Arabic numbers act as if they were R in terms of their influence on neutrals.
                //     Start-of-level-run (sor) and end-of-level-run (eor) are used at level run boundaries.
                // N2. Any remaining neutrals take the embedding direction.

                for (int i = start; i < limit; ++i)
                {
                    BidiCharacterType t = _text_data[i]._ct;
                    if (t == BidiCharacterType.WS || t == BidiCharacterType.ON || t == BidiCharacterType.B || t == BidiCharacterType.S)
                    {
                        // find bounds of run of neutrals
                        int runstart = i;
                        int runlimit = FindRunLimit(runstart, limit, new[] { BidiCharacterType.B, BidiCharacterType.S, BidiCharacterType.WS, BidiCharacterType.ON });

                        // determine effective types at ends of run
                        BidiCharacterType leadingType;
                        BidiCharacterType trailingType;

                        if (runstart == start)
                        {
                            // At the start of the level run the context is sor, mirroring
                            // the use of eor below. A fixed R here would resolve leading
                            // neutrals as right-to-left regardless of the run's context.
                            leadingType = sor;
                        }
                        else
                        {
                            leadingType = _text_data[runstart - 1]._ct;

                            // Numbers influence neutrals as if they were R.
                            if (leadingType == BidiCharacterType.AN || leadingType == BidiCharacterType.EN)
                            {
                                leadingType = BidiCharacterType.R;
                            }
                        }

                        if (runlimit == limit)
                        {
                            trailingType = eor;
                        }
                        else
                        {
                            trailingType = _text_data[runlimit]._ct;
                            if (trailingType == BidiCharacterType.AN || trailingType == BidiCharacterType.EN)
                            {
                                trailingType = BidiCharacterType.R;
                            }
                        }

                        BidiCharacterType resolvedType;
                        if (leadingType == trailingType)
                        {
                            // Rule N1.
                            resolvedType = leadingType;
                        }
                        else
                        {
                            // Rule N2.
                            // Notice the embedding level of the run is used, not
                            // the paragraph embedding level.
                            resolvedType = TypeForLevel(level);
                        }

                        SetTypes(runstart, runlimit, resolvedType);

                        // Skip over the run of (former) neutrals. The loop increment then also
                        // steps past the character at runlimit, which is known not to be neutral.
                        i = runlimit;
                    }
                }
            }
Exemplo n.º 11
0
            /// <summary>
            /// 3.3.3 Resolving Weak Types.
            /// Applies rules W1 through W7, in that order, to the characters in [start, limit).
            /// </summary>
            /// <param name="start">Index of the first character of the level run.</param>
            /// <param name="limit">Index one past the last character of the level run.</param>
            /// <param name="sor">Start-of-level-run type (used by W1, W5 and W7).</param>
            /// <param name="eor">End-of-level-run type (used by W5).</param>
            private void ResolveWeakTypes(int start, int limit, BidiCharacterType sor, BidiCharacterType eor)
            {
                // TODO - every rule makes its own pass over the run, which is somewhat inefficient.
                // TODO - rules W2 and W7 are the same apart from minor parameter changes.

                #region rule W1
                // W1. Each nonspacing mark (NSM) takes the type of the previous character;
                //     an NSM at the start of the level run takes the type of sor.
                if (_hasNSMs)
                {
                    BidiCharacterType inherited = sor;
                    for (int pos = start; pos < limit; ++pos)
                    {
                        if (_text_data[pos]._ct == BidiCharacterType.NSM)
                        {
                            _text_data[pos]._ct = inherited;
                        }
                        else
                        {
                            inherited = _text_data[pos]._ct;
                        }
                    }
                }
                #endregion

                #region rule W2
                // W2. Search backward from each European number until the first strong type
                //     (R, L, AL, or sor) is found. If it is AL, the European number becomes
                //     an Arabic number. Implemented as a forward scan that remembers which
                //     type a European number should get after the last strong character seen.
                BidiCharacterType numberTypeW2 = BidiCharacterType.EN;
                for (int pos = start; pos < limit; ++pos)
                {
                    BidiCharacterType current = _text_data[pos]._ct;
                    if (current == BidiCharacterType.AL)
                    {
                        numberTypeW2 = BidiCharacterType.AN;
                    }
                    else if (current == BidiCharacterType.L || current == BidiCharacterType.R)
                    {
                        numberTypeW2 = BidiCharacterType.EN;
                    }
                    else if (current == BidiCharacterType.EN)
                    {
                        _text_data[pos]._ct = numberTypeW2;
                    }
                }
                #endregion

                #region rule W3
                // W3. Change all ALs to R.
                if (_hasArabic)
                {
                    for (int pos = start; pos < limit; ++pos)
                    {
                        if (_text_data[pos]._ct != BidiCharacterType.AL)
                        {
                            continue;
                        }
                        _text_data[pos]._ct = BidiCharacterType.R;
                    }
                }
                #endregion

                #region rule W4
                // W4. A single European separator between two European numbers changes to a
                //     European number. A single common separator between two numbers of the
                //     same type changes to that type.
                //
                // Both neighbours must exist for the rule to apply, so the first and last
                // positions of the run are not visited.
                //
                // Rewriting types during a left-to-right scan is safe here: a change at the
                // current position could only matter to the position on its right, and only
                // if that one is ES or CS; but the current position only changes when its
                // right neighbour is a number, i.e. not ES or CS.
                for (int pos = start + 1; pos < limit - 1; ++pos)
                {
                    BidiCharacterType separator = _text_data[pos]._ct;
                    if (separator != BidiCharacterType.ES && separator != BidiCharacterType.CS)
                    {
                        continue;
                    }

                    BidiCharacterType before = _text_data[pos - 1]._ct;
                    BidiCharacterType after  = _text_data[pos + 1]._ct;

                    if (before == BidiCharacterType.EN && after == BidiCharacterType.EN)
                    {
                        _text_data[pos]._ct = BidiCharacterType.EN;
                    }
                    else if (separator == BidiCharacterType.CS && before == BidiCharacterType.AN && after == BidiCharacterType.AN)
                    {
                        _text_data[pos]._ct = BidiCharacterType.AN;
                    }
                }
                #endregion

                #region rule W5
                // W5. A sequence of European terminators adjacent to European numbers changes
                //     to all European numbers.
                for (int pos = start; pos < limit; ++pos)
                {
                    if (_text_data[pos]._ct != BidiCharacterType.ET)
                    {
                        continue;
                    }

                    // Locate the end of the terminator sequence.
                    int runEnd = FindRunLimit(pos, limit, new BidiCharacterType[] { BidiCharacterType.ET });

                    // Look at the type before the sequence and, unless that is already EN,
                    // at the type after it.
                    BidiCharacterType neighbour = pos == start ? sor : _text_data[pos - 1]._ct;
                    if (neighbour != BidiCharacterType.EN)
                    {
                        neighbour = runEnd == limit ? eor : _text_data[runEnd]._ct;
                    }

                    if (neighbour == BidiCharacterType.EN)
                    {
                        SetTypes(pos, runEnd, BidiCharacterType.EN);
                    }

                    // Continue after the sequence (the loop increment also steps past the
                    // character at runEnd, which is not an ET).
                    pos = runEnd;
                }
                #endregion

                #region rule W6
                // W6. Otherwise, separators and terminators change to Other Neutral.
                for (int pos = start; pos < limit; ++pos)
                {
                    BidiCharacterType current = _text_data[pos]._ct;
                    bool separatorOrTerminator = current == BidiCharacterType.ES
                                              || current == BidiCharacterType.ET
                                              || current == BidiCharacterType.CS;
                    if (separatorOrTerminator)
                    {
                        _text_data[pos]._ct = BidiCharacterType.ON;
                    }
                }
                #endregion

                #region rule W7
                // W7. Search backward from each European number until the first strong type
                //     (R, L, or sor) is found. If it is L, the European number becomes L.
                //     Same forward-scan technique as W2, seeded from sor.
                BidiCharacterType numberTypeW7 = sor == BidiCharacterType.L ? BidiCharacterType.L : BidiCharacterType.EN;
                for (int pos = start; pos < limit; ++pos)
                {
                    BidiCharacterType current = _text_data[pos]._ct;
                    if (current == BidiCharacterType.L)
                    {
                        numberTypeW7 = BidiCharacterType.L;
                    }
                    else if (current == BidiCharacterType.R)
                    {
                        numberTypeW7 = BidiCharacterType.EN;
                    }
                    else if (current == BidiCharacterType.EN)
                    {
                        _text_data[pos]._ct = numberTypeW7;
                    }
                }
                #endregion
            }
Exemplo n.º 12
0
            // 3.3.2 Explicit Levels and Directions
            //
            // Rebuilds _text_data from _text (after Arabic shaping when _hasArabic is set),
            // assigns explicit embedding levels (X1-X9), resolves weak, neutral and implicit
            // types per level run (X10), reorders the string, fixes mirrored characters and
            // finally publishes the result in _bidi_text and _bidi_indexes.
            public void RecalculateCharactersEmbeddingLevels()
            {
                // Highest embedding level an explicit code may establish in the UAX #9
                // revisions that define RLE/LRE/RLO/LRO/PDF without isolates.
                const int MaxExplicitLevel = 61;

                // This method is implemented in such a way it handles the string in logical order,
                // rather than visual order, so it is easier to handle complex layouts. That is why
                // it is placed BEFORE ReorderString rather than AFTER it, as its number suggests.
                if (_hasArabic)
                {
                    _text = PerformArabicShaping(_text);
                }

                _text_data = new CharData[_text.Length];

                #region rules X1 - X9
                // X1
                byte embeddingLevel           = EmbeddingLevel;
                DirectionalOverrideStatus dos = DirectionalOverrideStatus.Neutral;
                Stack dosStack = new Stack();
                Stack elStack  = new Stack();
                int   idx      = 0;
                for (int i = 0; i < _text.Length; ++i)
                {
                    bool x9Char = false;
                    char c      = _text[i];
                    _text_data[i]._ct   = UnicodeCharacterDataResolver.GetBidiCharacterType(c);
                    _text_data[i]._char = c;
                    _text_data[i]._idx  = idx;
                    idx += _char_lengths[i];

                    #region rules X2 - X5
                    // X2. With each RLE, compute the least greater odd embedding level.
                    // X4. With each RLO, compute the least greater odd embedding level.
                    if (c == BidiChars.RLE || c == BidiChars.RLO)
                    {
                        x9Char = true;

                        // The code is valid only if the new level does not exceed the maximum.
                        // Testing the computed level (rather than the current one against a
                        // fixed bound) accepts the step from level 60 to 61.
                        int nextOddLevel = (embeddingLevel + 1) | 1;
                        if (nextOddLevel <= MaxExplicitLevel)
                        {
                            elStack.Push(embeddingLevel);
                            dosStack.Push(dos);

                            embeddingLevel = (byte)nextOddLevel;

                            if (c == BidiChars.RLE)
                            {
                                dos = DirectionalOverrideStatus.Neutral;
                            }
                            else
                            {
                                dos = DirectionalOverrideStatus.RTL;
                            }
                        }
                    }
                    // X3. With each LRE, compute the least greater even embedding level.
                    // X5. With each LRO, compute the least greater even embedding level.
                    else if (c == BidiChars.LRE || c == BidiChars.LRO)
                    {
                        x9Char = true;

                        // Same validity test as above; accepts the step from level 59 to 60.
                        int nextEvenLevel = (embeddingLevel | 1) + 1;
                        if (nextEvenLevel <= MaxExplicitLevel)
                        {
                            elStack.Push(embeddingLevel);
                            dosStack.Push(dos);

                            embeddingLevel = (byte)nextEvenLevel;

                            if (c == BidiChars.LRE)
                            {
                                dos = DirectionalOverrideStatus.Neutral;
                            }
                            else
                            {
                                dos = DirectionalOverrideStatus.LTR;
                            }
                        }
                    }
                    #endregion

                    #region rule X6
                    // X6. For all types besides RLE, LRE, RLO, LRO, and PDF: (...)
                    else if (c != BidiChars.PDF)
                    {
                        // a. Set the level of the current character to the current embedding level.
                        _text_data[i]._el = embeddingLevel;

                        // b. Whenever the directional override status is not neutral,
                        // reset the current character type to the directional override status.
                        if (dos == DirectionalOverrideStatus.LTR)
                        {
                            _text_data[i]._ct = BidiCharacterType.L;
                        }
                        else if (dos == DirectionalOverrideStatus.RTL)
                        {
                            _text_data[i]._ct = BidiCharacterType.R;
                        }
                    }
                    #endregion

                    #region rule X7
                    // Terminating Embeddings and Overrides
                    // X7. With each PDF, determine the matching embedding or override code.
                    // If there was a valid matching code, restore (pop) the last remembered (pushed)
                    // embedding level and directional override.
                    // NOTE(review): codes rejected above are not counted, so a PDF that closes a
                    // rejected code pops the innermost accepted one instead - confirm whether an
                    // overflow counter is wanted here.
                    else if (c == BidiChars.PDF)
                    {
                        x9Char = true;
                        if (elStack.Count > 0)
                        {
                            embeddingLevel = (byte)(elStack.Pop());
                            dos            = (DirectionalOverrideStatus)(dosStack.Pop());
                        }
                    }
                    #endregion

                    // X8. All explicit directional embeddings and overrides are completely
                    // terminated at the end of each paragraph. Paragraph separators are not
                    // included in the embedding.

                    // Explicit codes and BN characters are kept in the data and simply carry the
                    // embedding level in effect after they were processed.
                    if (x9Char || _text_data[i]._ct == BidiCharacterType.BN)
                    {
                        _text_data[i]._el = embeddingLevel;
                    }
                }
                #endregion

                // X10. The remaining rules are applied to each run of characters at the same level.
                int prevLevel = EmbeddingLevel;
                int start     = 0;
                while (start < _text.Length)
                {
                    #region rule X10 - run level setup
                    byte level            = _text_data[start]._el;
                    BidiCharacterType sor = TypeForLevel(Math.Max(prevLevel, level));

                    // Extend the run while the level stays the same.
                    int limit = start + 1;
                    while (limit < _text.Length && _text_data[limit]._el == level)
                    {
                        ++limit;
                    }

                    // Past the end of the text the paragraph level is used as the next level.
                    byte nextLevel        = limit < _text.Length ? _text_data[limit]._el : EmbeddingLevel;
                    BidiCharacterType eor = TypeForLevel(Math.Max(nextLevel, level));
                    #endregion

                    ResolveWeakTypes(start, limit, sor, eor);
                    ResolveNeutralTypes(start, limit, sor, eor, level);
                    ResolveImplicitTypes(start, limit, level);

                    prevLevel = level;
                    start     = limit;
                }

                // Wrap lines
                ReorderString(0, _text.Length);

                FixMirroredCharacters();

                // Publish the characters and their indexes in the order _text_data now has.
                // Filled directly into typed arrays: no boxing, no throwaway lists.
                int count = _text_data.Length;
                char[] visualChars   = new char[count];
                int[]  visualIndexes = new int[count];
                for (int i = 0; i < count; ++i)
                {
                    visualChars[i]   = _text_data[i]._char;
                    visualIndexes[i] = _text_data[i]._idx;
                }

                _bidi_text    = new string(visualChars);
                _bidi_indexes = visualIndexes;
            }
Exemplo n.º 13
0
 /// <summary>
 /// Assigns <paramref name="newType"/> as the character type of each entry of
 /// <c>_text_data</c> from <paramref name="start"/> up to, but not including, <paramref name="limit"/>.
 /// </summary>
 /// <param name="start">First index that is written.</param>
 /// <param name="limit">First index that is no longer written.</param>
 /// <param name="newType">Type to store.</param>
 private void SetTypes(int start, int limit, BidiCharacterType newType) {
    int remaining = limit - start;
    for (int pos = start; remaining > 0; ++pos, --remaining) {
       _text_data[pos]._ct = newType;
    }
 }
Exemplo n.º 14
0
         /// <summary>
         /// Finds where a run of characters whose types all belong to <paramref name="validSet"/> ends.
         /// The scan begins at <paramref name="index"/> and includes that position, so
         /// <paramref name="index"/> itself is returned when its type is not in the set.
         /// </summary>
         /// <param name="index">Position at which the scan starts.</param>
         /// <param name="limit">Position at which the scan stops (exclusive).</param>
         /// <param name="validSet">Types allowed inside the run.</param>
         /// <returns>
         /// The position of the first character whose type is not in the set, or
         /// <paramref name="limit"/> when no such character is found before it.
         /// </returns>
         private int FindRunLimit(int index, int limit, BidiCharacterType[] validSet) {
            int cursor = index;
            while (cursor < limit) {
               BidiCharacterType type = _text_data[cursor]._ct;

               // Advance k to the first entry equal to type, or to the end of the set.
               int k = 0;
               while (k < validSet.Length && validSet[k] != type) {
                  ++k;
               }

               if (k == validSet.Length) {
                  return cursor; // type is not in validSet: the run ends here
               }
               ++cursor;
            }
            return limit;
         }
Exemplo n.º 15
0
         /// <summary>
         /// 3.3.4 Resolving Neutral Types.
         /// Replaces the type of every run of neutral characters (B, S, WS, ON) inside
         /// [start, limit) according to rules N1 and N2.
         /// </summary>
         /// <param name="start">Index of the first character of the level run.</param>
         /// <param name="limit">Index one past the last character of the level run.</param>
         /// <param name="sor">Leading context for a neutral run that begins the level run.</param>
         /// <param name="eor">Trailing context for a neutral run that ends the level run.</param>
         /// <param name="level">Embedding level of the run, passed to TypeForLevel for rule N2.</param>
         private void ResolveNeutralTypes(int start, int limit, BidiCharacterType sor, BidiCharacterType eor, int level) {
            // N1. A sequence of neutrals takes the direction of the surrounding strong text if
            //     the text on both sides has the same direction. European and Arabic numbers
            //     act as if they were R in terms of their influence on neutrals. sor and eor
            //     are used at level run boundaries.
            // N2. Any remaining neutrals take the embedding direction.

            int pos = start;
            while (pos < limit) {
               BidiCharacterType current = _text_data[pos]._ct;
               bool isNeutral = current == BidiCharacterType.WS || current == BidiCharacterType.ON
                             || current == BidiCharacterType.B || current == BidiCharacterType.S;
               if (!isNeutral) {
                  ++pos;
                  continue;
               }

               // The neutral run covers [pos, runEnd).
               int runEnd = FindRunLimit(pos, limit, new BidiCharacterType[] { BidiCharacterType.B, BidiCharacterType.S, BidiCharacterType.WS, BidiCharacterType.ON });

               // Effective type in front of the run.
               BidiCharacterType before = sor;
               if (pos != start) {
                  before = _text_data[pos - 1]._ct;
                  if (before == BidiCharacterType.AN || before == BidiCharacterType.EN) {
                     before = BidiCharacterType.R;
                  }
               }

               // Effective type behind the run.
               BidiCharacterType after = eor;
               if (runEnd != limit) {
                  after = _text_data[runEnd]._ct;
                  if (after == BidiCharacterType.AN || after == BidiCharacterType.EN) {
                     after = BidiCharacterType.R;
                  }
               }

               // N1 when both sides agree; otherwise N2, which uses the embedding level of
               // the run, not the paragraph embedding level.
               SetTypes(pos, runEnd, before == after ? before : TypeForLevel(level));

               // Resume after the character that ended the run; it is known not to be neutral.
               pos = runEnd + 1;
            }
         }
Exemplo n.º 16
0
         /// <summary>
         /// 3.3.3 Resolving Weak Types. Applies rules W1 through W7, in that order, to the
         /// character types of the level run [start, limit).
         /// </summary>
         /// <param name="start">Index of the first character of the level run.</param>
         /// <param name="limit">Index one past the last character of the level run.</param>
         /// <param name="sor">Start-of-level-run type; stands in for the type preceding <paramref name="start"/> (rules W1, W5, W7).</param>
         /// <param name="eor">End-of-level-run type; stands in for the type at <paramref name="limit"/> (rule W5).</param>
         private void ResolveWeakTypes(int start, int limit, BidiCharacterType sor, BidiCharacterType eor) {
            // TODO - making a separate pass over the run for every rule seems somewhat inefficient...
            // TODO - rules W2 and W7 are the same, except for minor parameter changes...

            #region rule W1
            // W1. Examine each nonspacing mark (NSM) in the level run, and change the type of the NSM to the type of
            //     the previous character. If the NSM is at the start of the level run, it will get the type of sor.
            BidiCharacterType lastNonNsmType = sor;
            for (int i = start; i < limit; ++i) {
               if (_text_data[i]._ct == BidiCharacterType.NSM)
                  _text_data[i]._ct = lastNonNsmType;
               else
                  lastNonNsmType = _text_data[i]._ct;
            }
            #endregion

            #region rule W2
            // W2. Search backward from each instance of a European number until the first strong type (R, L, AL, or
            //     sor) is found. If an AL is found, change the type of the European number to Arabic number.
            //
            // Done as a single forward pass: the variable below holds the type an EN must take
            // given the most recent strong type seen so far (EN itself means "leave unchanged").
            BidiCharacterType enTypeAfterStrong = BidiCharacterType.EN;
            for (int i = start; i < limit; ++i) {
               switch (_text_data[i]._ct) {
                  case BidiCharacterType.L:
                  case BidiCharacterType.R:
                     enTypeAfterStrong = BidiCharacterType.EN;
                     break;
                  case BidiCharacterType.AL:
                     enTypeAfterStrong = BidiCharacterType.AN;
                     break;
                  case BidiCharacterType.EN:
                     _text_data[i]._ct = enTypeAfterStrong;
                     break;
               }
            }
            #endregion

            #region rule W3
            // W3. Change all ALs to R.
            for (int i = start; i < limit; ++i) {
               if (_text_data[i]._ct != BidiCharacterType.AL)
                  continue;
               _text_data[i]._ct = BidiCharacterType.R;
            }
            #endregion

            #region rule W4
            // W4. A single European separator between two European numbers changes to a European number. A single
            //     common separator between two numbers of the same type changes to that type.

            // A separator needs a neighbour on each side for this rule to apply, so the first
            // and last positions of the run are not visited.
            //
            // Rewriting types while scanning left to right is safe here: a changed value could
            // only influence its right-hand neighbour, and only if that neighbour is ES or CS.
            // But a value only changes when its right-hand neighbour is a number, i.e. not ES or
            // CS. So a change never affects the rest of the scan.
            for (int i = start + 1; i < limit - 1; ++i) {
               BidiCharacterType separator = _text_data[i]._ct;
               if (separator != BidiCharacterType.ES && separator != BidiCharacterType.CS)
                  continue;

               BidiCharacterType left = _text_data[i - 1]._ct;
               BidiCharacterType right = _text_data[i + 1]._ct;
               if (left != right)
                  continue;

               if (left == BidiCharacterType.EN) {
                  // ES or CS between two European numbers.
                  _text_data[i]._ct = BidiCharacterType.EN;
               } else if (left == BidiCharacterType.AN && separator == BidiCharacterType.CS) {
                  // Only a common separator joins two Arabic numbers.
                  _text_data[i]._ct = BidiCharacterType.AN;
               }
            }
            #endregion

            #region rule W5
            // W5. A sequence of European terminators adjacent to European numbers changes to all European numbers.
            BidiCharacterType[] terminatorOnly = { BidiCharacterType.ET };
            int scan = start;
            while (scan < limit) {
               if (_text_data[scan]._ct != BidiCharacterType.ET) {
                  ++scan;
                  continue;
               }

               // Bounds of this sequence of terminators: [etStart, etEnd).
               int etStart = scan;
               int etEnd = FindRunLimit(etStart, limit, terminatorOnly);

               // Look at the type before the sequence first; the type after it is only
               // consulted when the one before is not EN.
               BidiCharacterType before = etStart == start ? sor : _text_data[etStart - 1]._ct;
               bool adjacentToEn = before == BidiCharacterType.EN;
               if (!adjacentToEn) {
                  BidiCharacterType after = etEnd == limit ? eor : _text_data[etEnd]._ct;
                  adjacentToEn = after == BidiCharacterType.EN;
               }

               if (adjacentToEn)
                  SetTypes(etStart, etEnd, BidiCharacterType.EN);

               // The character at etEnd (if any) is not ET, so resume just past it.
               scan = etEnd + 1;
            }
            #endregion

            #region rule W6
            // W6. Otherwise, separators and terminators change to Other Neutral.
            for (int i = start; i < limit; ++i) {
               BidiCharacterType current = _text_data[i]._ct;
               bool isSeparatorOrTerminator = current == BidiCharacterType.ES
                                           || current == BidiCharacterType.ET
                                           || current == BidiCharacterType.CS;
               if (isSeparatorOrTerminator)
                  _text_data[i]._ct = BidiCharacterType.ON;
            }
            #endregion

            #region rule W7
            // W7. Search backward from each instance of a European number until the first strong type (R, L, or sor)
            //     is found. If an L is found, then change the type of the European number to L.
            //
            // Same single-pass scheme as W2: the variable holds the type an EN must take given
            // the most recent strong type, seeded from sor.
            BidiCharacterType enTypeAfterStrongW7 = BidiCharacterType.EN;
            if (sor == BidiCharacterType.L)
               enTypeAfterStrongW7 = BidiCharacterType.L;

            for (int i = start; i < limit; ++i) {
               switch (_text_data[i]._ct) {
                  case BidiCharacterType.R:
                     enTypeAfterStrongW7 = BidiCharacterType.EN;
                     break;
                  case BidiCharacterType.L:
                     enTypeAfterStrongW7 = BidiCharacterType.L;
                     break;
                  case BidiCharacterType.EN:
                     _text_data[i]._ct = enTypeAfterStrongW7;
                     break;
               }
            }
            #endregion
         }