Esempio n. 1
0
        // X10 (second bullet): populate an isolating run sequence from the paragraph indexes of its
        // characters and determine its start-of-sequence (sos) and end-of-sequence (eos) types (R or L).
        //
        //   sequence  - run sequence to fill in; length, indexes, types, level, sos and eos are written
        //   pLevel    - paragraph embedding level, used where there is no neighbouring character
        //   indexList - paragraph indexes of the characters in the sequence; must contain at least one entry
        //   typesList - bidi class of each character in the paragraph
        //   levels    - embedding level of each character in the paragraph
        private static void ComputeIsolatingRunSequence(this IsolatingRunSequence sequence, byte pLevel, List <int> indexList,
                                                        byte[] typesList, byte[] levels)
        {
            sequence.length  = indexList.Count;
            sequence.indexes = indexList.ToArray();                     // Indexes of run in original text

            // Character types of run sequence
            sequence.types = new byte[indexList.Count];
            for (int i = 0; i < sequence.length; i++)
            {
                sequence.types[i] = typesList[indexList[i]];
            }

            // sos: the higher of the first character's level and the level of the character before it
            // in the paragraph (paragraph level when the sequence starts the paragraph).
            var firstLevel = levels[indexList[0]];      // level of first character

            sequence.level = firstLevel;
            var previous  = indexList[0] - 1;
            var prevLevel = previous >= 0 ? levels[previous] : pLevel;

            sequence.sos = GetTypeForLevel(Math.Max(firstLevel, prevLevel));

            // eos: the higher of the last character's level and the level of the character after it
            // in the paragraph. The paragraph level is used instead when there is no following
            // character, or when the sequence ends with an isolate initiator (one lacking a matching PDI).
            var lastType  = (BidiClass)sequence.types[sequence.length - 1];
            var last      = indexList[sequence.length - 1];          // last character in the sequence
            var lastLevel = levels[last];
            var next      = last + 1;                                // next character after sequence (in paragraph)

            bool endsWithIsolateInitiator = lastType == BidiClass.LRI ||
                                            lastType == BidiClass.RLI ||
                                            lastType == BidiClass.FSI;

            // NOTE(review): X10 says characters removed by X9 should not be counted when looking for
            // the neighbouring character; this method takes the adjacent index as-is - confirm the
            // caller's levels array already accounts for that.
            var nextLevel = next < typesList.Length && !endsWithIsolateInitiator ? levels[next] : pLevel;

            sequence.eos = GetTypeForLevel(Math.Max(lastLevel, nextLevel));
        }
Esempio n. 2
0
 // Overwrite the sequence types in the half-open range [start, limit) with newType.
 // Nothing is written when start >= limit.
 private static void SetRunTypes(this IsolatingRunSequence sequence, int start, int limit, BidiClass newType)
 {
     byte replacement = (byte)newType;
     int  position    = start;

     while (position < limit)
     {
         sequence.types[position] = replacement;
         position++;
     }
 }
Esempio n. 3
0
 // Copy the sequence's types and resolved levels into the caller's arrays.
 // sequence.indexes[n] gives the slot in typesList / levelsList that receives entry n of the sequence.
 // The arrays are passed by ref but are only written into, never reassigned.
 private static void ApplyTypesAndLevels(this IsolatingRunSequence sequence, ref byte[] typesList, ref byte[] levelsList)
 {
     int position = 0;

     while (position < sequence.length)
     {
         int target = sequence.indexes[position];

         typesList[target]  = sequence.types[position];
         levelsList[target] = sequence.resolvedLevels[position];

         position++;
     }
 }
Esempio n. 4
0
        // Find where a run of characters whose types all belong to typesSet ends.
        // Scanning starts at index and stops at the first position (below limit) whose type is not
        // in typesSet; that position is returned. If every position up to limit matches, or index is
        // not below limit to begin with, limit is returned.
        private static int GetRunLimit(this IsolatingRunSequence sequence, int index, int limit, BidiClass[] typesSet)
        {
            while (index < limit)
            {
                var  current = (BidiClass)sequence.types[index];
                bool inSet   = false;

                foreach (var candidate in typesSet)
                {
                    if (candidate == current)
                    {
                        inSet = true;
                        break;
                    }
                }

                if (!inSet)
                {
                    // First type outside the set marks the end of the run
                    return index;
                }

                index++;
            }

            return limit;
        }
Esempio n. 5
0
        // 3.3.5 Resolve Implicit Embedding Levels (rules I1 and I2)
        // Allocates sequence.resolvedLevels, seeds every entry with the sequence embedding level via
        // SetLevels, then raises individual entries according to the character type.
        private static void ResolveImplicit(this IsolatingRunSequence sequence)
        {
            // Start every character at the sequence embedding level
            sequence.resolvedLevels = new byte[sequence.length];
            SetLevels(ref sequence.resolvedLevels, sequence.level);

            bool oddLevel = IsOdd(sequence.level);

            for (int pos = 0; pos < sequence.length; pos++)
            {
                var  cls      = (BidiClass)sequence.types[pos];
                bool isNumber = cls == BidiClass.AN || cls == BidiClass.EN;

                if (oddLevel)
                {
                    // I2
                    // Odd (right-to-left) sequence level: L, AN and EN go up one level
                    if (cls == BidiClass.L || isNumber)
                    {
                        sequence.resolvedLevels[pos] += 1;
                    }
                }
                else if (cls == BidiClass.R)
                {
                    // I1
                    // Even (left-to-right) sequence level: R goes up one level
                    sequence.resolvedLevels[pos] += 1;
                }
                else if (isNumber)
                {
                    // I1
                    // Even (left-to-right) sequence level: AN and EN go up two levels
                    sequence.resolvedLevels[pos] += 2;
                }
            }
        }
Esempio n. 6
0
        // 3.3.4 Resolve Neutral Types (rules N1 and N2)
        // Rewrites every run of neutral / isolate formatting characters (NIs) in sequence.types
        // to either R or L.
        private static void ResolveNeutrals(this IsolatingRunSequence sequence)
        {
            // TODO: N0 rule (Paired Brackets algorithm)

            // Types treated as NI
            var neutralTypes = new BidiClass[]
            {
                BidiClass.B, BidiClass.S, BidiClass.WS, BidiClass.ON,
                BidiClass.LRI, BidiClass.RLI, BidiClass.FSI, BidiClass.PDI
            };

            int pos = 0;
            while (pos < sequence.length)
            {
                // GetRunLimit returns pos itself when the character at pos is not an NI
                int runEnd = sequence.GetRunLimit(pos, sequence.length, neutralTypes);
                if (runEnd == pos)
                {
                    pos++;
                    continue;
                }

                // Type in front of the run: sos at the start boundary, otherwise the preceding
                // character, with AN and EN counted as R.
                BidiClass before = sequence.sos;
                if (pos > 0)
                {
                    before = (BidiClass)sequence.types[pos - 1];
                    if (before == BidiClass.AN || before == BidiClass.EN)
                    {
                        before = BidiClass.R;
                    }
                }

                // Type behind the run: eos at the end boundary, otherwise the following
                // character, with AN and EN counted as R.
                BidiClass after = sequence.eos;
                if (runEnd < sequence.length)
                {
                    after = (BidiClass)sequence.types[runEnd];
                    if (after == BidiClass.AN || after == BidiClass.EN)
                    {
                        after = BidiClass.R;
                    }
                }

                // N1: both sides agree, so the run takes that type.
                // N2: otherwise the run takes the direction of the sequence embedding level.
                var resolved = before == after ? before : GetTypeForLevel(sequence.level);
                sequence.SetRunTypes(pos, runEnd, resolved);

                // The character at runEnd (if any) is not an NI, so it can be skipped as well
                pos = runEnd + 1;
            }
        }
Esempio n. 7
0
        // 3.3.3 Resolve Weak Types (rules W1 - W7)
        // Rewrites sequence.types in place, applying one rule after another over the whole sequence.
        // sequence.sos / sequence.eos stand in for the neighbouring type wherever a rule looks past
        // the start or end of the sequence.
        private static void ResolveWeaks(this IsolatingRunSequence sequence)
        {
            // W1 NSM
            // An NSM takes the type of the previous character (sos at the start of the sequence),
            // or ON when the previous character is an isolate initiator or PDI.
            // types[i - 1] is read after earlier writes, so consecutive NSMs all inherit the same type.
            for (int i = 0; i < sequence.length; i++)
            {
                var ct       = (BidiClass)sequence.types[i];
                var prevType = i == 0 ? sequence.sos : (BidiClass)sequence.types[i - 1];
                if (ct == BidiClass.NSM)
                {
                    bool isIsolateOrPDI = prevType == BidiClass.LRI ||
                                          prevType == BidiClass.RLI ||
                                          prevType == BidiClass.FSI ||
                                          prevType == BidiClass.PDI;

                    sequence.types[i] = isIsolateOrPDI ? (byte)BidiClass.ON : (byte)prevType;
                }
            }

            // W2 EN
            // At each EN search backward until the first strong type (R, L, AL) is found;
            // if that type is AL then the EN resolves to AN.
            for (int i = 0; i < sequence.length; i++)
            {
                var chType = (BidiClass)sequence.types[i];
                if (chType == BidiClass.EN)
                {
                    for (int j = i - 1; j >= 0; j--)
                    {
                        var type = (BidiClass)sequence.types[j];
                        if (type == BidiClass.R || type == BidiClass.AL || type == BidiClass.L)
                        {
                            if (type == BidiClass.AL)
                            {
                                sequence.types[i] = (byte)BidiClass.AN;
                            }

                            // Only the nearest strong type counts; an AL further back must not be seen
                            break;
                        }
                    }
                }
            }

            // W3 AL
            // Resolve all ALs to R
            for (int i = 0; i < sequence.length; i++)
            {
                if ((BidiClass)sequence.types[i] == BidiClass.AL)
                {
                    sequence.types[i] = (byte)BidiClass.R;
                }
            }

            // W4 ES, CS (Number Separators)
            // A single ES between two ENs resolves to EN
            // A single CS between two numbers of the same type resolves to that number type
            // The first and last positions are skipped because they lack a neighbour on one side.
            for (int i = 1; i < sequence.length - 1; i++)
            {
                var cct      = (BidiClass)sequence.types[i];
                var prevType = (BidiClass)sequence.types[i - 1];
                var nextType = (BidiClass)sequence.types[i + 1];

                if (cct == BidiClass.ES && prevType == BidiClass.EN && nextType == BidiClass.EN) // EN ES EN -> EN EN EN
                {
                    sequence.types[i] = (byte)BidiClass.EN;
                }
                else if (cct == BidiClass.CS && (
                             prevType == BidiClass.EN && nextType == BidiClass.EN ||
                             prevType == BidiClass.AN && nextType == BidiClass.AN)) // EN CS EN -> EN EN EN, AN CS AN -> AN AN AN
                {
                    sequence.types[i] = (byte)prevType;
                }
            }

            // W5 ET(s) adjacent to EN resolve to EN(s)
            var typesSet = new BidiClass[] { BidiClass.ET };

            for (int i = 0; i < sequence.length; i++)
            {
                if ((BidiClass)sequence.types[i] == BidiClass.ET)
                {
                    int runStart = i;
                    int runEnd   = sequence.GetRunLimit(runStart, sequence.length, typesSet);

                    // Type just before the run of ETs (sos at the start boundary)
                    var type = runStart > 0 ? (BidiClass)sequence.types[runStart - 1] : sequence.sos;

                    if (type != BidiClass.EN)
                    {
                        // Not preceded by EN: look at the type just after the run (eos at the end boundary)
                        type = runEnd < sequence.length ? (BidiClass)sequence.types[runEnd] : sequence.eos;
                    }

                    if (type == BidiClass.EN)
                    {
                        sequence.SetRunTypes(runStart, runEnd, BidiClass.EN); // Resolve to EN
                    }

                    i = runEnd; // the character at runEnd is not ET, so the loop increment may skip it
                }
            }

            // W6 Remaining separators and terminators -> ON
            for (int i = 0; i < sequence.length; i++)
            {
                var t = (BidiClass)sequence.types[i];
                if (t == BidiClass.ET || t == BidiClass.ES || t == BidiClass.CS)
                {
                    sequence.types[i] = (byte)BidiClass.ON;
                }
            }

            // W7 EN
            // At each EN search backward for the first strong type (sos if none is found);
            // if that type is L then the EN resolves to L.
            for (int i = 0; i < sequence.length; i++)
            {
                if ((BidiClass)sequence.types[i] == BidiClass.EN)
                {
                    var prevStrong = sequence.sos;  // Default to sos if reached start
                    for (int j = i - 1; j >= 0; j--)
                    {
                        var t = (BidiClass)sequence.types[j];
                        if (t == BidiClass.R || t == BidiClass.L || t == BidiClass.AL)
                        {
                            prevStrong = t;
                            break;
                        }
                    }

                    // Decide only once the search has finished, so that a strong type found in the
                    // sequence takes precedence over sos and an EN at index 0 still consults sos.
                    if (prevStrong == BidiClass.L)
                    {
                        sequence.types[i] = (byte)BidiClass.L;
                    }
                }
            }
        }