Exemple #1
0
        // 3.3.4 Resolve Neutral Types
        // In final results all NIs are resolved to R or L
        /// <summary>
        /// Resolves runs of neutral and isolate-formatting types (NIs) in
        /// <paramref name="sequence"/> to a direction, applying rules N1 and N2.
        /// Rule N0 (paired brackets) is not implemented yet.
        /// </summary>
        /// <param name="sequence">Isolating run sequence whose types are rewritten in place.</param>
        private static void ResolveNeutrals(this IsolatingRunSequence sequence)
        {
            // TODO: N0 rule (Paired Brackets algorithm)

            // The set of types treated as NI; also used to find where a run of NIs stops.
            var neutralTypes = new BidiClass[]
            {
                BidiClass.B, BidiClass.S, BidiClass.WS, BidiClass.ON,
                BidiClass.LRI, BidiClass.RLI, BidiClass.FSI, BidiClass.PDI
            };

            int index = 0;
            while (index < sequence.length)
            {
                bool isNeutral;
                switch ((BidiClass)sequence.types[index])
                {
                    case BidiClass.B:
                    case BidiClass.S:
                    case BidiClass.WS:
                    case BidiClass.ON:
                    case BidiClass.LRI:
                    case BidiClass.RLI:
                    case BidiClass.FSI:
                    case BidiClass.PDI:
                        isNeutral = true;
                        break;
                    default:
                        isNeutral = false;
                        break;
                }

                if (!isNeutral)
                {
                    index++;
                    continue;
                }

                int runStart = index;
                int runLimit = sequence.GetRunLimit(runStart, sequence.length, neutralTypes);

                // Type on the leading side of the run: sos at the sequence boundary,
                // otherwise the preceding element, with numbers (EN/AN) counted as R.
                BidiClass before;
                if (runStart > 0)
                {
                    before = (BidiClass)sequence.types[runStart - 1];
                    if (before == BidiClass.EN || before == BidiClass.AN)
                    {
                        before = BidiClass.R;
                    }
                }
                else
                {
                    before = sequence.sos;
                }

                // Type on the trailing side of the run: eos at the sequence boundary,
                // otherwise the element at the run limit, with numbers counted as R.
                BidiClass after;
                if (runLimit != sequence.length)
                {
                    after = (BidiClass)sequence.types[runLimit];
                    if (after == BidiClass.EN || after == BidiClass.AN)
                    {
                        after = BidiClass.R;
                    }
                }
                else
                {
                    after = sequence.eos;
                }

                if (before != after)
                {
                    // N2: the sides disagree, so the run takes the type derived
                    // from the sequence's embedding level.
                    sequence.SetRunTypes(runStart, runLimit, GetTypeForLevel(sequence.level));
                }
                else
                {
                    // N1: both sides agree, so the run takes that type.
                    sequence.SetRunTypes(runStart, runLimit, before);
                }

                // The element at runLimit is not an NI, so scanning resumes just after it.
                index = runLimit + 1;
            }
        }
Exemple #2
0
        // 3.3.3 Resolve Weak Types
        /// <summary>
        /// Resolves weak bidi types in <paramref name="sequence"/> by applying
        /// rules W1 through W7 in order. Each rule is a separate pass that rewrites
        /// <c>sequence.types</c> in place, so later rules see the results of earlier ones.
        /// </summary>
        /// <param name="sequence">Isolating run sequence whose types are rewritten in place.</param>
        private static void ResolveWeaks(this IsolatingRunSequence sequence)
        {
            // W1 NSM
            // An NSM takes the type of the previous element (sos at the start of the
            // sequence), or ON when the previous element is an isolate initiator or PDI.
            // The pass runs left to right in place, so consecutive NSMs chain correctly.
            for (int i = 0; i < sequence.length; i++)
            {
                var ct       = (BidiClass)sequence.types[i];
                var prevType = i == 0 ? sequence.sos : (BidiClass)sequence.types[i - 1];
                if (ct == BidiClass.NSM)
                {
                    bool isIsolateOrPDI = prevType == BidiClass.LRI ||
                                          prevType == BidiClass.RLI ||
                                          prevType == BidiClass.FSI ||
                                          prevType == BidiClass.PDI;

                    sequence.types[i] = isIsolateOrPDI ? (byte)BidiClass.ON : (byte)prevType;
                }
            }

            // W2 EN
            // For each EN, search backward for the first strong type (R, L or AL).
            // Only if that first strong type is AL does the EN become AN.
            for (int i = 0; i < sequence.length; i++)
            {
                var chType = (BidiClass)sequence.types[i];
                if (chType == BidiClass.EN)
                {
                    for (int j = i - 1; j >= 0; j--)
                    {
                        var type = (BidiClass)sequence.types[j];
                        if (type == BidiClass.R || type == BidiClass.AL || type == BidiClass.L)
                        {
                            if (type == BidiClass.AL)
                            {
                                sequence.types[i] = (byte)BidiClass.AN;
                            }

                            // Stop at the FIRST strong type, whatever it is; an AL further
                            // back must not affect this EN once an L or R intervenes.
                            break;
                        }
                    }
                }
            }

            // W3 AL
            // Resolve all ALs to R
            for (int i = 0; i < sequence.length; i++)
            {
                if ((BidiClass)sequence.types[i] == BidiClass.AL)
                {
                    sequence.types[i] = (byte)BidiClass.R;
                }
            }

            // W4 ES, CS (Number Separators)
            // A single ES between two ENs becomes EN.
            // A single CS between two numbers of the same type becomes that type.
            // The first and last elements are skipped: they have no neighbour on one side.
            for (int i = 1; i < sequence.length - 1; i++)
            {
                var cct      = (BidiClass)sequence.types[i];
                var prevType = (BidiClass)sequence.types[i - 1];
                var nextType = (BidiClass)sequence.types[i + 1];

                if (cct == BidiClass.ES && prevType == BidiClass.EN && nextType == BidiClass.EN) // EN ES EN -> EN EN EN
                {
                    sequence.types[i] = (byte)BidiClass.EN;
                }
                else if (cct == BidiClass.CS && (
                             prevType == BidiClass.EN && nextType == BidiClass.EN ||
                             prevType == BidiClass.AN && nextType == BidiClass.AN)) // EN CS EN -> EN EN EN, AN CS AN -> AN AN AN
                {
                    sequence.types[i] = (byte)prevType;
                }
            }

            // W5 ET(s) adjacent to EN resolve to EN(s)
            var typesSet = new BidiClass[] { BidiClass.ET };

            for (int i = 0; i < sequence.length; i++)
            {
                if ((BidiClass)sequence.types[i] == BidiClass.ET)
                {
                    int runStart = i;
                    int runEnd   = sequence.GetRunLimit(runStart, sequence.length, typesSet);

                    // Look at the element before the run first (sos at the boundary) ...
                    var type = runStart > 0 ? (BidiClass)sequence.types[runStart - 1] : sequence.sos;

                    // ... and fall back to the element after the run (eos at the boundary).
                    if (type != BidiClass.EN)
                    {
                        type = runEnd < sequence.length ? (BidiClass)sequence.types[runEnd] : sequence.eos;
                    }

                    if (type == BidiClass.EN)
                    {
                        sequence.SetRunTypes(runStart, runEnd, BidiClass.EN); // Resolve to EN
                    }

                    i = runEnd; // continue after the run; the element at runEnd is not ET
                }
            }

            // W6 Separators and Terminators -> ON
            // Anything still ET, ES or CS after W4/W5 becomes ON.
            for (int i = 0; i < sequence.length; i++)
            {
                var t = (BidiClass)sequence.types[i];
                if (t == BidiClass.ET || t == BidiClass.ES || t == BidiClass.CS)
                {
                    sequence.types[i] = (byte)BidiClass.ON;
                }
            }

            // W7 same as W2 but EN -> L
            // For each EN, search backward for the first strong type, falling back to
            // sos when the start of the sequence is reached. If it is L, the EN becomes L.
            for (int i = 0; i < sequence.length; i++)
            {
                if ((BidiClass)sequence.types[i] == BidiClass.EN)
                {
                    var prevStrong = sequence.sos;  // Default to sos if reached start
                    for (int j = i - 1; j >= 0; j--)
                    {
                        var t = (BidiClass)sequence.types[j];
                        if (t == BidiClass.R || t == BidiClass.L || t == BidiClass.AL)
                        {
                            prevStrong = t;
                            break;
                        }
                    }

                    // Decide only after the backward search has finished, so that a
                    // strong type that was found (or sos when none exists, including
                    // for an EN at index 0) is what determines the result.
                    if (prevStrong == BidiClass.L)
                    {
                        sequence.types[i] = (byte)BidiClass.L;
                    }
                }
            }
        }