// X10 bullet 2: fill in an isolating run sequence from its run indexes and determine its
// start-of-sequence (sos) and end-of-sequence (eos) types (R or L).
//   sequence  - the sequence to populate (length, indexes, types, level, sos, eos are written)
//   pLevel    - paragraph embedding level, used where the sequence has no neighbour to compare with
//   indexList - positions in the original text of the characters that make up the sequence
//   typesList - character types indexed by text position; its length is used as the text length
//   levels    - embedding levels indexed by text position
// NOTE(review): indexList is assumed to be non-empty; an empty list would fail on indexList[0].
private static void ComputeIsolatingRunSequence(this IsolatingRunSequence sequence, byte pLevel, List<int> indexList, byte[] typesList, byte[] levels)
{
    sequence.length = indexList.Count;
    sequence.indexes = indexList.ToArray(); // Indexes of run in original text

    // Character types of run sequence
    sequence.types = new byte[indexList.Count];
    for (int i = 0; i < sequence.length; i++)
    {
        sequence.types[i] = typesList[indexList[i]];
    }

    // sos: compare the level of the first character with the level of the character
    // preceding it in the text, or with the paragraph level when there is none.
    var firstLevel = levels[indexList[0]]; // level of first character
    sequence.level = firstLevel;
    var previous = indexList[0] - 1;
    var prevLevel = previous >= 0 ? levels[previous] : pLevel;
    sequence.sos = GetTypeForLevel(Math.Max(firstLevel, prevLevel));

    // eos: compare the level of the last character with the level of the character
    // following it in the text. The paragraph level is used instead when there is no
    // following character, or when the sequence ends with an isolate initiator
    // (which then has no matching PDI).
    var last = indexList[sequence.length - 1]; // last character in the sequence
    var lastType = (BidiClass)sequence.types[sequence.length - 1];
    var lastLevel = levels[last];
    var next = last + 1; // next character after sequence (in paragraph)
    bool endsWithIsolateInitiator = lastType == BidiClass.LRI
                                    || lastType == BidiClass.RLI
                                    || lastType == BidiClass.FSI;
    var nextLevel = next < typesList.Length && !endsWithIsolateInitiator ? levels[next] : pLevel;
    sequence.eos = GetTypeForLevel(Math.Max(lastLevel, nextLevel));
}
// Overwrite the sequence's character types in the half-open range [start, limit) with newType.
// Nothing is written when start >= limit.
private static void SetRunTypes(this IsolatingRunSequence sequence, int start, int limit, BidiClass newType)
{
    byte replacement = (byte)newType;
    int pos = start;
    while (pos < limit)
    {
        sequence.types[pos] = replacement;
        pos++;
    }
}
// Copy the sequence's types and resolved levels back into the per-text arrays,
// using sequence.indexes to map each sequence position to its text position.
private static void ApplyTypesAndLevels(this IsolatingRunSequence sequence, ref byte[] typesList, ref byte[] levelsList)
{
    int k = 0;
    while (k < sequence.length)
    {
        int target = sequence.indexes[k];
        typesList[target] = sequence.types[k];
        levelsList[target] = sequence.resolvedLevels[k];
        k++;
    }
}
// Find where a run of characters whose types all belong to typesSet ends.
// Scanning starts at index; the first position whose type is not in typesSet is returned.
// If every position up to limit matches (or index is already at/after limit), limit is returned.
private static int GetRunLimit(this IsolatingRunSequence sequence, int index, int limit, BidiClass[] typesSet)
{
    while (index < limit)
    {
        var current = (BidiClass)sequence.types[index];

        bool inSet = false;
        foreach (var candidate in typesSet)
        {
            if (candidate == current)
            {
                inSet = true;
                break;
            }
        }

        if (!inSet)
        {
            // No match in typesSet: the run ends here
            return index;
        }

        index++;
    }

    return limit;
}
// 3.3.5 Resolve Implicit Embedding Levels
// Allocates sequence.resolvedLevels, seeds every entry with the sequence embedding level,
// then raises individual entries according to each character's type (rules I1 and I2).
private static void ResolveImplicit(this IsolatingRunSequence sequence)
{
    byte baseLevel = sequence.level;

    // Start every character at the sequence embedding level
    sequence.resolvedLevels = new byte[sequence.length];
    SetLevels(ref sequence.resolvedLevels, sequence.level);

    bool oddLevel = IsOdd(baseLevel);

    for (int pos = 0; pos < sequence.length; pos++)
    {
        var kind = (BidiClass)sequence.types[pos];
        bool isNumber = kind == BidiClass.AN || kind == BidiClass.EN;

        if (oddLevel)
        {
            // I2: odd (right-to-left) level - L, AN and EN go up one level
            if (kind == BidiClass.L || isNumber)
            {
                sequence.resolvedLevels[pos] += 1;
            }
            continue;
        }

        // I1: even (left-to-right) level - R goes up one level, AN and EN go up two
        if (kind == BidiClass.R)
        {
            sequence.resolvedLevels[pos] += 1;
        }
        else if (isNumber)
        {
            sequence.resolvedLevels[pos] += 2;
        }
    }
}
// 3.3.4 Resolve Neutral Types
// After this pass every neutral/isolate (NI) character in the sequence is typed R or L.
private static void ResolveNeutrals(this IsolatingRunSequence sequence)
{
    // TODO: N0 rule (Paired Brackets algorithm)

    // Types treated as NI by rules N1 and N2
    var neutralTypes = new BidiClass[]
    {
        BidiClass.B, BidiClass.S, BidiClass.WS, BidiClass.ON,
        BidiClass.LRI, BidiClass.RLI, BidiClass.FSI, BidiClass.PDI
    };

    int pos = 0;
    while (pos < sequence.length)
    {
        // GetRunLimit returns pos itself when the character at pos is not an NI
        int runStart = pos;
        int runEnd = sequence.GetRunLimit(runStart, sequence.length, neutralTypes);
        if (runEnd == runStart)
        {
            pos++;
            continue;
        }

        // Type on the leading side: sos at the sequence boundary, otherwise the preceding
        // character, with AN and EN counting as R.
        bool atStart = runStart == 0;
        BidiClass before = atStart ? sequence.sos : (BidiClass)sequence.types[runStart - 1];
        if (!atStart && (before == BidiClass.AN || before == BidiClass.EN))
        {
            before = BidiClass.R;
        }

        // Type on the trailing side: eos at the sequence boundary, otherwise the following
        // character, with AN and EN counting as R.
        bool atEnd = runEnd == sequence.length;
        BidiClass after = atEnd ? sequence.eos : (BidiClass)sequence.types[runEnd];
        if (!atEnd && (after == BidiClass.AN || after == BidiClass.EN))
        {
            after = BidiClass.R;
        }

        // N1: both sides agree, so the run takes that direction.
        // N2: otherwise the run takes the direction of the sequence embedding level.
        BidiClass resolved = before == after ? before : GetTypeForLevel(sequence.level);
        sequence.SetRunTypes(runStart, runEnd, resolved);

        // The character at runEnd is known not to be an NI, so skip past it
        pos = runEnd + 1;
    }
}
// 3.3.3 Resolve Weak Types
// Applies rules W1 through W7, in order, to sequence.types in place. Each rule is a separate
// pass because later rules depend on the results of earlier ones.
private static void ResolveWeaks(this IsolatingRunSequence sequence)
{
    // W1 NSM
    // An NSM takes the type of the previous character (sos at the start of the sequence),
    // or ON when the previous character is an isolate initiator or PDI.
    // The previous type is read after earlier NSMs were rewritten, so runs of NSMs chain.
    for (int i = 0; i < sequence.length; i++)
    {
        var ct = (BidiClass)sequence.types[i];
        var prevType = i == 0 ? sequence.sos : (BidiClass)sequence.types[i - 1];
        if (ct == BidiClass.NSM)
        {
            bool isIsolateOrPDI = prevType == BidiClass.LRI
                                  || prevType == BidiClass.RLI
                                  || prevType == BidiClass.FSI
                                  || prevType == BidiClass.PDI;
            sequence.types[i] = isIsolateOrPDI ? (byte)BidiClass.ON : (byte)prevType;
        }
    }

    // W2 EN
    // At each EN search backward for the first strong type (R, L or AL);
    // if that first strong type is AL, the EN becomes AN.
    for (int i = 0; i < sequence.length; i++)
    {
        if ((BidiClass)sequence.types[i] != BidiClass.EN)
        {
            continue;
        }

        for (int j = i - 1; j >= 0; j--)
        {
            var type = (BidiClass)sequence.types[j];
            if (type == BidiClass.R || type == BidiClass.AL || type == BidiClass.L)
            {
                if (type == BidiClass.AL)
                {
                    sequence.types[i] = (byte)BidiClass.AN;
                }

                // Only the nearest strong type counts; never look past it
                break;
            }
        }
    }

    // W3 AL
    // Resolve all ALs to R
    for (int i = 0; i < sequence.length; i++)
    {
        if ((BidiClass)sequence.types[i] == BidiClass.AL)
        {
            sequence.types[i] = (byte)BidiClass.R;
        }
    }

    // W4 ES, CS (Number Separators)
    // A single ES between two ENs resolves to EN.
    // A single CS between two numbers of the same type resolves to that number type.
    // The first and last positions are skipped because they lack a neighbour on one side.
    for (int i = 1; i < sequence.length - 1; i++)
    {
        var cct = (BidiClass)sequence.types[i];
        var prevType = (BidiClass)sequence.types[i - 1];
        var nextType = (BidiClass)sequence.types[i + 1];

        if (cct == BidiClass.ES && prevType == BidiClass.EN && nextType == BidiClass.EN)
        {
            // EN ES EN -> EN EN EN
            sequence.types[i] = (byte)BidiClass.EN;
        }
        else if (cct == BidiClass.CS
                 && (prevType == BidiClass.EN && nextType == BidiClass.EN
                     || prevType == BidiClass.AN && nextType == BidiClass.AN))
        {
            // EN CS EN -> EN EN EN, AN CS AN -> AN AN AN
            sequence.types[i] = (byte)prevType;
        }
    }

    // W5 ET(s) adjacent to EN resolve to EN(s)
    var typesSet = new BidiClass[] { BidiClass.ET };
    for (int i = 0; i < sequence.length; i++)
    {
        if ((BidiClass)sequence.types[i] == BidiClass.ET)
        {
            int runStart = i;
            int runEnd = sequence.GetRunLimit(runStart, sequence.length, typesSet);

            // Check the type before the run first, then the type after it
            var type = runStart > 0 ? (BidiClass)sequence.types[runStart - 1] : sequence.sos;
            if (type != BidiClass.EN)
            {
                type = runEnd < sequence.length ? (BidiClass)sequence.types[runEnd] : sequence.eos; // End type
            }

            if (type == BidiClass.EN)
            {
                sequence.SetRunTypes(runStart, runEnd, BidiClass.EN); // Resolve to EN
            }

            i = runEnd; // advance to end of the ET run
        }
    }

    // W6 Separators and Terminators -> ON
    for (int i = 0; i < sequence.length; i++)
    {
        var t = (BidiClass)sequence.types[i];
        if (t == BidiClass.ET || t == BidiClass.ES || t == BidiClass.CS)
        {
            sequence.types[i] = (byte)BidiClass.ON;
        }
    }

    // W7 same search as W2, but EN -> L
    // At each EN search backward for the first strong type, falling back to sos when the
    // start of the sequence is reached; if it is L, the EN becomes L.
    for (int i = 0; i < sequence.length; i++)
    {
        if ((BidiClass)sequence.types[i] != BidiClass.EN)
        {
            continue;
        }

        var prevStrong = sequence.sos; // Default to sos if reached start
        for (int j = i - 1; j >= 0; j--)
        {
            var t = (BidiClass)sequence.types[j];
            if (t == BidiClass.R || t == BidiClass.L || t == BidiClass.AL)
            {
                prevStrong = t;
                break;
            }
        }

        // Decide only after the backward search has finished
        if (prevStrong == BidiClass.L)
        {
            sequence.types[i] = (byte)BidiClass.L;
        }
    }
}