// Creates a processor over 'buffer' that uses 'decoder' as its decoding callback.
// The processor starts at code unit offset 0, with a current scalar length of 0
// and a current break type of Other.
internal Processor(ReadOnlySpan<T> buffer, DecodeFirstRune<T> decoder)
{
    // Inputs captured as-is.
    _buffer = buffer;
    _decoder = decoder;

    // Nothing has been decoded yet, so both the current scalar length and the offset are zero.
    _codeUnitLengthOfCurrentScalar = 0;
    CurrentCodeUnitOffset = 0;

    // Placeholder break type for the initial state.
    CurrentType = GraphemeClusterBreakType.Other;
}
// Computes the length, in code units of 'input', of the first extended grapheme cluster.
//
// The boundary rules (GB2 .. GB999) are the ones listed at
// https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules.
//
// 'input'   - the code units to scan.
// 'decoder' - the decoding callback handed to the Processor<T> that walks 'input'.
//
// Returns the processor's CurrentCodeUnitOffset at the point where the cluster ends.
private static int GetLengthOfFirstExtendedGraphemeCluster<T>(ReadOnlySpan<T> input, DecodeFirstRune<T> decoder)
{
    Processor<T> processor = new Processor<T>(input, decoder);
    processor.MoveNext();

    // Rule GB9b: swallow every leading Prepend scalar.
    while (processor.CurrentType == GraphemeClusterBreakType.Prepend)
    {
        processor.MoveNext();
    }

    // Rule GB5: a non-zero offset here means Prepend data was consumed, and Prepend data
    // may not be followed by Control | CR | LF. The cluster ends right before such a scalar.
    if (processor.CurrentCodeUnitOffset > 0
        && (processor.CurrentType == GraphemeClusterBreakType.Control
            || processor.CurrentType == GraphemeClusterBreakType.CR
            || processor.CurrentType == GraphemeClusterBreakType.LF))
    {
        return processor.CurrentCodeUnitOffset;
    }

    // Main state machine: remember the type of the scalar that leads the cluster body,
    // step past it, then decide what is allowed to follow it.
    GraphemeClusterBreakType leadType = processor.CurrentType;
    processor.MoveNext();

    switch (leadType)
    {
        case GraphemeClusterBreakType.CR:
            // Rules GB3 & GB4: only <LF> can follow <CR>, and nothing at all follows that <LF>.
            if (processor.CurrentType == GraphemeClusterBreakType.LF)
            {
                processor.MoveNext();
            }

            return processor.CurrentCodeUnitOffset;

        case GraphemeClusterBreakType.Control:
        case GraphemeClusterBreakType.LF:
            // Rule GB4: no data after Control | LF.
            return processor.CurrentCodeUnitOffset;

        case GraphemeClusterBreakType.L:
        case GraphemeClusterBreakType.LV:
        case GraphemeClusterBreakType.V:
        case GraphemeClusterBreakType.LVT:
        case GraphemeClusterBreakType.T:
        {
            // Hangul syllable sequences (rules GB6, GB7, GB8). 'hangulState' is the type of
            // the most recently consumed scalar; it determines which types may join next.
            GraphemeClusterBreakType hangulState = leadType;

            while (true)
            {
                GraphemeClusterBreakType upcoming = processor.CurrentType;
                bool joins;

                if (hangulState == GraphemeClusterBreakType.L)
                {
                    // Rule GB6: L x (L | V | LV | LVT)
                    joins = upcoming == GraphemeClusterBreakType.L
                        || upcoming == GraphemeClusterBreakType.V
                        || upcoming == GraphemeClusterBreakType.LV
                        || upcoming == GraphemeClusterBreakType.LVT;
                }
                else if (hangulState == GraphemeClusterBreakType.LV
                    || hangulState == GraphemeClusterBreakType.V)
                {
                    // Rule GB7: (LV | V) x (V | T)
                    joins = upcoming == GraphemeClusterBreakType.V
                        || upcoming == GraphemeClusterBreakType.T;
                }
                else
                {
                    // Rule GB8: (LVT | T) x T
                    joins = upcoming == GraphemeClusterBreakType.T;
                }

                if (!joins)
                {
                    break;
                }

                hangulState = upcoming;
                processor.MoveNext();
            }

            break;
        }

        case GraphemeClusterBreakType.Extended_Pictograph:
            // Extended pictographic sequences (rules GB11, GB9): repeatedly match
            // Extend* ZWJ Extended_Pictograph for as long as the pattern keeps appearing.
            while (true)
            {
                // Drain any Extend scalars that might exist.
                while (processor.CurrentType == GraphemeClusterBreakType.Extend)
                {
                    processor.MoveNext();
                }

                // A ZWJ must come next...
                if (processor.CurrentType != GraphemeClusterBreakType.ZWJ)
                {
                    break;
                }

                processor.MoveNext();

                // ...followed by another extended pictograph.
                if (processor.CurrentType != GraphemeClusterBreakType.Extended_Pictograph)
                {
                    break;
                }

                processor.MoveNext();
            }

            break;

        case GraphemeClusterBreakType.Regional_Indicator:
            // One RI scalar has been consumed; take a second one if present to form a pair.
            // A standalone RI scalar (or a single RI pair) can only be followed by trailers.
            if (processor.CurrentType == GraphemeClusterBreakType.Regional_Indicator)
            {
                processor.MoveNext();
            }

            break;

        default:
            break;
    }

    // Rules GB9, GB9a: trailing Extend | ZWJ | SpacingMark scalars stay attached to the cluster.
    while (processor.CurrentType == GraphemeClusterBreakType.Extend
        || processor.CurrentType == GraphemeClusterBreakType.ZWJ
        || processor.CurrentType == GraphemeClusterBreakType.SpacingMark)
    {
        processor.MoveNext();
    }

    // Rules GB2, GB999: everything else is a boundary.
    return processor.CurrentCodeUnitOffset;
}