/// <summary>
/// Enumerates the mandatory (hard) line breaks found in the code point buffer.
/// </summary>
/// <remarks>
/// A break is reported for every code point whose line break class is BK, CR or LF.
/// A CR that is immediately followed by an LF is reported as a single break spanning
/// both code points; the LF is consumed so that it does not produce a second break.
/// </remarks>
/// <returns>
/// A sequence of required <see cref="LineBreak"/> values, constructed as
/// (index of the break character, index just past the break sequence, true).
/// </returns>
public IEnumerable<LineBreak> FindMandatoryBreaks()
{
    for (int i = 0; i < _codePoints.Length; i++)
    {
        switch (UnicodeClasses.LineBreakClass(_codePoints[i]))
        {
            case LineBreakClass.BK:
            case LineBreakClass.LF:
                yield return new LineBreak(i, i + 1, true);
                break;

            case LineBreakClass.CR:
                bool followedByLf = i + 1 < _codePoints.Length
                    && UnicodeClasses.LineBreakClass(_codePoints[i + 1]) == LineBreakClass.LF;
                if (followedByLf)
                {
                    yield return new LineBreak(i, i + 2, true);

                    // CR LF is one break: skip the LF so the next iteration
                    // does not report it again as a break of its own.
                    i++;
                }
                else
                {
                    yield return new LineBreak(i, i + 1, true);
                }
                break;
        }
    }
}
/// <summary>
/// Determines whether the join between two code point sequences falls on a word boundary.
/// </summary>
/// <param name="a">The code points before the join</param>
/// <param name="b">The code points after the join</param>
/// <returns>
/// True if either sequence is empty, or if the nearest non-ignored character on each
/// side belongs to a different boundary group and the one after the join is not a space
/// </returns>
public static bool IsWordBoundary(Slice<int> a, Slice<int> b)
{
    // An empty side is always treated as a boundary
    if (a.Length == 0 || b.Length == 0)
    {
        return true;
    }

    // Walk backwards through 'a' until a character that isn't ignored is found
    var trailing = WordBoundaryClass.Ignore;
    int scan = a.Length;
    while (scan > 0 && trailing == WordBoundaryClass.Ignore)
    {
        scan--;
        trailing = UnicodeClasses.BoundaryGroup(a[scan]);
    }

    // Walk forwards through 'b' until a character that isn't ignored is found
    var leading = WordBoundaryClass.Ignore;
    scan = 0;
    while (scan < b.Length && leading == WordBoundaryClass.Ignore)
    {
        leading = UnicodeClasses.BoundaryGroup(b[scan]);
        scan++;
    }

    // Never a boundary when the following side begins with a space
    if (leading == WordBoundaryClass.Space)
    {
        return false;
    }

    return trailing != leading;
}
// Advance from the given index past any run of code points whose line break
// class is SP. Returns the index of the first code point that is not SP, or the
// buffer length if the run reaches the end.
int findNextNonWhitespace(int from)
{
    for (; from < _codePoints.Length; from++)
    {
        if (UnicodeClasses.LineBreakClass(_codePoints[from]) != LineBreakClass.SP)
        {
            break;
        }
    }

    return from;
}
/// <summary>
/// Tests whether a position in a code point buffer lies on a grapheme cluster boundary
/// </summary>
/// <param name="codePoints">The code points</param>
/// <param name="position">The position to check</param>
/// <returns>
/// False for an empty buffer; otherwise true when the pair table entry for the
/// classes on either side of the position is non-zero
/// </returns>
public static bool IsBoundary(Slice<int> codePoints, int position)
{
    if (codePoints.Length == 0)
    {
        return false;
    }

    // Class of the code point just before the position (SOT at or before the start)
    GraphemeClusterClass before;
    if (position <= 0)
    {
        before = GraphemeClusterClass.SOT;
    }
    else
    {
        before = UnicodeClasses.GraphemeClusterClass(codePoints[position - 1]);
    }

    // Class of the code point at the position (EOT at or past the end)
    GraphemeClusterClass after;
    if (position < codePoints.Length)
    {
        after = UnicodeClasses.GraphemeClusterClass(codePoints[position]);
    }
    else
    {
        after = GraphemeClusterClass.EOT;
    }

    if (before == GraphemeClusterClass.ZWJ)
    {
        // Rule 11 - a ZWJ preceded by (ExtPict Extend*) is reclassified as ExtPictZwg.
        // Skip back over any Extend characters sitting in front of the ZWJ.
        int scan = position - 2;
        for (; scan >= 0; scan--)
        {
            if (UnicodeClasses.GraphemeClusterClass(codePoints[scan]) != GraphemeClusterClass.Extend)
            {
                break;
            }
        }

        if (scan >= 0 && UnicodeClasses.GraphemeClusterClass(codePoints[scan]) == GraphemeClusterClass.ExtPict)
        {
            before = GraphemeClusterClass.ExtPictZwg;
        }
    }
    else if (before == GraphemeClusterClass.Regional_Indicator)
    {
        // Rules 12 and 13 - track the parity of the run of regional indicators
        // that precedes the one immediately before the position.
        bool oddRun = false;
        for (int scan = position - 2;
             scan >= 0 && UnicodeClasses.GraphemeClusterClass(codePoints[scan]) == GraphemeClusterClass.Regional_Indicator;
             scan--)
        {
            oddRun = !oddRun;
        }

        // An odd number of preceding indicators switches the class from RI to Any
        if (oddRun)
        {
            before = GraphemeClusterClass.Any;
        }
    }

    return pairTable[(int)after][(int)before] != 0;
}
// Step backwards from the given index: first over at most one code point whose
// line break class is BK, LF or CR, then over any run of SP code points.
// Returns the resulting index (0 if the start of the buffer is reached).
int findPriorNonWhitespace(int from)
{
    // Drop a single trailing line-ending character, if there is one
    if (from > 0)
    {
        switch (UnicodeClasses.LineBreakClass(_codePoints[from - 1]))
        {
            case LineBreakClass.BK:
            case LineBreakClass.LF:
            case LineBreakClass.CR:
                from--;
                break;
        }
    }

    // Drop the spaces in front of it
    while (from > 0 && UnicodeClasses.LineBreakClass(_codePoints[from - 1]) == LineBreakClass.SP)
    {
        from--;
    }

    return from;
}
/// <summary>
/// Loads a sequence of Unicode code points and resolves the per-character bidi data
/// </summary>
/// <param name="codePoints">The unicode code points to be processed</param>
/// <param name="paragraphEmbeddingLevel">The paragraph embedding level</param>
public void Init(Slice<int> codePoints, sbyte paragraphEmbeddingLevel)
{
    int count = codePoints.Length;

    // Size the working buffers to match the input
    _types.Length = count;
    _pairedBracketTypes.Length = count;
    _pairedBracketValues.Length = count;

    _paragraphPositions.Clear();
    _paragraphEmbeddingLevel = paragraphEmbeddingLevel;

    // These flags record which kinds of characters were seen in the input
    _hasBrackets = false;
    _hasEmbeddings = false;
    _hasIsolates = false;

    for (int index = 0; index < count; index++)
    {
        int codePoint = codePoints[index];
        var packed = UnicodeClasses.BidiData(codePoint);

        // Directionality is taken from the bits above bit 23
        var direction = (Directionality)(packed >> 24);
        _types[index] = direction;

        if (direction == Directionality.LRE
            || direction == Directionality.LRO
            || direction == Directionality.RLE
            || direction == Directionality.RLO
            || direction == Directionality.PDF)
        {
            _hasEmbeddings = true;
        }
        else if (direction == Directionality.LRI
            || direction == Directionality.RLI
            || direction == Directionality.FSI
            || direction == Directionality.PDI)
        {
            _hasIsolates = true;
        }

        // Paired bracket type is taken from bits 16..23
        var bracketType = (PairedBracketType)((packed >> 16) & 0xFF);
        _pairedBracketTypes[index] = bracketType;

        if (bracketType == PairedBracketType.o)
        {
            // Type 'o': the bracket value comes from the low 16 bits of the bidi data
            _pairedBracketValues[index] = MapCanon((int)(packed & 0xFFFF));
            _hasBrackets = true;
        }
        else if (bracketType == PairedBracketType.c)
        {
            // Type 'c': the bracket value comes from the code point itself
            _pairedBracketValues[index] = MapCanon(codePoint);
            _hasBrackets = true;
        }

        // NOTE: a block that used to sit here is disabled. It remapped entries with
        // Directionality.B to Directionality.WS and recorded their index in
        // _paragraphPositions.
    }

    // Expose the work buffers as slices
    Types = _types.AsSlice();
    PairedBracketTypes = _pairedBracketTypes.AsSlice();
    PairedBracketValues = _pairedBracketValues.AsSlice();
}
/// <summary>
/// Locate the start of each "word" in a unicode string. Used for Ctrl+Left/Right
/// in editor and different to the line break algorithm.
/// </summary>
/// <remarks>
/// Position 0 is always reported. Characters in the Ignore group never start or end
/// a word. A boundary is reported where a word starts after spaces and where the
/// boundary group changes to another non-space group. If the input is non-empty and
/// does not end inside a word, the end position is reported as well.
/// </remarks>
/// <param name="codePoints">The code points to scan</param>
/// <returns>The word boundary positions, in increasing order</returns>
public static IEnumerable<int> FindWordBoundaries(Slice<int> codePoints)
{
    // Start is always a word boundary
    yield return 0;

    // Find all boundaries
    bool inWord = false;
    var wordGroup = WordBoundaryClass.Ignore;
    for (int i = 0; i < codePoints.Length; i++)
    {
        // Get group
        var bg = UnicodeClasses.BoundaryGroup(codePoints[i]);

        // Ignored characters never affect the state
        if (bg == WordBoundaryClass.Ignore)
        {
            continue;
        }

        if (!inWord)
        {
            // Ignore spaces before word
            if (bg == WordBoundaryClass.Space)
            {
                continue;
            }

            // Found start of word (position 0 has already been reported)
            if (i != 0)
            {
                yield return i;
            }

            // We're now in the word
            inWord = true;
            wordGroup = bg;
            continue;
        }

        // We're in a word group, check for change of kind
        if (wordGroup != bg)
        {
            if (bg == WordBoundaryClass.Space)
            {
                inWord = false;
            }
            else
            {
                // Switch to a different word kind without a space:
                // emit a word boundary here and track the new kind, so that
                // later characters are compared against the group they follow
                // rather than the group the original word started with.
                yield return i;
                wordGroup = bg;
            }
        }
    }

    // Text that doesn't end inside a word gets a final boundary at the end
    if (!inWord && codePoints.Length > 0)
    {
        yield return codePoints.Length;
    }
}
/// <summary>
/// Get the next line break info
/// </summary>
/// <remarks>
/// Each call resumes scanning from the current position. When the end of the buffer
/// is reached, one final break at the buffer length is reported (marked as required
/// if the last code point is BK, LF or CR); after that the method returns false.
/// An empty buffer produces no breaks.
/// </remarks>
/// <param name="lineBreak">A LineBreak structure returned by this method</param>
/// <returns>True if there was another line break</returns>
public bool NextBreak(out LineBreak lineBreak)
{
    // get the first char if we're at the beginning of the string
    // (guarded so that an empty buffer is never indexed)
    if (!_curClass.HasValue && _pos < _codePoints.Length)
    {
        if (this.peekCharClass() == LineBreakClass.SP)
        {
            // Leading space: start in class WJ without consuming the character
            this._curClass = LineBreakClass.WJ;
        }
        else
        {
            this._curClass = mapFirst(this.readCharClass());
        }
    }

    while (_pos < _codePoints.Length)
    {
        _lastPos = _pos;
        var lastClass = _nextClass;
        _nextClass = this.readCharClass();

        // explicit newline
        if (_curClass.HasValue && ((_curClass == LineBreakClass.BK) || ((_curClass == LineBreakClass.CR) && (this._nextClass != LineBreakClass.LF))))
        {
            _curClass = mapFirst(mapClass(_nextClass.Value));
            lineBreak = new LineBreak(findPriorNonWhitespace(_lastPos), _lastPos, true);
            return true;
        }

        // handle classes not handled by the pair table
        LineBreakClass? cur = null;
        switch (_nextClass.Value)
        {
            case LineBreakClass.SP:
                cur = _curClass;
                break;

            case LineBreakClass.BK:
            case LineBreakClass.LF:
            case LineBreakClass.NL:
                cur = LineBreakClass.BK;
                break;

            case LineBreakClass.CR:
                cur = LineBreakClass.CR;
                break;

            case LineBreakClass.CB:
                cur = LineBreakClass.BA;
                break;
        }

        if (cur != null)
        {
            _curClass = cur;

            // A CB character reports a (non-required) break in front of it
            if (_nextClass.HasValue && _nextClass.Value == LineBreakClass.CB)
            {
                lineBreak = new LineBreak(findPriorNonWhitespace(_lastPos), _lastPos);
                return true;
            }
            continue;
        }

        // if not handled already, use the pair table
        var shouldBreak = false;
        switch (LineBreakPairTable.table[(int)this._curClass.Value][(int)this._nextClass.Value])
        {
            case LineBreakPairTable.DI_BRK: // Direct break
                shouldBreak = true;
                break;

            case LineBreakPairTable.IN_BRK: // possible indirect break
                // Break only if the previously read class was a space
                shouldBreak = lastClass.HasValue && lastClass.Value == LineBreakClass.SP;
                break;

            case LineBreakPairTable.CI_BRK:
                shouldBreak = lastClass.HasValue && lastClass.Value == LineBreakClass.SP;
                if (!shouldBreak)
                {
                    // No preceding space: keep the current class and move on
                    continue;
                }
                break;

            case LineBreakPairTable.CP_BRK: // prohibited for combining marks
                if (!lastClass.HasValue || lastClass.Value != LineBreakClass.SP)
                {
                    // No preceding space: keep the current class and move on
                    continue;
                }
                break;
        }

        _curClass = _nextClass;

        if (shouldBreak)
        {
            lineBreak = new LineBreak(findPriorNonWhitespace(_lastPos), _lastPos);
            return true;
        }
    }

    if (_pos >= _codePoints.Length)
    {
        // Report the end of the text once; _lastPos is moved to the end so that
        // the next call falls through to the "no more breaks" result below.
        if (_lastPos < _codePoints.Length)
        {
            _lastPos = _codePoints.Length;
            var cls = UnicodeClasses.LineBreakClass(_codePoints[_codePoints.Length - 1]);
            bool required = cls == LineBreakClass.BK || cls == LineBreakClass.LF || cls == LineBreakClass.CR;
            lineBreak = new LineBreak(findPriorNonWhitespace(_codePoints.Length), _codePoints.Length, required);
            return true;
        }
    }

    lineBreak = new LineBreak(0, 0, false);
    return false;
}
// Get the mapped class of the character at the current position without
// advancing past it
LineBreakClass peekCharClass()
{
    var rawClass = UnicodeClasses.LineBreakClass(_codePoints[_pos]);
    return mapClass(rawClass);
}
// Get the mapped class of the character at the current position and
// advance the position by one
LineBreakClass readCharClass()
{
    // The position is advanced before the buffer is indexed
    int index = _pos++;
    var rawClass = UnicodeClasses.LineBreakClass(_codePoints[index]);
    return mapClass(rawClass);
}
/// <summary>
/// Splits a sequence of code points into a series of runs with font fallback applied
/// </summary>
/// <remarks>
/// When a replacement character is supplied, a single run covering all code points is
/// returned; its typeface is the preferred typeface, or a fallback matched for the
/// replacement character if the preferred typeface has no glyph for it.
/// Otherwise code points for which the preferred typeface has no glyph (glyph id zero)
/// are grouped into fallback runs, provided a fallback typeface is found, the code
/// point is not whitespace and the position is a grapheme cluster boundary.
/// The SKFont objects created for glyph lookups are disposed before any run is yielded.
/// </remarks>
/// <param name="codePoints">The code points</param>
/// <param name="typeface">The preferred typeface</param>
/// <param name="replacementCharacter">The replacement character to be used for the run</param>
/// <returns>A sequence of runs with unsupported code points replaced by a selected font fallback</returns>
public static IEnumerable<Run> GetFontRuns(Slice<int> codePoints, SKTypeface typeface, char replacementCharacter = '\0')
{
    bool useReplacement = replacementCharacter != '\0';

    // The glyph buffer is only needed when resolving per-character fallback
    var glyphs = new ushort[useReplacement ? 0 : codePoints.Length];

    // All top-level font queries happen inside this scope so the native font
    // is released before the iterator hands control back to the caller.
    using (var font = new SKFont(typeface))
    {
        if (useReplacement)
        {
            var glyph = font.GetGlyph(replacementCharacter);
            if (glyph == 0)
            {
                var fallbackTypeface = CharacterMatcher.MatchCharacter(typeface.FamilyName, typeface.FontWeight, typeface.FontWidth, typeface.FontSlant, null, replacementCharacter);
                if (fallbackTypeface != null)
                {
                    typeface = fallbackTypeface;
                }
            }
        }
        else
        {
            // Get glyphs using the top-level typeface
            font.GetGlyphs(codePoints.AsSpan(), glyphs);
        }
    }

    if (useReplacement)
    {
        yield return new Run()
        {
            Start = 0,
            Length = codePoints.Length,
            Typeface = typeface,
        };
        yield break;
    }

    // Look for subspans that need font fallback (where glyphs are zero)
    int runStart = 0;
    for (int i = 0; i < codePoints.Length; i++)
    {
        // Do we need fallback for this character?
        if (glyphs[i] != 0)
        {
            continue;
        }

        // Check if there's a fallback available, if not, might as well continue with the current top-level typeface
        var subSpanTypeface = CharacterMatcher.MatchCharacter(typeface.FamilyName, typeface.FontWeight, typeface.FontWidth, typeface.FontSlant, null, codePoints[i]);
        if (subSpanTypeface == null)
        {
            continue;
        }

        // Don't fallback for whitespace characters
        if (UnicodeClasses.BoundaryGroup(codePoints[i]) == WordBoundaryClass.Space)
        {
            continue;
        }

        // Must be a cluster boundary
        if (!GraphemeClusterAlgorithm.IsBoundary(codePoints, i))
        {
            continue;
        }

        // We can do font fallback...

        // Flush the current top-level run
        if (i > runStart)
        {
            yield return new Run()
            {
                Start = runStart,
                Length = i - runStart,
                Typeface = typeface,
            };
        }

        // Count how many unmatched characters: extend while the glyph is missing
        // or the position is not a grapheme cluster boundary
        var unmatchedStart = i;
        var unmatchedEnd = i + 1;
        while (unmatchedEnd < codePoints.Length && (glyphs[unmatchedEnd] == 0 || !GraphemeClusterAlgorithm.IsBoundary(codePoints, unmatchedEnd)))
        {
            unmatchedEnd++;
        }
        var unmatchedLength = unmatchedEnd - unmatchedStart;

        // Match the missing characters
        while (unmatchedLength > 0)
        {
            // Find the font fallback using the first character
            subSpanTypeface = CharacterMatcher.MatchCharacter(typeface.FamilyName, typeface.FontWeight, typeface.FontWidth, typeface.FontSlant, null, codePoints[unmatchedStart]);
            if (subSpanTypeface == null)
            {
                // No fallback for the remainder: hand it back to the top-level scan
                unmatchedEnd = unmatchedStart;
                break;
            }

            // Get the glyphs over the current unmatched range; the fallback font is
            // only needed for this lookup, so release it straight away
            using (var subSpanFont = new SKFont(subSpanTypeface))
            {
                subSpanFont.GetGlyphs(codePoints.SubSlice(unmatchedStart, unmatchedLength).AsSpan(), new Span<ushort>(glyphs, unmatchedStart, unmatchedLength));
            }

            // Count how many characters were matched. The first character is always
            // included, so every pass makes progress.
            var fallbackStart = unmatchedStart;
            var fallbackEnd = unmatchedStart + 1;
            while (fallbackEnd < unmatchedEnd && glyphs[fallbackEnd] != 0)
            {
                fallbackEnd++;
            }
            var fallbackLength = fallbackEnd - fallbackStart;

            // Yield this font fallback run
            yield return new Run()
            {
                Start = fallbackStart,
                Length = fallbackLength,
                Typeface = subSpanTypeface,
            };

            // Continue selecting font fallbacks until the entire unmatched range has been matched
            unmatchedStart += fallbackLength;
            unmatchedLength -= fallbackLength;
        }

        // Move onto the next top level span
        i = unmatchedEnd - 1; // account for i++ on for loop
        runStart = unmatchedEnd;
    }

    // Flush final run
    if (codePoints.Length > runStart)
    {
        yield return new Run()
        {
            Start = runStart,
            Length = codePoints.Length - runStart,
            Typeface = typeface,
        };
    }
}