Пример #1
0
        /// <summary>
        /// Enumerate the mandatory (hard) line breaks in the code point buffer
        /// </summary>
        /// <remarks>
        /// One break is produced for every code point whose line break class is BK, CR or LF.
        /// A CR that is immediately followed by an LF is reported as a single break that
        /// spans both code points; the LF is not reported a second time.
        /// </remarks>
        /// <returns>A lazily evaluated sequence of required line breaks, in buffer order</returns>
        public IEnumerable <LineBreak> FindMandatoryBreaks()
        {
            for (int i = 0; i < _codePoints.Length; i++)
            {
                var cls = UnicodeClasses.LineBreakClass(_codePoints[i]);
                switch (cls)
                {
                case LineBreakClass.BK:
                case LineBreakClass.LF:
                    // Single code point hard break
                    yield return(new LineBreak(i, i + 1, true));

                    break;

                case LineBreakClass.CR:
                    if (i + 1 < _codePoints.Length && UnicodeClasses.LineBreakClass(_codePoints[i + 1]) == LineBreakClass.LF)
                    {
                        // CR LF pair: one break covering both code points
                        yield return(new LineBreak(i, i + 2, true));

                        // Step over the LF so the next iteration doesn't emit a
                        // second break for the same line ending
                        i++;
                    }
                    else
                    {
                        // Lone CR
                        yield return(new LineBreak(i, i + 1, true));
                    }
                    break;
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Decide whether the join between two code point sequences falls on a word boundary
        /// </summary>
        /// <remarks>
        /// The decision compares the boundary group of the last non-ignorable code point of
        /// <paramref name="a"/> with that of the first non-ignorable code point of
        /// <paramref name="b"/>. If a sequence holds only ignorable code points its group
        /// stays <c>WordBoundaryClass.Ignore</c>.
        /// </remarks>
        /// <param name="a">The code points before the join</param>
        /// <param name="b">The code points after the join</param>
        /// <returns>
        /// True if either sequence is empty, or if the two groups differ and the group on
        /// the <paramref name="b"/> side is not <c>WordBoundaryClass.Space</c>
        /// </returns>
        public static bool IsWordBoundary(Slice <int> a, Slice <int> b)
        {
            // An empty side is always treated as a boundary
            if (a.Length == 0 || b.Length == 0)
            {
                return true;
            }

            // Walk backwards through 'a' until something other than Ignore turns up
            var trailing = WordBoundaryClass.Ignore;
            for (int index = a.Length - 1; index >= 0; index--)
            {
                trailing = UnicodeClasses.BoundaryGroup(a[index]);
                if (trailing != WordBoundaryClass.Ignore)
                {
                    break;
                }
            }

            // Walk forwards through 'b' until something other than Ignore turns up
            var leading = WordBoundaryClass.Ignore;
            for (int index = 0; index < b.Length; index++)
            {
                leading = UnicodeClasses.BoundaryGroup(b[index]);
                if (leading != WordBoundaryClass.Ignore)
                {
                    break;
                }
            }

            // Moving into whitespace never counts as a boundary
            if (leading == WordBoundaryClass.Space)
            {
                return false;
            }

            return trailing != leading;
        }
Пример #3
0
 // Starting at 'from', skip forward over code points whose line break class is SP.
 // Returns the index of the first code point that is not SP, or _codePoints.Length
 // if only SP code points remain.
 int findNextNonWhitespace(int from)
 {
     int pos = from;
     for (; pos < _codePoints.Length; pos++)
     {
         if (UnicodeClasses.LineBreakClass(_codePoints[pos]) != LineBreakClass.SP)
         {
             break;
         }
     }
     return pos;
 }
Пример #4
0
        /// <summary>
        /// Test whether a position in a code point buffer sits on a grapheme cluster boundary
        /// </summary>
        /// <remarks>
        /// The classes of the code points on each side of <paramref name="position"/> are
        /// looked up (SOT is used before the first code point, EOT at or after the end),
        /// adjusted for the ZWJ / extended pictograph and regional indicator rules, and
        /// then resolved through the pair table.
        /// </remarks>
        /// <param name="codePoints">The code points</param>
        /// <param name="position">The position to check</param>
        /// <returns>
        /// True if the pair table entry for the two (adjusted) classes is non-zero;
        /// always false when <paramref name="codePoints"/> is empty
        /// </returns>
        public static bool IsBoundary(Slice <int> codePoints, int position)
        {
            if (codePoints.Length == 0)
            {
                return false;
            }

            // Class of the code point before the position (start-of-text if there is none)
            var before = position > 0
                ? UnicodeClasses.GraphemeClusterClass(codePoints[position - 1])
                : GraphemeClusterClass.SOT;

            // Class of the code point at the position (end-of-text if past the end)
            var after = position >= codePoints.Length
                ? GraphemeClusterClass.EOT
                : UnicodeClasses.GraphemeClusterClass(codePoints[position]);

            // Rule 11 - a ZWJ that follows ExtPict (with any number of Extend in between)
            // is promoted to ExtPictZwg
            if (a_isZwj(before))
            {
                int scan = position - 2;
                for (; scan >= 0; scan--)
                {
                    if (UnicodeClasses.GraphemeClusterClass(codePoints[scan]) != GraphemeClusterClass.Extend)
                    {
                        break;
                    }
                }

                if (scan >= 0 && UnicodeClasses.GraphemeClusterClass(codePoints[scan]) == GraphemeClusterClass.ExtPict)
                {
                    before = GraphemeClusterClass.ExtPictZwg;
                }
            }

            // Rules 12 and 13 - regional indicators
            if (before == GraphemeClusterClass.Regional_Indicator)
            {
                // Count the unbroken run of regional indicators that precede the one
                // immediately before the position
                int preceding = 0;
                int scan      = position - 2;
                while (scan >= 0 && UnicodeClasses.GraphemeClusterClass(codePoints[scan]) == GraphemeClusterClass.Regional_Indicator)
                {
                    preceding++;
                    scan--;
                }

                // An odd count means the indicator before the position is treated as Any
                if ((preceding % 2) != 0)
                {
                    before = GraphemeClusterClass.Any;
                }
            }

            // The table is indexed [class after][class before]
            return pairTable[(int)after][(int)before] != 0;

            // Local predicate kept next to its single use
            bool a_isZwj(GraphemeClusterClass cls)
            {
                return cls == GraphemeClusterClass.ZWJ;
            }
        }
Пример #5
0
 // Starting at 'from', step backwards over at most one hard break code point
 // (class BK, LF or CR) and then over any run of SP code points before it.
 // Returns the resulting index, which is never less than zero when 'from' is
 // not negative.
 int findPriorNonWhitespace(int from)
 {
     // Drop a single trailing hard break, if present
     if (from > 0)
     {
         switch (UnicodeClasses.LineBreakClass(_codePoints[from - 1]))
         {
         case LineBreakClass.BK:
         case LineBreakClass.LF:
         case LineBreakClass.CR:
             from--;
             break;
         }
     }

     // Drop the spaces in front of it
     while (from > 0 && UnicodeClasses.LineBreakClass(_codePoints[from - 1]) == LineBreakClass.SP)
     {
         from--;
     }

     return from;
 }
Пример #6
0
        /// <summary>
        /// Load a sequence of Unicode code points and resolve the per-code-point bidi data
        /// </summary>
        /// <remarks>
        /// For every code point this records its directionality and paired bracket type,
        /// and for opening/closing brackets the canonical bracket value. It also resets and
        /// recomputes the flags that say whether the text contains brackets, embeddings or
        /// isolates. Bracket values of non-bracket code points are left untouched.
        /// </remarks>
        /// <param name="codePoints">The unicode code points to be processed</param>
        /// <param name="paragraphEmbeddingLevel">The paragraph embedding level</param>
        public void Init(Slice <int> codePoints, sbyte paragraphEmbeddingLevel)
        {
            int count = codePoints.Length;

            // Size the working buffers to match the input
            _types.Length = count;
            _pairedBracketTypes.Length  = count;
            _pairedBracketValues.Length = count;

            _paragraphPositions.Clear();
            _paragraphEmbeddingLevel = paragraphEmbeddingLevel;

            // Nothing seen yet
            _hasBrackets   = false;
            _hasEmbeddings = false;
            _hasIsolates   = false;

            for (int index = 0; index < count; index++)
            {
                int codePoint = codePoints[index];
                var packed    = UnicodeClasses.BidiData(codePoint);

                // The top 8 bits carry the directionality
                var direction = (Directionality)(packed >> 24);
                _types[index] = direction;

                if (direction == Directionality.LRE ||
                    direction == Directionality.LRO ||
                    direction == Directionality.RLE ||
                    direction == Directionality.RLO ||
                    direction == Directionality.PDF)
                {
                    _hasEmbeddings = true;
                }
                else if (direction == Directionality.LRI ||
                         direction == Directionality.RLI ||
                         direction == Directionality.FSI ||
                         direction == Directionality.PDI)
                {
                    _hasIsolates = true;
                }

                // The next 8 bits carry the paired bracket type
                var bracketType = (PairedBracketType)((packed >> 16) & 0xFF);
                _pairedBracketTypes[index] = bracketType;

                if (bracketType == PairedBracketType.o)
                {
                    // Opening bracket: the value comes from the low 16 bits of the bidi data
                    _pairedBracketValues[index] = MapCanon((int)(packed & 0xFFFF));
                    _hasBrackets = true;
                }
                else if (bracketType == PairedBracketType.c)
                {
                    // Closing bracket: the value comes from the code point itself
                    _pairedBracketValues[index] = MapCanon(codePoint);
                    _hasBrackets = true;
                }

                // NOTE: code that rewrote Directionality.B entries to WS and recorded their
                // index in _paragraphPositions used to live here but is disabled, so
                // _paragraphPositions stays empty after this method.
            }

            // Publish slices over the working buffers
            Types = _types.AsSlice();
            PairedBracketTypes  = _pairedBracketTypes.AsSlice();
            PairedBracketValues = _pairedBracketValues.AsSlice();
        }
Пример #7
0
        /// <summary>
        /// Locate the start of each "word" in a unicode string.  Used for Ctrl+Left/Right
        /// in editor and different to the line break algorithm.
        /// </summary>
        /// <remarks>
        /// A word starts at the first non-space, non-ignorable code point after whitespace,
        /// and also wherever the boundary group changes to a different non-space group
        /// without intervening whitespace (for example between letters and punctuation).
        /// Code points in the Ignore group never start or end a word.
        /// </remarks>
        /// <param name="codePoints">The code points to scan</param>
        /// <returns>
        /// A lazily evaluated, ascending sequence of code point indices. Index 0 is always
        /// returned first (even for an empty buffer). The buffer length is returned last
        /// when the buffer is non-empty and the scan does not end inside a word.
        /// </returns>
        public static IEnumerable <int> FindWordBoundaries(Slice <int> codePoints)
        {
            // Start is always a word boundary
            yield return(0);

            // Find all boundaries
            bool inWord    = false;
            var  wordGroup = WordBoundaryClass.Ignore;

            for (int i = 0; i < codePoints.Length; i++)
            {
                // Get group
                var bg = UnicodeClasses.BoundaryGroup(codePoints[i]);

                // Ignore?
                if (bg == WordBoundaryClass.Ignore)
                {
                    continue;
                }

                if (!inWord)
                {
                    // Ignore spaces before word
                    if (bg == WordBoundaryClass.Space)
                    {
                        continue;
                    }

                    // Found start of word (position 0 has already been reported)
                    if (i != 0)
                    {
                        yield return(i);
                    }

                    // We're now in the word
                    inWord    = true;
                    wordGroup = bg;
                    continue;
                }

                // We're in a word group, check for change of kind
                if (wordGroup != bg)
                {
                    if (bg == WordBoundaryClass.Space)
                    {
                        inWord = false;
                    }
                    else
                    {
                        // Switch to a different word kind without a space
                        // just emit a word boundary here
                        yield return(i);

                        // Track the new kind. Without this every following code point of
                        // the new kind would be reported again, and the switch back to
                        // the original kind would be missed.
                        wordGroup = bg;
                    }
                }
            }

            // Ended outside a word (e.g. trailing whitespace): the end is a boundary too
            if (!inWord && codePoints.Length > 0)
            {
                yield return(codePoints.Length);
            }
        }
Пример #8
0
        /// <summary>
        /// Get the next line break info
        /// </summary>
        /// <remarks>
        /// Each call resumes scanning from the current read position and returns at the
        /// next break opportunity. Once the buffer has been consumed, one final break at the
        /// end of the buffer is reported (flagged as required if the last code point is of
        /// class BK, LF or CR); after that the method returns false. An empty buffer
        /// produces no breaks.
        /// </remarks>
        /// <param name="lineBreak">
        /// A LineBreak structure returned by this method. When the method returns false it
        /// is set to <c>new LineBreak(0, 0, false)</c>.
        /// </param>
        /// <returns>True if there was another line break</returns>
        public bool NextBreak(out LineBreak lineBreak)
        {
            // get the first char if we're at the beginning of the string
            // (skipped when there is nothing to read, so an empty buffer falls straight
            // through to the "no more breaks" result instead of indexing out of range)
            if (!_curClass.HasValue && _pos < _codePoints.Length)
            {
                if (this.peekCharClass() == LineBreakClass.SP)
                {
                    // Leading space: start from WJ and leave the space unread so the
                    // main loop processes it
                    this._curClass = LineBreakClass.WJ;
                }
                else
                {
                    this._curClass = mapFirst(this.readCharClass());
                }
            }

            while (_pos < _codePoints.Length)
            {
                _lastPos = _pos;
                var lastClass = _nextClass;
                _nextClass = this.readCharClass();

                // explicit newline
                if (_curClass.HasValue && ((_curClass == LineBreakClass.BK) || ((_curClass == LineBreakClass.CR) && (this._nextClass != LineBreakClass.LF))))
                {
                    _curClass = mapFirst(mapClass(_nextClass.Value));
                    lineBreak = new LineBreak(findPriorNonWhitespace(_lastPos), _lastPos, true);
                    return(true);
                }

                // handle classes not handled by the pair table
                LineBreakClass? cur = null;
                switch (_nextClass.Value)
                {
                case LineBreakClass.SP:
                    // Spaces don't change the current class
                    cur = _curClass;
                    break;

                case LineBreakClass.BK:
                case LineBreakClass.LF:
                case LineBreakClass.NL:
                    cur = LineBreakClass.BK;
                    break;

                case LineBreakClass.CR:
                    cur = LineBreakClass.CR;
                    break;

                case LineBreakClass.CB:
                    cur = LineBreakClass.BA;
                    break;
                }

                if (cur != null)
                {
                    _curClass = cur;
                    if (_nextClass.HasValue && _nextClass.Value == LineBreakClass.CB)
                    {
                        // CB is the only one of these classes that yields a break
                        // immediately (before the CB code point)
                        lineBreak = new LineBreak(findPriorNonWhitespace(_lastPos), _lastPos);
                        return(true);
                    }
                    continue;
                }

                // if not handled already, use the pair table
                var shouldBreak = false;
                switch (LineBreakPairTable.table[(int)this._curClass.Value][(int)this._nextClass.Value])
                {
                case LineBreakPairTable.DI_BRK:     // Direct break
                    shouldBreak = true;
                    break;

                case LineBreakPairTable.IN_BRK:     // possible indirect break
                    // Only a break if the previously read class was a space
                    shouldBreak = lastClass.HasValue && lastClass.Value == LineBreakClass.SP;
                    break;

                case LineBreakPairTable.CI_BRK:
                    shouldBreak = lastClass.HasValue && lastClass.Value == LineBreakClass.SP;
                    if (!shouldBreak)
                    {
                        // No break, and the current class is deliberately left unchanged
                        continue;
                    }
                    break;

                case LineBreakPairTable.CP_BRK:     // prohibited for combining marks
                    if (!lastClass.HasValue || lastClass.Value != LineBreakClass.SP)
                    {
                        // No break, and the current class is deliberately left unchanged
                        continue;
                    }
                    break;
                }

                _curClass = _nextClass;
                if (shouldBreak)
                {
                    lineBreak = new LineBreak(findPriorNonWhitespace(_lastPos), _lastPos);
                    return(true);
                }
            }

            if (_pos >= _codePoints.Length)
            {
                if (_lastPos < _codePoints.Length)
                {
                    // Report the end of the buffer once; bumping _lastPos prevents it
                    // from being reported again on the next call
                    _lastPos = _codePoints.Length;
                    var  cls      = UnicodeClasses.LineBreakClass(_codePoints[_codePoints.Length - 1]);
                    bool required = cls == LineBreakClass.BK || cls == LineBreakClass.LF || cls == LineBreakClass.CR;
                    lineBreak = new LineBreak(findPriorNonWhitespace(_codePoints.Length), _codePoints.Length, required);
                    return(true);
                }
            }

            // No more breaks
            lineBreak = new LineBreak(0, 0, false);
            return(false);
        }
Пример #9
0
 // Get the (mapped) class of the code point at the current read position
 // without advancing the position
 LineBreakClass peekCharClass()
 {
     var rawClass = UnicodeClasses.LineBreakClass(_codePoints[_pos]);
     return mapClass(rawClass);
 }
Пример #10
0
 // Get the (mapped) class of the code point at the current read position
 // and advance the position by one
 LineBreakClass readCharClass()
 {
     int index    = _pos++;
     var rawClass = UnicodeClasses.LineBreakClass(_codePoints[index]);
     return mapClass(rawClass);
 }
Пример #11
0
        /// <summary>
        /// Splits a sequence of code points into a series of runs with font fallback applied
        /// </summary>
        /// <remarks>
        /// When <paramref name="replacementCharacter"/> is not '\0' a single run covering
        /// all code points is returned, using a fallback typeface for the replacement
        /// character if the preferred typeface has no glyph for it and a fallback is found.
        /// Otherwise the code points are scanned for ranges with no glyph in the preferred
        /// typeface, and each such range is split into runs using fallback typefaces.
        /// Fallback is not started on whitespace or in the middle of a grapheme cluster.
        /// The temporary SKFont objects used for glyph lookup are disposed as soon as the
        /// lookup is done.
        /// </remarks>
        /// <param name="codePoints">The code points</param>
        /// <param name="typeface">The preferred typeface</param>
        /// <param name="replacementCharacter">The replacement character to be used for the run</param>
        /// <returns>A sequence of runs with unsupported code points replaced by a selected font fallback</returns>
        public static IEnumerable <Run> GetFontRuns(Slice <int> codePoints, SKTypeface typeface, char replacementCharacter = '\0')
        {
            if (replacementCharacter != '\0')
            {
                // Does the preferred typeface have a glyph for the replacement character?
                bool hasGlyph;
                using (var font = new SKFont(typeface))
                {
                    hasGlyph = font.GetGlyph(replacementCharacter) != 0;
                }

                if (!hasGlyph)
                {
                    var fallbackTypeface = CharacterMatcher.MatchCharacter(typeface.FamilyName, typeface.FontWeight, typeface.FontWidth, typeface.FontSlant, null, replacementCharacter);
                    if (fallbackTypeface != null)
                    {
                        typeface = fallbackTypeface;
                    }
                }

                yield return(new Run()
                {
                    Start = 0,
                    Length = codePoints.Length,
                    Typeface = typeface,
                });

                yield break;
            }

            // Get glyphs using the top-level typeface
            var glyphs = new ushort[codePoints.Length];

            using (var font = new SKFont(typeface))
            {
                font.GetGlyphs(codePoints.AsSpan(), glyphs);
            }

            // Look for subspans that need font fallback (where glyphs are zero)
            int runStart = 0;

            for (int i = 0; i < codePoints.Length; i++)
            {
                // Do we need fallback for this character?
                if (glyphs[i] == 0)
                {
                    // Check if there's a fallback available, if not, might as well continue with the current top-level typeface
                    var subSpanTypeface = CharacterMatcher.MatchCharacter(typeface.FamilyName, typeface.FontWeight, typeface.FontWidth, typeface.FontSlant, null, codePoints[i]);
                    if (subSpanTypeface == null)
                    {
                        continue;
                    }

                    // Don't fallback for whitespace characters
                    if (UnicodeClasses.BoundaryGroup(codePoints[i]) == WordBoundaryClass.Space)
                    {
                        continue;
                    }

                    // Must be a cluster boundary
                    if (!GraphemeClusterAlgorithm.IsBoundary(codePoints, i))
                    {
                        continue;
                    }

                    // We can do font fallback...

                    // Flush the current top-level run
                    if (i > runStart)
                    {
                        yield return(new Run()
                        {
                            Start = runStart,
                            Length = i - runStart,
                            Typeface = typeface,
                        });
                    }

                    // Count how many unmatched characters (extended to the next
                    // grapheme cluster boundary so clusters are never split)
                    var unmatchedStart = i;
                    var unmatchedEnd   = i + 1;
                    while (unmatchedEnd < codePoints.Length &&
                           (glyphs[unmatchedEnd] == 0 || !GraphemeClusterAlgorithm.IsBoundary(codePoints, unmatchedEnd)))
                    {
                        unmatchedEnd++;
                    }
                    var unmatchedLength = unmatchedEnd - unmatchedStart;

                    // Match the missing characters
                    while (unmatchedLength > 0)
                    {
                        // Find the font fallback using the first character
                        subSpanTypeface = CharacterMatcher.MatchCharacter(typeface.FamilyName, typeface.FontWeight, typeface.FontWidth, typeface.FontSlant, null, codePoints[unmatchedStart]);
                        if (subSpanTypeface == null)
                        {
                            // No fallback: hand the rest of the range back to the top-level scan
                            unmatchedEnd = unmatchedStart;
                            break;
                        }

                        // Get the glyphs over the current unmatched range
                        using (var subSpanFont = new SKFont(subSpanTypeface))
                        {
                            subSpanFont.GetGlyphs(codePoints.SubSlice(unmatchedStart, unmatchedLength).AsSpan(), new Span <ushort>(glyphs, unmatchedStart, unmatchedLength));
                        }

                        // Count how many characters were matched (the first one is
                        // always taken, which guarantees progress)
                        var fallbackStart = unmatchedStart;
                        var fallbackEnd   = unmatchedStart + 1;
                        while (fallbackEnd < unmatchedEnd && glyphs[fallbackEnd] != 0)
                        {
                            fallbackEnd++;
                        }
                        var fallbackLength = fallbackEnd - fallbackStart;

                        // Yield this font fallback run
                        yield return(new Run()
                        {
                            Start = fallbackStart,
                            Length = fallbackLength,
                            Typeface = subSpanTypeface,
                        });

                        // Continue selecting font fallbacks until the entire unmatched ranges has been matched
                        unmatchedStart  += fallbackLength;
                        unmatchedLength -= fallbackLength;
                    }

                    // Move onto the next top level span
                    i        = unmatchedEnd - 1;    // account for i++ on for loop
                    runStart = unmatchedEnd;
                }
            }

            // Flush final run
            if (codePoints.Length > runStart)
            {
                yield return(new Run()
                {
                    Start = runStart,
                    Length = codePoints.Length - runStart,
                    Typeface = typeface,
                });
            }
        }