Пример #1
0
        /// <summary>
        /// Enumerates the required line breaks found by scanning every code point
        /// for the BK, CR and LF line break classes.
        /// </summary>
        /// <remarks>
        /// Each result is built as <c>new LineBreak(start, end, true)</c>, where
        /// <c>start</c> is the index of the break character and <c>end</c> is the
        /// index just past it. A CR immediately followed by an LF is reported once,
        /// with <c>end</c> placed after the LF, so a CRLF pair never produces a
        /// second break for its LF.
        /// </remarks>
        /// <returns>A lazily evaluated sequence of line breaks, in buffer order.</returns>
        public IEnumerable<LineBreak> FindMandatoryBreaks()
        {
            for (int i = 0; i < _codePoints.Length; i++)
            {
                switch (UnicodeClasses.LineBreakClass(_codePoints[i]))
                {
                case LineBreakClass.BK:
                case LineBreakClass.LF:
                    yield return new LineBreak(i, i + 1, true);
                    break;

                case LineBreakClass.CR:
                    if (i + 1 < _codePoints.Length && UnicodeClasses.LineBreakClass(_codePoints[i + 1]) == LineBreakClass.LF)
                    {
                        yield return new LineBreak(i, i + 2, true);

                        // The LF belongs to this CRLF pair; skip it so the next
                        // iteration does not report it as a separate break.
                        i++;
                    }
                    else
                    {
                        yield return new LineBreak(i, i + 1, true);
                    }
                    break;
                }
            }
        }
Пример #2
0
 /// <summary>
 /// Scans forward from <paramref name="from"/> past every code point whose
 /// line break class is SP.
 /// </summary>
 /// <param name="from">Index at which the scan starts.</param>
 /// <returns>
 /// The index of the first code point at or after <paramref name="from"/> whose
 /// class is not SP, or the index where the scan ran off the end of the buffer.
 /// </returns>
 int findNextNonWhitespace(int from)
 {
     int index = from;
     for (; index < _codePoints.Length; index++)
     {
         if (UnicodeClasses.LineBreakClass(_codePoints[index]) != LineBreakClass.SP)
         {
             break;
         }
     }
     return index;
 }
Пример #3
0
 /// <summary>
 /// Walks backwards from <paramref name="from"/> over at most one trailing
 /// BK/LF/CR code point and then over any run of SP code points.
 /// </summary>
 /// <param name="from">Exclusive end index at which the backwards scan starts.</param>
 /// <returns>The exclusive end index that remains after the trailing characters are dropped.</returns>
 int findPriorNonWhitespace(int from)
 {
     int end = from;

     // Drop a single hard-break code point directly before the position, if any.
     if (end > 0)
     {
         switch (UnicodeClasses.LineBreakClass(_codePoints[end - 1]))
         {
         case LineBreakClass.BK:
         case LineBreakClass.LF:
         case LineBreakClass.CR:
             end--;
             break;
         }
     }

     // Then drop every SP code point that precedes it.
     while (end > 0 && UnicodeClasses.LineBreakClass(_codePoints[end - 1]) == LineBreakClass.SP)
     {
         end--;
     }

     return end;
 }
Пример #4
0
        /// <summary>
        /// Get the next line break info
        /// </summary>
        /// <remarks>
        /// Advances the reader position until a break is found or the buffer is
        /// exhausted. Once the buffer is exhausted, one final break at the end of
        /// the buffer is reported if it has not been reported yet. When the method
        /// returns false, <paramref name="lineBreak"/> is set to
        /// <c>new LineBreak(0, 0, false)</c>. An empty buffer yields false instead
        /// of indexing past the end of the code point buffer.
        /// </remarks>
        /// <param name="lineBreak">A LineBreak structure returned by this method</param>
        /// <returns>True if there was another line break</returns>
        public bool NextBreak(out LineBreak lineBreak)
        {
            // get the first char if we're at the beginning of the string
            if (!_curClass.HasValue)
            {
                // Nothing left to classify (e.g. an empty buffer): peeking or reading
                // at _pos would index out of range, so report "no break" instead.
                if (_pos >= _codePoints.Length)
                {
                    lineBreak = new LineBreak(0, 0, false);
                    return false;
                }

                if (this.peekCharClass() == LineBreakClass.SP)
                {
                    // Leading space: start from WJ and leave the space unread so the
                    // main loop below consumes it.
                    this._curClass = LineBreakClass.WJ;
                }
                else
                {
                    this._curClass = mapFirst(this.readCharClass());
                }
            }

            while (_pos < _codePoints.Length)
            {
                // Remember where the code point about to be read starts, and which
                // class was read on the previous iteration.
                _lastPos = _pos;
                var lastClass = _nextClass;
                _nextClass = this.readCharClass();

                // explicit newline: the current class is BK, or CR not followed by LF
                if (_curClass.HasValue && ((_curClass == LineBreakClass.BK) || ((_curClass == LineBreakClass.CR) && (this._nextClass != LineBreakClass.LF))))
                {
                    // The class just read becomes the starting class of the next line.
                    _curClass = mapFirst(mapClass(_nextClass.Value));
                    lineBreak = new LineBreak(findPriorNonWhitespace(_lastPos), _lastPos, true);
                    return true;
                }

                // handle classes not handled by the pair table
                LineBreakClass? cur = null;
                switch (_nextClass.Value)
                {
                case LineBreakClass.SP:
                    // A space leaves the current class unchanged.
                    cur = _curClass;
                    break;

                case LineBreakClass.BK:
                case LineBreakClass.LF:
                case LineBreakClass.NL:
                    // Becomes BK so the next iteration takes the explicit newline path.
                    cur = LineBreakClass.BK;
                    break;

                case LineBreakClass.CR:
                    cur = LineBreakClass.CR;
                    break;

                case LineBreakClass.CB:
                    cur = LineBreakClass.BA;
                    break;
                }

                if (cur != null)
                {
                    _curClass = cur;
                    if (_nextClass.HasValue && _nextClass.Value == LineBreakClass.CB)
                    {
                        // CB reports a (non-required) break immediately before itself.
                        lineBreak = new LineBreak(findPriorNonWhitespace(_lastPos), _lastPos);
                        return true;
                    }
                    continue;
                }

                // if not handled already, use the pair table
                var shouldBreak = false;
                switch (LineBreakPairTable.table[(int)this._curClass.Value][(int)this._nextClass.Value])
                {
                case LineBreakPairTable.DI_BRK:     // Direct break
                    shouldBreak = true;
                    break;

                case LineBreakPairTable.IN_BRK:     // possible indirect break
                    // Break only when the previously read class was a space.
                    shouldBreak = lastClass.HasValue && lastClass.Value == LineBreakClass.SP;
                    break;

                case LineBreakPairTable.CI_BRK:
                    shouldBreak = lastClass.HasValue && lastClass.Value == LineBreakClass.SP;
                    if (!shouldBreak)
                    {
                        // No break here: keep the current class as it is.
                        continue;
                    }
                    break;

                case LineBreakPairTable.CP_BRK:     // prohibited for combining marks
                    if (!lastClass.HasValue || lastClass.Value != LineBreakClass.SP)
                    {
                        // Not preceded by a space: keep the current class as it is.
                        continue;
                    }
                    break;
                }

                _curClass = _nextClass;
                if (shouldBreak)
                {
                    lineBreak = new LineBreak(findPriorNonWhitespace(_lastPos), _lastPos);
                    return true;
                }
            }

            if (_pos >= _codePoints.Length)
            {
                if (_lastPos < _codePoints.Length)
                {
                    // Report the end of the buffer once; setting _lastPos to the length
                    // prevents it from being reported again on the next call.
                    _lastPos = _codePoints.Length;
                    var  cls      = UnicodeClasses.LineBreakClass(_codePoints[_codePoints.Length - 1]);
                    bool required = cls == LineBreakClass.BK || cls == LineBreakClass.LF || cls == LineBreakClass.CR;
                    lineBreak = new LineBreak(findPriorNonWhitespace(_codePoints.Length), _codePoints.Length, required);
                    return true;
                }
            }

            lineBreak = new LineBreak(0, 0, false);
            return false;
        }
Пример #5
0
 // Get the mapped class of the code point at the current position without consuming it
 LineBreakClass peekCharClass()
 {
     var codePoint = _codePoints[_pos];
     var rawClass = UnicodeClasses.LineBreakClass(codePoint);
     return mapClass(rawClass);
 }
Пример #6
0
 // Consume the code point at the current position and return its mapped class
 LineBreakClass readCharClass()
 {
     // Advance first, then index with the old position (same order as a post-increment).
     int index = _pos++;
     var rawClass = UnicodeClasses.LineBreakClass(_codePoints[index]);
     return mapClass(rawClass);
 }