/// <summary>
/// Walks backwards from <paramref name="from"/> past at most one trailing
/// BK/LF/CR code point and then past every SP code point before it.
/// </summary>
/// <param name="from">The exclusive end index to start scanning back from.</param>
/// <returns>The adjusted index; never less than zero.</returns>
private int FindPriorNonWhitespace(int from)
{
    Span<int> points = this.codePoints.Span;

    // Step back over a single explicit line terminator, if there is one.
    if (from > 0)
    {
        switch (GetLineBreakClass(points[from - 1]))
        {
            case LineBreakClass.BK:
            case LineBreakClass.LF:
            case LineBreakClass.CR:
                from--;
                break;
        }
    }

    // Then step back over the run of spaces that precedes it.
    while (from > 0 && GetLineBreakClass(points[from - 1]) == LineBreakClass.SP)
    {
        from--;
    }

    return from;
}
/// <summary>
/// Advances the enumerator to the next line break.
/// </summary>
/// <returns>
/// <see langword="true"/> if a break was found and stored in <c>Current</c>;
/// <see langword="false"/> if the enumerator has passed the end of the input,
/// in which case <c>Current</c> is reset to its default value.
/// </returns>
public bool MoveNext()
{
    // On the very first call, prime the state with the class of the first code point.
    if (this.first)
    {
        LineBreakClass initial = this.NextCharClass();
        this.first = false;
        this.currentClass = this.MapFirst(initial);
        this.nextClass = initial;
        this.lb8a = initial == LineBreakClass.ZWJ;
        this.lb30a = 0;
    }

    while (this.position < this.pointsLength)
    {
        this.lastPosition = this.position;
        LineBreakClass previousClass = this.nextClass;
        this.nextClass = this.NextCharClass();

        // Explicit newline: BK always breaks, CR breaks unless an LF follows it.
        bool hardBreak = this.currentClass == LineBreakClass.BK
            || (this.currentClass == LineBreakClass.CR && this.nextClass != LineBreakClass.LF);

        if (hardBreak)
        {
            this.currentClass = this.MapFirst(this.nextClass);
            this.Current = new LineBreak(this.FindPriorNonWhitespace(this.lastPosition), this.lastPosition, true);
            return true;
        }

        // Try the simple rules first and fall back to the pair table.
        bool? verdict = this.GetSimpleBreak();
        if (!verdict.HasValue)
        {
            verdict = this.GetPairTableBreak(previousClass);
        }

        // Rule LB8a: the flag is refreshed only after the break decision above.
        this.lb8a = this.nextClass == LineBreakClass.ZWJ;

        if (verdict.Value)
        {
            this.Current = new LineBreak(this.FindPriorNonWhitespace(this.lastPosition), this.lastPosition, false);
            return true;
        }
    }

    // Nothing left to report once the final break at the end of the input has been emitted.
    if (this.position < this.pointsLength || this.lastPosition >= this.pointsLength)
    {
        this.Current = default;
        return false;
    }

    // Emit the final break at the end of the input.
    this.lastPosition = this.pointsLength;
    bool required = this.currentClass == LineBreakClass.BK
        || (this.currentClass == LineBreakClass.CR && this.nextClass != LineBreakClass.LF);

    this.Current = new LineBreak(this.FindPriorNonWhitespace(this.pointsLength), this.lastPosition, required);
    return true;
}
/// <summary>
/// Resolves a raw line break class into the class used by the rest of the algorithm (LB 1).
/// </summary>
/// <param name="cp">The code point the class belongs to; only consulted for class SA.</param>
/// <param name="c">The raw line break class.</param>
/// <returns>The resolved class, or <paramref name="c"/> unchanged when no mapping applies.</returns>
private LineBreakClass MapClass(CodePoint cp, LineBreakClass c)
{
    // LB 1
    // ==========================================
    // Resolved Original    General_Category
    // ==========================================
    // AL       AI, SG, XX  Any
    // CM       SA          Only Mn or Mc
    // AL       SA          Any except Mn and Mc
    // NS       CJ          Any
    if (c == LineBreakClass.SA)
    {
        // SA is the only class whose resolution depends on the general category.
        UnicodeCategory category = CodePoint.GetGeneralCategory(cp);
        bool isMark = category == UnicodeCategory.NonSpacingMark
            || category == UnicodeCategory.SpacingCombiningMark;

        return isMark ? LineBreakClass.CM : LineBreakClass.AL;
    }

    return c switch
    {
        LineBreakClass.AI or LineBreakClass.SG or LineBreakClass.XX => LineBreakClass.AL,
        LineBreakClass.CJ => LineBreakClass.NS,
        _ => c,
    };
}
/// <summary>
/// Finds the next line break in the current code points, if there is one.
/// </summary>
/// <param name="lineBreak">
/// When this method returns <see langword="true"/>, the break that was found;
/// otherwise the default value. This parameter is passed uninitialized.
/// </param>
/// <returns>
/// <see langword="true"/> if a break was produced; otherwise <see langword="false"/>.
/// </returns>
public bool TryGetNextBreak(out LineBreak lineBreak)
{
    // On the first call, prime the state with the class of the first code point.
    if (this.first)
    {
        this.first = false;
        LineBreakClass initial = this.NextCharClass();
        this.currentClass = this.MapFirst(initial);
        this.nextClass = initial;
        this.lb8a = initial == LineBreakClass.ZWJ;
        this.lb30a = 0;
    }

    while (this.position < this.codePoints.Length)
    {
        this.lastPosition = this.position;
        LineBreakClass previousClass = this.nextClass;
        this.nextClass = this.NextCharClass();

        // Explicit newline: BK always breaks, CR breaks unless an LF follows it.
        bool hardBreak = this.currentClass == LineBreakClass.BK
            || (this.currentClass == LineBreakClass.CR && this.nextClass != LineBreakClass.LF);

        if (hardBreak)
        {
            this.currentClass = this.MapFirst(this.MapClass(this.nextClass));
            lineBreak = new LineBreak(this.FindPriorNonWhitespace(this.lastPosition), this.lastPosition, true);
            return true;
        }

        // Simple rules first; the pair table is only consulted when they give no answer.
        bool? decision = this.GetSimpleBreak() ?? this.GetPairTableBreak(previousClass);

        // Rule LB8a: refreshed only after the break decision above.
        this.lb8a = this.nextClass == LineBreakClass.ZWJ;

        if (decision.Value)
        {
            lineBreak = new LineBreak(this.FindPriorNonWhitespace(this.lastPosition), this.lastPosition, false);
            return true;
        }
    }

    // The final break at the end of the input has already been reported.
    if (this.lastPosition >= this.codePoints.Length)
    {
        lineBreak = default;
        return false;
    }

    // Report the final break at the end of the input.
    this.lastPosition = this.codePoints.Length;
    bool required = this.currentClass == LineBreakClass.BK
        || (this.currentClass == LineBreakClass.CR && this.nextClass != LineBreakClass.LF);

    lineBreak = new LineBreak(this.FindPriorNonWhitespace(this.codePoints.Length), this.lastPosition, required);
    return true;
}
/// <summary>
/// Finds the next line break in the code points, if there is one.
/// </summary>
/// <param name="lineBreak">
/// The break that was found, or <c>new LineBreak(0, 0, false)</c> when there are no more breaks.
/// </param>
/// <returns><c>true</c> if a break was produced; otherwise <c>false</c>.</returns>
public bool NextBreak(out LineBreak lineBreak)
{
    // On the first call, prime the state with the class of the first code point.
    if (_first)
    {
        _first = false;
        var initial = nextCharClass();
        _curClass = mapFirst(initial);
        _nextClass = initial;
        _LB8a = initial == LineBreakClass.ZWJ;
        _LB30a = 0;
    }

    while (_pos < _codePoints.Length)
    {
        _lastPos = _pos;
        var previousClass = _nextClass;
        _nextClass = nextCharClass();

        // Explicit newline: BK always breaks, CR breaks unless an LF follows it.
        bool hardBreak = _curClass == LineBreakClass.BK
            || (_curClass == LineBreakClass.CR && _nextClass != LineBreakClass.LF);

        if (hardBreak)
        {
            _curClass = mapFirst(mapClass(_nextClass));
            lineBreak = new LineBreak(findPriorNonWhitespace(_lastPos), _lastPos, true);
            return true;
        }

        // Simple rules first; the pair table is only consulted when they give no answer.
        bool? decision = getSimpleBreak() ?? getPairTableBreak(previousClass);

        // Rule LB8a: refreshed only after the break decision above.
        _LB8a = _nextClass == LineBreakClass.ZWJ;

        if (decision.Value)
        {
            lineBreak = new LineBreak(findPriorNonWhitespace(_lastPos), _lastPos, false);
            return true;
        }
    }

    // The final break at the end of the input has already been reported.
    if (_lastPos >= _codePoints.Length)
    {
        lineBreak = new LineBreak(0, 0, false);
        return false;
    }

    // Report the final break at the end of the input.
    _lastPos = _codePoints.Length;
    bool required = _curClass == LineBreakClass.BK
        || (_curClass == LineBreakClass.CR && _nextClass != LineBreakClass.LF);

    lineBreak = new LineBreak(findPriorNonWhitespace(_codePoints.Length), _lastPos, required);
    return true;
}
/// <summary>
/// Maps a line break class to the class used for it when it seeds the current state:
/// LineFeed and NextLine become MandatoryBreak, Space becomes WordJoiner.
/// </summary>
/// <param name="c">The class to map.</param>
/// <returns>The mapped class, or <paramref name="c"/> unchanged for every other value.</returns>
private static LineBreakClass MapFirst(LineBreakClass c)
{
    if (c == LineBreakClass.LineFeed || c == LineBreakClass.NextLine)
    {
        return LineBreakClass.MandatoryBreak;
    }

    if (c == LineBreakClass.Space)
    {
        return LineBreakClass.WordJoiner;
    }

    return c;
}
/// <summary>
/// Maps a line break class to the class used for it when it seeds the current state:
/// LF and NL become BK, SP becomes WJ.
/// </summary>
/// <param name="c">The class to map.</param>
/// <returns>The mapped class, or <paramref name="c"/> unchanged for every other value.</returns>
private LineBreakClass MapFirst(LineBreakClass c)
{
    bool isLineTerminator = c == LineBreakClass.LF || c == LineBreakClass.NL;
    if (isLineTerminator)
    {
        return LineBreakClass.BK;
    }

    return c == LineBreakClass.SP ? LineBreakClass.WJ : c;
}
/// <summary>
/// Resolves the classes AI, SA, SG and XX to AL, and CJ to NS.
/// </summary>
/// <param name="c">The raw line break class.</param>
/// <returns>The resolved class, or <paramref name="c"/> unchanged for every other value.</returns>
private LineBreakClass MapClass(LineBreakClass c)
{
    if (c == LineBreakClass.AI
        || c == LineBreakClass.SA
        || c == LineBreakClass.SG
        || c == LineBreakClass.XX)
    {
        return LineBreakClass.AL;
    }

    if (c == LineBreakClass.CJ)
    {
        return LineBreakClass.NS;
    }

    return c;
}
/// <summary>
/// Maps a line break class to the class used for it when it seeds the current state:
/// LF and NL become BK, CB becomes BA, SP becomes WJ.
/// </summary>
/// <param name="c">The class to map.</param>
/// <returns>The mapped class, or <paramref name="c"/> unchanged for every other value.</returns>
static LineBreakClass mapFirst(LineBreakClass c)
{
    if (c == LineBreakClass.LF || c == LineBreakClass.NL)
    {
        return LineBreakClass.BK;
    }

    if (c == LineBreakClass.CB)
    {
        return LineBreakClass.BA;
    }

    if (c == LineBreakClass.SP)
    {
        return LineBreakClass.WJ;
    }

    return c;
}
/// <summary>
/// Initializes a new enumerator over <paramref name="text"/>, positioned at the start
/// with every rule-tracking flag and counter cleared.
/// </summary>
/// <param name="text">The text to enumerate line breaks for.</param>
public LineBreakEnumerator(ReadOnlySlice<char> text)
    : this()
{
    // Input and cursor.
    _text = text;
    _position = 0;
    _first = true;

    // Both class slots start out as Unknown until the first code point is read.
    _currentClass = _nextClass = LineBreakClass.Unknown;

    // Per-rule boolean state.
    _lb8a = _lb21a = _lb22ex = _lb24ex = _lb25ex = _lb31 = _lb30 = false;

    // Per-rule counters.
    _alphaNumericCount = 0;
    _lb30a = 0;
}
/// <summary>
/// Resolves Ambiguous, ComplexContext, Surrogate and Unknown to Alphabetic,
/// and ConditionalJapaneseStarter to Nonstarter.
/// </summary>
/// <param name="c">The raw line break class.</param>
/// <returns>The resolved class, or <paramref name="c"/> unchanged for every other value.</returns>
private static LineBreakClass MapClass(LineBreakClass c)
{
    switch (c)
    {
        case LineBreakClass.Ambiguous:
        case LineBreakClass.ComplexContext:
        case LineBreakClass.Surrogate:
        case LineBreakClass.Unknown:
            return LineBreakClass.Alphabetic;

        case LineBreakClass.ConditionalJapaneseStarter:
            return LineBreakClass.Nonstarter;
    }

    return c;
}
/// <summary>
/// Handles the next-class values that are not handled by the pair table.
/// </summary>
/// <returns>
/// <see langword="false"/> when the next class is SP, BK, LF, NL or CR (updating the
/// current class for all but SP); <see langword="null"/> when the pair table must decide.
/// </returns>
private bool? GetSimpleBreak()
{
    LineBreakClass upcoming = this.nextClass;

    if (upcoming == LineBreakClass.SP)
    {
        return false;
    }

    // BK, LF and NL all put the state machine into the BK state.
    if (upcoming == LineBreakClass.BK || upcoming == LineBreakClass.LF || upcoming == LineBreakClass.NL)
    {
        this.currentClass = LineBreakClass.BK;
        return false;
    }

    if (upcoming == LineBreakClass.CR)
    {
        this.currentClass = LineBreakClass.CR;
        return false;
    }

    return null;
}
/// <summary>
/// Handles the next-class values that are not handled by the pair table.
/// </summary>
/// <returns>
/// <c>false</c> when the next class is SP, BK, LF, NL or CR (updating the current class
/// for all but SP); <c>null</c> when the pair table must decide.
/// </returns>
bool? getSimpleBreak()
{
    var upcoming = _nextClass;

    if (upcoming == LineBreakClass.SP)
    {
        return false;
    }

    // BK, LF and NL all put the state machine into the BK state.
    if (upcoming == LineBreakClass.BK || upcoming == LineBreakClass.LF || upcoming == LineBreakClass.NL)
    {
        _curClass = LineBreakClass.BK;
        return false;
    }

    if (upcoming == LineBreakClass.CR)
    {
        _curClass = LineBreakClass.CR;
        return false;
    }

    return null;
}
/// <summary>
/// Initializes a new enumerator over <paramref name="source"/>, positioned at the start
/// with every rule-tracking flag and counter cleared.
/// </summary>
/// <param name="source">The text to enumerate line breaks for.</param>
public LineBreakEnumerator(ReadOnlySpan<char> source)
    : this()
{
    // Input and its length measured in code points.
    this.source = source;
    this.pointsLength = CodePoint.GetCodePointCount(source);

    // Cursors.
    this.charPosition = 0;
    this.position = 0;
    this.lastPosition = 0;
    this.first = true;

    // Both class slots start out as XX until the first code point is read.
    this.currentClass = this.nextClass = LineBreakClass.XX;

    // Per-rule boolean state.
    this.lb8a = this.lb21a = this.lb22ex = this.lb24ex = this.lb25ex = this.lb31 = this.lb30 = false;

    // Per-rule counters.
    this.alphaNumericCount = 0;
    this.lb30a = 0;
}
/// <summary>
/// Handles the next-class values that are not handled by the pair table.
/// </summary>
/// <returns>
/// <c>false</c> when the next class is Space, MandatoryBreak, LineFeed, NextLine or
/// CarriageReturn (updating the current class for all but Space);
/// <c>null</c> when the pair table must decide.
/// </returns>
private bool? GetSimpleBreak()
{
    var upcoming = _nextClass;

    if (upcoming == LineBreakClass.Space)
    {
        return false;
    }

    // MandatoryBreak, LineFeed and NextLine all put the state machine into MandatoryBreak.
    if (upcoming == LineBreakClass.MandatoryBreak
        || upcoming == LineBreakClass.LineFeed
        || upcoming == LineBreakClass.NextLine)
    {
        _currentClass = LineBreakClass.MandatoryBreak;
        return false;
    }

    if (upcoming == LineBreakClass.CarriageReturn)
    {
        _currentClass = LineBreakClass.CarriageReturn;
        return false;
    }

    return null;
}
/// <summary>
/// Decides whether a break is allowed between the current and next class using the
/// pair table, then applies the LB22, LB8a, LB21a, LB30a and LB30b adjustments.
/// </summary>
/// <param name="lastClass">The class that preceded the next class.</param>
/// <returns><c>true</c> if a break is allowed here; otherwise <c>false</c>.</returns>
/// <remarks>
/// On the two early-return paths (CIBRK/CPBRK not preceded by a space) the current class
/// is deliberately left unchanged; on every other path it advances to the next class.
/// </remarks>
private bool GetPairTableBreak(LineBreakClass lastClass)
{
    bool breakAllowed = false;
    bool precededBySpace = lastClass == LineBreakClass.Space;
    var pairAction = LineBreakPairTable.Table[(int)_currentClass][(int)_nextClass];

    switch (pairAction)
    {
        // Direct break.
        case LineBreakPairTable.DIBRK:
            breakAllowed = true;
            break;

        // TODO: Rewrite this so that it defaults to true and rules are set as exceptions.
        // Possible indirect break: the first pending exception that matches wins.
        case LineBreakPairTable.INBRK:
            if (_lb31 && _nextClass == LineBreakClass.OpenPunctuation)
            {
                // LB31
                _lb31 = false;
                breakAllowed = true;
            }
            else if (_lb30)
            {
                // LB30
                _lb30 = false;
                _alphaNumericCount = 0;
                breakAllowed = true;
            }
            else if (_lb25ex && (_nextClass == LineBreakClass.PrefixNumeric || _nextClass == LineBreakClass.Numeric))
            {
                // LB25
                _lb25ex = false;
                breakAllowed = true;
            }
            else if (_lb24ex && (_nextClass == LineBreakClass.PostfixNumeric || _nextClass == LineBreakClass.PrefixNumeric))
            {
                // LB24
                _lb24ex = false;
                breakAllowed = true;
            }
            else
            {
                // LB18
                breakAllowed = precededBySpace;
            }

            break;

        case LineBreakPairTable.CIBRK:
            if (!precededBySpace)
            {
                return false;
            }

            breakAllowed = true;
            break;

        // Prohibited for combining marks.
        case LineBreakPairTable.CPBRK:
            if (!precededBySpace)
            {
                return false;
            }

            break;

        case LineBreakPairTable.PRBRK:
            break;
    }

    // Rule LB22
    if (_nextClass == LineBreakClass.Inseparable)
    {
        switch (lastClass)
        {
            case LineBreakClass.MandatoryBreak:
            case LineBreakClass.ContingentBreak:
            case LineBreakClass.Exclamation:
            case LineBreakClass.LineFeed:
            case LineBreakClass.NextLine:
            case LineBreakClass.Space:
            case LineBreakClass.ZWSpace:
                // Allow break: keep whatever the pair table decided.
                break;

            case LineBreakClass.CombiningMark when _lb22ex:
                // Allow break once, consuming the pending exception.
                _lb22ex = false;
                break;

            default:
                breakAllowed = false;
                break;
        }
    }

    // Rule LB8a
    if (_lb8a)
    {
        breakAllowed = false;
    }

    // Rule LB21a
    bool currentIsHyphenOrBreakAfter =
        _currentClass == LineBreakClass.Hyphen || _currentClass == LineBreakClass.BreakAfter;

    if (_lb21a && currentIsHyphenOrBreakAfter)
    {
        breakAllowed = false;
        _lb21a = false;
    }
    else
    {
        _lb21a = _currentClass == LineBreakClass.HebrewLetter;
    }

    // Rule LB30a
    if (_currentClass != LineBreakClass.RegionalIndicator)
    {
        _lb30a = 0;
    }
    else
    {
        _lb30a++;
        if (_lb30a == 2 && _nextClass == LineBreakClass.RegionalIndicator)
        {
            breakAllowed = true;
            _lb30a = 0;
        }
    }

    // Rule LB30b
    // Mahjong Tiles (Unicode block) are extended pictographics but have a class of ID.
    // Unassigned codepoints with Line_Break=ID in some blocks are also assigned the
    // Extended_Pictographic property. Those blocks are intended for future allocation
    // of emoji characters.
    if (_nextClass == LineBreakClass.EModifier
        && _lastPosition > 0
        && Codepoint.IsInRangeInclusive(Codepoint.ReadAt(_text, _lastPosition - 1, out int _), 0x1F000, 0x1F02F))
    {
        breakAllowed = false;
    }

    _currentClass = _nextClass;
    return breakAllowed;
}
private LineBreakClass MapFirst(LineBreakClass c) => c switch {
/// <summary>
/// Decides whether a break is allowed between the current and next class using the
/// pair table, then applies the LB8a, LB21a and LB30a adjustments.
/// </summary>
/// <param name="lastClass">The class that preceded the next class.</param>
/// <returns><c>true</c> if a break is allowed here; otherwise <c>false</c>.</returns>
/// <remarks>
/// On the two early-return paths (CI_BRK/CP_BRK not preceded by a space) the current
/// class is left unchanged; on every other path it advances to the next class.
/// </remarks>
bool getPairTableBreak(LineBreakClass lastClass)
{
    bool breakAllowed = false;
    bool precededBySpace = lastClass == LineBreakClass.SP;

    switch (LineBreakPairTable.table[(int)_curClass][(int)_nextClass])
    {
        // Direct break.
        case LineBreakPairTable.DI_BRK:
            breakAllowed = true;
            break;

        // Possible indirect break.
        case LineBreakPairTable.IN_BRK:
            breakAllowed = precededBySpace;
            break;

        case LineBreakPairTable.CI_BRK:
            if (!precededBySpace)
            {
                return false;
            }

            breakAllowed = true;
            break;

        // Prohibited for combining marks.
        case LineBreakPairTable.CP_BRK:
            if (!precededBySpace)
            {
                return false;
            }

            break;

        case LineBreakPairTable.PR_BRK:
            break;
    }

    // Rule LB8a
    if (_LB8a)
    {
        breakAllowed = false;
    }

    // Rule LB21a
    bool currentIsHyphenOrBreakAfter = _curClass == LineBreakClass.HY || _curClass == LineBreakClass.BA;
    if (_LB21a && currentIsHyphenOrBreakAfter)
    {
        breakAllowed = false;
        _LB21a = false;
    }
    else
    {
        _LB21a = _curClass == LineBreakClass.HL;
    }

    // Rule LB30a
    if (_curClass != LineBreakClass.RI)
    {
        _LB30a = 0;
    }
    else
    {
        _LB30a++;
        if (_LB30a == 2 && _nextClass == LineBreakClass.RI)
        {
            breakAllowed = true;
            _LB30a = 0;
        }
    }

    _curClass = _nextClass;
    return breakAllowed;
}
/// <summary>
/// Advances the enumerator to the next line break.
/// </summary>
/// <returns>
/// <c>true</c> if a break was found and stored in <c>Current</c>; <c>false</c> if the
/// end of the text has been passed, in which case <c>Current</c> is reset to its default.
/// </returns>
public bool MoveNext()
{
    // On the very first call, prime the state with the class of the first character.
    if (_first)
    {
        var initial = NextCharClass();
        _first = false;
        _currentClass = MapFirst(initial);
        _nextClass = initial;
        _lb8a = initial == LineBreakClass.ZWJ;
        _lb30a = 0;
    }

    while (_position < _text.Length)
    {
        _lastPosition = _position;
        var previousClass = _nextClass;
        _nextClass = NextCharClass();

        // Explicit newline: MandatoryBreak always breaks, CarriageReturn breaks unless a LineFeed follows.
        bool hardBreak = _currentClass == LineBreakClass.MandatoryBreak
            || (_currentClass == LineBreakClass.CarriageReturn && _nextClass != LineBreakClass.LineFeed);

        if (hardBreak)
        {
            _currentClass = MapFirst(_nextClass);
            Current = new LineBreak(FindPriorNonWhitespace(_lastPosition), _lastPosition, true);
            return true;
        }

        // Simple rules first; the pair table is only consulted when they give no answer.
        bool? simple = GetSimpleBreak();
        bool breakHere = simple.HasValue ? simple.Value : GetPairTableBreak(previousClass);

        // Rule LB8a: refreshed only after the break decision above.
        _lb8a = _nextClass == LineBreakClass.ZWJ;

        if (breakHere)
        {
            Current = new LineBreak(FindPriorNonWhitespace(_lastPosition), _lastPosition);
            return true;
        }
    }

    // Emit the final break at the end of the text, once.
    if (_position >= _text.Length && _lastPosition < _text.Length)
    {
        _lastPosition = _text.Length;

        bool required = _currentClass == LineBreakClass.MandatoryBreak
            || (_currentClass == LineBreakClass.CarriageReturn && _nextClass != LineBreakClass.LineFeed);

        Current = new LineBreak(FindPriorNonWhitespace(_lastPosition), _lastPosition, required);
        return true;
    }

    Current = default;
    return false;
}
/// <summary>
/// Decides whether a break is allowed between the current and next class using the
/// pair table, then applies the LB22, LB8a, LB21a and LB30a adjustments.
/// </summary>
/// <param name="lastClass">The class that preceded the next class.</param>
/// <returns><c>true</c> if a break is allowed here; otherwise <c>false</c>.</returns>
/// <remarks>
/// On the two early-return paths (CIBRK/CPBRK not preceded by a space) the current class
/// is deliberately left unchanged; on every other path it advances to the next class.
/// </remarks>
private bool GetPairTableBreak(LineBreakClass lastClass)
{
    bool breakAllowed = false;
    bool precededBySpace = lastClass == LineBreakClass.Space;
    var pairAction = LineBreakPairTable.Table[(int)_currentClass][(int)_nextClass];

    switch (pairAction)
    {
        // Direct break.
        case LineBreakPairTable.DIBRK:
            breakAllowed = true;
            break;

        // TODO: Rewrite this so that it defaults to true and rules are set as exceptions.
        // Possible indirect break: the first pending exception that matches wins.
        case LineBreakPairTable.INBRK:
            if (_lb31 && _nextClass == LineBreakClass.OpenPunctuation)
            {
                // LB31
                _lb31 = false;
                breakAllowed = true;
            }
            else if (_lb30)
            {
                // LB30
                _lb30 = false;
                _alphaNumericCount = 0;
                breakAllowed = true;
            }
            else if (_lb25ex && (_nextClass == LineBreakClass.PrefixNumeric || _nextClass == LineBreakClass.Numeric))
            {
                // LB25
                _lb25ex = false;
                breakAllowed = true;
            }
            else if (_lb24ex && (_nextClass == LineBreakClass.PostfixNumeric || _nextClass == LineBreakClass.PrefixNumeric))
            {
                // LB24
                _lb24ex = false;
                breakAllowed = true;
            }
            else
            {
                // LB18
                breakAllowed = precededBySpace;
            }

            break;

        case LineBreakPairTable.CIBRK:
            if (!precededBySpace)
            {
                return false;
            }

            breakAllowed = true;
            break;

        // Prohibited for combining marks.
        case LineBreakPairTable.CPBRK:
            if (!precededBySpace)
            {
                return false;
            }

            break;

        case LineBreakPairTable.PRBRK:
            break;
    }

    // Rule LB22
    if (_nextClass == LineBreakClass.Inseparable)
    {
        switch (lastClass)
        {
            case LineBreakClass.MandatoryBreak:
            case LineBreakClass.ContingentBreak:
            case LineBreakClass.Exclamation:
            case LineBreakClass.LineFeed:
            case LineBreakClass.NextLine:
            case LineBreakClass.Space:
            case LineBreakClass.ZWSpace:
                // Allow break: keep whatever the pair table decided.
                break;

            case LineBreakClass.CombiningMark when _lb22ex:
                // Allow break once, consuming the pending exception.
                _lb22ex = false;
                break;

            default:
                breakAllowed = false;
                break;
        }
    }

    // Rule LB8a
    if (_lb8a)
    {
        breakAllowed = false;
    }

    // Rule LB21a
    bool currentIsHyphenOrBreakAfter =
        _currentClass == LineBreakClass.Hyphen || _currentClass == LineBreakClass.BreakAfter;

    if (_lb21a && currentIsHyphenOrBreakAfter)
    {
        breakAllowed = false;
        _lb21a = false;
    }
    else
    {
        _lb21a = _currentClass == LineBreakClass.HebrewLetter;
    }

    // Rule LB30a
    if (_currentClass != LineBreakClass.RegionalIndicator)
    {
        _lb30a = 0;
    }
    else
    {
        _lb30a++;
        if (_lb30a == 2 && _nextClass == LineBreakClass.RegionalIndicator)
        {
            breakAllowed = true;
            _lb30a = 0;
        }
    }

    _currentClass = _nextClass;
    return breakAllowed;
}
/// <summary>
/// Decides whether a break is allowed between the current and next class using the
/// pair table, then applies the LB8a, LB21a and LB30a adjustments.
/// </summary>
/// <param name="lastClass">The class that preceded the next class.</param>
/// <returns>
/// <see langword="true"/> if a break is allowed here; otherwise <see langword="false"/>.
/// </returns>
/// <remarks>
/// On the two early-return paths (CIBRK/CPBRK not preceded by a space) the current class
/// is left unchanged; on every other path it advances to the next class.
/// </remarks>
private bool GetPairTableBreak(LineBreakClass lastClass)
{
    bool breakAllowed = false;
    bool precededBySpace = lastClass == LineBreakClass.SP;

    switch (LineBreakPairTable.Table[(int)this.currentClass][(int)this.nextClass])
    {
        // Direct break.
        case LineBreakPairTable.DIBRK:
            breakAllowed = true;
            break;

        // Possible indirect break.
        case LineBreakPairTable.INBRK:
            breakAllowed = precededBySpace;
            break;

        case LineBreakPairTable.CIBRK:
            if (!precededBySpace)
            {
                return false;
            }

            breakAllowed = true;
            break;

        // Prohibited for combining marks.
        case LineBreakPairTable.CPBRK:
            if (!precededBySpace)
            {
                return false;
            }

            break;

        case LineBreakPairTable.PRBRK:
            break;
    }

    // Rule LB8a
    if (this.lb8a)
    {
        breakAllowed = false;
    }

    // Rule LB21a
    bool currentIsHyphenOrBreakAfter =
        this.currentClass == LineBreakClass.HY || this.currentClass == LineBreakClass.BA;

    if (this.lb21a && currentIsHyphenOrBreakAfter)
    {
        breakAllowed = false;
        this.lb21a = false;
    }
    else
    {
        this.lb21a = this.currentClass == LineBreakClass.HL;
    }

    // Rule LB30a
    if (this.currentClass != LineBreakClass.RI)
    {
        this.lb30a = 0;
    }
    else
    {
        this.lb30a++;
        if (this.lb30a == 2 && this.nextClass == LineBreakClass.RI)
        {
            breakAllowed = true;
            this.lb30a = 0;
        }
    }

    this.currentClass = this.nextClass;
    return breakAllowed;
}
/// <summary>
/// Tests whether a line break class is Alphabetic, HebrewLetter or Numeric.
/// </summary>
/// <param name="cls">The class to test.</param>
/// <returns><c>true</c> for those three classes; otherwise <c>false</c>.</returns>
private static bool IsAlphaNumeric(LineBreakClass cls)
{
    switch (cls)
    {
        case LineBreakClass.Alphabetic:
        case LineBreakClass.HebrewLetter:
        case LineBreakClass.Numeric:
            return true;

        default:
            return false;
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="NLineBreak.LineBreakClassResolveEventArgs"/> class.
/// </summary>
/// <param name="lbClass">The line break class to be resolved; stored as the source class.</param>
/// <param name="ci">The culture associated with the resolution; stored as given (no null check is made).</param>
public LineBreakClassResolveEventArgs(LineBreakClass lbClass, CultureInfo ci)
{
    this.m_cultureInfo = ci;
    this.m_lbSource = lbClass;
}
/// <summary>
/// LB1 Assign a line breaking class to each code point of the input.
/// Resolve AI, CB, SA, SG, and XX into other line breaking classes depending on criteria outside the scope of this algorithm.
/// In the absence of such criteria, it is recommended that classes AI, SA, SG, and XX be resolved to AL,
/// except that characters of class SA that have General_Category Mn or Mc be resolved to CM (see SA).
/// Unresolved class CB is handled in rule LB20.
/// </summary>
/// <remarks>
/// This implementation resolves AI to ID when the culture's two-letter ISO language is
/// "ko", "ja" or "zh" and to AL otherwise; SA, SG and XX always resolve to AL (the
/// Mn/Mc exception for SA is not applied here); every other class is returned unchanged.
/// When the resolved class is not <c>Undefined</c> and the <c>LineBreakClassResolve</c>
/// event has subscribers, the event is raised and the handler-supplied <c>Result</c> is
/// returned instead.
/// NOTE(review): the default computed here is not passed to the event args, so a handler
/// that does not set <c>Result</c> discards it — confirm that is intended.
/// </remarks>
/// <param name="lbClass">The unresolved line break class.</param>
/// <param name="ci">The <see cref="System.Globalization.CultureInfo"/> used for language-specific resolution; may be <c>null</c>.</param>
/// <returns>The resolved line break class.</returns>
private LineBreakClass ResolveLineBreakClass(LineBreakClass lbClass, CultureInfo ci)
{
    LineBreakClass result;

    switch (lbClass)
    {
        case LineBreakClass.AI:
            result = LineBreakClass.AL;
            if (ci != null)
            {
                string language = ci.TwoLetterISOLanguageName;
                if (language == "ko" || language == "ja" || language == "zh")
                {
                    result = LineBreakClass.ID;
                }
            }

            break;

        case LineBreakClass.SA:
        case LineBreakClass.SG:
        case LineBreakClass.XX:
            result = LineBreakClass.AL;
            break;

        default:
            // Includes CB, which stays CB.
            result = lbClass;
            break;
    }

    if (result != LineBreakClass.Undefined)
    {
        // Snapshot the delegate so a concurrent unsubscribe between the null check
        // and the invocation cannot cause a NullReferenceException.
        var handler = LineBreakClassResolve;
        if (handler != null)
        {
            LineBreakClassResolveEventArgs e = new LineBreakClassResolveEventArgs(lbClass, ci);
            handler(this, e);
            return e.Result;
        }
    }

    return result;
}
/// <summary>
/// Looks up the pair-table entry for two adjacent line break classes.
/// </summary>
/// <param name="first">The class used as the first (row) index.</param>
/// <param name="second">The class used as the second (column) index.</param>
/// <returns>The table entry converted to <c>PairBreakType</c>.</returns>
public static PairBreakType Map(LineBreakClass first, LineBreakClass second)
{
    int rowIndex = (int)first;
    int columnIndex = (int)second;

    var row = s_breakPairTable[rowIndex];
    return (PairBreakType)row[columnIndex];
}