/// <summary>
/// Walks <paramref name="text"/> with a <see cref="RuleBasedBreakIterator"/> of the
/// requested type and locale and collects the spans between consecutive break positions.
/// </summary>
/// <param name="type">Kind of break iterator to create; also forwarded to <c>AddToken</c>.</param>
/// <param name="locale">Locale handed to the break iterator.</param>
/// <param name="text">Text that is set on the break iterator.</param>
/// <param name="includeSpacesAndPunctuation">
/// When <c>true</c> every span is kept and <c>AddToken</c> is not consulted; otherwise a span
/// is kept only if <c>AddToken</c> returns <c>true</c> for the rule status reported after the move.
/// </param>
/// <returns>The collected boundaries, in the order the iterator produced them.</returns>
private static IEnumerable<Boundary> GetBoundaries(UBreakIteratorType type, Locale locale, string text, bool includeSpacesAndPunctuation)
{
    var result = new List<Boundary>();

    using (var breakIterator = new RuleBasedBreakIterator(type, locale))
    {
        breakIterator.SetText(text);

        int segmentStart = breakIterator.Current;
        while (segmentStart != DONE)
        {
            int segmentEnd = breakIterator.MoveNext();
            // The rule status is read right after the move, before the DONE check.
            int ruleStatus = breakIterator.GetRuleStatus();

            if (segmentEnd == DONE)
            {
                break;
            }

            // Short-circuit: AddToken is only asked when the caller did not request everything.
            bool keepSegment = includeSpacesAndPunctuation || AddToken(type, ruleStatus);
            if (keepSegment)
            {
                result.Add(new Boundary(segmentStart, segmentEnd));
            }

            // The end of this span is the start of the next one.
            segmentStart = segmentEnd;
        }
    }

    return result;
}
/// <summary>
/// Copy constructor: builds a new RuleBasedBreakIterator from the state of
/// <paramref name="bi"/>.
/// </summary>
/// <remarks>
/// The iterator type, rules, locale, text and current index are taken over as-is. The text
/// boundaries are copied into a freshly allocated array. If the source holds a native break
/// iterator handle, that handle is cloned with <c>ubrk_safeClone</c>; otherwise no native
/// call is made.
/// </remarks>
/// <param name="bi">The break iterator to copy.</param>
/// <exception cref="Exception">Thrown when cloning the native break iterator reports a
/// failure error code.</exception>
private RuleBasedBreakIterator(RuleBasedBreakIterator bi)
{
    _iteratorType = bi._iteratorType;
    Rules = bi.Rules;
    _locale = bi._locale;
    _text = bi._text;
    _currentIndex = bi._currentIndex;

    // Give the copy its own boundaries array instead of sharing the source's array.
    TextBoundary[] sourceBoundaries = bi._textBoundaries;
    _textBoundaries = new TextBoundary[sourceBoundaries.Length];
    Array.Copy(sourceBoundaries, _textBoundaries, sourceBoundaries.Length);

    // Only clone the native iterator when the source actually has one.
    if (bi._breakIterator != IntPtr.Zero)
    {
        ErrorCode errorCode;
        _breakIterator = NativeMethods.ubrk_safeClone(bi._breakIterator, IntPtr.Zero, IntPtr.Zero, out errorCode);

        if (errorCode.IsFailure())
        {
            throw new Exception($"BreakIterator.ubrk_safeClone() failed with code {errorCode}");
        }
    }
}