/// <summary>
/// Splits <paramref name="text"/> into the segments produced by a
/// <see cref="RuleBasedBreakIterator"/> created for the given type and locale.
/// </summary>
/// <param name="type">The kind of break iterator to create.</param>
/// <param name="locale">The locale name passed to the break iterator.</param>
/// <param name="text">The text to split.</param>
/// <returns>
/// The segments, copied into a list before the underlying iterator is disposed.
/// </returns>
public IEnumerable<string> Split(BreakIterator.UBreakIteratorType type, string locale, string text)
{
    using (var breakIterator = new RuleBasedBreakIterator(type, locale))
    {
        breakIterator.SetText(text);

        // Materialize the segments while the iterator is still alive. Returning
        // the iterator itself would hand the caller an object that the using
        // block has already disposed by the time it can be enumerated.
        return new List<string>(breakIterator);
    }
}
/// <summary>
/// Splits the string form of <paramref name="ci"/> with
/// <c>BreakIterator.Split</c> and enumerates every resulting item,
/// discarding each one.
/// </summary>
/// <param name="iteratorType">The kind of break iterator to split with.</param>
/// <param name="locale">The locale passed to the split call.</param>
/// <param name="ci">The character iterator whose <c>ToString()</c> result is split.</param>
private void Consume(BreakIterator.UBreakIteratorType iteratorType, Locale locale, CharacterIterator ci)
{
    // The loop body is intentionally empty: the only purpose is to walk the
    // whole sequence from start to finish.
    foreach (var segment in BreakIterator.Split(iteratorType, locale, ci.ToString()))
    {
    }
}
/// <summary>
/// Opens a new UBreakIterator for locating text boundaries for a specified locale
/// by forwarding to the "ubrk_open" entry point of the ICU common library.
/// </summary>
/// <param name="type">The kind of break iterator to open.</param>
/// <param name="locale">The locale.</param>
/// <param name="text">The text handed to the native function.</param>
/// <param name="textLength">Length of the text.</param>
/// <param name="errorCode">
/// Initialized to <c>ErrorCode.NoErrors</c>, then passed as the <c>out</c>
/// argument of the native call.
/// </param>
/// <returns>The pointer returned by the native <c>ubrk_open</c> call.</returns>
public static IntPtr ubrk_open(BreakIterator.UBreakIteratorType type, string locale, string text, int textLength, out ErrorCode errorCode)
{
    errorCode = ErrorCode.NoErrors;

    // Resolve the native entry point on first use and cache the delegate so
    // later calls skip the lookup.
    var open = BreakIteratorMethods.ubrk_open;
    if (open == null)
    {
        open = GetMethod<BreakIteratorMethodsContainer.ubrk_openDelegate>(IcuCommonLibHandle, "ubrk_open", true);
        BreakIteratorMethods.ubrk_open = open;
    }

    return open(type, locale, text, textLength, out errorCode);
}
/// <summary>
/// Enumerates a fixed sample sentence with an "en-US"
/// <see cref="RuleBasedBreakIterator"/> of the given type and collects
/// every yielded segment.
/// </summary>
/// <param name="type">The kind of break iterator to create.</param>
/// <returns>The segments in the order the iterator produced them.</returns>
public List<string> GetEnumerator(BreakIterator.UBreakIteratorType type)
{
    const string sampleText = "Aa bb. Cc 3.5 x? Y?x! Z";
    var segments = new List<string>();

    using (var iterator = new RuleBasedBreakIterator(type, "en-US"))
    {
        iterator.SetText(sampleText);

        foreach (var segment in iterator)
        {
            segments.Add(segment);
        }
    }

    return segments;
}
/// <summary>
/// Checks <c>BreakIterator.IsBoundary</c> for a sentence or word iterator
/// created with the "zh" locale, and the value of <c>Current</c> after each call.
/// </summary>
/// <param name="type">Iterator type under test; only SENTENCE and WORD are handled.</param>
/// <param name="text">The text given to the iterator.</param>
/// <param name="offsetsToTest">Offsets passed to <c>IsBoundary</c>, one per check.</param>
/// <param name="expectedIsBoundary">Expected <c>IsBoundary</c> result for each offset.</param>
/// <param name="expectedOffsets">Expected <c>BreakIterator.Current</c> after calling <c>IsBoundary</c>.</param>
public void IsBoundary(BreakIterator.UBreakIteratorType type, string text, int[] offsetsToTest, bool[] expectedIsBoundary, int[] expectedOffsets)
{
    var locale = new Locale("zh");
    BreakIterator iterator = null;

    try
    {
        if (type == BreakIterator.UBreakIteratorType.SENTENCE)
        {
            iterator = BreakIterator.CreateSentenceInstance(locale);
        }
        else if (type == BreakIterator.UBreakIteratorType.WORD)
        {
            iterator = BreakIterator.CreateWordInstance(locale);
        }
        else
        {
            throw new NotSupportedException("This iterator type is not supported in this test yet. [" + type + "]");
        }

        iterator.SetText(text);

        for (int index = 0; index < offsetsToTest.Length; index++)
        {
            int offset = offsetsToTest[index];
            bool actual = iterator.IsBoundary(offset);

            Assert.AreEqual(expectedIsBoundary[index], actual, "Expected IsBoundary was not equal at i: {0}, offset: {1}", index, offset);

            // Current is read only after IsBoundary has been called for this offset.
            Assert.AreEqual(expectedOffsets[index], iterator.Current);
        }
    }
    finally
    {
        // The iterator is still null when creation threw before assignment.
        if (iterator != null)
        {
            iterator.Dispose();
        }
    }
}
/// <summary>
/// Splits <paramref name="text"/> through <c>BreakIterator.Split</c>
/// using the fixed locale "en-US".
/// </summary>
/// <param name="type">The kind of break iterator to split with.</param>
/// <param name="text">The text to split.</param>
/// <returns>Whatever <c>BreakIterator.Split</c> returns for these arguments.</returns>
public IEnumerable<string> Split(BreakIterator.UBreakIteratorType type, string text)
{
    const string locale = "en-US";

    IEnumerable<string> segments = BreakIterator.Split(type, locale, text);
    return segments;
}
/// <summary>
/// Walks a sentence or word break iterator (created with the "zh" locale)
/// forwards over <paramref name="text"/>, checking each boundary offset and
/// rule status, then checks the iterator's state once it is exhausted and
/// after moving back to the first boundary.
/// </summary>
/// <param name="type">Iterator type under test; only SENTENCE and WORD are handled.</param>
/// <param name="text">The text given to the iterator.</param>
/// <param name="expected">Expected boundary offsets, in iteration order.</param>
/// <param name="ruleStatus">Expected rule status at each boundary in <paramref name="expected"/>.</param>
public void CanIterateForwards(BreakIterator.UBreakIteratorType type, string text, int[] expected, BreakIterator.UWordBreak[] ruleStatus)
{
    var locale = new Locale("zh");
    BreakIterator iterator = null;

    try
    {
        if (type == BreakIterator.UBreakIteratorType.SENTENCE)
        {
            iterator = BreakIterator.CreateSentenceInstance(locale);
        }
        else if (type == BreakIterator.UBreakIteratorType.WORD)
        {
            iterator = BreakIterator.CreateWordInstance(locale);
        }
        else
        {
            throw new NotSupportedException("This iterator type is not supported in this test yet. [" + type + "]");
        }

        iterator.SetText(text);

        CollectionAssert.AreEqual(expected, iterator.Boundaries);

        // Check every boundary in turn, advancing the iterator once per step.
        int count = expected.Length;
        for (int index = 0; index < count; index++)
        {
            int offset = iterator.Current;
            int status = iterator.GetRuleStatus();
            int wantedStatus = (int)ruleStatus[index];

            Assert.AreEqual(expected[index], offset);
            Assert.AreEqual(wantedStatus, status);
            CollectionAssert.AreEqual(new[] { wantedStatus }, iterator.GetRuleStatusVector());

            int advanced = iterator.MoveNext();
            bool wasLastBoundary = index + 1 >= count;

            if (wasLastBoundary)
            {
                // Every item has been passed, so the iterator must report
                // that it is exhausted.
                Assert.AreEqual(BreakIterator.DONE, advanced);
            }
            else
            {
                Assert.AreEqual(expected[index + 1], advanced);
            }
        }

        int finalIndex = count - 1;
        Assert.AreEqual(expected[finalIndex], iterator.Current);

        // Moving past the end again still reports DONE and leaves Current
        // at the last offset.
        Assert.AreEqual(BreakIterator.DONE, iterator.MoveNext());
        Assert.AreEqual(expected[finalIndex], iterator.Current);

        // Returning to the start from the exhausted state yields the first boundary.
        Assert.AreEqual(expected[0], iterator.MoveFirst());
        Assert.AreEqual(expected[0], iterator.Current);
    }
    finally
    {
        // The iterator is still null when creation threw before assignment.
        if (iterator != null)
        {
            iterator.Dispose();
        }
    }
}