/// <summary>
/// Creates a sentence or word <see cref="BreakIterator"/> for the "zh" locale, feeds it
/// <paramref name="text"/>, and walks it forwards while checking every reported boundary
/// and rule status against the expected values.
/// </summary>
/// <param name="type">Kind of iterator to create; only SENTENCE and WORD are handled here.</param>
/// <param name="text">Text handed to the iterator via <c>SetText</c>.</param>
/// <param name="expected">Boundary offsets the iterator is expected to report, in order.</param>
/// <param name="ruleStatus">Expected rule status for each entry of <paramref name="expected"/>.</param>
/// <exception cref="NotSupportedException">Thrown when <paramref name="type"/> is neither SENTENCE nor WORD.</exception>
public void CanIterateForwards(BreakIterator.UBreakIteratorType type, string text, int[] expected, BreakIterator.UWordBreak[] ruleStatus)
{
    var locale = new Locale("zh");
    BreakIterator iterator = default(BreakIterator);

    try
    {
        if (type == BreakIterator.UBreakIteratorType.SENTENCE)
        {
            iterator = BreakIterator.CreateSentenceInstance(locale);
        }
        else if (type == BreakIterator.UBreakIteratorType.WORD)
        {
            iterator = BreakIterator.CreateWordInstance(locale);
        }
        else
        {
            throw new NotSupportedException("This iterator type is not supported in this test yet. [" + type + "]");
        }

        iterator.SetText(text);

        // The complete boundary list must match before stepping through it one by one.
        CollectionAssert.AreEqual(expected, iterator.Boundaries);

        int boundaryCount = expected.Length;
        for (int index = 0; index < boundaryCount; index++)
        {
            // Read the iterator state first, then compare it with the expectation for this step.
            int position = iterator.Current;
            int actualStatus = iterator.GetRuleStatus();
            int wantedStatus = (int)ruleStatus[index];

            Assert.AreEqual(expected[index], position);
            Assert.AreEqual(wantedStatus, actualStatus);
            CollectionAssert.AreEqual(new[] { wantedStatus }, iterator.GetRuleStatusVector());

            int advancedTo = iterator.MoveNext();
            int following = index + 1;

            if (following >= boundaryCount)
            {
                // Every expected boundary has been visited, so the iterator
                // must report that it is exhausted.
                Assert.AreEqual(BreakIterator.DONE, advancedTo);
            }
            else
            {
                Assert.AreEqual(expected[following], advancedTo);
            }
        }

        int lastIndex = expected.Length - 1;
        int lastBoundary = expected[lastIndex];
        Assert.AreEqual(lastBoundary, iterator.Current);

        // Moving past the end again reports DONE and leaves Current on the last offset.
        Assert.AreEqual(BreakIterator.DONE, iterator.MoveNext());
        Assert.AreEqual(lastBoundary, iterator.Current);

        // From the end, MoveFirst must bring the iterator back to the first expected boundary.
        Assert.AreEqual(expected[0], iterator.MoveFirst());
        Assert.AreEqual(expected[0], iterator.Current);
    }
    finally
    {
        // The iterator is only assigned inside the try block, so it may still be unset here.
        if (iterator != default(BreakIterator))
        {
            iterator.Dispose();
        }
    }
}
/// <inheritdoc/>
public void Reset()
{
    // Send the wrapped break iterator back via MoveFirst and keep the
    // offset it returns as the current limit.
    var firstOffset = _breakIterator.MoveFirst();
    _currentLimit = firstOffset;
}
/// <summary>
/// Steps a line break iterator (invariant culture) over <c>LINE_TEXT</c> and checks the
/// offsets returned by MoveNext, MovePrevious, MoveFirst, MoveLast and Current,
/// including the behavior before any text is set and at both ends of the text.
/// </summary>
public void TestLineIteration()
{
    const int lastBoundary = 108;

    BreakIterator iterator = GetLineInstance(System.Globalization.CultureInfo.InvariantCulture);

    // No text set yet: position is 0 and there is nothing to move to.
    Assert.AreEqual(0, iterator.Current);
    Assert.AreEqual(BreakIterator.DONE, iterator.MoveNext());
    Assert.AreEqual(0, iterator.Current);

    iterator.SetText(LINE_TEXT);

    // A freshly initialized iterator starts at offset 0.
    Assert.AreEqual(0, iterator.Current);

    // Leading boundaries, each followed by a check that Current reports the same offset:
    //   7  - Apache\t^Lucene (break on \t)
    //   13 - Lucene^(TM)
    //   18 - Lucene(TM) ^is a
    foreach (int boundary in new[] { 7, 13, 18 })
    {
        Assert.AreEqual(boundary, iterator.MoveNext());
        Assert.AreEqual(boundary, iterator.Current);
    }

    // Skip two boundaries to reach the start of "high-performance".
    for (int skipped = 0; skipped < 2; skipped++)
    {
        iterator.MoveNext();
    }

    // high-\n^performance
    Assert.AreEqual(29, iterator.MoveNext());

    // in Java.^ is the final boundary.
    Assert.AreEqual(lastBoundary, iterator.MoveLast());

    // Stepping past the end reports DONE and leaves the position untouched.
    Assert.AreEqual(BreakIterator.DONE, iterator.MoveNext());
    Assert.AreEqual(lastBoundary, iterator.Current);

    // MovePrevious steps back one boundary and Current follows it.
    Assert.AreEqual(103, iterator.MovePrevious());
    Assert.AreEqual(103, iterator.Current);

    // MoveFirst returns to the start and Current follows it.
    Assert.AreEqual(0, iterator.MoveFirst());
    Assert.AreEqual(0, iterator.Current);

    // Stepping before the start reports DONE and leaves the position untouched.
    Assert.AreEqual(BreakIterator.DONE, iterator.MovePrevious());
    Assert.AreEqual(0, iterator.Current);

    // MoveLast jumps from the start straight to the final boundary.
    Assert.AreEqual(lastBoundary, iterator.MoveLast());
}
/// <summary>
/// Steps a word break iterator (invariant culture) over <c>TEXT</c> and checks the
/// offsets returned by MoveNext, MovePrevious, MoveFirst, MoveLast and Current,
/// including the behavior before any text is set and at both ends of the text.
/// </summary>
public void TestWordIteration()
{
    const int lastBoundary = 107;

    BreakIterator iterator = GetWordInstance(System.Globalization.CultureInfo.InvariantCulture);

    // No text set yet: position is 0 and there is nothing to move to.
    Assert.AreEqual(0, iterator.Current);
    Assert.AreEqual(BreakIterator.DONE, iterator.MoveNext());
    Assert.AreEqual(0, iterator.Current);

    iterator.SetText(TEXT);

    // A freshly initialized iterator starts at offset 0.
    Assert.AreEqual(0, iterator.Current);

    // First three boundaries, each followed by a check that Current reports the same offset:
    //   6  - Apache^
    //   7  - ^Lucene
    //   13 - Lucene^
    foreach (int boundary in new[] { 6, 7, 13 })
    {
        Assert.AreEqual(boundary, iterator.MoveNext());
        Assert.AreEqual(boundary, iterator.Current);
    }

    // Next four boundaries, checked through the MoveNext return value only:
    //   14 - ^TM
    //   16 - TM^
    //   17 - TM)^
    //   18 - ^is
    foreach (int boundary in new[] { 14, 16, 17, 18 })
    {
        Assert.AreEqual(boundary, iterator.MoveNext());
    }

    // Skip three boundaries on the way to "high-performance".
    for (int skipped = 0; skipped < 3; skipped++)
    {
        iterator.MoveNext();
    }

    // ^high-performance
    Assert.AreEqual(23, iterator.MoveNext());

    // high-performance^ : the hyphen must not produce a break.
    Assert.AreEqual(39, iterator.MoveNext());

    // MoveLast jumps to the final boundary.
    Assert.AreEqual(lastBoundary, iterator.MoveLast());

    // Stepping past the end reports DONE and leaves the position untouched.
    Assert.AreEqual(BreakIterator.DONE, iterator.MoveNext());
    Assert.AreEqual(lastBoundary, iterator.Current);

    // MoveFirst returns to the start.
    Assert.AreEqual(0, iterator.MoveFirst());

    // Stepping before the start reports DONE and leaves the position untouched.
    Assert.AreEqual(BreakIterator.DONE, iterator.MovePrevious());
    Assert.AreEqual(0, iterator.Current);
}
/// <summary>
/// Steps a sentence break iterator (invariant culture) over <c>SENTENCE_TEXT</c> and checks
/// the offsets returned by MoveNext, MovePrevious, MoveFirst, MoveLast and Current,
/// including the behavior before any text is set and at both ends of the text.
/// </summary>
public void TestSentenceIteration()
{
    const int firstSentenceEnd = 108;
    const int lastBoundary = 400;

    BreakIterator iterator = GetSentenceInstance(System.Globalization.CultureInfo.InvariantCulture);

    // No text set yet: position is 0 and there is nothing to move to.
    Assert.AreEqual(0, iterator.Current);
    Assert.AreEqual(BreakIterator.DONE, iterator.MoveNext());
    Assert.AreEqual(0, iterator.Current);

    iterator.SetText(SENTENCE_TEXT);

    // A freshly initialized iterator starts at offset 0.
    Assert.AreEqual(0, iterator.Current);

    // in Java.^ : the \n inside the first sentence must not produce a break,
    // and Current must report the boundary just moved to.
    Assert.AreEqual(firstSentenceEnd, iterator.MoveNext());
    Assert.AreEqual(firstSentenceEnd, iterator.Current);

    // Remaining sentence ends, in order:
    //   221 - especially cross-platform.^
    //   290 - free download.^
    //   324 - things easy.^
    //   344 - is powerful.^
    //   364 - is exciting.^
    //   380 - is cool.^
    //   400 - Lucene now?^ (last boundary)
    foreach (int boundary in new[] { 221, 290, 324, 344, 364, 380, 400 })
    {
        Assert.AreEqual(boundary, iterator.MoveNext());
    }

    // Stepping past the end reports DONE and leaves the position untouched.
    Assert.AreEqual(BreakIterator.DONE, iterator.MoveNext());
    Assert.AreEqual(lastBoundary, iterator.Current);

    // MovePrevious steps back one sentence and Current follows it.
    Assert.AreEqual(380, iterator.MovePrevious());
    Assert.AreEqual(380, iterator.Current);

    // MoveFirst returns to the start and Current follows it.
    Assert.AreEqual(0, iterator.MoveFirst());
    Assert.AreEqual(0, iterator.Current);

    // Stepping before the start reports DONE and leaves the position untouched.
    Assert.AreEqual(BreakIterator.DONE, iterator.MovePrevious());
    Assert.AreEqual(0, iterator.Current);

    // MoveLast jumps from the start straight to the final boundary.
    Assert.AreEqual(lastBoundary, iterator.MoveLast());
}
/// <summary>
/// Steps a word break iterator created for the "th" culture over a mixed Thai/Latin phrase
/// and then over a run of mixed Thai and ASCII digits, checking the offsets returned by
/// MoveNext, MovePrevious, MoveFirst and Current.
/// </summary>
public void TestWordIterationThai()
{
    var thai = new System.Globalization.CultureInfo("th");
    BreakIterator iterator = GetWordInstance(thai);

    // No text set yet: position is 0 and there is nothing to move to.
    Assert.AreEqual(0, iterator.Current);
    Assert.AreEqual(BreakIterator.DONE, iterator.MoveNext());
    Assert.AreEqual(0, iterator.Current);

    iterator.SetText("บริษัทMicrosoftบริการดีที่สุด");

    // A freshly initialized iterator starts at offset 0.
    Assert.AreEqual(0, iterator.Current);

    // First three boundaries, each followed by a check that Current reports the same offset:
    //   6  - บริษัท^Microsoft
    //   15 - Microsoft^บริการ
    //   21 - บริการ^ดี
    foreach (int boundary in new[] { 6, 15, 21 })
    {
        Assert.AreEqual(boundary, iterator.MoveNext());
        Assert.AreEqual(boundary, iterator.Current);
    }

    // ดี^ที่สุด
    Assert.AreEqual(23, iterator.MoveNext());

    // ดีที่สุด^ (end of text)
    Assert.AreEqual(29, iterator.MoveNext());

    // Stepping past the end reports DONE and leaves the position untouched.
    Assert.AreEqual(BreakIterator.DONE, iterator.MoveNext());
    Assert.AreEqual(29, iterator.Current);

    // MovePrevious steps back to ดี^ที่สุด.
    Assert.AreEqual(23, iterator.MovePrevious());

    // MoveFirst returns to the start.
    Assert.AreEqual(0, iterator.MoveFirst());

    // Stepping before the start reports DONE and leaves the position untouched.
    Assert.AreEqual(BreakIterator.DONE, iterator.MovePrevious());
    Assert.AreEqual(0, iterator.Current);

    // Numerals: replace the text with a run mixing Thai digits and ASCII digits.
    iterator.SetText("๑23๔๕๖7");

    // Setting new text puts the position back at offset 0.
    Assert.AreEqual(0, iterator.Current);

    // The whole seven-character digit run must come back as a single group.
    Assert.AreEqual(7, iterator.MoveNext());
}