/// <summary>
/// Creates a sentence or word <see cref="BreakIterator"/> for the "zh" locale, feeds it
/// <paramref name="text"/> and walks it forwards, checking every boundary, the rule status
/// reported at that boundary, and what the iterator reports once it runs out of boundaries.
/// </summary>
/// <param name="type">Kind of iterator to create; only SENTENCE and WORD are handled.</param>
/// <param name="text">Text handed to the iterator.</param>
/// <param name="expected">Boundary offsets the iterator is expected to report, in order.</param>
/// <param name="ruleStatus">Rule status expected at the boundary with the same index in <paramref name="expected"/>.</param>
/// <exception cref="NotSupportedException">Thrown when <paramref name="type"/> is neither SENTENCE nor WORD.</exception>
public void CanIterateForwards(BreakIterator.UBreakIteratorType type, string text, int[] expected, BreakIterator.UWordBreak[] ruleStatus)
        {
            var locale = new Locale("zh");

            BreakIterator iterator = null;

            try
            {
                if (type == BreakIterator.UBreakIteratorType.SENTENCE)
                {
                    iterator = BreakIterator.CreateSentenceInstance(locale);
                }
                else if (type == BreakIterator.UBreakIteratorType.WORD)
                {
                    iterator = BreakIterator.CreateWordInstance(locale);
                }
                else
                {
                    throw new NotSupportedException("This iterator type is not supported in this test yet. [" + type + "]");
                }

                iterator.SetText(text);

                // The complete boundary list must match before stepping through it one by one.
                CollectionAssert.AreEqual(expected, iterator.Boundaries);

                int boundaryCount = expected.Length;

                for (int index = 0; index < boundaryCount; index++)
                {
                    int position     = iterator.Current;
                    int actualStatus = iterator.GetRuleStatus();
                    int wantedStatus = (int)ruleStatus[index];

                    Assert.AreEqual(expected[index], position);
                    Assert.AreEqual(wantedStatus, actualStatus);
                    CollectionAssert.AreEqual(new[] { wantedStatus }, iterator.GetRuleStatusVector());

                    // Stepping forward yields the following boundary, or DONE once
                    // the final expected boundary has been consumed.
                    int advanced   = iterator.MoveNext();
                    bool hasMore   = index + 1 < boundaryCount;
                    int wantedNext = hasMore ? expected[index + 1] : BreakIterator.DONE;

                    Assert.AreEqual(wantedNext, advanced);
                }

                int finalBoundary = expected[boundaryCount - 1];

                // A failed MoveNext leaves Current on the final boundary...
                Assert.AreEqual(finalBoundary, iterator.Current);

                // ...and another attempt still reports DONE without moving Current.
                Assert.AreEqual(BreakIterator.DONE, iterator.MoveNext());
                Assert.AreEqual(finalBoundary, iterator.Current);

                // Rewinding from the end lands on the first expected boundary.
                int firstBoundary = expected[0];
                Assert.AreEqual(firstBoundary, iterator.MoveFirst());
                Assert.AreEqual(expected[0], iterator.Current);
            }
            finally
            {
                // The iterator is still null when creation threw; only dispose what was created.
                if (iterator != null)
                {
                    iterator.Dispose();
                }
            }
        }
Ejemplo n.º 2
0
 /// <inheritdoc/>
 public void Reset()
 {
     // Rewind the wrapped iterator and remember the offset it reports for its first boundary.
     var firstBoundary = _breakIterator.MoveFirst();
     _currentLimit = firstBoundary;
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Drives the line-break iterator returned by <c>GetLineInstance</c> for the invariant
        /// culture over <c>LINE_TEXT</c> and checks the offsets reported by MoveNext,
        /// MovePrevious, MoveFirst, MoveLast and Current.
        /// </summary>
        public void TestLineIteration()
        {
            BreakIterator lines = GetLineInstance(System.Globalization.CultureInfo.InvariantCulture);

            // Before any text is set the iterator is expected to sit at 0 with nothing to advance to.
            Assert.AreEqual(0, lines.Current);
            Assert.AreEqual(BreakIterator.DONE, lines.MoveNext());
            Assert.AreEqual(0, lines.Current);

            lines.SetText(LINE_TEXT);

            // A fresh text starts the iterator at offset 0.
            Assert.AreEqual(0, lines.Current);

            // First three breaks: after the tab (Apache\t^Lucene), Lucene^(TM), and Lucene(TM) ^is a.
            // After each step Current must echo the boundary that was just returned.
            foreach (int boundary in new[] { 7, 13, 18 })
            {
                Assert.AreEqual(boundary, lines.MoveNext());
                Assert.AreEqual(boundary, lines.Current);
            }

            // Skip two boundaries to reach the start of "high-performance".
            for (int skipped = 0; skipped < 2; skipped++)
            {
                lines.MoveNext();
            }

            // The embedded newline splits the word (high-\n^performance).
            Assert.AreEqual(29, lines.MoveNext());

            // Jump to the final boundary (in Java.^).
            Assert.AreEqual(108, lines.MoveLast());

            // Advancing from the final boundary reports DONE and leaves Current untouched.
            Assert.AreEqual(BreakIterator.DONE, lines.MoveNext());
            Assert.AreEqual(108, lines.Current);

            // One step back, and Current follows.
            Assert.AreEqual(103, lines.MovePrevious());
            Assert.AreEqual(103, lines.Current);

            // Rewind to the start, and Current follows.
            Assert.AreEqual(0, lines.MoveFirst());
            Assert.AreEqual(0, lines.Current);

            // Stepping back from the first boundary reports DONE and leaves Current untouched.
            Assert.AreEqual(BreakIterator.DONE, lines.MovePrevious());
            Assert.AreEqual(0, lines.Current);

            // MoveLast also works when starting from the first boundary.
            Assert.AreEqual(108, lines.MoveLast());
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Drives the word-break iterator returned by <c>GetWordInstance</c> for the invariant
        /// culture over <c>TEXT</c> and checks the offsets reported while moving forwards,
        /// to the last boundary, and back to the first.
        /// </summary>
        public void TestWordIteration()
        {
            BreakIterator words = GetWordInstance(System.Globalization.CultureInfo.InvariantCulture);

            // Before any text is set the iterator is expected to sit at 0 with nothing to advance to.
            Assert.AreEqual(0, words.Current);
            Assert.AreEqual(BreakIterator.DONE, words.MoveNext());
            Assert.AreEqual(0, words.Current);

            words.SetText(TEXT);

            // A fresh text starts the iterator at offset 0.
            Assert.AreEqual(0, words.Current);

            // Boundaries 1-3: Apache^, ^Lucene, Lucene^.
            // After each step Current must echo the boundary that was just returned.
            foreach (int boundary in new[] { 6, 7, 13 })
            {
                Assert.AreEqual(boundary, words.MoveNext());
                Assert.AreEqual(boundary, words.Current);
            }

            // Boundaries 4-7: ^TM, TM^, TM)^, ^is.
            foreach (int boundary in new[] { 14, 16, 17, 18 })
            {
                Assert.AreEqual(boundary, words.MoveNext());
            }

            // Skip three boundaries to get just in front of "high-performance".
            for (int skipped = 0; skipped < 3; skipped++)
            {
                words.MoveNext();
            }

            // Start of the compound (^high-performance)...
            Assert.AreEqual(23, words.MoveNext());

            // ...and its end: the hyphen must not produce a break (high-performance^).
            Assert.AreEqual(39, words.MoveNext());

            // Jump to the final boundary.
            Assert.AreEqual(107, words.MoveLast());

            // Advancing from the final boundary reports DONE and leaves Current untouched.
            Assert.AreEqual(BreakIterator.DONE, words.MoveNext());
            Assert.AreEqual(107, words.Current);

            // Rewind to the start.
            Assert.AreEqual(0, words.MoveFirst());

            // Stepping back from the first boundary reports DONE and leaves Current untouched.
            Assert.AreEqual(BreakIterator.DONE, words.MovePrevious());
            Assert.AreEqual(0, words.Current);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Drives the sentence-break iterator returned by <c>GetSentenceInstance</c> for the
        /// invariant culture over <c>SENTENCE_TEXT</c> and checks the offsets reported by
        /// MoveNext, MovePrevious, MoveFirst, MoveLast and Current.
        /// </summary>
        public void TestSentenceIteration()
        {
            BreakIterator sentences = GetSentenceInstance(System.Globalization.CultureInfo.InvariantCulture);

            // Before any text is set the iterator is expected to sit at 0 with nothing to advance to.
            Assert.AreEqual(0, sentences.Current);
            Assert.AreEqual(BreakIterator.DONE, sentences.MoveNext());
            Assert.AreEqual(0, sentences.Current);

            sentences.SetText(SENTENCE_TEXT);

            // A fresh text starts the iterator at offset 0.
            Assert.AreEqual(0, sentences.Current);

            // First sentence ends at (in Java.^); the newline inside it must not break it.
            // Current must echo the boundary that was just returned.
            Assert.AreEqual(108, sentences.MoveNext());
            Assert.AreEqual(108, sentences.Current);

            // Remaining sentence ends, in order:
            //   especially cross-platform.^  free download.^  things easy.^  is powerful.^
            //   is exciting.^  is cool.^  Lucene now?^ (the last boundary)
            foreach (int boundary in new[] { 221, 290, 324, 344, 364, 380, 400 })
            {
                Assert.AreEqual(boundary, sentences.MoveNext());
            }

            // Advancing from the final boundary reports DONE and leaves Current untouched.
            Assert.AreEqual(BreakIterator.DONE, sentences.MoveNext());
            Assert.AreEqual(400, sentences.Current);

            // One step back, and Current follows.
            Assert.AreEqual(380, sentences.MovePrevious());
            Assert.AreEqual(380, sentences.Current);

            // Rewind to the start, and Current follows.
            Assert.AreEqual(0, sentences.MoveFirst());
            Assert.AreEqual(0, sentences.Current);

            // Stepping back from the first boundary reports DONE and leaves Current untouched.
            Assert.AreEqual(BreakIterator.DONE, sentences.MovePrevious());
            Assert.AreEqual(0, sentences.Current);

            // MoveLast also works when starting from the first boundary.
            Assert.AreEqual(400, sentences.MoveLast());
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Drives the word-break iterator returned by <c>GetWordInstance</c> for the "th" culture
        /// over a mixed Thai/Latin string and then over a string of mixed Thai and ASCII digits,
        /// checking the reported boundary offsets.
        /// </summary>
        public void TestWordIterationThai()
        {
            BreakIterator thaiWords = GetWordInstance(new System.Globalization.CultureInfo("th"));

            // Before any text is set the iterator is expected to sit at 0 with nothing to advance to.
            Assert.AreEqual(0, thaiWords.Current);
            Assert.AreEqual(BreakIterator.DONE, thaiWords.MoveNext());
            Assert.AreEqual(0, thaiWords.Current);

            thaiWords.SetText("บริษัทMicrosoftบริการดีที่สุด");

            // A fresh text starts the iterator at offset 0.
            Assert.AreEqual(0, thaiWords.Current);

            // Boundaries 1-3: บริษัท^Microsoft, Microsoft^บริการ, บริการ^ดี.
            // After each step Current must echo the boundary that was just returned.
            foreach (int boundary in new[] { 6, 15, 21 })
            {
                Assert.AreEqual(boundary, thaiWords.MoveNext());
                Assert.AreEqual(boundary, thaiWords.Current);
            }

            // Boundaries 4-5: ดี^ที่สุด, then the end of the text (ดีที่สุด^).
            foreach (int boundary in new[] { 23, 29 })
            {
                Assert.AreEqual(boundary, thaiWords.MoveNext());
            }

            // Advancing from the final boundary reports DONE and leaves Current untouched.
            Assert.AreEqual(BreakIterator.DONE, thaiWords.MoveNext());
            Assert.AreEqual(29, thaiWords.Current);

            // One step back lands on (ดี^ที่สุด).
            Assert.AreEqual(23, thaiWords.MovePrevious());

            // Rewind to the start.
            Assert.AreEqual(0, thaiWords.MoveFirst());

            // Stepping back from the first boundary reports DONE and leaves Current untouched.
            Assert.AreEqual(BreakIterator.DONE, thaiWords.MovePrevious());
            Assert.AreEqual(0, thaiWords.Current);

            // Numerals: Thai digits interleaved with ASCII digits.
            thaiWords.SetText("๑23๔๕๖7");

            // A fresh text starts the iterator at offset 0 again.
            Assert.AreEqual(0, thaiWords.Current);

            // The whole run of digits is expected to be one group, so the first break is at offset 7.
            Assert.AreEqual(7, thaiWords.MoveNext());
        }