Beispiel #1
0
        // Sentence-boundary scenario, run against the current culture: starting from the
        // offset of "any application", the expected start boundary is the offset of "It is a"
        // and the expected end boundary is the offset of "Apache Lucene is an open source".
        // The actual checks are delegated to the TestFindStartOffset / TestFindEndOffset
        // helpers, which are defined outside this method.
        public void TestSentenceBoundary()
        {
            // LUCENENET specific - using a mock of the JDK BreakIterator class, which is just
            // an ICU BreakIterator with custom rules applied. East Asian
            // languages are skipped because the DictionaryBasedBreakIterator is not overridden by the rules.
            string language = CultureInfo.CurrentCulture.TwoLetterISOLanguageName;
            bool usesDictionaryBreaking =
                language == "th" ||   // Thai
                language == "lo" ||   // Lao
                language == "my" ||   // Burmese
                language == "km" ||   // Khmer
                language == "ja" ||   // Japanese
                language == "ko" ||   // Korean
                language == "zh";     // Chinese
            if (usesDictionaryBreaking)
            {
                Assume.That(false, "This test does not apply to East Asian languages.");
            }

            StringBuilder buffer = new StringBuilder(TEXT);

            // The scanner is built for the current culture on purpose; per the original note
            // the default locale is randomized by LuceneTestCase.
            // LUCENENET specific - JdkBreakIterator is a mock of the JDK BreakIterator class
            // (an ICU BreakIterator with custom rules applied).
            BreakIterator sentenceBreaker = JdkBreakIterator.GetSentenceInstance(CultureInfo.CurrentCulture);
            IBoundaryScanner sentenceScanner = new BreakIteratorBoundaryScanner(sentenceBreaker);

            // All three offsets are plain ordinal lookups in the fixture text.
            int probeOffset   = TEXT.IndexOf("any application", StringComparison.Ordinal);
            int sentenceStart = TEXT.IndexOf("It is a", StringComparison.Ordinal);
            int sentenceEnd   = TEXT.IndexOf("Apache Lucene is an open source", StringComparison.Ordinal);

            TestFindStartOffset(buffer, probeOffset, sentenceStart, sentenceScanner);
            TestFindEndOffset(buffer, probeOffset, sentenceEnd, sentenceScanner);
        }
        // Word-boundary scenario using an invariant-culture word iterator: starting from the
        // offset of "formance" (inside "high-performance"), the expected start boundary is the
        // offset of "high-performance" and the expected end boundary is the offset of ", full".
        // The checks themselves live in the TestFindStartOffset / TestFindEndOffset helpers,
        // which are defined outside this method.
        public void TestWordBoundary()
        {
            // string.IndexOf(string) is culture-sensitive. The fixture offsets must not depend
            // on the culture the test happens to run under, so every lookup is ordinal.
            // Fully qualified so the method does not rely on a `using System;` directive.
            const System.StringComparison ordinal = System.StringComparison.Ordinal;

            StringBuilder    text    = new StringBuilder(TEXT);
            BreakIterator    bi      = JdkBreakIterator.GetWordInstance(CultureInfo.InvariantCulture);
            IBoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);

            int start    = TEXT.IndexOf("formance", ordinal);
            int expected = TEXT.IndexOf("high-performance", ordinal);

            TestFindStartOffset(text, start, expected, scanner);

            expected = TEXT.IndexOf(", full", ordinal);
            TestFindEndOffset(text, start, expected, scanner);
        }
        // Line-boundary scenario, run against the current culture: starting from the offset of
        // "any application", the expected start boundary is the offset of "nearly" and the
        // expected end boundary is the offset of "application that requires".
        // The checks themselves live in the TestFindStartOffset / TestFindEndOffset helpers,
        // which are defined outside this method.
        public void TestLineBoundary()
        {
            // The break iterator below is deliberately culture-dependent, but locating the
            // fixture offsets must not be: string.IndexOf(string) uses the current culture,
            // so every lookup is made ordinal explicitly.
            // Fully qualified so the method does not rely on a `using System;` directive.
            const System.StringComparison ordinal = System.StringComparison.Ordinal;

            StringBuilder text = new StringBuilder(TEXT);
            // we test this with default locale, its randomized by LuceneTestCase
            BreakIterator    bi      = BreakIterator.GetLineInstance(CultureInfo.CurrentCulture);
            IBoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);

            int start    = TEXT.IndexOf("any application", ordinal);
            int expected = TEXT.IndexOf("nearly", ordinal);

            TestFindStartOffset(text, start, expected, scanner);

            expected = TEXT.IndexOf("application that requires", ordinal);
            TestFindEndOffset(text, start, expected, scanner);
        }
        // Sentence-boundary scenario, run against the current culture: starting from the
        // offset of "any application", the expected start boundary is the offset of "It is a"
        // and the expected end boundary is the offset of "Apache Lucene is an open source".
        // The checks themselves live in the TestFindStartOffset / TestFindEndOffset helpers,
        // which are defined outside this method.
        public void TestSentenceBoundary()
        {
            // The break iterator below is deliberately culture-dependent, but locating the
            // fixture offsets must not be: string.IndexOf(string) uses the current culture,
            // so every lookup is made ordinal explicitly.
            // Fully qualified so the method does not rely on a `using System;` directive.
            const System.StringComparison ordinal = System.StringComparison.Ordinal;

            StringBuilder text = new StringBuilder(TEXT);
            // we test this with default locale, its randomized by LuceneTestCase
            BreakIterator    bi      = JdkBreakIterator.GetSentenceInstance(CultureInfo.CurrentCulture);
            IBoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);

            int start    = TEXT.IndexOf("any application", ordinal);
            int expected = TEXT.IndexOf("It is a", ordinal);

            TestFindStartOffset(text, start, expected, scanner);

            expected = TEXT.IndexOf("Apache Lucene is an open source", ordinal);
            TestFindEndOffset(text, start, expected, scanner);
        }
        // Out-of-range scenario for a word-based scanner: for each probed offset the
        // assertions expect the scanner to hand back exactly the offset it was given.
        public void TestOutOfRange()
        {
            BreakIterator    wordBreaker = BreakIterator.GetWordInstance(CultureInfo.InvariantCulture);
            IBoundaryScanner scanner     = new BreakIteratorBoundaryScanner(wordBreaker);
            StringBuilder    buffer      = new StringBuilder(TEXT);

            // One position beyond the length of the text: both lookups are checked.
            int beyondEnd = TEXT.Length + 1;
            assertEquals(beyondEnd, scanner.FindStartOffset(buffer, beyondEnd));
            assertEquals(beyondEnd, scanner.FindEndOffset(buffer, beyondEnd));

            // Offset zero: only the start lookup is checked.
            int atBeginning = 0;
            assertEquals(atBeginning, scanner.FindStartOffset(buffer, atBeginning));

            // Negative offset: only the end lookup is checked.
            int beforeBeginning = -1;
            assertEquals(beforeBeginning, scanner.FindEndOffset(buffer, beforeBeginning));
        }
Beispiel #6
0
        // Word-boundary scenario using an invariant-culture word iterator: starting from the
        // offset of "formance" (inside "high-performance"), the expected start boundary is the
        // offset of "high-performance" and the expected end boundary is the offset of ", full".
        // The checks themselves live in the TestFindStartOffset / TestFindEndOffset helpers,
        // which are defined outside this method.
        public void TestWordBoundary()
        {
            StringBuilder buffer = new StringBuilder(TEXT);

            // LUCENENET specific - JdkBreakIterator is a mock of the JDK BreakIterator class,
            // which is just an ICU BreakIterator with custom rules applied.
            BreakIterator wordBreaker = JdkBreakIterator.GetWordInstance(CultureInfo.InvariantCulture);
            IBoundaryScanner wordScanner = new BreakIteratorBoundaryScanner(wordBreaker);

            // All three offsets are plain ordinal lookups in the fixture text.
            int probeOffset = TEXT.IndexOf("formance", StringComparison.Ordinal);
            int wordStart   = TEXT.IndexOf("high-performance", StringComparison.Ordinal);
            int wordEnd     = TEXT.IndexOf(", full", StringComparison.Ordinal);

            TestFindStartOffset(buffer, probeOffset, wordStart, wordScanner);
            TestFindEndOffset(buffer, probeOffset, wordEnd, wordScanner);
        }
Beispiel #7
0
        // Sentence-boundary scenario, run against the current culture: starting from the
        // offset of "any application", the expected start boundary is the offset of "It is a"
        // and the expected end boundary is the offset of "Apache Lucene is an open source".
        // The checks themselves live in the TestFindStartOffset / TestFindEndOffset helpers,
        // which are defined outside this method.
        public void TestSentenceBoundary()
        {
            // The break iterator below is deliberately culture-dependent, but locating the
            // fixture offsets must not be: string.IndexOf(string) uses the current culture,
            // so every lookup is made ordinal explicitly.
            // Fully qualified so the method does not rely on a `using System;` directive.
            const System.StringComparison ordinal = System.StringComparison.Ordinal;

            StringBuilder text = new StringBuilder(TEXT);
            // we test this with default locale, its randomized by LuceneTestCase
            // Stands in for the Java-style call BreakIterator.getSentenceInstance(Locale.getDefault()).
            // NOTE(review): EnableHacks is declared on IcuBreakIterator outside this view - confirm its effect there.
            BreakIterator bi = new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.SENTENCE, CultureInfo.CurrentCulture)
            {
                EnableHacks = true
            };
            IBoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);

            int start    = TEXT.IndexOf("any application", ordinal);
            int expected = TEXT.IndexOf("It is a", ordinal);

            TestFindStartOffset(text, start, expected, scanner);

            expected = TEXT.IndexOf("Apache Lucene is an open source", ordinal);
            TestFindEndOffset(text, start, expected, scanner);
        }
Beispiel #8
0
        // Word-boundary scenario using an invariant-culture word iterator: starting from the
        // offset of "formance" (inside "high-performance"), the expected start boundary is the
        // offset of "high-performance" and the expected end boundary is the offset of ", full".
        // The checks themselves live in the TestFindStartOffset / TestFindEndOffset helpers,
        // which are defined outside this method.
        public void TestWordBoundary()
        {
            // string.IndexOf(string) is culture-sensitive. The fixture offsets must not depend
            // on the culture the test happens to run under, so every lookup is ordinal.
            // Fully qualified so the method does not rely on a `using System;` directive.
            const System.StringComparison ordinal = System.StringComparison.Ordinal;

            StringBuilder text = new StringBuilder(TEXT);
            // Stands in for the Java-style call BreakIterator.getWordInstance(Locale.ROOT);
            // an earlier attempt used `new WordBreakIterator(CultureInfo.CurrentCulture)`.
            // NOTE(review): EnableHacks is declared on IcuBreakIterator outside this view - confirm its effect there.
            BreakIterator bi = new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.WORD, CultureInfo.InvariantCulture)
            {
                EnableHacks = true
            };
            IBoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);

            int start    = TEXT.IndexOf("formance", ordinal);
            int expected = TEXT.IndexOf("high-performance", ordinal);

            TestFindStartOffset(text, start, expected, scanner);

            expected = TEXT.IndexOf(", full", ordinal);
            TestFindEndOffset(text, start, expected, scanner);
        }