Ejemplo n.º 1
0
        public void TestSentenceBoundary()
        {
            // LUCENENET specific - the sentence iterator used below is a mock of the JDK
            // BreakIterator class, i.e. an ICU BreakIterator with custom rules applied.
            // Those rules do not override the DictionaryBasedBreakIterator, so the test
            // is skipped when the current culture is an East Asian language.
            string language = CultureInfo.CurrentCulture.TwoLetterISOLanguageName;
            bool isEastAsian =
                language == "th" ||    // Thai
                language == "lo" ||    // Lao
                language == "my" ||    // Burmese
                language == "km" ||    // Khmer
                language == "ja" ||    // Japanese
                language == "ko" ||    // Korean
                language == "zh";      // Chinese
            if (isEastAsian)
            {
                Assume.That(false, "This test does not apply to East Asian languages.");
            }

            StringBuilder buffer = new StringBuilder(TEXT);

            // The current (default) culture is used on purpose: LuceneTestCase randomizes it.
            IBoundaryScanner sentenceScanner = new BreakIteratorBoundaryScanner(
                JdkBreakIterator.GetSentenceInstance(CultureInfo.CurrentCulture));

            // Scan from a position in the middle of a sentence.
            int scanFrom = TEXT.IndexOf("any application", StringComparison.Ordinal);

            int expectedStart = TEXT.IndexOf("It is a", StringComparison.Ordinal);
            TestFindStartOffset(buffer, scanFrom, expectedStart, sentenceScanner);

            int expectedEnd = TEXT.IndexOf("Apache Lucene is an open source", StringComparison.Ordinal);
            TestFindEndOffset(buffer, scanFrom, expectedEnd, sentenceScanner);
        }
        public void TestWordBoundary()
        {
            // Exercises a BreakIteratorBoundaryScanner backed by a word-level iterator
            // created for the invariant culture.
            StringBuilder    text    = new StringBuilder(TEXT);
            BreakIterator    bi      = JdkBreakIterator.GetWordInstance(CultureInfo.InvariantCulture);
            IBoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);

            // Offsets into the test text must not depend on the culture of the running
            // thread, so every search is an ordinal (code-unit) search rather than the
            // culture-sensitive default of string.IndexOf(string).
            int start    = TEXT.IndexOf("formance", System.StringComparison.Ordinal);
            int expected = TEXT.IndexOf("high-performance", System.StringComparison.Ordinal);

            TestFindStartOffset(text, start, expected, scanner);

            expected = TEXT.IndexOf(", full", System.StringComparison.Ordinal);
            TestFindEndOffset(text, start, expected, scanner);
        }
        public void TestSentenceBoundary()
        {
            // Exercises a BreakIteratorBoundaryScanner backed by a sentence-level iterator.
            StringBuilder text = new StringBuilder(TEXT);
            // we test this with default locale, it's randomized by LuceneTestCase
            BreakIterator    bi      = JdkBreakIterator.GetSentenceInstance(CultureInfo.CurrentCulture);
            IBoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);

            // Because the current culture is randomized, the offsets are located with
            // ordinal searches; the culture-sensitive default of string.IndexOf(string)
            // would make the expected positions depend on the culture under test.
            int start    = TEXT.IndexOf("any application", System.StringComparison.Ordinal);
            int expected = TEXT.IndexOf("It is a", System.StringComparison.Ordinal);

            TestFindStartOffset(text, start, expected, scanner);

            expected = TEXT.IndexOf("Apache Lucene is an open source", System.StringComparison.Ordinal);
            TestFindEndOffset(text, start, expected, scanner);
        }
Ejemplo n.º 4
0
        public void TestWordBoundary()
        {
            StringBuilder buffer = new StringBuilder(TEXT);

            // LUCENENET specific - the word iterator is a mock of the JDK BreakIterator
            // class, i.e. an ICU BreakIterator with custom rules applied.
            IBoundaryScanner wordScanner = new BreakIteratorBoundaryScanner(
                JdkBreakIterator.GetWordInstance(CultureInfo.InvariantCulture));

            // Scan from a position in the middle of a word.
            int scanFrom = TEXT.IndexOf("formance", StringComparison.Ordinal);

            int expectedStart = TEXT.IndexOf("high-performance", StringComparison.Ordinal);
            TestFindStartOffset(buffer, scanFrom, expectedStart, wordScanner);

            int expectedEnd = TEXT.IndexOf(", full", StringComparison.Ordinal);
            TestFindEndOffset(buffer, scanFrom, expectedEnd, wordScanner);
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Obtains a word-level <see cref="BreakIterator"/> for the given culture by
 /// delegating to <see cref="JdkBreakIterator"/>.
 /// </summary>
 /// <param name="locale">Culture that is passed through to the factory unchanged.</param>
 /// <returns>The iterator returned by the factory.</returns>
 private BreakIterator GetWordInstance(System.Globalization.CultureInfo locale)
 {
     return JdkBreakIterator.GetWordInstance(locale);
 }
Ejemplo n.º 6
0
 /// <summary>
 /// Supplies the <see cref="BreakIterator"/> used to divide text into passages.
 /// The default implementation returns a sentence iterator obtained from
 /// <see cref="JdkBreakIterator"/> for <see cref="CultureInfo.InvariantCulture"/>;
 /// subclasses can override this method to customize it.
 /// </summary>
 /// <param name="field">Name of the field; not consulted by the default implementation.</param>
 /// <returns>The sentence-level break iterator.</returns>
 protected virtual BreakIterator GetBreakIterator(string field)
 {
     return JdkBreakIterator.GetSentenceInstance(CultureInfo.InvariantCulture);
 }