public void TestFirstPosition()
{
    // The expected breaks come from an ICU sentence iterator built with the invariant culture;
    // it replaces the commented-out original BreakIterator.getSentenceInstance(Locale.ROOT).
    BreakIterator reference = new IcuBreakIterator(
        Icu.BreakIterator.UBreakIteratorType.SENTENCE,
        CultureInfo.InvariantCulture);

    // Iterator whose breaks are compared against the reference.
    BreakIterator underTest = new WholeBreakIterator();

    assertSameBreaks("000ab000", 3, 2, 4, reference, underTest);
}
public void TestSliceStart()
{
    // The expected breaks come from an ICU sentence iterator built with the invariant culture;
    // it replaces the commented-out original BreakIterator.getSentenceInstance(Locale.ROOT).
    BreakIterator reference = new IcuBreakIterator(
        Icu.BreakIterator.UBreakIteratorType.SENTENCE,
        CultureInfo.InvariantCulture);
    BreakIterator underTest = new WholeBreakIterator();

    // Every sample begins with the three-character "000" prefix. The second argument is
    // always 3 and the third is the number of characters that follow the prefix
    // (1, 2, 3 and finally 0). The same two iterators are reused for each sample, in order.
    int prefixLength = 3;
    string[] samples = { "000a", "000ab", "000abc", "000" };
    foreach (string sample in samples)
    {
        int remainder = sample.Length - prefixLength;
        assertSameBreaks(sample, prefixLength, remainder, reference, underTest);
    }
}
public void TestSingleSentences()
{
    // The expected breaks come from an ICU sentence iterator built with the invariant culture;
    // it replaces the commented-out original BreakIterator.getSentenceInstance(Locale.ROOT).
    BreakIterator reference = new IcuBreakIterator(
        Icu.BreakIterator.UBreakIteratorType.SENTENCE,
        CultureInfo.InvariantCulture);
    BreakIterator underTest = new WholeBreakIterator();

    // Short inputs of length one, two and three, followed by the empty string.
    // The same two iterators are reused for each sample, in order.
    string[] samples = { "a", "ab", "abc", "" };
    foreach (string sample in samples)
    {
        assertSameBreaks(sample, reference, underTest);
    }
}
public void TestLineBoundary()
{
    StringBuilder buffer = new StringBuilder(TEXT);

    // Uses the current culture on purpose: per the original note, the default locale is
    // randomized by LuceneTestCase. Replaces the commented-out original
    // BreakIterator.getLineInstance(Locale.getDefault()).
    BreakIterator lineBreaks = new IcuBreakIterator(
        Icu.BreakIterator.UBreakIteratorType.LINE,
        CultureInfo.CurrentCulture);
    IBoundaryScanner boundaryScanner = new BreakIteratorBoundaryScanner(lineBreaks);

    // Both checks scan from the position of "any application".
    int origin = TEXT.IndexOf("any application");

    int expectedStart = TEXT.IndexOf("nearly");
    TestFindStartOffset(buffer, origin, expectedStart, boundaryScanner);

    int expectedEnd = TEXT.IndexOf("application that requires");
    TestFindEndOffset(buffer, origin, expectedEnd, boundaryScanner);
}
public void TestOutOfRange()
{
    StringBuilder buffer = new StringBuilder(TEXT);

    // ICU word iterator with the invariant culture; replaces the commented-out original
    // BreakIterator.getWordInstance(Locale.ROOT).
    BreakIterator wordBreaks = new IcuBreakIterator(
        Icu.BreakIterator.UBreakIteratorType.WORD,
        CultureInfo.InvariantCulture);
    IBoundaryScanner boundaryScanner = new BreakIteratorBoundaryScanner(wordBreaks);

    // Each assertion expects the scanner to hand back the offset it was given.

    // One past the length of TEXT: checked for both the start and the end scan.
    int pastEnd = TEXT.Length + 1;
    assertEquals(pastEnd, boundaryScanner.FindStartOffset(buffer, pastEnd));
    assertEquals(pastEnd, boundaryScanner.FindEndOffset(buffer, pastEnd));

    // Offset zero: start scan only.
    int zero = 0;
    assertEquals(zero, boundaryScanner.FindStartOffset(buffer, zero));

    // Negative offset: end scan only.
    int negative = -1;
    assertEquals(negative, boundaryScanner.FindEndOffset(buffer, negative));
}
public void TestSentenceBoundary()
{
    StringBuilder buffer = new StringBuilder(TEXT);

    // Uses the current culture on purpose: per the original note, the default locale is
    // randomized by LuceneTestCase. Replaces the commented-out original
    // BreakIterator.getSentenceInstance(Locale.getDefault()).
    BreakIterator sentenceBreaks = new IcuBreakIterator(
        Icu.BreakIterator.UBreakIteratorType.SENTENCE,
        CultureInfo.CurrentCulture)
    {
        EnableHacks = true
    };
    IBoundaryScanner boundaryScanner = new BreakIteratorBoundaryScanner(sentenceBreaks);

    // Both checks scan from the position of "any application".
    int origin = TEXT.IndexOf("any application");

    int expectedStart = TEXT.IndexOf("It is a");
    TestFindStartOffset(buffer, origin, expectedStart, boundaryScanner);

    int expectedEnd = TEXT.IndexOf("Apache Lucene is an open source");
    TestFindEndOffset(buffer, origin, expectedEnd, boundaryScanner);
}
public void TestWordBoundary()
{
    StringBuilder buffer = new StringBuilder(TEXT);

    // ICU word iterator with the invariant culture. It replaces two commented-out
    // alternatives: the original BreakIterator.getWordInstance(Locale.ROOT) and
    // new WordBreakIterator(CultureInfo.CurrentCulture).
    BreakIterator wordBreaks = new IcuBreakIterator(
        Icu.BreakIterator.UBreakIteratorType.WORD,
        CultureInfo.InvariantCulture)
    {
        EnableHacks = true
    };
    IBoundaryScanner boundaryScanner = new BreakIteratorBoundaryScanner(wordBreaks);

    // Both checks scan from the position of "formance".
    int origin = TEXT.IndexOf("formance");

    int expectedStart = TEXT.IndexOf("high-performance");
    TestFindStartOffset(buffer, origin, expectedStart, boundaryScanner);

    int expectedEnd = TEXT.IndexOf(", full");
    TestFindEndOffset(buffer, origin, expectedEnd, boundaryScanner);
}