/// <summary>
/// Runs a sentence-based <see cref="BreakIteratorBoundaryScanner"/> over <c>TEXT</c>,
/// starting from the offset of "any application", and hands the expected start and
/// end offsets to the <c>TestFindStartOffset</c> / <c>TestFindEndOffset</c> helpers.
/// </summary>
public void TestSentenceBoundary()
{
    // LUCENENET specific - using a mock of the JDK BreakIterator class, which is just
    // an ICU BreakIterator with custom rules applied. East Asian languages are skipped
    // because the DictionaryBasedBreakIterator is not overridden by the rules.
    string languageCode = CultureInfo.CurrentCulture.TwoLetterISOLanguageName;
    switch (languageCode)
    {
        case "th": // Thai
        case "lo": // Lao
        case "my": // Burmese
        case "km": // Khmer
        case "ja": // Japanese
        case "ko": // Korean
        case "zh": // Chinese
            Assume.That(false, "This test does not apply to East Asian languages.");
            break;
    }

    // The break iterator is created for the current culture on purpose: the original
    // comment notes that the default locale is randomized by LuceneTestCase.
    BreakIterator sentenceIterator = JdkBreakIterator.GetSentenceInstance(CultureInfo.CurrentCulture);
    IBoundaryScanner scanner = new BreakIteratorBoundaryScanner(sentenceIterator);
    StringBuilder buffer = new StringBuilder(TEXT);

    // Offsets are located with ordinal searches so they do not depend on the culture.
    int probeOffset = TEXT.IndexOf("any application", StringComparison.Ordinal);
    int expectedStart = TEXT.IndexOf("It is a", StringComparison.Ordinal);
    int expectedEnd = TEXT.IndexOf("Apache Lucene is an open source", StringComparison.Ordinal);

    TestFindStartOffset(buffer, probeOffset, expectedStart, scanner);
    TestFindEndOffset(buffer, probeOffset, expectedEnd, scanner);
}
/// <summary>
/// Runs a word-based <see cref="BreakIteratorBoundaryScanner"/> over <c>TEXT</c>,
/// starting from the offset of "formance" (inside "high-performance"), and hands the
/// expected start and end offsets to the <c>TestFindStartOffset</c> /
/// <c>TestFindEndOffset</c> helpers.
/// </summary>
public void TestWordBoundary()
{
    StringBuilder text = new StringBuilder(TEXT);
    BreakIterator bi = JdkBreakIterator.GetWordInstance(CultureInfo.InvariantCulture);
    IBoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);

    // Use ordinal searches: the IndexOf(string) overload without a StringComparison
    // is culture-sensitive, which would make these offsets depend on the thread's
    // current culture.
    int start = TEXT.IndexOf("formance", StringComparison.Ordinal);
    int expected = TEXT.IndexOf("high-performance", StringComparison.Ordinal);
    TestFindStartOffset(text, start, expected, scanner);

    expected = TEXT.IndexOf(", full", StringComparison.Ordinal);
    TestFindEndOffset(text, start, expected, scanner);
}
/// <summary>
/// Runs a line-based <see cref="BreakIteratorBoundaryScanner"/> over <c>TEXT</c>,
/// starting from the offset of "any application", and hands the expected start and
/// end offsets to the <c>TestFindStartOffset</c> / <c>TestFindEndOffset</c> helpers.
/// </summary>
public void TestLineBoundary()
{
    StringBuilder text = new StringBuilder(TEXT);

    // We test this with the default locale; it is randomized by LuceneTestCase.
    BreakIterator bi = BreakIterator.GetLineInstance(CultureInfo.CurrentCulture);
    IBoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);

    // Only the break iterator should see the randomized culture. The offsets are
    // located with ordinal searches, because the IndexOf(string) overload without a
    // StringComparison is culture-sensitive.
    int start = TEXT.IndexOf("any application", StringComparison.Ordinal);
    int expected = TEXT.IndexOf("nearly", StringComparison.Ordinal);
    TestFindStartOffset(text, start, expected, scanner);

    expected = TEXT.IndexOf("application that requires", StringComparison.Ordinal);
    TestFindEndOffset(text, start, expected, scanner);
}
/// <summary>
/// Runs a sentence-based <see cref="BreakIteratorBoundaryScanner"/> over <c>TEXT</c>,
/// starting from the offset of "any application", and hands the expected start and
/// end offsets to the <c>TestFindStartOffset</c> / <c>TestFindEndOffset</c> helpers.
/// </summary>
public void TestSentenceBoundary()
{
    StringBuilder text = new StringBuilder(TEXT);

    // We test this with the default locale; it is randomized by LuceneTestCase.
    BreakIterator bi = JdkBreakIterator.GetSentenceInstance(CultureInfo.CurrentCulture);
    IBoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);

    // Only the break iterator should see the randomized culture. The offsets are
    // located with ordinal searches, because the IndexOf(string) overload without a
    // StringComparison is culture-sensitive.
    int start = TEXT.IndexOf("any application", StringComparison.Ordinal);
    int expected = TEXT.IndexOf("It is a", StringComparison.Ordinal);
    TestFindStartOffset(text, start, expected, scanner);

    expected = TEXT.IndexOf("Apache Lucene is an open source", StringComparison.Ordinal);
    TestFindEndOffset(text, start, expected, scanner);
}
/// <summary>
/// Asserts that a word-based <see cref="BreakIteratorBoundaryScanner"/> returns the
/// offset it was given for: both scans at one past the length of <c>TEXT</c>, a start
/// scan at 0, and an end scan at -1.
/// </summary>
public void TestOutOfRange()
{
    BreakIterator wordIterator = BreakIterator.GetWordInstance(CultureInfo.InvariantCulture);
    IBoundaryScanner scanner = new BreakIteratorBoundaryScanner(wordIterator);
    StringBuilder buffer = new StringBuilder(TEXT);

    // One position beyond the end of the text: both directions echo the offset back.
    int beyondEnd = TEXT.Length + 1;
    assertEquals(beyondEnd, scanner.FindStartOffset(buffer, beyondEnd));
    assertEquals(beyondEnd, scanner.FindEndOffset(buffer, beyondEnd));

    // Start scan at the very beginning of the text.
    int atBeginning = 0;
    assertEquals(atBeginning, scanner.FindStartOffset(buffer, atBeginning));

    // End scan at a negative offset.
    int beforeBeginning = -1;
    assertEquals(beforeBeginning, scanner.FindEndOffset(buffer, beforeBeginning));
}
/// <summary>
/// Runs a word-based <see cref="BreakIteratorBoundaryScanner"/> over <c>TEXT</c>,
/// starting from the offset of "formance" (inside "high-performance"), and hands the
/// expected start and end offsets to the <c>TestFindStartOffset</c> /
/// <c>TestFindEndOffset</c> helpers.
/// </summary>
public void TestWordBoundary()
{
    // LUCENENET specific - using a mock of the JDK BreakIterator class, which is just
    // an ICU BreakIterator with custom rules applied.
    BreakIterator wordIterator = JdkBreakIterator.GetWordInstance(CultureInfo.InvariantCulture);
    IBoundaryScanner scanner = new BreakIteratorBoundaryScanner(wordIterator);
    StringBuilder buffer = new StringBuilder(TEXT);

    // All offsets are located with ordinal searches.
    int probeOffset = TEXT.IndexOf("formance", StringComparison.Ordinal);
    int expectedStart = TEXT.IndexOf("high-performance", StringComparison.Ordinal);
    int expectedEnd = TEXT.IndexOf(", full", StringComparison.Ordinal);

    TestFindStartOffset(buffer, probeOffset, expectedStart, scanner);
    TestFindEndOffset(buffer, probeOffset, expectedEnd, scanner);
}
/// <summary>
/// Runs a sentence-based <see cref="BreakIteratorBoundaryScanner"/>, backed by an
/// <c>IcuBreakIterator</c>, over <c>TEXT</c>, starting from the offset of
/// "any application", and hands the expected start and end offsets to the
/// <c>TestFindStartOffset</c> / <c>TestFindEndOffset</c> helpers.
/// </summary>
public void TestSentenceBoundary()
{
    StringBuilder text = new StringBuilder(TEXT);

    // We test this with the default locale; it is randomized by LuceneTestCase.
    // Stands in for the previously commented-out call:
    // BreakIterator.getSentenceInstance(Locale.getDefault())
    BreakIterator bi = new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.SENTENCE, CultureInfo.CurrentCulture) { EnableHacks = true };
    IBoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);

    // Only the break iterator should see the randomized culture. The offsets are
    // located with ordinal searches, because the IndexOf(string) overload without a
    // StringComparison is culture-sensitive.
    int start = TEXT.IndexOf("any application", StringComparison.Ordinal);
    int expected = TEXT.IndexOf("It is a", StringComparison.Ordinal);
    TestFindStartOffset(text, start, expected, scanner);

    expected = TEXT.IndexOf("Apache Lucene is an open source", StringComparison.Ordinal);
    TestFindEndOffset(text, start, expected, scanner);
}
/// <summary>
/// Runs a word-based <see cref="BreakIteratorBoundaryScanner"/>, backed by an
/// <c>IcuBreakIterator</c>, over <c>TEXT</c>, starting from the offset of "formance"
/// (inside "high-performance"), and hands the expected start and end offsets to the
/// <c>TestFindStartOffset</c> / <c>TestFindEndOffset</c> helpers.
/// </summary>
public void TestWordBoundary()
{
    StringBuilder text = new StringBuilder(TEXT);

    // Stands in for the previously commented-out call:
    // BreakIterator.getWordInstance(Locale.ROOT)
    BreakIterator bi = new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.WORD, CultureInfo.InvariantCulture) { EnableHacks = true };
    IBoundaryScanner scanner = new BreakIteratorBoundaryScanner(bi);

    // Use ordinal searches: the IndexOf(string) overload without a StringComparison
    // is culture-sensitive, which would make these offsets depend on the thread's
    // current culture.
    int start = TEXT.IndexOf("formance", StringComparison.Ordinal);
    int expected = TEXT.IndexOf("high-performance", StringComparison.Ordinal);
    TestFindStartOffset(text, start, expected, scanner);

    expected = TEXT.IndexOf(", full", StringComparison.Ordinal);
    TestFindEndOffset(text, start, expected, scanner);
}