/// <summary>
/// Feeds 10000 random Unicode strings to a sentence-instance
/// <c>CharArrayIterator</c> and hands it, together with a sentence
/// <c>BreakIterator</c> for the default locale, to <c>consume</c>.
/// </summary>
public virtual void testConsumeSentenceInstance()
{
    // We use the default locale, as it's randomized by LuceneTestCase.
    // NOTE: this comment must stay on its own line; when it shared a line with
    // the statements below, it commented out the entire method body.
    BreakIterator bi = BreakIterator.getSentenceInstance(Locale.Default);
    CharArrayIterator ci = CharArrayIterator.newSentenceInstance();
    for (int i = 0; i < 10000; i++)
    {
        char[] text = TestUtil.randomUnicodeString(random()).toCharArray();
        // Expose the whole array (offset 0, full length) through the iterator.
        ci.setText(text, 0, text.Length);
        consume(bi, ci);
    }
}
/// <summary>
/// Type initializer: probes the US-locale sentence <c>BreakIterator</c> once
/// and records the outcome in <c>HAS_BUGGY_BREAKITERATORS</c>.
/// The flag is <c>true</c> when the probe throws, <c>false</c> when it completes.
/// </summary>
static CharArrayIterator()
{
    // Assume the worst until the probe has run to completion.
    bool probeFailed = true;
    try
    {
        BreakIterator probe = BreakIterator.getSentenceInstance(Locale.US);
        // A single supplementary code point, written as a surrogate pair.
        probe.Text = "\udb40\udc53";
        probe.next();
        probeFailed = false;
    }
    catch (Exception)
    {
        // Any exception from the probe marks the break iterators as buggy.
        probeFailed = true;
    }
    HAS_BUGGY_BREAKITERATORS = probeFailed;
}
/// <summary>
/// Creates the tokenizer over <paramref name="reader"/>, passing the base
/// class a sentence <c>BreakIterator</c> obtained for <c>Locale.ROOT</c>.
/// </summary>
/// <param name="reader">Input forwarded unchanged to the base constructor.</param>
public SentenceAndWordTokenizer(Reader reader)
    : base(reader, BreakIterator.getSentenceInstance(Locale.ROOT))
{
    // No additional initialization; everything is delegated to the base class.
}
/// <summary>
/// Creates the tokenizer over <paramref name="reader"/>, passing the base
/// class a sentence <c>BreakIterator</c> obtained for <c>Locale.ROOT</c>.
/// </summary>
/// <param name="reader">Input forwarded unchanged to the base constructor.</param>
public WholeSentenceTokenizer(Reader reader)
    : base(reader, BreakIterator.getSentenceInstance(Locale.ROOT))
{
    // No additional initialization; everything is delegated to the base class.
}