/// <summary> /// Creates a new <see cref="ThaiTokenizer"/>, supplying the <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory"/> </summary> public ThaiTokenizer(AttributeFactory factory, TextReader reader) : base(factory, reader, (BreakIterator)sentenceProto.Clone()) { // LUCENENET specific - DBBI_AVAILABLE removed because ICU always has a dictionary-based BreakIterator wordBreaker = new ThaiWordBreaker((BreakIterator)proto.Clone()); termAtt = AddAttribute <ICharTermAttribute>(); offsetAtt = AddAttribute <IOffsetAttribute>(); }
/// <summary> /// Creates a new <see cref="ThaiTokenizer"/>, supplying the <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory"/> </summary> public ThaiTokenizer(AttributeFactory factory, TextReader reader) : base(factory, reader, new IcuBreakIterator(global::Icu.BreakIterator.UBreakIteratorType.SENTENCE, new CultureInfo("th"))) { if (!DBBI_AVAILABLE) { throw new System.NotSupportedException("This JRE does not have support for Thai segmentation"); } wordBreaker = new ThaiWordBreaker(new IcuBreakIterator(global::Icu.BreakIterator.UBreakIteratorType.WORD, CultureInfo.InvariantCulture)); termAtt = AddAttribute <ICharTermAttribute>(); offsetAtt = AddAttribute <IOffsetAttribute>(); }
/// <summary> /// Creates a new ThaiTokenizer, supplying the AttributeFactory </summary> public ThaiTokenizer(AttributeFactory factory, TextReader reader) : base(factory, reader, BreakIterator.CreateSentenceInstance(Locale.GetUS())) { if (!DBBI_AVAILABLE) { throw new System.NotSupportedException("This JRE does not have support for Thai segmentation"); } wordBreaker = new ThaiWordBreaker(BreakIterator.CreateWordInstance(Locale.GetUS())); termAtt = AddAttribute <ICharTermAttribute>(); offsetAtt = AddAttribute <IOffsetAttribute>(); }
/// <summary> /// Creates a new ThaiTokenizer, supplying the AttributeFactory </summary> public ThaiTokenizer(AttributeFactory factory, TextReader reader) : base(factory, reader, BreakIterator.CreateSentenceInstance(Locale.GetUS())) { if (!DBBI_AVAILABLE) { throw new System.NotSupportedException("This JRE does not have support for Thai segmentation"); } wordBreaker = new ThaiWordBreaker(BreakIterator.CreateWordInstance(Locale.GetUS())); termAtt = AddAttribute<ICharTermAttribute>(); offsetAtt = AddAttribute<IOffsetAttribute>(); }
/// <summary> /// Creates a new <see cref="ThaiTokenizer"/>, supplying the <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory"/> </summary> public ThaiTokenizer(AttributeFactory factory, TextReader reader) : base(factory, reader, CreateSentenceClone()) { // LUCENENET specific - DBBI_AVAILABLE removed because ICU always has a dictionary-based BreakIterator UninterruptableMonitor.Enter(syncLock); try { wordBreaker = new ThaiWordBreaker((BreakIterator)proto.Clone()); } finally { UninterruptableMonitor.Exit(syncLock); } termAtt = AddAttribute <ICharTermAttribute>(); offsetAtt = AddAttribute <IOffsetAttribute>(); }