Beispiel #1
0
        /// <summary>
        /// Initializes an <see cref="ICUTokenizer"/> that splits the text supplied by a
        /// <see cref="TextReader"/> into words, driven by a customized
        /// <see cref="BreakIterator"/> configuration.
        /// </summary>
        /// <remarks>
        /// The offset, term, type and script attributes are registered (in that order)
        /// before the configuration is stored and wrapped in a
        /// <see cref="CompositeBreakIterator"/>.
        /// </remarks>
        /// <param name="factory">The <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory"/> passed on to the base constructor.</param>
        /// <param name="input">The <see cref="TextReader"/> that supplies the text to tokenize.</param>
        /// <param name="config">The customized <see cref="BreakIterator"/> configuration used to build the break iterator.</param>
        public ICUTokenizer(AttributeFactory factory, TextReader input, ICUTokenizerConfig config)
            : base(factory, input)
        {
            // Attribute registration order is kept exactly as-is.
            offsetAtt = AddAttribute<IOffsetAttribute>();
            termAtt = AddAttribute<ICharTermAttribute>();
            typeAtt = AddAttribute<ITypeAttribute>();
            scriptAtt = AddAttribute<IScriptAttribute>();

            // Keep the configuration and build the break iterator from it.
            this.config = config;
            breaker = new CompositeBreakIterator(config);
        }