Beispiel #1
0
        /// <summary>
        /// Initializes an <see cref="ICUTokenizer"/> that reads text from <paramref name="input"/>
        /// and splits it into words according to a tailored <see cref="BreakIterator"/> configuration.
        /// </summary>
        /// <param name="factory">The <see cref="Lucene.Net.Util.AttributeSource.AttributeFactory"/> to use;
        /// it is forwarded to the base constructor.</param>
        /// <param name="input">The <see cref="TextReader"/> supplying the text to tokenize.</param>
        /// <param name="config">The tailored <see cref="BreakIterator"/> configuration; it is stored on
        /// this instance and used to build the <see cref="CompositeBreakIterator"/>.</param>
        public ICUTokenizer(AttributeFactory factory, TextReader input, ICUTokenizerConfig config)
            : base(factory, input)
        {
            // Register the token attributes (offset, term, type, script) and keep references to them.
            offsetAtt = AddAttribute<IOffsetAttribute>();
            termAtt = AddAttribute<ICharTermAttribute>();
            typeAtt = AddAttribute<ITypeAttribute>();
            scriptAtt = AddAttribute<IScriptAttribute>();

            // Keep the configuration and build the break iterator from that same configuration.
            this.config = config;
            breaker = new CompositeBreakIterator(config);
        }
 /// <summary>
 /// Selects the tokenizer configuration and stores it in <c>config</c>.
 /// </summary>
 /// <remarks>
 /// When <c>tailored</c> is empty, a plain <see cref="DefaultICUTokenizerConfig"/> is created from
 /// <c>cjkAsWords</c> and <c>myanmarAsWords</c>; otherwise a configuration is created that
 /// additionally receives <c>tailored</c> and <paramref name="loader"/>.
 /// </remarks>
 /// <param name="loader">Resource loader passed to the tailored configuration; it is not used
 /// when <c>tailored</c> is empty.</param>
 public virtual void Inform(IResourceLoader loader)
 {
     // tailored is expected to be set already; the assertion message points at init.
     Debug.Assert(tailored != null, "init must be called first!");

     if (tailored.Count != 0)
     {
         // Tailored entries exist: build the configuration that also takes them and the loader.
         config = new DefaultICUTokenizerConfigAnonymousHelper(cjkAsWords, myanmarAsWords, tailored, loader);
         return;
     }

     // Nothing tailored: the default configuration is sufficient.
     config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords);
 }
Beispiel #3
0
 /// <summary>
 /// Initializes an <see cref="ICUTokenizer"/> that splits the text read from
 /// <paramref name="input"/> into words according to a tailored <see cref="BreakIterator"/>
 /// configuration.
 /// </summary>
 /// <remarks>
 /// Delegates to the constructor that also takes an attribute factory, supplying
 /// <see cref="AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY"/> for it.
 /// </remarks>
 /// <param name="input">The <see cref="TextReader"/> supplying the text to tokenize.</param>
 /// <param name="config">The tailored <see cref="BreakIterator"/> configuration.</param>
 public ICUTokenizer(TextReader input, ICUTokenizerConfig config)
     : this(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, input, config)
 {
 }
Beispiel #4
0
 /// <summary>
 /// Initializes a <see cref="CompositeBreakIterator"/> from the supplied configuration.
 /// </summary>
 /// <param name="config">Configuration stored on this instance; its <c>CombineCJ</c> value is
 /// passed to the newly created <see cref="ScriptIterator"/>.</param>
 public CompositeBreakIterator(ICUTokenizerConfig config)
 {
     this.config = config;

     // The script iterator is set up with the configuration's CombineCJ setting.
     var combineCJ = config.CombineCJ;
     scriptIterator = new ScriptIterator(combineCJ);
 }