Example #1
0
        /// <summary>
        /// Creates a NER processor configuration on top of an existing tokenizer configuration.
        /// </summary>
        /// <remarks>
        /// The supplied instance is stored and modified in place: the <c>TokenizeMode.Ner</c> flag is
        /// OR-ed into its <c>TokenizeMode</c>, and a <c>NerInputTypeProcessorFactory</c> built from its
        /// <c>LanguageType</c> is installed when none has been set yet.
        /// </remarks>
        /// <param name="tokenizerConfig">Tokenizer configuration to wrap; must not be null.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="tokenizerConfig"/> is null.
        /// </exception>
        public NerProcessorConfig(TokenizerConfig tokenizerConfig)
        {
            // Fail with a descriptive exception instead of a NullReferenceException further down.
            if (tokenizerConfig == null)
            {
                throw new System.ArgumentNullException("tokenizerConfig");
            }

            TokenizerConfig = tokenizerConfig;

            // Add the Ner flag without clearing any mode flags that are already set.
            TokenizerConfig.TokenizeMode |= TokenizeMode.Ner;

            // Keep a factory that is already present; create one only when it is missing.
            if (TokenizerConfig.NerInputTypeProcessorFactory == null)
            {
                TokenizerConfig.NerInputTypeProcessorFactory = new NerInputTypeProcessorFactory(TokenizerConfig.LanguageType);
            }
        }
Example #2
0
        /// <summary>
        /// Creates a POS-tagger processor configuration on top of an existing tokenizer configuration.
        /// </summary>
        /// <remarks>
        /// The supplied tokenizer configuration is stored and modified in place: the
        /// <c>TokenizeMode.PosTagger</c> flag is OR-ed into its <c>TokenizeMode</c>, and a
        /// <c>PosTaggerInputTypeProcessorFactory</c> built from the model and its <c>LanguageType</c>
        /// is installed when none has been set yet.
        /// </remarks>
        /// <param name="tokenizerConfig">Tokenizer configuration to wrap; must not be null.</param>
        /// <param name="posTaggerResourcesXmlFilename">
        /// File name passed to the <c>PosTaggerResourcesModel</c> constructor.
        /// </param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="tokenizerConfig"/> is null.
        /// </exception>
        public PosTaggerProcessorConfig(TokenizerConfig tokenizerConfig, string posTaggerResourcesXmlFilename)
        {
            // Validate before building the model so a null config fails immediately
            // with a descriptive exception rather than a NullReferenceException afterwards.
            if (tokenizerConfig == null)
            {
                throw new System.ArgumentNullException("tokenizerConfig");
            }

            Model           = new PosTaggerResourcesModel(posTaggerResourcesXmlFilename);
            TokenizerConfig = tokenizerConfig;

            // Add the PosTagger flag without clearing any mode flags that are already set.
            TokenizerConfig.TokenizeMode |= TokenizeMode.PosTagger;

            // Keep a factory that is already present; create one only when it is missing.
            if (TokenizerConfig.PosTaggerInputTypeProcessorFactory == null)
            {
                TokenizerConfig.PosTaggerInputTypeProcessorFactory = new PosTaggerInputTypeProcessorFactory(Model, TokenizerConfig.LanguageType);
            }
        }
Example #3
0
 /// <summary>
 /// Creates a NER processor configuration together with a new tokenizer configuration.
 /// </summary>
 /// <param name="tokenizerResourcesXmlFilename">File name passed to the <c>TokenizerConfig</c> constructor.</param>
 /// <param name="languageType">Language assigned to the tokenizer configuration and used to build the NER input-type factory.</param>
 /// <param name="sentSplitterConfig">Sentence-splitter configuration assigned to the tokenizer configuration.</param>
 public NerProcessorConfig(string tokenizerResourcesXmlFilename,
                           LanguageTypeEnum languageType,
                           SentSplitterConfig sentSplitterConfig)
 {
     // Build and fully populate the configuration first, then publish it through the property.
     var tokenizerConfig = new TokenizerConfig(tokenizerResourcesXmlFilename);

     tokenizerConfig.TokenizeMode                 = TokenizeMode.Ner;
     tokenizerConfig.SentSplitterConfig           = sentSplitterConfig;
     tokenizerConfig.NerInputTypeProcessorFactory = new NerInputTypeProcessorFactory(languageType);
     tokenizerConfig.LanguageType                 = languageType;

     TokenizerConfig = tokenizerConfig;
 }
Example #4
0
 /// <summary>
 /// Creates a POS-tagger processor configuration together with its model and a new tokenizer configuration.
 /// </summary>
 /// <param name="tokenizerResourcesXmlFilename">File name passed to the <c>TokenizerConfig</c> constructor.</param>
 /// <param name="posTaggerResourcesXmlFilename">File name passed to the <c>PosTaggerResourcesModel</c> constructor.</param>
 /// <param name="languageType">Language assigned to the tokenizer configuration and used to build the POS-tagger input-type factory.</param>
 /// <param name="sentSplitterConfig">Sentence-splitter configuration assigned to the tokenizer configuration.</param>
 public PosTaggerProcessorConfig(string tokenizerResourcesXmlFilename,
                                 string posTaggerResourcesXmlFilename,
                                 LanguageTypeEnum languageType,
                                 SentSplitterConfig sentSplitterConfig)
 {
     // The model is created first because the input-type factory below is built from it.
     Model = new PosTaggerResourcesModel(posTaggerResourcesXmlFilename);

     // Build and fully populate the configuration first, then publish it through the property.
     var tokenizerConfig = new TokenizerConfig(tokenizerResourcesXmlFilename);

     tokenizerConfig.TokenizeMode       = TokenizeMode.PosTagger;
     tokenizerConfig.LanguageType       = languageType;
     tokenizerConfig.SentSplitterConfig = sentSplitterConfig;
     tokenizerConfig.PosTaggerInputTypeProcessorFactory = new PosTaggerInputTypeProcessorFactory(Model, languageType);

     TokenizerConfig = tokenizerConfig;
 }
Example #5
0
 /// <summary>
 /// Creates a tokenizer that uses <c>TokenizerConfig.DefaultConfig</c> as its configuration.
 /// </summary>
 public Tokenizer()
 {
     this.Config = TokenizerConfig.DefaultConfig;
 }
Example #6
0
 /// <summary>
 /// Creates a tokenizer that uses the supplied configuration.
 /// </summary>
 /// <param name="config">Configuration stored as-is in <c>Config</c>; no validation is performed here.</param>
 public Tokenizer(TokenizerConfig config)
 {
     this.Config = config;
 }