/// <summary>
/// Builds a tokenizer instance from a NER-model-builder configuration.
/// </summary>
/// <param name="config">
/// Source of the URL-detector settings, the model, the language type and the
/// NER input-type-processor factory. Note: this constructor overwrites
/// <c>config.UrlDetectorConfig.UrlExtractMode</c> with <c>Position</c>, so the
/// caller's configuration object is modified.
/// </param>
private Tokenizer(TokenizerConfig4NerModelBuilder config)
{
    // Force the URL extraction mode to "Position" before the detector is created from this config.
    config.UrlDetectorConfig.UrlExtractMode = UrlDetector.UrlExtractModeEnum.Position;
    _urlDetector = new UrlDetector(config.UrlDetectorConfig);

    // Working storage: an empty sentence plus two word lists pre-sized to the default capacity.
    _buildModelSentence = Sentence.CreateEmpty();
    _words              = new List<Word>(DEFAULT_WORDSLIST_CAPACITY);
    _buildModelWords    = new List<Buildmodel_word_t>(DEFAULT_WORDSLIST_CAPACITY);

    _particleThatExclusion = config.Model.ParticleThatExclusion;

    // Character lookup maps: two shared ones from XlatUnsafe and one selected by the configured language.
    _UIM  = XlatUnsafe.Inst._UPPER_INVARIANT_MAP;
    _CTM  = XlatUnsafe.Inst._CHARTYPE_MAP;
    _CCTM = UnsafeConst.GetInstanceByLanguage(config.LanguageType)._CRF_CHARTYPE_MAP;

    ReAllocWordToUpperBuffer(DEFAULT_WORDTOUPPERBUFFER);

    // The POS-tagger processor is the dummy singleton here; the NER processor comes from the configured factory.
    _posTaggerInputTypeProcessor = DummyPosTaggerInputTypeProcessor.Instance;
    _nerInputTypeProcessor       = config.NerInputTypeProcessorFactory.CreateInstance();
}
/// <summary>
/// Factory method: creates a <see cref="Tokenizer"/> from a NER-model-builder configuration
/// by delegating to the private constructor that accepts that configuration.
/// </summary>
/// <param name="config">Configuration passed to the tokenizer constructor; must not be null.</param>
/// <returns>A newly constructed <see cref="Tokenizer"/>.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="config"/> is null.</exception>
public static Tokenizer Create4NerModelBuilder(TokenizerConfig4NerModelBuilder config)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException
    // raised from inside the constructor on the first member access.
    if (config == null)
    {
        throw new System.ArgumentNullException(nameof(config));
    }

    return new Tokenizer(config);
}