/// <summary>
/// Creates a tokenizer set up for the NER model-builder scenario.
/// </summary>
/// <param name="config">
/// Model-builder settings. The constructor reads <c>UrlDetectorConfig</c>,
/// <c>Model.ParticleThatExclusion</c>, <c>LanguageType</c> and
/// <c>NerInputTypeProcessorFactory</c> from it.
/// </param>
/// <remarks>
/// Side effect: <c>config.UrlDetectorConfig.UrlExtractMode</c> is overwritten with
/// <c>UrlExtractModeEnum.Position</c> on the object passed in by the caller.
/// </remarks>
private Tokenizer(TokenizerConfig4NerModelBuilder config)
{
    // Force the URL detector into 'Position' extract mode before it is constructed.
    // NOTE: this mutates the caller-supplied configuration object.
    config.UrlDetectorConfig.UrlExtractMode = UrlDetector.UrlExtractModeEnum.Position;
    _UrlDetector = new UrlDetector(config.UrlDetectorConfig);

    // Reusable containers, pre-sized with the default capacity.
    _BuildModelSent = sent_t.CreateEmpty();
    _Words = new List<word_t>(DEFAULT_WORDSLIST_CAPACITY);
    _BuildModelWords = new List<buildmodel_word_t>(DEFAULT_WORDSLIST_CAPACITY);

    _ParticleThatExclusion = config.Model.ParticleThatExclusion;

    // Character lookup tables taken from the shared singletons.
    _UIM = xlat_Unsafe.Inst._UPPER_INVARIANT_MAP;
    _CTM = xlat_Unsafe.Inst._CHARTYPE_MAP;
    // The CRF char-type map is picked per language (config.LanguageType);
    // an earlier variant used UnsafeConst.Inst._CRF_CHARTYPE_MAP instead.
    _CCTM = UnsafeConst.GetInstanceByLanguage(config.LanguageType)._CRF_CHARTYPE_MAP;

    // Kept on its own line: a trailing '//' comment must never share a line with live code.
    // NOTE(review): presumably allocates the word-to-upper scratch buffer - confirm in ReAllocWordToUpperBuffer.
    ReAllocWordToUpperBuffer(DEFAULT_WORDTOUPPERBUFFER);

    // A dummy POS-tagger processor is installed here; only the NER processor comes from the config factory.
    _PosTaggerInputTypeProcessor = Dummy_PosTaggerInputTypeProcessor.Instance;
    _NerInputTypeProcessor = config.NerInputTypeProcessorFactory.CreateInstance();
}
/// <summary>
/// Factory for a <see cref="Tokenizer"/> configured for the NER model-builder scenario.
/// </summary>
/// <param name="config">Model-builder settings handed to the private constructor; must not be null.</param>
/// <returns>A newly constructed tokenizer.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="config"/> is null.</exception>
public static Tokenizer Create4NerModelBuilder(TokenizerConfig4NerModelBuilder config)
{
    // Fail at the public boundary with a descriptive exception instead of a
    // NullReferenceException from the first dereference inside the constructor.
    if (config == null)
    {
        throw new System.ArgumentNullException("config");
    }

    return new Tokenizer(config);
}