Пример #1
0
        /// <summary>
        /// Builds a tokenizer wired up for NER model building from the supplied configuration.
        /// </summary>
        /// <param name="config">
        /// Source of the URL-detector settings, model data, language type and the
        /// NER input-type processor factory. Note that its
        /// <c>UrlDetectorConfig.UrlExtractMode</c> is overwritten with
        /// <c>UrlExtractModeEnum.Position</c> as a side effect.
        /// </param>
        private Tokenizer(TokenizerConfig4NerModelBuilder config)
        {
            // The extract mode is forced to Position before the detector is constructed from the same config.
            config.UrlDetectorConfig.UrlExtractMode = UrlDetector.UrlExtractModeEnum.Position;
            _UrlDetector = new UrlDetector(config.UrlDetectorConfig);

            // Working state: an empty sentence plus word buffers pre-sized to the default capacity.
            _BuildModelSent = sent_t.CreateEmpty();
            _Words = new List<word_t>(DEFAULT_WORDSLIST_CAPACITY);
            _BuildModelWords = new List<buildmodel_word_t>(DEFAULT_WORDSLIST_CAPACITY);
            _ParticleThatExclusion = config.Model.ParticleThatExclusion;

            // Character lookup maps: two shared ones and one selected by the configured language.
            var xlat = xlat_Unsafe.Inst;
            _UIM = xlat._UPPER_INVARIANT_MAP;
            _CTM = xlat._CHARTYPE_MAP;
            _CCTM = UnsafeConst.GetInstanceByLanguage(config.LanguageType)._CRF_CHARTYPE_MAP;

            // Allocate the word-to-upper buffer at its default size.
            ReAllocWordToUpperBuffer(DEFAULT_WORDTOUPPERBUFFER);

            // The POS-tagger side uses the dummy processor instance; the NER processor comes from the config's factory.
            _PosTaggerInputTypeProcessor = Dummy_PosTaggerInputTypeProcessor.Instance;
            _NerInputTypeProcessor = config.NerInputTypeProcessorFactory.CreateInstance();
        }
Пример #2
0
        /// <summary>
        /// Creates a <see cref="Tokenizer"/> configured for NER model building.
        /// </summary>
        /// <param name="config">Configuration passed to the tokenizer constructor; must not be null.</param>
        /// <returns>A newly constructed <see cref="Tokenizer"/> instance.</returns>
        /// <exception cref="System.ArgumentNullException">
        /// Thrown when <paramref name="config"/> is null.
        /// </exception>
        public static Tokenizer Create4NerModelBuilder(TokenizerConfig4NerModelBuilder config)
        {
            // Fail fast with a descriptive exception: the constructor dereferences
            // config immediately and would otherwise raise a NullReferenceException.
            if (config == null)
            {
                throw new System.ArgumentNullException(nameof(config));
            }

            return new Tokenizer(config);
        }