/// <summary>
/// Builds a sentence-splitter configuration from two resource files.
/// </summary>
/// <param name="sentSplitterResourcesXmlFilename">
/// Path handed to the <c>SentSplitterModel</c> constructor.
/// </param>
/// <param name="urlDetectorResourcesXmlFilename">
/// Path handed to the <c>UrlDetectorConfig</c> constructor.
/// </param>
/// <remarks>
/// <c>SplitBySmiles</c> is switched on by this constructor.
/// </remarks>
public SentSplitterConfig(
    string sentSplitterResourcesXmlFilename,
    string urlDetectorResourcesXmlFilename)
{
    // The model is created (and stored) first, exactly as before, so a failure
    // while loading it surfaces before the URL-detector resources are touched.
    var splitterModel = new SentSplitterModel(sentSplitterResourcesXmlFilename);
    Model = splitterModel;

    UrlDetectorConfig = new UrlDetectorConfig(urlDetectorResourcesXmlFilename);

    SplitBySmiles = true;
}
/// <summary>
/// Creates a tokenizer backed by a <c>UrlDetector</c> built from <paramref name="config"/>.
/// </summary>
/// <param name="config">
/// URL-detector configuration. Its <c>UrlExtractMode</c> is overwritten with
/// <c>UrlExtractModeEnum.Position</c> before the detector is constructed, so the
/// instance passed in by the caller is modified.
/// </param>
public ner_tokenizer(UrlDetectorConfig config)
{
    // Force position-based extraction on the supplied configuration.
    config.UrlExtractMode = UrlDetector.UrlExtractModeEnum.Position;
    _UrlDetector = new UrlDetector(config);

    _BuildModelSent = sent_t.CreateEmpty();

    // Both word buffers start with the same default capacity.
    var initialCapacity = DEFAULT_WORDSLIST_CAPACITY;
    _Words = new List<word_t>(initialCapacity);
    _BuildModelWords = new List<buildmodel_word_t>(initialCapacity);
}
/// <summary>
/// Creates the model-builder configuration and populates its
/// <c>TokenizerConfig4NerModelBuilder</c> property.
/// </summary>
/// <param name="tokenizerResourcesXmlFilename">
/// Path handed to the <c>TokenizerConfig4NerModelBuilder</c> constructor.
/// </param>
/// <param name="languageType">
/// Language stored on the tokenizer configuration and used to create its
/// <c>NerInputTypeProcessorFactory</c>.
/// </param>
/// <param name="urlDetectorConfig">
/// URL-detector configuration stored on the tokenizer configuration as-is.
/// </param>
public NerModelBuilderConfig(
    string tokenizerResourcesXmlFilename,
    LanguageTypeEnum languageType,
    UrlDetectorConfig urlDetectorConfig)
{
    var tokenizerConfig = new TokenizerConfig4NerModelBuilder(tokenizerResourcesXmlFilename);

    // Same assignment order as the original object initializer.
    tokenizerConfig.UrlDetectorConfig = urlDetectorConfig;
    tokenizerConfig.NerInputTypeProcessorFactory = new NerInputTypeProcessorFactory(languageType);
    tokenizerConfig.LanguageType = languageType;

    TokenizerConfig4NerModelBuilder = tokenizerConfig;
}
/// <summary>
/// Creates a POS-tagger model builder.
/// </summary>
/// <param name="templateFilename">
/// Template file passed to <c>PosTaggerScriber.Create4ModelBuilder</c>; checked
/// with <c>ThrowIfNullOrWhiteSpace</c> before use.
/// </param>
/// <param name="languageType">
/// Language passed to <c>CreatePosTaggerInputTypeProcessor</c>.
/// </param>
/// <param name="urlDetectorConfig">
/// Configuration for the internal <c>UrlDetector</c>; checked with
/// <c>ThrowIfNull</c> before use.
/// </param>
/// <remarks>
/// The exception types raised for invalid arguments are determined by the
/// <c>ThrowIfNullOrWhiteSpace</c> / <c>ThrowIfNull</c> helpers, which are
/// declared outside this block.
/// </remarks>
public PosTaggerModelBuilder(
    string templateFilename,
    LanguageTypeEnum languageType,
    UrlDetectorConfig urlDetectorConfig)
{
    // Argument guards run first, in the same order as before.
    templateFilename.ThrowIfNullOrWhiteSpace("templateFilename");
    urlDetectorConfig.ThrowIfNull("urlDetectorConfig");

    var scriber = PosTaggerScriber.Create4ModelBuilder(templateFilename);
    _posTaggerScriber = scriber;

    _posTaggerInputTypeProcessor = CreatePosTaggerInputTypeProcessor(languageType);

    var detector = new UrlDetector(urlDetectorConfig);
    _urlDetector = detector;

    _words = new List<Word>();
}
/// <summary>
/// Creates a factory that owns <paramref name="instanceCount"/> pre-built
/// <c>UrlDetector</c> instances and a semaphore whose initial and maximum
/// counts are both <paramref name="instanceCount"/>.
/// </summary>
/// <param name="config">Configuration passed to every <c>UrlDetector</c> constructor.</param>
/// <param name="instanceCount">Number of detectors to create; must be greater than zero.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="instanceCount"/> is zero or negative. This type derives from
/// <see cref="ArgumentException"/>, so existing <c>catch (ArgumentException)</c>
/// handlers keep working.
/// </exception>
public ConcurrentFactory(UrlDetectorConfig config, int instanceCount)
{
    if (instanceCount <= 0)
    {
        // The single-string ArgumentException constructor treats its argument as the
        // message, not the parameter name; use the overload that sets ParamName.
        throw new ArgumentOutOfRangeException(
            "instanceCount",
            instanceCount,
            "instanceCount must be greater than zero.");
    }

    // Build all detectors before allocating the semaphore: if a UrlDetector
    // constructor throws, no OS semaphore handle has been created yet, so
    // nothing is leaked.
    var detectors = new ConcurrentStack<UrlDetector>();
    for (int i = 0; i < instanceCount; i++)
    {
        detectors.Push(new UrlDetector(config));
    }

    _Stack = detectors;
    _Semaphore = new Semaphore(instanceCount, instanceCount);
}
/// <summary>
/// Creates a tokenizer that uses a position-mode <c>UrlDetector</c> built
/// around <paramref name="urlModel"/>.
/// </summary>
/// <param name="urlModel">Model assigned to the internally created <c>UrlDetectorConfig</c>.</param>
/// <param name="wordCapacity">
/// Requested initial capacity of the word list; the larger of this value and
/// <c>DEFAULT_WORDCAPACITY</c> is used.
/// </param>
public mld_tokenizer(UrlDetectorModel urlModel, int wordCapacity)
{
    // URL detector, configured for position-based extraction.
    var detectorConfig = new UrlDetectorConfig();
    detectorConfig.Model = urlModel;
    detectorConfig.UrlExtractMode = UrlDetector.UrlExtractModeEnum.Position;
    _UrlDetector = new UrlDetector(detectorConfig);

    // Working buffers.
    var initialCapacity = Math.Max(DEFAULT_WORDCAPACITY, wordCapacity);
    _Words = new List<string>(initialCapacity);
    _NgramsSB = new StringBuilder();
    _AddWordToListAction = new Action<string>(AddWordToList);

    // Cache the shared lookup tables in instance fields.
    _UIM = xlat_Unsafe.Inst._UPPER_INVARIANT_MAP;
    _CTM = xlat_Unsafe.Inst._CHARTYPE_MAP;
    _IAW = UnsafeConst.Inst._INTERPRETE_AS_WHITESPACE;
    _DWC = UnsafeConst.Inst._DIGIT_WORD_CHARS;

    //--//
    ReAllocWordToUpperBuffer(DEFAULT_WORDTOUPPERBUFFER);
}
/// <summary>
/// Returns the cached <c>ConcurrentFactory</c>, creating and caching it on the
/// first call.
/// </summary>
/// <returns>The factory stored in <c>_ConcurrentFactory</c>.</returns>
/// <remarks>
/// The cached field is read once without a lock; if it is still null the read
/// is repeated under <c>_SyncLock</c> before a new factory is created, so
/// threads contending on that lock create at most one instance.
/// NOTE(review): whether <c>_ConcurrentFactory</c> is declared <c>volatile</c>
/// is not visible from this block — confirm.
/// </remarks>
public ConcurrentFactory GetConcurrentFactory()
{
    var factory = _ConcurrentFactory;
    if (factory != null)
    {
        // Fast path: already initialised.
        return (factory);
    }

    lock (_SyncLock)
    {
        // Re-read under the lock: another thread may have created it meanwhile.
        factory = _ConcurrentFactory;
        if (factory == null)
        {
            var detectorConfig = new UrlDetectorConfig(Config.URL_DETECTOR_RESOURCES_XML_FILENAME);
            detectorConfig.UrlExtractMode = UrlDetector.UrlExtractModeEnum.Position;

            factory = new ConcurrentFactory(detectorConfig, Config.CONCURRENT_FACTORY_INSTANCE_COUNT);
            _ConcurrentFactory = factory;
        }
    }

    return (factory);
}