/// <summary>
/// Disposes each owned member that is currently set (Processor, MorphoModel,
/// MorphoAmbiguityResolverModel, SentSplitterConfig, in that order) and clears
/// its reference afterwards.
/// </summary>
/// <remarks>
/// Members that are already null are skipped, so a second call finds nothing
/// left to dispose.
/// </remarks>
public void Dispose()
{
    var processor = Processor;
    if (processor != null)
    {
        processor.Dispose();
        Processor = null;
    }

    var morphoModel = MorphoModel;
    if (morphoModel != null)
    {
        morphoModel.Dispose();
        MorphoModel = null;
    }

    var resolverModel = MorphoAmbiguityResolverModel;
    if (resolverModel != null)
    {
        resolverModel.Dispose();
        MorphoAmbiguityResolverModel = null;
    }

    var splitterConfig = SentSplitterConfig;
    if (splitterConfig != null)
    {
        splitterConfig.Dispose();
        SentSplitterConfig = null;
    }
}
/// <summary>
/// Returns the cached <c>ConcurrentFactory</c>, creating it on the first call.
/// </summary>
/// <remarks>
/// The cached reference is read once without taking <c>_SyncLock</c>; only when
/// it is still null is the lock acquired and the reference re-read, so the
/// factory is constructed at most once by callers going through this method.
/// NOTE(review): the declaration of <c>_ConcurrentFactory</c> is not visible
/// here - confirm it is suitable for an unlocked read (e.g. volatile).
/// </remarks>
/// <returns>The cached factory instance (never null on normal return).</returns>
public static ConcurrentFactory GetConcurrentFactory()
{
    // Fast path: already built, no locking required.
    var factory = _ConcurrentFactory;
    if (factory != null)
    {
        return (factory);
    }

    lock (_SyncLock)
    {
        // Re-read under the lock: another thread may have built it meanwhile.
        factory = _ConcurrentFactory;
        if (factory == null)
        {
            var splitterConfig = new SentSplitterConfig(
                Config.SENT_SPLITTER_RESOURCES_XML_FILENAME,
                Config.URL_DETECTOR_RESOURCES_XML_FILENAME);

            var nerConfig = new NerProcessorConfig(
                Config.TOKENIZER_RESOURCES_XML_FILENAME,
                Config.LANGUAGE_TYPE,
                splitterConfig)
            {
                ModelFilename    = Config.NER_MODEL_FILENAME,
                TemplateFilename = Config.NER_TEMPLATE_FILENAME,
            };

            factory = new ConcurrentFactory(nerConfig, Config.CONCURRENT_FACTORY_INSTANCE_COUNT);
            _ConcurrentFactory = factory;
        }
    }

    return (factory);
}
/// <summary>
/// Builds a sentence-splitter configuration and a POS-tagger processor
/// configuration that references it, using the file names from <c>Config</c>.
/// </summary>
/// <remarks>
/// The language passed to the POS-tagger configuration is fixed to
/// <c>LanguageTypeEnum.Ru</c>.
/// </remarks>
/// <returns>
/// A tuple holding the POS-tagger configuration and the sentence-splitter
/// configuration that was created for it.
/// </returns>
public static (PosTaggerProcessorConfig config, SentSplitterConfig sentSplitterConfig) CreatePosTaggerProcessorConfig()
{
    var splitterConfig = new SentSplitterConfig(
        Config.SENT_SPLITTER_RESOURCES_XML_FILENAME,
        Config.URL_DETECTOR_RESOURCES_XML_FILENAME);

    var posTaggerConfig = new PosTaggerProcessorConfig(
        Config.TOKENIZER_RESOURCES_XML_FILENAME,
        Config.POSTAGGER_RESOURCES_XML_FILENAME,
        LanguageTypeEnum.Ru,
        splitterConfig)
    {
        ModelFilename    = Config.POSTAGGER_MODEL_FILENAME,
        TemplateFilename = Config.POSTAGGER_TEMPLATE_FILENAME,
    };

    return (posTaggerConfig, splitterConfig);
}
/// <summary>
/// Creates a NER processor configuration whose <c>TokenizerConfig</c> is set up
/// for <c>TokenizeMode.Ner</c>.
/// </summary>
/// <param name="tokenizerResourcesXmlFilename">File name passed to the <c>TokenizerConfig</c> constructor.</param>
/// <param name="languageType">Language stored on the tokenizer configuration and passed to the NER input-type processor factory.</param>
/// <param name="sentSplitterConfig">Sentence-splitter configuration stored on the tokenizer configuration.</param>
public NerProcessorConfig(string tokenizerResourcesXmlFilename, LanguageTypeEnum languageType, SentSplitterConfig sentSplitterConfig)
{
    var tokenizerConfig = new TokenizerConfig(tokenizerResourcesXmlFilename)
    {
        TokenizeMode                 = TokenizeMode.Ner,
        SentSplitterConfig           = sentSplitterConfig,
        NerInputTypeProcessorFactory = new NerInputTypeProcessorFactory(languageType),
        LanguageType                 = languageType,
    };

    TokenizerConfig = tokenizerConfig;
}
/// <summary>
/// Builds a NER processor configuration from the file names and language
/// declared on <c>Config</c>, together with a freshly created
/// sentence-splitter configuration.
/// </summary>
/// <returns>The newly constructed <c>NerProcessorConfig</c>.</returns>
private static NerProcessorConfig CreateNerProcessorConfig()
{
    var splitterConfig = new SentSplitterConfig(
        Config.SENT_SPLITTER_RESOURCES_XML_FILENAME,
        Config.URL_DETECTOR_RESOURCES_XML_FILENAME);

    return new NerProcessorConfig(
        Config.TOKENIZER_RESOURCES_XML_FILENAME,
        Config.LANGUAGE_TYPE,
        splitterConfig)
    {
        ModelFilename    = Config.NER_MODEL_FILENAME,
        TemplateFilename = Config.NER_TEMPLATE_FILENAME,
    };
}
/// <summary>
/// Creates a POS-tagger processor configuration: loads the resources model and
/// sets up a <c>TokenizerConfig</c> for <c>TokenizeMode.PosTagger</c>.
/// </summary>
/// <param name="tokenizerResourcesXmlFilename">File name passed to the <c>TokenizerConfig</c> constructor.</param>
/// <param name="posTaggerResourcesXmlFilename">File name passed to the <c>PosTaggerResourcesModel</c> constructor.</param>
/// <param name="languageType">Language stored on the tokenizer configuration and passed to the POS-tagger input-type processor factory.</param>
/// <param name="sentSplitterConfig">Sentence-splitter configuration stored on the tokenizer configuration.</param>
public PosTaggerProcessorConfig(
    string tokenizerResourcesXmlFilename,
    string posTaggerResourcesXmlFilename,
    LanguageTypeEnum languageType,
    SentSplitterConfig sentSplitterConfig)
{
    // Model must be assigned first: the input-type processor factory below reads it.
    Model = new PosTaggerResourcesModel(posTaggerResourcesXmlFilename);

    var tokenizerConfig = new TokenizerConfig(tokenizerResourcesXmlFilename)
    {
        TokenizeMode                       = TokenizeMode.PosTagger,
        LanguageType                       = languageType,
        SentSplitterConfig                 = sentSplitterConfig,
        PosTaggerInputTypeProcessorFactory = new PosTaggerInputTypeProcessorFactory(Model, languageType),
    };

    TokenizerConfig = tokenizerConfig;
}
/// <summary>
/// Creates a tokenizer with a sentence splitter built from
/// <paramref name="config"/> and a word list pre-sized to
/// <paramref name="wordCapacity"/>.
/// </summary>
/// <param name="config">Configuration passed to the <c>SentSplitter</c> constructor.</param>
/// <param name="wordCapacity">Initial capacity of the internal word list.</param>
public ner_tokenizer(SentSplitterConfig config, int wordCapacity)
{
    _SentSplitter = new SentSplitter(config);

    // Pre-size the word buffer to the capacity requested by the caller.
    _Words = new List<word_t>(wordCapacity);
}
// Needed by the NER model builder.
private bool _NotSkipNonLetterAndNonDigitToTheEnd;
#endregion

/// <summary>
/// Creates a tokenizer with a sentence splitter built from
/// <paramref name="config"/> and a word list pre-sized to
/// <c>DEFAULT_WORDSLIST_CAPACITY</c>.
/// </summary>
/// <param name="config">Configuration passed to the <c>SentSplitter</c> constructor.</param>
public ner_tokenizer(SentSplitterConfig config)
{
    _SentSplitter = new SentSplitter(config);

    // No capacity supplied by the caller: fall back to the default size.
    _Words = new List<word_t>(DEFAULT_WORDSLIST_CAPACITY);
}