/// <summary>
/// Builds a POS-tagger configuration on top of an existing tokenizer configuration.
/// </summary>
/// <remarks>
/// The supplied <paramref name="tokenizerConfig"/> is modified in place:
/// <c>TokenizeMode.PosTagger</c> is OR-ed into its <c>TokenizeMode</c>, and a
/// <c>PosTaggerInputTypeProcessorFactory</c> is installed when none has been set yet.
/// </remarks>
/// <param name="tokenizerConfig">Tokenizer configuration to reuse; must not be null.</param>
/// <param name="posTaggerResourcesXmlFilename">File name handed to the <c>PosTaggerResourcesModel</c> constructor.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="tokenizerConfig"/> is null.</exception>
public PosTaggerProcessorConfig(TokenizerConfig tokenizerConfig, string posTaggerResourcesXmlFilename)
{
    // Fail fast, before the resources model is constructed, rather than
    // surfacing a NullReferenceException from the property accesses below.
    if (tokenizerConfig == null)
    {
        throw new System.ArgumentNullException("tokenizerConfig");
    }

    Model = new PosTaggerResourcesModel(posTaggerResourcesXmlFilename);

    TokenizerConfig = tokenizerConfig;
    TokenizerConfig.TokenizeMode |= TokenizeMode.PosTagger;

    // Keep a factory the caller has already configured; only fill in the default.
    if (TokenizerConfig.PosTaggerInputTypeProcessorFactory == null)
    {
        TokenizerConfig.PosTaggerInputTypeProcessorFactory =
            new PosTaggerInputTypeProcessorFactory(Model, TokenizerConfig.LanguageType);
    }
}
/// <summary>
/// Builds a POS-tagger configuration together with a new tokenizer configuration.
/// </summary>
/// <param name="tokenizerResourcesXmlFilename">File name handed to the <c>TokenizerConfig</c> constructor.</param>
/// <param name="posTaggerResourcesXmlFilename">File name handed to the <c>PosTaggerResourcesModel</c> constructor.</param>
/// <param name="languageType">Language stored on the tokenizer configuration and passed to the input-type processor factory.</param>
/// <param name="sentSplitterConfig">Sentence-splitter configuration stored on the tokenizer configuration.</param>
public PosTaggerProcessorConfig(
    string tokenizerResourcesXmlFilename,
    string posTaggerResourcesXmlFilename,
    LanguageTypeEnum languageType,
    SentSplitterConfig sentSplitterConfig)
{
    Model = new PosTaggerResourcesModel(posTaggerResourcesXmlFilename);

    // Populate the tokenizer configuration step by step, then publish it.
    var tokenizerSettings = new TokenizerConfig(tokenizerResourcesXmlFilename);
    tokenizerSettings.TokenizeMode = TokenizeMode.PosTagger;
    tokenizerSettings.LanguageType = languageType;
    tokenizerSettings.SentSplitterConfig = sentSplitterConfig;
    tokenizerSettings.PosTaggerInputTypeProcessorFactory =
        new PosTaggerInputTypeProcessorFactory(Model, languageType);

    TokenizerConfig = tokenizerSettings;
}
/// <summary>
/// Picks the language-specific input-type processor and stores it in the factory.
/// </summary>
/// <param name="model">Resources model whose <c>Numbers</c> and <c>Abbreviations</c> are passed to the processor.</param>
/// <param name="languageType">Language to create the processor for; <c>Ru</c> and <c>En</c> are handled.</param>
/// <exception cref="ArgumentException">
/// <paramref name="languageType"/> is neither <c>Ru</c> nor <c>En</c>; the message is the value's string form.
/// </exception>
internal PosTaggerInputTypeProcessorFactory(PosTaggerResourcesModel model, LanguageTypeEnum languageType)
{
    if (languageType == LanguageTypeEnum.Ru)
    {
        _posTaggerInputTypeProcessor = new PosTaggerInputTypeProcessorRu(model.Numbers, model.Abbreviations);
    }
    else if (languageType == LanguageTypeEnum.En)
    {
        _posTaggerInputTypeProcessor = new PosTaggerInputTypeProcessorEn(model.Numbers, model.Abbreviations);
    }
    else
    {
        // Unsupported language: report it without touching the model.
        throw new ArgumentException(languageType.ToString());
    }
}
/// <summary>
/// Creates the pre-merging step and keeps a reference to the given resources model.
/// </summary>
/// <param name="model">Resources model stored in the <c>_model</c> field.</param>
public PosTaggerPreMerging(PosTaggerResourcesModel model)
{
    this._model = model;
}