/// <summary>
/// Creates the fallback recognizer for a culture: stores the culture, optionally loads
/// language resources, and indexes the culture's leading clitics in a trie.
/// </summary>
/// <param name="t">
/// Token type forwarded to the base constructor. The implementation ignores it, because
/// the fallback recognizer delivers multiple token types.
/// </param>
/// <param name="priority">Priority forwarded to the base constructor.</param>
/// <param name="culture">Culture stored in <c>_Culture</c>; used for the resource and clitic lookups below.</param>
/// <param name="dataAccessor">
/// Resource accessor used to create the language resources. When <c>null</c>,
/// <c>_Resources</c> is not assigned by this constructor.
/// </param>
/// <param name="separateClitics">Not referenced inside this constructor body.</param>
public DefaultFallbackRecognizer(
    Core.Tokenization.TokenType t,
    int priority,
    System.Globalization.CultureInfo culture,
    Core.Resources.IResourceDataAccessor dataAccessor,
    bool separateClitics)
    : base(t, priority, null, "DefaultFallbackRecognizer")
{
    _Culture = culture;

    // Language resources are only created when an accessor was supplied.
    if (dataAccessor != null)
    {
        _Resources = new Sdl.LanguagePlatform.Lingua.Resources.LanguageResources(_Culture, dataAccessor);
    }

    _IsFallbackRecognizer = true;

    // TODO test performance of building up the trie and matching instead of using
    // StartsWith() on the list of clitics
    var clitics = Core.CultureInfoExtensions.GetLeadingClitics(_Culture);
    if (clitics == null)
    {
        // No leading clitics for this culture: _LeadingClitics is left unassigned.
        return;
    }

    _LeadingClitics = new Trie<char, int>();

    // Each clitic is stored with a running index, in the set's enumeration order.
    int nextIndex = 0;
    foreach (string clitic in clitics)
    {
        _LeadingClitics.Add(clitic.ToCharArray(), nextIndex);
        nextIndex++;
    }
}
/// <summary>
/// Creates a stemmer that works from the given language resources.
/// </summary>
/// <param name="resources">
/// Language resources. The instance is stored in <c>_Resources</c> and its
/// <c>StemmingRules</c> are stored in <c>_Rules</c>.
/// </param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="resources"/> is <c>null</c>.
/// </exception>
public RuleBasedStemmer(Resources.LanguageResources resources)
{
    // Fail with a descriptive exception instead of the NullReferenceException that
    // dereferencing resources.StemmingRules would otherwise raise.
    if (resources == null)
    {
        throw new System.ArgumentNullException("resources");
    }

    _Resources = resources;
    _Rules = resources.StemmingRules;
}