/// <summary>
/// Initializes the fallback recognizer: stores the culture, optionally loads the
/// language resources, and indexes the culture's leading clitics in a trie.
/// </summary>
/// <param name="t">Forwarded to the base constructor. The implementation ignores it, because the fallback recognizer delivers multiple token types.</param>
/// <param name="priority">Forwarded to the base constructor.</param>
/// <param name="culture">Culture used for the language resources and the leading-clitics lookup.</param>
/// <param name="dataAccessor">Resource data accessor; when <c>null</c>, no language resources are created.</param>
/// <param name="separateClitics">Not read by this constructor body.</param>
public DefaultFallbackRecognizer(Core.Tokenization.TokenType t, int priority, System.Globalization.CultureInfo culture, Core.Resources.IResourceDataAccessor dataAccessor, bool separateClitics)
            : base(t, priority, null, "DefaultFallbackRecognizer")
        {
            _Culture = culture;

            // Language resources can only be loaded when an accessor was supplied.
            if (dataAccessor != null)
            {
                _Resources = new Sdl.LanguagePlatform.Lingua.Resources.LanguageResources(_Culture, dataAccessor);
            }

            _IsFallbackRecognizer = true;

            // TODO: test the performance of building up the trie and matching against it,
            //  instead of using StartsWith() on the list of clitics.
            HashSet <string> cliticSet = Core.CultureInfoExtensions.GetLeadingClitics(_Culture);
            if (cliticSet == null)
            {
                // No leading clitics are available for this culture; the trie stays unset.
                return;
            }

            _LeadingClitics = new Trie <char, int>();

            // Each clitic is stored with its running position in the enumeration as the value.
            int ordinal = 0;
            foreach (string clitic in cliticSet)
            {
                _LeadingClitics.Add(clitic.ToCharArray(), ordinal);
                ordinal++;
            }
        }
Beispiel #2
0
 /// <summary>
 /// Initializes the stemmer with the given language resources and caches their
 /// stemming rules.
 /// </summary>
 /// <param name="resources">Language resources that supply the stemming rules; must not be <c>null</c>.</param>
 /// <exception cref="System.ArgumentNullException"><paramref name="resources"/> is <c>null</c>.</exception>
 public RuleBasedStemmer(Resources.LanguageResources resources)
 {
     // Fail fast with a descriptive exception instead of a NullReferenceException
     // when dereferencing resources.StemmingRules below.
     if (resources == null)
     {
         throw new System.ArgumentNullException("resources");
     }

     _Resources = resources;
     _Rules     = resources.StemmingRules;
 }