/// <summary>
/// Convenience constructor: forwards all four arguments to the five-argument
/// constructor and passes <c>false</c> as the fifth argument.
/// </summary>
/// <param name="t">Token type, forwarded unchanged.</param>
/// <param name="priority">Priority value, forwarded unchanged.</param>
/// <param name="tokenClassName">Token class name, forwarded unchanged.</param>
/// <param name="recognizerName">Recognizer name, forwarded unchanged.</param>
public Recognizer(Core.Tokenization.TokenType t, int priority, string tokenClassName, string recognizerName)
    : this(t, priority, tokenClassName, recognizerName, false)
{
    // Nothing to do here; all initialization happens in the chained constructor.
}
Beispiel #2
0
 /// <summary>
 /// Constructs the recognizer through the base class constructor (passing
 /// <c>false</c> as its last argument) and flags it as a fallback recognizer.
 /// </summary>
 /// <param name="t">Token type, forwarded to the base constructor.</param>
 /// <param name="priority">Priority value, forwarded to the base constructor.</param>
 /// <param name="culture">Culture, forwarded to the base constructor.</param>
 /// <param name="dataAccessor">Resource data accessor, forwarded to the base constructor.</param>
 public DefaultThaiFallbackRecognizer(
     Core.Tokenization.TokenType t,
     int priority,
     System.Globalization.CultureInfo culture,
     Core.Resources.IResourceDataAccessor dataAccessor)
     : base(t, priority, culture, dataAccessor, false)
 {
     // This recognizer is also used for Khmer now, so the culture is deliberately
     // not asserted to be Thai ("th") any more.
     _IsFallbackRecognizer = true;
 }
        /// <summary>
        /// Initializes the default fallback recognizer: stores the culture, loads the
        /// language resources when a data accessor is supplied, and builds a trie of
        /// the culture's leading clitics when the culture defines any.
        /// </summary>
        /// <param name="t">Token type, forwarded to the base constructor (see the note in the body).</param>
        /// <param name="priority">Priority value, forwarded to the base constructor.</param>
        /// <param name="culture">Culture stored in <c>_Culture</c> and used for the resource and clitic lookups.</param>
        /// <param name="dataAccessor">Resource data accessor; when <c>null</c>, no language resources are created.</param>
        /// <param name="separateClitics">Not read anywhere in this constructor body.</param>
        public DefaultFallbackRecognizer(
            Core.Tokenization.TokenType t,
            int priority,
            System.Globalization.CultureInfo culture,
            Core.Resources.IResourceDataAccessor dataAccessor,
            bool separateClitics)
            : base(t, priority, null, "DefaultFallbackRecognizer")
        {
            // NOTE: the token type is ignored in the implementation, as the fallback
            // recognizer will deliver multiple token types.
            _IsFallbackRecognizer = true;
            _Culture = culture;

            if (dataAccessor != null)
            {
                _Resources = new Sdl.LanguagePlatform.Lingua.Resources.LanguageResources(_Culture, dataAccessor);
            }

            // TODO test performance of building up the trie and matching instead of
            //  using StartsWith() on the list of clitics
            HashSet<string> clitics = Core.CultureInfoExtensions.GetLeadingClitics(_Culture);
            if (clitics == null)
            {
                // No leading clitics for this culture: _LeadingClitics is left unassigned.
                return;
            }

            // Each clitic is added with a running index, in the set's enumeration order.
            _LeadingClitics = new Trie<char, int>();
            int nextIndex = 0;
            foreach (string clitic in clitics)
            {
                _LeadingClitics.Add(clitic.ToCharArray(), nextIndex);
                nextIndex++;
            }
        }
Beispiel #4
0
        /// <summary>
        /// Constructs the recognizer through the base class constructor (passing
        /// <c>false</c> as its last argument), flags it as a fallback recognizer and
        /// parses the default punctuation character set.
        /// </summary>
        /// <param name="t">Token type, forwarded to the base constructor.</param>
        /// <param name="priority">Priority value, forwarded to the base constructor.</param>
        /// <param name="culture">Culture, forwarded to the base constructor.</param>
        /// <param name="dataAccessor">Resource data accessor, forwarded to the base constructor.</param>
        public DefaultChineseFallbackRecognizer(
            Core.Tokenization.TokenType t,
            int priority,
            System.Globalization.CultureInfo culture,
            Core.Resources.IResourceDataAccessor dataAccessor)
            : base(t, priority, culture, dataAccessor, false)
        {
            _IsFallbackRecognizer = true;

            // Parse requires an int passed by reference; it is initialized to 0 here
            // and its value after the call is not used.
            int parseIndex = 0;
            _DefaultPunctCharset = CharacterSetParser.Parse(_DEFAULT_PUNC_CS, ref parseIndex);
        }
Beispiel #5
0
 /// <summary>
 /// Creates a recognizer backed by the given word list.
 /// </summary>
 /// <param name="t">Token type, forwarded to the base constructor.</param>
 /// <param name="priority">Priority value, forwarded to the base constructor.</param>
 /// <param name="tokenClassName">Token class name, forwarded to the base constructor.</param>
 /// <param name="recognizerName">Recognizer name, forwarded to the base constructor.</param>
 /// <param name="w">Word list stored in <c>_List</c>; must not be <c>null</c>.</param>
 /// <exception cref="ArgumentNullException"><paramref name="w"/> is <c>null</c>.</exception>
 public WordlistRecognizer(
     Core.Tokenization.TokenType t,
     int priority,
     string tokenClassName,
     string recognizerName,
     Wordlist w)
     : base(t, priority, tokenClassName, recognizerName)
 {
     // The check runs after the base constructor has already executed.
     if (w == null)
     {
         throw new ArgumentNullException("w");
     }

     _List = w;
 }
        /// <summary>
        /// Constructs the recognizer through the base class constructor (passing
        /// <c>false</c> as its last argument), flags it as a fallback recognizer,
        /// compiles the default word regex and parses the default punctuation
        /// character set.
        /// </summary>
        /// <param name="t">Token type, forwarded to the base constructor.</param>
        /// <param name="priority">Priority value, forwarded to the base constructor.</param>
        /// <param name="culture">Culture, forwarded to the base constructor.</param>
        /// <param name="dataAccessor">Resource data accessor, forwarded to the base constructor.</param>
        public DefaultJapaneseFallbackRecognizer(
            Core.Tokenization.TokenType t,
            int priority,
            System.Globalization.CultureInfo culture,
            Core.Resources.IResourceDataAccessor dataAccessor)
            : base(t, priority, culture, dataAccessor, false)
        {
            _IsFallbackRecognizer = true;

            // TODO outsource the pattern into resource assembly, or make externally configurable?
            System.Text.RegularExpressions.RegexOptions wordRegexOptions =
                System.Text.RegularExpressions.RegexOptions.ExplicitCapture;
            _DefaultWordRegex = new System.Text.RegularExpressions.Regex(_DEFAULT_WORD_RX, wordRegexOptions);

            // Parse requires an int passed by reference; it is initialized to 0 here
            // and its value after the call is not used.
            int parseIndex = 0;
            _DefaultPunctCharset = CharacterSetParser.Parse(_DEFAULT_PUNC_CS, ref parseIndex);
        }
 /// <summary>
 /// Full constructor: stores the supplied values and resets the remaining
 /// recognizer state to its defaults.
 /// </summary>
 /// <param name="t">Value stored in <c>_Type</c>.</param>
 /// <param name="priority">Value stored in <c>_Priority</c>.</param>
 /// <param name="tokenClassName">Value stored in <c>_TokenClassName</c>.</param>
 /// <param name="recognizerName">Value stored in <c>_RecognizerName</c>.</param>
 /// <param name="autoSubstitutable">Value stored in <c>_AutoSubstitutable</c>.</param>
 public Recognizer(Core.Tokenization.TokenType t, int priority, string tokenClassName, string recognizerName, bool autoSubstitutable)
 {
     // Values supplied by the caller.
     _Type = t;
     _Priority = priority;
     _TokenClassName = tokenClassName;
     _RecognizerName = recognizerName;
     _AutoSubstitutable = autoSubstitutable;

     // Fixed defaults: no additional terminators, and neither a fallback
     // recognizer nor one that overrides the fallback recognizer.
     _AdditionalTerminators = null;
     _IsFallbackRecognizer = false;
     _OverrideFallbackRecognizer = false;
 }
 /// <summary>
 /// Constructs the recognizer through the base class constructor (passing
 /// <c>false</c> as its last argument) and flags it as a fallback recognizer.
 /// </summary>
 /// <param name="t">Token type, forwarded to the base constructor.</param>
 /// <param name="priority">Priority value, forwarded to the base constructor.</param>
 /// <param name="culture">Culture, forwarded to the base constructor.</param>
 /// <param name="dataAccessor">Resource data accessor, forwarded to the base constructor.</param>
 public DefaultJAZHFallbackRecognizer(
     Core.Tokenization.TokenType t,
     int priority,
     System.Globalization.CultureInfo culture,
     Core.Resources.IResourceDataAccessor dataAccessor)
     : base(t, priority, culture, dataAccessor, false)
 {
     _IsFallbackRecognizer = true;
 }
Beispiel #9
0
 /// <summary>
 /// Creates a regex-based recognizer that starts out with an empty pattern list.
 /// </summary>
 /// <param name="t">Token type, forwarded to the base constructor.</param>
 /// <param name="priority">Priority value, forwarded to the base constructor.</param>
 /// <param name="tokenClassName">Token class name, forwarded to the base constructor.</param>
 /// <param name="recognizerName">Recognizer name, forwarded to the base constructor.</param>
 /// <param name="autoSubstitutable">Auto-substitutable flag, forwarded to the base constructor.</param>
 public RegexRecognizer(
     Core.Tokenization.TokenType t,
     int priority,
     string tokenClassName,
     string recognizerName,
     bool autoSubstitutable)
     : base(t, priority, tokenClassName, recognizerName, autoSubstitutable)
 {
     // TODO allow passing regex options?
     _Patterns = new List<RegexRecognizerPattern>();
 }