Beispiel #1
0
        /// <summary>
        /// Builds a tagger by indexing the supplied lexicon entries by their value.
        /// </summary>
        /// <param name="lexicon">
        /// The entries to index. When several entries map to the same key, the first
        /// entry is kept and the tags of later entries are merged into it (without duplicates).
        /// </param>
        /// <param name="options">
        /// Processing options. <c>MatchesAreCaseSensitive</c> decides whether entry values
        /// are used verbatim or lower-cased when forming dictionary keys.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="lexicon"/> is null.
        /// </exception>
        public Tagger(IList <LexiconEntry> lexicon, StringMixOptions options)
        {
            if (lexicon == null)
            {
                throw new ArgumentNullException("lexicon");
            }

            _lexicon = new Dictionary <string, LexiconEntry>(DEFAULT_CAPACITY);
            _options = options;

            foreach (var item in lexicon)
            {
                // NOTE(review): ToLower() is culture-sensitive; kept as-is so keys stay
                // consistent with whatever normalisation the lookup code applies.
                string key = _options.MatchesAreCaseSensitive ? item.Value : item.Value.ToLower();

                LexiconEntry existing;
                if (!_lexicon.TryGetValue(key, out existing))
                {
                    _lexicon.Add(key, item);
                    continue;
                }

                // Merge only the tags that are missing. Adding the whole range here would
                // re-add tags the existing entry already carries.
                foreach (var tag in item.Tags)
                {
                    if (!existing.Tags.Contains(tag))
                    {
                        existing.Tags.Add(tag);
                    }
                }
            }
        }
Beispiel #2
0
 /// <summary>
 /// Creates a new <see cref="Tagger"/> from the supplied lexicon and options.
 /// </summary>
 /// <param name="lexicon">The lexicon entries passed to the Tagger constructor.</param>
 /// <param name="options">The string processing options passed to the Tagger constructor.</param>
 /// <returns>The newly constructed Tagger.</returns>
 public Tagger GetBasicTagger(List <LexiconEntry> lexicon, StringMixOptions options)
 {
     Tagger basicTagger = new Tagger(lexicon, options);
     return basicTagger;
 }
Beispiel #3
0
 /// <summary>
 /// Produces the tagged tokens for a string, using a tagger built on the fly
 /// from the given lexicon and processing options.
 /// </summary>
 /// <param name="str">
 /// The string to operate upon.
 /// </param>
 /// <param name="lexicon">
 /// The lexicon entries used to build the tagger; see LexiconEntry.
 /// </param>
 /// <param name="options">
 /// String processing options (delimiters, casing, etc.); see StringMixOptions.
 /// </param>
 /// <returns>
 /// A list of tagged tokens. Tokens are the terms seen in the text: for the input
 /// "Fred Flintstone" there are two terms (tokens). The lexicon is used to attach
 /// tags (meanings) to those terms; the tags can then be combined into sequences
 /// (patterns) that are processed further for matches.
 /// </returns>
 public static List <TaggedToken> Tokenize(this String str, List <LexiconEntry> lexicon, StringMixOptions options)
 {
     // Build a one-off tagger and hand over to the overload that accepts a Tagger.
     var tagger = new Tagger(lexicon, options);
     return Tokenize(str, tagger);
 }
Beispiel #4
0
 /// <summary>
 /// Convenience method that runs the full chain of tokenize, tag, match and transform
 /// on a string. It is equivalent to:
 ///
 /// "Fred Flintstone".Tokenize(lexicon, options).Match("FL").Transform&lt;Name&gt;(NameTransformer);
 ///
 /// </summary>
 /// <typeparam name="T">
 /// The type returned by this call.
 /// </typeparam>
 /// <param name="str">
 /// The string to operate upon.
 /// </param>
 /// <param name="lexicon">
 /// The lexicon entries used for tagging; see LexiconEntry.
 /// </param>
 /// <param name="options">
 /// The same StringMixOptions that would be passed to the Tokenize method.
 /// </param>
 /// <param name="MatchRegEx">
 /// The regular expression tested against the patterns of the tokens in the target
 /// string; the same criteria that would be passed to .Match(...).
 /// </param>
 /// <param name="transformer">
 /// The ITransformer[T] implementation used to convert the match set into the target type.
 /// </param>
 /// <returns>
 /// An object of type T.
 /// </returns>
 public static T Transform <T>(this String str, List <LexiconEntry> lexicon, StringMixOptions options, string MatchRegEx, ITransformer <T> transformer) where T : new()
 {
     // Step 1: split the string into tokens and tag them via the lexicon.
     var taggedTokens = Tokenize(str, lexicon, options);

     // Step 2: keep the pattern matches selected by the regular expression.
     var matches = taggedTokens.Match(MatchRegEx);

     // Step 3: convert the matches into the requested type.
     return matches.Transform <T>(transformer);
 }