Ejemplo n.º 1
0
 /// <summary>
 /// Runs the lexer over <paramref name="word"/> from the <c>Morpha.any</c>
 /// start state and returns the first token it produces.
 /// </summary>
 /// <param name="word">The text handed to the lexer.</param>
 /// <returns>
 /// The value of the lexer's first <c>Next()</c> call, or
 /// <paramref name="word"/> unchanged when the lexer throws an
 /// <see cref="IOException"/>.
 /// </returns>
 public virtual string Stem(string word)
 {
     // Fall back to the input itself unless the lexer yields a result.
     string stemmed = word;

     try
     {
         StringReader input = new StringReader(word);
         lexer.Yyreset(input);
         lexer.Yybegin(Morpha.any);
         stemmed = lexer.Next();
     }
     catch (IOException)
     {
         // Best effort: report the failure and keep the fallback value.
         log.Warning("Morphology.stem() had error on word " + word);
     }

     return stemmed;
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Lemmatize the word, being sensitive to the tag, using the
        /// passed in lexer.
        /// </summary>
        /// <remarks>
        /// The lexer input is built as <c>word + '_' + tag</c>. Any underscore,
        /// space, or newline inside <paramref name="word"/> is therefore swapped
        /// for a placeholder character before lexing and swapped back in the
        /// result.
        /// </remarks>
        /// <param name="word">The word to lemmatize.</param>
        /// <param name="tag">The tag appended to the word (after an underscore) before lexing.</param>
        /// <param name="lexer">The lexer that is reset and run on the word/tag string.</param>
        /// <param name="lowercase">
        /// If this is true, words other than proper nouns will
        /// be changed to all lowercase.
        /// </param>
        /// <returns>
        /// The first token returned by the lexer, or <paramref name="word"/>
        /// unchanged when the lexer throws an <see cref="IOException"/>.
        /// </returns>
        private static string Lemmatize(string word, string tag, Morpha lexer, bool lowercase)
        {
            // Placeholders for characters that may not appear in the lexer input.
            // Choose something unlikely. Classical Vedic!
            const string underscoreMark = "\u1CF0";
            const string spaceMark      = "\u1CF1";
            const string newlineMark    = "\u1CF2";

            bool   wordHasForbiddenChar = word.IndexOf('_') >= 0 || word.IndexOf(' ') >= 0 || word.IndexOf('\n') >= 0;
            string quotedWord           = word;

            if (wordHasForbiddenChar)
            {
                quotedWord = quotedWord
                             .Replace("_", underscoreMark)
                             .Replace(" ", spaceMark)
                             .Replace("\n", newlineMark);
            }
            string wordtag = quotedWord + '_' + tag;

            try
            {
                lexer.SetOption(1, lowercase);
                lexer.Yyreset(new StringReader(wordtag));
                lexer.Yybegin(Morpha.scan);
                string wordRes = lexer.Next();
                lexer.Next(); // go past tag

                // NOTE(review): Next() presumably can return null at end of input - confirm.
                // The guard keeps this path consistent with the plain path, which
                // hands back whatever the lexer returned without dereferencing it.
                if (wordHasForbiddenChar && wordRes != null)
                {
                    wordRes = wordRes
                              .Replace(underscoreMark, "_")
                              .Replace(spaceMark, " ")
                              .Replace(newlineMark, "\n");
                }
                return wordRes;
            }
            catch (IOException)
            {
                log.Warning("Morphology.stem() had error on word " + word + '/' + tag);
                return word;
            }
        }