/// <summary>
/// Runs the lexer over <paramref name="word"/> in the <c>Morpha.any</c>
/// start state and returns the first token it produces.
/// </summary>
/// <param name="word">The word handed to the lexer.</param>
/// <returns>
/// The first token returned by the lexer, or <paramref name="word"/>
/// unchanged if the lexer throws an <see cref="IOException"/>.
/// </returns>
public virtual string Stem(string word)
{
    try
    {
        // Point the lexer at the new input before selecting the start state.
        StringReader input = new StringReader(word);
        lexer.Yyreset(input);
        lexer.Yybegin(Morpha.any);
        return lexer.Next();
    }
    catch (IOException)
    {
        log.Warning("Morphology.stem() had error on word " + word);
    }

    // Only reached after an IOException: hand the input back untouched.
    return word;
}
/// <summary>
/// Lemmatize the word, being sensitive to the tag, using the
/// passed in lexer.
/// </summary>
/// <remarks>
/// The lexer is fed the single string <c>word + '_' + tag</c>. Any
/// underscore, space, or newline inside <paramref name="word"/> is swapped
/// for a placeholder character (U+1CF0..U+1CF2) before lexing and swapped
/// back in the result.
/// </remarks>
/// <param name="word">The word to lemmatize.</param>
/// <param name="tag">The tag appended to the word after an underscore.</param>
/// <param name="lexer">
/// The lexer to run. This call sets its option 1 and resets its input.
/// </param>
/// <param name="lowercase">
/// If this is true, words other than proper nouns will
/// be changed to all lowercase.
/// </param>
/// <returns>
/// The first token produced by the lexer with placeholders restored, or
/// <paramref name="word"/> unchanged if the lexer throws an
/// <see cref="IOException"/>.
/// </returns>
private static string Lemmatize(string word, string tag, Morpha lexer, bool lowercase)
{
    bool wordHasForbiddenChar = word.IndexOf('_') >= 0 || word.IndexOf(' ') >= 0 || word.IndexOf('\n') >= 0;
    string quotedWord = word;
    if (wordHasForbiddenChar)
    {
        // choose something unlikely. Classical Vedic!
        // Literal char-for-char swaps: string.Replace is enough, no regex needed.
        quotedWord = quotedWord
            .Replace('_', '\u1CF0')
            .Replace(' ', '\u1CF1')
            .Replace('\n', '\u1CF2');
    }

    string wordtag = quotedWord + '_' + tag;
    try
    {
        lexer.SetOption(1, lowercase);
        lexer.Yyreset(new StringReader(wordtag));
        lexer.Yybegin(Morpha.scan);
        string wordRes = lexer.Next();
        lexer.Next(); // go past tag
        if (wordHasForbiddenChar)
        {
            // Undo the placeholder substitution made before lexing.
            wordRes = wordRes
                .Replace('\u1CF0', '_')
                .Replace('\u1CF1', ' ')
                .Replace('\u1CF2', '\n');
        }
        return wordRes;
    }
    catch (IOException)
    {
        log.Warning("Morphology.stem() had error on word " + word + '/' + tag);
        return word;
    }
}