public WordnetDictionary(string searchDirectory)
{
    mEngine = new DataFileEngine(searchDirectory);

    // Suffix-detachment rules (WordNet-style morphological substitutions) per part of speech.
    Dictionary<string, string[][]> suffixMap = new Dictionary<string, string[][]>();
    suffixMap.Add("noun", new string[][]
    {
        new string[] { "s", "" }, new string[] { "ses", "s" }, new string[] { "xes", "x" },
        new string[] { "zes", "z" }, new string[] { "ches", "ch" }, new string[] { "shes", "sh" },
        new string[] { "men", "man" }, new string[] { "ies", "y" }
    });
    suffixMap.Add("verb", new string[][]
    {
        new string[] { "s", "" }, new string[] { "ies", "y" }, new string[] { "es", "e" },
        new string[] { "es", "" }, new string[] { "ed", "e" }, new string[] { "ed", "" },
        new string[] { "ing", "e" }, new string[] { "ing", "" }
    });
    suffixMap.Add("adjective", new string[][]
    {
        new string[] { "er", "" }, new string[] { "est", "" }, new string[] { "er", "e" },
        new string[] { "est", "e" }
    });

    // Suffix detachment applied to the individual tokens produced by the tokenizer.
    DetachSuffixesOperation tokDso = new DetachSuffixesOperation(suffixMap);
    tokDso.AddDelegate(DetachSuffixesOperation.Operations, new IOperation[]
    {
        new LookupIndexWordOperation(mEngine), new LookupExceptionsOperation(mEngine)
    });

    // Split compound lemmas on spaces and hyphens, then look up each token.
    TokenizerOperation tokOp = new TokenizerOperation(mEngine, new string[] { " ", "-" });
    tokOp.AddDelegate(TokenizerOperation.TokenOperations, new IOperation[]
    {
        new LookupIndexWordOperation(mEngine), new LookupExceptionsOperation(mEngine), tokDso
    });

    // Suffix detachment applied to the whole (untokenized) lemma.
    DetachSuffixesOperation morphDso = new DetachSuffixesOperation(suffixMap);
    morphDso.AddDelegate(DetachSuffixesOperation.Operations, new IOperation[]
    {
        new LookupIndexWordOperation(mEngine), new LookupExceptionsOperation(mEngine)
    });

    // Default pipeline: exception-list lookup first, then suffix detachment, then tokenization.
    mDefaultOperations = new IOperation[] { new LookupExceptionsOperation(mEngine), morphDso, tokOp };
}
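// Usage sketch (illustrative, not part of the original source). The constructor only wires up
// the morphological pipeline; a hypothetical caller would point it at a WordNet data directory
// and then use the dictionary's lookup/lemmatization methods, e.g.:
//
//     var dictionary = new WordnetDictionary(@"C:\wordnet\dict");   // path is an assumption
//     // mDefaultOperations now holds: exception-list lookup, suffix detachment, tokenization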
public string Lemmatize(string lemma, string partOfSpeech)
{
    // Lazily build the default operation pipeline on first use.
    if (mDefaultOperations == null)
    {
        // Suffix-detachment rules per part of speech (the adjective rules are currently disabled).
        var suffixMap = new Dictionary<string, string[][]>
        {
            {
                "noun", new string[][]
                {
                    new string[] { "s", "" }, new string[] { "ses", "s" }, new string[] { "xes", "x" },
                    new string[] { "zes", "z" }, new string[] { "ches", "ch" }, new string[] { "shes", "sh" },
                    new string[] { "men", "man" }, new string[] { "ies", "y" }
                }
            },
            {
                "verb", new string[][]
                {
                    new string[] { "s", "" }, new string[] { "ies", "y" }, new string[] { "es", "e" },
                    new string[] { "es", "" }, new string[] { "ed", "e" }, new string[] { "ed", "" },
                    new string[] { "ing", "e" }, new string[] { "ing", "" }
                }
            }
            //,
            //{
            //    "adjective", new string[][]
            //    {
            //        new string[] { "er", "" }, new string[] { "est", "" }, new string[] { "er", "e" },
            //        new string[] { "est", "e" }
            //    }
            //}
        };

        // Suffix detachment applied to the individual tokens produced by the tokenizer.
        var tokDso = new DetachSuffixesOperation(suffixMap);
        tokDso.AddDelegate(DetachSuffixesOperation.Operations, new IOperation[]
        {
            new LookupIndexWordOperation(this), new LookupExceptionsOperation(this)
        });

        // Split compound lemmas on spaces and hyphens, then look up each token.
        var tokOp = new TokenizerOperation(this, new string[] { " ", "-" });
        tokOp.AddDelegate(TokenizerOperation.TokenOperations, new IOperation[]
        {
            new LookupIndexWordOperation(this), new LookupExceptionsOperation(this), tokDso
        });

        // Suffix detachment applied to the whole (untokenized) lemma.
        var morphDso = new DetachSuffixesOperation(suffixMap);
        morphDso.AddDelegate(DetachSuffixesOperation.Operations, new IOperation[]
        {
            new LookupIndexWordOperation(this), new LookupExceptionsOperation(this)
        });

        // Default pipeline: exception-list lookup first, then suffix detachment, then tokenization.
        mDefaultOperations = new IOperation[] { new LookupExceptionsOperation(this), morphDso, tokOp };
    }

    return Lemmatize(lemma, partOfSpeech, mDefaultOperations);
}
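// Usage sketch (illustrative, not part of the original source). Assuming the three-argument
// Lemmatize overload and the suffix rules above behave as configured, a call such as:
//
//     string noun = engine.Lemmatize("watches", "noun");   // "ches" -> "ch" rule yields "watch"
//     string verb = engine.Lemmatize("cries", "verb");     // "ies"  -> "y"  rule yields "cry"
//
// would first consult the WordNet exception list (LookupExceptionsOperation), then try the
// suffix-detachment rules (DetachSuffixesOperation), and finally fall back to splitting the
// input on spaces and hyphens (TokenizerOperation). The variable name `engine` is hypothetical.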