/// <summary>
/// Builds the lemmatization tree(s) from the examples collected in <c>elExamples</c>.
/// Does nothing when a root node already exists.
/// </summary>
/// <param name="msdSpec">
/// MSD specification string. When it is null or empty the MsdSplitTree optimization is skipped
/// and the examples are used with their original MSDs.
/// </param>
/// <param name="beamSearchOpt">Beam search parameters forwarded to the <c>MsdSplitTree</c> constructor.</param>
public void BuildModel(string msdSpec, MsdSplitTree.BeamSearchParams beamSearchOpt) {
    // A root node already exists: the model has been built, so there is nothing to do.
    if (ltnRootNode != null) {
        return;
    }

    // Use the MsdSplitTree optimization only when it is enabled, MSDs are considered
    // as distinct, and an MSD specification was actually supplied.
    if (lsett.bUseMsdSplitTreeOptimization
        && lsett.eMsdConsider == LemmatizerSettings.MsdConsideration.Distinct
        && !string.IsNullOrEmpty(msdSpec)) {
        msdSplitTree = new MsdSplitTree(elExamples.ListExamples, new MsdSpec(msdSpec), beamSearchOpt);

        // Rebuild the example list so that every example carries the MSD produced by the split tree.
        ExampleList el = elExamples;
        elExamples = new ExampleList(lsett);

        foreach (LemmaExample le in el.ListExamples) {
            string newMsd = msdSplitTree.TransformMsd(le.Msd);
            elExamples.AddExample(le.Word, le.Lemma, le.Weight, newMsd);
        }

        // TODO (carried over from the original): calling BuildModel twice was flagged as a problem.
        // The guard at the top makes a repeated call a no-op while ltnRootNode is set; whether the
        // root node can be reset elsewhere (re-running this transformation on already transformed
        // MSDs) is not visible from here - confirm.
    }

    elExamples.FinalizeAdditions();

    if (!lsett.bBuildFrontLemmatizer) {
        ltnRootNode = new LemmaTreeNode(lsett, elExamples);
    } else {
        // With the front lemmatizer enabled, two trees are built from the
        // front/rear variants of the example list.
        ltnRootNode = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(false));
        ltnRootNodeFront = new LemmaTreeNode(lsett, elExamples.GetFrontRearExampleList(true));
    }
}
/// <summary>
/// Extended lemmatization interface with more options.
/// </summary>
/// <param name="sWord">Word to be lemmatized.</param>
/// <param name="ignoreCase">
/// If true, casing is ignored. If false, the lemmatizer matches the longest rule it knows
/// while requiring the same casing of rule and word.
/// </param>
/// <param name="sMsd">
/// Morphosyntactic descriptor (MSD) of the word to be lemmatized; may be null.
/// </param>
/// <returns>Lemmatized word.</returns>
public string Lemmatize(string sWord, bool ignoreCase, string sMsd) {
    string sNewMsd = sMsd;

    // BuildModel only creates msdSplitTree when a non-empty MSD specification was supplied,
    // so the tree may be missing even though the settings enable the optimization. In that
    // case the MSD is passed through unchanged instead of dereferencing a null tree.
    if (sMsd != null
        && msdSplitTree != null
        && lsett.bUseMsdSplitTreeOptimization
        && lsett.eMsdConsider == LemmatizerSettings.MsdConsideration.Distinct) {
        sNewMsd = msdSplitTree.TransformMsd(sMsd);
    }

    if (!lsett.bBuildFrontLemmatizer) {
        return ltrRootNodeSafe.Lemmatize(sWord, ignoreCase, sNewMsd);
    }

    // Two-pass lemmatization: reverse the word and run it through the front tree,
    // then reverse that result back and run it through the regular tree.
    string sWordFront = LemmaExample.StringReverse(sWord);
    string sLemmaFront = ltrRootNodeFrontSafe.Lemmatize(sWordFront, ignoreCase, sNewMsd);
    string sWordRear = LemmaExample.StringReverse(sLemmaFront);
    return ltrRootNodeSafe.Lemmatize(sWordRear, ignoreCase, sNewMsd);
}