/// <summary>Converts a tree to the Morfette training format.</summary>
/// <remarks>
/// Emits one line per leaf of the form "word lemma morph", each terminated by the
/// platform line separator. When a leaf has no original text, the value of its
/// preterminal tag is used as the morph field; when it has no lemma, the leaf's
/// own value is used as the lemma field.
/// </remarks>
/// <param name="tree">The tree whose leaves and preterminals are CoreLabels.</param>
/// <returns>The concatenated training lines for every leaf of the tree.</returns>
private static string TreeToMorfette(Tree tree)
{
    StringBuilder sb = new StringBuilder();
    IList<ILabel> yield = tree.Yield();
    IList<ILabel> tagYield = tree.PreTerminalYield();
    System.Diagnostics.Debug.Assert(yield.Count == tagYield.Count);
    int listLen = yield.Count;
    for (int i = 0; i < listLen; ++i)
    {
        CoreLabel token = (CoreLabel)yield[i];
        CoreLabel tag = (CoreLabel)tagYield[i];

        // Fall back to the POS tag when the leaf carries no morphological analysis.
        string morphStr = token.OriginalText();
        if (string.IsNullOrEmpty(morphStr))
        {
            morphStr = tag.Value();
        }

        // Fall back to the surface form when the leaf carries no lemma.
        string lemma = token.Lemma();
        if (string.IsNullOrEmpty(lemma))
        {
            lemma = token.Value();
        }

        // Java-style "%s %s %s%n" is not understood by .NET formatting, so the
        // row is assembled explicitly; AppendLine supplies the line separator.
        sb.Append(token.Value())
          .Append(' ')
          .Append(lemma)
          .Append(' ')
          .AppendLine(morphStr);
    }
    return sb.ToString();
}
/// <summary>
/// Command-line entry point. Reads every tree from the given file with a reader
/// obtained from <c>FrenchTreeReaderFactory</c> and prints one "word lemma morph"
/// line per leaf to standard output, followed by a blank line after each tree.
/// </summary>
/// <remarks>
/// The lemma and morph fields come from
/// <c>MorphoFeatureSpecification.SplitMorphString</c> applied to the leaf's value
/// and original text. When the morph part is null, empty, or "XXX", the value of
/// the corresponding preterminal is printed instead. I/O failures are reported
/// via a stack trace rather than rethrown.
/// </remarks>
/// <param name="args">Must contain exactly one element: the path of the tree file.</param>
public static void Main(string[] args)
{
    if (args.Length != 1)
    {
        // .NET composite formatting: "{0}" replaces the Java "%s" specifier.
        System.Console.Error.WriteLine("Usage: java {0} tree_file", typeof(TreeToMorfette).FullName);
        System.Environment.Exit(-1);
    }
    string treeFile = args[0];
    ITreeReaderFactory trf = new FrenchTreeReaderFactory();
    try
    {
        ITreeReader tr = trf.NewTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8")));
        try
        {
            for (Tree tree1; (tree1 = tr.ReadTree()) != null;)
            {
                IList<ILabel> pretermYield = tree1.PreTerminalYield();
                IList<ILabel> yield = tree1.Yield();
                int yieldLen = yield.Count;
                for (int i = 0; i < yieldLen; ++i)
                {
                    CoreLabel rawToken = (CoreLabel)yield[i];
                    string word = rawToken.Value();
                    string morphStr = rawToken.OriginalText();
                    Pair<string, string> lemmaMorph = MorphoFeatureSpecification.SplitMorphString(word, morphStr);
                    string lemma = lemmaMorph.First();
                    string morph = lemmaMorph.Second();

                    // No usable analysis: print the POS tag of the preterminal instead.
                    if (string.IsNullOrEmpty(morph) || morph.Equals("XXX"))
                    {
                        morph = ((CoreLabel)pretermYield[i]).Value();
                    }
                    System.Console.Out.WriteLine("{0} {1} {2}", word, lemma, morph);
                }
                // Blank line separates consecutive trees.
                System.Console.Out.WriteLine();
            }
        }
        finally
        {
            // Release the reader even when ReadTree or the printing loop throws.
            tr.Close();
        }
    }
    catch (UnsupportedEncodingException e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
    }
    catch (FileNotFoundException e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
    }
    catch (IOException e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
    }
}
/// <summary>
/// Replaces the value and tag of a preterminal's label with the alternate tag
/// derived from the morphological analysis of its single leaf.
/// </summary>
/// <remarks>
/// The analysis string is the leaf's original text. When that is null or empty,
/// one is synthesized from the preterminal's value followed by "-" + category + "--"
/// if the leaf has a non-empty category, or by "---" otherwise. The label is left
/// untouched when the resulting features have no alternate tag.
/// </remarks>
/// <param name="t">A preterminal whose label and child label are CoreLabels.</param>
/// <param name="morpho">Specification used to turn the analysis string into features.</param>
/// <exception cref="ArgumentException">
/// If <paramref name="t"/> is not a preterminal, or its label or its child's label
/// is not a CoreLabel.
/// </exception>
private static void ReplacePOSTag(Tree t, MorphoFeatureSpecification morpho)
{
    if (!t.IsPreTerminal())
    {
        throw new ArgumentException("Can only operate on preterminals");
    }

    CoreLabel tagLabel = t.Label() as CoreLabel;
    if (tagLabel == null)
    {
        throw new ArgumentException("Only operates on CoreLabels");
    }

    Tree leaf = t.Children()[0];
    CoreLabel leafLabel = leaf.Label() as CoreLabel;
    if (leafLabel == null)
    {
        throw new ArgumentException("Only operates on CoreLabels");
    }

    // Morphological analysis
    string analysis = leafLabel.OriginalText();
    if (string.IsNullOrEmpty(analysis))
    {
        string tagValue = tagLabel.Value();

        // POS subcategory
        string category = leafLabel.Category();
        analysis = string.IsNullOrEmpty(category)
            ? tagValue + "---"
            : tagValue + "-" + category + "--";
    }

    MorphoFeatures feats = morpho.StrToFeatures(analysis);
    string altTag = feats.GetAltTag();
    if (string.IsNullOrEmpty(altTag))
    {
        return;
    }

    tagLabel.SetValue(altTag);
    tagLabel.SetTag(altTag);
}
/// <summary>
/// Rewrites the leaves of a tree in place, optionally replacing each surface form
/// with its lemma and/or appending the lemma and morphological analysis to the leaf.
/// </summary>
/// <remarks>
/// Increments the <c>nTokens</c> counter once per leaf and the <c>nMorphAnalyses</c>
/// counter once per leaf that has a non-empty original text (only when
/// <paramref name="addMorphoToLeaves"/> is set). A missing lemma falls back to the
/// leaf's word; the lemmas "(" and ")" are escaped to "-LRB-" and "-RRB-".
/// </remarks>
/// <param name="tree">The tree whose leaves are CoreLabels.</param>
/// <param name="lemmasAsLeaves">If true, each leaf's word, value, and lemma are set to the lemma.</param>
/// <param name="addMorphoToLeaves">
/// If true, each leaf's value and word become
/// value + MorphoMark + lemma + LemmaMark + analysis, where the analysis is the
/// leaf's original text, <c>MorphoFeatureSpecification.NoAnalysis</c> when that is
/// missing, or "PUNC" when it starts with "PONCT".
/// </param>
/// <exception cref="ArgumentException">If any leaf label is not a CoreLabel.</exception>
public static void MungeLeaves(Tree tree, bool lemmasAsLeaves, bool addMorphoToLeaves)
{
    IList<ILabel> labels = tree.Yield();
    foreach (ILabel label in labels)
    {
        ++nTokens;
        if (!(label is CoreLabel))
        {
            throw new ArgumentException("Only works with CoreLabels trees");
        }
        CoreLabel coreLabel = (CoreLabel)label;
        string lemma = coreLabel.Lemma();

        // PTB escaping since we're going to put this in the leaf
        if (lemma == null)
        {
            // No lemma, so just add the surface form
            lemma = coreLabel.Word();
        }
        else if (lemma.Equals("("))
        {
            lemma = "-LRB-";
        }
        else if (lemma.Equals(")"))
        {
            lemma = "-RRB-";
        }

        if (lemmasAsLeaves)
        {
            coreLabel.SetWord(lemma);
            coreLabel.SetValue(lemma);
            coreLabel.SetLemma(lemma);
        }

        if (addMorphoToLeaves)
        {
            string morphStr = coreLabel.OriginalText();
            if (string.IsNullOrEmpty(morphStr))
            {
                morphStr = MorphoFeatureSpecification.NoAnalysis;
            }
            else
            {
                ++nMorphAnalyses;
            }

            // Normalize punctuation analyses. Ordinal comparison keeps the prefix
            // test independent of the current culture.
            if (morphStr.StartsWith("PONCT", System.StringComparison.Ordinal))
            {
                morphStr = "PUNC";
            }

            // Java-style "%s" specifiers are not expanded by .NET formatting, so
            // the pieces are concatenated directly.
            string newLeaf = string.Concat(
                coreLabel.Value(),
                MorphoFeatureSpecification.MorphoMark,
                lemma,
                MorphoFeatureSpecification.LemmaMark,
                morphStr);
            coreLabel.SetValue(newLeaf);
            coreLabel.SetWord(newLeaf);
        }
    }
}