/// <summary>
/// Runs every tree in <paramref name="coll"/> through the multi-word
/// preprocessing pipeline: tag fixing via
/// <c>MultiWordPreprocessor.TraverseAndFix</c>, phrase expansion via
/// <c>MultiWordTreeExpander.ExpandPhrases</c>, then whole-tree normalization.
/// </summary>
/// <param name="coll">Trees to process; enumerated once, in iteration order.</param>
/// <returns>
/// A new list holding one processed tree per input tree, in the same order.
/// </returns>
public virtual ICollection <Tree> Process(ICollection <Tree> coll)
{
    IList<Tree> ret = new List<Tree>();

    // Apparently TsurgeonPatterns are not thread safe, so a fresh expander
    // is created for each call rather than shared.
    MultiWordTreeExpander expander = new MultiWordTreeExpander();

    foreach (Tree t in coll)
    {
        // Begin with basic POS / phrasal category inference
        MultiWordPreprocessor.TraverseAndFix(t, null, this._enclosing.unigramTagger, this.ner);

        // Now "decompress" further the expanded trees formed by multiword
        // token splitting. The foreach iteration variable is read-only in
        // C# (CS1656), so intermediate results go into their own locals.
        Tree expanded = expander.ExpandPhrases(t, this.tn, this.tf);
        Tree normalized = this.tn.NormalizeWholeTree(expanded, this.tf);

        ret.Add(normalized);
    }

    return ret;
}
/// <summary>
/// Reads every tree from <paramref name="treeFile"/> (decoded as UTF-8),
/// fixes its tags with <c>TraverseAndFix</c>, expands multi-word phrases,
/// optionally normalizes it, and writes one tree per line to a sibling file
/// named <c>treeFile + ".fixed"</c>. Prints the number of processed trees to
/// standard output on success.
/// </summary>
/// <param name="treeFile">Input tree file; the output path is derived from it.</param>
/// <param name="unigramTagger">Tag counter forwarded to <c>TraverseAndFix</c>.</param>
/// <param name="retainNER">Flag forwarded to <c>TraverseAndFix</c>.</param>
/// <param name="tn">
/// Normalizer passed to the expander; when non-null it is also applied to the
/// whole tree after expansion.
/// </param>
/// <remarks>
/// I/O and encoding failures are not propagated: their stack traces are
/// printed and the method returns. The reader and writer are closed even when
/// processing fails part-way through.
/// </remarks>
private static void ResolveDummyTags(File treeFile, TwoDimensionalCounter <string, string> unigramTagger, bool retainNER, TreeNormalizer tn)
{
    ITreeFactory tf = new LabeledScoredTreeFactory();
    MultiWordTreeExpander expander = new MultiWordTreeExpander();
    try
    {
        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
        ITreeReaderFactory trf = new SpanishTreeReaderFactory();
        ITreeReader tr = trf.NewTreeReader(br);
        int nTrees = 0;
        try
        {
            PrintWriter pw = new PrintWriter(new TextWriter(new FileOutputStream(new File(treeFile + ".fixed")), false, "UTF-8"));
            try
            {
                for (Tree t; (t = tr.ReadTree()) != null; nTrees++)
                {
                    TraverseAndFix(t, null, unigramTagger, retainNER);

                    // Now "decompress" further the expanded trees formed by
                    // multiword token splitting
                    t = expander.ExpandPhrases(t, tn, tf);
                    if (tn != null)
                    {
                        t = tn.NormalizeWholeTree(t, tf);
                    }
                    pw.Println(t.ToString());
                }
            }
            finally
            {
                // Close the writer on failure too, so the output file is
                // never left open.
                pw.Close();
            }
        }
        finally
        {
            // Closed after the writer, matching the original close order.
            // NOTE(review): presumably this also closes the wrapped
            // BufferedReader; confirm against the ITreeReader implementation.
            tr.Close();
        }
        System.Console.Out.WriteLine("Processed " + nTrees + " trees");
    }
    catch (UnsupportedEncodingException e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
    }
    catch (FileNotFoundException e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
    }
    catch (IOException e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
    }
}