/// <summary>
/// Runs every tree in <paramref name="coll"/> through multiword preprocessing,
/// phrase expansion and whole-tree normalization.
/// </summary>
/// <param name="coll">Trees to process; enumerated once, in order.</param>
/// <returns>
/// A newly allocated list holding, for each input tree and in input order,
/// the tree returned by the final normalization step.
/// </returns>
public virtual ICollection <Tree> Process(ICollection <Tree> coll)
            {
                IList <Tree> ret = new List <Tree>();
                // Apparently TsurgeonPatterns are not thread safe, so a fresh
                // expander is built on every call rather than shared.
                MultiWordTreeExpander expander = new MultiWordTreeExpander();

                foreach (Tree t in coll)
                {
                    // Begin with basic POS / phrasal category inference.
                    // The call's result (if any) is not used; it is presumably
                    // made for its effect on the tree itself -- TODO confirm.
                    MultiWordPreprocessor.TraverseAndFix(t, null, this._enclosing.unigramTagger, this.ner);

                    // C# forbids assigning to a foreach iteration variable (CS1656),
                    // so the transformed tree is carried in its own local.
                    // Now "decompress" further the expanded trees formed by multiword token splitting
                    Tree processed = expander.ExpandPhrases(t, this.tn, this.tf);
                    processed = this.tn.NormalizeWholeTree(processed, this.tf);
                    ret.Add(processed);
                }
                return ret;
            }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads every tree from <paramref name="treeFile"/>, fixes and expands it,
        /// optionally normalizes it, and writes the result, one tree per line, to a
        /// file whose path is <paramref name="treeFile"/>'s string form followed by
        /// <c>.fixed</c>. The number of trees processed is printed to standard output.
        /// </summary>
        /// <param name="treeFile">Input tree file, read as UTF-8.</param>
        /// <param name="unigramTagger">Passed through unchanged to <c>TraverseAndFix</c>.</param>
        /// <param name="retainNER">Passed through unchanged to <c>TraverseAndFix</c>.</param>
        /// <param name="tn">
        /// Normalizer handed to phrase expansion; when non-null it is also used to
        /// normalize each whole tree before it is written.
        /// </param>
        /// <remarks>
        /// <c>UnsupportedEncodingException</c>, <c>FileNotFoundException</c> and
        /// <c>IOException</c> are caught here and only their stack traces are printed;
        /// none of them is rethrown to the caller.
        /// </remarks>
        private static void ResolveDummyTags(File treeFile, TwoDimensionalCounter <string, string> unigramTagger, bool retainNER, TreeNormalizer tn)
        {
            ITreeFactory          tf       = new LabeledScoredTreeFactory();
            MultiWordTreeExpander expander = new MultiWordTreeExpander();

            try
            {
                BufferedReader     br  = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
                ITreeReaderFactory trf = new SpanishTreeReaderFactory();
                ITreeReader        tr  = trf.NewTreeReader(br);
                int nTrees             = 0;
                try
                {
                    PrintWriter pw = new PrintWriter(new TextWriter(new FileOutputStream(new File(treeFile + ".fixed")), false, "UTF-8"));
                    try
                    {
                        for (Tree t; (t = tr.ReadTree()) != null; nTrees++)
                        {
                            TraverseAndFix(t, null, unigramTagger, retainNER);
                            // Now "decompress" further the expanded trees formed by
                            // multiword token splitting
                            t = expander.ExpandPhrases(t, tn, tf);
                            if (tn != null)
                            {
                                t = tn.NormalizeWholeTree(t, tf);
                            }
                            pw.Println(t.ToString());
                        }
                    }
                    finally
                    {
                        // Close the writer even when a tree fails mid-stream, so the
                        // output handle is not leaked.
                        pw.Close();
                    }
                }
                finally
                {
                    // Closed after the writer, matching the original close order.
                    // An exception thrown here is still handled by the catch
                    // clauses below.
                    tr.Close();
                }
                System.Console.Out.WriteLine("Processed " + nTrees + " trees");
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }