/// <summary>Processes a single file containing AnCora XML trees.</summary>
        /// <remarks>
        /// Reads every tree from <paramref name="file"/>. Each tree is passed through
        /// <c>FindSplitPoint</c> / <c>Split</c> repeatedly until no further split point is
        /// reported; every resulting piece is appended to the returned list and fed to
        /// <c>UpdateTagger</c>.
        /// </remarks>
        /// <param name="file">Tree file to read.</param>
        /// <param name="trf">Factory used to create the tree reader for the file.</param>
        /// <param name="encoding">Character encoding used to decode the file.</param>
        /// <returns>
        /// The tagger counts paired with the list of trees read, or <c>null</c> when an
        /// <c>IOException</c> was caught.
        /// </returns>
        private static Pair <TwoDimensionalCounter <string, string>, IList <Tree> > ProcessTreeFile(File file, SpanishXMLTreeReaderFactory trf, string encoding)
        {
            TwoDimensionalCounter <string, string> tagger = new TwoDimensionalCounter <string, string>();

            try
            {
                Reader       @in   = new BufferedReader(new InputStreamReader(new FileInputStream(file), encoding));
                ITreeReader  tr    = trf.NewTreeReader(file.GetPath(), @in);
                IList <Tree> trees = new List <Tree>();
                try
                {
                    Tree t;
                    Tree splitPoint;
                    while ((t = tr.ReadTree()) != null)
                    {
                        do
                        {
                            // We may need to split the current tree into multiple parts.
                            // (If not, a call to `Split` with a `null` split-point is a
                            // no-op.)
                            splitPoint = FindSplitPoint(t);
                            Pair <Tree, Tree> split = Split(t, splitPoint);
                            Tree toAdd = split.First();
                            t = split.Second();
                            trees.Add(toAdd);
                            UpdateTagger(tagger, toAdd);
                        }while (splitPoint != null);
                    }
                }
                finally
                {
                    // Release the reader even when reading or splitting fails part-way.
                    tr.Close();
                }
                return(new Pair <TwoDimensionalCounter <string, string>, IList <Tree> >(tagger, trees));
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
                return(null);
            }
        }
Beispiel #2
0
        /// <summary>Reads the trees in the given file and prints each one with <c>PennPrint</c>.</summary>
        /// <param name="args">File to run on</param>
        public static void Main(string[] args)
        {
            if (args.Length < 1)
            {
                System.Console.Out.Printf("Usage: java %s tree_file%n", typeof(Edu.Stanford.Nlp.Trees.International.Negra.NegraPennTreeReaderFactory).FullName);
                return;
            }
            ITreebankLanguagePack tlp = new NegraPennLanguagePack();
            ITreeReaderFactory    trf = new Edu.Stanford.Nlp.Trees.International.Negra.NegraPennTreeReaderFactory(2, false, false, tlp);

            try
            {
                ITreeReader tr = trf.NewTreeReader(IOUtils.ReaderFromString(args[0], tlp.GetEncoding()));
                try
                {
                    for (Tree t; (t = tr.ReadTree()) != null;)
                    {
                        t.PennPrint();
                    }
                }
                finally
                {
                    // Close the reader even if a tree fails to read or print.
                    tr.Close();
                }
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
Beispiel #3
0
 /// <summary>
 /// Re-reads <paramref name="treeFile"/>, runs <c>TraverseAndFix</c> on every tree and
 /// writes the result, one tree per line, to a sibling file with the suffix ".fixed".
 /// </summary>
 /// <param name="treeFile">Tree file to read; the output path is derived from it.</param>
 /// <param name="pretermLabel">Counter passed through to <c>TraverseAndFix</c>.</param>
 /// <param name="unigramTagger">Counter passed through to <c>TraverseAndFix</c>.</param>
 private static void ResolveDummyTags(File treeFile, TwoDimensionalCounter <string, string> pretermLabel, TwoDimensionalCounter <string, string> unigramTagger)
 {
     try
     {
         BufferedReader     br  = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
         ITreeReaderFactory trf = new FrenchTreeReaderFactory();
         ITreeReader        tr  = trf.NewTreeReader(br);
         int nTrees             = 0;
         try
         {
             PrintWriter pw = new PrintWriter(new TextWriter(new FileOutputStream(new File(treeFile + ".fixed")), false, "UTF-8"));
             try
             {
                 for (Tree t; (t = tr.ReadTree()) != null; nTrees++)
                 {
                     TraverseAndFix(t, pretermLabel, unigramTagger);
                     pw.Println(t.ToString());
                 }
             }
             finally
             {
                 // Always release the output file, even when a tree fails mid-way.
                 pw.Close();
             }
         }
         finally
         {
             tr.Close();
         }
         System.Console.Out.WriteLine("Processed " + nTrees + " trees");
     }
     catch (UnsupportedEncodingException e)
     {
         Sharpen.Runtime.PrintStackTrace(e);
     }
     catch (FileNotFoundException e)
     {
         Sharpen.Runtime.PrintStackTrace(e);
     }
     catch (IOException e)
     {
         Sharpen.Runtime.PrintStackTrace(e);
     }
 }
Beispiel #4
0
        /// <summary>
        /// Splits a tree file into dev, train and test files by tree position: trees
        /// 0..482 go to ".clean.dev", trees 483..5723 to ".clean.train" and the rest
        /// to ".clean.test".
        /// </summary>
        /// <param name="args">Exactly one element: the path of the tree file.</param>
        public static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                System.Console.Error.Printf("Usage: java %s tree_file%n", typeof(SplitMaker).FullName);
                System.Environment.Exit(-1);
            }
            // Exclusive upper bounds (by tree index) of the dev and train partitions.
            const int devEnd   = 483;
            const int trainEnd = 5724;

            ITreebankLanguagePack tlp = new HebrewTreebankLanguagePack();
            string inputFile          = args[0];
            File   treeFile           = new File(inputFile);

            try
            {
                ITreeReaderFactory trf     = new HebrewTreeReaderFactory();
                BufferedReader     br      = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), tlp.GetEncoding()));
                ITreeReader        tr      = trf.NewTreeReader(br);
                PrintWriter        pwDev   = null;
                PrintWriter        pwTrain = null;
                PrintWriter        pwTest  = null;
                int numTrees = 0;
                try
                {
                    pwDev   = new PrintWriter(new TextWriter(new FileOutputStream(inputFile + ".clean.dev"), false, tlp.GetEncoding()));
                    pwTrain = new PrintWriter(new TextWriter(new FileOutputStream(inputFile + ".clean.train"), false, tlp.GetEncoding()));
                    pwTest  = new PrintWriter(new TextWriter(new FileOutputStream(inputFile + ".clean.test"), false, tlp.GetEncoding()));
                    for (Tree t; ((t = tr.ReadTree()) != null); numTrees++)
                    {
                        PrintWriter target;
                        if (numTrees < devEnd)
                        {
                            target = pwDev;
                        }
                        else if (numTrees < trainEnd)
                        {
                            target = pwTrain;
                        }
                        else
                        {
                            target = pwTest;
                        }
                        target.Println(t.ToString());
                    }
                }
                finally
                {
                    // Close whatever was opened, even when a writer could not be
                    // created or a tree failed to read.
                    if (pwDev != null)
                    {
                        pwDev.Close();
                    }
                    if (pwTrain != null)
                    {
                        pwTrain.Close();
                    }
                    if (pwTest != null)
                    {
                        pwTest.Close();
                    }
                    tr.Close();
                }
                System.Console.Error.Printf("Processed %d trees.%n", numTrees);
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
        /// <summary>For debugging.</summary>
        /// <remarks>
        /// Reads every tree from each file given on the command line and prints one line
        /// per tree in the form "file-sentenceId TAB tree", where "file" is the file name
        /// without its last extension. Per-file and total tree counts go to standard error.
        /// </remarks>
        /// <param name="args">One or more tree file paths.</param>
        public static void Main(string[] args)
        {
            if (args.Length < 1)
            {
                System.Console.Error.Printf("Usage: java %s tree_file(s)%n%n", typeof(Edu.Stanford.Nlp.Trees.International.French.FrenchXMLTreeReader).FullName);
                System.Environment.Exit(-1);
            }
            IList <File> fileList = new List <File>();

            foreach (string arg in args)
            {
                fileList.Add(new File(arg));
            }
            ITreeReaderFactory trf             = new FrenchXMLTreeReaderFactory(false);
            int totalTrees                     = 0;
            ICollection <string> morphAnalyses = Generics.NewHashSet();

            try
            {
                foreach (File file in fileList)
                {
                    ITreeReader tr       = trf.NewTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8")));
                    int         numTrees = 0;
                    try
                    {
                        // A name without any '.' has no extension to strip; use it whole
                        // instead of passing -1 as the substring end index.
                        string fileName          = file.GetName();
                        int    dotIndex          = fileName.LastIndexOf('.');
                        string canonicalFileName = dotIndex < 0 ? fileName : Sharpen.Runtime.Substring(fileName, 0, dotIndex);
                        for (Tree t; (t = tr.ReadTree()) != null; numTrees++)
                        {
                            string ftbID = ((CoreLabel)t.Label()).Get(typeof(CoreAnnotations.SentenceIDAnnotation));
                            System.Console.Out.Printf("%s-%s\t%s%n", canonicalFileName, ftbID, t.ToString());
                            IList <ILabel> leaves = t.Yield();
                            foreach (ILabel label in leaves)
                            {
                                if (label is CoreLabel)
                                {
                                    morphAnalyses.Add(((CoreLabel)label).OriginalText());
                                }
                            }
                        }
                    }
                    finally
                    {
                        // Release this file's reader before moving on or bailing out.
                        tr.Close();
                    }
                    System.Console.Error.Printf("%s: %d trees%n", file.GetName(), numTrees);
                    totalTrees += numTrees;
                }
                //wsg2011: Print out the observed morphological analyses
                //      for(String analysis : morphAnalyses)
                //        log.info(analysis);
                System.Console.Error.Printf("%nRead %d trees%n", totalTrees);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
Beispiel #6
0
        //Delete sentence-initial punctuation
        //Delete sentence final punctuation that is preceded by punctuation (first time)
        //Delete sentence final punctuation that is preceded by punctuation (second time)
        //Convert remaining sentence-final punctuation to . if it is not [.!?]
        //Delete medial, sentence-final punctuation
        //Now move the sentence-final mark under SENT
        //For those trees that lack a sentence-final punc, add one.
        //Finally, delete these punctuation marks, which I can't seem to kill otherwise...
        //A bad MWADV tree in the training set
        // Not sure why this got a label of X.  Similar trees suggest it
        // should be A instead
        // This also seems to be mislabeled
        /// <summary>
        /// Reads the trees in a file, discards those matching the two "bad tree" patterns,
        /// transforms the rest with an <c>FTBCorrector</c> and prints them to standard output.
        /// </summary>
        /// <param name="args">Exactly one element: the tree file name.</param>
        public static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                log.Info("Usage: java " + typeof(Edu.Stanford.Nlp.International.French.Pipeline.FTBCorrector).FullName + " filename\n");
                System.Environment.Exit(-1);
            }
            ITreeTransformer tt = new Edu.Stanford.Nlp.International.French.Pipeline.FTBCorrector();
            File             f  = new File(args[0]);

            try
            {
                //These bad trees in the Candito training set should be thrown out:
                //  (ROOT (SENT (" ") (. .)))
                //  (ROOT (SENT (. .)))
                TregexPattern      pBadTree  = TregexPattern.Compile("@SENT <: @PUNC");
                TregexPattern      pBadTree2 = TregexPattern.Compile("@SENT <1 @PUNC <2 @PUNC !<3 __");
                BufferedReader     br        = new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"));
                ITreeReaderFactory trf       = new FrenchTreeReaderFactory();
                ITreeReader        tr        = trf.NewTreeReader(br);
                // Counts only trees actually written, so the final message is accurate
                // when some trees are discarded.
                int nWritten = 0;
                try
                {
                    for (Tree t; (t = tr.ReadTree()) != null;)
                    {
                        TregexMatcher m  = pBadTree.Matcher(t);
                        TregexMatcher m2 = pBadTree2.Matcher(t);
                        if (m.Find() || m2.Find())
                        {
                            log.Info("Discarding tree: " + t.ToString());
                            continue;
                        }
                        Tree fixedT = tt.TransformTree(t);
                        System.Console.Out.WriteLine(fixedT.ToString());
                        nWritten++;
                    }
                }
                finally
                {
                    // Close the reader even if a tree fails to read or transform.
                    tr.Close();
                }
                System.Console.Error.Printf("Wrote %d trees%n", nWritten);
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (TregexParseException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
Beispiel #7
0
        /// <summary>
        /// Collects MWE statistics over every tree in a file, writes the four counters to
        /// CSV files via <c>PrintCounter</c>, then calls <c>ResolveDummyTags</c> on the same file.
        /// </summary>
        /// <param name="args">Exactly one element: the tree file path.</param>
        public static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                System.Console.Error.Printf("Usage: java %s file%n", typeof(MWEPreprocessor).FullName);
                System.Environment.Exit(-1);
            }
            File treeFile = new File(args[0]);
            TwoDimensionalCounter <string, string> labelTerm     = new TwoDimensionalCounter <string, string>();
            TwoDimensionalCounter <string, string> termLabel     = new TwoDimensionalCounter <string, string>();
            TwoDimensionalCounter <string, string> labelPreterm  = new TwoDimensionalCounter <string, string>();
            TwoDimensionalCounter <string, string> pretermLabel  = new TwoDimensionalCounter <string, string>();
            TwoDimensionalCounter <string, string> unigramTagger = new TwoDimensionalCounter <string, string>();

            try
            {
                BufferedReader     br  = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
                ITreeReaderFactory trf = new FrenchTreeReaderFactory();
                ITreeReader        tr  = trf.NewTreeReader(br);
                try
                {
                    for (Tree t; (t = tr.ReadTree()) != null;)
                    {
                        CountMWEStatistics(t, unigramTagger, labelPreterm, pretermLabel, labelTerm, termLabel);
                    }
                }
                finally
                {
                    // Closes the underlying reader, also when counting fails part-way.
                    // This happens before ResolveDummyTags re-opens the same file.
                    tr.Close();
                }
                System.Console.Out.WriteLine("Generating {MWE Type -> Terminal}");
                PrintCounter(labelTerm, "label_term.csv");
                System.Console.Out.WriteLine("Generating {Terminal -> MWE Type}");
                PrintCounter(termLabel, "term_label.csv");
                System.Console.Out.WriteLine("Generating {MWE Type -> POS sequence}");
                PrintCounter(labelPreterm, "label_pos.csv");
                System.Console.Out.WriteLine("Generating {POS sequence -> MWE Type}");
                PrintCounter(pretermLabel, "pos_label.csv");
                System.Console.Out.WriteLine("Resolving DUMMY tags");
                ResolveDummyTags(treeFile, pretermLabel, unigramTagger);
                System.Console.Out.WriteLine("#Unknown Word Types: " + MWEPreprocessor.ManualUWModel.nUnknownWordTypes);
                System.Console.Out.WriteLine("#Missing POS: " + nMissingPOS);
                System.Console.Out.WriteLine("#Missing Phrasal: " + nMissingPhrasal);
                System.Console.Out.WriteLine("Done!");
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
Beispiel #8
0
        /// <summary>
        /// Walks a tree file and a Morfette analysis file in lock-step, rewrites every leaf
        /// as "word + MorphoMark + lemma + LemmaMark + tag" and prints each tree.
        /// </summary>
        /// <remarks>
        /// Logs a warning when one input runs out before the other.
        /// </remarks>
        /// <param name="args">Two elements: the tree file and the Morfette file.</param>
        public static void Main(string[] args)
        {
            if (args.Length != 2)
            {
                System.Console.Error.Printf("Usage: java %s tree_file morfette_tnt_file%n", typeof(MungeTreesWithMorfetteAnalyses).FullName);
                System.Environment.Exit(-1);
            }
            string             treeFile     = args[0];
            string             morfetteFile = args[1];
            ITreeReaderFactory trf          = new FrenchTreeReaderFactory();

            try
            {
                ITreeReader tr = trf.NewTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8")));
                try
                {
                    IEnumerator <IList <CoreLabel> > morfetteItr = new MungeTreesWithMorfetteAnalyses.MorfetteFileIterator(morfetteFile);
                    bool uneven = false;
                    while (true)
                    {
                        // Advance both inputs together so that neither side silently
                        // drops an item when the other one runs out first.
                        Tree tree        = tr.ReadTree();
                        bool hasAnalysis = morfetteItr.MoveNext();
                        if (tree == null || !hasAnalysis)
                        {
                            uneven = tree != null || hasAnalysis;
                            break;
                        }
                        IList <CoreLabel> analysis = morfetteItr.Current;
                        IList <ILabel>    leaves   = tree.Yield();
                        System.Diagnostics.Debug.Assert(analysis.Count == leaves.Count);
                        int yieldLen = leaves.Count;
                        for (int i = 0; i < yieldLen; ++i)
                        {
                            CoreLabel tokenAnalysis = analysis[i];
                            ILabel    token         = leaves[i];
                            string    lemma         = GetLemma(token.Value(), tokenAnalysis.Lemma());
                            // .NET composite formatting uses {n} placeholders; Java-style
                            // "%s" would be emitted literally.
                            string    newLeaf       = string.Format("{0}{1}{2}{3}{4}", token.Value(), MorphoFeatureSpecification.MorphoMark, lemma, MorphoFeatureSpecification.LemmaMark, tokenAnalysis.Tag());
                            ((CoreLabel)token).SetValue(newLeaf);
                        }
                        System.Console.Out.WriteLine(tree.ToString());
                    }
                    if (uneven)
                    {
                        log.Info("WARNING: Uneven input files!");
                    }
                }
                finally
                {
                    // Close the tree reader even when munging fails part-way.
                    tr.Close();
                }
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
Beispiel #9
0
        /// <summary>
        /// Builds a unigram tagger from every tree in the input file, then calls
        /// <c>ResolveDummyTags</c> on the same file and prints summary counts.
        /// </summary>
        /// <remarks>
        /// Options are parsed with <c>StringUtils.ArgsToProperties</c>; "ner" (default false)
        /// and "normalize" (default true) are read as booleans, and the entry stored under
        /// the empty key is used as the tree file path.
        /// </remarks>
        /// <param name="args">Command-line arguments.</param>
        public static void Main(string[] args)
        {
            Properties options = StringUtils.ArgsToProperties(args, argOptionDefs);

            if (!options.Contains(string.Empty) || options.Contains("help"))
            {
                log.Info(Usage());
                return;
            }
            bool retainNER = PropertiesUtils.GetBool(options, "ner", false);
            bool normalize = PropertiesUtils.GetBool(options, "normalize", true);
            File treeFile  = new File(options.GetProperty(string.Empty));
            TwoDimensionalCounter <string, string> labelTerm     = new TwoDimensionalCounter <string, string>();
            TwoDimensionalCounter <string, string> termLabel     = new TwoDimensionalCounter <string, string>();
            TwoDimensionalCounter <string, string> labelPreterm  = new TwoDimensionalCounter <string, string>();
            TwoDimensionalCounter <string, string> pretermLabel  = new TwoDimensionalCounter <string, string>();
            TwoDimensionalCounter <string, string> unigramTagger = new TwoDimensionalCounter <string, string>();

            try
            {
                BufferedReader     br  = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
                ITreeReaderFactory trf = new SpanishTreeReaderFactory();
                ITreeReader        tr  = trf.NewTreeReader(br);
                try
                {
                    for (Tree t; (t = tr.ReadTree()) != null;)
                    {
                        UpdateTagger(unigramTagger, t);
                    }
                }
                finally
                {
                    // Closes the underlying reader, also when tagging fails part-way.
                    // This happens before ResolveDummyTags re-opens the same file.
                    tr.Close();
                }
                System.Console.Out.WriteLine("Resolving DUMMY tags");
                ResolveDummyTags(treeFile, unigramTagger, retainNER, normalize ? new SpanishTreeNormalizer(true, false, false) : null);
                System.Console.Out.WriteLine("#Unknown Word Types: " + MultiWordPreprocessor.ManualUWModel.nUnknownWordTypes);
                // .NET composite formatting uses {n} placeholders; the Java-style
                // "%d" / "%.2f" specifiers would be printed literally.
                System.Console.Out.WriteLine(string.Format("#Missing POS: {0} (fixed: {1}, {2:F2}%)", nMissingPOS, nFixedPOS, (double)nFixedPOS / nMissingPOS * 100));
                System.Console.Out.WriteLine(string.Format("#Missing Phrasal: {0} (fixed: {1}, {2:F2}%)", nMissingPhrasal, nFixedPhrasal, (double)nFixedPhrasal / nMissingPhrasal * 100));
                System.Console.Out.WriteLine("Done!");
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
        /// <summary>
        /// Prints one "word lemma morph" line per leaf of every tree in the file, with a
        /// blank line after each tree.
        /// </summary>
        /// <remarks>
        /// When the morphology string is null, empty or "XXX", the value of the matching
        /// preterminal label is printed in its place.
        /// </remarks>
        /// <param name="args">Exactly one element: the tree file path.</param>
        public static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                System.Console.Error.Printf("Usage: java %s tree_file%n", typeof(TreeToMorfette).FullName);
                System.Environment.Exit(-1);
            }
            string             treeFile = args[0];
            ITreeReaderFactory trf      = new FrenchTreeReaderFactory();

            try
            {
                ITreeReader tr = trf.NewTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8")));
                try
                {
                    for (Tree tree1; (tree1 = tr.ReadTree()) != null;)
                    {
                        IList <ILabel> pretermYield = tree1.PreTerminalYield();
                        IList <ILabel> leaves       = tree1.Yield();
                        int            yieldLen     = leaves.Count;
                        for (int i = 0; i < yieldLen; ++i)
                        {
                            CoreLabel             rawToken   = (CoreLabel)leaves[i];
                            string                word       = rawToken.Value();
                            string                morphStr   = rawToken.OriginalText();
                            Pair <string, string> lemmaMorph = MorphoFeatureSpecification.SplitMorphString(word, morphStr);
                            string                lemma      = lemmaMorph.First();
                            string                morph      = lemmaMorph.Second();
                            if (morph == null || morph.Equals(string.Empty) || morph.Equals("XXX"))
                            {
                                // No usable analysis: fall back to the preterminal's value.
                                morph = ((CoreLabel)pretermYield[i]).Value();
                            }
                            System.Console.Out.Printf("%s %s %s%n", word, lemma, morph);
                        }
                        System.Console.Out.WriteLine();
                    }
                }
                finally
                {
                    // Close the reader even if a tree fails to read or convert.
                    tr.Close();
                }
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
Beispiel #11
0
        /// <summary>
        /// Re-reads <paramref name="treeFile"/>, fixes and expands every tree, optionally
        /// normalizes it, and writes the result, one tree per line, to a sibling file with
        /// the suffix ".fixed".
        /// </summary>
        /// <param name="treeFile">Tree file to read; the output path is derived from it.</param>
        /// <param name="unigramTagger">Counter passed through to <c>TraverseAndFix</c>.</param>
        /// <param name="retainNER">Flag passed through to <c>TraverseAndFix</c>.</param>
        /// <param name="tn">Normalizer applied to each tree; skipped when <c>null</c>.</param>
        private static void ResolveDummyTags(File treeFile, TwoDimensionalCounter <string, string> unigramTagger, bool retainNER, TreeNormalizer tn)
        {
            ITreeFactory          tf       = new LabeledScoredTreeFactory();
            MultiWordTreeExpander expander = new MultiWordTreeExpander();

            try
            {
                BufferedReader     br  = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
                ITreeReaderFactory trf = new SpanishTreeReaderFactory();
                ITreeReader        tr  = trf.NewTreeReader(br);
                int nTrees             = 0;
                try
                {
                    PrintWriter pw = new PrintWriter(new TextWriter(new FileOutputStream(new File(treeFile + ".fixed")), false, "UTF-8"));
                    try
                    {
                        for (Tree t; (t = tr.ReadTree()) != null; nTrees++)
                        {
                            TraverseAndFix(t, null, unigramTagger, retainNER);
                            // Now "decompress" further the expanded trees formed by
                            // multiword token splitting
                            t = expander.ExpandPhrases(t, tn, tf);
                            if (tn != null)
                            {
                                t = tn.NormalizeWholeTree(t, tf);
                            }
                            pw.Println(t.ToString());
                        }
                    }
                    finally
                    {
                        // Always release the output file, even when a tree fails mid-way.
                        pw.Close();
                    }
                }
                finally
                {
                    tr.Close();
                }
                System.Console.Out.WriteLine("Processed " + nTrees + " trees");
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
        /// <summary>For debugging.</summary>
        /// <remarks>
        /// Runs an <c>MWETreeVisitorExternal</c> over every tree in the file and prints each
        /// visited tree; a tree whose value is "ROOT" is replaced by its first child first.
        /// </remarks>
        /// <param name="args">Exactly one element: the tree file path.</param>
        public static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                System.Console.Error.Printf("Usage: java %s atb_tree_file > atb_tree_file.out%n", typeof(Edu.Stanford.Nlp.International.Arabic.Pipeline.MWETreeVisitorExternal).FullName);
                System.Environment.Exit(-1);
            }
            ITreeReaderFactory trf = new ArabicTreeReaderFactory();

            try
            {
                ITreeReader  tr      = trf.NewTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(args[0]), "UTF-8")));
                ITreeVisitor visitor = new Edu.Stanford.Nlp.International.Arabic.Pipeline.MWETreeVisitorExternal();
                int          treeId  = 0;
                try
                {
                    for (Tree tree; (tree = tr.ReadTree()) != null; ++treeId)
                    {
                        if (tree.Value().Equals("ROOT"))
                        {
                            // Skip over the ROOT tag
                            tree = tree.FirstChild();
                        }
                        visitor.VisitTree(tree);
                        System.Console.Out.WriteLine(tree.ToString());
                    }
                }
                finally
                {
                    // Close the reader even if a tree fails to read or visit.
                    tr.Close();
                }
                System.Console.Error.Printf("Processed %d trees.%n", treeId);
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
Beispiel #13
0
        //Delete sentence-initial punctuation
        //Delete sentence-initial punctuation (again)
        //Delete sentence final punctuation that is preceded by punctuation (first time)
        //Delete sentence final punctuation that is preceded by punctuation (second time)
        //Convert remaining sentence-final punctuation to . if it is not [.!?]
        //Delete medial, sentence-final punctuation
        //    ("@PUNC=punc <: /[!\\.\\?]+/ $. __\n"
        //        + "prune punc\n"
        //        + "\n") +
        //Now move the sentence-final mark under the top-level node
        //For those trees that lack a sentence-final punc, add one.
        //    ("/^[^\\.!\\?]$/ >>- (__ > @ROOT <- __=loc) <: __\n"
        //        + "insert (PUNC .) $- loc\n"
        //        + "\n");
        /// <summary>
        /// Transforms every tree in a file with an <c>ATBCorrector</c> and prints the result
        /// to standard output; the tree count goes to standard error.
        /// </summary>
        /// <param name="args">Exactly one element: the tree file name.</param>
        public static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                log.Info("Usage: java " + typeof(Edu.Stanford.Nlp.International.Arabic.Pipeline.ATBCorrector).FullName + " filename\n");
                System.Environment.Exit(-1);
            }
            ITreeTransformer tt = new Edu.Stanford.Nlp.International.Arabic.Pipeline.ATBCorrector();
            File             f  = new File(args[0]);

            try
            {
                BufferedReader     br  = new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"));
                ITreeReaderFactory trf = new ArabicTreeReaderFactory.ArabicRawTreeReaderFactory();
                ITreeReader        tr  = trf.NewTreeReader(br);
                int nTrees             = 0;
                try
                {
                    for (Tree t; (t = tr.ReadTree()) != null; nTrees++)
                    {
                        Tree fixedT = tt.TransformTree(t);
                        System.Console.Out.WriteLine(fixedT.ToString());
                    }
                }
                finally
                {
                    // Close the reader even if a tree fails to read or transform.
                    tr.Close();
                }
                System.Console.Error.Printf("Wrote %d trees%n", nTrees);
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
        /// <summary>
        /// Reads every tree in a file with a <c>HebrewTreeReaderFactory</c> reader and prints
        /// it to standard output; the tree count goes to standard error.
        /// </summary>
        /// <param name="args">Exactly one element: the tree file path.</param>
        public static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                System.Console.Error.Printf("Usage: java %s tree_file > trees%n", typeof(HebrewTreeReaderFactory).FullName);
                System.Environment.Exit(-1);
            }
            ITreebankLanguagePack tlp = new HebrewTreebankLanguagePack();
            File treeFile             = new File(args[0]);

            try
            {
                ITreeReaderFactory trf = new HebrewTreeReaderFactory();
                BufferedReader     br  = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), tlp.GetEncoding()));
                ITreeReader        tr  = trf.NewTreeReader(br);
                int numTrees           = 0;
                try
                {
                    for (Tree t; ((t = tr.ReadTree()) != null); numTrees++)
                    {
                        System.Console.Out.WriteLine(t.ToString());
                    }
                }
                finally
                {
                    // Close the reader even if a tree fails to read.
                    tr.Close();
                }
                System.Console.Error.Printf("Processed %d trees.%n", numTrees);
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
 /// <summary>Closes this <code>TreeReader</code> by delegating to the wrapped <c>tr</c>.</summary>
 /// <exception cref="System.IO.IOException"/>
 public virtual void Close() => tr.Close();
        /// <summary>
        /// Reads the tree file named by <c>args[0]</c> (decoded as UTF-8) and writes one
        /// <c>word&lt;TAB&gt;label</c> line per preterminal to standard output, followed by an
        /// empty line after each tree.
        /// </summary>
        /// <remarks>
        /// The label is derived from a single type character. If the value of the node returned by
        /// <c>Ancestor(1, tree)</c> starts with <c>grup.nom.</c>, the character right after that
        /// prefix is used; otherwise, if the preterminal's own value matches <c>np0000?</c>, its
        /// last character is used. The character maps to: <c>p</c> = PERS, <c>l</c> = LUG,
        /// <c>o</c> = ORG, <c>0</c> = OTROS, anything else = O.
        /// All output is buffered and written once every tree has been read, so nothing is
        /// printed if reading fails part-way. I/O failures are reported via a stack trace.
        /// </remarks>
        /// <param name="args">Command-line arguments; <c>args[0]</c> is the path of the tree file.</param>
        public static void Main(string[] args)
        {
            if (args.Length < 1)
            {
                System.Console.Error.Printf("Usage: java %s tree_file%n", typeof(TreeToTSV).FullName);
                System.Environment.Exit(-1);
            }
            string treeFile = args[0];

            // Index of the type character in "grup.nom.<x>" (i.e. the length of the prefix).
            const int neTypeIndex = 9;
            // Index of the type character in "np0000<x>".
            const int npTypeIndex = 6;

            try
            {
                BufferedReader     br  = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
                ITreeReaderFactory trf = new SpanishTreeReaderFactory();
                ITreeReader        tr  = trf.NewTreeReader(br);
                try
                {
                    StringBuilder sb        = new StringBuilder();
                    string        nl        = Runtime.GetProperty("line.separator");
                    Pattern       nePattern = Pattern.Compile("^grup\\.nom\\.");
                    Pattern       npPattern = Pattern.Compile("^np0000.$");
                    Tree          tree;
                    while ((tree = tr.ReadTree()) != null)
                    {
                        foreach (Tree t in tree)
                        {
                            if (!t.IsPreTerminal())
                            {
                                continue;
                            }
                            char type = 'O';
                            // NOTE(review): the original calls this node "grandma", but Ancestor(1, tree)
                            // may well be the direct parent of the preterminal -- confirm against Tree.Ancestor.
                            Tree   grandma      = t.Ancestor(1, tree);
                            string grandmaValue = ((CoreLabel)grandma.Label()).Value();
                            if (nePattern.Matcher(grandmaValue).Find())
                            {
                                // grup.nom.x -- the pattern also matches a bare "grup.nom.", which has no
                                // type character; leave the default type instead of indexing out of range.
                                if (grandmaValue.Length > neTypeIndex)
                                {
                                    type = grandmaValue[neTypeIndex];
                                }
                            }
                            else
                            {
                                // else check the pos for np0000x or not
                                string pos = ((CoreLabel)t.Label()).Value();
                                if (npPattern.Matcher(pos).Find())
                                {
                                    type = pos[npTypeIndex];
                                }
                            }
                            Tree   wordNode = t.FirstChild();
                            string word     = ((CoreLabel)wordNode.Label()).Value();
                            sb.Append(word).Append("\t");
                            switch (type)
                            {
                            case 'p':
                            {
                                sb.Append("PERS");
                                break;
                            }

                            case 'l':
                            {
                                sb.Append("LUG");
                                break;
                            }

                            case 'o':
                            {
                                sb.Append("ORG");
                                break;
                            }

                            case '0':
                            {
                                sb.Append("OTROS");
                                break;
                            }

                            default:
                            {
                                sb.Append("O");
                                break;
                            }
                            }
                            sb.Append(nl);
                        }
                        // Blank line separates consecutive trees in the output.
                        sb.Append(nl);
                    }
                    System.Console.Out.Write(sb.ToString());
                }
                finally
                {
                    // Release the reader even when reading or processing a tree throws.
                    tr.Close();
                }
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
        /// <summary>
        /// Reads the tree file named by <c>args[0]</c> (decoded as UTF-8), collects every subtree
        /// matched by the Tregex pattern <c>/^MW/</c>, and prints frequency statistics about them
        /// to standard output.
        /// </summary>
        /// <remarks>
        /// For each matched node the space-joined words and the space-joined tags of its yield are
        /// recorded. The report has one row per node label (total count, number of word sequences
        /// seen exactly once, and two percentages), a TOTAL row, the overall token count and the
        /// number of distinct tag sequences. I/O and pattern-compilation failures are reported via
        /// a stack trace.
        /// </remarks>
        /// <param name="args">Command-line arguments; exactly one is expected: the tree file path.</param>
        public static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                System.Console.Error.Printf("Usage: java %s file%n", typeof(Edu.Stanford.Nlp.International.French.Scripts.MWEFrequencyDist).FullName);
                System.Environment.Exit(-1);
            }
            File treeFile = new File(args[0]);
            TwoDimensionalCounter <string, string> mweLabelToString = new TwoDimensionalCounter <string, string>();
            ICollection <string> uniquePOSSequences = Generics.NewHashSet();

            try
            {
                BufferedReader     br  = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
                ITreeReaderFactory trf = new FrenchTreeReaderFactory();
                ITreeReader        tr  = trf.NewTreeReader(br);
                try
                {
                    TregexPattern pMWE = TregexPattern.Compile("/^MW/");
                    Tree          t;
                    while ((t = tr.ReadTree()) != null)
                    {
                        //Count MWE statistics
                        TregexMatcher m = pMWE.Matcher(t);
                        while (m.FindNextMatchingNode())
                        {
                            Tree              match       = m.GetMatch();
                            string            label       = match.Value();
                            IList <CoreLabel> taggedYield = match.TaggedLabeledYield();
                            StringBuilder     termYield   = new StringBuilder();
                            StringBuilder     posYield    = new StringBuilder();
                            foreach (CoreLabel cl in taggedYield)
                            {
                                termYield.Append(cl.Word()).Append(" ");
                                posYield.Append(cl.Tag()).Append(" ");
                            }
                            mweLabelToString.IncrementCount(label, termYield.ToString().Trim());
                            uniquePOSSequences.Add(posYield.ToString().Trim());
                        }
                    }
                }
                finally
                {
                    // Release the reader even when reading or matching throws.
                    //Closes the underlying reader
                    tr.Close();
                }
                System.Console.Out.Printf("Type\t#Type\t#Single\t%%Single\t%%Total%n");
                double nMWEs          = mweLabelToString.TotalCount();
                int    nAllSingletons = 0;
                int    nTokens        = 0;
                foreach (string mweLabel in mweLabelToString.FirstKeySet())
                {
                    int               nSingletons = 0;
                    double            totalCount  = mweLabelToString.TotalCount(mweLabel);
                    ICounter <string> mc          = mweLabelToString.GetCounter(mweLabel);
                    foreach (string term in mc.KeySet())
                    {
                        double termCount = mc.GetCount(term);
                        if (termCount == 1.0)
                        {
                            nSingletons++;
                        }
                        // The separator is a regular expression (Java String.split semantics).
                        // string.Split(string) would treat "\s+" as a literal separator and count
                        // every term as a single token, so split through Regex explicitly.
                        int wordsInTerm = System.Text.RegularExpressions.Regex.Split(term, "\\s+").Length;
                        nTokens += wordsInTerm * (int)termCount;
                    }
                    nAllSingletons += nSingletons;
                    System.Console.Out.Printf("%s\t%d\t%d\t%.2f\t%.2f%n", mweLabel, (int)totalCount, nSingletons, 100.0 * nSingletons / totalCount, 100.0 * totalCount / nMWEs);
                }
                System.Console.Out.Printf("TOTAL:\t%d\t%d\t%.2f%n", (int)nMWEs, nAllSingletons, 100.0 * nAllSingletons / nMWEs);
                System.Console.Out.WriteLine("#tokens = " + nTokens);
                System.Console.Out.WriteLine("#unique MWE POS sequences = " + uniquePOSSequences.Count);
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (TregexParseException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }