예제 #1
0
        /// <summary>Lets you test out the TreeBinarizer on the command line.</summary>
        /// <remarks>
        /// Loads the treebank found at the given path, runs every tree through a
        /// TreeBinarizer, and prints each original tree followed by its binarized
        /// form to standard output.
        /// This main method doesn't yet handle as many flags as one would like.
        /// Flag names are matched case-insensitively; the recognized flags are:
        /// <list type="bullet">
        /// <item><description><c>-tlp className</c>: type name of the ITreebankLanguagePack to instantiate.</description></item>
        /// <item><description><c>-tlpp className</c>: type name of the ITreebankLangParserParams to instantiate. When given, the treebank, language pack, file extension and head finder are all taken from it, replacing any <c>-tlp</c> value.</description></item>
        /// <item><description><c>-insideFactor</c>: sets the insideFactor argument of the binarizer to true.</description></item>
        /// <item><description><c>-markovOrder n</c>: integer passed as the Markov order argument of the binarizer (default 1).</description></item>
        /// <item><description><c>-simpleLabels</c>: sets the simpleLabels argument of the binarizer to true.</description></item>
        /// <item><description><c>-noRebinarization</c>: sets the noRebinarization argument of the binarizer to true.</description></item>
        /// </list>
        /// Any other flag, or a value-taking flag given as the very last argument,
        /// is logged as an unknown option and skipped. If no treebank path remains
        /// after the flags, a usage message is logged and the process exits.
        /// </remarks>
        /// <param name="args">
        /// Command line arguments: flags as above, followed by treebankPath
        /// </param>
        /// <exception cref="System.Exception">
        /// A class named by <c>-tlp</c> or <c>-tlpp</c> could not be instantiated;
        /// the underlying failure is available as the inner exception.
        /// </exception>
        public static void Main(string[] args)
        {
            ITreebankLangParserParams tlpp = null;
            // No tree reader factory is built here; null is handed to DiskTreebank as-is.
            // NOTE (original author): looks like it must build CategoryWordTagFactory.
            ITreeReaderFactory    trf     = null;
            string                fileExt = "mrg";
            IHeadFinder           hf      = new ModCollinsHeadFinder();
            ITreebankLanguagePack tlp     = new PennTreebankLanguagePack();

            // Binarizer settings that can be changed from the command line.
            bool insideFactor     = false;
            int  mo               = 1;
            bool simpleLabels     = false;
            bool noRebinarization = false;

            // Binarizer settings with no command line flag; they are forwarded to the
            // TreeBinarizer constructor at these fixed values.
            bool   mf  = false;
            bool   uwl = false;
            bool   uat = false;
            double sst = 20.0;
            bool   mfs = false;

            int i = 0;

            // Consume leading flags. Ordinal comparison: "-" is a syntactic prefix,
            // not linguistic text, so the current culture must not influence it.
            while (i < args.Length && args[i].StartsWith("-", System.StringComparison.Ordinal))
            {
                string flag     = args[i];
                bool   hasValue = i + 1 < args.Length;

                if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-tlp") && hasValue)
                {
                    i++;
                    tlp = InstantiateByName<ITreebankLanguagePack>(args[i]);
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-tlpp") && hasValue)
                {
                    i++;
                    tlpp = InstantiateByName<ITreebankLangParserParams>(args[i]);
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-insideFactor"))
                {
                    insideFactor = true;
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-markovOrder") && hasValue)
                {
                    i++;
                    mo = System.Convert.ToInt32(args[i]);
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-simpleLabels"))
                {
                    simpleLabels = true;
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-noRebinarization"))
                {
                    noRebinarization = true;
                }
                else
                {
                    // Also reached by a value-taking flag that is the last argument.
                    log.Info("Unknown option:" + flag);
                }
                i++;
            }

            // Everything was a flag (or there were no arguments): no treebank path left.
            if (i >= args.Length)
            {
                log.Info("usage: java TreeBinarizer [-tlpp class|-markovOrder int|...] treebankPath");
                System.Environment.Exit(0);
            }

            Treebank treebank;

            if (tlpp != null)
            {
                // The parser params object supplies all language-specific pieces,
                // overriding the defaults and any -tlp choice made above.
                treebank = tlpp.MemoryTreebank();
                tlp      = tlpp.TreebankLanguagePack();
                fileExt  = tlp.TreebankFileExtension();
                hf       = tlpp.HeadFinder();
            }
            else
            {
                treebank = new DiskTreebank(trf);
            }
            treebank.LoadPath(args[i], fileExt, true);
            ITreeTransformer tt = new Edu.Stanford.Nlp.Parser.Lexparser.TreeBinarizer(hf, tlp, insideFactor, mf, mo, uwl, uat, sst, mfs, simpleLabels, noRebinarization);

            foreach (Tree t in treebank)
            {
                Tree newT = tt.TransformTree(t);
                System.Console.Out.WriteLine("Original tree:");
                t.PennPrint();
                System.Console.Out.WriteLine("Binarized tree:");
                newT.PennPrint();
                System.Console.Out.WriteLine();
            }
        }

        /// <summary>
        /// Creates an instance of the named type via its parameterless constructor and casts it to <typeparamref name="T"/>.
        /// </summary>
        /// <param name="className">Type name as given on the command line.</param>
        /// <returns>The newly created instance.</returns>
        /// <exception cref="System.Exception">
        /// The type could not be resolved, constructed, or cast; the failure is
        /// logged and wrapped as the inner exception.
        /// </exception>
        private static T InstantiateByName<T>(string className)
        {
            try
            {
                return (T)System.Activator.CreateInstance(Sharpen.Runtime.GetType(className));
            }
            catch (Exception e)
            {
                string message = "Couldn't instantiate: " + className;
                log.Info(message);
                // System.Exception has no constructor taking only an Exception;
                // pass the message plus the cause so the original stack is kept.
                throw new Exception(message, e);
            }
        }