コード例 #1
0
        public static void Main(string[] args)
        {
            if (args.Length < minArgs)
            {
                System.Console.Out.WriteLine(Usage());
                System.Environment.Exit(-1);
            }
            Properties options             = StringUtils.ArgsToProperties(args, ArgDefs());
            Language   language            = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
            ITreebankLangParserParams tlpp = language.@params;
            DiskTreebank tb            = null;
            string       encoding      = options.GetProperty("l", "UTF-8");
            bool         removeBracket = PropertiesUtils.GetBool(options, "b", false);

            tlpp.SetInputEncoding(encoding);
            tlpp.SetOutputEncoding(encoding);
            tb = tlpp.DiskTreebank();
            string[] files = options.GetProperty(string.Empty, string.Empty).Split("\\s+");
            if (files.Length != 0)
            {
                foreach (string filename in files)
                {
                    tb.LoadPath(filename);
                }
            }
            else
            {
                log.Info(Usage());
                System.Environment.Exit(-1);
            }
            PrintWriter  pwo         = tlpp.Pw();
            string       startSymbol = tlpp.TreebankLanguagePack().StartSymbol();
            ITreeFactory tf          = new LabeledScoredTreeFactory();
            int          nTrees      = 0;

            foreach (Tree t in tb)
            {
                if (removeBracket)
                {
                    if (t.Value().Equals(startSymbol))
                    {
                        t = t.FirstChild();
                    }
                }
                else
                {
                    if (!t.Value().Equals(startSymbol))
                    {
                        //Add a bracket if it isn't already there
                        t = tf.NewTreeNode(startSymbol, Java.Util.Collections.SingletonList(t));
                    }
                }
                pwo.Println(t.ToString());
                nTrees++;
            }
            pwo.Close();
            System.Console.Error.Printf("Processed %d trees.%n", nTrees);
        }
コード例 #2
0
        public static void Main(string[] args)
        {
            if (args.Length < minArgs)
            {
                System.Console.Out.WriteLine(usage);
                System.Environment.Exit(-1);
            }
            // Process command-line options
            Properties options  = StringUtils.ArgsToProperties(args, optionArgDefinitions);
            string     fileName = options.GetProperty(string.Empty);

            if (fileName == null || fileName.Equals(string.Empty))
            {
                System.Console.Out.WriteLine(usage);
                System.Environment.Exit(-1);
            }
            Language language = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
            ITreebankLangParserParams tlpp = language.@params;
            string encoding = options.GetProperty("e", "UTF-8");

            tlpp.SetInputEncoding(encoding);
            tlpp.SetOutputEncoding(encoding);
            DiskTreebank tb = tlpp.DiskTreebank();

            tb.LoadPath(fileName);
            // Statistics
            ICounter <string> binaryRuleTypes  = new ClassicCounter <string>(20000);
            IList <int>       branchingFactors = new List <int>(20000);
            int nTrees                 = 0;
            int nUnaryRules            = 0;
            int nBinaryRules           = 0;
            int binaryBranchingFactors = 0;
            // Read the treebank
            PrintWriter pw = tlpp.Pw();

            foreach (Tree tree in tb)
            {
                if (tree.Value().Equals("ROOT"))
                {
                    tree = tree.FirstChild();
                }
                ++nTrees;
                foreach (Tree subTree in tree)
                {
                    if (subTree.IsPhrasal())
                    {
                        if (subTree.NumChildren() > 1)
                        {
                            ++nBinaryRules;
                            branchingFactors.Add(subTree.NumChildren());
                            binaryBranchingFactors += subTree.NumChildren();
                            binaryRuleTypes.IncrementCount(TreeToRuleString(subTree));
                        }
                        else
                        {
                            ++nUnaryRules;
                        }
                    }
                }
            }
            double mean = (double)binaryBranchingFactors / (double)nBinaryRules;

            System.Console.Out.Printf("#trees:\t%d%n", nTrees);
            System.Console.Out.Printf("#binary:\t%d%n", nBinaryRules);
            System.Console.Out.Printf("#binary types:\t%d%n", binaryRuleTypes.KeySet().Count);
            System.Console.Out.Printf("mean branching:\t%.4f%n", mean);
            System.Console.Out.Printf("stddev branching:\t%.4f%n", StandardDeviation(branchingFactors, mean));
            System.Console.Out.Printf("rule entropy:\t%.5f%n", Counters.Entropy(binaryRuleTypes));
            System.Console.Out.Printf("#unaries:\t%d%n", nUnaryRules);
        }
コード例 #3
0
        public static void Main(string[] args)
        {
            if (args.Length < minArgs)
            {
                System.Console.Out.WriteLine(usage);
                System.Environment.Exit(-1);
            }
            // Process command-line options
            Properties options  = StringUtils.ArgsToProperties(args, optionArgDefinitions);
            string     fileName = options.GetProperty(string.Empty);

            if (fileName == null || fileName.Equals(string.Empty))
            {
                System.Console.Out.WriteLine(usage);
                System.Environment.Exit(-1);
            }
            int      maxLen                = PropertiesUtils.GetInt(options, "y", int.MaxValue);
            bool     printTrees            = PropertiesUtils.GetBool(options, "p", false);
            bool     flattenTrees          = PropertiesUtils.GetBool(options, "f", false);
            bool     printPOS              = PropertiesUtils.GetBool(options, "a", false);
            bool     printTnT              = PropertiesUtils.GetBool(options, "t", false);
            Language language              = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
            ITreebankLangParserParams tlpp = language.@params;
            string encoding                = options.GetProperty("e", "UTF-8");

            tlpp.SetInputEncoding(encoding);
            tlpp.SetOutputEncoding(encoding);
            DiskTreebank tb = tlpp.DiskTreebank();

            tb.LoadPath(fileName);
            // Read the treebank
            PrintWriter pw       = tlpp.Pw();
            int         numTrees = 0;

            foreach (Tree tree in tb)
            {
                if (tree.Yield().Count > maxLen)
                {
                    continue;
                }
                ++numTrees;
                if (printTrees)
                {
                    pw.Println(tree.ToString());
                }
                else
                {
                    if (flattenTrees)
                    {
                        pw.Println(SentenceUtils.ListToString(tree.Yield()));
                    }
                    else
                    {
                        if (printPOS)
                        {
                            pw.Println(SentenceUtils.ListToString(tree.PreTerminalYield()));
                        }
                        else
                        {
                            if (printTnT)
                            {
                                IList <CoreLabel> yield = tree.TaggedLabeledYield();
                                foreach (CoreLabel label in yield)
                                {
                                    pw.Printf("%s\t%s%n", label.Word(), label.Tag());
                                }
                                pw.Println();
                            }
                        }
                    }
                }
            }
            System.Console.Error.Printf("Read %d trees.%n", numTrees);
        }