Exemple #1
0
        /// <summary>Run the Evalb scoring metric on guess/gold input.</summary>
        /// <remarks>Run the Evalb scoring metric on guess/gold input. The default language is English.</remarks>
        /// <param name="args"/>
        public static void Main(string[] args)
        {
            if (args.Length < minArgs)
            {
                log.Info(Usage());
                System.Environment.Exit(-1);
            }
            Properties options             = StringUtils.ArgsToProperties(args, OptionArgDefs());
            Language   language            = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
            ITreebankLangParserParams tlpp = language.@params;
            int  maxGoldYield      = PropertiesUtils.GetInt(options, "y", int.MaxValue);
            bool Verbose           = PropertiesUtils.GetBool(options, "v", false);
            bool sortByF1          = PropertiesUtils.HasProperty(options, "s");
            int  worstKTreesToEmit = PropertiesUtils.GetInt(options, "s", 0);
            PriorityQueue <Triple <double, Tree, Tree> > queue = sortByF1 ? new PriorityQueue <Triple <double, Tree, Tree> >(2000, new Evalb.F1Comparator()) : null;
            bool   doCatLevel = PropertiesUtils.GetBool(options, "c", false);
            string labelRegex = options.GetProperty("f", null);
            string encoding   = options.GetProperty("e", "UTF-8");

            string[] parsedArgs = options.GetProperty(string.Empty, string.Empty).Split("\\s+");
            if (parsedArgs.Length != minArgs)
            {
                log.Info(Usage());
                System.Environment.Exit(-1);
            }
            string goldFile  = parsedArgs[0];
            string guessFile = parsedArgs[1];

            // Command-line has been parsed. Configure the metric for evaluation.
            tlpp.SetInputEncoding(encoding);
            PrintWriter pwOut         = tlpp.Pw();
            Treebank    guessTreebank = tlpp.DiskTreebank();

            guessTreebank.LoadPath(guessFile);
            pwOut.Println("GUESS TREEBANK:");
            pwOut.Println(guessTreebank.TextualSummary());
            Treebank goldTreebank = tlpp.DiskTreebank();

            goldTreebank.LoadPath(goldFile);
            pwOut.Println("GOLD TREEBANK:");
            pwOut.Println(goldTreebank.TextualSummary());
            Evalb            metric   = new Evalb("Evalb LP/LR", true);
            EvalbByCat       evalbCat = (doCatLevel) ? new EvalbByCat("EvalbByCat LP/LR", true, labelRegex) : null;
            ITreeTransformer tc       = tlpp.Collinizer();
            //The evalb ref implementation assigns status for each tree pair as follows:
            //
            //   0 - Ok (yields match)
            //   1 - length mismatch
            //   2 - null parse e.g. (()).
            //
            //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
            IEnumerator <Tree> goldItr  = goldTreebank.GetEnumerator();
            IEnumerator <Tree> guessItr = guessTreebank.GetEnumerator();
            int goldLineId        = 0;
            int guessLineId       = 0;
            int skippedGuessTrees = 0;

            while (guessItr.MoveNext() && goldItr.MoveNext())
            {
                Tree           guessTree  = guessItr.Current;
                IList <ILabel> guessYield = guessTree.Yield();
                guessLineId++;
                Tree           goldTree  = goldItr.Current;
                IList <ILabel> goldYield = goldTree.Yield();
                goldLineId++;
                // Check that we should evaluate this tree
                if (goldYield.Count > maxGoldYield)
                {
                    skippedGuessTrees++;
                    continue;
                }
                // Only trees with equal yields can be evaluated
                if (goldYield.Count != guessYield.Count)
                {
                    pwOut.Printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.Count, guessYield.Count, goldLineId, guessLineId);
                    skippedGuessTrees++;
                    continue;
                }
                Tree evalGuess = tc.TransformTree(guessTree);
                Tree evalGold  = tc.TransformTree(goldTree);
                metric.Evaluate(evalGuess, evalGold, ((Verbose) ? pwOut : null));
                if (doCatLevel)
                {
                    evalbCat.Evaluate(evalGuess, evalGold, ((Verbose) ? pwOut : null));
                }
                if (sortByF1)
                {
                    StoreTrees(queue, guessTree, goldTree, metric.GetLastF1());
                }
            }
            if (guessItr.MoveNext() || goldItr.MoveNext())
            {
                System.Console.Error.Printf("Guess/gold files do not have equal lengths (guess: %d gold: %d)%n.", guessLineId, goldLineId);
            }
            pwOut.Println("================================================================================");
            if (skippedGuessTrees != 0)
            {
                pwOut.Printf("%s %d guess trees\n", "Unable to evaluate", skippedGuessTrees);
            }
            metric.Display(true, pwOut);
            pwOut.Println();
            if (doCatLevel)
            {
                evalbCat.Display(true, pwOut);
                pwOut.Println();
            }
            if (sortByF1)
            {
                EmitSortedTrees(queue, worstKTreesToEmit, guessFile);
            }
            pwOut.Close();
        }
Exemple #2
0
        /// <summary>Execute with no arguments for usage.</summary>
        public static void Main(string[] args)
        {
            if (!ValidateCommandLine(args))
            {
                log.Info(Usage());
                System.Environment.Exit(-1);
            }
            DateTime startTime = new DateTime();

            System.Console.Out.WriteLine("##################################");
            System.Console.Out.WriteLine("# Stanford Treebank Preprocessor #");
            System.Console.Out.WriteLine("##################################");
            System.Console.Out.Printf("Start time: %s%n", startTime);
            System.Console.Out.Printf("Configuration: %s%n%n", configFile);
            ConfigParser cp = new ConfigParser(configFile);

            cp.Parse();
            DistributionPackage distrib = new DistributionPackage();

            foreach (Properties dsParams in cp)
            {
                string nameOfDataset = PropertiesUtils.HasProperty(dsParams, ConfigParser.paramName) ? dsParams.GetProperty(ConfigParser.paramName) : "UN-NAMED";
                if (outputPath != null)
                {
                    dsParams.SetProperty(ConfigParser.paramOutputPath, outputPath);
                }
                IDataset ds = GetDatasetClass(dsParams);
                if (ds == null)
                {
                    System.Console.Out.Printf("Unable to instantiate TYPE for dataset %s. Check the javadocs%n", nameOfDataset);
                    continue;
                }
                bool shouldDistribute = dsParams.Contains(ConfigParser.paramDistrib) && bool.ParseBoolean(dsParams.GetProperty(ConfigParser.paramDistrib));
                dsParams.Remove(ConfigParser.paramDistrib);
                bool lacksRequiredOptions = !(ds.SetOptions(dsParams));
                if (lacksRequiredOptions)
                {
                    System.Console.Out.Printf("Skipping dataset %s as it lacks required parameters. Check the javadocs%n", nameOfDataset);
                    continue;
                }
                ds.Build();
                if (shouldDistribute)
                {
                    distrib.AddFiles(ds.GetFilenames());
                }
                if (Verbose)
                {
                    System.Console.Out.Printf("%s%n", ds.ToString());
                }
            }
            if (MakeDistrib)
            {
                distrib.Make(distribName);
            }
            if (Verbose)
            {
                System.Console.Out.WriteLine("-->configuration details");
                System.Console.Out.WriteLine(cp.ToString());
                if (MakeDistrib)
                {
                    System.Console.Out.WriteLine("-->distribution package details");
                    System.Console.Out.WriteLine(distrib.ToString());
                }
            }
            DateTime stopTime    = new DateTime();
            long     elapsedTime = stopTime.GetTime() - startTime.GetTime();

            System.Console.Out.Printf("Completed processing at %s%n", stopTime);
            System.Console.Out.Printf("Elapsed time: %d seconds%n", (int)(elapsedTime / 1000F));
        }