Esempio n. 1
0
        /// <summary>Run the scoring metric on guess/gold input.</summary>
        /// <remarks>
        /// Run the scoring metric on guess/gold input. This method performs "Collinization."
        /// The default language is English.
        /// </remarks>
        /// <param name="args"/>
        public static void Main(string[] args)
        {
            if (args.Length < minArgs)
            {
                System.Console.Out.WriteLine(usage.ToString());
                System.Environment.Exit(-1);
            }
            ITreebankLangParserParams tlpp = new EnglishTreebankParserParams();
            int    maxGoldYield            = int.MaxValue;
            bool   Verbose   = false;
            string encoding  = "UTF-8";
            string guessFile = null;
            string goldFile  = null;
            IDictionary <string, string[]> argsMap = StringUtils.ArgsToMap(args, optionArgDefs);

            foreach (KeyValuePair <string, string[]> opt in argsMap)
            {
                if (opt.Key == null)
                {
                    continue;
                }
                if (opt.Key.Equals("-l"))
                {
                    Language lang = Language.ValueOf(opt.Value[0].Trim());
                    tlpp = lang.@params;
                }
                else
                {
                    if (opt.Key.Equals("-y"))
                    {
                        maxGoldYield = System.Convert.ToInt32(opt.Value[0].Trim());
                    }
                    else
                    {
                        if (opt.Key.Equals("-v"))
                        {
                            Verbose = true;
                        }
                        else
                        {
                            if (opt.Key.Equals("-c"))
                            {
                                Edu.Stanford.Nlp.Parser.Metrics.TaggingEval.doCatLevelEval = true;
                            }
                            else
                            {
                                if (opt.Key.Equals("-e"))
                                {
                                    encoding = opt.Value[0];
                                }
                                else
                                {
                                    log.Info(usage.ToString());
                                    System.Environment.Exit(-1);
                                }
                            }
                        }
                    }
                }
                //Non-option arguments located at key null
                string[] rest = argsMap[null];
                if (rest == null || rest.Length < minArgs)
                {
                    log.Info(usage.ToString());
                    System.Environment.Exit(-1);
                }
                goldFile  = rest[0];
                guessFile = rest[1];
            }
            tlpp.SetInputEncoding(encoding);
            PrintWriter pwOut         = tlpp.Pw();
            Treebank    guessTreebank = tlpp.DiskTreebank();

            guessTreebank.LoadPath(guessFile);
            pwOut.Println("GUESS TREEBANK:");
            pwOut.Println(guessTreebank.TextualSummary());
            Treebank goldTreebank = tlpp.DiskTreebank();

            goldTreebank.LoadPath(goldFile);
            pwOut.Println("GOLD TREEBANK:");
            pwOut.Println(goldTreebank.TextualSummary());
            Edu.Stanford.Nlp.Parser.Metrics.TaggingEval metric = new Edu.Stanford.Nlp.Parser.Metrics.TaggingEval("Tagging LP/LR");
            ITreeTransformer tc = tlpp.Collinizer();
            //The evalb ref implementation assigns status for each tree pair as follows:
            //
            //   0 - Ok (yields match)
            //   1 - length mismatch
            //   2 - null parse e.g. (()).
            //
            //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
            IEnumerator <Tree> goldItr  = goldTreebank.GetEnumerator();
            IEnumerator <Tree> guessItr = guessTreebank.GetEnumerator();
            int goldLineId        = 0;
            int guessLineId       = 0;
            int skippedGuessTrees = 0;

            while (guessItr.MoveNext() && goldItr.MoveNext())
            {
                Tree           guessTree  = guessItr.Current;
                IList <ILabel> guessYield = guessTree.Yield();
                guessLineId++;
                Tree           goldTree  = goldItr.Current;
                IList <ILabel> goldYield = goldTree.Yield();
                goldLineId++;
                // Check that we should evaluate this tree
                if (goldYield.Count > maxGoldYield)
                {
                    skippedGuessTrees++;
                    continue;
                }
                // Only trees with equal yields can be evaluated
                if (goldYield.Count != guessYield.Count)
                {
                    pwOut.Printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.Count, guessYield.Count, goldLineId, guessLineId);
                    skippedGuessTrees++;
                    continue;
                }
                Tree evalGuess = tc.TransformTree(guessTree);
                Tree evalGold  = tc.TransformTree(goldTree);
                metric.Evaluate(evalGuess, evalGold, ((Verbose) ? pwOut : null));
            }
            if (guessItr.MoveNext() || goldItr.MoveNext())
            {
                System.Console.Error.Printf("Guess/gold files do not have equal lengths (guess: %d gold: %d)%n.", guessLineId, goldLineId);
            }
            pwOut.Println("================================================================================");
            if (skippedGuessTrees != 0)
            {
                pwOut.Printf("%s %d guess trees\n", "Unable to evaluate", skippedGuessTrees);
            }
            metric.Display(true, pwOut);
            pwOut.Println();
            pwOut.Close();
        }
        /// <summary>Run the scoring metric on guess/gold input.</summary>
        /// <remarks>
        /// Run the scoring metric on guess/gold input. This method performs "Collinization."
        /// The default language is English.
        /// </remarks>
        /// <param name="args"/>
        public static void Main(string[] args)
        {
            if (args.Length < minArgs)
            {
                System.Console.Out.WriteLine(usage.ToString());
                System.Environment.Exit(-1);
            }
            ITreebankLangParserParams tlpp = new EnglishTreebankParserParams();
            int    maxGoldYield            = int.MaxValue;
            int    maxGuessYield           = int.MaxValue;
            bool   Verbose   = false;
            bool   skipGuess = false;
            bool   tagMode   = false;
            string guessFile = null;
            string goldFile  = null;

            for (int i = 0; i < args.Length; i++)
            {
                if (args[i].StartsWith("-"))
                {
                    switch (args[i])
                    {
                    case "-l":
                    {
                        Language lang = Language.ValueOf(args[++i].Trim());
                        tlpp = lang.@params;
                        break;
                    }

                    case "-y":
                    {
                        maxGoldYield = System.Convert.ToInt32(args[++i].Trim());
                        break;
                    }

                    case "-t":
                    {
                        tagMode = true;
                        break;
                    }

                    case "-v":
                    {
                        Verbose = true;
                        break;
                    }

                    case "-g":
                    {
                        maxGuessYield = System.Convert.ToInt32(args[++i].Trim());
                        skipGuess     = true;
                        break;
                    }

                    default:
                    {
                        System.Console.Out.WriteLine(usage.ToString());
                        System.Environment.Exit(-1);
                        break;
                    }
                    }
                }
                else
                {
                    //Required parameters
                    goldFile  = args[i++];
                    guessFile = args[i];
                    break;
                }
            }
            PrintWriter pwOut         = tlpp.Pw();
            Treebank    guessTreebank = tlpp.DiskTreebank();

            guessTreebank.LoadPath(guessFile);
            pwOut.Println("GUESS TREEBANK:");
            pwOut.Println(guessTreebank.TextualSummary());
            Treebank goldTreebank = tlpp.DiskTreebank();

            goldTreebank.LoadPath(goldFile);
            pwOut.Println("GOLD TREEBANK:");
            pwOut.Println(goldTreebank.TextualSummary());
            string evalName = (tagMode) ? "TsarfatyTAG" : "TsarfatySEG";

            Edu.Stanford.Nlp.Parser.Metrics.TsarfatyEval eval = new Edu.Stanford.Nlp.Parser.Metrics.TsarfatyEval(evalName, tagMode);
            ITreeTransformer tc = tlpp.Collinizer();
            //PennTreeReader skips over null/malformed parses. So when the yields of the gold/guess trees
            //don't match, we need to keep looking for the next gold tree that matches.
            //The evalb ref implementation differs slightly as it expects one tree per line. It assigns
            //status as follows:
            //
            //   0 - Ok (yields match)
            //   1 - length mismatch
            //   2 - null parse e.g. (()).
            //
            //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
            IEnumerator <Tree> goldItr = goldTreebank.GetEnumerator();
            int goldLineId             = 0;
            int skippedGuessTrees      = 0;

            foreach (Tree guess in guessTreebank)
            {
                Tree          evalGuess  = tc.TransformTree(guess);
                List <ILabel> guessSent  = guess.Yield();
                string        guessChars = SentenceUtils.ListToString(guessSent).ReplaceAll("\\s+", string.Empty);
                if (guessSent.Count > maxGuessYield)
                {
                    skippedGuessTrees++;
                    continue;
                }
                bool doneEval = false;
                while (goldItr.MoveNext() && !doneEval)
                {
                    Tree gold     = goldItr.Current;
                    Tree evalGold = tc.TransformTree(gold);
                    goldLineId++;
                    List <ILabel> goldSent  = gold.Yield();
                    string        goldChars = SentenceUtils.ListToString(goldSent).ReplaceAll("\\s+", string.Empty);
                    if (goldSent.Count > maxGoldYield)
                    {
                        continue;
                    }
                    else
                    {
                        if (goldChars.Length != guessChars.Length)
                        {
                            pwOut.Printf("Char level yield mismatch at line %d (guess: %d gold: %d)\n", goldLineId, guessChars.Length, goldChars.Length);
                            skippedGuessTrees++;
                            break;
                        }
                    }
                    //Default evalb behavior -- skip this guess tree
                    eval.Evaluate(evalGuess, evalGold, ((Verbose) ? pwOut : null));
                    doneEval = true;
                }
            }
            //Move to the next guess parse
            pwOut.Println("================================================================================");
            if (skippedGuessTrees != 0)
            {
                pwOut.Printf("%s %d guess trees\n", ((skipGuess) ? "Skipped" : "Unable to evaluate"), skippedGuessTrees);
            }
            eval.Display(true, pwOut);
            pwOut.Println();
            pwOut.Close();
        }