/// <summary>Run the Evalb scoring metric on guess/gold input.</summary> /// <remarks>Run the Evalb scoring metric on guess/gold input. The default language is English.</remarks> /// <param name="args"/> public static void Main(string[] args) { if (args.Length < minArgs) { log.Info(Usage()); System.Environment.Exit(-1); } Properties options = StringUtils.ArgsToProperties(args, OptionArgDefs()); Language language = PropertiesUtils.Get(options, "l", Language.English, typeof(Language)); ITreebankLangParserParams tlpp = language.@params; int maxGoldYield = PropertiesUtils.GetInt(options, "y", int.MaxValue); bool Verbose = PropertiesUtils.GetBool(options, "v", false); bool sortByF1 = PropertiesUtils.HasProperty(options, "s"); int worstKTreesToEmit = PropertiesUtils.GetInt(options, "s", 0); PriorityQueue <Triple <double, Tree, Tree> > queue = sortByF1 ? new PriorityQueue <Triple <double, Tree, Tree> >(2000, new Evalb.F1Comparator()) : null; bool doCatLevel = PropertiesUtils.GetBool(options, "c", false); string labelRegex = options.GetProperty("f", null); string encoding = options.GetProperty("e", "UTF-8"); string[] parsedArgs = options.GetProperty(string.Empty, string.Empty).Split("\\s+"); if (parsedArgs.Length != minArgs) { log.Info(Usage()); System.Environment.Exit(-1); } string goldFile = parsedArgs[0]; string guessFile = parsedArgs[1]; // Command-line has been parsed. Configure the metric for evaluation. tlpp.SetInputEncoding(encoding); PrintWriter pwOut = tlpp.Pw(); Treebank guessTreebank = tlpp.DiskTreebank(); guessTreebank.LoadPath(guessFile); pwOut.Println("GUESS TREEBANK:"); pwOut.Println(guessTreebank.TextualSummary()); Treebank goldTreebank = tlpp.DiskTreebank(); goldTreebank.LoadPath(goldFile); pwOut.Println("GOLD TREEBANK:"); pwOut.Println(goldTreebank.TextualSummary()); Evalb metric = new Evalb("Evalb LP/LR", true); EvalbByCat evalbCat = (doCatLevel) ? new EvalbByCat("EvalbByCat LP/LR", true, labelRegex) : null; ITreeTransformer tc = tlpp.Collinizer(); //The evalb ref implementation assigns status for each tree pair as follows: // // 0 - Ok (yields match) // 1 - length mismatch // 2 - null parse e.g. (()). // //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation. IEnumerator <Tree> goldItr = goldTreebank.GetEnumerator(); IEnumerator <Tree> guessItr = guessTreebank.GetEnumerator(); int goldLineId = 0; int guessLineId = 0; int skippedGuessTrees = 0; while (guessItr.MoveNext() && goldItr.MoveNext()) { Tree guessTree = guessItr.Current; IList <ILabel> guessYield = guessTree.Yield(); guessLineId++; Tree goldTree = goldItr.Current; IList <ILabel> goldYield = goldTree.Yield(); goldLineId++; // Check that we should evaluate this tree if (goldYield.Count > maxGoldYield) { skippedGuessTrees++; continue; } // Only trees with equal yields can be evaluated if (goldYield.Count != guessYield.Count) { pwOut.Printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.Count, guessYield.Count, goldLineId, guessLineId); skippedGuessTrees++; continue; } Tree evalGuess = tc.TransformTree(guessTree); Tree evalGold = tc.TransformTree(goldTree); metric.Evaluate(evalGuess, evalGold, ((Verbose) ? pwOut : null)); if (doCatLevel) { evalbCat.Evaluate(evalGuess, evalGold, ((Verbose) ? pwOut : null)); } if (sortByF1) { StoreTrees(queue, guessTree, goldTree, metric.GetLastF1()); } } if (guessItr.MoveNext() || goldItr.MoveNext()) { System.Console.Error.Printf("Guess/gold files do not have equal lengths (guess: %d gold: %d)%n.", guessLineId, goldLineId); } pwOut.Println("================================================================================"); if (skippedGuessTrees != 0) { pwOut.Printf("%s %d guess trees\n", "Unable to evaluate", skippedGuessTrees); } metric.Display(true, pwOut); pwOut.Println(); if (doCatLevel) { evalbCat.Display(true, pwOut); pwOut.Println(); } if (sortByF1) { EmitSortedTrees(queue, worstKTreesToEmit, guessFile); } pwOut.Close(); }
/// <summary>Execute with no arguments for usage.</summary> public static void Main(string[] args) { if (!ValidateCommandLine(args)) { log.Info(Usage()); System.Environment.Exit(-1); } DateTime startTime = new DateTime(); System.Console.Out.WriteLine("##################################"); System.Console.Out.WriteLine("# Stanford Treebank Preprocessor #"); System.Console.Out.WriteLine("##################################"); System.Console.Out.Printf("Start time: %s%n", startTime); System.Console.Out.Printf("Configuration: %s%n%n", configFile); ConfigParser cp = new ConfigParser(configFile); cp.Parse(); DistributionPackage distrib = new DistributionPackage(); foreach (Properties dsParams in cp) { string nameOfDataset = PropertiesUtils.HasProperty(dsParams, ConfigParser.paramName) ? dsParams.GetProperty(ConfigParser.paramName) : "UN-NAMED"; if (outputPath != null) { dsParams.SetProperty(ConfigParser.paramOutputPath, outputPath); } IDataset ds = GetDatasetClass(dsParams); if (ds == null) { System.Console.Out.Printf("Unable to instantiate TYPE for dataset %s. Check the javadocs%n", nameOfDataset); continue; } bool shouldDistribute = dsParams.Contains(ConfigParser.paramDistrib) && bool.ParseBoolean(dsParams.GetProperty(ConfigParser.paramDistrib)); dsParams.Remove(ConfigParser.paramDistrib); bool lacksRequiredOptions = !(ds.SetOptions(dsParams)); if (lacksRequiredOptions) { System.Console.Out.Printf("Skipping dataset %s as it lacks required parameters. Check the javadocs%n", nameOfDataset); continue; } ds.Build(); if (shouldDistribute) { distrib.AddFiles(ds.GetFilenames()); } if (Verbose) { System.Console.Out.Printf("%s%n", ds.ToString()); } } if (MakeDistrib) { distrib.Make(distribName); } if (Verbose) { System.Console.Out.WriteLine("-->configuration details"); System.Console.Out.WriteLine(cp.ToString()); if (MakeDistrib) { System.Console.Out.WriteLine("-->distribution package details"); System.Console.Out.WriteLine(distrib.ToString()); } } DateTime stopTime = new DateTime(); long elapsedTime = stopTime.GetTime() - startTime.GetTime(); System.Console.Out.Printf("Completed processing at %s%n", stopTime); System.Console.Out.Printf("Elapsed time: %d seconds%n", (int)(elapsedTime / 1000F)); }