/// <summary>
/// Emits the opening markup of the reconfiguration page: the head with its title,
/// the opening body tag, and an h1 heading carrying the same text as the title.
/// </summary>
/// <param name="out">Writer that receives the markup.</param>
/// <param name="nodeName">Node name shown in the title and heading; it is HTML-escaped before being written.</param>
private void PrintHeader(PrintWriter @out, string nodeName)
{
    const string TitleFormat = "<title>%s Reconfiguration Utility</title>%n";
    const string HeadingFormat = "<h1>%s Reconfiguration Utility</h1>%n";

    @out.Write("<html><head>");
    @out.Printf(TitleFormat, StringEscapeUtils.EscapeHtml(nodeName));
    @out.Write("</head><body>\n");
    @out.Printf(HeadingFormat, StringEscapeUtils.EscapeHtml(nodeName));
}
/// <summary>Prints the accumulated statistics of this metric.</summary>
/// <remarks>
/// In verbose mode a full report is written: the sentence count, the sentence-level and
/// corpus-level averages, and one row per category lineage that was counted at least 30
/// times, in ascending order of the (possibly jittered) per-category average.
/// In non-verbose mode a single summary line is written instead.
/// </remarks>
/// <param name="verbose">Selects the full report (true) or the one-line summary (false).</param>
/// <param name="pw">Writer that receives the output.</param>
public virtual void Display(bool verbose, PrintWriter pw)
{
    double corpusLevel = corpusAvg / corpusNum;
    double sentLevel = sentAvg / sentNum;
    double sentEx = 100.0 * sentExact / sentNum;

    if (!verbose)
    {
        pw.Printf("%s summary: corpus: %.3f sent: %.3f sent-ex: %.2f%n", name, corpusLevel, sentLevel, sentEx);
        return;
    }

    // Sort the lineages by their average score. Averages are used as keys, so ties
    // must be broken; a small random offset is added until the key is unused.
    Random rand = new Random();
    IDictionary<double, IList<CoreLabel>> avgMap = new SortedDictionary<double, IList<CoreLabel>>();
    foreach (KeyValuePair<IList<CoreLabel>, double> entry in catAvg)
    {
        double avg = entry.Value / catNum[entry.Key];
        if (double.IsNaN(avg))
        {
            avg = -1.0;
        }
        double key = avg;
        // Retry the jitter rather than applying it once: a single jittered key could
        // itself collide and silently replace an earlier lineage.
        while (avgMap.ContainsKey(key))
        {
            key = avg + (rand.NextDouble() / 10000.0);
        }
        avgMap[key] = entry.Key;
    }

    const string Rule = "============================================================";
    pw.Println(Rule);
    pw.Println("Leaf Ancestor Metric" + "(" + name + ") -- final statistics");
    pw.Println(Rule);
    pw.Println("#Sentences: " + (int)sentNum);
    pw.Println();
    pw.Println("Sentence-level (macro-averaged)");
    pw.Printf(" Avg: %.3f%n", sentLevel);
    pw.Printf(" Exact: %.2f%%%n", sentEx);
    pw.Println();
    pw.Println("Corpus-level (micro-averaged)");
    pw.Printf(" Avg: %.3f%n", corpusLevel);
    pw.Println(Rule);
    foreach (IList<CoreLabel> lineage in avgMap.Values)
    {
        // Lineages counted fewer than 30 times are left out of the table.
        if (catNum[lineage] < 30.0)
        {
            continue;
        }
        // The printed average is recomputed from the raw totals, so the jitter
        // used for ordering never shows up in the output.
        double avg = catAvg[lineage] / catNum[lineage];
        pw.Printf(" %.3f\t%d\t%s%n", avg, (int)((double)catNum[lineage]), ToString(lineage));
    }
    pw.Println(Rule);
}
/// <summary>Reports the words collected in unkWords, or states that there are none.</summary>
/// <param name="verbose">Not read by this implementation.</param>
/// <param name="pw">Writer that receives the report.</param>
public virtual void Display(bool verbose, PrintWriter pw)
{
    // Nothing collected: a single line says so and we are done.
    if (unkWords.IsEmpty())
    {
        pw.Printf("UnknownWordPrinter: all words known by DVModel%n");
        return;
    }

    pw.Printf("UnknownWordPrinter: the following words are unknown%n");
    foreach (string unknown in unkWords)
    {
        pw.Printf(" %s%n", unknown);
    }
}
//WSGDEBUG - For debugging
/// <summary>
/// Writes one "word : stem" line for each key of verbStems, where the stem is
/// whatever GetStem returns for that key.
/// </summary>
/// <param name="pw">Writer that receives the dump.</param>
public virtual void DebugPrint(PrintWriter pw)
{
    const string LineFormat = "%s : %s\n";
    foreach (string verb in verbStems.Keys)
    {
        pw.Printf(LineFormat, verb, GetStem(verb));
    }
}
/// <summary>
/// Writes the elements returned by Get(i), for i from 0 up to size, to the named file
/// as "position=value" lines. Null elements are skipped.
/// </summary>
/// <remarks>
/// An IOException is not propagated; its stack trace is printed and the method returns.
/// </remarks>
/// <param name="s">Name of the file to write.</param>
public virtual void SaveToFilename(string s)
{
    PrintWriter bw = null;
    try
    {
        bw = IOUtils.GetPrintWriter(s);
        for (int i = 0; i < size; i++)
        {
            E o = Get(i);
            if (o != null)
            {
                bw.Printf("%d=%s%n", i, o.ToString());
            }
        }
    }
    catch (IOException e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
    }
    finally
    {
        // Single point of release: the writer is closed here on both the success and
        // the failure path, so no separate close is needed inside the try block.
        if (bw != null)
        {
            bw.Close();
        }
    }
}
/// <summary>
/// Writes a tab-separated table with a header row followed by one row per label,
/// holding its answer, gold answer and character annotations.
/// </summary>
/// <param name="doc">Labels to print, in order.</param>
/// <param name="pw">Writer that receives the table.</param>
public virtual void PrintAnswers(IList<CoreLabel> doc, PrintWriter pw)
{
    const string RowFormat = "%s\t%s\t%s%n";

    pw.Println("Answer\tGoldAnswer\tCharacter");
    foreach (CoreLabel label in doc)
    {
        object answer = label.Get(typeof(CoreAnnotations.AnswerAnnotation));
        object goldAnswer = label.Get(typeof(CoreAnnotations.GoldAnswerAnnotation));
        object character = label.Get(typeof(CoreAnnotations.CharAnnotation));
        pw.Printf(RowFormat, answer, goldAnswer, character);
    }
}
/// <summary>
/// Prints the node reports the client returns for the given node states: a total,
/// a header row, then one row per node.
/// </summary>
/// <param name="nodeStates">Node states passed to the client as the filter.</param>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
private void ListClusterNodes(ICollection<NodeState> nodeStates)
{
    // Output goes to sysout through a UTF-8 encoding writer.
    OutputStreamWriter encoded = new OutputStreamWriter(sysout, Sharpen.Extensions.GetEncoding("UTF-8"));
    PrintWriter writer = new PrintWriter(encoded);

    IList<NodeReport> nodesReport = client.GetNodeReports(Sharpen.Collections.ToArray(nodeStates, new NodeState[0]));
    writer.WriteLine("Total Nodes:" + nodesReport.Count);

    // Header and data rows share the same column pattern.
    writer.Printf(NodesPattern, "Node-Id", "Node-State", "Node-Http-Address", "Number-of-Running-Containers");
    foreach (NodeReport report in nodesReport)
    {
        writer.Printf(NodesPattern, report.GetNodeId(), report.GetNodeState(), report.GetHttpAddress(), report.GetNumContainers());
    }

    writer.Flush();
}
/// <summary>Lists the application attempts matching the given applicationid</summary>
/// <remarks>
/// Prints a total, a header row, and one row per attempt report returned by the client.
/// An attempt whose AM container id is null is shown with "N/A" in that column.
/// </remarks>
/// <param name="applicationId">Application id in string form; converted with ConverterUtils.ToApplicationId.</param>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
private void ListApplicationAttempts(string applicationId)
{
    PrintWriter writer = new PrintWriter(new OutputStreamWriter(sysout, Sharpen.Extensions.GetEncoding("UTF-8")));
    IList<ApplicationAttemptReport> appAttemptsReport = client.GetApplicationAttempts(ConverterUtils.ToApplicationId(applicationId));
    writer.WriteLine("Total number of application attempts " + ":" + appAttemptsReport.Count);
    writer.Printf(ApplicationAttemptsPattern, "ApplicationAttempt-Id", "State", "AM-Container-Id", "Tracking-URL");
    foreach (ApplicationAttemptReport appAttemptReport in appAttemptsReport)
    {
        // Guard the dereference: calling ToString() on a null AM container id would
        // abort the whole listing. "N/A" is the placeholder ListContainers already
        // uses for a missing value.
        writer.Printf(
            ApplicationAttemptsPattern,
            appAttemptReport.GetApplicationAttemptId(),
            appAttemptReport.GetYarnApplicationAttemptState(),
            appAttemptReport.GetAMContainerId() == null ? "N/A" : appAttemptReport.GetAMContainerId().ToString(),
            appAttemptReport.GetTrackingUrl());
    }
    writer.Flush();
}
/// <summary>
/// Prints the container reports the client returns for the given application attempt:
/// a total, a header row, then one row per container.
/// </summary>
/// <remarks>A null node HTTP address is printed as "N/A".</remarks>
/// <param name="appAttemptId">Application attempt id in string form; converted with ConverterUtils.ToApplicationAttemptId.</param>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
private void ListContainers(string appAttemptId)
{
    // Output goes to sysout through a UTF-8 encoding writer.
    OutputStreamWriter encoded = new OutputStreamWriter(sysout, Sharpen.Extensions.GetEncoding("UTF-8"));
    PrintWriter writer = new PrintWriter(encoded);

    IList<ContainerReport> appsReport = client.GetContainers(ConverterUtils.ToApplicationAttemptId(appAttemptId));
    writer.WriteLine("Total number of containers " + ":" + appsReport.Count);

    writer.Printf(ContainerPattern, "Container-Id", "Start Time", "Finish Time", "State", "Host", "Node Http Address", "LOG-URL");
    foreach (ContainerReport report in appsReport)
    {
        writer.Printf(
            ContainerPattern,
            report.GetContainerId(),
            Times.Format(report.GetCreationTime()),
            Times.Format(report.GetFinishTime()),
            report.GetContainerState(),
            report.GetAssignedNode(),
            report.GetNodeHttpAddress() == null ? "N/A" : report.GetNodeHttpAddress(),
            report.GetLogUrl());
    }

    writer.Flush();
}
/// <summary>
/// Prints a table of jobs: a total, a header row, then one row per job status.
/// </summary>
/// <remarks>
/// Negative slot counts and memory figures are printed as Unavailable; non-negative
/// memory figures are formatted with memPattern.
/// </remarks>
/// <param name="jobs">Job statuses to list.</param>
/// <param name="writer">Writer that receives the table; it is flushed before returning.</param>
public virtual void DisplayJobList(JobStatus[] jobs, PrintWriter writer)
{
    writer.WriteLine("Total jobs:" + jobs.Length);
    writer.Printf(headerPattern, "JobId", "State", "StartTime", "UserName", "Queue", "Priority", "UsedContainers", "RsvdContainers", "UsedMem", "RsvdMem", "NeededMem", "AM info");

    for (int index = 0; index < jobs.Length; index++)
    {
        JobStatus status = jobs[index];

        // Read the numeric figures up front; a negative value means "not available".
        int usedSlots = status.GetNumUsedSlots();
        int reservedSlots = status.GetNumReservedSlots();
        int memUsed = status.GetUsedMem();
        int memReserved = status.GetReservedMem();
        int memNeeded = status.GetNeededMem();

        writer.Printf(
            dataPattern,
            status.GetJobID().ToString(),
            status.GetState(),
            status.GetStartTime(),
            status.GetUsername(),
            status.GetQueue(),
            status.GetPriority().ToString(),
            usedSlots < 0 ? Unavailable : usedSlots,
            reservedSlots < 0 ? Unavailable : reservedSlots,
            memUsed < 0 ? Unavailable : string.Format(memPattern, memUsed),
            memReserved < 0 ? Unavailable : string.Format(memPattern, memReserved),
            memNeeded < 0 ? Unavailable : string.Format(memPattern, memNeeded),
            status.GetSchedulingInfo());
    }

    writer.Flush();
}
/// <summary>
/// Lists the applications matching the given application Types And application
/// States present in the Resource Manager
/// </summary>
/// <remarks>
/// The passed-in state set is modified: when allAppStates is set, every
/// YarnApplicationState value is added to it; otherwise, if it is empty, Running,
/// Accepted and Submitted are added. The resulting set is what the client is queried with.
/// </remarks>
/// <param name="appTypes">Application types passed to the client as the filter.</param>
/// <param name="appStates">Application states to filter on; may be extended as described above.</param>
/// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
/// <exception cref="System.IO.IOException"/>
private void ListApplications(ICollection<string> appTypes, EnumSet<YarnApplicationState> appStates)
{
    PrintWriter writer = new PrintWriter(new OutputStreamWriter(sysout, Sharpen.Extensions.GetEncoding("UTF-8")));
    if (allAppStates)
    {
        foreach (YarnApplicationState appState in YarnApplicationState.Values())
        {
            appStates.AddItem(appState);
        }
    }
    else if (appStates.IsEmpty())
    {
        appStates.AddItem(YarnApplicationState.Running);
        appStates.AddItem(YarnApplicationState.Accepted);
        appStates.AddItem(YarnApplicationState.Submitted);
    }

    IList<ApplicationReport> appsReport = client.GetApplications(appTypes, appStates);
    writer.WriteLine("Total number of applications (application-types: " + appTypes + " and states: " + appStates + ")" + ":" + appsReport.Count);
    writer.Printf(ApplicationsPattern, "Application-Id", "Application-Name", "Application-Type", "User", "Queue", "State", "Final-State", "Progress", "Tracking-URL");

    // The progress formatter does not depend on the report, so it is built once
    // instead of once per row.
    DecimalFormat formatter = new DecimalFormat("###.##%");
    foreach (ApplicationReport appReport in appsReport)
    {
        string progress = formatter.Format(appReport.GetProgress());
        writer.Printf(
            ApplicationsPattern,
            appReport.GetApplicationId(),
            appReport.GetName(),
            appReport.GetApplicationType(),
            appReport.GetUser(),
            appReport.GetQueue(),
            appReport.GetYarnApplicationState(),
            appReport.GetFinalApplicationStatus(),
            progress,
            appReport.GetOriginalTrackingUrl());
    }
    writer.Flush();
}
/// <summary>
/// Checks every leaf of the guess tree against the model's word vectors and records
/// those whose vector is the unknown-word vector.
/// </summary>
/// <remarks>
/// Each such word is reported on pw with its 1-based position and added to unkWords.
/// </remarks>
/// <param name="guess">Tree whose yield is inspected.</param>
/// <param name="gold">Not read by this implementation.</param>
/// <param name="pw">Writer for the per-word messages; it is used without a null check.</param>
/// <param name="weight">Not read by this implementation.</param>
public virtual void Evaluate(Tree guess, Tree gold, PrintWriter pw, double weight)
{
    IList<ILabel> leaves = guess.Yield();
    for (int index = 0; index < leaves.Count; index++)
    {
        ILabel leaf = leaves[index];
        int position = index + 1;
        SimpleMatrix vector = model.GetWordVector(leaf.Value());

        // would be faster but more implementation-specific if we
        // removed the Equals call
        bool isUnknown = vector == unk || vector.Equals(unk);
        if (!isUnknown)
        {
            continue;
        }

        pw.Printf(" Unknown word in position %d: %s%n", position, leaf.Value());
        unkWords.Add(leaf.Value());
    }
}
/// <summary>
/// Writes every (first key, second key, count) triple of the counter to a file as
/// tab-separated lines, with the count truncated to an int.
/// </summary>
/// <remarks>
/// UnsupportedEncodingException and FileNotFoundException are not propagated; their
/// stack traces are printed instead.
/// </remarks>
/// <param name="cnt">Counter to dump.</param>
/// <param name="fname">Name of the output file.</param>
public static void PrintCounter(TwoDimensionalCounter<string, string> cnt, string fname)
{
    PrintWriter pw = null;
    try
    {
        pw = new PrintWriter(new TextWriter(new FileOutputStream(new File(fname)), false, "UTF-8"));
        foreach (string key in cnt.FirstKeySet())
        {
            foreach (string val in cnt.GetCounter(key).KeySet())
            {
                pw.Printf("%s\t%s\t%d%n", key, val, (int)cnt.GetCount(key, val));
            }
        }
    }
    catch (UnsupportedEncodingException e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
    }
    catch (FileNotFoundException e)
    {
        Sharpen.Runtime.PrintStackTrace(e);
    }
    finally
    {
        // Close on every path so the file handle is released even when writing fails
        // part-way through.
        if (pw != null)
        {
            pw.Close();
        }
    }
}
/// <summary>
/// Writes sentences and their dependency trees to a file, one token per line in a
/// tab-separated ten-column layout, with a blank line after each sentence.
/// </summary>
/// <remarks>
/// For the 1-based token index j the line holds: j, the word, the tag (twice), the head
/// returned by tree.GetHead(j) and the label returned by tree.GetLabel(j); the remaining
/// columns are underscores. sentences[i] is paired with trees[i].
/// </remarks>
/// <param name="outFile">Name of the output file.</param>
/// <param name="sentences">Sentences to write; tokens are read from their TokensAnnotation.</param>
/// <param name="trees">Dependency trees, indexed in parallel with the sentences.</param>
/// <exception cref="RuntimeIOException">Wraps any exception raised while writing.</exception>
public static void WriteConllFile(string outFile, IList<ICoreMap> sentences, IList<DependencyTree> trees)
{
    PrintWriter output = null;
    try
    {
        output = IOUtils.GetPrintWriter(outFile);
        for (int i = 0; i < sentences.Count; i++)
        {
            ICoreMap sentence = sentences[i];
            DependencyTree tree = trees[i];
            IList<CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
            // Token indices in the output are 1-based; the bound is this sentence's
            // own token count.
            int size = tokens.Count;
            for (int j = 1; j <= size; ++j)
            {
                CoreLabel token = tokens[j - 1];
                output.Printf("%d\t%s\t_\t%s\t%s\t_\t%d\t%s\t_\t_%n", j, token.Word(), token.Tag(), token.Tag(), tree.GetHead(j), tree.GetLabel(j));
            }
            output.Println();
        }
    }
    catch (Exception e)
    {
        throw new RuntimeIOException(e);
    }
    finally
    {
        // Release the file on the failure path as well as on success.
        if (output != null)
        {
            output.Close();
        }
    }
}
/// <summary>Run the scoring metric on guess/gold input.</summary>
/// <remarks>
/// Run the scoring metric on guess/gold input. This method performs "Collinization."
/// The default language is English.
/// Recognized options are -l (language), -y (maximum gold yield), -v (verbose),
/// -c (category-level evaluation) and -e (input encoding); any other option prints the
/// usage and exits. The first two non-option arguments are the gold file and the guess file.
/// </remarks>
/// <param name="args">Command-line arguments.</param>
public static void Main(string[] args)
{
    if (args.Length < minArgs)
    {
        System.Console.Out.WriteLine(usage.ToString());
        System.Environment.Exit(-1);
    }

    ITreebankLangParserParams tlpp = new EnglishTreebankParserParams();
    int maxGoldYield = int.MaxValue;
    bool verbose = false;
    string encoding = "UTF-8";

    IDictionary<string, string[]> argsMap = StringUtils.ArgsToMap(args, optionArgDefs);
    foreach (KeyValuePair<string, string[]> opt in argsMap)
    {
        if (opt.Key == null)
        {
            // Positional arguments are handled after the loop.
            continue;
        }
        if (opt.Key.Equals("-l"))
        {
            Language lang = Language.ValueOf(opt.Value[0].Trim());
            tlpp = lang.@params;
        }
        else if (opt.Key.Equals("-y"))
        {
            maxGoldYield = System.Convert.ToInt32(opt.Value[0].Trim());
        }
        else if (opt.Key.Equals("-v"))
        {
            verbose = true;
        }
        else if (opt.Key.Equals("-c"))
        {
            Edu.Stanford.Nlp.Parser.Metrics.TaggingEval.doCatLevelEval = true;
        }
        else if (opt.Key.Equals("-e"))
        {
            encoding = opt.Value[0];
        }
        else
        {
            log.Info(usage.ToString());
            System.Environment.Exit(-1);
        }
    }

    //Non-option arguments located at key null
    // This is done once, outside the option loop, so the file names are also picked up
    // when the command line carries no options at all.
    string[] rest = argsMap[null];
    if (rest == null || rest.Length < minArgs)
    {
        log.Info(usage.ToString());
        System.Environment.Exit(-1);
    }
    string goldFile = rest[0];
    string guessFile = rest[1];

    tlpp.SetInputEncoding(encoding);
    PrintWriter pwOut = tlpp.Pw();

    Treebank guessTreebank = tlpp.DiskTreebank();
    guessTreebank.LoadPath(guessFile);
    pwOut.Println("GUESS TREEBANK:");
    pwOut.Println(guessTreebank.TextualSummary());

    Treebank goldTreebank = tlpp.DiskTreebank();
    goldTreebank.LoadPath(goldFile);
    pwOut.Println("GOLD TREEBANK:");
    pwOut.Println(goldTreebank.TextualSummary());

    Edu.Stanford.Nlp.Parser.Metrics.TaggingEval metric = new Edu.Stanford.Nlp.Parser.Metrics.TaggingEval("Tagging LP/LR");
    ITreeTransformer tc = tlpp.Collinizer();

    //The evalb ref implementation assigns status for each tree pair as follows:
    //
    //   0 - Ok (yields match)
    //   1 - length mismatch
    //   2 - null parse e.g. (()).
    //
    //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
    IEnumerator<Tree> goldItr = goldTreebank.GetEnumerator();
    IEnumerator<Tree> guessItr = guessTreebank.GetEnumerator();
    int goldLineId = 0;
    int guessLineId = 0;
    int skippedGuessTrees = 0;

    // MoveNext() both tests for and consumes the next element, so each enumerator is
    // advanced exactly once per pass and the two results are kept. After the loop they
    // tell us whether one treebank ran out before the other.
    bool moreGuess = false;
    bool moreGold = false;
    while (true)
    {
        moreGuess = guessItr.MoveNext();
        moreGold = goldItr.MoveNext();
        if (!moreGuess || !moreGold)
        {
            break;
        }

        Tree guessTree = guessItr.Current;
        IList<ILabel> guessYield = guessTree.Yield();
        guessLineId++;
        Tree goldTree = goldItr.Current;
        IList<ILabel> goldYield = goldTree.Yield();
        goldLineId++;

        // Check that we should evaluate this tree
        if (goldYield.Count > maxGoldYield)
        {
            skippedGuessTrees++;
            continue;
        }

        // Only trees with equal yields can be evaluated
        if (goldYield.Count != guessYield.Count)
        {
            pwOut.Printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.Count, guessYield.Count, goldLineId, guessLineId);
            skippedGuessTrees++;
            continue;
        }

        Tree evalGuess = tc.TransformTree(guessTree);
        Tree evalGold = tc.TransformTree(goldTree);
        metric.Evaluate(evalGuess, evalGold, ((verbose) ? pwOut : null));
    }

    if (moreGuess || moreGold)
    {
        System.Console.Error.Printf("Guess/gold files do not have equal lengths (guess: %d gold: %d).%n", guessLineId, goldLineId);
    }

    pwOut.Println("================================================================================");
    if (skippedGuessTrees != 0)
    {
        pwOut.Printf("%s %d guess trees\n", "Unable to evaluate", skippedGuessTrees);
    }
    metric.Display(true, pwOut);
    pwOut.Println();
    pwOut.Close();
}
/// <summary>
/// Loads a gold and a guess treebank, pairs their trees, and runs the Collins dependency
/// evaluation on every pair whose transformed yields have the same length.
/// </summary>
/// <remarks>
/// Options read from the command line: v (verbose), l (language), g (maximum gold yield)
/// and y (maximum guess yield). Exactly MinArgs positional arguments are expected: the gold
/// file followed by the guess file; otherwise the usage is logged and the process exits.
/// </remarks>
/// <param name="args">Command-line arguments.</param>
public static void Main(string[] args)
{
    if (args.Length < MinArgs)
    {
        log.Info(Usage());
        System.Environment.Exit(-1);
    }

    Properties options = StringUtils.ArgsToProperties(args, OptionArgDefs());
    bool verbose = PropertiesUtils.GetBool(options, "v", false);
    Language language = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
    int maxGoldYield = PropertiesUtils.GetInt(options, "g", int.MaxValue);
    int maxGuessYield = PropertiesUtils.GetInt(options, "y", int.MaxValue);

    // NOTE(review): the separator looks like a regular expression carried over from the
    // Java original; confirm that the Split overload in scope treats it as one.
    string[] parsedArgs = options.GetProperty(string.Empty, string.Empty).Split("\\s+");
    if (parsedArgs.Length != MinArgs)
    {
        log.Info(Usage());
        System.Environment.Exit(-1);
    }
    File goldFile = new File(parsedArgs[0]);
    File guessFile = new File(parsedArgs[1]);

    ITreebankLangParserParams tlpp = language.@params;
    PrintWriter pwOut = tlpp.Pw();

    Treebank guessTreebank = tlpp.DiskTreebank();
    guessTreebank.LoadPath(guessFile);
    pwOut.Println("GUESS TREEBANK:");
    pwOut.Println(guessTreebank.TextualSummary());

    Treebank goldTreebank = tlpp.DiskTreebank();
    goldTreebank.LoadPath(goldFile);
    pwOut.Println("GOLD TREEBANK:");
    pwOut.Println(goldTreebank.TextualSummary());

    Edu.Stanford.Nlp.Parser.Metrics.CollinsDepEval depEval = new Edu.Stanford.Nlp.Parser.Metrics.CollinsDepEval("CollinsDep", true, tlpp.HeadFinder(), tlpp.TreebankLanguagePack().StartSymbol());
    ITreeTransformer tc = tlpp.Collinizer();

    //PennTreeReader skips over null/malformed parses. So when the yields of the gold/guess trees
    //don't match, we need to keep looking for the next gold tree that matches.
    //The evalb ref implementation differs slightly as it expects one tree per line. It assigns
    //status as follows:
    //
    //   0 - Ok (yields match)
    //   1 - length mismatch
    //   2 - null parse e.g. (()).
    //
    //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
    IEnumerator<Tree> goldItr = goldTreebank.GetEnumerator();
    int goldLineId = 0;
    int skippedGuessTrees = 0;
    foreach (Tree guess in guessTreebank)
    {
        Tree evalGuess = tc.TransformTree(guess);
        if (guess.Yield().Count > maxGuessYield)
        {
            skippedGuessTrees++;
            continue;
        }

        bool doneEval = false;
        // doneEval must be tested first: MoveNext() consumes a gold tree, so checking it
        // before the flag would throw away one gold tree after every evaluated pair.
        while (!doneEval && goldItr.MoveNext())
        {
            Tree gold = goldItr.Current;
            Tree evalGold = tc.TransformTree(gold);
            goldLineId++;
            if (gold.Yield().Count > maxGoldYield)
            {
                continue;
            }
            else if (evalGold.Yield().Count != evalGuess.Yield().Count)
            {
                pwOut.Println("Yield mismatch at gold line " + goldLineId);
                skippedGuessTrees++;
                //Default evalb behavior -- skip this guess tree
                break;
            }
            depEval.Evaluate(evalGuess, evalGold, ((verbose) ? pwOut : null));
            //Move to the next guess parse
            doneEval = true;
        }
    }

    pwOut.Println("================================================================================");
    if (skippedGuessTrees != 0)
    {
        pwOut.Printf("%s %d guess trees\n", ((maxGuessYield < int.MaxValue) ? "Skipped" : "Unable to evaluate"), skippedGuessTrees);
    }
    depEval.Display(true, pwOut);
    pwOut.Close();
}
/// <summary>
/// Prints the result of one parser query and feeds it to the configured evaluations.
/// </summary>
/// <remarks>
/// Output phase: the best parse is printed to pwOut; depending on the test options, the
/// list of best PCFG parses, a k-best PCFG list or a k-good factored list is printed as well
/// (otherwise the 1-best tree goes to pwFileOut when that writer is not null), and the
/// derivational entropy is written to pwStats when requested.
/// Evaluation phase: if a parse exists, the parse and the gold tree are transformed and, unless
/// a transformation yields null or the yields differ in length (counted in numSkippedEvals),
/// every non-null evaluator field is run.
/// </remarks>
/// <param name="pq">The finished parser query.</param>
/// <param name="goldTree">Gold-standard tree for the sentence.</param>
/// <param name="pwErr">Writer for evaluation output and warnings.</param>
/// <param name="pwOut">Writer for parses.</param>
/// <param name="pwFileOut">Writer for the 1-best tree; may be null.</param>
/// <param name="pwStats">Writer for the entropy statistics line.</param>
/// <param name="treePrint">Printer used for the best parse and the gold tree.</param>
public virtual void ProcessResults(IParserQuery pq, Tree goldTree, PrintWriter pwErr, PrintWriter pwOut, PrintWriter pwFileOut, PrintWriter pwStats, TreePrint treePrint)
{
    if (pq.SaidMemMessage())
    {
        saidMemMessage = true;
    }

    Tree tree;
    IList<IHasWord> sentence = pq.OriginalSentence();
    try
    {
        tree = pq.GetBestParse();
    }
    catch (NoSuchParseException)
    {
        tree = null;
    }

    IList<ScoredObject<Tree>> kbestPCFGTrees = null;
    if (tree != null && kbestPCFG > 0)
    {
        kbestPCFGTrees = pq.GetKBestPCFGParses(kbestPCFG);
    }

    //combo parse goes to pwOut (System.out)
    if (op.testOptions.verbose)
    {
        pwOut.Println("ComboParser best");
        Tree ot = tree;
        if (ot != null && !op.tlpParams.TreebankLanguagePack().IsStartSymbol(ot.Value()))
        {
            ot = ot.TreeFactory().NewTreeNode(op.tlpParams.TreebankLanguagePack().StartSymbol(), Java.Util.Collections.SingletonList(ot));
        }
        treePrint.PrintTree(ot, pwOut);
    }
    else
    {
        treePrint.PrintTree(tree, pwOut);
    }

    // **OUTPUT**
    // print various n-best like outputs (including 1-best)
    // print various statistics
    if (tree != null)
    {
        if (op.testOptions.printAllBestParses)
        {
            IList<ScoredObject<Tree>> parses = pq.GetBestPCFGParses();
            int sz = parses.Count;
            if (sz > 1)
            {
                pwOut.Println("There were " + sz + " best PCFG parses with score " + parses[0].Score() + '.');
                Tree transGoldTree = collinizer.TransformTree(goldTree);
                int iii = 0;
                foreach (ScoredObject<Tree> sot in parses)
                {
                    iii++;
                    Tree tb = sot.Object();
                    Tree tbd = debinarizer.TransformTree(tb);
                    tbd = subcategoryStripper.TransformTree(tbd);
                    pq.RestoreOriginalWords(tbd);
                    pwOut.Println("PCFG Parse #" + iii + " with score " + tbd.Score());
                    tbd.PennPrint(pwOut);
                    Tree tbtr = collinizer.TransformTree(tbd);
                    // pwOut.println("Tree size = " + tbtr.size() + "; depth = " + tbtr.depth());
                    kGoodLB.Evaluate(tbtr, transGoldTree, pwErr);
                }
            }
        }
        else if (op.testOptions.printPCFGkBest > 0 && op.testOptions.outputkBestEquivocation == null)
        {
            // Huang and Chiang (2006) Algorithm 3 output from the PCFG parser
            IList<ScoredObject<Tree>> trees = kbestPCFGTrees.SubList(0, op.testOptions.printPCFGkBest);
            Tree transGoldTree = collinizer.TransformTree(goldTree);
            int i = 0;
            foreach (ScoredObject<Tree> tp in trees)
            {
                i++;
                pwOut.Println("PCFG Parse #" + i + " with score " + tp.Score());
                Tree tbd = tp.Object();
                tbd.PennPrint(pwOut);
                Tree tbtr = collinizer.TransformTree(tbd);
                kGoodLB.Evaluate(tbtr, transGoldTree, pwErr);
            }
        }
        else if (op.testOptions.printFactoredKGood > 0 && pq.HasFactoredParse())
        {
            // Chart parser (factored) n-best list
            // DZ: debug n best trees
            IList<ScoredObject<Tree>> trees = pq.GetKGoodFactoredParses(op.testOptions.printFactoredKGood);
            Tree transGoldTree = collinizer.TransformTree(goldTree);
            int ii = 0;
            foreach (ScoredObject<Tree> tp in trees)
            {
                ii++;
                pwOut.Println("Factored Parse #" + ii + " with score " + tp.Score());
                Tree tbd = tp.Object();
                tbd.PennPrint(pwOut);
                Tree tbtr = collinizer.TransformTree(tbd);
                kGoodLB.Evaluate(tbtr, transGoldTree, pwOut);
            }
        }
        else if (pwFileOut != null)
        {
            //1-best output
            pwFileOut.Println(tree.ToString());
        }

        //Print the derivational entropy
        if (op.testOptions.outputkBestEquivocation != null && op.testOptions.printPCFGkBest > 0)
        {
            IList<ScoredObject<Tree>> trees = kbestPCFGTrees.SubList(0, op.testOptions.printPCFGkBest);
            double[] logScores = new double[trees.Count];
            int treeId = 0;
            foreach (ScoredObject<Tree> kBestTree in trees)
            {
                logScores[treeId++] = kBestTree.Score();
            }

            //Re-normalize
            double entropy = 0.0;
            double denom = ArrayMath.LogSum(logScores);
            foreach (double logScore in logScores)
            {
                double logPr = logScore - denom;
                entropy += System.Math.Exp(logPr) * (logPr / System.Math.Log(2));
            }
            entropy *= -1;
            //Convert to bits
            pwStats.Printf("%f\t%d\t%d\n", entropy, trees.Count, sentence.Count);
        }
    }

    // **EVALUATION**
    // Perform various evaluations specified by the user
    if (tree != null)
    {
        //Strip subcategories and remove punctuation for evaluation
        tree = subcategoryStripper.TransformTree(tree);
        Tree treeFact = collinizer.TransformTree(tree);

        //Setup the gold tree
        if (op.testOptions.verbose)
        {
            pwOut.Println("Correct parse");
            treePrint.PrintTree(goldTree, pwOut);
        }
        Tree transGoldTree = collinizer.TransformTree(goldTree);
        if (transGoldTree != null)
        {
            transGoldTree = subcategoryStripper.TransformTree(transGoldTree);
        }

        //Can't do evaluation in these two cases
        if (transGoldTree == null)
        {
            pwErr.Println("Couldn't transform gold tree for evaluation, skipping eval. Gold tree was:");
            goldTree.PennPrint(pwErr);
            numSkippedEvals++;
            return;
        }
        else if (treeFact == null)
        {
            pwErr.Println("Couldn't transform hypothesis tree for evaluation, skipping eval. Tree was:");
            tree.PennPrint(pwErr);
            numSkippedEvals++;
            return;
        }
        else if (treeFact.Yield().Count != transGoldTree.Yield().Count)
        {
            IList<ILabel> fYield = treeFact.Yield();
            IList<ILabel> gYield = transGoldTree.Yield();
            pwErr.Println("WARNING: Evaluation could not be performed due to gold/parsed yield mismatch.");
            pwErr.Printf(" sizes: gold: %d (transf) %d (orig); parsed: %d (transf) %d (orig).%n", gYield.Count, goldTree.Yield().Count, fYield.Count, tree.Yield().Count);
            pwErr.Println(" gold: " + SentenceUtils.ListToString(gYield, true));
            pwErr.Println(" pars: " + SentenceUtils.ListToString(fYield, true));
            numSkippedEvals++;
            return;
        }

        if (topKEvals.Count > 0)
        {
            IList<Tree> transGuesses = new List<Tree>();
            int kbest = System.Math.Min(op.testOptions.evalPCFGkBest, kbestPCFGTrees.Count);
            foreach (ScoredObject<Tree> guess in kbestPCFGTrees.SubList(0, kbest))
            {
                transGuesses.Add(collinizer.TransformTree(guess.Object()));
            }
            foreach (BestOfTopKEval topKEval in topKEvals)
            {
                topKEval.Evaluate(transGuesses, transGoldTree, pwErr);
            }
        }

        //PCFG eval
        Tree treePCFG = pq.GetBestPCFGParse();
        if (treePCFG != null)
        {
            Tree treePCFGeval = collinizer.TransformTree(treePCFG);
            if (pcfgLB != null)
            {
                pcfgLB.Evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgChildSpecific != null)
            {
                pcfgChildSpecific.Evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgLA != null)
            {
                pcfgLA.Evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgCB != null)
            {
                pcfgCB.Evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgDA != null)
            {
                // Re-index the leaves after Collinization, stripping traces, etc.
                treePCFGeval.IndexLeaves(true);
                transGoldTree.IndexLeaves(true);
                pcfgDA.Evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgTA != null)
            {
                pcfgTA.Evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgLL != null && pq.GetPCFGParser() != null)
            {
                pcfgLL.RecordScore(pq.GetPCFGParser(), pwErr);
            }
            if (pcfgRUO != null)
            {
                pcfgRUO.Evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgCUO != null)
            {
                pcfgCUO.Evaluate(treePCFGeval, transGoldTree, pwErr);
            }
            if (pcfgCatE != null)
            {
                pcfgCatE.Evaluate(treePCFGeval, transGoldTree, pwErr);
            }
        }

        //Dependency eval
        // todo: is treeDep really useful here, or should we really use depDAEval tree (debinarized) throughout? We use it for parse, and it sure seems like we could use it for tag eval, but maybe not factDA?
        Tree treeDep = pq.GetBestDependencyParse(false);
        if (treeDep != null)
        {
            Tree goldTreeB = binarizerOnly.TransformTree(goldTree);
            Tree goldTreeEval = goldTree.DeepCopy();
            goldTreeEval.IndexLeaves(true);
            goldTreeEval.PercolateHeads(op.Langpack().HeadFinder());
            Tree depDAEval = pq.GetBestDependencyParse(true);
            depDAEval.IndexLeaves(true);
            depDAEval.PercolateHeadIndices();
            if (depDA != null)
            {
                depDA.Evaluate(depDAEval, goldTreeEval, pwErr);
            }
            if (depTA != null)
            {
                Tree undoneTree = debinarizer.TransformTree(treeDep);
                undoneTree = subcategoryStripper.TransformTree(undoneTree);
                pq.RestoreOriginalWords(undoneTree);
                // pwErr.println("subcategoryStripped tree: " + undoneTree.toStructureDebugString());
                depTA.Evaluate(undoneTree, goldTree, pwErr);
            }
            if (depLL != null && pq.GetDependencyParser() != null)
            {
                depLL.RecordScore(pq.GetDependencyParser(), pwErr);
            }
            Tree factTreeB;
            if (pq.HasFactoredParse())
            {
                factTreeB = pq.GetBestFactoredParse();
            }
            else
            {
                factTreeB = treeDep;
            }
            if (factDA != null)
            {
                factDA.Evaluate(factTreeB, goldTreeB, pwErr);
            }
        }

        //Factored parser (1best) eval
        if (factLB != null)
        {
            factLB.Evaluate(treeFact, transGoldTree, pwErr);
        }
        if (factChildSpecific != null)
        {
            factChildSpecific.Evaluate(treeFact, transGoldTree, pwErr);
        }
        if (factLA != null)
        {
            factLA.Evaluate(treeFact, transGoldTree, pwErr);
        }
        if (factTA != null)
        {
            factTA.Evaluate(tree, boundaryRemover.TransformTree(goldTree), pwErr);
        }
        if (factLL != null && pq.GetFactoredParser() != null)
        {
            factLL.RecordScore(pq.GetFactoredParser(), pwErr);
        }
        if (factCB != null)
        {
            factCB.Evaluate(treeFact, transGoldTree, pwErr);
        }
        foreach (IEval treeEval in evals)
        {
            treeEval.Evaluate(treeFact, transGoldTree, pwErr);
        }
        if (parserQueryEvals != null)
        {
            // Each query-level evaluator is invoked through its own loop variable; the
            // variable of the preceding loop is out of scope here.
            foreach (IParserQueryEval queryEval in parserQueryEvals)
            {
                queryEval.Evaluate(pq, transGoldTree, pwErr);
            }
        }
        if (op.testOptions.evalb)
        {
            // empty out scores just in case
            NanScores(tree);
            EvalbFormatWriter.WriteEVALBline(treeFact, transGoldTree);
        }
    }
    pwErr.Println();
}
/// <summary>The meat of the outputter</summary>
/// <remarks>
/// Writes, in order: the document-level fields that are present (id, title, date, type,
/// source type); then either a section per sentence (text, tokens, and whichever of the
/// parse, sentiment, dependency, mention, relation and triple annotations are set) or, when
/// the annotation has no sentences, the document text and its tokens; then the coreference
/// sets; then the extracted quotes. The writer is flushed at the end.
/// </remarks>
/// <param name="annotation">Annotated document to print.</param>
/// <param name="pw">Writer that receives the output.</param>
/// <param name="options">Supplies the relation beam and the constituency tree printer.</param>
/// <exception cref="System.IO.IOException"/>
private static void Print(Annotation annotation, PrintWriter pw, AnnotationOutputter.Options options)
{
    double beam = options.beamPrintingOption;
    IList<ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));

    // Display docid if available
    string docId = annotation.Get(typeof(CoreAnnotations.DocIDAnnotation));
    if (docId != null)
    {
        IList<CoreLabel> tokens = annotation.Get(typeof(CoreAnnotations.TokensAnnotation));
        int nSentences = (sentences != null) ? sentences.Count : 0;
        int nTokens = (tokens != null) ? tokens.Count : 0;
        pw.Printf("Document: ID=%s (%d sentences, %d tokens)%n", docId, nSentences, nTokens);
    }

    // Display doctitle if available
    string docTitle = annotation.Get(typeof(CoreAnnotations.DocTitleAnnotation));
    if (docTitle != null)
    {
        pw.Printf("Document Title: %s%n", docTitle);
    }

    // Display docdate if available
    string docDate = annotation.Get(typeof(CoreAnnotations.DocDateAnnotation));
    if (docDate != null)
    {
        pw.Printf("Document Date: %s%n", docDate);
    }

    // Display doctype if available
    string docType = annotation.Get(typeof(CoreAnnotations.DocTypeAnnotation));
    if (docType != null)
    {
        pw.Printf("Document Type: %s%n", docType);
    }

    // Display docsourcetype if available
    string docSourceType = annotation.Get(typeof(CoreAnnotations.DocSourceTypeAnnotation));
    if (docSourceType != null)
    {
        pw.Printf("Document Source Type: %s%n", docSourceType);
    }

    // display each sentence in this annotation
    if (sentences != null)
    {
        // The bound is the number of sentences, captured once before the loop.
        int sz = sentences.Count;
        for (int i = 0; i < sz; i++)
        {
            pw.Println();
            ICoreMap sentence = sentences[i];
            IList<CoreLabel> tokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
            string sentiment = sentence.Get(typeof(SentimentCoreAnnotations.SentimentClass));
            string piece;
            if (sentiment == null)
            {
                piece = string.Empty;
            }
            else
            {
                piece = ", sentiment: " + sentiment;
            }
            pw.Printf("Sentence #%d (%d tokens%s):%n", (i + 1), tokens.Count, piece);

            string text = sentence.Get(typeof(CoreAnnotations.TextAnnotation));
            pw.Println(text);

            // display the token-level annotations
            string[] tokenAnnotations = new string[] { "Text", "PartOfSpeech", "Lemma", "Answer", "NamedEntityTag", "CharacterOffsetBegin", "CharacterOffsetEnd", "NormalizedNamedEntityTag", "Timex", "TrueCase", "TrueCaseText", "SentimentClass", "WikipediaEntity" };
            pw.Println();
            pw.Println("Tokens:");
            foreach (CoreLabel token in tokens)
            {
                pw.Print(token.ToShorterString(tokenAnnotations));
                pw.Println();
            }

            // display the parse tree for this sentence
            Tree tree = sentence.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
            if (tree != null)
            {
                pw.Println();
                pw.Println("Constituency parse: ");
                options.constituentTreePrinter.PrintTree(tree, pw);
            }

            // display sentiment tree if they asked for sentiment
            if (!StringUtils.IsNullOrEmpty(sentiment))
            {
                pw.Println();
                pw.Println("Sentiment-annotated binary tree:");
                Tree sTree = sentence.Get(typeof(SentimentCoreAnnotations.SentimentAnnotatedTree));
                if (sTree != null)
                {
                    sTree.PennPrint(pw, null);
                    pw.Println();
                }
            }

            // It is possible to turn off the semantic graphs, in which
            // case we don't want to recreate them using the dependency
            // printer. This might be relevant if using CoreNLP for a
            // language which doesn't have dependencies, for example.
            if (sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation)) != null)
            {
                pw.Println();
                pw.Println("Dependency Parse (enhanced plus plus dependencies):");
                pw.Print(sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation)).ToList());
            }

            // display the entity mentions
            IList<ICoreMap> entityMentions = sentence.Get(typeof(CoreAnnotations.MentionsAnnotation));
            if (entityMentions != null)
            {
                pw.Println();
                pw.Println("Extracted the following NER entity mentions:");
                foreach (ICoreMap entityMention in entityMentions)
                {
                    if (entityMention.Get(typeof(CoreAnnotations.EntityTypeAnnotation)) != null)
                    {
                        pw.Println(entityMention.Get(typeof(CoreAnnotations.TextAnnotation)) + "\t" + entityMention.Get(typeof(CoreAnnotations.EntityTypeAnnotation)));
                    }
                }
            }

            // display MachineReading entities and relations
            IList<EntityMention> entities = sentence.Get(typeof(MachineReadingAnnotations.EntityMentionsAnnotation));
            if (entities != null)
            {
                pw.Println();
                pw.Println("Extracted the following MachineReading entity mentions:");
                foreach (EntityMention e in entities)
                {
                    pw.Print('\t');
                    pw.Println(e);
                }
            }
            IList<RelationMention> relations = sentence.Get(typeof(MachineReadingAnnotations.RelationMentionsAnnotation));
            if (relations != null)
            {
                pw.Println();
                pw.Println("Extracted the following MachineReading relation mentions:");
                foreach (RelationMention r in relations)
                {
                    if (r.PrintableObject(beam))
                    {
                        pw.Println(r);
                    }
                }
            }

            // display OpenIE triples
            ICollection<RelationTriple> openieTriples = sentence.Get(typeof(NaturalLogicAnnotations.RelationTriplesAnnotation));
            if (openieTriples != null && !openieTriples.IsEmpty())
            {
                pw.Println();
                pw.Println("Extracted the following Open IE triples:");
                foreach (RelationTriple triple in openieTriples)
                {
                    pw.Println(OpenIE.TripleToString(triple, docId, sentence));
                }
            }

            // display KBP triples
            ICollection<RelationTriple> kbpTriples = sentence.Get(typeof(CoreAnnotations.KBPTriplesAnnotation));
            if (kbpTriples != null && !kbpTriples.IsEmpty())
            {
                pw.Println();
                pw.Println("Extracted the following KBP triples:");
                foreach (RelationTriple triple in kbpTriples)
                {
                    pw.Println(triple);
                }
            }
        }
    }
    else
    {
        IList<CoreLabel> tokens = annotation.Get(typeof(CoreAnnotations.TokensAnnotation));
        pw.Println("Tokens:");
        pw.Println(annotation.Get(typeof(CoreAnnotations.TextAnnotation)));
        foreach (CoreLabel token in tokens)
        {
            int tokenCharBegin = token.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
            int tokenCharEnd = token.Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation));
            pw.Println("[Text=" + token.Word() + " CharacterOffsetBegin=" + tokenCharBegin + " CharacterOffsetEnd=" + tokenCharEnd + ']');
        }
    }

    // display the old-style doc-level coref annotations
    // this is not supported anymore!
    //String corefAnno = annotation.get(CorefPLAnnotation.class);
    //if(corefAnno != null) os.println(corefAnno);

    // display the new-style coreference graph
    IDictionary<int, CorefChain> corefChains = annotation.Get(typeof(CorefCoreAnnotations.CorefChainAnnotation));
    if (corefChains != null && sentences != null)
    {
        foreach (CorefChain chain in corefChains.Values)
        {
            CorefChain.CorefMention representative = chain.GetRepresentativeMention();
            bool outputHeading = false;
            foreach (CorefChain.CorefMention mention in chain.GetMentionsInTextualOrder())
            {
                if (mention == representative)
                {
                    continue;
                }
                // The heading is written lazily, so a chain holding only its
                // representative mention produces no output.
                if (!outputHeading)
                {
                    outputHeading = true;
                    pw.Println();
                    pw.Println("Coreference set:");
                }
                // all offsets start at 1!
                pw.Printf("\t(%d,%d,[%d,%d]) -> (%d,%d,[%d,%d]), that is: \"%s\" -> \"%s\"%n", mention.sentNum, mention.headIndex, mention.startIndex, mention.endIndex, representative.sentNum, representative.headIndex, representative.startIndex, representative.endIndex, mention.mentionSpan, representative.mentionSpan);
            }
        }
    }

    // display quotes if available
    if (annotation.Get(typeof(CoreAnnotations.QuotationsAnnotation)) != null)
    {
        pw.Println();
        pw.Println("Extracted quotes: ");
        IList<ICoreMap> allQuotes = QuoteAnnotator.GatherQuotes(annotation);
        foreach (ICoreMap quote in allQuotes)
        {
            // Prefer the canonical mention, then the speaker annotation, then "Unknown".
            string speakerString;
            if (quote.Get(typeof(QuoteAttributionAnnotator.CanonicalMentionAnnotation)) != null)
            {
                speakerString = quote.Get(typeof(QuoteAttributionAnnotator.CanonicalMentionAnnotation));
            }
            else if (quote.Get(typeof(QuoteAttributionAnnotator.SpeakerAnnotation)) != null)
            {
                speakerString = quote.Get(typeof(QuoteAttributionAnnotator.SpeakerAnnotation));
            }
            else
            {
                speakerString = "Unknown";
            }
            pw.Printf("[QuotationIndex=%d, CharacterOffsetBegin=%d, Text=%s, Speaker=%s]%n", quote.Get(typeof(CoreAnnotations.QuotationIndexAnnotation)), quote.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation)), quote.Get(typeof(CoreAnnotations.TextAnnotation)), speakerString);
        }
    }
    pw.Flush();
}
/// <summary>
/// Writes <paramref name="corpus"/> as text to <paramref name="os"/>: first the coref chains,
/// then the old-format coref graph on a single line, then each sentence (parse tree, the
/// collapsed / basic / CC-processed dependency graphs, and its tokens) followed by an empty line.
/// </summary>
/// <param name="corpus">The annotation to serialize.</param>
/// <param name="os">
/// Destination stream. It is wrapped in a GZIPOutputStream when the <c>compress</c> field is set
/// and the stream is not already a GZIPOutputStream.
/// </param>
/// <returns>The stream that was written to (the gzip wrapper when one was created).</returns>
/// <exception cref="System.IO.IOException"/>
public override OutputStream Write(Annotation corpus, OutputStream os)
{
    // Only wrap when compression is requested and the caller has not wrapped the stream already.
    if (compress && !(os is GZIPOutputStream))
    {
        os = new GZIPOutputStream(os);
    }
    PrintWriter writer = new PrintWriter(os);

    // Coref graph, new format.
    IDictionary<int, CorefChain> corefChains = corpus.Get(typeof(CorefCoreAnnotations.CorefChainAnnotation));
    SaveCorefChains(corefChains, writer);

    // Coref graph, old format: all arcs on one line, separated by single spaces.
    IList<Pair<IntTuple, IntTuple>> arcs = corpus.Get(typeof(CorefCoreAnnotations.CorefGraphAnnotation));
    if (arcs != null)
    {
        bool needsSeparator = false;
        foreach (Pair<IntTuple, IntTuple> edge in arcs)
        {
            if (needsSeparator)
            {
                writer.Print(" ");
            }
            writer.Printf("%d %d %d %d", edge.first.Get(0), edge.first.Get(1), edge.second.Get(0), edge.second.Get(1));
            needsSeparator = true;
        }
    }
    writer.Println();

    // Sentences, each terminated by an empty line.
    IList<ICoreMap> sentenceList = corpus.Get(typeof(CoreAnnotations.SentencesAnnotation));
    foreach (ICoreMap sentence in sentenceList)
    {
        // The parse tree comes first and must occupy exactly one line.
        Tree parse = sentence.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
        if (parse == null)
        {
            writer.Println();
        }
        else
        {
            // No \n is allowed inside the tree string (it can appear when HTML/XML/RDF tags are tokenized).
            string flattened = parse.ToString().ReplaceAll("\n", " ");
            writer.Println(flattened);
        }

        SemanticGraph collapsed = sentence.Get(typeof(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation));
        SaveDependencyGraph(collapsed, writer);
        SemanticGraph basic = sentence.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
        SaveDependencyGraph(basic, writer);
        SemanticGraph ccProcessed = sentence.Get(typeof(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation));
        SaveDependencyGraph(ccProcessed, writer);

        // One line per token.
        IList<CoreLabel> sentenceTokens = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
        if (sentenceTokens != null)
        {
            foreach (CoreLabel tok in sentenceTokens)
            {
                SaveToken(tok, haveExplicitAntecedent, writer);
                writer.Println();
            }
        }

        // Empty line marks the end of the sentence.
        writer.Println();
    }

    writer.Flush();
    return os;
}
/// <summary>Run the Evalb scoring metric on guess/gold input.</summary>
/// <remarks>
/// Run the Evalb scoring metric on guess/gold input. The default language is English.
/// Options are read through <c>StringUtils.ArgsToProperties</c>; the two remaining positional
/// arguments are the gold file followed by the guess file.
/// </remarks>
/// <param name="args">Command-line options followed by the gold file and the guess file.</param>
public static void Main(string[] args)
{
    if (args.Length < minArgs)
    {
        log.Info(Usage());
        System.Environment.Exit(-1);
    }

    Properties options = StringUtils.ArgsToProperties(args, OptionArgDefs());
    Language language = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
    ITreebankLangParserParams tlpp = language.@params;
    int maxGoldYield = PropertiesUtils.GetInt(options, "y", int.MaxValue);
    bool verbose = PropertiesUtils.GetBool(options, "v", false);
    // "-s" both switches on F1 sorting and carries the number of worst trees to emit.
    bool sortByF1 = PropertiesUtils.HasProperty(options, "s");
    int worstKTreesToEmit = PropertiesUtils.GetInt(options, "s", 0);
    PriorityQueue<Triple<double, Tree, Tree>> queue = sortByF1
        ? new PriorityQueue<Triple<double, Tree, Tree>>(2000, new Evalb.F1Comparator())
        : null;
    bool doCatLevel = PropertiesUtils.GetBool(options, "c", false);
    string labelRegex = options.GetProperty("f", null);
    string encoding = options.GetProperty("e", "UTF-8");
    string[] parsedArgs = options.GetProperty(string.Empty, string.Empty).Split("\\s+");
    if (parsedArgs.Length != minArgs)
    {
        log.Info(Usage());
        System.Environment.Exit(-1);
    }
    string goldFile = parsedArgs[0];
    string guessFile = parsedArgs[1];

    // Command-line has been parsed. Configure the metric for evaluation.
    tlpp.SetInputEncoding(encoding);
    PrintWriter pwOut = tlpp.Pw();

    Treebank guessTreebank = tlpp.DiskTreebank();
    guessTreebank.LoadPath(guessFile);
    pwOut.Println("GUESS TREEBANK:");
    pwOut.Println(guessTreebank.TextualSummary());

    Treebank goldTreebank = tlpp.DiskTreebank();
    goldTreebank.LoadPath(goldFile);
    pwOut.Println("GOLD TREEBANK:");
    pwOut.Println(goldTreebank.TextualSummary());

    Evalb metric = new Evalb("Evalb LP/LR", true);
    EvalbByCat evalbCat = doCatLevel ? new EvalbByCat("EvalbByCat LP/LR", true, labelRegex) : null;
    ITreeTransformer tc = tlpp.Collinizer();

    // The evalb ref implementation assigns status for each tree pair as follows:
    //
    //   0 - Ok (yields match)
    //   1 - length mismatch
    //   2 - null parse e.g. (()).
    //
    // In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
    IEnumerator<Tree> goldItr = goldTreebank.GetEnumerator();
    IEnumerator<Tree> guessItr = guessTreebank.GetEnumerator();
    int goldLineId = 0;
    int guessLineId = 0;
    int skippedGuessTrees = 0;

    // MoveNext() consumes an element, so its results are remembered instead of being
    // re-queried after the loop; otherwise a single surplus tree on one side goes unnoticed.
    bool guessHasNext = false;
    bool goldHasNext = false;
    while (true)
    {
        guessHasNext = guessItr.MoveNext();
        goldHasNext = goldItr.MoveNext();
        if (!guessHasNext || !goldHasNext)
        {
            break;
        }

        Tree guessTree = guessItr.Current;
        IList<ILabel> guessYield = guessTree.Yield();
        guessLineId++;
        Tree goldTree = goldItr.Current;
        IList<ILabel> goldYield = goldTree.Yield();
        goldLineId++;

        // Check that we should evaluate this tree
        if (goldYield.Count > maxGoldYield)
        {
            skippedGuessTrees++;
            continue;
        }

        // Only trees with equal yields can be evaluated
        if (goldYield.Count != guessYield.Count)
        {
            pwOut.Printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.Count, guessYield.Count, goldLineId, guessLineId);
            skippedGuessTrees++;
            continue;
        }

        Tree evalGuess = tc.TransformTree(guessTree);
        Tree evalGold = tc.TransformTree(goldTree);
        metric.Evaluate(evalGuess, evalGold, verbose ? pwOut : null);
        if (doCatLevel)
        {
            evalbCat.Evaluate(evalGuess, evalGold, verbose ? pwOut : null);
        }
        if (sortByF1)
        {
            StoreTrees(queue, guessTree, goldTree, metric.GetLastF1());
        }
    }

    // Exactly one side still had a tree when the other ran out.
    if (guessHasNext || goldHasNext)
    {
        System.Console.Error.Printf("Guess/gold files do not have equal lengths (guess: %d gold: %d).%n", guessLineId, goldLineId);
    }

    pwOut.Println("================================================================================");
    if (skippedGuessTrees != 0)
    {
        pwOut.Printf("%s %d guess trees\n", "Unable to evaluate", skippedGuessTrees);
    }
    metric.Display(true, pwOut);
    pwOut.Println();
    if (doCatLevel)
    {
        evalbCat.Display(true, pwOut);
        pwOut.Println();
    }
    if (sortByF1)
    {
        EmitSortedTrees(queue, worstKTreesToEmit, guessFile);
    }
    pwOut.Close();
}
/// <summary>
/// Prints labeled precision, recall and F1 for every evaluated category, ordered by
/// ascending F1, followed by a totals line aggregated over those categories.
/// </summary>
/// <param name="verbose">Not used by this implementation.</param>
/// <param name="pw">Writer that receives the report.</param>
public override void Display(bool verbose, PrintWriter pw)
{
    if (precisions.KeySet().Count != recalls.KeySet().Count)
    {
        log.Error("Different counts for precisions and recalls!");
        return;
    }

    ICollection<ILabel> cats = GetEvalLabelSet(precisions.KeySet());
    Random rand = new Random();

    // Sorted by F1 so that categories are reported from worst to best.
    IDictionary<double, ILabel> f1Map = new SortedDictionary<double, ILabel>();
    foreach (ILabel cat in cats)
    {
        double pnum2 = pnums2.GetCount(cat);
        double rnum2 = rnums2.GetCount(cat);
        double prec = precisions2.GetCount(cat) / pnum2;
        double rec = recalls2.GetCount(cat) / rnum2;
        double f1 = 2.0 / (1.0 / prec + 1.0 / rec);
        if (double.IsNaN(f1))
        {
            f1 = -1.0;
        }

        // Categories with the same F1 get a tiny random offset so they can share the map.
        // Re-draw until the key is free so that no category is silently overwritten.
        double key = f1;
        while (f1Map.ContainsKey(key))
        {
            key = f1 + (rand.NextDouble() / 1000.0);
        }
        f1Map[key] = cat;
    }

    pw.Println("============================================================");
    pw.Println("Labeled Bracketed Evaluation by Category -- final statistics");
    pw.Println("============================================================");

    // Per category
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
    double catPrecisions = 0.0;
    double catPrecisionNums = 0.0;
    double catRecalls = 0.0;
    double catRecallNums = 0.0;
    foreach (ILabel cat_1 in f1Map.Values)
    {
        double pnum2 = pnums2.GetCount(cat_1);
        double rnum2 = rnums2.GetCount(cat_1);
        double prec = precisions2.GetCount(cat_1) / pnum2;
        prec *= 100.0;
        double rec = recalls2.GetCount(cat_1) / rnum2;
        rec *= 100.0;
        double f1 = 2.0 / (1.0 / prec + 1.0 / rec);

        catPrecisions += precisions2.GetCount(cat_1);
        catPrecisionNums += pnum2;
        catRecalls += recalls2.GetCount(cat_1);
        catRecallNums += rnum2;

        // .NET string.Format does not understand "%.2f"; the "F2" specifier gives two decimals.
        string Lp = pnum2 == 0.0 ? "N/A" : prec.ToString("F2", invariant);
        string Lr = rnum2 == 0.0 ? "N/A" : rec.ToString("F2", invariant);
        string F1 = (pnum2 == 0.0 || rnum2 == 0.0) ? "N/A" : f1.ToString("F2", invariant);
        pw.Printf("%s\tLP: %s\tguessed: %d\tLR: %s\tgold: %d\t F1: %s%n", cat_1.Value(), Lp, (int)pnum2, Lr, (int)rnum2, F1);
    }
    pw.Println("============================================================");

    // Totals
    double prec_1 = catPrecisions / catPrecisionNums;
    double rec_1 = catRecalls / catRecallNums;
    double f1_1 = (2 * prec_1 * rec_1) / (prec_1 + rec_1);
    pw.Printf("Total\tLP: %.2f\tguessed: %d\tLR: %.2f\tgold: %d\t F1: %.2f%n", prec_1 * 100.0, (int)catPrecisionNums, rec_1 * 100.0, (int)catRecallNums, f1_1 * 100.0);
    pw.Println("============================================================");
}
/// <summary>
/// Counts the word types that carry a given tag in one or more treebank files and prints
/// them, most frequent first, as "word&lt;TAB&gt;count" lines.
/// </summary>
/// <param name="args">
/// Optional <c>-l language</c> and <c>-e encoding</c>, followed by one or more
/// "tag treebank-path" pairs. The tag of the last pair is the one that is counted.
/// </param>
public static void Main(string[] args)
{
    if (args.Length < minArgs)
    {
        System.Console.Out.WriteLine(usage.ToString());
        System.Environment.Exit(-1);
    }

    ITreebankLangParserParams tlpp = new EnglishTreebankParserParams();
    DiskTreebank tb = null;
    string encoding = "UTF-8";
    string puncTag = null;

    for (int i = 0; i < args.Length; i++)
    {
        if (args[i].StartsWith("-"))
        {
            // Both supported options take a value and unknown options are rejected anyway,
            // so an option in the last position is always a usage error.
            if (i + 1 >= args.Length)
            {
                System.Console.Out.WriteLine(usage.ToString());
                System.Environment.Exit(-1);
            }
            switch (args[i])
            {
                case "-l":
                {
                    Language lang = Language.ValueOf(args[++i].Trim());
                    tlpp = lang.@params;
                    break;
                }

                case "-e":
                {
                    encoding = args[++i];
                    break;
                }

                default:
                {
                    System.Console.Out.WriteLine(usage.ToString());
                    System.Environment.Exit(-1);
                    break;
                }
            }
        }
        else
        {
            puncTag = args[i++];
            // The tag must be followed by the path of the treebank to load.
            if (i >= args.Length)
            {
                System.Console.Out.WriteLine(usage.ToString());
                System.Environment.Exit(-1);
            }
            if (tb == null)
            {
                if (tlpp == null)
                {
                    System.Console.Out.WriteLine(usage.ToString());
                    System.Environment.Exit(-1);
                }
                else
                {
                    tlpp.SetInputEncoding(encoding);
                    tlpp.SetOutputEncoding(encoding);
                    tb = tlpp.DiskTreebank();
                }
            }
            tb.LoadPath(args[i]);
        }
    }

    // Only options were supplied: there is nothing to read.
    if (tb == null || puncTag == null)
    {
        System.Console.Out.WriteLine(usage.ToString());
        System.Environment.Exit(-1);
    }

    ICounter<string> puncTypes = new ClassicCounter<string>();
    foreach (Tree t in tb)
    {
        IList<CoreLabel> yield = t.TaggedLabeledYield();
        foreach (CoreLabel word in yield)
        {
            // Compare from the known non-null side so a token without a tag is simply skipped.
            if (puncTag.Equals(word.Tag()))
            {
                puncTypes.IncrementCount(word.Word());
            }
        }
    }

    IList<string> biggestKeys = new List<string>(puncTypes.KeySet());
    biggestKeys.Sort(Counters.ToComparatorDescending(puncTypes));

    PrintWriter pw = tlpp.Pw();
    foreach (string wordType in biggestKeys)
    {
        pw.Printf("%s\t%d%n", wordType, (int)puncTypes.GetCount(wordType));
    }
    pw.Close();
}
/// <summary>
/// Reads a treebank and prints each tree whose yield is at most the configured length in
/// the format selected on the command line, then reports the number of printed trees on
/// standard error.
/// </summary>
/// <param name="args">
/// Options (<c>-y</c> max yield, <c>-p</c> print trees, <c>-f</c> print flattened yields,
/// <c>-a</c> print pre-terminal yields, <c>-t</c> print word/tag pairs, <c>-l</c> language,
/// <c>-e</c> encoding) followed by the treebank file name.
/// </param>
public static void Main(string[] args)
{
    if (args.Length < minArgs)
    {
        System.Console.Out.WriteLine(usage);
        System.Environment.Exit(-1);
    }

    // Process command-line options
    Properties options = StringUtils.ArgsToProperties(args, optionArgDefinitions);
    string fileName = options.GetProperty(string.Empty);
    if (fileName == null || fileName.Equals(string.Empty))
    {
        System.Console.Out.WriteLine(usage);
        System.Environment.Exit(-1);
    }

    int maxLen = PropertiesUtils.GetInt(options, "y", int.MaxValue);
    bool printTrees = PropertiesUtils.GetBool(options, "p", false);
    bool flattenTrees = PropertiesUtils.GetBool(options, "f", false);
    bool printPOS = PropertiesUtils.GetBool(options, "a", false);
    bool printTnT = PropertiesUtils.GetBool(options, "t", false);
    Language language = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
    ITreebankLangParserParams tlpp = language.@params;
    string encoding = options.GetProperty("e", "UTF-8");
    tlpp.SetInputEncoding(encoding);
    tlpp.SetOutputEncoding(encoding);

    DiskTreebank tb = tlpp.DiskTreebank();
    tb.LoadPath(fileName);

    // Read the treebank
    PrintWriter pw = tlpp.Pw();
    int numTrees = 0;
    foreach (Tree tree in tb)
    {
        if (tree.Yield().Count > maxLen)
        {
            continue;
        }
        ++numTrees;

        // The output modes are mutually exclusive; the first enabled one wins.
        if (printTrees)
        {
            pw.Println(tree.ToString());
        }
        else if (flattenTrees)
        {
            pw.Println(SentenceUtils.ListToString(tree.Yield()));
        }
        else if (printPOS)
        {
            pw.Println(SentenceUtils.ListToString(tree.PreTerminalYield()));
        }
        else if (printTnT)
        {
            IList<CoreLabel> yield = tree.TaggedLabeledYield();
            foreach (CoreLabel label in yield)
            {
                pw.Printf("%s\t%s%n", label.Word(), label.Tag());
            }
            pw.Println();
        }
    }

    // Flush and release the output writer so that no buffered output is lost on exit.
    pw.Close();
    System.Console.Error.Printf("Read %d trees.%n", numTrees);
}
/// <summary>
/// Counts the right-hand sides (sequences of child labels) of every node that matches a
/// given category in one or more treebank files and prints them, most frequent first, as
/// "rhs&lt;TAB&gt;count" lines.
/// </summary>
/// <param name="args">
/// Optional <c>-l language</c> and <c>-e encoding</c>, followed by one or more
/// "category treebank-path" pairs. The category of the last pair is the one that is matched.
/// </param>
public static void Main(string[] args)
{
    if (args.Length < minArgs)
    {
        System.Console.Out.WriteLine(usage.ToString());
        System.Environment.Exit(-1);
    }

    ITreebankLangParserParams tlpp = new EnglishTreebankParserParams();
    DiskTreebank tb = null;
    string encoding = "UTF-8";
    TregexPattern rootMatch = null;

    for (int i = 0; i < args.Length; i++)
    {
        if (args[i].StartsWith("-"))
        {
            // Both supported options take a value and unknown options are rejected anyway,
            // so an option in the last position is always a usage error.
            if (i + 1 >= args.Length)
            {
                System.Console.Out.WriteLine(usage.ToString());
                System.Environment.Exit(-1);
            }
            switch (args[i])
            {
                case "-l":
                {
                    Language lang = Language.ValueOf(args[++i].Trim());
                    tlpp = lang.@params;
                    break;
                }

                case "-e":
                {
                    encoding = args[++i];
                    break;
                }

                default:
                {
                    System.Console.Out.WriteLine(usage.ToString());
                    System.Environment.Exit(-1);
                    break;
                }
            }
        }
        else
        {
            rootMatch = TregexPattern.Compile("@" + args[i++]);
            // The category must be followed by the path of the treebank to load.
            if (i >= args.Length)
            {
                System.Console.Out.WriteLine(usage.ToString());
                System.Environment.Exit(-1);
            }
            if (tb == null)
            {
                if (tlpp == null)
                {
                    System.Console.Out.WriteLine(usage.ToString());
                    System.Environment.Exit(-1);
                }
                else
                {
                    tlpp.SetInputEncoding(encoding);
                    tlpp.SetOutputEncoding(encoding);
                    tb = tlpp.DiskTreebank();
                }
            }
            // The loop header advances past the path; incrementing here as well would
            // skip the argument that follows it.
            tb.LoadPath(args[i]);
        }
    }

    // Only options were supplied: there is nothing to read.
    if (tb == null || rootMatch == null)
    {
        System.Console.Out.WriteLine(usage.ToString());
        System.Environment.Exit(-1);
    }

    ICounter<string> rhsCounter = new ClassicCounter<string>();
    foreach (Tree t in tb)
    {
        TregexMatcher m = rootMatch.Matcher(t);
        while (m.FindNextMatchingNode())
        {
            Tree match = m.GetMatch();
            // The right-hand side is the space-separated list of the children's labels.
            StringBuilder sb = new StringBuilder();
            foreach (Tree kid in match.Children())
            {
                sb.Append(kid.Value()).Append(" ");
            }
            rhsCounter.IncrementCount(sb.ToString().Trim());
        }
    }

    IList<string> biggestKeys = new List<string>(rhsCounter.KeySet());
    biggestKeys.Sort(Counters.ToComparatorDescending(rhsCounter));

    PrintWriter pw = tlpp.Pw();
    foreach (string rhs in biggestKeys)
    {
        pw.Printf("%s\t%d%n", rhs, (int)rhsCounter.GetCount(rhs));
    }
    pw.Close();
}
/// <summary>
/// Writes the text form of this classifier: whatever the base class writes, then the node and
/// edge feature index maps, then the weight matrices. The edge weights are written either as
/// the input/output edge layers (when <c>flags.secondOrderNonLinear</c> is set) or as the
/// linear weights; the input and output layer weights are always written last.
/// </summary>
/// <param name="pw">Writer that receives the serialized classifier.</param>
/// <exception cref="System.Exception"/>
protected internal override void SerializeTextClassifier(PrintWriter pw)
{
    base.SerializeTextClassifier(pw);

    pw.Printf("nodeFeatureIndicesMap.size()=\t%d%n", nodeFeatureIndicesMap.Size());
    for (int i = 0; i < nodeFeatureIndicesMap.Size(); i++)
    {
        pw.Printf("%d\t%d%n", i, nodeFeatureIndicesMap.Get(i));
    }

    pw.Printf("edgeFeatureIndicesMap.size()=\t%d%n", edgeFeatureIndicesMap.Size());
    for (int i_1 = 0; i_1 < edgeFeatureIndicesMap.Size(); i_1++)
    {
        pw.Printf("%d\t%d%n", i_1, edgeFeatureIndicesMap.Get(i_1));
    }

    if (flags.secondOrderNonLinear)
    {
        WriteWeightMatrixText(pw, "inputLayerWeights4Edge", inputLayerWeights4Edge);
        WriteWeightMatrixText(pw, "outputLayerWeights4Edge", outputLayerWeights4Edge);
    }
    else
    {
        WriteWeightMatrixText(pw, "linearWeights", linearWeights);
    }
    WriteWeightMatrixText(pw, "inputLayerWeights", inputLayerWeights);
    WriteWeightMatrixText(pw, "outputLayerWeights", outputLayerWeights);
}

/// <summary>
/// Writes one weight matrix: a "<paramref name="name"/>.length=&lt;TAB&gt;rows" header line,
/// then one line per row holding the row length, a tab, and the space-separated values.
/// </summary>
private static void WriteWeightMatrixText(PrintWriter pw, string name, double[][] weights)
{
    pw.Printf("%s.length=\t%d%n", name, weights.Length);
    foreach (double[] row in weights)
    {
        // Copy into a list because the join helper is called with a list, as before.
        List<double> values = new List<double>();
        foreach (double w in row)
        {
            values.Add(w);
        }
        pw.Printf("%d\t%s%n", row.Length, StringUtils.Join(values, " "));
    }
}
/// <summary>
/// Evaluate accuracy when the input is gold segmented text *with* segmentation
/// markers and morphological analyses.
/// </summary>
/// <remarks>
/// Evaluate accuracy when the input is gold segmented text *with* segmentation
/// markers and morphological analyses. In other words, the evaluation file has the
/// same format as the training data.
/// <para>
/// Overall and per-label accuracies are written to <paramref name="pwOut"/>. When
/// <c>tedEvalPrefix</c> is set and all four TEDEval files can be opened, gold/parse tree and
/// segmentation files are written next to it as well.
/// </para>
/// </remarks>
/// <param name="pwOut">Writer that receives the evaluation report.</param>
private void Evaluate(PrintWriter pwOut)
{
    log.Info("Starting evaluation...");
    bool hasSegmentationMarkers = true;
    bool hasTags = true;
    IDocumentReaderAndWriter<CoreLabel> docReader = new ArabicDocumentReaderAndWriter(hasSegmentationMarkers, hasTags, hasDomainLabels, domain, tf);
    ObjectBank<IList<CoreLabel>> lines = classifier.MakeObjectBankFromFile(flags.testFile, docReader);

    PrintWriter tedEvalGoldTree = null;
    PrintWriter tedEvalParseTree = null;
    PrintWriter tedEvalGoldSeg = null;
    PrintWriter tedEvalParseSeg = null;
    try
    {
        if (tedEvalPrefix != null)
        {
            try
            {
                tedEvalGoldTree = new PrintWriter(tedEvalPrefix + "_gold.ftree");
                tedEvalGoldSeg = new PrintWriter(tedEvalPrefix + "_gold.segmentation");
                tedEvalParseTree = new PrintWriter(tedEvalPrefix + "_parse.ftree");
                tedEvalParseSeg = new PrintWriter(tedEvalPrefix + "_parse.segmentation");
            }
            catch (FileNotFoundException e)
            {
                System.Console.Error.Printf("%s: %s%n", typeof(Edu.Stanford.Nlp.International.Arabic.Process.ArabicSegmenter).FullName, e.Message);
            }
        }

        // TEDEval output is all-or-nothing: the last writer is only non-null when all four opened.
        bool writeTedEval = tedEvalParseSeg != null;

        ICounter<string> labelTotal = new ClassicCounter<string>();
        ICounter<string> labelCorrect = new ClassicCounter<string>();
        int total = 0;
        int correct = 0;
        foreach (IList<CoreLabel> line in lines)
        {
            string[] inputTokens = TedEvalSanitize(IOBUtils.IOBToString(line).ReplaceAll(":", "#pm#")).Split(" ");
            string[] goldTokens = TedEvalSanitize(IOBUtils.IOBToString(line, ":")).Split(" ");

            // A foreach iteration variable cannot be reassigned in C#, so the classifier
            // output gets its own local.
            IList<CoreLabel> classified = classifier.Classify(line);
            string[] parseTokens = TedEvalSanitize(IOBUtils.IOBToString(classified, ":")).Split(" ");

            foreach (CoreLabel label in classified)
            {
                // Do not evaluate labeling of whitespace
                string observation = label.Get(typeof(CoreAnnotations.CharAnnotation));
                if (!observation.Equals(IOBUtils.GetBoundaryCharacter()))
                {
                    total++;
                    string hypothesis = label.Get(typeof(CoreAnnotations.AnswerAnnotation));
                    string reference = label.Get(typeof(CoreAnnotations.GoldAnswerAnnotation));
                    labelTotal.IncrementCount(reference);
                    if (hypothesis.Equals(reference))
                    {
                        correct++;
                        labelCorrect.IncrementCount(reference);
                    }
                }
            }

            if (writeTedEval)
            {
                tedEvalGoldTree.Printf("(root");
                tedEvalParseTree.Printf("(root");

                int safeLength = inputTokens.Length;
                if (inputTokens.Length != goldTokens.Length)
                {
                    log.Info("In generating TEDEval files: Input and gold do not have the same number of tokens");
                    log.Info("    (ignoring any extras)");
                    log.Info("  input: " + Arrays.ToString(inputTokens));
                    log.Info("  gold: " + Arrays.ToString(goldTokens));
                    safeLength = Math.Min(inputTokens.Length, goldTokens.Length);
                }
                if (inputTokens.Length != parseTokens.Length)
                {
                    log.Info("In generating TEDEval files: Input and parse do not have the same number of tokens");
                    log.Info("    (ignoring any extras)");
                    log.Info("  input: " + Arrays.ToString(inputTokens));
                    log.Info("  parse: " + Arrays.ToString(parseTokens));
                    // Keep the bound established by the gold check; all three arrays are indexed below.
                    safeLength = Math.Min(safeLength, parseTokens.Length);
                }

                for (int i = 0; i < safeLength; i++)
                {
                    foreach (string segment in goldTokens[i].Split(":"))
                    {
                        tedEvalGoldTree.Printf(" (seg %s)", segment);
                    }
                    tedEvalGoldSeg.Printf("%s\t%s%n", inputTokens[i], goldTokens[i]);
                    foreach (string segment_1 in parseTokens[i].Split(":"))
                    {
                        tedEvalParseTree.Printf(" (seg %s)", segment_1);
                    }
                    tedEvalParseSeg.Printf("%s\t%s%n", inputTokens[i], parseTokens[i]);
                }

                tedEvalGoldTree.Printf(")%n");
                tedEvalGoldSeg.Println();
                tedEvalParseTree.Printf(")%n");
                tedEvalParseSeg.Println();
            }
        }

        double accuracy = ((double)correct) / ((double)total);
        accuracy *= 100.0;
        pwOut.Println("EVALUATION RESULTS");
        pwOut.Printf("#datums:\t%d%n", total);
        pwOut.Printf("#correct:\t%d%n", correct);
        pwOut.Printf("accuracy:\t%.2f%n", accuracy);
        pwOut.Println("==================");

        // Output the per label accuracies
        pwOut.Println("PER LABEL ACCURACIES");
        foreach (string refLabel in labelTotal.KeySet())
        {
            double nTotal = labelTotal.GetCount(refLabel);
            double nCorrect = labelCorrect.GetCount(refLabel);
            double acc = (nCorrect / nTotal) * 100.0;
            pwOut.Printf(" %s\t%.2f%n", refLabel, acc);
        }
    }
    finally
    {
        // Close whatever was opened, including after a partially failed open or an
        // exception during evaluation.
        if (tedEvalGoldTree != null)
        {
            tedEvalGoldTree.Close();
        }
        if (tedEvalGoldSeg != null)
        {
            tedEvalGoldSeg.Close();
        }
        if (tedEvalParseTree != null)
        {
            tedEvalParseTree.Close();
        }
        if (tedEvalParseSeg != null)
        {
            tedEvalParseSeg.Close();
        }
    }
}
/// <summary>Execute with no arguments for usage.</summary>
/// <remarks>
/// Loads the guess and gold treebanks, evaluates every tree pair with equal yields using the
/// leaf-ancestor metric, and prints the final statistics. Pairs whose gold yield exceeds
/// <c>MaxGoldYield</c> or whose yields differ in length are counted as skipped.
/// </remarks>
/// <param name="args">Command-line arguments, checked by <c>ValidateCommandLine</c>.</param>
public static void Main(string[] args)
{
    if (!ValidateCommandLine(args))
    {
        log.Info(Usage);
        System.Environment.Exit(-1);
    }

    ITreebankLangParserParams tlpp = Language.@params;
    PrintWriter pwOut = tlpp.Pw();

    Treebank guessTreebank = tlpp.DiskTreebank();
    guessTreebank.LoadPath(guessFile);
    pwOut.Println("GUESS TREEBANK:");
    pwOut.Println(guessTreebank.TextualSummary());

    Treebank goldTreebank = tlpp.DiskTreebank();
    goldTreebank.LoadPath(goldFile);
    pwOut.Println("GOLD TREEBANK:");
    pwOut.Println(goldTreebank.TextualSummary());

    Edu.Stanford.Nlp.Parser.Metrics.LeafAncestorEval metric = new Edu.Stanford.Nlp.Parser.Metrics.LeafAncestorEval("LeafAncestor");
    ITreeTransformer tc = tlpp.Collinizer();

    // The evalb ref implementation assigns status for each tree pair as follows:
    //
    //   0 - Ok (yields match)
    //   1 - length mismatch
    //   2 - null parse e.g. (()).
    //
    // In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
    IEnumerator<Tree> goldItr = goldTreebank.GetEnumerator();
    IEnumerator<Tree> guessItr = guessTreebank.GetEnumerator();
    int goldLineId = 0;
    int guessLineId = 0;
    int skippedGuessTrees = 0;

    // MoveNext() consumes an element, so its results are remembered instead of being
    // re-queried after the loop; otherwise a single surplus tree on one side goes unnoticed.
    bool guessHasNext = false;
    bool goldHasNext = false;
    while (true)
    {
        guessHasNext = guessItr.MoveNext();
        goldHasNext = goldItr.MoveNext();
        if (!guessHasNext || !goldHasNext)
        {
            break;
        }

        Tree guessTree = guessItr.Current;
        IList<ILabel> guessYield = guessTree.Yield();
        guessLineId++;
        Tree goldTree = goldItr.Current;
        IList<ILabel> goldYield = goldTree.Yield();
        goldLineId++;

        // Check that we should evaluate this tree
        if (goldYield.Count > MaxGoldYield)
        {
            skippedGuessTrees++;
            continue;
        }

        // Only trees with equal yields can be evaluated
        if (goldYield.Count != guessYield.Count)
        {
            pwOut.Printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.Count, guessYield.Count, goldLineId, guessLineId);
            skippedGuessTrees++;
            continue;
        }

        Tree evalGuess = tc.TransformTree(guessTree);
        Tree evalGold = tc.TransformTree(goldTree);
        metric.Evaluate(evalGuess, evalGold, ((Verbose) ? pwOut : null));
    }

    // Exactly one side still had a tree when the other ran out.
    if (guessHasNext || goldHasNext)
    {
        System.Console.Error.Printf("Guess/gold files do not have equal lengths (guess: %d gold: %d).%n", guessLineId, goldLineId);
    }

    pwOut.Println("================================================================================");
    if (skippedGuessTrees != 0)
    {
        pwOut.Printf("%s %d guess trees%n", "Unable to evaluate", skippedGuessTrees);
    }
    metric.Display(true, pwOut);
    pwOut.Close();
}
/// <summary>
/// Counts unknown-word signatures over the second half of a treebank and prints them, most
/// frequent first, as "signature&lt;TAB&gt;count" lines.
/// </summary>
/// <remarks>
/// Every word is added to a running vocabulary. Once more than half of the trees have been
/// read, each word seen for the first time is mapped to its signature by the lexicon's
/// unknown word model and that signature is counted.
/// </remarks>
/// <param name="args">
/// Optional <c>-l language</c> and <c>-e encoding</c>, followed by one or more treebank paths.
/// </param>
public static void Main(string[] args)
{
    if (args.Length < minArgs)
    {
        System.Console.Out.WriteLine(usage.ToString());
        System.Environment.Exit(-1);
    }

    ITreebankLangParserParams tlpp = new EnglishTreebankParserParams();
    DiskTreebank tb = null;
    string encoding = "UTF-8";
    Language lang = Language.English;

    for (int i = 0; i < args.Length; i++)
    {
        if (args[i].StartsWith("-"))
        {
            // Both supported options take a value and unknown options are rejected anyway,
            // so an option in the last position is always a usage error.
            if (i + 1 >= args.Length)
            {
                System.Console.Out.WriteLine(usage.ToString());
                System.Environment.Exit(-1);
            }
            switch (args[i])
            {
                case "-l":
                {
                    lang = Language.ValueOf(args[++i].Trim());
                    tlpp = lang.@params;
                    break;
                }

                case "-e":
                {
                    encoding = args[++i];
                    break;
                }

                default:
                {
                    System.Console.Out.WriteLine(usage.ToString());
                    System.Environment.Exit(-1);
                    break;
                }
            }
        }
        else
        {
            if (tb == null)
            {
                if (tlpp == null)
                {
                    System.Console.Out.WriteLine(usage.ToString());
                    System.Environment.Exit(-1);
                }
                else
                {
                    tlpp.SetInputEncoding(encoding);
                    tlpp.SetOutputEncoding(encoding);
                    tb = tlpp.DiskTreebank();
                }
            }
            tb.LoadPath(args[i]);
        }
    }

    // Only options were supplied: there is nothing to read.
    if (tb == null)
    {
        System.Console.Out.WriteLine(usage.ToString());
        System.Environment.Exit(-1);
    }

    PrintWriter pw = tlpp.Pw();
    Options op = new Options();
    Options.LexOptions lexOptions = op.lexOptions;
    if (lang == Language.French)
    {
        lexOptions.useUnknownWordSignatures = 1;
        lexOptions.smartMutation = false;
        lexOptions.unknownSuffixSize = 2;
        lexOptions.unknownPrefixSize = 1;
    }
    else if (lang == Language.Arabic)
    {
        lexOptions.smartMutation = false;
        lexOptions.useUnknownWordSignatures = 9;
        lexOptions.unknownPrefixSize = 1;
        lexOptions.unknownSuffixSize = 1;
    }

    IIndex<string> wordIndex = new HashIndex<string>();
    IIndex<string> tagIndex = new HashIndex<string>();
    ILexicon lex = tlpp.Lex(op, wordIndex, tagIndex);

    // Signatures are only collected once more than half of the trees have been seen.
    int computeAfter = (int)(0.50 * tb.Count);
    ICounter<string> vocab = new ClassicCounter<string>();
    ICounter<string> unkCounter = new ClassicCounter<string>();
    int treeId = 0;
    foreach (Tree t in tb)
    {
        IList<ILabel> yield = t.Yield();
        int posId = 0;
        foreach (ILabel word in yield)
        {
            vocab.IncrementCount(word.Value());
            if (treeId > computeAfter && vocab.GetCount(word.Value()) < 2.0)
            {
                unkCounter.IncrementCount(lex.GetUnknownWordModel().GetSignature(word.Value(), posId));
            }
            // Advance for every word so that posId is the word's index within the sentence,
            // not the number of signatures computed so far.
            posId++;
        }
        treeId++;
    }

    IList<string> biggestKeys = new List<string>(unkCounter.KeySet());
    biggestKeys.Sort(Counters.ToComparatorDescending(unkCounter));
    foreach (string wordType in biggestKeys)
    {
        pw.Printf("%s\t%d%n", wordType, (int)unkCounter.GetCount(wordType));
    }
    pw.Close();
}
/// <summary>Run the scoring metric on guess/gold input.</summary>
/// <remarks>
/// Run the scoring metric on guess/gold input. This method performs "Collinization."
/// The default language is English.
/// </remarks>
/// <param name="args">
/// Options (<c>-l</c> language, <c>-y</c> max gold yield, <c>-g</c> max guess yield,
/// <c>-t</c> tag mode, <c>-v</c> verbose) followed by the gold file and the guess file.
/// </param>
public static void Main(string[] args)
{
    if (args.Length < minArgs)
    {
        System.Console.Out.WriteLine(usage.ToString());
        System.Environment.Exit(-1);
    }

    ITreebankLangParserParams tlpp = new EnglishTreebankParserParams();
    int maxGoldYield = int.MaxValue;
    int maxGuessYield = int.MaxValue;
    bool verbose = false;
    bool skipGuess = false;
    bool tagMode = false;
    string guessFile = null;
    string goldFile = null;

    for (int i = 0; i < args.Length; i++)
    {
        if (args[i].StartsWith("-"))
        {
            // -l, -y and -g consume the following argument; make sure it exists.
            bool takesValue = args[i].Equals("-l") || args[i].Equals("-y") || args[i].Equals("-g");
            if (takesValue && i + 1 >= args.Length)
            {
                System.Console.Out.WriteLine(usage.ToString());
                System.Environment.Exit(-1);
            }
            switch (args[i])
            {
                case "-l":
                {
                    Language lang = Language.ValueOf(args[++i].Trim());
                    tlpp = lang.@params;
                    break;
                }

                case "-y":
                {
                    maxGoldYield = System.Convert.ToInt32(args[++i].Trim());
                    break;
                }

                case "-t":
                {
                    tagMode = true;
                    break;
                }

                case "-v":
                {
                    verbose = true;
                    break;
                }

                case "-g":
                {
                    maxGuessYield = System.Convert.ToInt32(args[++i].Trim());
                    skipGuess = true;
                    break;
                }

                default:
                {
                    System.Console.Out.WriteLine(usage.ToString());
                    System.Environment.Exit(-1);
                    break;
                }
            }
        }
        else
        {
            // Required parameters: gold file, then guess file.
            if (i + 1 >= args.Length)
            {
                System.Console.Out.WriteLine(usage.ToString());
                System.Environment.Exit(-1);
            }
            goldFile = args[i++];
            guessFile = args[i];
            break;
        }
    }

    // Only options were supplied: there is nothing to evaluate.
    if (goldFile == null || guessFile == null)
    {
        System.Console.Out.WriteLine(usage.ToString());
        System.Environment.Exit(-1);
    }

    PrintWriter pwOut = tlpp.Pw();

    Treebank guessTreebank = tlpp.DiskTreebank();
    guessTreebank.LoadPath(guessFile);
    pwOut.Println("GUESS TREEBANK:");
    pwOut.Println(guessTreebank.TextualSummary());

    Treebank goldTreebank = tlpp.DiskTreebank();
    goldTreebank.LoadPath(goldFile);
    pwOut.Println("GOLD TREEBANK:");
    pwOut.Println(goldTreebank.TextualSummary());

    string evalName = (tagMode) ? "TsarfatyTAG" : "TsarfatySEG";
    Edu.Stanford.Nlp.Parser.Metrics.TsarfatyEval eval = new Edu.Stanford.Nlp.Parser.Metrics.TsarfatyEval(evalName, tagMode);
    ITreeTransformer tc = tlpp.Collinizer();

    // PennTreeReader skips over null/malformed parses. So when the yields of the gold/guess trees
    // don't match, we need to keep looking for the next gold tree that matches.
    // The evalb ref implementation differs slightly as it expects one tree per line. It assigns
    // status as follows:
    //
    //   0 - Ok (yields match)
    //   1 - length mismatch
    //   2 - null parse e.g. (()).
    //
    // In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
    IEnumerator<Tree> goldItr = goldTreebank.GetEnumerator();
    int goldLineId = 0;
    int skippedGuessTrees = 0;
    foreach (Tree guess in guessTreebank)
    {
        Tree evalGuess = tc.TransformTree(guess);
        List<ILabel> guessSent = guess.Yield();
        string guessChars = SentenceUtils.ListToString(guessSent).ReplaceAll("\\s+", string.Empty);
        if (guessSent.Count > maxGuessYield)
        {
            skippedGuessTrees++;
            continue;
        }

        bool doneEval = false;
        // Test doneEval first: MoveNext() consumes a gold tree, so calling it after a
        // successful evaluation would silently drop the gold tree of the next guess.
        while (!doneEval && goldItr.MoveNext())
        {
            Tree gold = goldItr.Current;
            Tree evalGold = tc.TransformTree(gold);
            goldLineId++;
            List<ILabel> goldSent = gold.Yield();
            string goldChars = SentenceUtils.ListToString(goldSent).ReplaceAll("\\s+", string.Empty);

            if (goldSent.Count > maxGoldYield)
            {
                // Gold tree too long: try the next gold tree for this guess.
                continue;
            }
            if (goldChars.Length != guessChars.Length)
            {
                // Default evalb behavior -- skip this guess tree
                pwOut.Printf("Char level yield mismatch at line %d (guess: %d gold: %d)\n", goldLineId, guessChars.Length, goldChars.Length);
                skippedGuessTrees++;
                break;
            }

            eval.Evaluate(evalGuess, evalGold, ((verbose) ? pwOut : null));
            doneEval = true;
        }
        // Move to the next guess parse
    }

    pwOut.Println("================================================================================");
    if (skippedGuessTrees != 0)
    {
        pwOut.Printf("%s %d guess trees\n", ((skipGuess) ? "Skipped" : "Unable to evaluate"), skippedGuessTrees);
    }
    eval.Display(true, pwOut);
    pwOut.Println();
    pwOut.Close();
}