Пример #1
0
 /// <summary>
 /// Writes the opening markup of the page: the head with its title, the start of the
 /// body, and a top-level heading. Both title and heading read
 /// "&lt;node name&gt; Reconfiguration Utility".
 /// </summary>
 /// <param name="out">Writer that receives the markup.</param>
 /// <param name="nodeName">Node name to display; it is HTML-escaped before being written.</param>
 private void PrintHeader(PrintWriter @out, string nodeName)
 {
     // The same escaped text is used for the <title> and the <h1>.
     var safeName = StringEscapeUtils.EscapeHtml(nodeName);

     @out.Write("<html><head>");
     @out.Printf("<title>%s Reconfiguration Utility</title>%n", safeName);
     @out.Write("</head><body>\n");
     @out.Printf("<h1>%s Reconfiguration Utility</h1>%n", safeName);
 }
Пример #2
0
        /// <summary>Prints the statistics accumulated by this metric to <paramref name="pw"/>.</summary>
        /// <remarks>
        /// When <paramref name="verbose"/> is false a single summary line is written. Otherwise a
        /// full report is written: the sentence count, the sentence-level and corpus-level
        /// averages, and one line per lineage that was counted at least 30 times, in ascending
        /// order of the key under which the lineage was stored (its average, possibly jittered).
        /// </remarks>
        /// <param name="verbose">Selects the full report (true) or the one-line summary (false).</param>
        /// <param name="pw">Writer that receives the output.</param>
        public virtual void Display(bool verbose, PrintWriter pw)
        {
            double corpusLevel = corpusAvg / corpusNum;
            double sentLevel   = sentAvg / sentNum;
            double sentEx      = 100.0 * sentExact / sentNum;

            if (!verbose)
            {
                pw.Printf("%s summary: corpus: %.3f sent: %.3f sent-ex: %.2f%n", name, corpusLevel, sentLevel, sentEx);
                return;
            }

            // Only the verbose report needs a random source (for tie-breaking below).
            Random rand = new Random();

            // Sorted by average so that the per-lineage lines come out in ascending order.
            IDictionary<double, IList<CoreLabel>> avgMap = new SortedDictionary<double, IList<CoreLabel>>();
            foreach (KeyValuePair<IList<CoreLabel>, double> entry in catAvg)
            {
                double avg = entry.Value / catNum[entry.Key];
                if (double.IsNaN(avg))
                {
                    avg = -1.0;
                }
                // Two lineages may share an average; nudge the key by a tiny random amount so
                // the later one does not replace the earlier one in the map.
                double key = avgMap.ContainsKey(avg) ? avg + (rand.NextDouble() / 10000.0) : avg;
                avgMap[key] = entry.Key;
            }

            pw.Println("============================================================");
            pw.Println("Leaf Ancestor Metric" + "(" + name + ") -- final statistics");
            pw.Println("============================================================");
            pw.Println("#Sentences: " + (int)sentNum);
            pw.Println();
            pw.Println("Sentence-level (macro-averaged)");
            pw.Printf(" Avg: %.3f%n", sentLevel);
            pw.Printf(" Exact: %.2f%%%n", sentEx);
            pw.Println();
            pw.Println("Corpus-level (micro-averaged)");
            pw.Printf(" Avg: %.3f%n", corpusLevel);
            pw.Println("============================================================");
            foreach (IList<CoreLabel> lineage in avgMap.Values)
            {
                // Lineages counted fewer than 30 times are left out of the report.
                if (catNum[lineage] < 30.0)
                {
                    continue;
                }
                // Recomputed from the raw totals so the printed value carries no jitter.
                double avg = catAvg[lineage] / catNum[lineage];
                pw.Printf(" %.3f\t%d\t%s%n", avg, (int)((double)catNum[lineage]), ToString(lineage));
            }
            pw.Println("============================================================");
        }
 /// <summary>
 /// Reports the contents of <c>unkWords</c>: either a single line saying that all words
 /// are known, or a heading followed by one indented line per unknown word.
 /// </summary>
 /// <param name="verbose">Not consulted by this implementation.</param>
 /// <param name="pw">Writer that receives the report.</param>
 public virtual void Display(bool verbose, PrintWriter pw)
 {
     bool nothingUnknown = unkWords.IsEmpty();
     if (nothingUnknown)
     {
         pw.Printf("UnknownWordPrinter: all words known by DVModel%n");
         return;
     }

     pw.Printf("UnknownWordPrinter: the following words are unknown%n");
     foreach (string unknown in unkWords)
     {
         pw.Printf("  %s%n", unknown);
     }
 }
Пример #4
0
 /// <summary>
 /// Debugging aid (WSGDEBUG): for every key of <c>verbStems</c>, writes a line of the form
 /// "word : stem", where the stem is whatever <c>GetStem</c> returns for that key.
 /// </summary>
 /// <param name="pw">Writer that receives the dump.</param>
 public virtual void DebugPrint(PrintWriter pw)
 {
     foreach (string verb in verbStems.Keys)
     {
         var stem = GetStem(verb);
         pw.Printf("%s : %s\n", verb, stem);
     }
 }
        /// <summary>
        /// Writes every non-null element of this collection to a file, one "index=value" line
        /// per element, for indices 0 up to (but not including) <c>size</c>.
        /// </summary>
        /// <remarks>
        /// An <c>IOException</c> is not propagated to the caller; its stack trace is printed
        /// instead. The writer is closed exactly once, in the <c>finally</c> block, whether or
        /// not writing succeeded.
        /// </remarks>
        /// <param name="s">Passed to <c>IOUtils.GetPrintWriter</c> to open the output.</param>
        public virtual void SaveToFilename(string s)
        {
            PrintWriter bw = null;

            try
            {
                bw = IOUtils.GetPrintWriter(s);
                for (int i = 0; i < size; i++)
                {
                    E o = Get(i);
                    if (o != null)
                    {
                        bw.Printf("%d=%s%n", i, o.ToString());
                    }
                }
            }
            catch (IOException e)
            {
                // Best-effort save: report the failure and carry on.
                Sharpen.Runtime.PrintStackTrace(e);
            }
            finally
            {
                // Single close point; bw is still null if opening the writer failed.
                if (bw != null)
                {
                    bw.Close();
                }
            }
        }
Пример #6
0
 /// <summary>
 /// Writes a tab-separated table with a header row and then one row per label in
 /// <paramref name="doc"/>, holding the label's answer, gold-answer and character annotations.
 /// </summary>
 /// <param name="doc">Labels to print, in order.</param>
 /// <param name="pw">Writer that receives the table.</param>
 public virtual void PrintAnswers(IList<CoreLabel> doc, PrintWriter pw)
 {
     pw.Println("Answer\tGoldAnswer\tCharacter");
     foreach (CoreLabel token in doc)
     {
         var answer     = token.Get(typeof(CoreAnnotations.AnswerAnnotation));
         var goldAnswer = token.Get(typeof(CoreAnnotations.GoldAnswerAnnotation));
         var character  = token.Get(typeof(CoreAnnotations.CharAnnotation));
         pw.Printf("%s\t%s\t%s%n", answer, goldAnswer, character);
     }
 }
Пример #7
0
        /// <summary>
        /// Prints the node reports the client returns for the given node states: a total
        /// count, a header row, and one row per node (id, state, HTTP address, container count).
        /// </summary>
        /// <param name="nodeStates">Node states handed to the client when requesting reports.</param>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        private void ListClusterNodes(ICollection<NodeState> nodeStates)
        {
            var utf8 = Sharpen.Extensions.GetEncoding("UTF-8");
            PrintWriter writer = new PrintWriter(new OutputStreamWriter(sysout, utf8));

            var requestedStates = Sharpen.Collections.ToArray(nodeStates, new NodeState[0]);
            IList<NodeReport> nodesReport = client.GetNodeReports(requestedStates);

            writer.WriteLine("Total Nodes:" + nodesReport.Count);
            writer.Printf(NodesPattern,
                          "Node-Id",
                          "Node-State",
                          "Node-Http-Address",
                          "Number-of-Running-Containers");
            foreach (NodeReport node in nodesReport)
            {
                writer.Printf(NodesPattern,
                              node.GetNodeId(),
                              node.GetNodeState(),
                              node.GetHttpAddress(),
                              node.GetNumContainers());
            }
            // The writer is flushed, not closed, so the underlying sysout stream stays open.
            writer.Flush();
        }
Пример #8
0
        /// <summary>
        /// Prints the application attempts the client returns for the given application id:
        /// a total count, a header row, and one row per attempt (attempt id, state,
        /// AM container id, tracking URL).
        /// </summary>
        /// <param name="applicationId">
        /// Application id in string form; converted with <c>ConverterUtils.ToApplicationId</c>.
        /// </param>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        private void ListApplicationAttempts(string applicationId)
        {
            var utf8 = Sharpen.Extensions.GetEncoding("UTF-8");
            PrintWriter writer = new PrintWriter(new OutputStreamWriter(sysout, utf8));

            var parsedId = ConverterUtils.ToApplicationId(applicationId);
            IList<ApplicationAttemptReport> appAttemptsReport = client.GetApplicationAttempts(parsedId);

            writer.WriteLine("Total number of application attempts " + ":" + appAttemptsReport.Count);
            writer.Printf(ApplicationAttemptsPattern,
                          "ApplicationAttempt-Id",
                          "State",
                          "AM-Container-Id",
                          "Tracking-URL");
            foreach (ApplicationAttemptReport attempt in appAttemptsReport)
            {
                writer.Printf(ApplicationAttemptsPattern,
                              attempt.GetApplicationAttemptId(),
                              attempt.GetYarnApplicationAttemptState(),
                              attempt.GetAMContainerId().ToString(),
                              attempt.GetTrackingUrl());
            }
            // The writer is flushed, not closed, so the underlying sysout stream stays open.
            writer.Flush();
        }
Пример #9
0
        /// <summary>
        /// Prints the container reports the client returns for the given application attempt:
        /// a total count, a header row, and one row per container.
        /// </summary>
        /// <param name="appAttemptId">
        /// Application attempt id in string form; converted with
        /// <c>ConverterUtils.ToApplicationAttemptId</c>.
        /// </param>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        private void ListContainers(string appAttemptId)
        {
            var utf8 = Sharpen.Extensions.GetEncoding("UTF-8");
            PrintWriter writer = new PrintWriter(new OutputStreamWriter(sysout, utf8));

            var attemptId = ConverterUtils.ToApplicationAttemptId(appAttemptId);
            IList<ContainerReport> appsReport = client.GetContainers(attemptId);

            writer.WriteLine("Total number of containers " + ":" + appsReport.Count);
            writer.Printf(ContainerPattern,
                          "Container-Id",
                          "Start Time",
                          "Finish Time",
                          "State",
                          "Host",
                          "Node Http Address",
                          "LOG-URL");
            foreach (ContainerReport container in appsReport)
            {
                var containerId = container.GetContainerId();
                var startTime   = Times.Format(container.GetCreationTime());
                var finishTime  = Times.Format(container.GetFinishTime());
                var state       = container.GetContainerState();
                var host        = container.GetAssignedNode();

                // A missing node HTTP address is shown as "N/A".
                object nodeHttpAddress = container.GetNodeHttpAddress();
                if (nodeHttpAddress == null)
                {
                    nodeHttpAddress = "N/A";
                }

                var logUrl = container.GetLogUrl();
                writer.Printf(ContainerPattern, containerId, startTime, finishTime, state, host, nodeHttpAddress, logUrl);
            }
            // The writer is flushed, not closed, so the underlying sysout stream stays open.
            writer.Flush();
        }
Пример #10
0
 /// <summary>
 /// Writes a job table to <paramref name="writer"/>: a total count, a header row, and one
 /// row per job. Slot and memory figures that are negative are shown as <c>Unavailable</c>;
 /// non-negative memory figures are rendered through <c>memPattern</c>.
 /// </summary>
 /// <param name="jobs">Jobs to list, in order.</param>
 /// <param name="writer">Writer that receives the table; it is flushed before returning.</param>
 public virtual void DisplayJobList(JobStatus[] jobs, PrintWriter writer)
 {
     writer.WriteLine("Total jobs:" + jobs.Length);
     writer.Printf(headerPattern,
                   "JobId", "State", "StartTime", "UserName", "Queue", "Priority",
                   "UsedContainers", "RsvdContainers", "UsedMem", "RsvdMem", "NeededMem",
                   "AM info");

     for (int idx = 0; idx < jobs.Length; idx++)
     {
         JobStatus status = jobs[idx];

         int usedSlots     = status.GetNumUsedSlots();
         int reservedSlots = status.GetNumReservedSlots();
         int usedMem       = status.GetUsedMem();
         int reservedMem   = status.GetReservedMem();
         int neededMem     = status.GetNeededMem();

         // Each cell starts out as the "unavailable" marker and is replaced only when the
         // underlying figure is non-negative.
         object usedSlotsCell = Unavailable;
         if (usedSlots >= 0)
         {
             usedSlotsCell = usedSlots;
         }
         object reservedSlotsCell = Unavailable;
         if (reservedSlots >= 0)
         {
             reservedSlotsCell = reservedSlots;
         }
         object usedMemCell = Unavailable;
         if (usedMem >= 0)
         {
             usedMemCell = string.Format(memPattern, usedMem);
         }
         object reservedMemCell = Unavailable;
         if (reservedMem >= 0)
         {
             reservedMemCell = string.Format(memPattern, reservedMem);
         }
         object neededMemCell = Unavailable;
         if (neededMem >= 0)
         {
             neededMemCell = string.Format(memPattern, neededMem);
         }

         writer.Printf(dataPattern,
                       status.GetJobID().ToString(),
                       status.GetState(),
                       status.GetStartTime(),
                       status.GetUsername(),
                       status.GetQueue(),
                       status.GetPriority().ToString(),
                       usedSlotsCell,
                       reservedSlotsCell,
                       usedMemCell,
                       reservedMemCell,
                       neededMemCell,
                       status.GetSchedulingInfo());
     }
     writer.Flush();
 }
Пример #11
0
        /// <summary>
        /// Prints the applications the client returns for the given application types and
        /// states: a summary line with the filters and the total count, a header row, and one
        /// row per application.
        /// </summary>
        /// <remarks>
        /// <paramref name="appStates"/> is modified in place before the query: when
        /// <c>allAppStates</c> is set, every <c>YarnApplicationState</c> value is added to it;
        /// otherwise, if it is empty, Running, Accepted and Submitted are added.
        /// </remarks>
        /// <param name="appTypes">Application types passed to the client as a filter.</param>
        /// <param name="appStates">Application states passed to the client as a filter; see remarks.</param>
        /// <exception cref="Org.Apache.Hadoop.Yarn.Exceptions.YarnException"/>
        /// <exception cref="System.IO.IOException"/>
        private void ListApplications(ICollection <string> appTypes, EnumSet <YarnApplicationState
                                                                              > appStates)
        {
            PrintWriter writer = new PrintWriter(new OutputStreamWriter(sysout, Sharpen.Extensions.GetEncoding
                                                                            ("UTF-8")));

            if (allAppStates)
            {
                foreach (YarnApplicationState appState in YarnApplicationState.Values())
                {
                    appStates.AddItem(appState);
                }
            }
            else
            {
                if (appStates.IsEmpty())
                {
                    // No explicit state filter: fall back to the three default states.
                    appStates.AddItem(YarnApplicationState.Running);
                    appStates.AddItem(YarnApplicationState.Accepted);
                    appStates.AddItem(YarnApplicationState.Submitted);
                }
            }
            IList <ApplicationReport> appsReport = client.GetApplications(appTypes, appStates);

            writer.WriteLine("Total number of applications (application-types: " + appTypes +
                             " and states: " + appStates + ")" + ":" + appsReport.Count);
            writer.Printf(ApplicationsPattern, "Application-Id", "Application-Name", "Application-Type"
                          , "User", "Queue", "State", "Final-State", "Progress", "Tracking-URL");

            // The progress format is the same for every row, so build the formatter once
            // instead of once per application.
            DecimalFormat formatter = new DecimalFormat("###.##%");
            foreach (ApplicationReport appReport in appsReport)
            {
                string progress = formatter.Format(appReport.GetProgress());
                writer.Printf(ApplicationsPattern, appReport.GetApplicationId(), appReport.GetName
                                  (), appReport.GetApplicationType(), appReport.GetUser(), appReport.GetQueue(), appReport
                              .GetYarnApplicationState(), appReport.GetFinalApplicationStatus(), progress, appReport
                              .GetOriginalTrackingUrl());
            }
            // The writer is flushed, not closed, so the underlying sysout stream stays open.
            writer.Flush();
        }
        /// <summary>
        /// Walks the yield of <paramref name="guess"/> and, for every word whose vector from
        /// <c>model</c> is the unknown-word vector <c>unk</c>, prints the word with its
        /// 1-based position and adds it to <c>unkWords</c>.
        /// </summary>
        /// <param name="guess">Tree whose yield is inspected.</param>
        /// <param name="gold">Not consulted by this implementation.</param>
        /// <param name="pw">Writer that receives one line per unknown word.</param>
        /// <param name="weight">Not consulted by this implementation.</param>
        public virtual void Evaluate(Tree guess, Tree gold, PrintWriter pw, double weight)
        {
            IList<ILabel> leaves = guess.Yield();

            for (int idx = 0; idx < leaves.Count; idx++)
            {
                var text = leaves[idx].Value();
                SimpleMatrix vector = model.GetWordVector(text);

                // The reference check comes first; Equals is the fallback. Dropping Equals
                // would be faster but would tie this to the model's implementation.
                bool isUnknown = vector == unk || vector.Equals(unk);
                if (!isUnknown)
                {
                    continue;
                }

                // Positions are reported 1-based.
                pw.Printf("  Unknown word in position %d: %s%n", idx + 1, text);
                unkWords.Add(text);
            }
        }
Пример #13
0
 /// <summary>
 /// Writes every (first key, second key, count) entry of <paramref name="cnt"/> to a file as
 /// a tab-separated line, with the count truncated to an integer.
 /// </summary>
 /// <remarks>
 /// <c>UnsupportedEncodingException</c> and <c>FileNotFoundException</c> are not propagated;
 /// their stack traces are printed instead. The writer is closed in a <c>finally</c> block so
 /// that it is released even when writing fails part-way through.
 /// </remarks>
 /// <param name="cnt">Counter whose entries are written.</param>
 /// <param name="fname">Path of the output file; the writer is created with "UTF-8".</param>
 public static void PrintCounter(TwoDimensionalCounter <string, string> cnt, string fname)
 {
     PrintWriter pw = null;
     try
     {
         pw = new PrintWriter(new TextWriter(new FileOutputStream(new File(fname)), false, "UTF-8"));
         foreach (string key in cnt.FirstKeySet())
         {
             foreach (string val in cnt.GetCounter(key).KeySet())
             {
                 pw.Printf("%s\t%s\t%d%n", key, val, (int)cnt.GetCount(key, val));
             }
         }
     }
     catch (UnsupportedEncodingException e)
     {
         Sharpen.Runtime.PrintStackTrace(e);
     }
     catch (FileNotFoundException e)
     {
         Sharpen.Runtime.PrintStackTrace(e);
     }
     finally
     {
         // pw is still null if the file could not be opened.
         if (pw != null)
         {
             pw.Close();
         }
     }
 }
Пример #14
0
 /// <summary>
 /// Writes the given sentences and their dependency trees to a file, one tab-separated line
 /// per token and an empty line after each sentence.
 /// </summary>
 /// <remarks>
 /// Token lines are numbered from 1; the head and label for token <c>j</c> are read from
 /// <c>tree.GetHead(j)</c> and <c>tree.GetLabel(j)</c>. The tag is written in both tag
 /// columns. Any exception raised while writing is rethrown wrapped in a
 /// <c>RuntimeIOException</c>, and the writer is closed in a <c>finally</c> block.
 /// </remarks>
 /// <param name="outFile">Passed to <c>IOUtils.GetPrintWriter</c> to open the output.</param>
 /// <param name="sentences">Sentences to write; tokens come from their <c>TokensAnnotation</c>.</param>
 /// <param name="trees">Dependency trees, indexed in parallel with <paramref name="sentences"/>.</param>
 public static void WriteConllFile(string outFile, IList <ICoreMap> sentences, IList <DependencyTree> trees)
 {
     PrintWriter output = null;
     try
     {
         output = IOUtils.GetPrintWriter(outFile);
         for (int i = 0; i < sentences.Count; i++)
         {
             ICoreMap          sentence = sentences[i];
             DependencyTree    tree     = trees[i];
             IList <CoreLabel> tokens   = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));

             // Bound the token loop by this sentence's own token count.
             int tokenCount = tokens.Count;
             for (int j = 1; j <= tokenCount; ++j)
             {
                 // Output and tree lookups are 1-based; the token list is 0-based.
                 CoreLabel token = tokens[j - 1];
                 output.Printf("%d\t%s\t_\t%s\t%s\t_\t%d\t%s\t_\t_%n", j, token.Word(), token.Tag(), token.Tag(), tree.GetHead(j), tree.GetLabel(j));
             }
             output.Println();
         }
     }
     catch (Exception e)
     {
         throw new RuntimeIOException(e);
     }
     finally
     {
         // output is still null if the writer could not be opened.
         if (output != null)
         {
             output.Close();
         }
     }
 }
Пример #15
0
        /// <summary>Run the scoring metric on guess/gold input.</summary>
        /// <remarks>
        /// Run the scoring metric on guess/gold input. This method performs "Collinization."
        /// The default language is English.
        /// <para>
        /// Recognised options: <c>-l</c> language, <c>-y</c> maximum gold yield, <c>-v</c> verbose,
        /// <c>-c</c> (sets <c>doCatLevelEval</c>), <c>-e</c> input encoding. Any other option
        /// prints the usage text and exits. The non-option arguments are the gold file followed
        /// by the guess file.
        /// </para>
        /// </remarks>
        /// <param name="args">Command-line options followed by the gold file and the guess file.</param>
        public static void Main(string[] args)
        {
            if (args.Length < minArgs)
            {
                System.Console.Out.WriteLine(usage.ToString());
                System.Environment.Exit(-1);
            }
            ITreebankLangParserParams tlpp = new EnglishTreebankParserParams();
            int    maxGoldYield = int.MaxValue;
            bool   verbose      = false;
            string encoding     = "UTF-8";
            IDictionary <string, string[]> argsMap = StringUtils.ArgsToMap(args, optionArgDefs);

            foreach (KeyValuePair <string, string[]> opt in argsMap)
            {
                if (opt.Key == null)
                {
                    // The null key holds the non-option arguments; they are read after the loop.
                    continue;
                }
                if (opt.Key.Equals("-l"))
                {
                    Language lang = Language.ValueOf(opt.Value[0].Trim());
                    tlpp = lang.@params;
                }
                else if (opt.Key.Equals("-y"))
                {
                    maxGoldYield = System.Convert.ToInt32(opt.Value[0].Trim());
                }
                else if (opt.Key.Equals("-v"))
                {
                    verbose = true;
                }
                else if (opt.Key.Equals("-c"))
                {
                    Edu.Stanford.Nlp.Parser.Metrics.TaggingEval.doCatLevelEval = true;
                }
                else if (opt.Key.Equals("-e"))
                {
                    encoding = opt.Value[0];
                }
                else
                {
                    log.Info(usage.ToString());
                    System.Environment.Exit(-1);
                }
            }

            //Non-option arguments located at key null.
            // Read once, outside the option loop, so the files are also picked up when the
            // command line contains no options at all.
            string[] rest = argsMap[null];
            if (rest == null || rest.Length < minArgs)
            {
                log.Info(usage.ToString());
                System.Environment.Exit(-1);
            }
            string goldFile  = rest[0];
            string guessFile = rest[1];

            tlpp.SetInputEncoding(encoding);
            PrintWriter pwOut         = tlpp.Pw();
            Treebank    guessTreebank = tlpp.DiskTreebank();

            guessTreebank.LoadPath(guessFile);
            pwOut.Println("GUESS TREEBANK:");
            pwOut.Println(guessTreebank.TextualSummary());
            Treebank goldTreebank = tlpp.DiskTreebank();

            goldTreebank.LoadPath(goldFile);
            pwOut.Println("GOLD TREEBANK:");
            pwOut.Println(goldTreebank.TextualSummary());
            Edu.Stanford.Nlp.Parser.Metrics.TaggingEval metric = new Edu.Stanford.Nlp.Parser.Metrics.TaggingEval("Tagging LP/LR");
            ITreeTransformer tc = tlpp.Collinizer();
            //The evalb ref implementation assigns status for each tree pair as follows:
            //
            //   0 - Ok (yields match)
            //   1 - length mismatch
            //   2 - null parse e.g. (()).
            //
            //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
            IEnumerator <Tree> goldItr  = goldTreebank.GetEnumerator();
            IEnumerator <Tree> guessItr = guessTreebank.GetEnumerator();
            int goldLineId        = 0;
            int guessLineId       = 0;
            int skippedGuessTrees = 0;

            // MoveNext() both tests for and consumes the next element, so each enumerator is
            // advanced exactly once per round and the results are remembered. Calling
            // MoveNext() again after the loop would miss a single surplus tree that the loop
            // condition had already consumed.
            bool guessHasTree = false;
            bool goldHasTree  = false;
            while (true)
            {
                guessHasTree = guessItr.MoveNext();
                goldHasTree  = goldItr.MoveNext();
                if (!guessHasTree || !goldHasTree)
                {
                    break;
                }
                Tree           guessTree  = guessItr.Current;
                IList <ILabel> guessYield = guessTree.Yield();
                guessLineId++;
                Tree           goldTree  = goldItr.Current;
                IList <ILabel> goldYield = goldTree.Yield();
                goldLineId++;
                // Check that we should evaluate this tree
                if (goldYield.Count > maxGoldYield)
                {
                    skippedGuessTrees++;
                    continue;
                }
                // Only trees with equal yields can be evaluated
                if (goldYield.Count != guessYield.Count)
                {
                    pwOut.Printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.Count, guessYield.Count, goldLineId, guessLineId);
                    skippedGuessTrees++;
                    continue;
                }
                Tree evalGuess = tc.TransformTree(guessTree);
                Tree evalGold  = tc.TransformTree(goldTree);
                metric.Evaluate(evalGuess, evalGold, ((verbose) ? pwOut : null));
            }
            // Exactly one of the flags is still true when one treebank is longer than the other.
            if (guessHasTree || goldHasTree)
            {
                System.Console.Error.Printf("Guess/gold files do not have equal lengths (guess: %d gold: %d)%n.", guessLineId, goldLineId);
            }
            pwOut.Println("================================================================================");
            if (skippedGuessTrees != 0)
            {
                pwOut.Printf("%s %d guess trees\n", "Unable to evaluate", skippedGuessTrees);
            }
            metric.Display(true, pwOut);
            pwOut.Println();
            pwOut.Close();
        }
Пример #16
0
        /// <summary>
        /// Command-line entry point: loads a gold and a guess treebank, evaluates each guess
        /// tree against the next usable gold tree with a <c>CollinsDepEval</c>, and prints the
        /// results.
        /// </summary>
        /// <remarks>
        /// Options read from the command line: <c>v</c> (verbose), <c>l</c> (language, default
        /// English), <c>g</c> (maximum gold yield) and <c>y</c> (maximum guess yield). The
        /// non-option arguments are the gold file followed by the guess file; any other number
        /// of them prints the usage text and exits.
        /// </remarks>
        /// <param name="args">Command-line options followed by the gold file and the guess file.</param>
        public static void Main(string[] args)
        {
            if (args.Length < MinArgs)
            {
                log.Info(Usage());
                System.Environment.Exit(-1);
            }
            Properties options       = StringUtils.ArgsToProperties(args, OptionArgDefs());
            bool       Verbose       = PropertiesUtils.GetBool(options, "v", false);
            Language   Language      = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
            int        MaxGoldYield  = PropertiesUtils.GetInt(options, "g", int.MaxValue);
            int        MaxGuessYield = PropertiesUtils.GetInt(options, "y", int.MaxValue);

            // NOTE(review): "\\s+" reads like a regular expression meant to split on runs of
            // whitespace; confirm that the Split overload resolved here treats it as one.
            string[] parsedArgs = options.GetProperty(string.Empty, string.Empty).Split("\\s+");
            if (parsedArgs.Length != MinArgs)
            {
                log.Info(Usage());
                System.Environment.Exit(-1);
            }
            File goldFile  = new File(parsedArgs[0]);
            File guessFile = new File(parsedArgs[1]);
            ITreebankLangParserParams tlpp = Language.@params;
            PrintWriter pwOut         = tlpp.Pw();
            Treebank    guessTreebank = tlpp.DiskTreebank();

            guessTreebank.LoadPath(guessFile);
            pwOut.Println("GUESS TREEBANK:");
            pwOut.Println(guessTreebank.TextualSummary());
            Treebank goldTreebank = tlpp.DiskTreebank();

            goldTreebank.LoadPath(goldFile);
            pwOut.Println("GOLD TREEBANK:");
            pwOut.Println(goldTreebank.TextualSummary());
            Edu.Stanford.Nlp.Parser.Metrics.CollinsDepEval depEval = new Edu.Stanford.Nlp.Parser.Metrics.CollinsDepEval("CollinsDep", true, tlpp.HeadFinder(), tlpp.TreebankLanguagePack().StartSymbol());
            ITreeTransformer tc = tlpp.Collinizer();
            //PennTreeReader skips over null/malformed parses. So when the yields of the gold/guess trees
            //don't match, we need to keep looking for the next gold tree that matches.
            //The evalb ref implementation differs slightly as it expects one tree per line. It assigns
            //status as follows:
            //
            //   0 - Ok (yields match)
            //   1 - length mismatch
            //   2 - null parse e.g. (()).
            //
            //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
            IEnumerator <Tree> goldItr = goldTreebank.GetEnumerator();
            int goldLineId             = 0;
            int skippedGuessTrees      = 0;

            foreach (Tree guess in guessTreebank)
            {
                Tree evalGuess = tc.TransformTree(guess);
                if (guess.Yield().Count > MaxGuessYield)
                {
                    skippedGuessTrees++;
                    continue;
                }
                bool doneEval = false;
                // doneEval must be tested first: MoveNext() consumes a gold tree, so checking
                // it before doneEval would throw away one gold tree after every evaluation.
                while (!doneEval && goldItr.MoveNext())
                {
                    Tree gold     = goldItr.Current;
                    Tree evalGold = tc.TransformTree(gold);
                    goldLineId++;
                    if (gold.Yield().Count > MaxGoldYield)
                    {
                        // Over-long gold tree: try the next gold tree for this guess.
                        continue;
                    }
                    else
                    {
                        if (evalGold.Yield().Count != evalGuess.Yield().Count)
                        {
                            //Default evalb behavior -- skip this guess tree
                            pwOut.Println("Yield mismatch at gold line " + goldLineId);
                            skippedGuessTrees++;
                            break;
                        }
                    }
                    depEval.Evaluate(evalGuess, evalGold, ((Verbose) ? pwOut : null));
                    doneEval = true;
                }
                //Move to the next guess parse
            }
            pwOut.Println("================================================================================");
            if (skippedGuessTrees != 0)
            {
                pwOut.Printf("%s %d guess trees\n", ((MaxGuessYield < int.MaxValue) ? "Skipped" : "Unable to evaluate"), skippedGuessTrees);
            }
            depEval.Display(true, pwOut);
            pwOut.Close();
        }
        public virtual void ProcessResults(IParserQuery pq, Tree goldTree, PrintWriter pwErr, PrintWriter pwOut, PrintWriter pwFileOut, PrintWriter pwStats, TreePrint treePrint)
        {
            if (pq.SaidMemMessage())
            {
                saidMemMessage = true;
            }
            Tree             tree;
            IList <IHasWord> sentence = pq.OriginalSentence();

            try
            {
                tree = pq.GetBestParse();
            }
            catch (NoSuchParseException)
            {
                tree = null;
            }
            IList <ScoredObject <Tree> > kbestPCFGTrees = null;

            if (tree != null && kbestPCFG > 0)
            {
                kbestPCFGTrees = pq.GetKBestPCFGParses(kbestPCFG);
            }
            //combo parse goes to pwOut (System.out)
            if (op.testOptions.verbose)
            {
                pwOut.Println("ComboParser best");
                Tree ot = tree;
                if (ot != null && !op.tlpParams.TreebankLanguagePack().IsStartSymbol(ot.Value()))
                {
                    ot = ot.TreeFactory().NewTreeNode(op.tlpParams.TreebankLanguagePack().StartSymbol(), Java.Util.Collections.SingletonList(ot));
                }
                treePrint.PrintTree(ot, pwOut);
            }
            else
            {
                treePrint.PrintTree(tree, pwOut);
            }
            // **OUTPUT**
            // print various n-best like outputs (including 1-best)
            // print various statistics
            if (tree != null)
            {
                if (op.testOptions.printAllBestParses)
                {
                    IList <ScoredObject <Tree> > parses = pq.GetBestPCFGParses();
                    int sz = parses.Count;
                    if (sz > 1)
                    {
                        pwOut.Println("There were " + sz + " best PCFG parses with score " + parses[0].Score() + '.');
                        Tree transGoldTree = collinizer.TransformTree(goldTree);
                        int  iii           = 0;
                        foreach (ScoredObject <Tree> sot in parses)
                        {
                            iii++;
                            Tree tb  = sot.Object();
                            Tree tbd = debinarizer.TransformTree(tb);
                            tbd = subcategoryStripper.TransformTree(tbd);
                            pq.RestoreOriginalWords(tbd);
                            pwOut.Println("PCFG Parse #" + iii + " with score " + tbd.Score());
                            tbd.PennPrint(pwOut);
                            Tree tbtr = collinizer.TransformTree(tbd);
                            // pwOut.println("Tree size = " + tbtr.size() + "; depth = " + tbtr.depth());
                            kGoodLB.Evaluate(tbtr, transGoldTree, pwErr);
                        }
                    }
                }
                else
                {
                    // Huang and Chiang (2006) Algorithm 3 output from the PCFG parser
                    if (op.testOptions.printPCFGkBest > 0 && op.testOptions.outputkBestEquivocation == null)
                    {
                        IList <ScoredObject <Tree> > trees = kbestPCFGTrees.SubList(0, op.testOptions.printPCFGkBest);
                        Tree transGoldTree = collinizer.TransformTree(goldTree);
                        int  i             = 0;
                        foreach (ScoredObject <Tree> tp in trees)
                        {
                            i++;
                            pwOut.Println("PCFG Parse #" + i + " with score " + tp.Score());
                            Tree tbd = tp.Object();
                            tbd.PennPrint(pwOut);
                            Tree tbtr = collinizer.TransformTree(tbd);
                            kGoodLB.Evaluate(tbtr, transGoldTree, pwErr);
                        }
                    }
                    else
                    {
                        // Chart parser (factored) n-best list
                        if (op.testOptions.printFactoredKGood > 0 && pq.HasFactoredParse())
                        {
                            // DZ: debug n best trees
                            IList <ScoredObject <Tree> > trees = pq.GetKGoodFactoredParses(op.testOptions.printFactoredKGood);
                            Tree transGoldTree = collinizer.TransformTree(goldTree);
                            int  ii            = 0;
                            foreach (ScoredObject <Tree> tp in trees)
                            {
                                ii++;
                                pwOut.Println("Factored Parse #" + ii + " with score " + tp.Score());
                                Tree tbd = tp.Object();
                                tbd.PennPrint(pwOut);
                                Tree tbtr = collinizer.TransformTree(tbd);
                                kGoodLB.Evaluate(tbtr, transGoldTree, pwOut);
                            }
                        }
                        else
                        {
                            //1-best output
                            if (pwFileOut != null)
                            {
                                pwFileOut.Println(tree.ToString());
                            }
                        }
                    }
                }
                //Print the derivational entropy
                if (op.testOptions.outputkBestEquivocation != null && op.testOptions.printPCFGkBest > 0)
                {
                    IList <ScoredObject <Tree> > trees = kbestPCFGTrees.SubList(0, op.testOptions.printPCFGkBest);
                    double[] logScores = new double[trees.Count];
                    int      treeId    = 0;
                    foreach (ScoredObject <Tree> kBestTree in trees)
                    {
                        logScores[treeId++] = kBestTree.Score();
                    }
                    //Re-normalize
                    double entropy = 0.0;
                    double denom   = ArrayMath.LogSum(logScores);
                    foreach (double logScore in logScores)
                    {
                        double logPr = logScore - denom;
                        entropy += System.Math.Exp(logPr) * (logPr / System.Math.Log(2));
                    }
                    entropy *= -1;
                    //Convert to bits
                    pwStats.Printf("%f\t%d\t%d\n", entropy, trees.Count, sentence.Count);
                }
            }
            // **EVALUATION**
            // Perform various evaluations specified by the user
            if (tree != null)
            {
                //Strip subcategories and remove punctuation for evaluation
                tree = subcategoryStripper.TransformTree(tree);
                Tree treeFact = collinizer.TransformTree(tree);
                //Setup the gold tree
                if (op.testOptions.verbose)
                {
                    pwOut.Println("Correct parse");
                    treePrint.PrintTree(goldTree, pwOut);
                }
                Tree transGoldTree = collinizer.TransformTree(goldTree);
                if (transGoldTree != null)
                {
                    transGoldTree = subcategoryStripper.TransformTree(transGoldTree);
                }
                //Can't do evaluation in these two cases
                if (transGoldTree == null)
                {
                    pwErr.Println("Couldn't transform gold tree for evaluation, skipping eval. Gold tree was:");
                    goldTree.PennPrint(pwErr);
                    numSkippedEvals++;
                    return;
                }
                else
                {
                    if (treeFact == null)
                    {
                        pwErr.Println("Couldn't transform hypothesis tree for evaluation, skipping eval. Tree was:");
                        tree.PennPrint(pwErr);
                        numSkippedEvals++;
                        return;
                    }
                    else
                    {
                        if (treeFact.Yield().Count != transGoldTree.Yield().Count)
                        {
                            IList <ILabel> fYield = treeFact.Yield();
                            IList <ILabel> gYield = transGoldTree.Yield();
                            pwErr.Println("WARNING: Evaluation could not be performed due to gold/parsed yield mismatch.");
                            pwErr.Printf("  sizes: gold: %d (transf) %d (orig); parsed: %d (transf) %d (orig).%n", gYield.Count, goldTree.Yield().Count, fYield.Count, tree.Yield().Count);
                            pwErr.Println("  gold: " + SentenceUtils.ListToString(gYield, true));
                            pwErr.Println("  pars: " + SentenceUtils.ListToString(fYield, true));
                            numSkippedEvals++;
                            return;
                        }
                    }
                }
                if (topKEvals.Count > 0)
                {
                    IList <Tree> transGuesses = new List <Tree>();
                    int          kbest        = System.Math.Min(op.testOptions.evalPCFGkBest, kbestPCFGTrees.Count);
                    foreach (ScoredObject <Tree> guess in kbestPCFGTrees.SubList(0, kbest))
                    {
                        transGuesses.Add(collinizer.TransformTree(guess.Object()));
                    }
                    foreach (BestOfTopKEval eval in topKEvals)
                    {
                        eval.Evaluate(transGuesses, transGoldTree, pwErr);
                    }
                }
                //PCFG eval
                Tree treePCFG = pq.GetBestPCFGParse();
                if (treePCFG != null)
                {
                    Tree treePCFGeval = collinizer.TransformTree(treePCFG);
                    if (pcfgLB != null)
                    {
                        pcfgLB.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgChildSpecific != null)
                    {
                        pcfgChildSpecific.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgLA != null)
                    {
                        pcfgLA.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgCB != null)
                    {
                        pcfgCB.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgDA != null)
                    {
                        // Re-index the leaves after Collinization, stripping traces, etc.
                        treePCFGeval.IndexLeaves(true);
                        transGoldTree.IndexLeaves(true);
                        pcfgDA.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgTA != null)
                    {
                        pcfgTA.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgLL != null && pq.GetPCFGParser() != null)
                    {
                        pcfgLL.RecordScore(pq.GetPCFGParser(), pwErr);
                    }
                    if (pcfgRUO != null)
                    {
                        pcfgRUO.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgCUO != null)
                    {
                        pcfgCUO.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgCatE != null)
                    {
                        pcfgCatE.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                }
                //Dependency eval
                // todo: is treeDep really useful here, or should we really use depDAEval tree (debinarized) throughout? We use it for parse, and it sure seems like we could use it for tag eval, but maybe not factDA?
                Tree treeDep = pq.GetBestDependencyParse(false);
                if (treeDep != null)
                {
                    Tree goldTreeB    = binarizerOnly.TransformTree(goldTree);
                    Tree goldTreeEval = goldTree.DeepCopy();
                    goldTreeEval.IndexLeaves(true);
                    goldTreeEval.PercolateHeads(op.Langpack().HeadFinder());
                    Tree depDAEval = pq.GetBestDependencyParse(true);
                    depDAEval.IndexLeaves(true);
                    depDAEval.PercolateHeadIndices();
                    if (depDA != null)
                    {
                        depDA.Evaluate(depDAEval, goldTreeEval, pwErr);
                    }
                    if (depTA != null)
                    {
                        Tree undoneTree = debinarizer.TransformTree(treeDep);
                        undoneTree = subcategoryStripper.TransformTree(undoneTree);
                        pq.RestoreOriginalWords(undoneTree);
                        // pwErr.println("subcategoryStripped tree: " + undoneTree.toStructureDebugString());
                        depTA.Evaluate(undoneTree, goldTree, pwErr);
                    }
                    if (depLL != null && pq.GetDependencyParser() != null)
                    {
                        depLL.RecordScore(pq.GetDependencyParser(), pwErr);
                    }
                    Tree factTreeB;
                    if (pq.HasFactoredParse())
                    {
                        factTreeB = pq.GetBestFactoredParse();
                    }
                    else
                    {
                        factTreeB = treeDep;
                    }
                    if (factDA != null)
                    {
                        factDA.Evaluate(factTreeB, goldTreeB, pwErr);
                    }
                }
                //Factored parser (1best) eval
                if (factLB != null)
                {
                    factLB.Evaluate(treeFact, transGoldTree, pwErr);
                }
                if (factChildSpecific != null)
                {
                    factChildSpecific.Evaluate(treeFact, transGoldTree, pwErr);
                }
                if (factLA != null)
                {
                    factLA.Evaluate(treeFact, transGoldTree, pwErr);
                }
                if (factTA != null)
                {
                    factTA.Evaluate(tree, boundaryRemover.TransformTree(goldTree), pwErr);
                }
                if (factLL != null && pq.GetFactoredParser() != null)
                {
                    factLL.RecordScore(pq.GetFactoredParser(), pwErr);
                }
                if (factCB != null)
                {
                    factCB.Evaluate(treeFact, transGoldTree, pwErr);
                }
                foreach (IEval eval_1 in evals)
                {
                    eval_1.Evaluate(treeFact, transGoldTree, pwErr);
                }
                if (parserQueryEvals != null)
                {
                    foreach (IParserQueryEval eval in parserQueryEvals)
                    {
                        eval_1.Evaluate(pq, transGoldTree, pwErr);
                    }
                }
                if (op.testOptions.evalb)
                {
                    // empty out scores just in case
                    NanScores(tree);
                    EvalbFormatWriter.WriteEVALBline(treeFact, transGoldTree);
                }
            }
            pwErr.Println();
        }
        /// <summary>The meat of the outputter</summary>
        /// <remarks>
        /// Writes a plain-text report for <paramref name="annotation"/> to <paramref name="pw"/>:
        /// the document-level fields that are present (id, title, date, type, source type), then
        /// every sentence with its tokens and whichever sentence-level annotations are set, then
        /// coreference sets and quotes. When the annotation has no sentence list, only the
        /// document text and the character offsets of its tokens are printed.
        /// The writer is flushed at the end but not closed.
        /// </remarks>
        /// <param name="annotation">The annotation to print.</param>
        /// <param name="pw">Destination writer.</param>
        /// <param name="options">
        /// Supplies <c>beamPrintingOption</c> (passed to <c>RelationMention.PrintableObject</c>) and
        /// <c>constituentTreePrinter</c> (used for the constituency parse).
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        private static void Print(Annotation annotation, PrintWriter pw, AnnotationOutputter.Options options)
        {
            double           beam      = options.beamPrintingOption;
            IList <ICoreMap> sentences = annotation.Get(typeof(CoreAnnotations.SentencesAnnotation));
            // Display docid if available
            string docId = annotation.Get(typeof(CoreAnnotations.DocIDAnnotation));

            if (docId != null)
            {
                IList <CoreLabel> tokens = annotation.Get(typeof(CoreAnnotations.TokensAnnotation));
                int nSentences           = (sentences != null) ? sentences.Count : 0;
                int nTokens = (tokens != null) ? tokens.Count : 0;
                pw.Printf("Document: ID=%s (%d sentences, %d tokens)%n", docId, nSentences, nTokens);
            }
            // Display doctitle if available
            string docTitle = annotation.Get(typeof(CoreAnnotations.DocTitleAnnotation));

            if (docTitle != null)
            {
                pw.Printf("Document Title: %s%n", docTitle);
            }
            // Display docdate if available
            string docDate = annotation.Get(typeof(CoreAnnotations.DocDateAnnotation));

            if (docDate != null)
            {
                pw.Printf("Document Date: %s%n", docDate);
            }
            // Display doctype if available
            string docType = annotation.Get(typeof(CoreAnnotations.DocTypeAnnotation));

            if (docType != null)
            {
                pw.Printf("Document Type: %s%n", docType);
            }
            // Display docsourcetype if available
            string docSourceType = annotation.Get(typeof(CoreAnnotations.DocSourceTypeAnnotation));

            if (docSourceType != null)
            {
                pw.Printf("Document Source Type: %s%n", docSourceType);
            }
            // display each sentence in this annotation
            if (sentences != null)
            {
                // The sentence count is captured once in the loop initialiser; the loop bound
                // previously referred to a variable that was never declared.
                for (int i = 0, sz = sentences.Count; i < sz; i++)
                {
                    pw.Println();
                    ICoreMap          sentence  = sentences[i];
                    IList <CoreLabel> tokens    = sentence.Get(typeof(CoreAnnotations.TokensAnnotation));
                    string            sentiment = sentence.Get(typeof(SentimentCoreAnnotations.SentimentClass));
                    string            piece;
                    if (sentiment == null)
                    {
                        piece = string.Empty;
                    }
                    else
                    {
                        piece = ", sentiment: " + sentiment;
                    }
                    // Sentences are numbered from 1 in the output.
                    pw.Printf("Sentence #%d (%d tokens%s):%n", (i + 1), tokens.Count, piece);
                    string text = sentence.Get(typeof(CoreAnnotations.TextAnnotation));
                    pw.Println(text);
                    // display the token-level annotations
                    string[] tokenAnnotations = new string[] { "Text", "PartOfSpeech", "Lemma", "Answer", "NamedEntityTag", "CharacterOffsetBegin", "CharacterOffsetEnd", "NormalizedNamedEntityTag", "Timex", "TrueCase", "TrueCaseText", "SentimentClass", "WikipediaEntity" };
                    pw.Println();
                    pw.Println("Tokens:");
                    foreach (CoreLabel token in tokens)
                    {
                        pw.Print(token.ToShorterString(tokenAnnotations));
                        pw.Println();
                    }
                    // display the parse tree for this sentence
                    Tree tree = sentence.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
                    if (tree != null)
                    {
                        pw.Println();
                        pw.Println("Constituency parse: ");
                        options.constituentTreePrinter.PrintTree(tree, pw);
                    }
                    // display sentiment tree if they asked for sentiment
                    if (!StringUtils.IsNullOrEmpty(sentiment))
                    {
                        pw.Println();
                        pw.Println("Sentiment-annotated binary tree:");
                        Tree sTree = sentence.Get(typeof(SentimentCoreAnnotations.SentimentAnnotatedTree));
                        if (sTree != null)
                        {
                            sTree.PennPrint(pw, null);
                            pw.Println();
                        }
                    }
                    // It is possible to turn off the semantic graphs, in which
                    // case we don't want to recreate them using the dependency
                    // printer.  This might be relevant if using CoreNLP for a
                    // language which doesn't have dependencies, for example.
                    if (sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation)) != null)
                    {
                        pw.Println();
                        pw.Println("Dependency Parse (enhanced plus plus dependencies):");
                        pw.Print(sentence.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation)).ToList());
                    }
                    // display the entity mentions
                    IList <ICoreMap> entityMentions = sentence.Get(typeof(CoreAnnotations.MentionsAnnotation));
                    if (entityMentions != null)
                    {
                        pw.Println();
                        pw.Println("Extracted the following NER entity mentions:");
                        foreach (ICoreMap entityMention in entityMentions)
                        {
                            // Mentions without an entity type are not listed.
                            if (entityMention.Get(typeof(CoreAnnotations.EntityTypeAnnotation)) != null)
                            {
                                pw.Println(entityMention.Get(typeof(CoreAnnotations.TextAnnotation)) + "\t" + entityMention.Get(typeof(CoreAnnotations.EntityTypeAnnotation)));
                            }
                        }
                    }
                    // display MachineReading entities and relations
                    IList <EntityMention> entities = sentence.Get(typeof(MachineReadingAnnotations.EntityMentionsAnnotation));
                    if (entities != null)
                    {
                        pw.Println();
                        pw.Println("Extracted the following MachineReading entity mentions:");
                        foreach (EntityMention e in entities)
                        {
                            pw.Print('\t');
                            pw.Println(e);
                        }
                    }
                    IList <RelationMention> relations = sentence.Get(typeof(MachineReadingAnnotations.RelationMentionsAnnotation));
                    if (relations != null)
                    {
                        pw.Println();
                        pw.Println("Extracted the following MachineReading relation mentions:");
                        foreach (RelationMention r in relations)
                        {
                            // Only relations accepted by PrintableObject for the configured beam are shown.
                            if (r.PrintableObject(beam))
                            {
                                pw.Println(r);
                            }
                        }
                    }
                    // display OpenIE triples
                    ICollection <RelationTriple> openieTriples = sentence.Get(typeof(NaturalLogicAnnotations.RelationTriplesAnnotation));
                    if (openieTriples != null && !openieTriples.IsEmpty())
                    {
                        pw.Println();
                        pw.Println("Extracted the following Open IE triples:");
                        foreach (RelationTriple triple in openieTriples)
                        {
                            pw.Println(OpenIE.TripleToString(triple, docId, sentence));
                        }
                    }
                    // display KBP triples
                    ICollection <RelationTriple> kbpTriples = sentence.Get(typeof(CoreAnnotations.KBPTriplesAnnotation));
                    if (kbpTriples != null && !kbpTriples.IsEmpty())
                    {
                        pw.Println();
                        pw.Println("Extracted the following KBP triples:");
                        foreach (RelationTriple triple in kbpTriples)
                        {
                            pw.Println(triple);
                        }
                    }
                }
            }
            else
            {
                // No sentence split available: fall back to the raw text and token offsets.
                IList <CoreLabel> tokens = annotation.Get(typeof(CoreAnnotations.TokensAnnotation));
                pw.Println("Tokens:");
                pw.Println(annotation.Get(typeof(CoreAnnotations.TextAnnotation)));
                // The document-level token list can be absent (the document header above already
                // allows for that), so do not enumerate a null list.
                if (tokens != null)
                {
                    foreach (CoreLabel token in tokens)
                    {
                        int tokenCharBegin = token.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation));
                        int tokenCharEnd   = token.Get(typeof(CoreAnnotations.CharacterOffsetEndAnnotation));
                        pw.Println("[Text=" + token.Word() + " CharacterOffsetBegin=" + tokenCharBegin + " CharacterOffsetEnd=" + tokenCharEnd + ']');
                    }
                }
            }
            // display the old-style doc-level coref annotations
            // this is not supported anymore!
            //String corefAnno = annotation.get(CorefPLAnnotation.class);
            //if(corefAnno != null) os.println(corefAnno);
            // display the new-style coreference graph
            IDictionary <int, CorefChain> corefChains = annotation.Get(typeof(CorefCoreAnnotations.CorefChainAnnotation));

            if (corefChains != null && sentences != null)
            {
                foreach (CorefChain chain in corefChains.Values)
                {
                    CorefChain.CorefMention representative = chain.GetRepresentativeMention();
                    // The heading is printed lazily so that chains consisting only of the
                    // representative mention produce no output at all.
                    bool outputHeading = false;
                    foreach (CorefChain.CorefMention mention in chain.GetMentionsInTextualOrder())
                    {
                        if (mention == representative)
                        {
                            continue;
                        }
                        if (!outputHeading)
                        {
                            outputHeading = true;
                            pw.Println();
                            pw.Println("Coreference set:");
                        }
                        // all offsets start at 1!
                        pw.Printf("\t(%d,%d,[%d,%d]) -> (%d,%d,[%d,%d]), that is: \"%s\" -> \"%s\"%n",
                                  mention.sentNum, mention.headIndex, mention.startIndex, mention.endIndex,
                                  representative.sentNum, representative.headIndex, representative.startIndex, representative.endIndex,
                                  mention.mentionSpan, representative.mentionSpan);
                    }
                }
            }
            // display quotes if available
            if (annotation.Get(typeof(CoreAnnotations.QuotationsAnnotation)) != null)
            {
                pw.Println();
                pw.Println("Extracted quotes: ");
                IList <ICoreMap> allQuotes = QuoteAnnotator.GatherQuotes(annotation);
                foreach (ICoreMap quote in allQuotes)
                {
                    // Speaker preference: canonical mention, then speaker annotation, then "Unknown".
                    string speakerString;
                    if (quote.Get(typeof(QuoteAttributionAnnotator.CanonicalMentionAnnotation)) != null)
                    {
                        speakerString = quote.Get(typeof(QuoteAttributionAnnotator.CanonicalMentionAnnotation));
                    }
                    else
                    {
                        if (quote.Get(typeof(QuoteAttributionAnnotator.SpeakerAnnotation)) != null)
                        {
                            speakerString = quote.Get(typeof(QuoteAttributionAnnotator.SpeakerAnnotation));
                        }
                        else
                        {
                            speakerString = "Unknown";
                        }
                    }
                    pw.Printf("[QuotationIndex=%d, CharacterOffsetBegin=%d, Text=%s, Speaker=%s]%n",
                              quote.Get(typeof(CoreAnnotations.QuotationIndexAnnotation)),
                              quote.Get(typeof(CoreAnnotations.CharacterOffsetBeginAnnotation)),
                              quote.Get(typeof(CoreAnnotations.TextAnnotation)),
                              speakerString);
                }
            }
            pw.Flush();
        }
        /// <summary>
        /// Serializes <paramref name="corpus"/> as text to <paramref name="os"/>: the coreference chains,
        /// one line with the old-format coreference graph arcs, then each sentence (parse tree line,
        /// three dependency graphs, one line per token) followed by an empty line.
        /// </summary>
        /// <remarks>
        /// When the <c>compress</c> field is set and <paramref name="os"/> is not already a
        /// <c>GZIPOutputStream</c>, the stream is wrapped in one. The writer is flushed but the stream
        /// is not closed here.
        /// </remarks>
        /// <param name="corpus">The annotation to serialize.</param>
        /// <param name="os">The stream to write to.</param>
        /// <returns>The stream that was actually written to (the gzip wrapper if one was created).</returns>
        /// <exception cref="System.IO.IOException"/>
        public override OutputStream Write(Annotation corpus, OutputStream os)
        {
            if (!(os is GZIPOutputStream))
            {
                if (compress)
                {
                    os = new GZIPOutputStream(os);
                }
            }
            PrintWriter pw = new PrintWriter(os);
            // save the coref graph in the new format
            IDictionary <int, CorefChain> chains = corpus.Get(typeof(CorefCoreAnnotations.CorefChainAnnotation));

            SaveCorefChains(chains, pw);
            // save the coref graph on one line
            // Note: this is the old format!
            IList <Pair <IntTuple, IntTuple> > corefGraph = corpus.Get(typeof(CorefCoreAnnotations.CorefGraphAnnotation));

            if (corefGraph != null)
            {
                bool first = true;
                foreach (Pair <IntTuple, IntTuple> arc in corefGraph)
                {
                    if (!first)
                    {
                        pw.Print(" ");
                    }
                    pw.Printf("%d %d %d %d", arc.first.Get(0), arc.first.Get(1), arc.second.Get(0), arc.second.Get(1));
                    first = false;
                }
            }
            // The graph line is terminated even when there is no graph, so the line is always present.
            pw.Println();
            // save sentences separated by an empty line
            IList <ICoreMap> sentences = corpus.Get(typeof(CoreAnnotations.SentencesAnnotation));

            if (sentences == null)
            {
                // A corpus without a sentence list has nothing more to save; enumerating null would throw.
                pw.Flush();
                return(os);
            }
            foreach (ICoreMap sent in sentences)
            {
                // save the parse tree first, on a single line
                Tree tree = sent.Get(typeof(TreeCoreAnnotations.TreeAnnotation));
                if (tree != null)
                {
                    string treeString = tree.ToString();
                    // no \n allowed in the parse tree string (might happen due to tokenization of HTML/XML/RDF tags)
                    treeString = treeString.ReplaceAll("\n", " ");
                    pw.Println(treeString);
                }
                else
                {
                    // Keep the tree line in place even when the sentence has no parse.
                    pw.Println();
                }
                SemanticGraph collapsedDeps = sent.Get(typeof(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation));
                SaveDependencyGraph(collapsedDeps, pw);
                SemanticGraph uncollapsedDeps = sent.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
                SaveDependencyGraph(uncollapsedDeps, pw);
                SemanticGraph ccDeps = sent.Get(typeof(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation));
                SaveDependencyGraph(ccDeps, pw);
                // save all sentence tokens
                IList <CoreLabel> tokens = sent.Get(typeof(CoreAnnotations.TokensAnnotation));
                if (tokens != null)
                {
                    foreach (CoreLabel token in tokens)
                    {
                        SaveToken(token, haveExplicitAntecedent, pw);
                        pw.Println();
                    }
                }
                // add an empty line after every sentence
                pw.Println();
            }
            pw.Flush();
            return(os);
        }
Пример #20
0
        /// <summary>Run the Evalb scoring metric on guess/gold input.</summary>
        /// <remarks>
        /// Run the Evalb scoring metric on guess/gold input. The default language is English.
        /// Options read from the command line: <c>l</c> (language), <c>y</c> (maximum gold yield),
        /// <c>v</c> (verbose), <c>s</c> (emit the worst-scoring trees, sorted by F1), <c>c</c> (per-category
        /// evaluation), <c>f</c> (label regex for the per-category evaluation) and <c>e</c> (input encoding).
        /// The remaining arguments are the gold file followed by the guess file.
        /// The process exits with status -1 when the arguments are malformed.
        /// </remarks>
        /// <param name="args"/>
        public static void Main(string[] args)
        {
            if (args.Length < minArgs)
            {
                log.Info(Usage());
                System.Environment.Exit(-1);
            }
            Properties options             = StringUtils.ArgsToProperties(args, OptionArgDefs());
            Language   language            = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
            ITreebankLangParserParams tlpp = language.@params;
            int  maxGoldYield      = PropertiesUtils.GetInt(options, "y", int.MaxValue);
            bool Verbose           = PropertiesUtils.GetBool(options, "v", false);
            bool sortByF1          = PropertiesUtils.HasProperty(options, "s");
            int  worstKTreesToEmit = PropertiesUtils.GetInt(options, "s", 0);
            PriorityQueue <Triple <double, Tree, Tree> > queue = sortByF1 ? new PriorityQueue <Triple <double, Tree, Tree> >(2000, new Evalb.F1Comparator()) : null;
            bool   doCatLevel = PropertiesUtils.GetBool(options, "c", false);
            string labelRegex = options.GetProperty("f", null);
            string encoding   = options.GetProperty("e", "UTF-8");

            // The non-option arguments are stored under the empty key as one whitespace-separated
            // string. Split on whitespace explicitly: string.Split(string) would treat "\\s+" as a
            // literal separator instead of a regular expression. A null separator array means
            // "split on white-space characters".
            string[] parsedArgs = options.GetProperty(string.Empty, string.Empty).Split((char[])null, System.StringSplitOptions.RemoveEmptyEntries);
            if (parsedArgs.Length != minArgs)
            {
                log.Info(Usage());
                System.Environment.Exit(-1);
            }
            // NOTE(review): indexing [0] and [1] assumes minArgs is at least 2 - confirm against its declaration.
            string goldFile  = parsedArgs[0];
            string guessFile = parsedArgs[1];

            // Command-line has been parsed. Configure the metric for evaluation.
            tlpp.SetInputEncoding(encoding);
            PrintWriter pwOut         = tlpp.Pw();
            Treebank    guessTreebank = tlpp.DiskTreebank();

            guessTreebank.LoadPath(guessFile);
            pwOut.Println("GUESS TREEBANK:");
            pwOut.Println(guessTreebank.TextualSummary());
            Treebank goldTreebank = tlpp.DiskTreebank();

            goldTreebank.LoadPath(goldFile);
            pwOut.Println("GOLD TREEBANK:");
            pwOut.Println(goldTreebank.TextualSummary());
            Evalb            metric   = new Evalb("Evalb LP/LR", true);
            EvalbByCat       evalbCat = (doCatLevel) ? new EvalbByCat("EvalbByCat LP/LR", true, labelRegex) : null;
            ITreeTransformer tc       = tlpp.Collinizer();
            //The evalb ref implementation assigns status for each tree pair as follows:
            //
            //   0 - Ok (yields match)
            //   1 - length mismatch
            //   2 - null parse e.g. (()).
            //
            //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
            IEnumerator <Tree> goldItr  = goldTreebank.GetEnumerator();
            IEnumerator <Tree> guessItr = guessTreebank.GetEnumerator();
            int goldLineId        = 0;
            int guessLineId       = 0;
            int skippedGuessTrees = 0;

            // MoveNext() both tests for and consumes the next element, so its result has to be
            // remembered. Both enumerators are advanced in lock-step (the for-iterator also runs
            // on 'continue'); afterwards the two flags tell whether either file had trees left.
            // Calling MoveNext() again for the final check could swallow the one extra tree of a
            // longer guess file and hide the length mismatch.
            bool guessHasNext = guessItr.MoveNext();
            bool goldHasNext  = goldItr.MoveNext();

            for (; guessHasNext && goldHasNext; guessHasNext = guessItr.MoveNext(), goldHasNext = goldItr.MoveNext())
            {
                Tree           guessTree  = guessItr.Current;
                IList <ILabel> guessYield = guessTree.Yield();
                guessLineId++;
                Tree           goldTree  = goldItr.Current;
                IList <ILabel> goldYield = goldTree.Yield();
                goldLineId++;
                // Check that we should evaluate this tree
                if (goldYield.Count > maxGoldYield)
                {
                    skippedGuessTrees++;
                    continue;
                }
                // Only trees with equal yields can be evaluated
                if (goldYield.Count != guessYield.Count)
                {
                    pwOut.Printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.Count, guessYield.Count, goldLineId, guessLineId);
                    skippedGuessTrees++;
                    continue;
                }
                Tree evalGuess = tc.TransformTree(guessTree);
                Tree evalGold  = tc.TransformTree(goldTree);
                // Per-tree output is only produced in verbose mode.
                metric.Evaluate(evalGuess, evalGold, ((Verbose) ? pwOut : null));
                if (doCatLevel)
                {
                    evalbCat.Evaluate(evalGuess, evalGold, ((Verbose) ? pwOut : null));
                }
                if (sortByF1)
                {
                    StoreTrees(queue, guessTree, goldTree, metric.GetLastF1());
                }
            }
            if (guessHasNext || goldHasNext)
            {
                // Both counters are incremented together, so they report the number of pairs read.
                System.Console.Error.Printf("Guess/gold files do not have equal lengths (guess: %d gold: %d).%n", guessLineId, goldLineId);
            }
            pwOut.Println("================================================================================");
            if (skippedGuessTrees != 0)
            {
                pwOut.Printf("%s %d guess trees\n", "Unable to evaluate", skippedGuessTrees);
            }
            metric.Display(true, pwOut);
            pwOut.Println();
            if (doCatLevel)
            {
                evalbCat.Display(true, pwOut);
                pwOut.Println();
            }
            if (sortByF1)
            {
                EmitSortedTrees(queue, worstKTreesToEmit, guessFile);
            }
            pwOut.Close();
        }
Пример #21
0
        /// <summary>
        /// Prints the labeled bracketing report by category (LP, LR, F1 and the guessed/gold
        /// counts), followed by a totals line.
        /// </summary>
        /// <remarks>
        /// Categories are written in the enumeration order of a <c>SortedDictionary</c> keyed by
        /// F1. If the precision and recall counters have a different number of keys an error
        /// is logged and nothing is printed.
        /// </remarks>
        /// <param name="verbose">Not consulted by this implementation.</param>
        /// <param name="pw">Writer that receives the report.</param>
        public override void Display(bool verbose, PrintWriter pw)
        {
            if (precisions.KeySet().Count != recalls.KeySet().Count)
            {
                log.Error("Different counts for precisions and recalls!");
                return;
            }
            ICollection <ILabel> cats = GetEvalLabelSet(precisions.KeySet());
            Random rand = new Random();
            IDictionary <double, ILabel> f1Map = new SortedDictionary <double, ILabel>();

            foreach (ILabel cat in cats)
            {
                double pnum2 = pnums2.GetCount(cat);
                double rnum2 = rnums2.GetCount(cat);
                double prec  = precisions2.GetCount(cat) / pnum2;
                double rec   = recalls2.GetCount(cat) / rnum2;
                double f1    = 2.0 / (1.0 / prec + 1.0 / rec);
                if (double.IsNaN(f1))
                {
                    // Categories without a defined F1 get a sentinel key below every real score.
                    f1 = -1.0;
                }
                // The map is keyed by F1, so equal scores would overwrite each other. Add a
                // small random jitter and retry until the key is really unused.
                double key = f1;
                while (f1Map.ContainsKey(key))
                {
                    key = f1 + (rand.NextDouble() / 1000.0);
                }
                f1Map[key] = cat;
            }
            pw.Println("============================================================");
            pw.Println("Labeled Bracketed Evaluation by Category -- final statistics");
            pw.Println("============================================================");
            // Per category
            double catPrecisions    = 0.0;
            double catPrecisionNums = 0.0;
            double catRecalls       = 0.0;
            double catRecallNums    = 0.0;

            foreach (ILabel cat_1 in f1Map.Values)
            {
                double pnum2 = pnums2.GetCount(cat_1);
                double rnum2 = rnums2.GetCount(cat_1);
                double prec  = precisions2.GetCount(cat_1) / pnum2;
                prec *= 100.0;
                double rec = recalls2.GetCount(cat_1) / rnum2;
                rec *= 100.0;
                double f1 = 2.0 / (1.0 / prec + 1.0 / rec);
                catPrecisions    += precisions2.GetCount(cat_1);
                catPrecisionNums += pnum2;
                catRecalls       += recalls2.GetCount(cat_1);
                catRecallNums    += rnum2;
                // "F2" is the .NET fixed-point specifier with two decimals. A printf-style
                // pattern passed to string.Format would be emitted verbatim.
                string Lp = pnum2 == 0.0 ? "N/A" : prec.ToString("F2");
                string Lr = rnum2 == 0.0 ? "N/A" : rec.ToString("F2");
                string F1 = (pnum2 == 0.0 || rnum2 == 0.0) ? "N/A" : f1.ToString("F2");
                pw.Printf("%s\tLP: %s\tguessed: %d\tLR: %s\tgold: %d\t F1: %s%n", cat_1.Value(), Lp, (int)pnum2, Lr, (int)rnum2, F1);
            }
            pw.Println("============================================================");
            // Totals
            double prec_1 = catPrecisions / catPrecisionNums;
            double rec_1  = catRecalls / catRecallNums;
            double f1_1   = (2 * prec_1 * rec_1) / (prec_1 + rec_1);

            pw.Printf("Total\tLP: %.2f\tguessed: %d\tLR: %.2f\tgold: %d\t F1: %.2f%n", prec_1 * 100.0, (int)catPrecisionNums, rec_1 * 100.0, (int)catRecallNums, f1_1 * 100.0);
            pw.Println("============================================================");
        }
        /// <summary>
        /// Counts the word types that carry a given tag in a treebank and prints each type with
        /// its count, ordered by <c>Counters.ToComparatorDescending</c>.
        /// </summary>
        /// <param name="args">
        /// Optional <c>-l language</c> and <c>-e encoding</c>, plus one or more
        /// <c>tag path</c> pairs. The usage text is printed and the process exits with -1 when
        /// an argument is missing its companion or no treebank path was supplied.
        /// </param>
        public static void Main(string[] args)
        {
            if (args.Length < minArgs)
            {
                System.Console.Out.WriteLine(usage.ToString());
                System.Environment.Exit(-1);
                return;
            }
            ITreebankLangParserParams tlpp = new EnglishTreebankParserParams();
            DiskTreebank tb       = null;
            string       encoding = "UTF-8";
            string       puncTag  = null;

            for (int i = 0; i < args.Length; i++)
            {
                // Whatever starts an iteration needs a companion right after it: an option
                // needs its value, a tag needs its treebank path. A trailing lone argument is
                // therefore a usage error rather than an index-out-of-range crash.
                if (i + 1 >= args.Length)
                {
                    System.Console.Out.WriteLine(usage.ToString());
                    System.Environment.Exit(-1);
                    return;
                }
                if (args[i].StartsWith("-"))
                {
                    switch (args[i])
                    {
                    case "-l":
                    {
                        Language lang = Language.ValueOf(args[++i].Trim());
                        tlpp = lang.@params;
                        break;
                    }

                    case "-e":
                    {
                        encoding = args[++i];
                        break;
                    }

                    default:
                    {
                        System.Console.Out.WriteLine(usage.ToString());
                        System.Environment.Exit(-1);
                        break;
                    }
                    }
                }
                else
                {
                    puncTag = args[i++];
                    if (tb == null)
                    {
                        if (tlpp == null)
                        {
                            System.Console.Out.WriteLine(usage.ToString());
                            System.Environment.Exit(-1);
                            return;
                        }
                        // The treebank is created lazily so that -l / -e given earlier apply.
                        tlpp.SetInputEncoding(encoding);
                        tlpp.SetOutputEncoding(encoding);
                        tb = tlpp.DiskTreebank();
                    }
                    tb.LoadPath(args[i]);
                }
            }
            if (tb == null)
            {
                // Only options were given; there is nothing to read.
                System.Console.Out.WriteLine(usage.ToString());
                System.Environment.Exit(-1);
                return;
            }
            ICounter <string> puncTypes = new ClassicCounter <string>();

            foreach (Tree t in tb)
            {
                IList <CoreLabel> yield = t.TaggedLabeledYield();
                foreach (CoreLabel word in yield)
                {
                    if (word.Tag().Equals(puncTag))
                    {
                        puncTypes.IncrementCount(word.Word());
                    }
                }
            }
            IList <string> biggestKeys = new List <string>(puncTypes.KeySet());

            biggestKeys.Sort(Counters.ToComparatorDescending(puncTypes));
            PrintWriter pw = tlpp.Pw();

            foreach (string wordType in biggestKeys)
            {
                pw.Printf("%s\t%d%n", wordType, (int)puncTypes.GetCount(wordType));
            }
            pw.Close();
        }
        /// <summary>
        /// Reads a treebank and prints every tree whose yield is not longer than the configured
        /// maximum, in the output format selected on the command line.
        /// </summary>
        /// <remarks>
        /// Options are parsed with <c>StringUtils.ArgsToProperties</c>: <c>y</c> maximum yield
        /// length, <c>p</c> print the tree, <c>f</c> print the yield, <c>a</c> print the
        /// pre-terminal yield, <c>t</c> print one word/tag pair per line, <c>l</c> language,
        /// <c>e</c> encoding. When several output flags are set the first of p, f, a, t wins.
        /// The number of trees kept is reported on standard error.
        /// </remarks>
        /// <param name="args">Command-line options followed by the treebank file name.</param>
        public static void Main(string[] args)
        {
            if (args.Length < minArgs)
            {
                System.Console.Out.WriteLine(usage);
                System.Environment.Exit(-1);
                return;
            }
            // Process command-line options
            Properties options  = StringUtils.ArgsToProperties(args, optionArgDefinitions);
            string     fileName = options.GetProperty(string.Empty);

            if (string.IsNullOrEmpty(fileName))
            {
                System.Console.Out.WriteLine(usage);
                System.Environment.Exit(-1);
                return;
            }
            int      maxLen                = PropertiesUtils.GetInt(options, "y", int.MaxValue);
            bool     printTrees            = PropertiesUtils.GetBool(options, "p", false);
            bool     flattenTrees          = PropertiesUtils.GetBool(options, "f", false);
            bool     printPOS              = PropertiesUtils.GetBool(options, "a", false);
            bool     printTnT              = PropertiesUtils.GetBool(options, "t", false);
            Language language              = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
            ITreebankLangParserParams tlpp = language.@params;
            string encoding                = options.GetProperty("e", "UTF-8");

            tlpp.SetInputEncoding(encoding);
            tlpp.SetOutputEncoding(encoding);
            DiskTreebank tb = tlpp.DiskTreebank();

            tb.LoadPath(fileName);
            // Read the treebank
            PrintWriter pw       = tlpp.Pw();
            int         numTrees = 0;

            foreach (Tree tree in tb)
            {
                if (tree.Yield().Count > maxLen)
                {
                    continue;
                }
                ++numTrees;
                if (printTrees)
                {
                    pw.Println(tree.ToString());
                }
                else if (flattenTrees)
                {
                    pw.Println(SentenceUtils.ListToString(tree.Yield()));
                }
                else if (printPOS)
                {
                    pw.Println(SentenceUtils.ListToString(tree.PreTerminalYield()));
                }
                else if (printTnT)
                {
                    IList <CoreLabel> yield = tree.TaggedLabeledYield();
                    foreach (CoreLabel label in yield)
                    {
                        pw.Printf("%s\t%s%n", label.Word(), label.Tag());
                    }
                    // Blank line between sentences.
                    pw.Println();
                }
            }
            // Close the writer like the other tools in this file do, so that anything still
            // buffered reaches the output.
            pw.Close();
            System.Console.Error.Printf("Read %d trees.%n", numTrees);
        }
Пример #24
0
        /// <summary>
        /// Counts the right-hand sides (sequences of child labels) of every node matching a
        /// given category in a treebank and prints them with their counts, ordered by
        /// <c>Counters.ToComparatorDescending</c>.
        /// </summary>
        /// <param name="args">
        /// Optional <c>-l language</c> and <c>-e encoding</c>, plus one or more
        /// <c>category path</c> pairs. The category is compiled into the Tregex pattern
        /// <c>"@" + category</c>. The usage text is printed and the process exits with -1 when
        /// an argument is missing its companion or no treebank path was supplied.
        /// </param>
        public static void Main(string[] args)
        {
            if (args.Length < minArgs)
            {
                System.Console.Out.WriteLine(usage.ToString());
                System.Environment.Exit(-1);
                return;
            }
            ITreebankLangParserParams tlpp = new EnglishTreebankParserParams();
            DiskTreebank  tb        = null;
            string        encoding  = "UTF-8";
            TregexPattern rootMatch = null;

            for (int i = 0; i < args.Length; i++)
            {
                // Whatever starts an iteration needs a companion right after it: an option
                // needs its value, a category needs its treebank path.
                if (i + 1 >= args.Length)
                {
                    System.Console.Out.WriteLine(usage.ToString());
                    System.Environment.Exit(-1);
                    return;
                }
                if (args[i].StartsWith("-"))
                {
                    switch (args[i])
                    {
                    case "-l":
                    {
                        Language lang = Language.ValueOf(args[++i].Trim());
                        tlpp = lang.@params;
                        break;
                    }

                    case "-e":
                    {
                        encoding = args[++i];
                        break;
                    }

                    default:
                    {
                        System.Console.Out.WriteLine(usage.ToString());
                        System.Environment.Exit(-1);
                        break;
                    }
                    }
                }
                else
                {
                    rootMatch = TregexPattern.Compile("@" + args[i++]);
                    if (tb == null)
                    {
                        if (tlpp == null)
                        {
                            System.Console.Out.WriteLine(usage.ToString());
                            System.Environment.Exit(-1);
                            return;
                        }
                        // The treebank is created lazily so that -l / -e given earlier apply.
                        tlpp.SetInputEncoding(encoding);
                        tlpp.SetOutputEncoding(encoding);
                        tb = tlpp.DiskTreebank();
                    }
                    // No post-increment here: the loop header already advances past the path.
                    // Incrementing twice would skip the argument that follows it.
                    tb.LoadPath(args[i]);
                }
            }
            if (tb == null)
            {
                // Only options were given; there is neither a pattern nor a treebank.
                System.Console.Out.WriteLine(usage.ToString());
                System.Environment.Exit(-1);
                return;
            }
            ICounter <string> rhsCounter = new ClassicCounter <string>();

            foreach (Tree t in tb)
            {
                TregexMatcher m = rootMatch.Matcher(t);
                while (m.FindNextMatchingNode())
                {
                    Tree          match = m.GetMatch();
                    StringBuilder sb    = new StringBuilder();
                    foreach (Tree kid in match.Children())
                    {
                        sb.Append(kid.Value()).Append(" ");
                    }
                    rhsCounter.IncrementCount(sb.ToString().Trim());
                }
            }
            IList <string> biggestKeys = new List <string>(rhsCounter.KeySet());

            biggestKeys.Sort(Counters.ToComparatorDescending(rhsCounter));
            PrintWriter pw = tlpp.Pw();

            foreach (string rhs in biggestKeys)
            {
                pw.Printf("%s\t%d%n", rhs, (int)rhsCounter.GetCount(rhs));
            }
            pw.Close();
        }
Пример #25
0
 /// <summary>
 /// Writes the text form of this classifier: the base-class output first, then the node
 /// and edge feature index maps, then the weight matrices.
 /// </summary>
 /// <remarks>
 /// Each matrix is written as a header line with its row count, followed by one line per
 /// row holding the row length and the space-separated values. Depending on
 /// <c>flags.secondOrderNonLinear</c> either the edge layer weights or the linear weights
 /// are written before the input/output layer weights.
 /// </remarks>
 /// <param name="pw">Writer receiving the serialized text.</param>
 /// <exception cref="System.Exception"/>
 protected internal override void SerializeTextClassifier(PrintWriter pw)
 {
     base.SerializeTextClassifier(pw);

     // Feature index maps: one "position <tab> value" line per entry.
     pw.Printf("nodeFeatureIndicesMap.size()=\t%d%n", nodeFeatureIndicesMap.Size());
     int nodePos = 0;
     while (nodePos < nodeFeatureIndicesMap.Size())
     {
         pw.Printf("%d\t%d%n", nodePos, nodeFeatureIndicesMap.Get(nodePos));
         nodePos++;
     }
     pw.Printf("edgeFeatureIndicesMap.size()=\t%d%n", edgeFeatureIndicesMap.Size());
     int edgePos = 0;
     while (edgePos < edgeFeatureIndicesMap.Size())
     {
         pw.Printf("%d\t%d%n", edgePos, edgeFeatureIndicesMap.Get(edgePos));
         edgePos++;
     }

     if (!flags.secondOrderNonLinear)
     {
         pw.Printf("linearWeights.length=\t%d%n", linearWeights.Length);
         foreach (double[] row in linearWeights)
         {
             List <double> values = new List <double>(row);
             pw.Printf("%d\t%s%n", row.Length, StringUtils.Join(values, " "));
         }
     }
     else
     {
         pw.Printf("inputLayerWeights4Edge.length=\t%d%n", inputLayerWeights4Edge.Length);
         foreach (double[] row in inputLayerWeights4Edge)
         {
             List <double> values = new List <double>(row);
             pw.Printf("%d\t%s%n", row.Length, StringUtils.Join(values, " "));
         }
         pw.Printf("outputLayerWeights4Edge.length=\t%d%n", outputLayerWeights4Edge.Length);
         foreach (double[] row in outputLayerWeights4Edge)
         {
             List <double> values = new List <double>(row);
             pw.Printf("%d\t%s%n", row.Length, StringUtils.Join(values, " "));
         }
     }

     // The input and output layer weights are written in both configurations.
     pw.Printf("inputLayerWeights.length=\t%d%n", inputLayerWeights.Length);
     foreach (double[] row in inputLayerWeights)
     {
         List <double> values = new List <double>(row);
         pw.Printf("%d\t%s%n", row.Length, StringUtils.Join(values, " "));
     }
     pw.Printf("outputLayerWeights.length=\t%d%n", outputLayerWeights.Length);
     foreach (double[] row in outputLayerWeights)
     {
         List <double> values = new List <double>(row);
         pw.Printf("%d\t%s%n", row.Length, StringUtils.Join(values, " "));
     }
 }
Пример #26
0
        /// <summary>
        /// Evaluate accuracy when the input is gold segmented text *with* segmentation
        /// markers and morphological analyses.
        /// </summary>
        /// <remarks>
        /// The evaluation file (<c>flags.testFile</c>) has the same format as the training
        /// data. Labels on the boundary character are not counted. When <c>tedEvalPrefix</c>
        /// is set, four TEDEval files (<c>_gold.ftree</c>, <c>_gold.segmentation</c>,
        /// <c>_parse.ftree</c>, <c>_parse.segmentation</c>) are written with that prefix. If
        /// any of them cannot be opened the error is reported and no TEDEval output is written.
        /// </remarks>
        /// <param name="pwOut">Writer that receives the overall and per-label accuracies.</param>
        private void Evaluate(PrintWriter pwOut)
        {
            log.Info("Starting evaluation...");
            bool hasSegmentationMarkers = true;
            bool hasTags = true;
            IDocumentReaderAndWriter <CoreLabel> docReader = new ArabicDocumentReaderAndWriter(hasSegmentationMarkers, hasTags, hasDomainLabels, domain, tf);
            ObjectBank <IList <CoreLabel> >      lines     = classifier.MakeObjectBankFromFile(flags.testFile, docReader);
            PrintWriter tedEvalGoldTree  = null;
            PrintWriter tedEvalParseTree = null;
            PrintWriter tedEvalGoldSeg   = null;
            PrintWriter tedEvalParseSeg  = null;

            try
            {
                if (tedEvalPrefix != null)
                {
                    try
                    {
                        tedEvalGoldTree  = new PrintWriter(tedEvalPrefix + "_gold.ftree");
                        tedEvalGoldSeg   = new PrintWriter(tedEvalPrefix + "_gold.segmentation");
                        tedEvalParseTree = new PrintWriter(tedEvalPrefix + "_parse.ftree");
                        tedEvalParseSeg  = new PrintWriter(tedEvalPrefix + "_parse.segmentation");
                    }
                    catch (FileNotFoundException e)
                    {
                        System.Console.Error.Printf("%s: %s%n", typeof(Edu.Stanford.Nlp.International.Arabic.Process.ArabicSegmenter).FullName, e.Message);
                    }
                }
                // tedEvalParseSeg is opened last, so it being non-null means all four writers exist.
                bool writeTedEval = tedEvalParseSeg != null;
                ICounter <string> labelTotal   = new ClassicCounter <string>();
                ICounter <string> labelCorrect = new ClassicCounter <string>();
                int total   = 0;
                int correct = 0;

                foreach (IList <CoreLabel> line in lines)
                {
                    string[] inputTokens = TedEvalSanitize(IOBUtils.IOBToString(line).ReplaceAll(":", "#pm#")).Split(" ");
                    string[] goldTokens  = TedEvalSanitize(IOBUtils.IOBToString(line, ":")).Split(" ");
                    // A foreach iteration variable cannot be reassigned in C#, so the
                    // classifier output gets its own local.
                    IList <CoreLabel> classified = classifier.Classify(line);
                    string[] parseTokens = TedEvalSanitize(IOBUtils.IOBToString(classified, ":")).Split(" ");
                    foreach (CoreLabel label in classified)
                    {
                        // Do not evaluate labeling of whitespace
                        string observation = label.Get(typeof(CoreAnnotations.CharAnnotation));
                        if (!observation.Equals(IOBUtils.GetBoundaryCharacter()))
                        {
                            total++;
                            string hypothesis = label.Get(typeof(CoreAnnotations.AnswerAnnotation));
                            string reference  = label.Get(typeof(CoreAnnotations.GoldAnswerAnnotation));
                            labelTotal.IncrementCount(reference);
                            if (hypothesis.Equals(reference))
                            {
                                correct++;
                                labelCorrect.IncrementCount(reference);
                            }
                        }
                    }
                    if (writeTedEval)
                    {
                        tedEvalGoldTree.Printf("(root");
                        tedEvalParseTree.Printf("(root");
                        int safeLength = inputTokens.Length;
                        if (inputTokens.Length != goldTokens.Length)
                        {
                            log.Info("In generating TEDEval files: Input and gold do not have the same number of tokens");
                            log.Info("    (ignoring any extras)");
                            log.Info("  input: " + Arrays.ToString(inputTokens));
                            log.Info("  gold: " + Arrays.ToString(goldTokens));
                            safeLength = Math.Min(safeLength, goldTokens.Length);
                        }
                        if (inputTokens.Length != parseTokens.Length)
                        {
                            log.Info("In generating TEDEval files: Input and parse do not have the same number of tokens");
                            log.Info("    (ignoring any extras)");
                            log.Info("  input: " + Arrays.ToString(inputTokens));
                            log.Info("  parse: " + Arrays.ToString(parseTokens));
                            // Narrow the bound already in place instead of replacing it;
                            // otherwise a shorter gold array could be indexed past its end.
                            safeLength = Math.Min(safeLength, parseTokens.Length);
                        }
                        for (int i = 0; i < safeLength; i++)
                        {
                            foreach (string segment in goldTokens[i].Split(":"))
                            {
                                tedEvalGoldTree.Printf(" (seg %s)", segment);
                            }
                            tedEvalGoldSeg.Printf("%s\t%s%n", inputTokens[i], goldTokens[i]);
                            foreach (string segment_1 in parseTokens[i].Split(":"))
                            {
                                tedEvalParseTree.Printf(" (seg %s)", segment_1);
                            }
                            tedEvalParseSeg.Printf("%s\t%s%n", inputTokens[i], parseTokens[i]);
                        }
                        tedEvalGoldTree.Printf(")%n");
                        tedEvalGoldSeg.Println();
                        tedEvalParseTree.Printf(")%n");
                        tedEvalParseSeg.Println();
                    }
                }
                double accuracy = ((double)correct) / ((double)total);

                accuracy *= 100.0;
                pwOut.Println("EVALUATION RESULTS");
                pwOut.Printf("#datums:\t%d%n", total);
                pwOut.Printf("#correct:\t%d%n", correct);
                pwOut.Printf("accuracy:\t%.2f%n", accuracy);
                pwOut.Println("==================");
                // Output the per label accuracies
                pwOut.Println("PER LABEL ACCURACIES");
                foreach (string refLabel in labelTotal.KeySet())
                {
                    double nTotal   = labelTotal.GetCount(refLabel);
                    double nCorrect = labelCorrect.GetCount(refLabel);
                    double acc      = (nCorrect / nTotal) * 100.0;
                    pwOut.Printf(" %s\t%.2f%n", refLabel, acc);
                }
            }
            finally
            {
                // Close whatever was opened, including after a partial open or an exception
                // during evaluation.
                if (tedEvalGoldTree != null)
                {
                    tedEvalGoldTree.Close();
                }
                if (tedEvalGoldSeg != null)
                {
                    tedEvalGoldSeg.Close();
                }
                if (tedEvalParseTree != null)
                {
                    tedEvalParseTree.Close();
                }
                if (tedEvalParseSeg != null)
                {
                    tedEvalParseSeg.Close();
                }
            }
        }
Пример #27
0
        /// <summary>Execute with no arguments for usage.</summary>
        /// <remarks>
        /// Loads the guess and gold treebanks, walks them in parallel and feeds every pair with
        /// equal yield lengths to the metric after transforming both trees with the
        /// collinizer. Pairs whose gold yield exceeds <c>MaxGoldYield</c> or whose yield
        /// lengths differ are skipped and counted. A warning is written to standard error when
        /// one treebank has trees left after the other is exhausted.
        /// </remarks>
        /// <param name="args">Command-line arguments, checked by <c>ValidateCommandLine</c>.</param>
        public static void Main(string[] args)
        {
            if (!ValidateCommandLine(args))
            {
                log.Info(Usage);
                System.Environment.Exit(-1);
            }
            ITreebankLangParserParams tlpp = Language.@params;
            PrintWriter pwOut         = tlpp.Pw();
            Treebank    guessTreebank = tlpp.DiskTreebank();

            guessTreebank.LoadPath(guessFile);
            pwOut.Println("GUESS TREEBANK:");
            pwOut.Println(guessTreebank.TextualSummary());
            Treebank goldTreebank = tlpp.DiskTreebank();

            goldTreebank.LoadPath(goldFile);
            pwOut.Println("GOLD TREEBANK:");
            pwOut.Println(goldTreebank.TextualSummary());
            Edu.Stanford.Nlp.Parser.Metrics.LeafAncestorEval metric = new Edu.Stanford.Nlp.Parser.Metrics.LeafAncestorEval("LeafAncestor");
            ITreeTransformer tc = tlpp.Collinizer();
            //The evalb ref implementation assigns status for each tree pair as follows:
            //
            //   0 - Ok (yields match)
            //   1 - length mismatch
            //   2 - null parse e.g. (()).
            //
            //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
            IEnumerator <Tree> goldItr  = goldTreebank.GetEnumerator();
            IEnumerator <Tree> guessItr = guessTreebank.GetEnumerator();
            int goldLineId        = 0;
            int guessLineId       = 0;
            int skippedGuessTrees = 0;
            bool guessHasTree;
            bool goldHasTree;

            while (true)
            {
                // MoveNext() consumes an element, so it cannot be called again afterwards just
                // to "peek". Advance both enumerators exactly once per round and remember the
                // results for the length check after the loop.
                guessHasTree = guessItr.MoveNext();
                goldHasTree  = goldItr.MoveNext();
                if (!guessHasTree || !goldHasTree)
                {
                    break;
                }
                Tree           guessTree  = guessItr.Current;
                IList <ILabel> guessYield = guessTree.Yield();
                guessLineId++;
                Tree           goldTree  = goldItr.Current;
                IList <ILabel> goldYield = goldTree.Yield();
                goldLineId++;
                // Check that we should evaluate this tree
                if (goldYield.Count > MaxGoldYield)
                {
                    skippedGuessTrees++;
                    continue;
                }
                // Only trees with equal yields can be evaluated
                if (goldYield.Count != guessYield.Count)
                {
                    pwOut.Printf("Yield mismatch gold: %d tokens vs. guess: %d tokens (lines: gold %d guess %d)%n", goldYield.Count, guessYield.Count, goldLineId, guessLineId);
                    skippedGuessTrees++;
                    continue;
                }
                Tree evalGuess = tc.TransformTree(guessTree);
                Tree evalGold  = tc.TransformTree(goldTree);
                metric.Evaluate(evalGuess, evalGold, ((Verbose) ? pwOut : null));
            }
            // Exactly one of the flags is still true when the treebanks differ in length.
            if (guessHasTree || goldHasTree)
            {
                System.Console.Error.Printf("Guess/gold files do not have equal lengths (guess: %d gold: %d)%n.", guessLineId, goldLineId);
            }
            pwOut.Println("================================================================================");
            if (skippedGuessTrees != 0)
            {
                pwOut.Printf("%s %d guess trees%n", "Unable to evaluate", skippedGuessTrees);
            }
            metric.Display(true, pwOut);
            pwOut.Close();
        }
Пример #28
0
        /// <summary>
        /// Prints the unknown-word signatures found in a treebank together with their counts,
        /// ordered by <c>Counters.ToComparatorDescending</c>.
        /// </summary>
        /// <remarks>
        /// Every word is added to a running vocabulary count. Once more than half of the trees
        /// have been read, each word whose running count is still below 2 is mapped to a
        /// signature by the lexicon's unknown word model and that signature is counted.
        /// Lexicon options are adjusted for French and Arabic.
        /// </remarks>
        /// <param name="args">
        /// Optional <c>-l language</c> and <c>-e encoding</c>, plus one or more treebank paths.
        /// The usage text is printed and the process exits with -1 when an option lacks its
        /// value or no path was supplied.
        /// </param>
        public static void Main(string[] args)
        {
            if (args.Length < minArgs)
            {
                System.Console.Out.WriteLine(usage.ToString());
                System.Environment.Exit(-1);
                return;
            }
            ITreebankLangParserParams tlpp = new EnglishTreebankParserParams();
            DiskTreebank tb       = null;
            string       encoding = "UTF-8";
            Language     lang     = Language.English;

            for (int i = 0; i < args.Length; i++)
            {
                if (args[i].StartsWith("-"))
                {
                    // Both recognized options take a value; an option in last position is a
                    // usage error rather than an index-out-of-range crash.
                    if (i + 1 >= args.Length)
                    {
                        System.Console.Out.WriteLine(usage.ToString());
                        System.Environment.Exit(-1);
                        return;
                    }
                    switch (args[i])
                    {
                    case "-l":
                    {
                        lang = Language.ValueOf(args[++i].Trim());
                        tlpp = lang.@params;
                        break;
                    }

                    case "-e":
                    {
                        encoding = args[++i];
                        break;
                    }

                    default:
                    {
                        System.Console.Out.WriteLine(usage.ToString());
                        System.Environment.Exit(-1);
                        break;
                    }
                    }
                }
                else
                {
                    if (tb == null)
                    {
                        if (tlpp == null)
                        {
                            System.Console.Out.WriteLine(usage.ToString());
                            System.Environment.Exit(-1);
                            return;
                        }
                        // The treebank is created lazily so that -l / -e given earlier apply.
                        tlpp.SetInputEncoding(encoding);
                        tlpp.SetOutputEncoding(encoding);
                        tb = tlpp.DiskTreebank();
                    }
                    tb.LoadPath(args[i]);
                }
            }
            if (tb == null)
            {
                // Only options were given; there is nothing to read.
                System.Console.Out.WriteLine(usage.ToString());
                System.Environment.Exit(-1);
                return;
            }
            PrintWriter pw = tlpp.Pw();
            Options     op = new Options();

            Options.LexOptions lexOptions = op.lexOptions;
            if (lang == Language.French)
            {
                lexOptions.useUnknownWordSignatures = 1;
                lexOptions.smartMutation            = false;
                lexOptions.unknownSuffixSize        = 2;
                lexOptions.unknownPrefixSize        = 1;
            }
            else if (lang == Language.Arabic)
            {
                lexOptions.smartMutation            = false;
                lexOptions.useUnknownWordSignatures = 9;
                lexOptions.unknownPrefixSize        = 1;
                lexOptions.unknownSuffixSize        = 1;
            }
            IIndex <string>   wordIndex    = new HashIndex <string>();
            IIndex <string>   tagIndex     = new HashIndex <string>();
            ILexicon          lex          = tlpp.Lex(op, wordIndex, tagIndex);
            // Signatures are only collected for trees past the halfway point.
            int               computeAfter = (int)(0.50 * tb.Count);
            ICounter <string> vocab        = new ClassicCounter <string>();
            ICounter <string> unkCounter   = new ClassicCounter <string>();
            int               treeId       = 0;

            foreach (Tree t in tb)
            {
                IList <ILabel> yield = t.Yield();
                int            posId = 0;
                foreach (ILabel word in yield)
                {
                    vocab.IncrementCount(word.Value());
                    if (treeId > computeAfter && vocab.GetCount(word.Value()) < 2.0)
                    {
                        // NOTE(review): posId advances only when a signature is requested, so
                        // it is not the word's index within the sentence - confirm this is
                        // what GetSignature expects.
                        unkCounter.IncrementCount(lex.GetUnknownWordModel().GetSignature(word.Value(), posId++));
                    }
                }
                treeId++;
            }
            IList <string> biggestKeys = new List <string>(unkCounter.KeySet());

            biggestKeys.Sort(Counters.ToComparatorDescending(unkCounter));
            foreach (string wordType in biggestKeys)
            {
                pw.Printf("%s\t%d%n", wordType, (int)unkCounter.GetCount(wordType));
            }
            pw.Close();
        }
        /// <summary>Run the scoring metric on guess/gold input.</summary>
        /// <remarks>
        /// Run the scoring metric on guess/gold input. This method performs "Collinization."
        /// The default language is English.
        /// </remarks>
        /// <param name="args"/>
        public static void Main(string[] args)
        {
            if (args.Length < minArgs)
            {
                System.Console.Out.WriteLine(usage.ToString());
                System.Environment.Exit(-1);
            }
            ITreebankLangParserParams tlpp = new EnglishTreebankParserParams();
            int    maxGoldYield            = int.MaxValue;
            int    maxGuessYield           = int.MaxValue;
            bool   Verbose   = false;
            bool   skipGuess = false;
            bool   tagMode   = false;
            string guessFile = null;
            string goldFile  = null;

            for (int i = 0; i < args.Length; i++)
            {
                if (args[i].StartsWith("-"))
                {
                    switch (args[i])
                    {
                    case "-l":
                    {
                        Language lang = Language.ValueOf(args[++i].Trim());
                        tlpp = lang.@params;
                        break;
                    }

                    case "-y":
                    {
                        maxGoldYield = System.Convert.ToInt32(args[++i].Trim());
                        break;
                    }

                    case "-t":
                    {
                        tagMode = true;
                        break;
                    }

                    case "-v":
                    {
                        Verbose = true;
                        break;
                    }

                    case "-g":
                    {
                        maxGuessYield = System.Convert.ToInt32(args[++i].Trim());
                        skipGuess     = true;
                        break;
                    }

                    default:
                    {
                        System.Console.Out.WriteLine(usage.ToString());
                        System.Environment.Exit(-1);
                        break;
                    }
                    }
                }
                else
                {
                    //Required parameters
                    goldFile  = args[i++];
                    guessFile = args[i];
                    break;
                }
            }
            PrintWriter pwOut         = tlpp.Pw();
            Treebank    guessTreebank = tlpp.DiskTreebank();

            guessTreebank.LoadPath(guessFile);
            pwOut.Println("GUESS TREEBANK:");
            pwOut.Println(guessTreebank.TextualSummary());
            Treebank goldTreebank = tlpp.DiskTreebank();

            goldTreebank.LoadPath(goldFile);
            pwOut.Println("GOLD TREEBANK:");
            pwOut.Println(goldTreebank.TextualSummary());
            string evalName = (tagMode) ? "TsarfatyTAG" : "TsarfatySEG";

            Edu.Stanford.Nlp.Parser.Metrics.TsarfatyEval eval = new Edu.Stanford.Nlp.Parser.Metrics.TsarfatyEval(evalName, tagMode);
            ITreeTransformer tc = tlpp.Collinizer();
            //PennTreeReader skips over null/malformed parses. So when the yields of the gold/guess trees
            //don't match, we need to keep looking for the next gold tree that matches.
            //The evalb ref implementation differs slightly as it expects one tree per line. It assigns
            //status as follows:
            //
            //   0 - Ok (yields match)
            //   1 - length mismatch
            //   2 - null parse e.g. (()).
            //
            //In the cases of 1,2, evalb does not include the tree pair in the LP/LR computation.
            IEnumerator <Tree> goldItr = goldTreebank.GetEnumerator();
            int goldLineId             = 0;
            int skippedGuessTrees      = 0;

            foreach (Tree guess in guessTreebank)
            {
                Tree          evalGuess  = tc.TransformTree(guess);
                List <ILabel> guessSent  = guess.Yield();
                string        guessChars = SentenceUtils.ListToString(guessSent).ReplaceAll("\\s+", string.Empty);
                if (guessSent.Count > maxGuessYield)
                {
                    skippedGuessTrees++;
                    continue;
                }
                bool doneEval = false;
                while (goldItr.MoveNext() && !doneEval)
                {
                    Tree gold     = goldItr.Current;
                    Tree evalGold = tc.TransformTree(gold);
                    goldLineId++;
                    List <ILabel> goldSent  = gold.Yield();
                    string        goldChars = SentenceUtils.ListToString(goldSent).ReplaceAll("\\s+", string.Empty);
                    if (goldSent.Count > maxGoldYield)
                    {
                        continue;
                    }
                    else
                    {
                        if (goldChars.Length != guessChars.Length)
                        {
                            pwOut.Printf("Char level yield mismatch at line %d (guess: %d gold: %d)\n", goldLineId, guessChars.Length, goldChars.Length);
                            skippedGuessTrees++;
                            break;
                        }
                    }
                    //Default evalb behavior -- skip this guess tree
                    eval.Evaluate(evalGuess, evalGold, ((Verbose) ? pwOut : null));
                    doneEval = true;
                }
            }
            //Move to the next guess parse
            pwOut.Println("================================================================================");
            if (skippedGuessTrees != 0)
            {
                pwOut.Printf("%s %d guess trees\n", ((skipGuess) ? "Skipped" : "Unable to evaluate"), skippedGuessTrees);
            }
            eval.Display(true, pwOut);
            pwOut.Println();
            pwOut.Close();
        }