// 0
        // 1
        // 2
        // 3
        // 4
        // 5
        // 6
        /// <summary>
        /// Builds a sentence from the gold and guess label arrays, counts entity-level
        /// results with <c>IOBUtils.CountEntityResults</c>, and asserts that the total
        /// true-positive, false-positive and false-negative counts match the expected values.
        /// </summary>
        /// <param name="gold">Gold labels handed to <c>MakeListCoreLabel</c>.</param>
        /// <param name="guess">Guessed labels handed to <c>MakeListCoreLabel</c>.</param>
        /// <param name="tp">Expected total of the true-positive counter.</param>
        /// <param name="fp">Expected total of the false-positive counter.</param>
        /// <param name="fn">Expected total of the false-negative counter.</param>
        private static void RunIOBResultsTest(string[] gold, string[] guess, double tp, double fp, double fn)
        {
            // Counter totals are doubles, so compare with a small tolerance rather than exactly.
            const double Tolerance = 0.0001;

            IList <CoreLabel> sentence = MakeListCoreLabel(gold, guess);
            ICounter <string> entityTP = new ClassicCounter <string>();
            ICounter <string> entityFP = new ClassicCounter <string>();
            ICounter <string> entityFN = new ClassicCounter <string>();

            IOBUtils.CountEntityResults(sentence, entityTP, entityFP, entityFN, Bg);

            // NUnit's overload is (expected, actual, delta, message); the failure message goes last.
            NUnit.Framework.Assert.AreEqual(tp, entityTP.TotalCount(), Tolerance, "For true positives");
            NUnit.Framework.Assert.AreEqual(fp, entityFP.TotalCount(), Tolerance, "For false positives");
            NUnit.Framework.Assert.AreEqual(fn, entityFN.TotalCount(), Tolerance, "For false negatives");
        }
Exemple #2
0
        //  private static String stripTag(String tag) {
        //    if (tag.startsWith("DT")) {
        //      String newTag = tag.substring(2, tag.length());
        //      return newTag.length() > 0 ? newTag : tag;
        //    }
        //    return tag;
        //  }
        /// <summary>
        /// Loads a treebank for the requested language, tallies word, tag, lemma and
        /// morphological-feature statistics over every token, and prints a report to
        /// standard output.
        /// </summary>
        /// <remarks>
        /// Any language other than <c>Language.Arabic</c> is configured with the French
        /// option flag and the French morphological feature specification.
        /// </remarks>
        /// <param name="args">
        /// Exactly three values: the language name, the treebank path, and a
        /// comma-separated list of morphological feature names to activate.
        /// </param>
        public static void Main(string[] args)
        {
            if (args.Length != 3)
            {
                System.Console.Error.Printf("Usage: java %s language filename features%n", typeof(TreebankFactoredLexiconStats).FullName);
                System.Environment.Exit(-1);
            }
            Language language = Language.ValueOf(args[0]);
            ITreebankLangParserParams tlpp = language.@params;

            if (language.Equals(Language.Arabic))
            {
                string[] options = new string[] { "-arabicFactored" };
                tlpp.SetOptionFlag(options, 0);
            }
            else
            {
                string[] options = new string[] { "-frenchFactored" };
                tlpp.SetOptionFlag(options, 0);
            }
            Treebank tb = tlpp.DiskTreebank();

            tb.LoadPath(args[1]);
            MorphoFeatureSpecification morphoSpec = language.Equals(Language.Arabic) ? new ArabicMorphoFeatureSpecification() : new FrenchMorphoFeatureSpecification();

            string[] features = args[2].Trim().Split(",");
            foreach (string feature in features)
            {
                morphoSpec.Activate(MorphoFeatureSpecification.MorphoFeatureType.ValueOf(feature));
            }
            // Counters
            ICounter <string> wordTagCounter  = new ClassicCounter <string>(30000);
            ICounter <string> morphTagCounter = new ClassicCounter <string>(500);
            ICounter <string> morphCounter           = new ClassicCounter <string>(500);
            ICounter <string> wordCounter            = new ClassicCounter <string>(30000);
            ICounter <string> tagCounter             = new ClassicCounter <string>(300);
            ICounter <string> lemmaCounter           = new ClassicCounter <string>(25000);
            ICounter <string> lemmaTagCounter        = new ClassicCounter <string>(25000);
            ICounter <string> richTagCounter         = new ClassicCounter <string>(1000);
            ICounter <string> reducedTagCounter      = new ClassicCounter <string>(500);
            ICounter <string> reducedTagLemmaCounter = new ClassicCounter <string>(500);
            IDictionary <string, ICollection <string> > wordLemmaMap           = Generics.NewHashMap();
            TwoDimensionalIntCounter <string, string>   lemmaReducedTagCounter = new TwoDimensionalIntCounter <string, string>(30000);
            TwoDimensionalIntCounter <string, string>   reducedTagTagCounter   = new TwoDimensionalIntCounter <string, string>(500);
            TwoDimensionalIntCounter <string, string>   tagReducedTagCounter   = new TwoDimensionalIntCounter <string, string>(300);
            int numTrees = 0;

            foreach (Tree tree in tb)
            {
                // Apply the language-specific transformation to every non-leaf node first.
                foreach (Tree subTree in tree)
                {
                    if (!subTree.IsLeaf())
                    {
                        tlpp.TransformTree(subTree, tree);
                    }
                }
                IList <ILabel> pretermList = tree.PreTerminalYield();
                IList <ILabel> yield       = tree.Yield();
                System.Diagnostics.Debug.Assert(yield.Count == pretermList.Count);
                int yieldLen = yield.Count;
                for (int i = 0; i < yieldLen; ++i)
                {
                    string tag   = pretermList[i].Value();
                    string word  = yield[i].Value();
                    string morph = ((CoreLabel)yield[i]).OriginalText();
                    // Note: if there is no lemma, then we use the surface form.
                    Pair <string, string> lemmaTag = MorphoFeatureSpecification.SplitMorphString(word, morph);
                    string lemma   = lemmaTag.First();
                    string richTag = lemmaTag.Second();
                    // WSGDEBUG
                    if (tag.Contains("MW"))
                    {
                        lemma += "-MWE";
                    }
                    lemmaCounter.IncrementCount(lemma);
                    lemmaTagCounter.IncrementCount(lemma + tag);
                    richTagCounter.IncrementCount(richTag);
                    string reducedTag = morphoSpec.StrToFeatures(richTag).ToString();
                    reducedTagCounter.IncrementCount(reducedTag);
                    reducedTagLemmaCounter.IncrementCount(reducedTag + lemma);
                    wordTagCounter.IncrementCount(word + tag);
                    morphTagCounter.IncrementCount(morph + tag);
                    morphCounter.IncrementCount(morph);
                    wordCounter.IncrementCount(word);
                    tagCounter.IncrementCount(tag);
                    // The two-dimensional counters below use "NONE" in place of an empty feature string.
                    reducedTag = reducedTag.Equals(string.Empty) ? "NONE" : reducedTag;

                    // Record the lemma for this word. The set is created on first sight of the
                    // word and the lemma is added in every case, so the first lemma is not lost.
                    ICollection <string> knownLemmas;
                    if (!wordLemmaMap.TryGetValue(word, out knownLemmas))
                    {
                        knownLemmas = Generics.NewHashSet(1);
                        wordLemmaMap[word] = knownLemmas;
                    }
                    knownLemmas.Add(lemma);

                    lemmaReducedTagCounter.IncrementCount(lemma, reducedTag);
                    reducedTagTagCounter.IncrementCount(lemma + reducedTag, tag);
                    tagReducedTagCounter.IncrementCount(tag, reducedTag);
                }
                ++numTrees;
            }
            // Barf...
            System.Console.Out.WriteLine("Language: " + language.ToString());
            System.Console.Out.Printf("#trees:\t%d%n", numTrees);
            System.Console.Out.Printf("#tokens:\t%d%n", (int)wordCounter.TotalCount());
            System.Console.Out.Printf("#words:\t%d%n", wordCounter.KeySet().Count);
            System.Console.Out.Printf("#tags:\t%d%n", tagCounter.KeySet().Count);
            System.Console.Out.Printf("#wordTagPairs:\t%d%n", wordTagCounter.KeySet().Count);
            System.Console.Out.Printf("#lemmas:\t%d%n", lemmaCounter.KeySet().Count);
            System.Console.Out.Printf("#lemmaTagPairs:\t%d%n", lemmaTagCounter.KeySet().Count);
            System.Console.Out.Printf("#feattags:\t%d%n", reducedTagCounter.KeySet().Count);
            System.Console.Out.Printf("#feattag+lemmas:\t%d%n", reducedTagLemmaCounter.KeySet().Count);
            System.Console.Out.Printf("#richtags:\t%d%n", richTagCounter.KeySet().Count);
            System.Console.Out.Printf("#richtag+lemma:\t%d%n", morphCounter.KeySet().Count);
            System.Console.Out.Printf("#richtag+lemmaTagPairs:\t%d%n", morphTagCounter.KeySet().Count);
            // Extra
            System.Console.Out.WriteLine("==================");
            StringBuilder sbNoLemma    = new StringBuilder();
            StringBuilder sbMultLemmas = new StringBuilder();

            foreach (KeyValuePair <string, ICollection <string> > wordLemmas in wordLemmaMap)
            {
                string word = wordLemmas.Key;
                ICollection <string> lemmas = wordLemmas.Value;
                if (lemmas.Count == 0)
                {
                    sbNoLemma.Append("NO LEMMAS FOR WORD: " + word + "\n");
                    continue;
                }
                if (lemmas.Count > 1)
                {
                    sbMultLemmas.Append("MULTIPLE LEMMAS: " + word + " " + SetToString(lemmas) + "\n");
                    continue;
                }
                // Exactly one lemma remains here. Enumerate to fetch it: reading
                // GetEnumerator().Current without advancing never yields the element.
                string lemma = null;
                foreach (string onlyLemma in lemmas)
                {
                    lemma = onlyLemma;
                }
                ICollection <string> reducedTags = lemmaReducedTagCounter.GetCounter(lemma).KeySet();
                // Only lemmas that occur with more than one reduced tag are reported.
                if (reducedTags.Count > 1)
                {
                    System.Console.Out.Printf("%s --> %s%n", word, lemma);
                    foreach (string reducedTag in reducedTags)
                    {
                        int    count   = lemmaReducedTagCounter.GetCount(lemma, reducedTag);
                        string posTags = SetToString(reducedTagTagCounter.GetCounter(lemma + reducedTag).KeySet());
                        System.Console.Out.Printf("\t%s\t%d\t%s%n", reducedTag, count, posTags);
                    }
                    System.Console.Out.WriteLine();
                }
            }
            System.Console.Out.WriteLine("==================");
            System.Console.Out.WriteLine(sbNoLemma.ToString());
            System.Console.Out.WriteLine(sbMultLemmas.ToString());
            System.Console.Out.WriteLine("==================");
            IList <string> tags = new List <string>(tagReducedTagCounter.FirstKeySet());

            tags.Sort();
            foreach (string tag_1 in tags)
            {
                System.Console.Out.WriteLine(tag_1);
                ICollection <string> reducedTags = tagReducedTagCounter.GetCounter(tag_1).KeySet();
                foreach (string reducedTag in reducedTags)
                {
                    int count = tagReducedTagCounter.GetCount(tag_1, reducedTag);
                    System.Console.Out.Printf("\t%s\t%d%n", reducedTag, count);
                }
                System.Console.Out.WriteLine();
            }
            System.Console.Out.WriteLine("==================");
        }
        /// <summary>
        /// Writes statistics about rare characters and words in a collection of trees:
        /// how many character symbols and tagged words fall below a count of 1.5, the
        /// distribution of part-of-speech tags over those words, the distribution of
        /// radicals over those characters, and the distribution of word lengths.
        /// </summary>
        /// <param name="trees">Trees whose tagged yields are counted.</param>
        /// <param name="pw">Writer that receives the report.</param>
        public static void PrintStats(ICollection <Tree> trees, PrintWriter pw)
        {
            ClassicCounter <int>        wordLengthCounter = new ClassicCounter <int>();
            ClassicCounter <TaggedWord> wordCounter       = new ClassicCounter <TaggedWord>();
            ClassicCounter <ChineseCharacterBasedLexicon.Symbol> charCounter = new ClassicCounter <ChineseCharacterBasedLexicon.Symbol>();
            int counter = 0;

            foreach (Tree tree in trees)
            {
                counter++;
                IList <TaggedWord> taggedWords = tree.TaggedYield();
                foreach (TaggedWord taggedWord in taggedWords)
                {
                    string word = taggedWord.Word();
                    // Boundary markers are not real words and are left out of every count.
                    if (word.Equals(LexiconConstants.Boundary))
                    {
                        continue;
                    }
                    int length = word.Length;
                    wordCounter.IncrementCount(taggedWord);
                    wordLengthCounter.IncrementCount(length);
                    for (int j = 0; j < length; j++)
                    {
                        ChineseCharacterBasedLexicon.Symbol sym = ChineseCharacterBasedLexicon.Symbol.CannonicalSymbol(word[j]);
                        charCounter.IncrementCount(sym);
                    }
                    // One end-of-word symbol is counted per word in addition to its characters.
                    charCounter.IncrementCount(ChineseCharacterBasedLexicon.Symbol.EndWord);
                }
            }
            // Keys selected with a threshold of 1.5 are treated as singletons
            // (presumably those seen exactly once — confirm against Counters.KeysBelow).
            ICollection <ChineseCharacterBasedLexicon.Symbol> singletonChars = Counters.KeysBelow(charCounter, 1.5);
            ICollection <TaggedWord> singletonWords     = Counters.KeysBelow(wordCounter, 1.5);
            ClassicCounter <string>  singletonWordPOSes = new ClassicCounter <string>();

            foreach (TaggedWord taggedWord_1 in singletonWords)
            {
                singletonWordPOSes.IncrementCount(taggedWord_1.Tag());
            }
            Distribution <string> singletonWordPOSDist = Distribution.GetDistribution(singletonWordPOSes);
            ClassicCounter <char> singletonCharRads    = new ClassicCounter <char>();

            foreach (ChineseCharacterBasedLexicon.Symbol s in singletonChars)
            {
                // NOTE(review): assumes RadicalMap.GetRadical returns a char — confirm.
                singletonCharRads.IncrementCount(RadicalMap.GetRadical(s.GetCh()));
            }
            Distribution <char> singletonCharRadDist = Distribution.GetDistribution(singletonCharRads);
            Distribution <int>  wordLengthDist       = Distribution.GetDistribution(wordLengthCounter);
            NumberFormat        percent = new DecimalFormat("##.##%");

            pw.Println("There are " + singletonChars.Count + " singleton chars out of " + (int)charCounter.TotalCount() + " tokens and " + charCounter.Size() + " types found in " + counter + " trees.");
            pw.Println("Thus singletonChars comprise " + percent.Format(singletonChars.Count / charCounter.TotalCount()) + " of tokens and " + percent.Format((double)singletonChars.Count / charCounter.Size()) + " of types.");
            pw.Println();
            pw.Println("There are " + singletonWords.Count + " singleton words out of " + (int)wordCounter.TotalCount() + " tokens and " + wordCounter.Size() + " types.");
            pw.Println("Thus singletonWords comprise " + percent.Format(singletonWords.Count / wordCounter.TotalCount()) + " of tokens and " + percent.Format((double)singletonWords.Count / wordCounter.Size()) + " of types.");
            pw.Println();
            pw.Println("Distribution over singleton word POS:");
            pw.Println(singletonWordPOSDist.ToString());
            pw.Println();
            pw.Println("Distribution over singleton char radicals:");
            pw.Println(singletonCharRadDist.ToString());
            pw.Println();
            pw.Println("Distribution over word length:");
            pw.Println(wordLengthDist);
        }
Exemple #4
0
        /// <summary>
        /// Collects annotation names whose score reaches <paramref name="cutOff"/>.
        /// The score of an annotated key is the KL divergence between its counter and
        /// the counter of the less specific key it refines, multiplied by the annotated
        /// counter's total count.
        /// </summary>
        /// <param name="cutOff">Minimum score a key needs in order to be added.</param>
        /// <param name="nr">Counters keyed by node label.</param>
        /// <param name="pr">Counters keyed by a list whose first two elements are read as node and parent.</param>
        /// <param name="gpr">Counters keyed by a list whose first three elements are read as node, parent and grandparent.</param>
        /// <param name="splitters">
        /// Receives <c>node^parent</c> and <c>node^parent~grandparent</c> names, highest score first within each group.
        /// </param>
        private static void GetSplitters(double cutOff, IDictionary <string, ClassicCounter <IList <string> > > nr, IDictionary <IList <string>, ClassicCounter <IList <string> > > pr, IDictionary <IList <string>, ClassicCounter <IList <string> > > gpr, ICollection <string> splitters)
        {
            // do value of parent
            foreach (string node in nr.Keys)
            {
                List <Pair <IList <string>, double> > answers = new List <Pair <IList <string>, double> >();
                ClassicCounter <IList <string> >      cntr    = nr[node];
                foreach (IList <string> key in pr.Keys)
                {
                    if (key[0].Equals(node))
                    {
                        // only do it if they match
                        ClassicCounter <IList <string> > cntr2 = pr[key];
                        double support2 = cntr2.TotalCount();
                        double kl       = Counters.KlDivergence(cntr2, cntr);
                        answers.Add(new Pair <IList <string>, double>(key, kl * support2));
                    }
                }
                // Highest score first; an explicit comparison is needed because the key lists are not comparable.
                answers.Sort((left, right) => right.Second().CompareTo(left.Second()));
                foreach (Pair <IList <string>, double> p in answers)
                {
                    double psd = p.Second();
                    if (psd >= cutOff)
                    {
                        IList <string> lst  = p.First();
                        string         nd   = lst[0];
                        string         par  = lst[1];
                        string         name = nd + "^" + par;
                        splitters.Add(name);
                    }
                }
            }

            // NOTE: an information-gain variant of the parent scoring was started here but never finished.

            // do value of grandparent
            foreach (IList <string> node_1 in pr.Keys)
            {
                List <Pair <IList <string>, double> > answers = new List <Pair <IList <string>, double> >();
                ClassicCounter <IList <string> >      cntr    = pr[node_1];
                double support = cntr.TotalCount();
                // Parent contexts with too little support are skipped before any grandparent is scored.
                if (support < Suppcutoff)
                {
                    continue;
                }
                foreach (IList <string> key in gpr.Keys)
                {
                    if (key[0].Equals(node_1[0]) && key[1].Equals(node_1[1]))
                    {
                        // only do it if they match
                        ClassicCounter <IList <string> > cntr2 = gpr[key];
                        double support2 = cntr2.TotalCount();
                        double kl       = Counters.KlDivergence(cntr2, cntr);
                        answers.Add(new Pair <IList <string>, double>(key, kl * support2));
                    }
                }
                answers.Sort((left, right) => right.Second().CompareTo(left.Second()));
                foreach (Pair <IList <string>, double> answer in answers)
                {
                    double psd = answer.Second();
                    if (psd >= cutOff)
                    {
                        IList <string> lst  = answer.First();
                        string         nd   = lst[0];
                        string         par  = lst[1];
                        string         gpar = lst[2];
                        string         name = nd + "^" + par + "~" + gpar;
                        splitters.Add(name);
                    }
                }
            }
        }
Exemple #5
0
        /// <summary>
        /// Prints parent- and grandparent-annotation statistics to standard output.
        /// For each node label, and for each sufficiently supported node/parent pair, it
        /// prints the KL divergence of every matching more specific key and then the
        /// keys ordered by descending <c>KL * support</c>. It finishes with all scores
        /// in priority order and generated Java source declaring one splitter array
        /// per entry of <c>Cutoffs</c>.
        /// </summary>
        public virtual void PrintStats()
        {
            NumberFormat nf = NumberFormat.GetNumberInstance();

            nf.SetMaximumFractionDigits(2);
            // Store java code for selSplit
            StringBuilder[] javaSB = new StringBuilder[Cutoffs.Length];
            for (int i = 0; i < Cutoffs.Length; i++)
            {
                javaSB[i] = new StringBuilder("  private static String[] splitters" + (i + 1) + " = new String[] {");
            }
            ClassicCounter <IList <string> > allScores = new ClassicCounter <IList <string> >();

            // do value of parent
            foreach (string node in nodeRules.Keys)
            {
                List <Pair <IList <string>, double> > answers = new List <Pair <IList <string>, double> >();
                ClassicCounter <IList <string> >      cntr    = nodeRules[node];
                double support = cntr.TotalCount();
                System.Console.Out.WriteLine("Node " + node + " support is " + support);
                foreach (IList <string> key in pRules.Keys)
                {
                    if (key[0].Equals(node))
                    {
                        // only do it if they match
                        ClassicCounter <IList <string> > cntr2 = pRules[key];
                        double support2 = cntr2.TotalCount();
                        double kl       = Counters.KlDivergence(cntr2, cntr);
                        System.Console.Out.WriteLine("KL(" + key + "||" + node + ") = " + nf.Format(kl) + "\t" + "support(" + key + ") = " + support2);
                        double score = kl * support2;
                        answers.Add(new Pair <IList <string>, double>(key, score));
                        allScores.SetCount(key, score);
                    }
                }
                System.Console.Out.WriteLine("----");
                System.Console.Out.WriteLine("Sorted descending support * KL");
                // Highest score first, as the heading above promises.
                answers.Sort((left, right) => right.Second().CompareTo(left.Second()));
                foreach (Pair <IList <string>, double> answer in answers)
                {
                    double psd = answer.Second();
                    System.Console.Out.WriteLine(answer.First() + ": " + nf.Format(psd));
                    if (psd >= Cutoffs[0])
                    {
                        IList <string> lst = answer.First();
                        string         nd  = lst[0];
                        string         par = lst[1];
                        // The entry goes into the array of every cutoff it reaches.
                        for (int j = 0; j < Cutoffs.Length; j++)
                        {
                            if (psd >= Cutoffs[j])
                            {
                                javaSB[j].Append("\"").Append(nd).Append("^");
                                javaSB[j].Append(par).Append("\", ");
                            }
                        }
                    }
                }
                System.Console.Out.WriteLine();
            }

            // NOTE: an information-gain variant of the parent scoring was started here but never finished.

            // do value of grandparent
            foreach (IList <string> node_1 in pRules.Keys)
            {
                List <Pair <IList <string>, double> > answers = new List <Pair <IList <string>, double> >();
                ClassicCounter <IList <string> >      cntr    = pRules[node_1];
                double support = cntr.TotalCount();
                // Parent contexts with too little support are skipped before any grandparent is scored.
                if (support < Suppcutoff)
                {
                    continue;
                }
                System.Console.Out.WriteLine("Node " + node_1 + " support is " + support);
                foreach (IList <string> key in gPRules.Keys)
                {
                    if (key[0].Equals(node_1[0]) && key[1].Equals(node_1[1]))
                    {
                        // only do it if they match
                        ClassicCounter <IList <string> > cntr2 = gPRules[key];
                        double support2 = cntr2.TotalCount();
                        double kl       = Counters.KlDivergence(cntr2, cntr);
                        System.Console.Out.WriteLine("KL(" + key + "||" + node_1 + ") = " + nf.Format(kl) + "\t" + "support(" + key + ") = " + support2);
                        double score = kl * support2;
                        answers.Add(new Pair <IList <string>, double>(key, score));
                        allScores.SetCount(key, score);
                    }
                }
                System.Console.Out.WriteLine("----");
                System.Console.Out.WriteLine("Sorted descending support * KL");
                answers.Sort((left, right) => right.Second().CompareTo(left.Second()));
                foreach (Pair <IList <string>, double> answer in answers)
                {
                    double psd = answer.Second();
                    System.Console.Out.WriteLine(answer.First() + ": " + nf.Format(psd));
                    if (psd >= Cutoffs[0])
                    {
                        IList <string> lst  = answer.First();
                        string         nd   = lst[0];
                        string         par  = lst[1];
                        string         gpar = lst[2];
                        for (int j = 0; j < Cutoffs.Length; j++)
                        {
                            if (psd >= Cutoffs[j])
                            {
                                javaSB[j].Append("\"").Append(nd).Append("^");
                                javaSB[j].Append(par).Append("~");
                                javaSB[j].Append(gpar).Append("\", ");
                            }
                        }
                    }
                }
                System.Console.Out.WriteLine();
            }
            System.Console.Out.WriteLine();
            System.Console.Out.WriteLine("All scores:");
            IPriorityQueue <IList <string> > pq = Counters.ToPriorityQueue(allScores);

            while (!pq.IsEmpty())
            {
                IList <string> key   = pq.GetFirst();
                double         score = pq.GetPriority(key);
                pq.RemoveFirst();
                System.Console.Out.WriteLine(key + "\t" + score);
            }
            System.Console.Out.WriteLine("  // Automatically generated by ParentAnnotationStats -- preferably don't edit");
            for (int i_1 = 0; i_1 < Cutoffs.Length; i_1++)
            {
                StringBuilder declaration = javaSB[i_1];
                int len = declaration.Length;
                // Drop the trailing ", " left by the last entry. When no entry was appended
                // the text still ends in "{" and must be left intact.
                if (len >= 2 && declaration[len - 2] == ',' && declaration[len - 1] == ' ')
                {
                    declaration.Length = len - 2;
                }
                declaration.Append("};");
                System.Console.Out.WriteLine(declaration);
            }
            System.Console.Out.Write("  public static HashSet splitters = new HashSet(Arrays.asList(");
            for (int i_2 = Cutoffs.Length; i_2 > 0; i_2--)
            {
                if (i_2 == 1)
                {
                    System.Console.Out.Write("splitters1");
                }
                else
                {
                    System.Console.Out.Write("selectiveSplit" + i_2 + " ? splitters" + i_2 + " : (");
                }
            }
            // need to print extra one to close other things open
            for (int i_3 = Cutoffs.Length; i_3 >= 0; i_3--)
            {
                System.Console.Out.Write(")");
            }
            System.Console.Out.WriteLine(";");
        }
        public virtual void PrintStats()
        {
            NumberFormat nf = NumberFormat.GetNumberInstance();

            nf.SetMaximumFractionDigits(2);
            // System.out.println("Node rules");
            // System.out.println(nodeRules);
            // System.out.println("Parent rules");
            // System.out.println(pRules);
            // System.out.println("Grandparent rules");
            // System.out.println(gPRules);
            // Store java code for selSplit
            StringBuilder[] javaSB = new StringBuilder[Cutoffs.Length];
            for (int i = 0; i < Cutoffs.Length; i++)
            {
                javaSB[i] = new StringBuilder("  private static String[] sisterSplit" + (i + 1) + " = new String[] {");
            }
            ArrayList topScores = new ArrayList();

            foreach (object o in nodeRules.Keys)
            {
                ArrayList      answers = new ArrayList();
                string         label   = (string)o;
                ClassicCounter cntr    = (ClassicCounter)nodeRules[label];
                double         support = (cntr.TotalCount());
                System.Console.Out.WriteLine("Node " + label + " support is " + support);
                foreach (object o4 in ((Hashtable)leftRules[label]).Keys)
                {
                    string         sis      = (string)o4;
                    ClassicCounter cntr2    = (ClassicCounter)((Hashtable)leftRules[label])[sis];
                    double         support2 = (cntr2.TotalCount());
                    /* alternative 1: use full distribution to calculate score */
                    double kl = Counters.KlDivergence(cntr2, cntr);
                    /* alternative 2: hold out test-context data to calculate score */

                    /* this doesn't work because it can lead to zero-probability
                     * data points hence infinite divergence */
                    //  Counter tempCounter = new Counter();
                    //  tempCounter.addCounter(cntr2);
                    //  for(Iterator i = tempCounter.seenSet().iterator(); i.hasNext();) {
                    //    Object o = i.next();
                    //    tempCounter.setCount(o,-1*tempCounter.countOf(o));
                    //  }
                    //  System.out.println(tempCounter); //debugging
                    //  tempCounter.addCounter(cntr);
                    //  System.out.println(tempCounter); //debugging
                    //  System.out.println(cntr);
                    //  double kl = cntr2.klDivergence(tempCounter);
                    /* alternative 2 ends here */
                    string annotatedLabel = label + "=l=" + sis;
                    System.Console.Out.WriteLine("KL(" + annotatedLabel + "||" + label + ") = " + nf.Format(kl) + "\t" + "support(" + sis + ") = " + support2);
                    answers.Add(new Pair(annotatedLabel, kl * support2));
                    topScores.Add(new Pair(annotatedLabel, kl * support2));
                }
                foreach (object o3 in ((Hashtable)rightRules[label]).Keys)
                {
                    string         sis            = (string)o3;
                    ClassicCounter cntr2          = (ClassicCounter)((Hashtable)rightRules[label])[sis];
                    double         support2       = (cntr2.TotalCount());
                    double         kl             = Counters.KlDivergence(cntr2, cntr);
                    string         annotatedLabel = label + "=r=" + sis;
                    System.Console.Out.WriteLine("KL(" + annotatedLabel + "||" + label + ") = " + nf.Format(kl) + "\t" + "support(" + sis + ") = " + support2);
                    answers.Add(new Pair(annotatedLabel, kl * support2));
                    topScores.Add(new Pair(annotatedLabel, kl * support2));
                }
                // upto
                System.Console.Out.WriteLine("----");
                System.Console.Out.WriteLine("Sorted descending support * KL");
                answers.Sort(null);
                foreach (object answer in answers)
                {
                    Pair   p   = (Pair)answer;
                    double psd = ((double)p.Second());
                    System.Console.Out.WriteLine(p.First() + ": " + nf.Format(psd));
                    if (psd >= Cutoffs[0])
                    {
                        string annotatedLabel = (string)p.First();
                        foreach (double Cutoff in Cutoffs)
                        {
                            if (psd >= Cutoff)
                            {
                            }
                        }
                    }
                }
                //javaSB[j].append("\"").append(annotatedLabel);
                //javaSB[j].append("\",");
                System.Console.Out.WriteLine();
            }
            topScores.Sort(null);
            string outString = "All enriched categories, sorted by score\n";

            foreach (object topScore in topScores)
            {
                Pair   p   = (Pair)topScore;
                double psd = ((double)p.Second());
                System.Console.Out.WriteLine(p.First() + ": " + nf.Format(psd));
            }
            System.Console.Out.WriteLine();
            System.Console.Out.WriteLine("  // Automatically generated by SisterAnnotationStats -- preferably don't edit");
            int k = Cutoffs.Length - 1;

            for (int j = 0; j < topScores.Count; j++)
            {
                Pair   p   = (Pair)topScores[j];
                double psd = ((double)p.Second());
                if (psd < Cutoffs[k])
                {
                    if (k == 0)
                    {
                        break;
                    }
                    else
                    {
                        k--;
                        j -= 1;
                        // messy but should do it
                        continue;
                    }
                }
                javaSB[k].Append("\"").Append(p.First());
                javaSB[k].Append("\",");
            }
            for (int i_1 = 0; i_1 < Cutoffs.Length; i_1++)
            {
                int len = javaSB[i_1].Length;
                javaSB[i_1].Replace(len - 2, len, "};");
                System.Console.Out.WriteLine(javaSB[i_1]);
            }
            System.Console.Out.Write("  public static String[] sisterSplit = ");
            for (int i_2 = Cutoffs.Length; i_2 > 0; i_2--)
            {
                if (i_2 == 1)
                {
                    System.Console.Out.Write("sisterSplit1");
                }
                else
                {
                    System.Console.Out.Write("selectiveSisterSplit" + i_2 + " ? sisterSplit" + i_2 + " : (");
                }
            }
            // need to print extra one to close other things open
            for (int i_3 = Cutoffs.Length; i_3 >= 0; i_3--)
            {
                System.Console.Out.Write(")");
            }
            System.Console.Out.WriteLine(";");
        }
        /// <summary>
        /// Count some stats on what occurs in a file: the number of documents,
        /// the number of non-boundary tokens, the number of entities, and a tally
        /// of every distinct MISC entity string.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments. <c>args[0]</c> is passed to
        /// <c>IOUtils.ReaderFromString</c> and identifies the input to read.
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        public static void Main(string[] args)
        {
            // Report a usage error instead of failing with an
            // IndexOutOfRangeException when no input is named.
            if (args == null || args.Length < 1)
            {
                System.Console.Error.WriteLine("Usage: " + typeof(CoNLLDocumentReaderAndWriter).FullName + " filename");
                System.Environment.Exit(-1);
                return;
            }

            CoNLLDocumentReaderAndWriter rw = new CoNLLDocumentReaderAndWriter();

            rw.Init(new SeqClassifierFlags());
            int               numDocs        = 0;
            int               numTokens      = 0;
            int               numEntities    = 0;
            // Base label (the part after the "B-"/"I-" style prefix) of the previous counted token.
            string            lastAnsBase    = string.Empty;
            ICounter <string> miscCounter    = new ClassicCounter <string>();
            // Words of the MISC entity currently being assembled, space separated.
            StringBuilder     inProgressMisc = new StringBuilder();

            for (IEnumerator <IList <CoreLabel> > it = rw.GetIterator(IOUtils.ReaderFromString(args[0])); it.MoveNext();)
            {
                IList <CoreLabel> doc = it.Current;
                numDocs++;
                foreach (CoreLabel fl in doc)
                {
                    string word = fl.Word();
                    // Boundary tokens are neither counted nor allowed to affect entity tracking.
                    if (word.Equals(Boundary))
                    {
                        continue;
                    }
                    string   ans = fl.Get(typeof(CoreAnnotations.AnswerAnnotation));
                    string   ansBase;
                    string   ansPrefix;
                    // Split "PREFIX-BASE" labels; a label without "-" is all base, no prefix.
                    string[] bits = ans.Split("-");
                    if (bits.Length == 1)
                    {
                        ansBase   = bits[0];
                        ansPrefix = string.Empty;
                    }
                    else
                    {
                        ansBase   = bits[1];
                        ansPrefix = bits[0];
                    }
                    numTokens++;
                    if (!ansBase.Equals("O"))
                    {
                        // A new entity starts when the base label changes, or when the
                        // base label repeats but is explicitly marked with a "B" prefix.
                        if (ansBase.Equals(lastAnsBase))
                        {
                            if (ansPrefix.Equals("B"))
                            {
                                numEntities++;
                                inProgressMisc = MaybeIncrementCounter(inProgressMisc, miscCounter);
                            }
                        }
                        else
                        {
                            numEntities++;
                            inProgressMisc = MaybeIncrementCounter(inProgressMisc, miscCounter);
                        }
                        if (ansBase.Equals("MISC"))
                        {
                            if (inProgressMisc.Length > 0)
                            {
                                // already something there
                                inProgressMisc.Append(' ');
                            }
                            inProgressMisc.Append(word);
                        }
                    }
                    else
                    {
                        // An "O" token ends whatever entity was in progress.
                        inProgressMisc = MaybeIncrementCounter(inProgressMisc, miscCounter);
                    }
                    lastAnsBase = ansBase;
                }
                // for tokens
            }
            // for documents

            // Flush a MISC entity that was still being assembled when the input ended;
            // inside the loop a flush only happens once a later non-boundary token is seen,
            // so without this the final MISC entity would be missing from the tally.
            // NOTE(review): MaybeIncrementCounter is defined elsewhere; this assumes it is a
            // no-op for an empty buffer, as its use on every "O" token suggests -- confirm.
            inProgressMisc = MaybeIncrementCounter(inProgressMisc, miscCounter);

            System.Console.Out.WriteLine("File " + args[0] + " has " + numDocs + " documents, " + numTokens + " (non-blank line) tokens and " + numEntities + " entities.");
            System.Console.Out.Printf("Here are the %.0f MISC items with counts:%n", miscCounter.TotalCount());
            System.Console.Out.WriteLine(Counters.ToVerticalString(miscCounter, "%.0f\t%s"));
        }