Ejemplo n.º 1
0
        /// <summary>
        /// Scores candidate speakers for a quote from the surrounding mentions,
        /// weighting backward (preceding) mentions by <c>BackwardWeight</c> and
        /// forward ones by <c>ForwardWeight</c>.
        /// </summary>
        /// <param name="closestMentions">all candidate mentions near the quote (forward and backward)</param>
        /// <param name="closestMentionsBackward">the subset of mentions that precede the quote</param>
        /// <param name="gender">expected gender of the speaker, or Unk when unknown</param>
        /// <param name="quote">the quote being attributed (used for pronoun coreference)</param>
        /// <param name="overrideGender">if true, allow falling back to gender-ignoring counts even when a gender was specified</param>
        /// <returns>
        /// The gender-matching counter when it is non-empty; the (empty) gender-matching
        /// counter when a known gender must be respected; otherwise the gender-ignoring counter.
        /// </returns>
        public virtual ICounter<string> GetTopSpeakers(IList<Sieve.MentionData> closestMentions, IList<Sieve.MentionData> closestMentionsBackward, Person.Gender gender, ICoreMap quote, bool overrideGender)
        {
            ICounter<string> topSpeakerInRange = new ClassicCounter<string>();
            ICounter<string> topSpeakerInRangeIgnoreGender = new ClassicCounter<string>();
            ICollection<Sieve.MentionData> backwardsMentions = new HashSet<Sieve.MentionData>(closestMentionsBackward);
            foreach (Sieve.MentionData mention in closestMentions)
            {
                // Preceding mentions are weighted differently from following ones.
                double weight = backwardsMentions.Contains(mention) ? BackwardWeight : ForwardWeight;
                if (mention.type.Equals(Name))
                {
                    // Skip names that are not in the character list.
                    if (!characterMap.Keys.Contains(mention.text))
                    {
                        continue;
                    }
                    Person p = characterMap[mention.text][0];
                    if ((gender == Person.Gender.Male && p.gender == Person.Gender.Male) || (gender == Person.Gender.Female && p.gender == Person.Gender.Female) || (gender == Person.Gender.Unk))
                    {
                        topSpeakerInRange.IncrementCount(p.name, weight);
                    }
                    topSpeakerInRangeIgnoreGender.IncrementCount(p.name, weight);
                }
                else
                {
                    if (mention.type.Equals(Pronoun))
                    {
                        // Resolve the pronoun to a person via coreference on its character offset.
                        int    charBeginKey = doc.Get(typeof(CoreAnnotations.TokensAnnotation))[mention.begin].BeginPosition();
                        Person p            = DoCoreference(charBeginKey, quote);
                        if (p != null)
                        {
                            if ((gender == Person.Gender.Male && p.gender == Person.Gender.Male) || (gender == Person.Gender.Female && p.gender == Person.Gender.Female) || (gender == Person.Gender.Unk))
                            {
                                topSpeakerInRange.IncrementCount(p.name, weight);
                            }
                            topSpeakerInRangeIgnoreGender.IncrementCount(p.name, weight);
                        }
                    }
                }
            }
            if (topSpeakerInRange.Size() > 0)
            {
                return(topSpeakerInRange);
            }
            // A known gender that may not be overridden: return the (empty) gender-matched
            // counter so the caller sees that no suitable speaker was found.
            if (gender != Person.Gender.Unk && !overrideGender)
            {
                return(topSpeakerInRange);
            }
            return(topSpeakerInRangeIgnoreGender);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Selects up to <c>constVars.numWordsToAdd</c> top-scoring candidate phrases
        /// from <paramref name="newdt"/> (consumed in descending score order), applying
        /// score, pattern-support, and fuzzy-ignore filters.
        /// </summary>
        /// <param name="newdt">candidate phrases with their scores</param>
        /// <param name="terms">per-phrase pattern counts, used for the pattern-support filter</param>
        /// <param name="useThresholdNumPatternsForTheseWords">phrases that must additionally pass the non-redundant-pattern threshold</param>
        /// <param name="ignoreWords">phrases to fuzzy-match against and reject; fuzzy-matched words are added to this set (mutated)</param>
        /// <param name="thresholdWordExtract">minimum score; once a phrase falls below it, all later (lower-scored) phrases are skipped too</param>
        /// <returns>counter of the accepted phrases with their scores from <paramref name="newdt"/></returns>
        public virtual ICounter <CandidatePhrase> ChooseTopWords(ICounter <CandidatePhrase> newdt, TwoDimensionalCounter <CandidatePhrase, E> terms, ICounter <CandidatePhrase> useThresholdNumPatternsForTheseWords, ICollection <CandidatePhrase> ignoreWords
                                                                 , double thresholdWordExtract)
        {
            // NOTE: this enumerator is deliberately shared with the logging loop below,
            // which continues from wherever this first loop stopped.
            IEnumerator <CandidatePhrase> termIter   = Counters.ToPriorityQueue(newdt).GetEnumerator();
            ICounter <CandidatePhrase>    finalwords = new ClassicCounter <CandidatePhrase>();

            while (termIter.MoveNext())
            {
                // Stop once enough words have been accepted.
                if (finalwords.Size() >= constVars.numWordsToAdd)
                {
                    break;
                }
                CandidatePhrase w = termIter.Current;
                // Phrases come out in descending score order, so everything after the
                // first sub-threshold phrase is also below threshold.
                if (newdt.GetCount(w) < thresholdWordExtract)
                {
                    Redwood.Log(ConstantsAndVariables.extremedebug, "not adding word " + w + " and any later words because the score " + newdt.GetCount(w) + " is less than the threshold of  " + thresholdWordExtract);
                    break;
                }
                System.Diagnostics.Debug.Assert((newdt.GetCount(w) != double.PositiveInfinity));
                // Reject phrases whose non-redundant pattern support is too small.
                if (useThresholdNumPatternsForTheseWords.ContainsKey(w) && NumNonRedundantPatterns(terms, w) < constVars.thresholdNumPatternsApplied)
                {
                    Redwood.Log("extremePatDebug", "Not adding " + w + " because the number of non redundant patterns are below threshold of " + constVars.thresholdNumPatternsApplied + ":" + terms.GetCounter(w).KeySet());
                    continue;
                }
                CandidatePhrase matchedFuzzy = null;
                if (constVars.minLen4FuzzyForPattern > 0 && ignoreWords != null)
                {
                    matchedFuzzy = ConstantsAndVariables.ContainsFuzzy(ignoreWords, w, constVars.minLen4FuzzyForPattern);
                }
                if (matchedFuzzy == null)
                {
                    Redwood.Log("extremePatDebug", "adding word " + w);
                    finalwords.SetCount(w, newdt.GetCount(w));
                }
                else
                {
                    // Remember the fuzzy match so future iterations reject it directly.
                    Redwood.Log("extremePatDebug", "not adding " + w + " because it matched " + matchedFuzzy + " in common English word");
                    ignoreWords.Add(w);
                }
            }
            // Log (up to) the next ten phrases that were NOT selected, continuing from
            // where the selection loop left off.
            string nextTen = string.Empty;
            int    n       = 0;

            while (termIter.MoveNext())
            {
                n++;
                if (n > 10)
                {
                    break;
                }
                CandidatePhrase w = termIter.Current;
                nextTen += ";\t" + w + ":" + newdt.GetCount(w);
            }
            Redwood.Log(Redwood.Dbg, "Next ten phrases were " + nextTen);
            return(finalwords);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Checks that two structurally identical surface patterns compare equal,
        /// collapse to one entry in a counter, and to one entry in an index.
        /// </summary>
        public virtual void TestSimplerTokens()
        {
            // First pattern.
            IDictionary<Type, string> prevContextA = new _Dictionary_44();
            IDictionary<Type, string> nextContextA = new _Dictionary_49();
            PatternToken tokenA = new PatternToken("V", false, true, 2, null, false, false, null);
            SurfacePattern patternA = new SurfacePattern(CreateContext(prevContextA), tokenA, CreateContext(nextContextA), SurfacePatternFactory.Genre.Prevnext);

            // Second pattern, built from equivalent pieces.
            IDictionary<Type, string> prevContextB = new _Dictionary_58();
            IDictionary<Type, string> nextContextB = new _Dictionary_63();
            PatternToken tokenB = new PatternToken("V", false, true, 2, null, false, false, null);
            SurfacePattern patternB = new SurfacePattern(CreateContext(prevContextB), tokenB, CreateContext(nextContextB), SurfacePatternFactory.Genre.Prevnext);

            // Equivalent patterns must compare as equal.
            System.Diagnostics.Debug.Assert(patternA.CompareTo(patternB) == 0);

            // ...and therefore collapse to a single key in a counter...
            ICounter<SurfacePattern> patternCounter = new ClassicCounter<SurfacePattern>();
            patternCounter.SetCount(patternA, 1);
            patternCounter.SetCount(patternB, 1);
            System.Diagnostics.Debug.Assert(patternCounter.Size() == 1);
            System.Console.Out.WriteLine("pats size is " + patternCounter.Size());

            // ...and to a single entry in a concurrent index.
            ConcurrentHashIndex<SurfacePattern> patternIndex = new ConcurrentHashIndex<SurfacePattern>();
            patternIndex.Add(patternA);
            patternIndex.Add(patternB);
            System.Diagnostics.Debug.Assert(patternIndex.Count == 1);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Builds a Laplace-smoothed prior distribution over every input symbol seen
        /// anywhere in the training paths.
        /// </summary>
        /// <param name="allTrainPaths">map from keys to lists of training paths (each a list of input symbols)</param>
        /// <returns>a smoothed distribution over the observed input symbols</returns>
        protected internal static Distribution <string> ComputeInputPrior(IDictionary <string, IList <IList <string> > > allTrainPaths)
        {
            ClassicCounter <string> inputCounts = new ClassicCounter <string>();
            // Tally every symbol occurrence across all paths of all entries.
            foreach (IList <IList <string> > pathsForKey in allTrainPaths.Values)
            {
                foreach (IList <string> singlePath in pathsForKey)
                {
                    foreach (string symbol in singlePath)
                    {
                        inputCounts.IncrementCount(symbol);
                    }
                }
            }
            // Smooth with numBins = twice the number of observed types and lambda = 0.5.
            return(Distribution.LaplaceSmoothedDistribution(inputCounts, inputCounts.Size() * 2, 0.5));
        }
Ejemplo n.º 5
0
        /// <summary>TODO(gabor) JavaDoc</summary>
        /// <param name="tokens"/>
        /// <param name="span"/>
        /// <returns/>
        /// <summary>
        /// Guesses an NER tag for a token span by majority vote over the tokens'
        /// NER labels, ignoring the "O" (outside) tag.
        /// </summary>
        /// <param name="tokens">the sentence's tokens</param>
        /// <param name="span">the token-index span to label</param>
        /// <returns>the winning tag if it covers at least half the span (integer division), otherwise "O"</returns>
        public static string GuessNER(IList <CoreLabel> tokens, Span span)
        {
            ICounter <string> tagVotes = new ClassicCounter <string>();
            foreach (int idx in span)
            {
                tagVotes.IncrementCount(tokens[idx].Ner());
            }
            // The "outside" tag and missing tags never win the vote.
            tagVotes.Remove("O");
            tagVotes.Remove(null);
            if (tagVotes.Size() > 0 && Counters.Max(tagVotes) >= span.Size() / 2)
            {
                return(Counters.Argmax(tagVotes));
            }
            return("O");
        }
        /// <summary>
        /// Prints token/type statistics over the given trees: word, word-length, and
        /// character counts, plus distributions over singleton-word POS tags,
        /// singleton-character radicals, and word lengths.
        /// </summary>
        /// <param name="trees">the trees to summarize</param>
        /// <param name="pw">where the report is written</param>
        public static void PrintStats(ICollection<Tree> trees, PrintWriter pw)
        {
            ClassicCounter<int> wordLengthCounter = new ClassicCounter<int>();
            ClassicCounter<TaggedWord> wordCounter = new ClassicCounter<TaggedWord>();
            ClassicCounter<ChineseCharacterBasedLexicon.Symbol> charCounter = new ClassicCounter<ChineseCharacterBasedLexicon.Symbol>();
            int counter = 0;
            foreach (Tree tree in trees)
            {
                counter++;
                IList<TaggedWord> taggedWords = tree.TaggedYield();
                foreach (TaggedWord taggedWord in taggedWords)
                {
                    string word = taggedWord.Word();
                    if (word.Equals(LexiconConstants.Boundary))
                    {
                        continue;
                    }
                    wordCounter.IncrementCount(taggedWord);
                    // BUGFIX: was int.Parse(word.Length), which does not compile
                    // (int.Parse takes a string); the length itself is the counter key.
                    wordLengthCounter.IncrementCount(word.Length);
                    // BUGFIX: loop bound "length" was undefined; iterate the word's characters.
                    for (int j = 0; j < word.Length; j++)
                    {
                        ChineseCharacterBasedLexicon.Symbol sym = ChineseCharacterBasedLexicon.Symbol.CannonicalSymbol(word[j]);
                        charCounter.IncrementCount(sym);
                    }
                    charCounter.IncrementCount(ChineseCharacterBasedLexicon.Symbol.EndWord);
                }
            }
            // Singletons are keys seen exactly once (count below 1.5).
            ICollection<ChineseCharacterBasedLexicon.Symbol> singletonChars = Counters.KeysBelow(charCounter, 1.5);
            ICollection<TaggedWord> singletonWords = Counters.KeysBelow(wordCounter, 1.5);
            ClassicCounter<string> singletonWordPOSes = new ClassicCounter<string>();
            foreach (TaggedWord taggedWord_1 in singletonWords)
            {
                singletonWordPOSes.IncrementCount(taggedWord_1.Tag());
            }
            Distribution<string> singletonWordPOSDist = Distribution.GetDistribution(singletonWordPOSes);
            ClassicCounter<char> singletonCharRads = new ClassicCounter<char>();
            foreach (ChineseCharacterBasedLexicon.Symbol s in singletonChars)
            {
                // BUGFIX: was char.ValueOf(...), which does not exist in C#;
                // the radical char can be counted directly.
                singletonCharRads.IncrementCount(RadicalMap.GetRadical(s.GetCh()));
            }
            Distribution<char> singletonCharRadDist = Distribution.GetDistribution(singletonCharRads);
            Distribution<int> wordLengthDist = Distribution.GetDistribution(wordLengthCounter);
            NumberFormat percent = new DecimalFormat("##.##%");
            pw.Println("There are " + singletonChars.Count + " singleton chars out of " + (int)charCounter.TotalCount() + " tokens and " + charCounter.Size() + " types found in " + counter + " trees.");
            pw.Println("Thus singletonChars comprise " + percent.Format(singletonChars.Count / charCounter.TotalCount()) + " of tokens and " + percent.Format((double)singletonChars.Count / charCounter.Size()) + " of types.");
            pw.Println();
            pw.Println("There are " + singletonWords.Count + " singleton words out of " + (int)wordCounter.TotalCount() + " tokens and " + wordCounter.Size() + " types.");
            pw.Println("Thus singletonWords comprise " + percent.Format(singletonWords.Count / wordCounter.TotalCount()) + " of tokens and " + percent.Format((double)singletonWords.Count / wordCounter.Size()) + " of types.");
            pw.Println();
            pw.Println("Distribution over singleton word POS:");
            pw.Println(singletonWordPOSDist.ToString());
            pw.Println();
            pw.Println("Distribution over singleton char radicals:");
            pw.Println(singletonCharRadDist.ToString());
            pw.Println();
            pw.Println("Distribution over word length:");
            pw.Println(wordLengthDist);
        }
 /// <summary>Writes the sizes of the argument and stop counters to standard out.</summary>
 public virtual void DumpSizes()
 {
     //    System.out.println("core dep " + coreDependencies.size());
     string argLine  = "arg counter " + argCounter.Size();
     string stopLine = "stop counter " + stopCounter.Size();
     System.Console.Out.WriteLine(argLine);
     System.Console.Out.WriteLine(stopLine);
 }
        /// <summary>
        /// Return various statistics about the treebank (number of sentences,
        /// words, tag set, etc.).
        /// </summary>
        /// <param name="tlp">
        /// The TreebankLanguagePack used to determine punctuation and an
        /// appropriate character encoding
        /// </param>
        /// <returns>A big string for human consumption describing the treebank</returns>
        public virtual string TextualSummary(ITreebankLanguagePack tlp)
        {
            // Aggregate counts and exemplar trees collected over one pass of the treebank.
            int  numTrees         = 0;
            int  numTreesLE40     = 0;     // trees of yield length <= 40
            int  numNonUnaryRoots = 0;     // roots with more than one child
            Tree nonUnaryEg       = null;  // first such tree, kept as an example
            ClassicCounter <Tree>   nonUnaries = new ClassicCounter <Tree>();
            ClassicCounter <string> roots      = new ClassicCounter <string>();
            ClassicCounter <string> starts     = new ClassicCounter <string>();
            ClassicCounter <string> puncts     = new ClassicCounter <string>();
            int numUnenclosedLeaves            = 0;  // root is itself a leaf
            int numLeaves     = 0;                   // root rewrites directly as a leaf
            int numNonPhrasal = 0;                   // root rewrites as a tagged word
            int numPreTerminalWithMultipleChildren = 0;
            int numWords                       = 0;
            int numTags                        = 0;
            int shortestSentence               = int.MaxValue;
            int longestSentence                = 0;
            int numNullLabel                   = 0;
            ICollection <string>    words      = Generics.NewHashSet();
            ClassicCounter <string> tags       = new ClassicCounter <string>();
            ClassicCounter <string> cats       = new ClassicCounter <string>();
            Tree leafEg                        = null;
            Tree preTerminalMultipleChildrenEg = null;
            Tree nullLabelEg                   = null;
            Tree rootRewritesAsTaggedWordEg    = null;

            // Single pass: classify each tree's root shape and tally every subtree.
            foreach (Tree t in this)
            {
                roots.IncrementCount(t.Value());
                numTrees++;
                int leng = t.Yield().Count;
                if (leng <= 40)
                {
                    numTreesLE40++;
                }
                if (leng < shortestSentence)
                {
                    shortestSentence = leng;
                }
                if (leng > longestSentence)
                {
                    longestSentence = leng;
                }
                if (t.NumChildren() > 1)
                {
                    // Root without a unary initial rewrite; keep the first as an example
                    // and record at most 100 of the offending local trees.
                    if (numNonUnaryRoots == 0)
                    {
                        nonUnaryEg = t;
                    }
                    if (numNonUnaryRoots < 100)
                    {
                        nonUnaries.IncrementCount(t.LocalTree());
                    }
                    numNonUnaryRoots++;
                }
                else
                {
                    if (t.IsLeaf())
                    {
                        numUnenclosedLeaves++;
                    }
                    else
                    {
                        Tree t2 = t.FirstChild();
                        if (t2.IsLeaf())
                        {
                            numLeaves++;
                            leafEg = t;
                        }
                        else
                        {
                            if (t2.IsPreTerminal())
                            {
                                if (numNonPhrasal == 0)
                                {
                                    rootRewritesAsTaggedWordEg = t;
                                }
                                numNonPhrasal++;
                            }
                        }
                        starts.IncrementCount(t2.Value());
                    }
                }
                foreach (Tree subtree in t)
                {
                    ILabel lab = subtree.Label();
                    if (lab == null || lab.Value() == null || lab.Value().IsEmpty())
                    {
                        // Record and then repair null/empty labels so later processing is safe.
                        if (numNullLabel == 0)
                        {
                            nullLabelEg = subtree;
                        }
                        numNullLabel++;
                        if (lab == null)
                        {
                            subtree.SetLabel(new StringLabel(string.Empty));
                        }
                        else
                        {
                            if (lab.Value() == null)
                            {
                                subtree.Label().SetValue(string.Empty);
                            }
                        }
                    }
                    if (subtree.IsLeaf())
                    {
                        numWords++;
                        words.Add(subtree.Value());
                    }
                    else
                    {
                        if (subtree.IsPreTerminal())
                        {
                            numTags++;
                            tags.IncrementCount(subtree.Value());
                            if (tlp != null && tlp.IsPunctuationTag(subtree.Value()))
                            {
                                puncts.IncrementCount(subtree.FirstChild().Value());
                            }
                        }
                        else
                        {
                            if (subtree.IsPhrasal())
                            {
                                // A phrasal node with a bare leaf child is effectively a
                                // malformed preterminal with multiple children.
                                bool hasLeafChild = false;
                                foreach (Tree kt in subtree.Children())
                                {
                                    if (kt.IsLeaf())
                                    {
                                        hasLeafChild = true;
                                    }
                                }
                                if (hasLeafChild)
                                {
                                    numPreTerminalWithMultipleChildren++;
                                    if (preTerminalMultipleChildrenEg == null)
                                    {
                                        preTerminalMultipleChildrenEg = subtree;
                                    }
                                }
                                cats.IncrementCount(subtree.Value());
                            }
                            else
                            {
                                throw new InvalidOperationException("Treebank: Bad tree in treebank!: " + subtree);
                            }
                        }
                    }
                }
            }
            // Assemble the human-readable report from the accumulated statistics.
            StringWriter sw = new StringWriter(2000);
            PrintWriter  pw = new PrintWriter(sw);
            NumberFormat nf = NumberFormat.GetNumberInstance();

            nf.SetMaximumFractionDigits(0);
            pw.Println("Treebank has " + numTrees + " trees (" + numTreesLE40 + " of length <= 40) and " + numWords + " words (tokens)");
            if (numTrees > 0)
            {
                if (numTags != numWords)
                {
                    pw.Println("  Warning! numTags differs and is " + numTags);
                }
                if (roots.Size() == 1)
                {
                    string root = (string)Sharpen.Collections.ToArray(roots.KeySet())[0];
                    pw.Println("  The root category is: " + root);
                }
                else
                {
                    pw.Println("  Warning! " + roots.Size() + " different roots in treebank: " + Counters.ToString(roots, nf));
                }
                if (numNonUnaryRoots > 0)
                {
                    pw.Print("  Warning! " + numNonUnaryRoots + " trees without unary initial rewrite.  ");
                    if (numNonUnaryRoots > 100)
                    {
                        pw.Print("First 100 ");
                    }
                    pw.Println("Rewrites: " + Counters.ToString(nonUnaries, nf));
                    pw.Println("    Example: " + nonUnaryEg);
                }
                if (numUnenclosedLeaves > 0 || numLeaves > 0 || numNonPhrasal > 0)
                {
                    pw.Println("  Warning! Non-phrasal trees: " + numUnenclosedLeaves + " bare leaves; " + numLeaves + " root rewrites as leaf; and " + numNonPhrasal + " root rewrites as tagged word");
                    if (numLeaves > 0)
                    {
                        pw.Println("  Example bad root rewrites as leaf: " + leafEg);
                    }
                    if (numNonPhrasal > 0)
                    {
                        pw.Println("  Example bad root rewrites as tagged word: " + rootRewritesAsTaggedWordEg);
                    }
                }
                if (numNullLabel > 0)
                {
                    pw.Println("  Warning!  " + numNullLabel + " tree nodes with null or empty string labels, e.g.:");
                    pw.Println("    " + nullLabelEg);
                }
                if (numPreTerminalWithMultipleChildren > 0)
                {
                    pw.Println("  Warning! " + numPreTerminalWithMultipleChildren + " preterminal nodes with multiple children.");
                    pw.Println("    Example: " + preTerminalMultipleChildrenEg);
                }
                // Average length truncated to two decimals via the *100 / 100.0 trick.
                pw.Println("  Sentences range from " + shortestSentence + " to " + longestSentence + " words, with an average length of " + (((numWords * 100) / numTrees) / 100.0) + " words.");
                pw.Println("  " + cats.Size() + " phrasal category types, " + tags.Size() + " tag types, and " + words.Count + " word types");
                string[] empties = new string[] { "*", "0", "*T*", "*RNR*", "*U*", "*?*", "*EXP*", "*ICH*", "*NOT*", "*PPA*", "*OP*", "*pro*", "*PRO*" };
                // What a dopey choice using 0 as an empty element name!!
                // The problem with the below is that words aren't turned into a basic
                // category, but empties commonly are indexed....  Would need to look
                // for them with a suffix of -[0-9]+
                ICollection <string> knownEmpties        = Generics.NewHashSet(Arrays.AsList(empties));
                ICollection <string> emptiesIntersection = Sets.Intersection(words, knownEmpties);
                if (!emptiesIntersection.IsEmpty())
                {
                    pw.Println("  Caution! " + emptiesIntersection.Count + " word types are known empty elements: " + emptiesIntersection);
                }
                ICollection <string> joint = Sets.Intersection(cats.KeySet(), tags.KeySet());
                if (!joint.IsEmpty())
                {
                    pw.Println("  Warning! " + joint.Count + " items are tags and categories: " + joint);
                }
                foreach (string cat in cats.KeySet())
                {
                    if (cat != null && cat.Contains("@"))
                    {
                        pw.Println("  Warning!!  Stanford Parser does not work with categories containing '@' like: " + cat);
                        break;
                    }
                }
                foreach (string cat_1 in tags.KeySet())
                {
                    if (cat_1 != null && cat_1.Contains("@"))
                    {
                        pw.Println("  Warning!!  Stanford Parser does not work with tags containing '@' like: " + cat_1);
                        break;
                    }
                }
                pw.Println("    Cats: " + Counters.ToString(cats, nf));
                pw.Println("    Tags: " + Counters.ToString(tags, nf));
                pw.Println("    " + starts.Size() + " start categories: " + Counters.ToString(starts, nf));
                if (!puncts.IsEmpty())
                {
                    pw.Println("    Puncts: " + Counters.ToString(puncts, nf));
                }
            }
            return(sw.ToString());
        }
        /// <summary>
        /// Finishes training the character-based lexicon: counts characters over the
        /// training sentences, counts POS-specific character n-grams, and builds
        /// smoothed character distributions for every n-gram context.
        /// </summary>
        public virtual void FinishTraining()
        {
            Timing.Tick("Counting characters...");
            ClassicCounter<ChineseCharacterBasedLexicon.Symbol> charCounter = new ClassicCounter<ChineseCharacterBasedLexicon.Symbol>();
            // first find all chars that occur only once
            foreach (IList<TaggedWord> labels in trainingSentences)
            {
                foreach (TaggedWord label in labels)
                {
                    string word = label.Word();
                    if (word.Equals(LexiconConstants.Boundary))
                    {
                        continue;
                    }
                    // BUGFIX: loop bound "length" was undefined; iterate the word's characters.
                    for (int j = 0; j < word.Length; j++)
                    {
                        ChineseCharacterBasedLexicon.Symbol sym = ChineseCharacterBasedLexicon.Symbol.CannonicalSymbol(word[j]);
                        charCounter.IncrementCount(sym);
                    }
                    charCounter.IncrementCount(ChineseCharacterBasedLexicon.Symbol.EndWord);
                }
            }
            ICollection<ChineseCharacterBasedLexicon.Symbol> singletons = Counters.KeysBelow(charCounter, 1.5);
            knownChars = Generics.NewHashSet(charCounter.KeySet());
            Timing.Tick("Counting nGrams...");
            // One counter per context length; counter i holds (i+2)-dimensional keys
            // (tag + i preceding chars -> char).
            GeneralizedCounter[] POSspecificCharNGrams = new GeneralizedCounter[ContextLength + 1];
            for (int i = 0; i <= ContextLength; i++)
            {
                POSspecificCharNGrams[i] = new GeneralizedCounter(i + 2);
            }
            ClassicCounter<string> POSCounter = new ClassicCounter<string>();
            IList<ISerializable> context = new List<ISerializable>(ContextLength + 1);
            foreach (IList<TaggedWord> words in trainingSentences)
            {
                foreach (TaggedWord taggedWord in words)
                {
                    string word = taggedWord.Word();
                    string tag = taggedWord.Tag();
                    tagIndex.Add(tag);
                    if (word.Equals(LexiconConstants.Boundary))
                    {
                        continue;
                    }
                    POSCounter.IncrementCount(tag);
                    // BUGFIX: "size" was undefined; the position loop runs over the word's
                    // characters plus one extra iteration for the end-of-word symbol.
                    int size = word.Length;
                    for (int i_1 = 0; i_1 <= size; i_1++)
                    {
                        ChineseCharacterBasedLexicon.Symbol sym;
                        ChineseCharacterBasedLexicon.Symbol unknownCharClass = null;
                        context.Clear();
                        context.Add(tag);
                        if (i_1 < size)
                        {
                            char thisCh = word[i_1];
                            sym = ChineseCharacterBasedLexicon.Symbol.CannonicalSymbol(thisCh);
                            // Singleton chars also feed an unknown-char class for the unknown model.
                            if (singletons.Contains(sym))
                            {
                                unknownCharClass = UnknownCharClass(sym);
                                charCounter.IncrementCount(unknownCharClass);
                            }
                        }
                        else
                        {
                            sym = ChineseCharacterBasedLexicon.Symbol.EndWord;
                        }
                        POSspecificCharNGrams[0].IncrementCount(context, sym);
                        // POS-specific 1-gram
                        if (unknownCharClass != null)
                        {
                            POSspecificCharNGrams[0].IncrementCount(context, unknownCharClass);
                        }
                        // for unknown ch model
                        // context is constructed incrementally:
                        // tag prevChar prevPrevChar
                        // this could be made faster using .sublist like in score
                        for (int j = 1; j <= ContextLength; j++)
                        {
                            // poly grams
                            if (i_1 - j < 0)
                            {
                                // Ran off the front of the word: pad with BeginWord and stop.
                                context.Add(ChineseCharacterBasedLexicon.Symbol.BeginWord);
                                POSspecificCharNGrams[j].IncrementCount(context, sym);
                                if (unknownCharClass != null)
                                {
                                    POSspecificCharNGrams[j].IncrementCount(context, unknownCharClass);
                                }
                                // for unknown ch model
                                break;
                            }
                            else
                            {
                                ChineseCharacterBasedLexicon.Symbol prev = ChineseCharacterBasedLexicon.Symbol.CannonicalSymbol(word[i_1 - j]);
                                if (singletons.Contains(prev))
                                {
                                    context.Add(UnknownCharClass(prev));
                                }
                                else
                                {
                                    context.Add(prev);
                                }
                                POSspecificCharNGrams[j].IncrementCount(context, sym);
                                if (unknownCharClass != null)
                                {
                                    POSspecificCharNGrams[j].IncrementCount(context, unknownCharClass);
                                }
                            }
                        }
                    }
                }
            }
            // for unknown ch model
            POSDistribution = Distribution.GetDistribution(POSCounter);
            Timing.Tick("Creating character prior distribution...");
            charDistributions = Generics.NewHashMap();
            //    charDistributions = Generics.newHashMap();  // 1.5
            //    charCounter.incrementCount(Symbol.UNKNOWN, singletons.size());
            int numberOfKeys = charCounter.Size() + singletons.Count;
            Distribution<ChineseCharacterBasedLexicon.Symbol> prior = Distribution.GoodTuringSmoothedCounter(charCounter, numberOfKeys);
            charDistributions[Java.Util.Collections.EmptyList] = prior;
            for (int i_2 = 0; i_2 <= ContextLength; i_2++)
            {
                ICollection<KeyValuePair<IList<ISerializable>, ClassicCounter<ChineseCharacterBasedLexicon.Symbol>>> counterEntries = POSspecificCharNGrams[i_2].LowestLevelCounterEntrySet();
                Timing.Tick("Creating " + counterEntries.Count + " character " + (i_2 + 1) + "-gram distributions...");
                foreach (KeyValuePair<IList<ISerializable>, ClassicCounter<ChineseCharacterBasedLexicon.Symbol>> entry in counterEntries)
                {
                    context = entry.Key;
                    ClassicCounter<ChineseCharacterBasedLexicon.Symbol> c = entry.Value;
                    // Back off to the distribution for the one-shorter context as a prior.
                    Distribution<ChineseCharacterBasedLexicon.Symbol> thisPrior = charDistributions[context.SubList(0, context.Count - 1)];
                    double priorWeight = thisPrior.GetNumberOfKeys() / 200.0;
                    Distribution<ChineseCharacterBasedLexicon.Symbol> newDist = Distribution.DynamicCounterWithDirichletPrior(c, thisPrior, priorWeight);
                    charDistributions[context] = newDist;
                }
            }
        }