示例#1
0
        /// <summary>
        /// Guesses the part of speech for a preterminal node created while
        /// expanding a multi-word token. Consults a manual override model first,
        /// then tries a clitic-stripped verb reading against the unigram tagger,
        /// then the unigram tagger on the surface form, and finally falls back to
        /// the manual unknown-word model.
        /// </summary>
        private static string InferPOS(Tree t, Tree parent, TwoDimensionalCounter <string, string> unigramTagger)
        {
            string token = t.FirstChild().Value();
            string phrase = GetContainingPhrase(t, parent);

            // The manual POS model gets the first say on a few special cases.
            string manualTag = MultiWordPreprocessor.ManualUWModel.GetOverrideTag(token, phrase);
            if (manualTag != null)
            {
                return manualTag;
            }

            ICollection <string> knownWords = unigramTagger.FirstKeySet();

            // Verb hypothesis: strip any clitic pronouns; if the stripped stem is
            // known to the unigram tagger and its best tag is verbal, keep it.
            SpanishVerbStripper.StrippedVerb stripped = verbStripper.SeparatePronouns(token);
            if (stripped != null && knownWords.Contains(stripped.GetStem()))
            {
                string verbTag = Counters.Argmax(unigramTagger.GetCounter(stripped.GetStem()));
                if (verbTag.StartsWith("v"))
                {
                    return verbTag;
                }
            }

            // Otherwise use the tagger on the surface form, or give up to the
            // manual unknown-word model.
            return knownWords.Contains(token)
                ? Counters.Argmax(unigramTagger.GetCounter(token), new MultiWordPreprocessor.POSTieBreaker())
                : MultiWordPreprocessor.ManualUWModel.GetTag(token, phrase);
        }
示例#2
0
        /// <summary>
        /// Prints, for each word key in a word→tag counter built from the given
        /// treebank, a tab-separated line of every observed tag and its count.
        /// </summary>
        /// <remarks>
        /// NOTE(review): <c>tb.Apply(null)</c> passes a null visitor, so
        /// <c>wtc</c> is never populated and the loop below prints nothing.
        /// This looks like a Java-to-C# conversion artifact in which the
        /// anonymous tree visitor that incremented word/tag counts into
        /// <c>wtc</c> was dropped — confirm against the original Java source.
        /// </remarks>
        private static void CountTaggings(Treebank tb, PrintWriter pw)
        {
            TwoDimensionalCounter <string, string> wtc = new TwoDimensionalCounter <string, string>();

            // Intended to visit every tree and accumulate word→tag counts into
            // wtc; the visitor was lost in conversion (see NOTE above).
            tb.Apply(null);
            foreach (string key in wtc.FirstKeySet())
            {
                pw.Print(key);
                pw.Print('\t');
                ICounter <string> ctr = wtc.GetCounter(key);
                foreach (string k2 in ctr.KeySet())
                {
                    pw.Print(k2 + '\t' + ctr.GetCount(k2) + '\t');
                }
                pw.Println();
            }
        }
示例#3
0
 /// <summary>
 /// Recursively repairs missing labels in a parse tree. Preterminals carrying
 /// the missing-POS placeholder get the unigram tagger's best tag for the word
 /// (or the manual unknown-word model's guess). After children are fixed,
 /// phrasal nodes carrying the missing-phrasal placeholder are relabeled with
 /// the most frequent category observed for their child-POS sequence.
 /// </summary>
 public static void TraverseAndFix(Tree t, TwoDimensionalCounter <string, string> pretermLabel, TwoDimensionalCounter <string, string> unigramTagger)
 {
     if (t.IsPreTerminal())
     {
         if (t.Value().Equals(FrenchXMLTreeReader.MissingPos))
         {
             nMissingPOS++;
             string token = t.FirstChild().Value();
             string guessedTag;
             if (unigramTagger.FirstKeySet().Contains(token))
             {
                 guessedTag = Counters.Argmax(unigramTagger.GetCounter(token));
             }
             else
             {
                 guessedTag = MWEPreprocessor.ManualUWModel.GetTag(token);
             }
             t.SetValue(guessedTag);
         }
         return;
     }
     foreach (Tree child in t.Children())
     {
         TraverseAndFix(child, pretermLabel, unigramTagger);
     }
     // Post-order visit: children are already fixed, so their values form the
     // POS sequence used to look up the phrasal category.
     if (!t.Value().Equals(FrenchXMLTreeReader.MissingPhrasal))
     {
         return;
     }
     nMissingPhrasal++;
     StringBuilder seq = new StringBuilder();
     foreach (Tree child in t.Children())
     {
         seq.Append(child.Value()).Append(" ");
     }
     string posSequence = seq.ToString().Trim();
     if (pretermLabel.FirstKeySet().Contains(posSequence))
     {
         t.SetValue(Counters.Argmax(pretermLabel.GetCounter(posSequence)));
     }
     else
     {
         System.Console.Out.WriteLine("No phrasal cat for: " + posSequence);
     }
 }
示例#4
0
 /// <summary>
 /// Writes every (key, value, count) triple of <paramref name="cnt"/> to the
 /// file <paramref name="fname"/> as UTF-8, one tab-separated triple per line.
 /// Encoding and file-not-found problems are printed and swallowed.
 /// </summary>
 public static void PrintCounter(TwoDimensionalCounter <string, string> cnt, string fname)
 {
     try
     {
         PrintWriter pw = new PrintWriter(new TextWriter(new FileOutputStream(new File(fname)), false, "UTF-8"));
         foreach (string key in cnt.FirstKeySet())
         {
             ICounter <string> inner = cnt.GetCounter(key);
             foreach (string val in inner.KeySet())
             {
                 pw.Printf("%s\t%s\t%d%n", key, val, (int)cnt.GetCount(key, val));
             }
         }
         pw.Close();
     }
     catch (UnsupportedEncodingException e)
     {
         Sharpen.Runtime.PrintStackTrace(e);
     }
     catch (FileNotFoundException e)
     {
         Sharpen.Runtime.PrintStackTrace(e);
     }
 }
示例#5
0
        /// <summary>
        /// Core of one bootstrapping iteration for <paramref name="label"/>:
        /// applies the patterns learned this iteration (or only counts stats when
        /// <c>constVars.doNotApplyPatterns</c> is set), optionally recomputes the
        /// processed data frequencies, then scores candidate phrases and returns
        /// the newly selected words according to the configured word-scoring
        /// strategy (Weightednorm or Bpb).
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        private ICounter <CandidatePhrase> LearnNewPhrasesPrivate(string label, PatternsForEachToken patternsForEachToken, ICounter <E> patternsLearnedThisIter, ICounter <E> allSelectedPatterns, ICollection <CandidatePhrase> alreadyIdentifiedWords, CollectionValuedMap <E, Triple <string, int, int> > matchedTokensByPat, ICounter <CandidatePhrase> scoreForAllWordsThisIteration, TwoDimensionalCounter <CandidatePhrase, E> terms, TwoDimensionalCounter <CandidatePhrase, E> wordsPatExtracted, TwoDimensionalCounter <E, CandidatePhrase> patternsAndWords4Label, string identifier, ICollection <CandidatePhrase> ignoreWords, bool computeProcDataFreq)
        {
            ICollection <CandidatePhrase> alreadyLabeledWords = new HashSet <CandidatePhrase>();

            if (constVars.doNotApplyPatterns)
            {
                // if want to get the stats by the lossy way of just counting without
                // applying the patterns
                ConstantsAndVariables.DataSentsIterator sentsIter = new ConstantsAndVariables.DataSentsIterator(constVars.batchProcessSents);
                while (sentsIter.MoveNext())
                {
                    Pair <IDictionary <string, DataInstance>, File> sentsf = sentsIter.Current;
                    this.StatsWithoutApplyingPatterns(sentsf.First(), patternsForEachToken, patternsLearnedThisIter, wordsPatExtracted);
                }
            }
            else
            {
                if (patternsLearnedThisIter.Size() > 0)
                {
                    this.ApplyPats(patternsLearnedThisIter, label, wordsPatExtracted, matchedTokensByPat, alreadyLabeledWords);
                }
            }
            if (computeProcDataFreq)
            {
                if (!phraseScorer.wordFreqNorm.Equals(PhraseScorer.Normalization.None))
                {
                    // Normalize raw frequencies with sqrt or 1+log, per configuration.
                    Redwood.Log(Redwood.Dbg, "computing processed freq");
                    foreach (KeyValuePair <CandidatePhrase, double> fq in Data.rawFreq.EntrySet())
                    {
                        double @in = fq.Value;
                        if (phraseScorer.wordFreqNorm.Equals(PhraseScorer.Normalization.Sqrt))
                        {
                            @in = Math.Sqrt(@in);
                        }
                        else
                        {
                            if (phraseScorer.wordFreqNorm.Equals(PhraseScorer.Normalization.Log))
                            {
                                @in = 1 + Math.Log(@in);
                            }
                            else
                            {
                                throw new Exception("can't understand the normalization");
                            }
                        }
                        System.Diagnostics.Debug.Assert(!double.IsNaN(@in), "Why is processed freq nan when rawfreq is " + @in);
                        Data.processedDataFreq.SetCount(fq.Key, @in);
                    }
                }
                else
                {
                    Data.processedDataFreq = Data.rawFreq;
                }
            }
            if (constVars.wordScoring.Equals(GetPatternsFromDataMultiClass.WordScoring.Weightednorm))
            {
                // Gather candidate terms, excluding words from other semantic
                // classes (surface or lemma form) and words already labeled.
                foreach (CandidatePhrase en in wordsPatExtracted.FirstKeySet())
                {
                    if (!constVars.GetOtherSemanticClassesWords().Contains(en) && (en.GetPhraseLemma() == null || !constVars.GetOtherSemanticClassesWords().Contains(CandidatePhrase.CreateOrGet(en.GetPhraseLemma()))) && !alreadyLabeledWords.Contains(en))
                    {
                        terms.AddAll(en, wordsPatExtracted.GetCounter(en));
                    }
                }
                RemoveKeys(terms, ConstantsAndVariables.GetStopWords());
                ICounter <CandidatePhrase> phraseScores = phraseScorer.ScorePhrases(label, terms, wordsPatExtracted, allSelectedPatterns, alreadyIdentifiedWords, false);
                System.Console.Out.WriteLine("count for word U.S. is " + phraseScores.GetCount(CandidatePhrase.CreateOrGet("U.S.")));
                ICollection <CandidatePhrase> ignoreWordsAll;
                if (ignoreWords != null && !ignoreWords.IsEmpty())
                {
                    ignoreWordsAll = CollectionUtils.UnionAsSet(ignoreWords, constVars.GetOtherSemanticClassesWords());
                }
                else
                {
                    ignoreWordsAll = new HashSet <CandidatePhrase>(constVars.GetOtherSemanticClassesWords());
                }
                // Also ignore seed-dictionary entries and previously learned words.
                Sharpen.Collections.AddAll(ignoreWordsAll, constVars.GetSeedLabelDictionary()[label]);
                Sharpen.Collections.AddAll(ignoreWordsAll, constVars.GetLearnedWords(label).KeySet());
                System.Console.Out.WriteLine("ignoreWordsAll contains word U.S. is " + ignoreWordsAll.Contains(CandidatePhrase.CreateOrGet("U.S.")));
                // NOTE(review): phraseScores is passed twice here; confirm against
                // ChooseTopWords' signature whether the third argument should be a
                // different counter.
                ICounter <CandidatePhrase> finalwords = ChooseTopWords(phraseScores, terms, phraseScores, ignoreWordsAll, constVars.thresholdWordExtract);
                phraseScorer.PrintReasonForChoosing(finalwords);
                scoreForAllWordsThisIteration.Clear();
                Counters.AddInPlace(scoreForAllWordsThisIteration, phraseScores);
                Redwood.Log(ConstantsAndVariables.minimaldebug, "\n\n## Selected Words for " + label + " : " + Counters.ToSortedString(finalwords, finalwords.Size(), "%1$s:%2$.2f", "\t"));
                if (constVars.goldEntities != null)
                {
                    IDictionary <string, bool> goldEntities4Label = constVars.goldEntities[label];
                    if (goldEntities4Label != null)
                    {
                        StringBuilder s = new StringBuilder();
                        // NOTE(review): the consumer lambda that appended each word's
                        // gold-label status to `s` was lost in the Java-to-C#
                        // conversion (ForEach(null)), so `s` is always empty here —
                        // confirm against the original Java source.
                        finalwords.KeySet().Stream().ForEach(null);
                        Redwood.Log(ConstantsAndVariables.minimaldebug, "\n\n## Gold labels for selected words for label " + label + " : " + s.ToString());
                    }
                    else
                    {
                        Redwood.Log(Redwood.Dbg, "No gold entities provided for label " + label);
                    }
                }
                if (constVars.outDir != null && !constVars.outDir.IsEmpty())
                {
                    // Write a JSON justification (entity, patterns, reason words,
                    // score) for this iteration, appending to any previous output.
                    string outputdir = constVars.outDir + "/" + identifier + "/" + label;
                    IOUtils.EnsureDir(new File(outputdir));
                    TwoDimensionalCounter <CandidatePhrase, CandidatePhrase> reasonForWords = new TwoDimensionalCounter <CandidatePhrase, CandidatePhrase>();
                    foreach (CandidatePhrase word in finalwords.KeySet())
                    {
                        foreach (E l in wordsPatExtracted.GetCounter(word).KeySet())
                        {
                            foreach (CandidatePhrase w2 in patternsAndWords4Label.GetCounter(l))
                            {
                                reasonForWords.IncrementCount(word, w2);
                            }
                        }
                    }
                    Redwood.Log(ConstantsAndVariables.minimaldebug, "Saving output in " + outputdir);
                    string filename = outputdir + "/words.json";
                    // the json object is an array corresponding to each iteration - of list
                    // of objects,
                    // each of which is a bean of entity and reasons
                    IJsonArrayBuilder obj = Javax.Json.Json.CreateArrayBuilder();
                    if (writtenInJustification.Contains(label) && writtenInJustification[label])
                    {
                        IJsonReader jsonReader = Javax.Json.Json.CreateReader(new BufferedInputStream(new FileInputStream(filename)));
                        IJsonArray  objarr     = jsonReader.ReadArray();
                        foreach (IJsonValue o in objarr)
                        {
                            obj.Add(o);
                        }
                        jsonReader.Close();
                    }
                    IJsonArrayBuilder objThisIter = Javax.Json.Json.CreateArrayBuilder();
                    foreach (CandidatePhrase w in reasonForWords.FirstKeySet())
                    {
                        IJsonObjectBuilder objinner = Javax.Json.Json.CreateObjectBuilder();
                        IJsonArrayBuilder  l        = Javax.Json.Json.CreateArrayBuilder();
                        foreach (CandidatePhrase w2 in reasonForWords.GetCounter(w).KeySet())
                        {
                            l.Add(w2.GetPhrase());
                        }
                        IJsonArrayBuilder pats = Javax.Json.Json.CreateArrayBuilder();
                        foreach (E p in wordsPatExtracted.GetCounter(w))
                        {
                            pats.Add(p.ToStringSimple());
                        }
                        objinner.Add("reasonwords", l);
                        objinner.Add("patterns", pats);
                        objinner.Add("score", finalwords.GetCount(w));
                        objinner.Add("entity", w.GetPhrase());
                        objThisIter.Add(objinner.Build());
                    }
                    obj.Add(objThisIter);
                    // Redwood.log(ConstantsAndVariables.minimaldebug, channelNameLogger,
                    // "Writing justification at " + filename);
                    IOUtils.WriteStringToFile(StringUtils.Normalize(StringUtils.ToAscii(obj.Build().ToString())), filename, "ASCII");
                    writtenInJustification[label] = true;
                }
                if (constVars.justify)
                {
                    Redwood.Log(Redwood.Dbg, "\nJustification for phrases:\n");
                    foreach (CandidatePhrase word in finalwords.KeySet())
                    {
                        Redwood.Log(Redwood.Dbg, "Phrase " + word + " extracted because of patterns: \t" + Counters.ToSortedString(wordsPatExtracted.GetCounter(word), wordsPatExtracted.GetCounter(word).Size(), "%1$s:%2$f", "\n"));
                    }
                }
                // if (usePatternResultAsLabel)
                // if (answerLabel != null)
                // labelWords(sents, commonEngWords, finalwords.keySet(),
                // patterns.keySet(), outFile);
                // else
                // throw new RuntimeException("why is the answer label null?");
                return(finalwords);
            }
            else
            {
                if (constVars.wordScoring.Equals(GetPatternsFromDataMultiClass.WordScoring.Bpb))
                {
                    // BPB scoring: pick the single word whose best-weighted pattern
                    // has the maximum weight, breaking ties by extraction count.
                    Counters.AddInPlace(terms, wordsPatExtracted);
                    ICounter <CandidatePhrase>       maxPatWeightTerms = new ClassicCounter <CandidatePhrase>();
                    IDictionary <CandidatePhrase, E> wordMaxPat        = new Dictionary <CandidatePhrase, E>();
                    foreach (KeyValuePair <CandidatePhrase, ClassicCounter <E> > en in terms.EntrySet())
                    {
                        ICounter <E> weights = new ClassicCounter <E>();
                        foreach (E k in en.Value.KeySet())
                        {
                            weights.SetCount(k, patternsLearnedThisIter.GetCount(k));
                        }
                        maxPatWeightTerms.SetCount(en.Key, Counters.Max(weights));
                        wordMaxPat[en.Key] = Counters.Argmax(weights);
                    }
                    Counters.RemoveKeys(maxPatWeightTerms, alreadyIdentifiedWords);
                    double maxvalue = Counters.Max(maxPatWeightTerms);
                    ICollection <CandidatePhrase> words = Counters.KeysAbove(maxPatWeightTerms, maxvalue - 1e-10);
                    CandidatePhrase bestw = null;
                    if (words.Count > 1)
                    {
                        double max = double.NegativeInfinity;
                        foreach (CandidatePhrase w in words)
                        {
                            if (terms.GetCount(w, wordMaxPat[w]) > max)
                            {
                                max   = terms.GetCount(w, wordMaxPat[w]);
                                bestw = w;
                            }
                        }
                    }
                    else
                    {
                        if (words.Count == 1)
                        {
                            // Bug fix: the converted code read enumerator.Current
                            // without calling MoveNext() first, which in .NET yields
                            // default(T) (null) instead of the single element
                            // (Java's iterator().next() semantics).
                            IEnumerator <CandidatePhrase> singleIt = words.GetEnumerator();
                            singleIt.MoveNext();
                            bestw = singleIt.Current;
                        }
                        else
                        {
                            return(new ClassicCounter <CandidatePhrase>());
                        }
                    }
                    Redwood.Log(ConstantsAndVariables.minimaldebug, "Selected Words: " + bestw);
                    return(Counters.AsCounter(Arrays.AsList(bestw)));
                }
                else
                {
                    throw new Exception("wordscoring " + constVars.wordScoring + " not identified");
                }
            }
        }
        /// <summary>
        /// Reads a French treebank file (UTF-8), tallies multi-word expression
        /// (MW*) statistics, and prints a per-label table of type counts,
        /// singleton counts, and percentages, followed by overall totals.
        /// Expects exactly one argument: the treebank file path.
        /// </summary>
        public static void Main(string[] args)
        {
            if (args.Length != 1)
            {
                System.Console.Error.Printf("Usage: java %s file%n", typeof(Edu.Stanford.Nlp.International.French.Scripts.MWEFrequencyDist).FullName);
                System.Environment.Exit(-1);
            }
            File treeFile = new File(args[0]);
            TwoDimensionalCounter <string, string> mweLabelToString = new TwoDimensionalCounter <string, string>();
            ICollection <string> uniquePOSSequences = Generics.NewHashSet();

            try
            {
                BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
                ITreeReaderFactory factory = new FrenchTreeReaderFactory();
                ITreeReader treeReader = factory.NewTreeReader(reader);
                TregexPattern mwePattern = TregexPattern.Compile("/^MW/");
                Tree tree;
                while ((tree = treeReader.ReadTree()) != null)
                {
                    // Record the terminal yield and POS yield of every MW* node.
                    TregexMatcher matcher = mwePattern.Matcher(tree);
                    while (matcher.FindNextMatchingNode())
                    {
                        Tree match = matcher.GetMatch();
                        string label = match.Value();
                        StringBuilder termYield = new StringBuilder();
                        StringBuilder posYield = new StringBuilder();
                        foreach (CoreLabel cl in match.TaggedLabeledYield())
                        {
                            termYield.Append(cl.Word()).Append(" ");
                            posYield.Append(cl.Tag()).Append(" ");
                        }
                        mweLabelToString.IncrementCount(label, termYield.ToString().Trim());
                        uniquePOSSequences.Add(posYield.ToString().Trim());
                    }
                }
                treeReader.Close();
                // Closes the underlying reader
                System.Console.Out.Printf("Type\t#Type\t#Single\t%%Single\t%%Total%n");
                double nMWEs = mweLabelToString.TotalCount();
                int nAllSingletons = 0;
                int nTokens = 0;
                foreach (string mweLabel in mweLabelToString.FirstKeySet())
                {
                    double totalCount = mweLabelToString.TotalCount(mweLabel);
                    ICounter <string> termCounts = mweLabelToString.GetCounter(mweLabel);
                    int nSingletons = 0;
                    foreach (string term in termCounts.KeySet())
                    {
                        if (termCounts.GetCount(term) == 1.0)
                        {
                            nSingletons++;
                        }
                        nTokens += term.Split("\\s+").Length * (int)termCounts.GetCount(term);
                    }
                    nAllSingletons += nSingletons;
                    System.Console.Out.Printf("%s\t%d\t%d\t%.2f\t%.2f%n", mweLabel, (int)totalCount, nSingletons, 100.0 * nSingletons / totalCount, 100.0 * totalCount / nMWEs);
                }
                System.Console.Out.Printf("TOTAL:\t%d\t%d\t%.2f%n", (int)nMWEs, nAllSingletons, 100.0 * nAllSingletons / nMWEs);
                System.Console.Out.WriteLine("#tokens = " + nTokens);
                System.Console.Out.WriteLine("#unique MWE POS sequences = " + uniquePOSSequences.Count);
            }
            catch (UnsupportedEncodingException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (FileNotFoundException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (TregexParseException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
            catch (IOException e)
            {
                Sharpen.Runtime.PrintStackTrace(e);
            }
        }
        /// <summary>
        /// Scores each candidate phrase in <c>terms</c> for the given label by
        /// averaging several per-feature scores (TF-IDF-like pattern weight,
        /// semantic odds, domain/Google n-gram scores, distributional-similarity
        /// class weight, edit-distance scores, word shape), each enabled by a
        /// corresponding <c>constVars.usePhraseEval*</c> flag. Returns a counter
        /// of phrase → mean score, skipping phrases whose mean is NaN/infinite
        /// and phrases already identified.
        /// </summary>
        internal override ICounter <CandidatePhrase> ScorePhrases(string label, TwoDimensionalCounter <CandidatePhrase, E> terms, TwoDimensionalCounter <CandidatePhrase, E> wordsPatExtracted, ICounter <E> allSelectedPatterns, ICollection <CandidatePhrase
                                                                                                                                                                                                                                                > alreadyIdentifiedWords, bool forLearningPatterns)
        {
            // Per-phrase feature-score vectors, filled in the main loop below.
            IDictionary <CandidatePhrase, ICounter <ConstantsAndVariables.ScorePhraseMeasures> > scores = new Dictionary <CandidatePhrase, ICounter <ConstantsAndVariables.ScorePhraseMeasures> >();

            if (Data.domainNGramsFile != null)
            {
                Data.LoadDomainNGrams();
            }
            Redwood.Log(ConstantsAndVariables.extremedebug, "Considering terms: " + terms.FirstKeySet());
            // calculate TF-IDF like scores
            ICounter <CandidatePhrase> tfidfScores = new ClassicCounter <CandidatePhrase>();

            if (constVars.usePhraseEvalPatWtByFreq)
            {
                foreach (KeyValuePair <CandidatePhrase, ClassicCounter <E> > en in terms.EntrySet())
                {
                    double score = GetPatTFIDFScore(en.Key, en.Value, allSelectedPatterns);
                    tfidfScores.SetCount(en.Key, score);
                }
                Redwood.Log(ConstantsAndVariables.extremedebug, "BEFORE IDF " + Counters.ToSortedString(tfidfScores, 100, "%1$s:%2$f", "\t"));
                // IDF step: divide each phrase's score by its processed data freq.
                Counters.DivideInPlace(tfidfScores, Data.processedDataFreq);
            }
            ICounter <CandidatePhrase> externalFeatWtsNormalized     = new ClassicCounter <CandidatePhrase>();
            ICounter <CandidatePhrase> domainNgramNormScores         = new ClassicCounter <CandidatePhrase>();
            ICounter <CandidatePhrase> googleNgramNormScores         = new ClassicCounter <CandidatePhrase>();
            ICounter <CandidatePhrase> editDistanceOtherBinaryScores = new ClassicCounter <CandidatePhrase>();
            ICounter <CandidatePhrase> editDistanceSameBinaryScores  = new ClassicCounter <CandidatePhrase>();

            // First pass: compute the raw per-feature scores for every phrase.
            foreach (CandidatePhrase gc in terms.FirstKeySet())
            {
                string g = gc.GetPhrase();
                if (constVars.usePhraseEvalEditDistOther)
                {
                    editDistanceOtherBinaryScores.SetCount(gc, 1 - constVars.GetEditDistanceScoresOtherClassThreshold(label, g));
                }
                if (constVars.usePhraseEvalEditDistSame)
                {
                    editDistanceSameBinaryScores.SetCount(gc, constVars.GetEditDistanceScoresThisClassThreshold(label, g));
                }
                if (constVars.usePhraseEvalDomainNgram)
                {
                    // calculate domain-ngram wts
                    if (Data.domainNGramRawFreq.ContainsKey(g))
                    {
                        System.Diagnostics.Debug.Assert((Data.rawFreq.ContainsKey(gc)));
                        domainNgramNormScores.SetCount(gc, GetDomainNgramScore(g));
                    }
                    else
                    {
                        log.Info("why is " + g + " not present in domainNgram");
                    }
                }
                if (constVars.usePhraseEvalGoogleNgram)
                {
                    googleNgramNormScores.SetCount(gc, GetGoogleNgramScore(gc));
                }
                if (constVars.usePhraseEvalWordClass)
                {
                    // calculate dist sim weights
                    // NOTE(review): `num` is a non-nullable int, so `num == null`
                    // below is always false and the lower-case fallback lookup is
                    // dead code; likewise `num != null` is always true. This is a
                    // Java-to-C# conversion artifact (Java's boxed Integer could be
                    // null). Confirm whether the indexer returns a sentinel on a
                    // missing key or throws, and whether `num` should be `int?`.
                    int num = constVars.GetWordClassClusters()[g];
                    if (num == null)
                    {
                        num = constVars.GetWordClassClusters()[g.ToLower()];
                    }
                    if (num != null && constVars.distSimWeights[label].ContainsKey(num))
                    {
                        externalFeatWtsNormalized.SetCount(gc, constVars.distSimWeights[label].GetCount(num));
                    }
                    else
                    {
                        externalFeatWtsNormalized.SetCount(gc, OOVExternalFeatWt);
                    }
                }
            }
            // Normalize the raw feature scores (softmax/min-max per the helper).
            ICounter <CandidatePhrase> normTFIDFScores = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(tfidfScores, true, true, false);
            ICounter <CandidatePhrase> dictOdddsScores = null;

            if (constVars.usePhraseEvalSemanticOdds)
            {
                System.Diagnostics.Debug.Assert(constVars.dictOddsWeights != null, "usePhraseEvalSemanticOdds is true but dictOddsWeights is null for the label " + label);
                dictOdddsScores = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(constVars.dictOddsWeights[label], true, true, false);
            }
            domainNgramNormScores     = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(domainNgramNormScores, true, true, false);
            googleNgramNormScores     = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(googleNgramNormScores, true, true, false);
            externalFeatWtsNormalized = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(externalFeatWtsNormalized, true, true, false);
            // Counters.max(googleNgramNormScores);
            // Counters.max(externalFeatWtsNormalized);
            // Second pass: assemble each phrase's feature-score vector, using
            // OOV fallbacks where a normalized score is missing for the phrase.
            foreach (CandidatePhrase word in terms.FirstKeySet())
            {
                if (alreadyIdentifiedWords.Contains(word))
                {
                    continue;
                }
                ICounter <ConstantsAndVariables.ScorePhraseMeasures> scoreslist = new ClassicCounter <ConstantsAndVariables.ScorePhraseMeasures>();
                System.Diagnostics.Debug.Assert(normTFIDFScores.ContainsKey(word), "NormTFIDF score does not contain" + word);
                double tfscore = normTFIDFScores.GetCount(word);
                scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Patwtbyfreq, tfscore);
                if (constVars.usePhraseEvalSemanticOdds)
                {
                    double dscore;
                    if (dictOdddsScores.ContainsKey(word))
                    {
                        dscore = dictOdddsScores.GetCount(word);
                    }
                    else
                    {
                        dscore = GetPhraseWeightFromWords(dictOdddsScores, word, OOVdictOdds);
                    }
                    scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Semanticodds, dscore);
                }
                if (constVars.usePhraseEvalDomainNgram)
                {
                    double domainscore;
                    if (domainNgramNormScores.ContainsKey(word))
                    {
                        domainscore = domainNgramNormScores.GetCount(word);
                    }
                    else
                    {
                        domainscore = GetPhraseWeightFromWords(domainNgramNormScores, word, OOVDomainNgramScore);
                    }
                    scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Domainngram, domainscore);
                }
                if (constVars.usePhraseEvalGoogleNgram)
                {
                    double googlescore;
                    if (googleNgramNormScores.ContainsKey(word))
                    {
                        googlescore = googleNgramNormScores.GetCount(word);
                    }
                    else
                    {
                        googlescore = GetPhraseWeightFromWords(googleNgramNormScores, word, OOVGoogleNgramScore);
                    }
                    scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Googlengram, googlescore);
                }
                if (constVars.usePhraseEvalWordClass)
                {
                    double externalFeatureWt;
                    if (externalFeatWtsNormalized.ContainsKey(word))
                    {
                        externalFeatureWt = externalFeatWtsNormalized.GetCount(word);
                    }
                    else
                    {
                        externalFeatureWt = GetPhraseWeightFromWords(externalFeatWtsNormalized, word, OOVExternalFeatWt);
                    }
                    scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Distsim, externalFeatureWt);
                }
                if (constVars.usePhraseEvalEditDistOther)
                {
                    System.Diagnostics.Debug.Assert(editDistanceOtherBinaryScores.ContainsKey(word), "How come no edit distance info?");
                    double editD = editDistanceOtherBinaryScores.GetCount(word);
                    scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Editdistother, editD);
                }
                if (constVars.usePhraseEvalEditDistSame)
                {
                    double editDSame = editDistanceSameBinaryScores.GetCount(word);
                    scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Editdistsame, editDSame);
                }
                if (constVars.usePhraseEvalWordShape)
                {
                    scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Wordshape, this.GetWordShapeScore(word.GetPhrase(), label));
                }
                scores[word] = scoreslist;
                phraseScoresNormalized.SetCounter(word, scoreslist);
            }
            ICounter <CandidatePhrase> phraseScores = new ClassicCounter <CandidatePhrase>();

            // Final score: mean of the feature scores; drop NaN/infinite means.
            // NOTE(review): `avgScore.IsInfinite()` is not a .NET double member —
            // presumably a Sharpen extension method mirroring Java's
            // Double.isInfinite(); confirm it is in scope.
            foreach (KeyValuePair <CandidatePhrase, ICounter <ConstantsAndVariables.ScorePhraseMeasures> > wEn in scores)
            {
                double avgScore = Counters.Mean(wEn.Value);
                if (!avgScore.IsInfinite() && !double.IsNaN(avgScore))
                {
                    phraseScores.SetCount(wEn.Key, avgScore);
                }
                else
                {
                    Redwood.Log(Redwood.Dbg, "Ignoring " + wEn.Key + " because score is " + avgScore);
                }
            }
            return(phraseScores);
        }