/// <summary>Converts a datum's features from raw counts to L1-normalized TF-IDF values.</summary>
/// <param name="datum">datum with a collection of features.</param>
/// <param name="featureDocCounts">a counter of the document count for each feature.</param>
/// <returns>an RVFDatum with L1-normalized TF-IDF features.</returns>
public virtual RVFDatum<L, F> GetL1NormalizedTFIDFDatum(IDatum<L, F> datum, ICounter<F> featureDocCounts)
{
    ICounter<F> tfidfFeatures = new ClassicCounter<F>();
    // Term frequency: count each feature occurrence that has a known document count.
    foreach (F feature in datum.AsFeatures())
    {
        if (featureDocCounts.ContainsKey(feature))
        {
            tfidfFeatures.IncrementCount(feature, 1.0);
        }
    }
    double l1norm = 0;
    // Weight each term frequency by a smoothed inverse document frequency.
    foreach (F feature in tfidfFeatures.KeySet())
    {
        double idf = Math.Log(((double)(this.Size() + 1)) / (featureDocCounts.GetCount(feature) + 0.5));
        double tf = tfidfFeatures.GetCount(feature);
        tfidfFeatures.SetCount(feature, tf * idf);
        l1norm += tf * idf;
    }
    // Normalize so the feature weights sum to 1 (L1 norm).
    foreach (F feature in tfidfFeatures.KeySet())
    {
        double tfidf = tfidfFeatures.GetCount(feature);
        tfidfFeatures.SetCount(feature, tfidf / l1norm);
    }
    return new RVFDatum<L, F>(tfidfFeatures, datum.Label());
}
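// Illustration: a minimal standalone sketch of the same weighting scheme using
// plain dictionaries instead of the Counter/Datum classes. The corpus size
// numDocs and the document-frequency table docFreq are hypothetical stand-ins
// for this.Size() and featureDocCounts; assumes using System and
// System.Collections.Generic.
private static IDictionary<string, double> L1NormalizedTfIdfSketch(IEnumerable<string> docFeatures, IDictionary<string, double> docFreq, int numDocs)
{
    var tfidf = new Dictionary<string, double>();
    // Term frequency, restricted to features with a known document frequency.
    foreach (string f in docFeatures)
    {
        if (docFreq.ContainsKey(f))
        {
            tfidf.TryGetValue(f, out double tf); // tf defaults to 0 when absent
            tfidf[f] = tf + 1.0;
        }
    }
    double l1norm = 0;
    foreach (string f in new List<string>(tfidf.Keys))
    {
        // Smoothed IDF, matching the log((N + 1) / (df + 0.5)) form above.
        double idf = Math.Log((numDocs + 1.0) / (docFreq[f] + 0.5));
        tfidf[f] *= idf;
        l1norm += tfidf[f];
    }
    // L1-normalize so the weights sum to 1.
    foreach (string f in new List<string>(tfidf.Keys))
    {
        tfidf[f] /= l1norm;
    }
    return tfidf;
}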
public virtual void TestRetainTop()
{
    c1 = new ClassicCounter<string>();
    c1.IncrementCount("a", 0.9);
    c1.IncrementCount("b", 1.0);
    c1.IncrementCount("c", 1.5);
    c1.IncrementCount("d", 0.0);
    c1.IncrementCount("e", -2.0);
    Counters.RetainTop(c1, 3);
    NUnit.Framework.Assert.AreEqual(3, c1.Size());
    NUnit.Framework.Assert.IsTrue(c1.ContainsKey("a"));
    NUnit.Framework.Assert.IsFalse(c1.ContainsKey("d"));
    Counters.RetainTop(c1, 1);
    NUnit.Framework.Assert.AreEqual(1, c1.Size());
    NUnit.Framework.Assert.IsTrue(c1.ContainsKey("c"));
    NUnit.Framework.Assert.AreEqual(1.5, c1.GetCount("c"));
}
public virtual void TestRetainAbove()
{
    c1 = new ClassicCounter<string>();
    c1.IncrementCount("a", 1.1);
    c1.IncrementCount("b", 1.0);
    c1.IncrementCount("c", 0.9);
    c1.IncrementCount("d", 0);
    ICollection<string> removed = Counters.RetainAbove(c1, 1.0);
    ICollection<string> expected = new HashSet<string>();
    expected.Add("c");
    expected.Add("d");
    NUnit.Framework.Assert.AreEqual(expected, removed);
    NUnit.Framework.Assert.AreEqual(1.1, c1.GetCount("a"));
    NUnit.Framework.Assert.AreEqual(1.0, c1.GetCount("b"));
    NUnit.Framework.Assert.IsFalse(c1.ContainsKey("c"));
    NUnit.Framework.Assert.IsFalse(c1.ContainsKey("d"));
}
public virtual ICounter<CandidatePhrase> ChooseTopWords(ICounter<CandidatePhrase> newdt, TwoDimensionalCounter<CandidatePhrase, E> terms, ICounter<CandidatePhrase> useThresholdNumPatternsForTheseWords, ICollection<CandidatePhrase> ignoreWords, double thresholdWordExtract)
{
    // Iterate over candidate words in descending score order.
    IEnumerator<CandidatePhrase> termIter = Counters.ToPriorityQueue(newdt).GetEnumerator();
    ICounter<CandidatePhrase> finalwords = new ClassicCounter<CandidatePhrase>();
    while (termIter.MoveNext())
    {
        if (finalwords.Size() >= constVars.numWordsToAdd)
        {
            break;
        }
        CandidatePhrase w = termIter.Current;
        if (newdt.GetCount(w) < thresholdWordExtract)
        {
            // The queue is sorted by score, so every later word is below the threshold too.
            Redwood.Log(ConstantsAndVariables.extremedebug, "not adding word " + w + " and any later words because the score " + newdt.GetCount(w) + " is less than the threshold of " + thresholdWordExtract);
            break;
        }
        System.Diagnostics.Debug.Assert(newdt.GetCount(w) != double.PositiveInfinity);
        // Skip words that were extracted by too few non-redundant patterns.
        if (useThresholdNumPatternsForTheseWords.ContainsKey(w) && NumNonRedundantPatterns(terms, w) < constVars.thresholdNumPatternsApplied)
        {
            Redwood.Log("extremePatDebug", "Not adding " + w + " because the number of non-redundant patterns is below the threshold of " + constVars.thresholdNumPatternsApplied + ":" + terms.GetCounter(w).KeySet());
            continue;
        }
        // Skip words that fuzzily match a word in the ignore list.
        CandidatePhrase matchedFuzzy = null;
        if (constVars.minLen4FuzzyForPattern > 0 && ignoreWords != null)
        {
            matchedFuzzy = ConstantsAndVariables.ContainsFuzzy(ignoreWords, w, constVars.minLen4FuzzyForPattern);
        }
        if (matchedFuzzy == null)
        {
            Redwood.Log("extremePatDebug", "adding word " + w);
            finalwords.SetCount(w, newdt.GetCount(w));
        }
        else
        {
            Redwood.Log("extremePatDebug", "not adding " + w + " because it matched " + matchedFuzzy + ", a common English word");
            ignoreWords.Add(w);
        }
    }
    // Log the next (up to) ten unselected phrases, for debugging.
    string nextTen = string.Empty;
    int n = 0;
    while (termIter.MoveNext())
    {
        n++;
        if (n > 10)
        {
            break;
        }
        CandidatePhrase w = termIter.Current;
        nextTen += ";\t" + w + ":" + newdt.GetCount(w);
    }
    Redwood.Log(Redwood.Dbg, "Next ten phrases were " + nextTen);
    return finalwords;
}
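// Illustration: a reduced standalone sketch of the selection loop above. It
// walks candidates in descending score order, stops at the score threshold or
// the size cap, and skips anything a caller-supplied veto rejects (standing in
// for the pattern-count and fuzzy-match checks). All names are illustrative;
// assumes using System, System.Collections.Generic, and System.Linq.
private static IDictionary<string, double> ChooseTopWordsSketch(IDictionary<string, double> scores, int maxWords, double threshold, Func<string, bool> veto)
{
    var chosen = new Dictionary<string, double>();
    foreach (var kv in scores.OrderByDescending(e => e.Value))
    {
        if (chosen.Count >= maxWords)
        {
            break;
        }
        if (kv.Value < threshold)
        {
            // Candidates are sorted, so no later one can pass the threshold.
            break;
        }
        if (veto(kv.Key))
        {
            continue;
        }
        chosen[kv.Key] = kv.Value;
    }
    return chosen;
}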
public virtual void TestIncrement()
{
    c.Clear();
    NUnit.Framework.Assert.AreEqual(0.0, c.GetCount("r"));
    NUnit.Framework.Assert.AreEqual(1.0, c.IncrementCount("r"));
    NUnit.Framework.Assert.AreEqual(1.0, c.GetCount("r"));
    c.SetCount("p", 0);
    c.SetCount("q", 2);
    NUnit.Framework.Assert.AreEqual(true, c.ContainsKey("q"));
    NUnit.Framework.Assert.AreEqual(false, c.ContainsKey("!!!"));
    NUnit.Framework.Assert.AreEqual(0.0, c.GetCount("p"));
    NUnit.Framework.Assert.AreEqual(1.0, c.IncrementCount("p"));
    NUnit.Framework.Assert.AreEqual(1.0, c.GetCount("p"));
    NUnit.Framework.Assert.AreEqual(4.0, c.TotalCount());
    c.DecrementCount("s", 5.0);
    NUnit.Framework.Assert.AreEqual(-5.0, c.GetCount("s"));
    c.Remove("s");
    NUnit.Framework.Assert.AreEqual(4.0, c.TotalCount());
}
private static ICounter<string> GetFeatures(ClustererDataLoader.ClustererDoc doc, Pair<int, int> mentionPair, ICounter<Pair<int, int>> scores)
{
    ICounter<string> features = new ClassicCounter<string>();
    // Scores are stored under only one orientation of the pair; flip if needed.
    if (!scores.ContainsKey(mentionPair))
    {
        mentionPair = new Pair<int, int>(mentionPair.second, mentionPair.first);
    }
    double score = scores.GetCount(mentionPair);
    features.IncrementCount("max", score);
    return features;
}
public virtual double GetPhraseWeightFromWords(ICounter<CandidatePhrase> weights, CandidatePhrase ph, double defaultWt)
{
    // Regex-style whitespace split (via the converted runtime's string extension).
    string[] t = ph.GetPhrase().Split("\\s+");
    if (t.Length < 2)
    {
        // Single-word phrase: use its weight directly, or the default if unseen.
        if (weights.ContainsKey(ph))
        {
            return weights.GetCount(ph);
        }
        return defaultWt;
    }
    double totalscore = 0;
    double minScore = double.MaxValue;
    foreach (string w in t)
    {
        double score = defaultWt;
        // Look the token up under the same key type the counter uses.
        if (weights.ContainsKey(CandidatePhrase.CreateOrGet(w)))
        {
            score = weights.GetCount(CandidatePhrase.CreateOrGet(w));
        }
        if (score < minScore)
        {
            minScore = score;
        }
        totalscore += score;
    }
    if (useAvgInsteadofMinPhraseScoring)
    {
        // Average over the number of tokens in the phrase.
        return totalscore / t.Length;
    }
    return minScore;
}
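// Illustration: the same two aggregation strategies in standalone form. A
// multi-word phrase is scored either by its weakest word or by the average
// over its words; plain dictionaries stand in for the Counter types. Names
// are illustrative; assumes using System and System.Collections.Generic.
private static double PhraseWeightSketch(IDictionary<string, double> weights, string phrase, double defaultWt, bool useAvg)
{
    string[] tokens = phrase.Split(new[] { ' ', '\t' }, StringSplitOptions.RemoveEmptyEntries);
    if (tokens.Length < 2)
    {
        return weights.TryGetValue(phrase, out double single) ? single : defaultWt;
    }
    double total = 0;
    double min = double.MaxValue;
    foreach (string t in tokens)
    {
        double score = weights.TryGetValue(t, out double w) ? w : defaultWt;
        min = Math.Min(min, score);
        total += score;
    }
    // Average over the token count, or fall back to the weakest word's weight.
    return useAvg ? total / tokens.Length : min;
}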
public virtual double GetDictOddsScore(CandidatePhrase word, string label, double defaultWt)
{
    double dscore;
    ICounter<CandidatePhrase> dictOddsWordWeights = constVars.dictOddsWeights[label];
    System.Diagnostics.Debug.Assert(dictOddsWordWeights != null, "dictOddsWordWeights is null for label " + label);
    if (dictOddsWordWeights.ContainsKey(word))
    {
        dscore = dictOddsWordWeights.GetCount(word);
    }
    else
    {
        // Fall back to scoring the phrase from its individual words.
        dscore = GetPhraseWeightFromWords(dictOddsWordWeights, word, defaultWt);
    }
    return dscore;
}
private static ICounter<string> GetFeatures(ClustererDataLoader.ClustererDoc doc, IList<Pair<int, int>> mentionPairs, ICounter<Pair<int, int>> scores)
{
    ICounter<string> features = new ClassicCounter<string>();
    double maxScore = 0;
    double minScore = 1;
    // Totals and counts are tracked both overall (under the empty-string key)
    // and per mention-type conjunction (e.g., _PRONOMINAL_NON_PRONOMINAL).
    ICounter<string> totals = new ClassicCounter<string>();
    ICounter<string> totalsLog = new ClassicCounter<string>();
    ICounter<string> counts = new ClassicCounter<string>();
    foreach (Pair<int, int> mentionPair in mentionPairs)
    {
        // Scores are stored under only one orientation of the pair; flip if
        // needed. A local copy is required: C# foreach variables are read-only.
        Pair<int, int> pair = mentionPair;
        if (!scores.ContainsKey(pair))
        {
            pair = new Pair<int, int>(pair.second, pair.first);
        }
        double score = scores.GetCount(pair);
        double logScore = CappedLog(score);
        string mt1 = doc.mentionTypes[pair.first];
        string mt2 = doc.mentionTypes[pair.second];
        mt1 = mt1.Equals("PRONOMINAL") ? "PRONOMINAL" : "NON_PRONOMINAL";
        mt2 = mt2.Equals("PRONOMINAL") ? "PRONOMINAL" : "NON_PRONOMINAL";
        string conj = "_" + mt1 + "_" + mt2;
        maxScore = Math.Max(maxScore, score);
        minScore = Math.Min(minScore, score);
        totals.IncrementCount(string.Empty, score);
        totalsLog.IncrementCount(string.Empty, logScore);
        counts.IncrementCount(string.Empty);
        totals.IncrementCount(conj, score);
        totalsLog.IncrementCount(conj, logScore);
        counts.IncrementCount(conj);
    }
    features.IncrementCount("max", maxScore);
    features.IncrementCount("min", minScore);
    foreach (string key in counts.KeySet())
    {
        features.IncrementCount("avg" + key, totals.GetCount(key) / mentionPairs.Count);
        features.IncrementCount("avgLog" + key, totalsLog.GetCount(key) / mentionPairs.Count);
    }
    return features;
}
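// Illustration: the aggregation above reduced to its core, collapsing a list
// of pairwise scores into max / min / mean / mean-of-logs features. The log
// floor mirrors the intent of CappedLog (avoiding -Infinity at score 0), but
// its exact value here is an assumption.
private static IDictionary<string, double> PairScoreFeaturesSketch(IList<double> pairScores)
{
    double max = 0, min = 1, total = 0, totalLog = 0;
    foreach (double s in pairScores)
    {
        max = Math.Max(max, s);
        min = Math.Min(min, s);
        total += s;
        totalLog += Math.Max(Math.Log(s), -10.0); // hypothetical cap
    }
    return new Dictionary<string, double>
    {
        ["max"] = max,
        ["min"] = min,
        ["avg"] = total / pairScores.Count,
        ["avgLog"] = totalLog / pairScores.Count
    };
}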
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
internal virtual ICounter<E> Convert2OneDim(string label, IToDoubleFunction<Pair<E, CandidatePhrase>> scoringFunction, ICollection<CandidatePhrase> allCandidatePhrases, TwoDimensionalCounter<E, CandidatePhrase> positivePatternsAndWords, bool sqrtPatScore, bool scorePhrasesInPatSelection, ICounter<CandidatePhrase> dictOddsWordWeights, bool useFreqPhraseExtractedByPat)
{
    // if (Data.googleNGram.size() == 0 && Data.googleNGramsFile != null) {
    //   Data.loadGoogleNGrams();
    // }
    ICounter<E> patterns = new ClassicCounter<E>();
    ICounter<CandidatePhrase> googleNgramNormScores = new ClassicCounter<CandidatePhrase>();
    ICounter<CandidatePhrase> domainNgramNormScores = new ClassicCounter<CandidatePhrase>();
    ICounter<CandidatePhrase> externalFeatWtsNormalized = new ClassicCounter<CandidatePhrase>();
    ICounter<CandidatePhrase> editDistanceFromOtherSemanticBinaryScores = new ClassicCounter<CandidatePhrase>();
    ICounter<CandidatePhrase> editDistanceFromAlreadyExtractedBinaryScores = new ClassicCounter<CandidatePhrase>();
    double externalWtsDefault = 0.5;
    // Keyed by CandidatePhrase to match the lookups below; the original
    // ICounter<string> declaration could never contain the CandidatePhrase keys.
    ICounter<CandidatePhrase> classifierScores = null;
    if ((patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.PhEvalInPat) || patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.PhEvalInPatLogP)) && scorePhrasesInPatSelection)
    {
        // Precompute the per-phrase evidence (edit distance, ngram, and
        // word-class scores) used by the phrase-evaluation scoring modes.
        foreach (CandidatePhrase gc in allCandidatePhrases)
        {
            string g = gc.GetPhrase();
            if (constVars.usePatternEvalEditDistOther)
            {
                editDistanceFromOtherSemanticBinaryScores.SetCount(gc, constVars.GetEditDistanceScoresOtherClassThreshold(label, g));
            }
            if (constVars.usePatternEvalEditDistSame)
            {
                editDistanceFromAlreadyExtractedBinaryScores.SetCount(gc, 1 - constVars.GetEditDistanceScoresThisClassThreshold(label, g));
            }
            if (constVars.usePatternEvalGoogleNgram)
            {
                googleNgramNormScores.SetCount(gc, PhraseScorer.GetGoogleNgramScore(gc));
            }
            if (constVars.usePatternEvalDomainNgram)
            {
                // calculate domain-ngram weights
                if (Data.domainNGramRawFreq.ContainsKey(g))
                {
                    System.Diagnostics.Debug.Assert(Data.rawFreq.ContainsKey(gc));
                    domainNgramNormScores.SetCount(gc, scorePhrases.phraseScorer.GetDomainNgramScore(g));
                }
            }
            if (constVars.usePatternEvalWordClass)
            {
                // A nullable int restores the Java original's "no cluster found"
                // semantics; a plain int can never be null.
                int? num = constVars.GetWordClassClusters()[g];
                if (num == null)
                {
                    num = constVars.GetWordClassClusters()[g.ToLower()];
                }
                if (num != null && constVars.distSimWeights[label].ContainsKey(num.Value))
                {
                    externalFeatWtsNormalized.SetCount(gc, constVars.distSimWeights[label].GetCount(num.Value));
                }
                else
                {
                    externalFeatWtsNormalized.SetCount(gc, externalWtsDefault);
                }
            }
        }
        if (constVars.usePatternEvalGoogleNgram)
        {
            googleNgramNormScores = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(googleNgramNormScores, true, true, false);
        }
        if (constVars.usePatternEvalDomainNgram)
        {
            domainNgramNormScores = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(domainNgramNormScores, true, true, false);
        }
        if (constVars.usePatternEvalWordClass)
        {
            externalFeatWtsNormalized = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(externalFeatWtsNormalized, true, true, false);
        }
    }
    else if ((patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.Logreg) || patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.LOGREGlogP)) && scorePhrasesInPatSelection)
    {
        // Score phrases with a learned (logistic-regression style) phrase scorer instead.
        Properties props2 = new Properties();
        props2.PutAll(props);
        props2.SetProperty("phraseScorerClass", "edu.stanford.nlp.patterns.ScorePhrasesLearnFeatWt");
        ScorePhrases scoreclassifier = new ScorePhrases(props2, constVars);
        System.Console.Out.WriteLine("file is " + props.GetProperty("domainNGramsFile"));
        ArgumentParser.FillOptions(typeof(Data), props2);
        classifierScores = scoreclassifier.phraseScorer.ScorePhrases(label, allCandidatePhrases, true);
    }
    ICounter<CandidatePhrase> cachedScoresForThisIter = new ClassicCounter<CandidatePhrase>();
    foreach (KeyValuePair<E, ClassicCounter<CandidatePhrase>> en in positivePatternsAndWords.EntrySet())
    {
        foreach (KeyValuePair<CandidatePhrase, double> en2 in en.Value.EntrySet())
        {
            CandidatePhrase word = en2.Key;
            ICounter<ConstantsAndVariables.ScorePhraseMeasures> scoreslist = new ClassicCounter<ConstantsAndVariables.ScorePhraseMeasures>();
            double score = 1;
            if ((patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.PhEvalInPat) || patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.PhEvalInPatLogP)) && scorePhrasesInPatSelection)
            {
                if (cachedScoresForThisIter.ContainsKey(word))
                {
                    score = cachedScoresForThisIter.GetCount(word);
                }
                else
                {
                    if (constVars.GetOtherSemanticClassesWords().Contains(word) || constVars.GetCommonEngWords().Contains(word))
                    {
                        score = 1;
                    }
                    else
                    {
                        // Most measures are flipped (1 - normalized score) so that
                        // a low value marks a good phrase for this label.
                        if (constVars.usePatternEvalSemanticOdds)
                        {
                            double semanticClassOdds = 1;
                            if (dictOddsWordWeights.ContainsKey(word))
                            {
                                semanticClassOdds = 1 - dictOddsWordWeights.GetCount(word);
                            }
                            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Semanticodds, semanticClassOdds);
                        }
                        if (constVars.usePatternEvalGoogleNgram)
                        {
                            double gscore = 0;
                            if (googleNgramNormScores.ContainsKey(word))
                            {
                                gscore = 1 - googleNgramNormScores.GetCount(word);
                            }
                            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Googlengram, gscore);
                        }
                        if (constVars.usePatternEvalDomainNgram)
                        {
                            double domainscore;
                            if (domainNgramNormScores.ContainsKey(word))
                            {
                                domainscore = 1 - domainNgramNormScores.GetCount(word);
                            }
                            else
                            {
                                domainscore = 1 - scorePhrases.phraseScorer.GetPhraseWeightFromWords(domainNgramNormScores, word, scorePhrases.phraseScorer.OOVDomainNgramScore);
                            }
                            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Domainngram, domainscore);
                        }
                        if (constVars.usePatternEvalWordClass)
                        {
                            double externalFeatureWt = externalWtsDefault;
                            if (externalFeatWtsNormalized.ContainsKey(word))
                            {
                                externalFeatureWt = 1 - externalFeatWtsNormalized.GetCount(word);
                            }
                            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Distsim, externalFeatureWt);
                        }
                        if (constVars.usePatternEvalEditDistOther)
                        {
                            System.Diagnostics.Debug.Assert(editDistanceFromOtherSemanticBinaryScores.ContainsKey(word), "How come no edit distance info for word " + word);
                            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Editdistother, editDistanceFromOtherSemanticBinaryScores.GetCount(word));
                        }
                        if (constVars.usePatternEvalEditDistSame)
                        {
                            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Editdistsame, editDistanceFromAlreadyExtractedBinaryScores.GetCount(word));
                        }
                        // taking average
                        score = Counters.Mean(scoreslist);
                        phInPatScores.SetCounter(word, scoreslist);
                    }
                    cachedScoresForThisIter.SetCount(word, score);
                }
            }
            else if ((patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.Logreg) || patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.LOGREGlogP)) && scorePhrasesInPatSelection)
            {
                score = 1 - classifierScores.GetCount(word);
            }
            // score = 1 - scorePhrases.scoreUsingClassifer(classifier,
            // e.getKey(), label, true, null, null, dictOddsWordWeights);
            // throw new RuntimeException("not implemented yet");
            if (useFreqPhraseExtractedByPat)
            {
                score = score * scoringFunction.ApplyAsDouble(new Pair<E, CandidatePhrase>(en.Key, word));
            }
            if (constVars.sqrtPatScore)
            {
                patterns.IncrementCount(en.Key, Math.Sqrt(score));
            }
            else
            {
                patterns.IncrementCount(en.Key, score);
            }
        }
    }
    return patterns;
}
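// Illustration: how a single pattern's score accumulates in the loop above,
// reduced to a standalone sketch. Each extracted word contributes the mean of
// its measure scores (most measures are stored flipped as 1 - normalizedScore),
// optionally square-rooted; the pattern's score is the sum of contributions.
private static double PatternScoreSketch(IList<IDictionary<string, double>> wordMeasureScores, bool sqrtScore)
{
    double patternScore = 0;
    foreach (var measures in wordMeasureScores)
    {
        if (measures.Count == 0)
        {
            continue;
        }
        double sum = 0;
        foreach (double m in measures.Values)
        {
            sum += m;
        }
        double wordScore = sum / measures.Count; // Counters.Mean equivalent
        patternScore += sqrtScore ? Math.Sqrt(wordScore) : wordScore;
    }
    return patternScore;
}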
internal override ICounter<CandidatePhrase> ScorePhrases(string label, TwoDimensionalCounter<CandidatePhrase, E> terms, TwoDimensionalCounter<CandidatePhrase, E> wordsPatExtracted, ICounter<E> allSelectedPatterns, ICollection<CandidatePhrase> alreadyIdentifiedWords, bool forLearningPatterns)
{
    IDictionary<CandidatePhrase, ICounter<ConstantsAndVariables.ScorePhraseMeasures>> scores = new Dictionary<CandidatePhrase, ICounter<ConstantsAndVariables.ScorePhraseMeasures>>();
    if (Data.domainNGramsFile != null)
    {
        Data.LoadDomainNGrams();
    }
    Redwood.Log(ConstantsAndVariables.extremedebug, "Considering terms: " + terms.FirstKeySet());
    // calculate TF-IDF like scores
    ICounter<CandidatePhrase> tfidfScores = new ClassicCounter<CandidatePhrase>();
    if (constVars.usePhraseEvalPatWtByFreq)
    {
        foreach (KeyValuePair<CandidatePhrase, ClassicCounter<E>> en in terms.EntrySet())
        {
            double score = GetPatTFIDFScore(en.Key, en.Value, allSelectedPatterns);
            tfidfScores.SetCount(en.Key, score);
        }
        Redwood.Log(ConstantsAndVariables.extremedebug, "BEFORE IDF " + Counters.ToSortedString(tfidfScores, 100, "%1$s:%2$f", "\t"));
        Counters.DivideInPlace(tfidfScores, Data.processedDataFreq);
    }
    ICounter<CandidatePhrase> externalFeatWtsNormalized = new ClassicCounter<CandidatePhrase>();
    ICounter<CandidatePhrase> domainNgramNormScores = new ClassicCounter<CandidatePhrase>();
    ICounter<CandidatePhrase> googleNgramNormScores = new ClassicCounter<CandidatePhrase>();
    ICounter<CandidatePhrase> editDistanceOtherBinaryScores = new ClassicCounter<CandidatePhrase>();
    ICounter<CandidatePhrase> editDistanceSameBinaryScores = new ClassicCounter<CandidatePhrase>();
    foreach (CandidatePhrase gc in terms.FirstKeySet())
    {
        string g = gc.GetPhrase();
        if (constVars.usePhraseEvalEditDistOther)
        {
            editDistanceOtherBinaryScores.SetCount(gc, 1 - constVars.GetEditDistanceScoresOtherClassThreshold(label, g));
        }
        if (constVars.usePhraseEvalEditDistSame)
        {
            editDistanceSameBinaryScores.SetCount(gc, constVars.GetEditDistanceScoresThisClassThreshold(label, g));
        }
        if (constVars.usePhraseEvalDomainNgram)
        {
            // calculate domain-ngram weights
            if (Data.domainNGramRawFreq.ContainsKey(g))
            {
                System.Diagnostics.Debug.Assert(Data.rawFreq.ContainsKey(gc));
                domainNgramNormScores.SetCount(gc, GetDomainNgramScore(g));
            }
            else
            {
                log.Info("why is " + g + " not present in domainNgram");
            }
        }
        if (constVars.usePhraseEvalGoogleNgram)
        {
            googleNgramNormScores.SetCount(gc, GetGoogleNgramScore(gc));
        }
        if (constVars.usePhraseEvalWordClass)
        {
            // calculate dist-sim weights; int? restores the Java original's
            // "no cluster found" semantics (a plain int can never be null).
            int? num = constVars.GetWordClassClusters()[g];
            if (num == null)
            {
                num = constVars.GetWordClassClusters()[g.ToLower()];
            }
            if (num != null && constVars.distSimWeights[label].ContainsKey(num.Value))
            {
                externalFeatWtsNormalized.SetCount(gc, constVars.distSimWeights[label].GetCount(num.Value));
            }
            else
            {
                externalFeatWtsNormalized.SetCount(gc, OOVExternalFeatWt);
            }
        }
    }
    ICounter<CandidatePhrase> normTFIDFScores = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(tfidfScores, true, true, false);
    ICounter<CandidatePhrase> dictOddsScores = null;
    if (constVars.usePhraseEvalSemanticOdds)
    {
        System.Diagnostics.Debug.Assert(constVars.dictOddsWeights != null, "usePhraseEvalSemanticOdds is true but dictOddsWeights is null for the label " + label);
        dictOddsScores = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(constVars.dictOddsWeights[label], true, true, false);
    }
    domainNgramNormScores = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(domainNgramNormScores, true, true, false);
    googleNgramNormScores = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(googleNgramNormScores, true, true, false);
    externalFeatWtsNormalized = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(externalFeatWtsNormalized, true, true, false);
    // Counters.max(googleNgramNormScores);
    // Counters.max(externalFeatWtsNormalized);
    foreach (CandidatePhrase word in terms.FirstKeySet())
    {
        if (alreadyIdentifiedWords.Contains(word))
        {
            continue;
        }
        ICounter<ConstantsAndVariables.ScorePhraseMeasures> scoreslist = new ClassicCounter<ConstantsAndVariables.ScorePhraseMeasures>();
        System.Diagnostics.Debug.Assert(normTFIDFScores.ContainsKey(word), "NormTFIDF score does not contain " + word);
        double tfscore = normTFIDFScores.GetCount(word);
        scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Patwtbyfreq, tfscore);
        if (constVars.usePhraseEvalSemanticOdds)
        {
            double dscore;
            if (dictOddsScores.ContainsKey(word))
            {
                dscore = dictOddsScores.GetCount(word);
            }
            else
            {
                dscore = GetPhraseWeightFromWords(dictOddsScores, word, OOVdictOdds);
            }
            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Semanticodds, dscore);
        }
        if (constVars.usePhraseEvalDomainNgram)
        {
            double domainscore;
            if (domainNgramNormScores.ContainsKey(word))
            {
                domainscore = domainNgramNormScores.GetCount(word);
            }
            else
            {
                domainscore = GetPhraseWeightFromWords(domainNgramNormScores, word, OOVDomainNgramScore);
            }
            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Domainngram, domainscore);
        }
        if (constVars.usePhraseEvalGoogleNgram)
        {
            double googlescore;
            if (googleNgramNormScores.ContainsKey(word))
            {
                googlescore = googleNgramNormScores.GetCount(word);
            }
            else
            {
                googlescore = GetPhraseWeightFromWords(googleNgramNormScores, word, OOVGoogleNgramScore);
            }
            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Googlengram, googlescore);
        }
        if (constVars.usePhraseEvalWordClass)
        {
            double externalFeatureWt;
            if (externalFeatWtsNormalized.ContainsKey(word))
            {
                externalFeatureWt = externalFeatWtsNormalized.GetCount(word);
            }
            else
            {
                externalFeatureWt = GetPhraseWeightFromWords(externalFeatWtsNormalized, word, OOVExternalFeatWt);
            }
            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Distsim, externalFeatureWt);
        }
        if (constVars.usePhraseEvalEditDistOther)
        {
            System.Diagnostics.Debug.Assert(editDistanceOtherBinaryScores.ContainsKey(word), "How come no edit distance info?");
            double editD = editDistanceOtherBinaryScores.GetCount(word);
            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Editdistother, editD);
        }
        if (constVars.usePhraseEvalEditDistSame)
        {
            double editDSame = editDistanceSameBinaryScores.GetCount(word);
            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Editdistsame, editDSame);
        }
        if (constVars.usePhraseEvalWordShape)
        {
            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Wordshape, this.GetWordShapeScore(word.GetPhrase(), label));
        }
        scores[word] = scoreslist;
        phraseScoresNormalized.SetCounter(word, scoreslist);
    }
    // Final score per phrase: the mean of its measure scores; drop NaN/infinite means.
    ICounter<CandidatePhrase> phraseScores = new ClassicCounter<CandidatePhrase>();
    foreach (KeyValuePair<CandidatePhrase, ICounter<ConstantsAndVariables.ScorePhraseMeasures>> wEn in scores)
    {
        double avgScore = Counters.Mean(wEn.Value);
        if (!double.IsInfinity(avgScore) && !double.IsNaN(avgScore))
        {
            phraseScores.SetCount(wEn.Key, avgScore);
        }
        else
        {
            Redwood.Log(Redwood.Dbg, "Ignoring " + wEn.Key + " because score is " + avgScore);
        }
    }
    return phraseScores;
}
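// Illustration: the final averaging step above in standalone form. Each
// phrase's score is the mean of its individual measure scores; phrases whose
// mean comes out NaN or infinite are dropped instead of being propagated.
private static IDictionary<string, double> MeanPhraseScoresSketch(IDictionary<string, IList<double>> measureScores)
{
    var phraseScores = new Dictionary<string, double>();
    foreach (var en in measureScores)
    {
        double sum = 0;
        foreach (double s in en.Value)
        {
            sum += s;
        }
        double avg = sum / en.Value.Count;
        if (!double.IsInfinity(avg) && !double.IsNaN(avg))
        {
            phraseScores[en.Key] = avg;
        }
    }
    return phraseScores;
}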
public virtual bool ContainsKey(E key)
{
    return counter.ContainsKey(key);
}