/// <summary>
/// Checks whether <paramref name="unit"/> occurs in the literal string and, if so,
/// parses the string compositionally around it.
/// </summary>
/// <remarks>
/// Chinese numbers compose as (value before unit) * (unit value) + (value after unit),
/// e.g. 三百二十 = 3 * 100 + 20. When 十 or 百 is the first character (e.g. 十二,
/// 百二十), the implicit leading multiplier is 1.
/// </remarks>
/// <param name="s">The literal number string to parse.</param>
/// <param name="unit">The unit character to split on (e.g. 十, 百, 千, 万, 亿).</param>
/// <returns>The parsed value, or null if the unit is invalid/absent or parsing fails.</returns>
private static double? CompositeAtUnitIfExists(string s, string unit)
{
    // Unknown unit: nothing to do.
    if (!quantityUnitToValues.ContainsKey(unit))
    {
        return null;
    }
    int idx = s.IndexOf(unit, System.StringComparison.Ordinal);
    if (idx != -1)
    {
        // Implicit multiplier of 1 when 十/百 is the first char (十二 == 12, 百二十 == 120).
        double? first = 1.0;
        if (!(("十".Equals(unit) || "百".Equals(unit)) && idx == 0))
        {
            // Otherwise parse the value preceding the unit.
            first = RecurNormalizeLiteralIntegerString(s.Substring(0, idx));
        }
        double? second = RecurNormalizeLiteralIntegerString(s.Substring(idx + 1));
        if (first != null && second != null)
        {
            return first.Value * quantityUnitToValues.GetCount(unit) + second.Value;
        }
    }
    // Unit not present, or one of the recursive sub-parses failed.
    return null;
}
/// <summary>
/// Runs the statistical coreference pipeline over one document: scores every
/// unlabeled mention pair with the classification, ranking and anaphoricity
/// models, then applies the clusterer's proposed merges to the document.
/// </summary>
/// <param name="document">The document whose coreference clusters are updated in place.</param>
public virtual void RunCoref(Document document)
{
    IDictionary<Pair<int, int>, bool> mentionPairs = CorefUtils.GetUnlabeledMentionPairs(document);
    // No candidate pairs: nothing to resolve.
    if (mentionPairs.Count == 0)
    {
        return;
    }
    // One compressor instance is shared by all three models' feature lookups.
    Compressor<string> compressor = new Compressor<string>();
    DocumentExamples examples = extractor.Extract(0, document, mentionPairs, compressor);
    ICounter<Pair<int, int>> classificationScores = new ClassicCounter<Pair<int, int>>();
    ICounter<Pair<int, int>> rankingScores = new ClassicCounter<Pair<int, int>>();
    ICounter<int> anaphoricityScores = new ClassicCounter<int>();
    foreach (Example example in examples.examples)
    {
        CorefUtils.CheckForInterrupt();
        Pair<int, int> mentionPair = new Pair<int, int>(example.mentionId1, example.mentionId2);
        classificationScores.IncrementCount(mentionPair, classificationModel.Predict(example, examples.mentionFeatures, compressor));
        rankingScores.IncrementCount(mentionPair, rankingModel.Predict(example, examples.mentionFeatures, compressor));
        // Anaphoricity is per-mention, so compute it only once per anaphor id.
        if (!anaphoricityScores.ContainsKey(example.mentionId2))
        {
            anaphoricityScores.IncrementCount(example.mentionId2, anaphoricityModel.Predict(new Example(example, false), examples.mentionFeatures, compressor));
        }
    }
    // NOTE(review): Collectors.ToMap(null, null) looks like a lost key/value lambda
    // pair from the original Java stream collector — confirm against the Java source.
    ClustererDataLoader.ClustererDoc doc = new ClustererDataLoader.ClustererDoc(0, classificationScores, rankingScores, anaphoricityScores, mentionPairs, null, document.predictedMentionsByID.Stream().Collect(Collectors.ToMap(null, null)));
    // Apply each merge chosen by the clusterer to the document's coref clusters.
    foreach (Pair<int, int> mentionPair_1 in clusterer.GetClusterMerges(doc))
    {
        CorefUtils.MergeCoreferenceClusters(mentionPair_1, document);
    }
}
/// <summary>
/// Initializes the search state for greedy cluster merging over one document:
/// one singleton cluster per mention, plus a candidate mention-pair list pruned
/// by score and early-stopping thresholds, with precomputed global features.
/// </summary>
/// <param name="doc">The preprocessed document with pairwise coreference scores.</param>
public State(ClustererDataLoader.ClustererDoc doc)
{
    currentDocId = doc.id;
    this.doc = doc;
    this.hashedScores = new Dictionary<Clusterer.MergeKey, bool>();
    this.hashedCosts = new Dictionary<long, double>();
    this.clusters = new List<Clusterer.Cluster>();
    this.hash = 0;
    mentionToCluster = new Dictionary<int, Clusterer.Cluster>();
    // Start with every mention in its own singleton cluster. The state hash is an
    // XOR over scaled cluster hashes, so later merges can update it incrementally.
    foreach (int m in doc.mentions)
    {
        Clusterer.Cluster c = new Clusterer.Cluster(m);
        clusters.Add(c);
        mentionToCluster[m] = c;
        hash ^= c.hash * 7;
    }
    IList<Pair<int, int>> allPairs = new List<Pair<int, int>>(doc.classificationScores.KeySet());
    ICounter<Pair<int, int>> scores = UseRanking ? doc.rankingScores : doc.classificationScores;
    // NOTE(review): Sort(null) presumably stood in for a score comparator in the
    // original Java (likely descending by score) — confirm against the Java source.
    allPairs.Sort(null);
    int i = 0;
    // Keep a prefix of the sorted pairs: stop once scores drop below
    // MinPairwiseScore (after at least MinPairs) or the early-stop ratio trips.
    for (i = 0; i < allPairs.Count; i++)
    {
        double score = scores.GetCount(allPairs[i]);
        if (score < MinPairwiseScore && i > MinPairs)
        {
            break;
        }
        if (i >= EarlyStopThreshold && i / score > EarlyStopVal)
        {
            break;
        }
    }
    mentionPairs = allPairs.SubList(0, i);
    ICounter<int> seenAnaphors = new ClassicCounter<int>();
    ICounter<int> seenAntecedents = new ClassicCounter<int>();
    // Precompute per-pair global features over the FULL pair list: position index,
    // whether the anaphor appeared in an earlier pair, pruned-list size, and a
    // document-size feature normalized by 300 mentions.
    globalFeatures = new List<Clusterer.GlobalFeatures>();
    for (int j = 0; j < allPairs.Count; j++)
    {
        Pair<int, int> mentionPair = allPairs[j];
        Clusterer.GlobalFeatures gf = new Clusterer.GlobalFeatures();
        gf.currentIndex = j;
        gf.anaphorSeen = seenAnaphors.ContainsKey(mentionPair.second);
        gf.size = mentionPairs.Count;
        gf.docSize = doc.mentions.Count / 300.0;
        globalFeatures.Add(gf);
        seenAnaphors.IncrementCount(mentionPair.second);
        seenAntecedents.IncrementCount(mentionPair.first);
    }
    currentIndex = 0;
    SetClusters();
}
/// <summary>
/// Recursively parses an integer string expressed in Chinese literals or a mix
/// of Chinese and arabic digits (e.g. 三百二十, 3百2十).
/// </summary>
/// <param name="s">The literal integer string.</param>
/// <returns>The numeric value, or null if the string cannot be parsed.</returns>
private static double? RecurNormalizeLiteralIntegerString(string s)
{
    // Empty substrings arise from the recursion (e.g. nothing after a trailing unit).
    if (s.Length == 0)
    {
        return 0.0;
    }
    // TODO: check if it is valid. It is possible that this is a vague number like
    // "五六十" which cannot be parsed by the current implementation.
    // Pure arabic digits: parse them directly, culture-invariantly.
    if (ArabicNumbersPattern.Matcher(s).Matches())
    {
        return double.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
    }
    // A leading 零/〇 before other characters is a filler (e.g. 一百零五): drop it.
    if (s.Length > 1 && (s.StartsWith("零", System.StringComparison.Ordinal) || s.StartsWith("〇", System.StringComparison.Ordinal)))
    {
        s = s.Substring(1);
    }
    // A single remaining literal digit maps straight to its value.
    if (s.Length == 1 && wordsToValues.ContainsKey(s))
    {
        return wordsToValues.GetCount(s);
    }
    // Otherwise exploit compositionality: try splitting on each unit from
    // largest to smallest (same order as the original if/else chain).
    foreach (string unit in new[] { "亿", "万", "千", "百", "十" })
    {
        double? value = CompositeAtUnitIfExists(s, unit);
        if (value != null)
        {
            return value;
        }
    }
    // No unit matched: we fail to parse.
    return null;
}
/// <summary>
/// Collapses the two-dimensional (pattern, extracted-phrase) counts into one
/// aggregate score per pattern, optionally weighting each phrase by its
/// estimated quality (edit-distance, ngram, word-class or classifier measures).
/// </summary>
/// <param name="label">Label of the class whose patterns are being scored.</param>
/// <param name="scoringFunction">Frequency-based weight for a (pattern, phrase) pair.</param>
/// <param name="allCandidatePhrases">Phrases for which quality scores are precomputed.</param>
/// <param name="positivePatternsAndWords">Counts of phrases extracted by each positive pattern.</param>
/// <param name="sqrtPatScore">NOTE(review): unused — the code consults constVars.sqrtPatScore instead; confirm intended.</param>
/// <param name="scorePhrasesInPatSelection">If true, phrase quality modulates the pattern scores.</param>
/// <param name="dictOddsWordWeights">Dictionary-odds weight per phrase (semantic-odds measure).</param>
/// <param name="useFreqPhraseExtractedByPat">If true, multiply in the scoring function's weight.</param>
/// <returns>A counter with one aggregated score per pattern.</returns>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.TypeLoadException"/>
internal virtual ICounter<E> Convert2OneDim(string label, IToDoubleFunction<Pair<E, CandidatePhrase>> scoringFunction, ICollection<CandidatePhrase> allCandidatePhrases, TwoDimensionalCounter<E, CandidatePhrase> positivePatternsAndWords, bool sqrtPatScore, bool scorePhrasesInPatSelection, ICounter<CandidatePhrase> dictOddsWordWeights, bool useFreqPhraseExtractedByPat)
{
    // if (Data.googleNGram.size() == 0 && Data.googleNGramsFile != null) {
    // Data.loadGoogleNGrams();
    // }
    ICounter<E> patterns = new ClassicCounter<E>();
    // Per-phrase scores under the individual quality measures; filled only for
    // the PhEvalInPat* scoring modes below.
    ICounter<CandidatePhrase> googleNgramNormScores = new ClassicCounter<CandidatePhrase>();
    ICounter<CandidatePhrase> domainNgramNormScores = new ClassicCounter<CandidatePhrase>();
    ICounter<CandidatePhrase> externalFeatWtsNormalized = new ClassicCounter<CandidatePhrase>();
    ICounter<CandidatePhrase> editDistanceFromOtherSemanticBinaryScores = new ClassicCounter<CandidatePhrase>();
    ICounter<CandidatePhrase> editDistanceFromAlreadyExtractedBinaryScores = new ClassicCounter<CandidatePhrase>();
    // Default dist-sim weight for phrases with no known word class.
    double externalWtsDefault = 0.5;
    ICounter<string> classifierScores = null;
    if ((patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.PhEvalInPat) || patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.PhEvalInPatLogP)) && scorePhrasesInPatSelection)
    {
        // Precompute every enabled phrase-quality measure for each candidate phrase.
        foreach (CandidatePhrase gc in allCandidatePhrases)
        {
            string g = gc.GetPhrase();
            if (constVars.usePatternEvalEditDistOther)
            {
                editDistanceFromOtherSemanticBinaryScores.SetCount(gc, constVars.GetEditDistanceScoresOtherClassThreshold(label, g));
            }
            if (constVars.usePatternEvalEditDistSame)
            {
                editDistanceFromAlreadyExtractedBinaryScores.SetCount(gc, 1 - constVars.GetEditDistanceScoresThisClassThreshold(label, g));
            }
            if (constVars.usePatternEvalGoogleNgram)
            {
                googleNgramNormScores.SetCount(gc, PhraseScorer.GetGoogleNgramScore(gc));
            }
            if (constVars.usePatternEvalDomainNgram)
            {
                // calculate domain-ngram wts
                if (Data.domainNGramRawFreq.ContainsKey(g))
                {
                    System.Diagnostics.Debug.Assert((Data.rawFreq.ContainsKey(gc)));
                    domainNgramNormScores.SetCount(gc, scorePhrases.phraseScorer.GetDomainNgramScore(g));
                }
            }
            if (constVars.usePatternEvalWordClass)
            {
                // NOTE(review): `int num` compared against null below looks like a Java
                // Integer translated to a non-nullable int — likely needs int?.
                int num = constVars.GetWordClassClusters()[g];
                if (num == null)
                {
                    // Fall back to the lower-cased phrase for the cluster lookup.
                    num = constVars.GetWordClassClusters()[g.ToLower()];
                }
                if (num != null && constVars.distSimWeights[label].ContainsKey(num))
                {
                    externalFeatWtsNormalized.SetCount(gc, constVars.distSimWeights[label].GetCount(num));
                }
                else
                {
                    externalFeatWtsNormalized.SetCount(gc, externalWtsDefault);
                }
            }
        }
        // Normalize each enabled measure so they are comparable when averaged later.
        if (constVars.usePatternEvalGoogleNgram)
        {
            googleNgramNormScores = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(googleNgramNormScores, true, true, false);
        }
        if (constVars.usePatternEvalDomainNgram)
        {
            domainNgramNormScores = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(domainNgramNormScores, true, true, false);
        }
        if (constVars.usePatternEvalWordClass)
        {
            externalFeatWtsNormalized = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(externalFeatWtsNormalized, true, true, false);
        }
    }
    else
    {
        if ((patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.Logreg) || patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.LOGREGlogP)) && scorePhrasesInPatSelection)
        {
            // Logistic-regression modes: score all phrases with a learned classifier
            // instead of the hand-built measures above.
            Properties props2 = new Properties();
            props2.PutAll(props);
            props2.SetProperty("phraseScorerClass", "edu.stanford.nlp.patterns.ScorePhrasesLearnFeatWt");
            ScorePhrases scoreclassifier = new ScorePhrases(props2, constVars);
            System.Console.Out.WriteLine("file is " + props.GetProperty("domainNGramsFile"));
            ArgumentParser.FillOptions(typeof(Data), props2);
            classifierScores = scoreclassifier.phraseScorer.ScorePhrases(label, allCandidatePhrases, true);
        }
    }
    // A phrase's quality is independent of the pattern, so cache it per call.
    ICounter<CandidatePhrase> cachedScoresForThisIter = new ClassicCounter<CandidatePhrase>();
    foreach (KeyValuePair<E, ClassicCounter<CandidatePhrase>> en in positivePatternsAndWords.EntrySet())
    {
        foreach (KeyValuePair<CandidatePhrase, double> en2 in en.Value.EntrySet())
        {
            CandidatePhrase word = en2.Key;
            ICounter<ConstantsAndVariables.ScorePhraseMeasures> scoreslist = new ClassicCounter<ConstantsAndVariables.ScorePhraseMeasures>();
            double score = 1;
            if ((patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.PhEvalInPat) || patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.PhEvalInPatLogP)) && scorePhrasesInPatSelection)
            {
                if (cachedScoresForThisIter.ContainsKey(word))
                {
                    score = cachedScoresForThisIter.GetCount(word);
                }
                else
                {
                    if (constVars.GetOtherSemanticClassesWords().Contains(word) || constVars.GetCommonEngWords().Contains(word))
                    {
                        // Phrases known to belong to other classes, or common English
                        // words, keep the default weight of 1.
                        score = 1;
                    }
                    else
                    {
                        // Each measure is recorded as (1 - normalized goodness).
                        if (constVars.usePatternEvalSemanticOdds)
                        {
                            double semanticClassOdds = 1;
                            if (dictOddsWordWeights.ContainsKey(word))
                            {
                                semanticClassOdds = 1 - dictOddsWordWeights.GetCount(word);
                            }
                            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Semanticodds, semanticClassOdds);
                        }
                        if (constVars.usePatternEvalGoogleNgram)
                        {
                            double gscore = 0;
                            if (googleNgramNormScores.ContainsKey(word))
                            {
                                gscore = 1 - googleNgramNormScores.GetCount(word);
                            }
                            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Googlengram, gscore);
                        }
                        if (constVars.usePatternEvalDomainNgram)
                        {
                            double domainscore;
                            if (domainNgramNormScores.ContainsKey(word))
                            {
                                domainscore = 1 - domainNgramNormScores.GetCount(word);
                            }
                            else
                            {
                                // Out-of-vocabulary: back off via GetPhraseWeightFromWords
                                // with the OOV domain-ngram default.
                                domainscore = 1 - scorePhrases.phraseScorer.GetPhraseWeightFromWords(domainNgramNormScores, word, scorePhrases.phraseScorer.OOVDomainNgramScore);
                            }
                            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Domainngram, domainscore);
                        }
                        if (constVars.usePatternEvalWordClass)
                        {
                            double externalFeatureWt = externalWtsDefault;
                            if (externalFeatWtsNormalized.ContainsKey(word))
                            {
                                externalFeatureWt = 1 - externalFeatWtsNormalized.GetCount(word);
                            }
                            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Distsim, externalFeatureWt);
                        }
                        if (constVars.usePatternEvalEditDistOther)
                        {
                            System.Diagnostics.Debug.Assert(editDistanceFromOtherSemanticBinaryScores.ContainsKey(word), "How come no edit distance info for word " + word + string.Empty);
                            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Editdistother, editDistanceFromOtherSemanticBinaryScores.GetCount(word));
                        }
                        if (constVars.usePatternEvalEditDistSame)
                        {
                            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Editdistsame, editDistanceFromAlreadyExtractedBinaryScores.GetCount(word));
                        }
                        // taking average
                        score = Counters.Mean(scoreslist);
                        phInPatScores.SetCounter(word, scoreslist);
                    }
                    cachedScoresForThisIter.SetCount(word, score);
                }
            }
            else
            {
                if ((patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.Logreg) || patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.LOGREGlogP)) && scorePhrasesInPatSelection)
                {
                    score = 1 - classifierScores.GetCount(word);
                }
            }
            // score = 1 - scorePhrases.scoreUsingClassifer(classifier,
            // e.getKey(), label, true, null, null, dictOddsWordWeights);
            // throw new RuntimeException("not implemented yet");
            if (useFreqPhraseExtractedByPat)
            {
                score = score * scoringFunction.ApplyAsDouble(new Pair<E, CandidatePhrase>(en.Key, word));
            }
            // Aggregate into the pattern's total, optionally square-rooted.
            if (constVars.sqrtPatScore)
            {
                patterns.IncrementCount(en.Key, Math.Sqrt(score));
            }
            else
            {
                patterns.IncrementCount(en.Key, score);
            }
        }
    }
    return (patterns);
}
/// <summary>
/// Scores each candidate phrase by averaging the enabled phrase-evaluation
/// measures (pattern TF-IDF, semantic odds, domain/Google ngram, dist-sim word
/// class, edit distances, word shape) and returns the per-phrase mean scores.
/// </summary>
/// <param name="label">Label of the class being learned.</param>
/// <param name="terms">Candidate phrases keyed to the patterns that extracted them.</param>
/// <param name="wordsPatExtracted">NOTE(review): unused in this method body.</param>
/// <param name="allSelectedPatterns">Scores of patterns selected so far (TF-IDF weighting).</param>
/// <param name="alreadyIdentifiedWords">Phrases learned in earlier iterations; skipped.</param>
/// <param name="forLearningPatterns">NOTE(review): unused in this method body.</param>
/// <returns>Counter of mean scores per phrase; infinite/NaN means are dropped.</returns>
internal override ICounter<CandidatePhrase> ScorePhrases(string label, TwoDimensionalCounter<CandidatePhrase, E> terms, TwoDimensionalCounter<CandidatePhrase, E> wordsPatExtracted, ICounter<E> allSelectedPatterns, ICollection<CandidatePhrase> alreadyIdentifiedWords, bool forLearningPatterns)
{
    IDictionary<CandidatePhrase, ICounter<ConstantsAndVariables.ScorePhraseMeasures>> scores = new Dictionary<CandidatePhrase, ICounter<ConstantsAndVariables.ScorePhraseMeasures>>();
    if (Data.domainNGramsFile != null)
    {
        Data.LoadDomainNGrams();
    }
    Redwood.Log(ConstantsAndVariables.extremedebug, "Considering terms: " + terms.FirstKeySet());
    // calculate TF-IDF like scores
    ICounter<CandidatePhrase> tfidfScores = new ClassicCounter<CandidatePhrase>();
    if (constVars.usePhraseEvalPatWtByFreq)
    {
        foreach (KeyValuePair<CandidatePhrase, ClassicCounter<E>> en in terms.EntrySet())
        {
            double score = GetPatTFIDFScore(en.Key, en.Value, allSelectedPatterns);
            tfidfScores.SetCount(en.Key, score);
        }
        Redwood.Log(ConstantsAndVariables.extremedebug, "BEFORE IDF " + Counters.ToSortedString(tfidfScores, 100, "%1$s:%2$f", "\t"));
        // IDF step: divide the TF-style scores by overall corpus frequency.
        Counters.DivideInPlace(tfidfScores, Data.processedDataFreq);
    }
    ICounter<CandidatePhrase> externalFeatWtsNormalized = new ClassicCounter<CandidatePhrase>();
    ICounter<CandidatePhrase> domainNgramNormScores = new ClassicCounter<CandidatePhrase>();
    ICounter<CandidatePhrase> googleNgramNormScores = new ClassicCounter<CandidatePhrase>();
    ICounter<CandidatePhrase> editDistanceOtherBinaryScores = new ClassicCounter<CandidatePhrase>();
    ICounter<CandidatePhrase> editDistanceSameBinaryScores = new ClassicCounter<CandidatePhrase>();
    // Precompute each enabled raw measure for every candidate phrase.
    foreach (CandidatePhrase gc in terms.FirstKeySet())
    {
        string g = gc.GetPhrase();
        if (constVars.usePhraseEvalEditDistOther)
        {
            editDistanceOtherBinaryScores.SetCount(gc, 1 - constVars.GetEditDistanceScoresOtherClassThreshold(label, g));
        }
        if (constVars.usePhraseEvalEditDistSame)
        {
            editDistanceSameBinaryScores.SetCount(gc, constVars.GetEditDistanceScoresThisClassThreshold(label, g));
        }
        if (constVars.usePhraseEvalDomainNgram)
        {
            // calculate domain-ngram wts
            if (Data.domainNGramRawFreq.ContainsKey(g))
            {
                System.Diagnostics.Debug.Assert((Data.rawFreq.ContainsKey(gc)));
                domainNgramNormScores.SetCount(gc, GetDomainNgramScore(g));
            }
            else
            {
                log.Info("why is " + g + " not present in domainNgram");
            }
        }
        if (constVars.usePhraseEvalGoogleNgram)
        {
            googleNgramNormScores.SetCount(gc, GetGoogleNgramScore(gc));
        }
        if (constVars.usePhraseEvalWordClass)
        {
            // calculate dist sim weights
            // NOTE(review): `int num` compared against null — Java Integer artifact;
            // likely needs int? in a faithful C# port.
            int num = constVars.GetWordClassClusters()[g];
            if (num == null)
            {
                num = constVars.GetWordClassClusters()[g.ToLower()];
            }
            if (num != null && constVars.distSimWeights[label].ContainsKey(num))
            {
                externalFeatWtsNormalized.SetCount(gc, constVars.distSimWeights[label].GetCount(num));
            }
            else
            {
                externalFeatWtsNormalized.SetCount(gc, OOVExternalFeatWt);
            }
        }
    }
    // Normalize every measure into a comparable range before averaging.
    ICounter<CandidatePhrase> normTFIDFScores = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(tfidfScores, true, true, false);
    ICounter<CandidatePhrase> dictOdddsScores = null;
    if (constVars.usePhraseEvalSemanticOdds)
    {
        System.Diagnostics.Debug.Assert(constVars.dictOddsWeights != null, "usePhraseEvalSemanticOdds is true but dictOddsWeights is null for the label " + label);
        dictOdddsScores = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(constVars.dictOddsWeights[label], true, true, false);
    }
    domainNgramNormScores = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(domainNgramNormScores, true, true, false);
    googleNgramNormScores = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(googleNgramNormScores, true, true, false);
    externalFeatWtsNormalized = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(externalFeatWtsNormalized, true, true, false);
    // Counters.max(googleNgramNormScores);
    // Counters.max(externalFeatWtsNormalized);
    // Assemble the per-phrase score vector from the normalized measures.
    foreach (CandidatePhrase word in terms.FirstKeySet())
    {
        // Skip phrases already learned in earlier iterations.
        if (alreadyIdentifiedWords.Contains(word))
        {
            continue;
        }
        ICounter<ConstantsAndVariables.ScorePhraseMeasures> scoreslist = new ClassicCounter<ConstantsAndVariables.ScorePhraseMeasures>();
        System.Diagnostics.Debug.Assert(normTFIDFScores.ContainsKey(word), "NormTFIDF score does not contain" + word);
        double tfscore = normTFIDFScores.GetCount(word);
        scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Patwtbyfreq, tfscore);
        if (constVars.usePhraseEvalSemanticOdds)
        {
            double dscore;
            if (dictOdddsScores.ContainsKey(word))
            {
                dscore = dictOdddsScores.GetCount(word);
            }
            else
            {
                // Out-of-vocabulary: back off via GetPhraseWeightFromWords with the OOV default.
                dscore = GetPhraseWeightFromWords(dictOdddsScores, word, OOVdictOdds);
            }
            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Semanticodds, dscore);
        }
        if (constVars.usePhraseEvalDomainNgram)
        {
            double domainscore;
            if (domainNgramNormScores.ContainsKey(word))
            {
                domainscore = domainNgramNormScores.GetCount(word);
            }
            else
            {
                domainscore = GetPhraseWeightFromWords(domainNgramNormScores, word, OOVDomainNgramScore);
            }
            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Domainngram, domainscore);
        }
        if (constVars.usePhraseEvalGoogleNgram)
        {
            double googlescore;
            if (googleNgramNormScores.ContainsKey(word))
            {
                googlescore = googleNgramNormScores.GetCount(word);
            }
            else
            {
                googlescore = GetPhraseWeightFromWords(googleNgramNormScores, word, OOVGoogleNgramScore);
            }
            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Googlengram, googlescore);
        }
        if (constVars.usePhraseEvalWordClass)
        {
            double externalFeatureWt;
            if (externalFeatWtsNormalized.ContainsKey(word))
            {
                externalFeatureWt = externalFeatWtsNormalized.GetCount(word);
            }
            else
            {
                externalFeatureWt = GetPhraseWeightFromWords(externalFeatWtsNormalized, word, OOVExternalFeatWt);
            }
            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Distsim, externalFeatureWt);
        }
        if (constVars.usePhraseEvalEditDistOther)
        {
            System.Diagnostics.Debug.Assert(editDistanceOtherBinaryScores.ContainsKey(word), "How come no edit distance info?");
            double editD = editDistanceOtherBinaryScores.GetCount(word);
            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Editdistother, editD);
        }
        if (constVars.usePhraseEvalEditDistSame)
        {
            double editDSame = editDistanceSameBinaryScores.GetCount(word);
            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Editdistsame, editDSame);
        }
        if (constVars.usePhraseEvalWordShape)
        {
            scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Wordshape, this.GetWordShapeScore(word.GetPhrase(), label));
        }
        scores[word] = scoreslist;
        phraseScoresNormalized.SetCounter(word, scoreslist);
    }
    // Final score is the mean over the collected measures; drop degenerate values.
    ICounter<CandidatePhrase> phraseScores = new ClassicCounter<CandidatePhrase>();
    foreach (KeyValuePair<CandidatePhrase, ICounter<ConstantsAndVariables.ScorePhraseMeasures>> wEn in scores)
    {
        double avgScore = Counters.Mean(wEn.Value);
        // NOTE(review): avgScore.IsInfinite() is a Java Double artifact — standard C#
        // would be double.IsInfinity(avgScore); confirm against the conversion runtime.
        if (!avgScore.IsInfinite() && !double.IsNaN(avgScore))
        {
            phraseScores.SetCount(wEn.Key, avgScore);
        }
        else
        {
            Redwood.Log(Redwood.Dbg, "Ignoring " + wEn.Key + " because score is " + avgScore);
        }
    }
    return (phraseScores);
}
/// <summary>
/// Scores a word/tag pair for unknown-word handling in the Chinese lexicon:
/// date-, number- and proper-name-like words are forced to their dedicated tags
/// (NT, CD/OD, NR); anything else is smoothed from first-character statistics.
/// </summary>
/// <param name="itw">The tag (looked up through tagIndex) being scored.</param>
/// <param name="word">The word form.</param>
/// <returns>A log probability; negative infinity when the tag is incompatible.</returns>
public override float Score(IntTaggedWord itw, string word)
{
    // Label tagL = itw.tagLabel();
    // String tag = tagL.value();
    string tag = itw.TagString(tagIndex);
    ILabel tagL = new Tag(tag);
    float logProb;
    if (word.Matches(dateMatch))
    {
        //EncodingPrintWriter.out.println("Date match for " + word,encoding);
        // Date-like words can only be NT.
        if (tag.Equals("NT"))
        {
            logProb = 0.0f;
        }
        else
        {
            logProb = float.NegativeInfinity;
        }
    }
    else
    {
        if (word.Matches(numberMatch))
        {
            //EncodingPrintWriter.out.println("Number match for " + word,encoding);
            // Cardinals get CD (but not if the word looks ordinal); ordinals get OD.
            if (tag.Equals("CD") && (!word.Matches(ordinalMatch)))
            {
                logProb = 0.0f;
            }
            else
            {
                if (tag.Equals("OD") && word.Matches(ordinalMatch))
                {
                    logProb = 0.0f;
                }
                else
                {
                    logProb = float.NegativeInfinity;
                }
            }
        }
        else
        {
            if (word.Matches(properNameMatch))
            {
                //EncodingPrintWriter.out.println("Proper name match for " + word,encoding);
                // Proper-name-like words can only be NR.
                if (tag.Equals("NR"))
                {
                    logProb = 0.0f;
                }
                else
                {
                    logProb = float.NegativeInfinity;
                }
            }
            else
            {
                /* ------------- * // this didn't seem to work -- too categorical
                 * int type = Character.getType(word.charAt(0));
                 * // the below may not normalize probs over options, but is probably okay
                 * if (type == Character.START_PUNCTUATION) {
                 * if (tag.equals("PU-LPAREN") || tag.equals("PU-PAREN") ||
                 * tag.equals("PU-LQUOTE") || tag.equals("PU-QUOTE") ||
                 * tag.equals("PU")) {
                 * // if (VERBOSE) log.info("ChineseUWM: unknown L Punc");
                 * logProb = 0.0f;
                 * } else {
                 * logProb = Float.NEGATIVE_INFINITY;
                 * }
                 * } else if (type == Character.END_PUNCTUATION) {
                 * if (tag.equals("PU-RPAREN") || tag.equals("PU-PAREN") ||
                 * tag.equals("PU-RQUOTE") || tag.equals("PU-QUOTE") ||
                 * tag.equals("PU")) {
                 * // if (VERBOSE) log.info("ChineseUWM: unknown R Punc");
                 * logProb = 0.0f;
                 * } else {
                 * logProb = Float.NEGATIVE_INFINITY;
                 * }
                 * } else {
                 * if (tag.equals("PU-OTHER") || tag.equals("PU-ENDSENT") ||
                 * tag.equals("PU")) {
                 * // if (VERBOSE) log.info("ChineseUWM: unknown O Punc");
                 * logProb = 0.0f;
                 * } else {
                 * logProb = Float.NEGATIVE_INFINITY;
                 * }
                 * }
                 * ------------- */
                if (useFirst)
                {
                    // Back off to the first character of the unknown word.
                    string first = Sharpen.Runtime.Substring(word, 0, 1);
                    if (useUnicodeType)
                    {
                        char ch = word[0];
                        // NOTE(review): char.GetType(ch) / char.OtherLetter / int.ToString(type)
                        // look like unconverted Java Character.getType idioms — confirm the
                        // conversion runtime supplies these (C# would use CharUnicodeInfo).
                        int type = char.GetType(ch);
                        if (type != char.OtherLetter)
                        {
                            // standard Chinese characters are of type "OTHER_LETTER"!!
                            first = int.ToString(type);
                        }
                    }
                    // Unseen first character: use Good-Turing smoothing if enabled,
                    // otherwise substitute the generic unknown symbol.
                    if (!seenFirst.Contains(first))
                    {
                        if (useGT)
                        {
                            logProb = ScoreGT(tag);
                            goto first_break;
                        }
                        else
                        {
                            first = unknown;
                        }
                    }
                    /* get the Counter of terminal rewrites for the relevant tag */
                    ClassicCounter<string> wordProbs = tagHash[tagL];
                    /* if the proposed tag has never been seen before, issue a
                     * warning and return probability 0. */
                    if (wordProbs == null)
                    {
                        logProb = float.NegativeInfinity;
                    }
                    else
                    {
                        if (wordProbs.ContainsKey(first))
                        {
                            logProb = (float)wordProbs.GetCount(first);
                        }
                        else
                        {
                            logProb = (float)wordProbs.GetCount(unknown);
                        }
                    }
                }
                else
                {
                    if (useGT)
                    {
                        logProb = ScoreGT(tag);
                    }
                    else
                    {
                        logProb = float.NegativeInfinity;
                    }
                }
                first_break :;
            }
        }
    }
    // should never get this!
    return (logProb);
}
/// <summary>
/// Ad hoc smoke test: exercises the unknown-word regexes (proper names,
/// numbers, dates) on sample strings, then checks equality and counter-key
/// behavior of TaggedWord versus WordTag.
/// </summary>
/// <param name="args">Unused.</param>
public static void Main(string[] args)
{
    System.Console.Out.WriteLine("Testing unknown matching");
    // Prints "hooray <kind>!" when the sample matches, "Uh-oh <kind>!" otherwise.
    void Report(bool matched, string kind)
    {
        System.Console.Out.WriteLine(matched ? "hooray " + kind + "!" : "Uh-oh " + kind + "!");
    }
    Report("\u5218\u00b7\u9769\u547d".Matches(properNameMatch), "names");
    Report("\uff13\uff10\uff10\uff10".Matches(numberMatch), "numbers");
    Report("\u767e\u5206\u4e4b\u56db\u5341\u4e09\u70b9\u4e8c".Matches(numberMatch), "numbers");
    Report("\u767e\u5206\u4e4b\u4e09\u5341\u516b\u70b9\u516d".Matches(numberMatch), "numbers");
    Report("\u4e09\u6708".Matches(dateMatch), "dates");
    System.Console.Out.WriteLine("Testing tagged word");
    // Two TaggedWords sharing a word but differing in tag: probe counter-key
    // containment and equality.
    ClassicCounter<TaggedWord> c = new ClassicCounter<TaggedWord>();
    TaggedWord tw1 = new TaggedWord("w", "t");
    c.IncrementCount(tw1);
    TaggedWord tw2 = new TaggedWord("w", "t2");
    System.Console.Out.WriteLine(c.ContainsKey(tw2));
    System.Console.Out.WriteLine(tw1.Equals(tw2));
    // The same comparisons after converting to WordTag.
    WordTag wt1 = ToWordTag(tw1);
    WordTag wt2 = ToWordTag(tw2);
    WordTag wt3 = new WordTag("w", "t2");
    System.Console.Out.WriteLine(wt1.Equals(wt2));
    System.Console.Out.WriteLine(wt2.Equals(wt3));
}