Exemple #1
0
        /// <summary>This computes the average over all folds of the function we're trying to optimize.</summary>
        /// <remarks>
        /// This computes the average over all folds of the function we're trying to optimize.
        /// The input triple contains, in order, the train set, the test set, and the saved state.
        /// You don't have to use the saved state if you don't want to.
        /// </remarks>
        public virtual double ComputeAverage(IToDoubleFunction <Triple <GeneralDataset <L, F>, GeneralDataset <L, F>, CrossValidator.SavedState> > function)
        {
            double sum = 0;
            IEnumerator <Triple <GeneralDataset <L, F>, GeneralDataset <L, F>, CrossValidator.SavedState> > foldIt = Iterator();

            while (foldIt.MoveNext())
            {
                sum += function.ApplyAsDouble(foldIt.Current);
            }
            return(sum / kFold);
        }
        public static T GetBestMatched <T>(IList <T> matches, IToDoubleFunction <MatchedExpression> scorer)
            where T : MatchedExpression
        {
            if (matches == null || matches.IsEmpty())
            {
                return(null);
            }
            T      best      = null;
            double bestScore = double.NegativeInfinity;

            foreach (T m in matches)
            {
                double s = scorer.ApplyAsDouble(m);
                if (best == null || s > bestScore)
                {
                    best      = m;
                    bestScore = s;
                }
            }
            return(best);
        }
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        internal virtual ICounter <E> Convert2OneDim(string label, IToDoubleFunction <Pair <E, CandidatePhrase> > scoringFunction, ICollection <CandidatePhrase> allCandidatePhrases, TwoDimensionalCounter <E, CandidatePhrase> positivePatternsAndWords, bool
                                                     sqrtPatScore, bool scorePhrasesInPatSelection, ICounter <CandidatePhrase> dictOddsWordWeights, bool useFreqPhraseExtractedByPat)
        {
            //    if (Data.googleNGram.size() == 0 && Data.googleNGramsFile != null) {
            //      Data.loadGoogleNGrams();
            //    }
            ICounter <E> patterns = new ClassicCounter <E>();
            ICounter <CandidatePhrase> googleNgramNormScores     = new ClassicCounter <CandidatePhrase>();
            ICounter <CandidatePhrase> domainNgramNormScores     = new ClassicCounter <CandidatePhrase>();
            ICounter <CandidatePhrase> externalFeatWtsNormalized = new ClassicCounter <CandidatePhrase>();
            ICounter <CandidatePhrase> editDistanceFromOtherSemanticBinaryScores    = new ClassicCounter <CandidatePhrase>();
            ICounter <CandidatePhrase> editDistanceFromAlreadyExtractedBinaryScores = new ClassicCounter <CandidatePhrase>();
            double            externalWtsDefault = 0.5;
            ICounter <string> classifierScores   = null;

            if ((patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.PhEvalInPat) || patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.PhEvalInPatLogP)) && scorePhrasesInPatSelection)
            {
                foreach (CandidatePhrase gc in allCandidatePhrases)
                {
                    string g = gc.GetPhrase();
                    if (constVars.usePatternEvalEditDistOther)
                    {
                        editDistanceFromOtherSemanticBinaryScores.SetCount(gc, constVars.GetEditDistanceScoresOtherClassThreshold(label, g));
                    }
                    if (constVars.usePatternEvalEditDistSame)
                    {
                        editDistanceFromAlreadyExtractedBinaryScores.SetCount(gc, 1 - constVars.GetEditDistanceScoresThisClassThreshold(label, g));
                    }
                    if (constVars.usePatternEvalGoogleNgram)
                    {
                        googleNgramNormScores.SetCount(gc, PhraseScorer.GetGoogleNgramScore(gc));
                    }
                    if (constVars.usePatternEvalDomainNgram)
                    {
                        // calculate domain-ngram wts
                        if (Data.domainNGramRawFreq.ContainsKey(g))
                        {
                            System.Diagnostics.Debug.Assert((Data.rawFreq.ContainsKey(gc)));
                            domainNgramNormScores.SetCount(gc, scorePhrases.phraseScorer.GetDomainNgramScore(g));
                        }
                    }
                    if (constVars.usePatternEvalWordClass)
                    {
                        int num = constVars.GetWordClassClusters()[g];
                        if (num == null)
                        {
                            num = constVars.GetWordClassClusters()[g.ToLower()];
                        }
                        if (num != null && constVars.distSimWeights[label].ContainsKey(num))
                        {
                            externalFeatWtsNormalized.SetCount(gc, constVars.distSimWeights[label].GetCount(num));
                        }
                        else
                        {
                            externalFeatWtsNormalized.SetCount(gc, externalWtsDefault);
                        }
                    }
                }
                if (constVars.usePatternEvalGoogleNgram)
                {
                    googleNgramNormScores = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(googleNgramNormScores, true, true, false);
                }
                if (constVars.usePatternEvalDomainNgram)
                {
                    domainNgramNormScores = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(domainNgramNormScores, true, true, false);
                }
                if (constVars.usePatternEvalWordClass)
                {
                    externalFeatWtsNormalized = GetPatternsFromDataMultiClass.NormalizeSoftMaxMinMaxScores(externalFeatWtsNormalized, true, true, false);
                }
            }
            else
            {
                if ((patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.Logreg) || patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.LOGREGlogP)) && scorePhrasesInPatSelection)
                {
                    Properties props2 = new Properties();
                    props2.PutAll(props);
                    props2.SetProperty("phraseScorerClass", "edu.stanford.nlp.patterns.ScorePhrasesLearnFeatWt");
                    ScorePhrases scoreclassifier = new ScorePhrases(props2, constVars);
                    System.Console.Out.WriteLine("file is " + props.GetProperty("domainNGramsFile"));
                    ArgumentParser.FillOptions(typeof(Data), props2);
                    classifierScores = scoreclassifier.phraseScorer.ScorePhrases(label, allCandidatePhrases, true);
                }
            }
            ICounter <CandidatePhrase> cachedScoresForThisIter = new ClassicCounter <CandidatePhrase>();

            foreach (KeyValuePair <E, ClassicCounter <CandidatePhrase> > en in positivePatternsAndWords.EntrySet())
            {
                foreach (KeyValuePair <CandidatePhrase, double> en2 in en.Value.EntrySet())
                {
                    CandidatePhrase word = en2.Key;
                    ICounter <ConstantsAndVariables.ScorePhraseMeasures> scoreslist = new ClassicCounter <ConstantsAndVariables.ScorePhraseMeasures>();
                    double score = 1;
                    if ((patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.PhEvalInPat) || patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.PhEvalInPatLogP)) && scorePhrasesInPatSelection)
                    {
                        if (cachedScoresForThisIter.ContainsKey(word))
                        {
                            score = cachedScoresForThisIter.GetCount(word);
                        }
                        else
                        {
                            if (constVars.GetOtherSemanticClassesWords().Contains(word) || constVars.GetCommonEngWords().Contains(word))
                            {
                                score = 1;
                            }
                            else
                            {
                                if (constVars.usePatternEvalSemanticOdds)
                                {
                                    double semanticClassOdds = 1;
                                    if (dictOddsWordWeights.ContainsKey(word))
                                    {
                                        semanticClassOdds = 1 - dictOddsWordWeights.GetCount(word);
                                    }
                                    scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Semanticodds, semanticClassOdds);
                                }
                                if (constVars.usePatternEvalGoogleNgram)
                                {
                                    double gscore = 0;
                                    if (googleNgramNormScores.ContainsKey(word))
                                    {
                                        gscore = 1 - googleNgramNormScores.GetCount(word);
                                    }
                                    scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Googlengram, gscore);
                                }
                                if (constVars.usePatternEvalDomainNgram)
                                {
                                    double domainscore;
                                    if (domainNgramNormScores.ContainsKey(word))
                                    {
                                        domainscore = 1 - domainNgramNormScores.GetCount(word);
                                    }
                                    else
                                    {
                                        domainscore = 1 - scorePhrases.phraseScorer.GetPhraseWeightFromWords(domainNgramNormScores, word, scorePhrases.phraseScorer.OOVDomainNgramScore);
                                    }
                                    scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Domainngram, domainscore);
                                }
                                if (constVars.usePatternEvalWordClass)
                                {
                                    double externalFeatureWt = externalWtsDefault;
                                    if (externalFeatWtsNormalized.ContainsKey(word))
                                    {
                                        externalFeatureWt = 1 - externalFeatWtsNormalized.GetCount(word);
                                    }
                                    scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Distsim, externalFeatureWt);
                                }
                                if (constVars.usePatternEvalEditDistOther)
                                {
                                    System.Diagnostics.Debug.Assert(editDistanceFromOtherSemanticBinaryScores.ContainsKey(word), "How come no edit distance info for word " + word + string.Empty);
                                    scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Editdistother, editDistanceFromOtherSemanticBinaryScores.GetCount(word));
                                }
                                if (constVars.usePatternEvalEditDistSame)
                                {
                                    scoreslist.SetCount(ConstantsAndVariables.ScorePhraseMeasures.Editdistsame, editDistanceFromAlreadyExtractedBinaryScores.GetCount(word));
                                }
                                // taking average
                                score = Counters.Mean(scoreslist);
                                phInPatScores.SetCounter(word, scoreslist);
                            }
                            cachedScoresForThisIter.SetCount(word, score);
                        }
                    }
                    else
                    {
                        if ((patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.Logreg) || patternScoring.Equals(GetPatternsFromDataMultiClass.PatternScoring.LOGREGlogP)) && scorePhrasesInPatSelection)
                        {
                            score = 1 - classifierScores.GetCount(word);
                        }
                    }
                    // score = 1 - scorePhrases.scoreUsingClassifer(classifier,
                    // e.getKey(), label, true, null, null, dictOddsWordWeights);
                    // throw new RuntimeException("not implemented yet");
                    if (useFreqPhraseExtractedByPat)
                    {
                        score = score * scoringFunction.ApplyAsDouble(new Pair <E, CandidatePhrase>(en.Key, word));
                    }
                    if (constVars.sqrtPatScore)
                    {
                        patterns.IncrementCount(en.Key, Math.Sqrt(score));
                    }
                    else
                    {
                        patterns.IncrementCount(en.Key, score);
                    }
                }
            }
            return(patterns);
        }
Exemple #4
0
 public virtual double GetCost(V value)
 {
     return(costFunction.ApplyAsDouble(value));
 }
 public static IList <T> GetNonOverlappingMaxScore <T, E, _T2, _T3, _T4>(IList <_T2> items, IFunction <_T3> toIntervalFunc, IToDoubleFunction <_T4> scoreFunc)
     where E : IComparable <E>
     where _T2 : T
 {
     if (items.Count > 1)
     {
         IDictionary <E, IntervalTree.PartialScoredList <T, E> > bestNonOverlapping = new SortedDictionary <E, IntervalTree.PartialScoredList <T, E> >();
         foreach (T item in items)
         {
             Interval <E> itemInterval = toIntervalFunc.Apply(item);
             E            mBegin       = itemInterval.GetBegin();
             E            mEnd         = itemInterval.GetEnd();
             IntervalTree.PartialScoredList <T, E> bestk = bestNonOverlapping[mEnd];
             double itemScore = scoreFunc.ApplyAsDouble(item);
             if (bestk == null)
             {
                 bestk                    = new IntervalTree.PartialScoredList <T, E>();
                 bestk.size               = 1;
                 bestk.score              = itemScore;
                 bestk.@object            = item;
                 bestNonOverlapping[mEnd] = bestk;
             }
             // Assumes map is ordered
             foreach (E j in bestNonOverlapping.Keys)
             {
                 if (j.CompareTo(mBegin) > 0)
                 {
                     break;
                 }
                 // Consider adding this match into the bestNonOverlapping strand at j
                 IntervalTree.PartialScoredList <T, E> bestj = bestNonOverlapping[j];
                 double withMatchScore = bestj.score + itemScore;
                 bool   better         = false;
                 if (withMatchScore > bestk.score)
                 {
                     better = true;
                 }
                 else
                 {
                     if (withMatchScore == bestk.score)
                     {
                         if (bestj.size + 1 < bestk.size)
                         {
                             better = true;
                         }
                     }
                 }
                 if (better)
                 {
                     bestk.size         = bestj.size + 1;
                     bestk.score        = withMatchScore;
                     bestk.@object      = item;
                     bestk.lastMatchKey = j;
                 }
             }
         }
         IntervalTree.PartialScoredList <T, E> best = null;
         foreach (IntervalTree.PartialScoredList <T, E> v in bestNonOverlapping.Values)
         {
             if (best == null || v.score > best.score)
             {
                 best = v;
             }
         }
         IList <T> nonOverlapping = new List <T>(best.size);
         IntervalTree.PartialScoredList <T, E> prev = best;
         while (prev != null)
         {
             if (prev.@object != null)
             {
                 nonOverlapping.Add(prev.@object);
             }
             if (prev.lastMatchKey != null)
             {
                 prev = bestNonOverlapping[prev.lastMatchKey];
             }
             else
             {
                 prev = null;
             }
         }
         Java.Util.Collections.Reverse(nonOverlapping);
         return(nonOverlapping);
     }
     else
     {
         IList <T> nonOverlapping = new List <T>(items);
         return(nonOverlapping);
     }
 }