//the keys are hard-coded in the order in which the results should be presented.
        private static string OutputMapResultsDefaultKeys(IDictionary <string, ICounter <QuoteAttributionEvaluation.Result> > tagResults, string[] keyOrder)
        {
            StringBuilder output = new StringBuilder();

            QuoteAttributionEvaluation.Result[] order = new QuoteAttributionEvaluation.Result[] { QuoteAttributionEvaluation.Result.Correct, QuoteAttributionEvaluation.Result.Incorrect, QuoteAttributionEvaluation.Result.Skipped };
            foreach (string tag in keyOrder)
            {
                ICounter <QuoteAttributionEvaluation.Result> resultsCounter;
                // Skip tags that were never seen; the plain indexer would throw for a missing key.
                if (!tagResults.TryGetValue(tag, out resultsCounter) || resultsCounter == null)
                {
                    continue;
                }
                if (tag == null)
                {
                    output.Append("No label" + "\t");
                }
                else
                {
                    output.Append(tag + "\t");
                }
                foreach (QuoteAttributionEvaluation.Result result in order)
                {
                    output.Append(result.ToString() + "\t" + resultsCounter.GetCount(result) + "\t");
                }
                //append total and precision
                double numCorrect   = resultsCounter.GetCount(QuoteAttributionEvaluation.Result.Correct);
                double numIncorrect = resultsCounter.GetCount(QuoteAttributionEvaluation.Result.Incorrect);
                double total        = numCorrect + numIncorrect;
                double precision    = (total == 0) ? 0 : numCorrect / total;
                output.Append(total + "\t" + precision + "\n");
            }
            return(output.ToString());
        }
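        // A minimal usage sketch from inside the declaring class (the tag name and counts are made up for illustration):
        //   var perTag  = new Dictionary<string, ICounter<QuoteAttributionEvaluation.Result>>();
        //   var counter = new ClassicCounter<QuoteAttributionEvaluation.Result>();
        //   counter.IncrementCount(QuoteAttributionEvaluation.Result.Correct, 8);
        //   counter.IncrementCount(QuoteAttributionEvaluation.Result.Incorrect, 2);
        //   perTag["trigram"] = counter;
        //   Console.Write(OutputMapResultsDefaultKeys(perTag, new[] { "trigram" }));
        //   // -> trigram  Correct  8  Incorrect  2  Skipped  0  10  0.8   (tab-separated)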
        private void PrintResultsInternal(PrintWriter pw, ICounter <Pair <string, string> > results, ClassicCounter <string> labelCount)
        {
            ClassicCounter <string> correct         = new ClassicCounter <string>();
            ClassicCounter <string> predictionCount = new ClassicCounter <string>();
            bool countGoldLabels = false;

            if (labelCount == null)
            {
                labelCount      = new ClassicCounter <string>();
                countGoldLabels = true;
            }
            foreach (Pair <string, string> predictedActual in results.KeySet())
            {
                string predicted = predictedActual.first;
                string actual    = predictedActual.second;
                if (predicted.Equals(actual))
                {
                    correct.IncrementCount(actual, results.GetCount(predictedActual));
                }
                predictionCount.IncrementCount(predicted, results.GetCount(predictedActual));
                if (countGoldLabels)
                {
                    labelCount.IncrementCount(actual, results.GetCount(predictedActual));
                }
            }
            DecimalFormat formatter = new DecimalFormat();

            formatter.SetMaximumFractionDigits(1);
            formatter.SetMinimumFractionDigits(1);
            double totalCount     = 0;
            double totalCorrect   = 0;
            double totalPredicted = 0;

            pw.Println("Label\tCorrect\tPredict\tActual\tPrecn\tRecall\tF");
            IList <string> labels = new List <string>(labelCount.KeySet());

            labels.Sort();
            foreach (string label in labels)
            {
                double numcorrect = correct.GetCount(label);
                double predicted  = predictionCount.GetCount(label);
                double trueCount  = labelCount.GetCount(label);
                double precision  = (predicted > 0) ? (numcorrect / predicted) : 0;
                double recall     = numcorrect / trueCount;
                double f          = (precision + recall > 0) ? 2 * precision * recall / (precision + recall) : 0.0;
                pw.Println(StringUtils.PadOrTrim(label, MaxLabelLength) + "\t" + numcorrect + "\t" + predicted + "\t" + trueCount + "\t" + formatter.Format(precision * 100) + "\t" + formatter.Format(100 * recall) + "\t" + formatter.Format(100 * f));
                if (!RelationMention.IsUnrelatedLabel(label))
                {
                    totalCount     += trueCount;
                    totalCorrect   += numcorrect;
                    totalPredicted += predicted;
                }
            }
            double precision_1 = (totalPredicted > 0) ? (totalCorrect / totalPredicted) : 0;
            double recall_1    = totalCorrect / totalCount;
            double f_1         = (totalPredicted > 0 && totalCorrect > 0) ? 2 * precision_1 * recall_1 / (precision_1 + recall_1) : 0.0;

            pw.Println("Total\t" + totalCorrect + "\t" + totalPredicted + "\t" + totalCount + "\t" + formatter.Format(100 * precision_1) + "\t" + formatter.Format(100 * recall_1) + "\t" + formatter.Format(100 * f_1));
        }
Example #3
			public virtual double Precision(string relation)
			{
				if (predictedCount.GetCount(relation) == 0)
				{
					return 1.0;
				}
				return correctCount.GetCount(relation) / predictedCount.GetCount(relation);
			}
Example #4
			public virtual double Recall(string relation)
			{
				if (goldCount.GetCount(relation) == 0)
				{
					return 0.0;
				}
				return correctCount.GetCount(relation) / goldCount.GetCount(relation);
			}
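			// Worked example (hypothetical counts, not from the original source): with correctCount["per:title"] = 8,
			// predictedCount["per:title"] = 10 and goldCount["per:title"] = 12, Precision("per:title") = 8/10 = 0.8 and
			// Recall("per:title") = 8/12 ≈ 0.667; with no predictions Precision defaults to 1.0, and with no gold
			// instances Recall is 0.0, per the guards above.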
Example #5
 public virtual double ProbabilityOf(E @object)
 {
     if (!parameters.KeySet().Contains(@object))
     {
         throw new Exception("Not a valid object for this multinomial!");
     }
     return(parameters.GetCount(@object));
 }
Example #6
        public virtual void TestUnion()
        {
            ICounter <string> c3 = Counters.Union(c1, c2);

            NUnit.Framework.Assert.AreEqual(c3.GetCount("p"), 6.0);
            NUnit.Framework.Assert.AreEqual(c3.GetCount("s"), 4.0);
            NUnit.Framework.Assert.AreEqual(c3.GetCount("t"), 8.0);
            NUnit.Framework.Assert.AreEqual(c3.TotalCount(), 36.0);
        }
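        // The asserted values are consistent with a SetUp() fixture along the lines of c1 = {p:1, q:2, r:3, s:4}
        // and c2 = {p:5, q:6, r:7, t:8} (inferred from the expected counts; the fixture itself is not shown here).
        // Union sums the counts per key, so p -> 1 + 5 = 6, s -> 4, t -> 8, and the total is 6 + 8 + 10 + 4 + 8 = 36.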
Example #7
 public override void Evaluate(Tree guess, Tree gold, PrintWriter pw)
 {
     if (gold == null || guess == null)
     {
         System.Console.Error.Printf("%s: Cannot compare against a null gold or guess tree!\n", this.GetType().FullName);
         return;
     }
     //Do regular evaluation
     base.Evaluate(guess, gold, pw);
     if (doCatLevelEval)
     {
         IDictionary <string, ICollection <ILabel> > guessCats = MakeObjectsByCat(guess);
         IDictionary <string, ICollection <ILabel> > goldCats  = MakeObjectsByCat(gold);
         ICollection <string> allCats = Generics.NewHashSet();
         Sharpen.Collections.AddAll(allCats, guessCats.Keys);
         Sharpen.Collections.AddAll(allCats, goldCats.Keys);
         foreach (string cat in allCats)
         {
             ICollection <ILabel> thisGuessCats = guessCats[cat];
             ICollection <ILabel> thisGoldCats  = goldCats[cat];
             if (thisGuessCats == null)
             {
                 thisGuessCats = Generics.NewHashSet();
             }
             if (thisGoldCats == null)
             {
                 thisGoldCats = Generics.NewHashSet();
             }
             double currentPrecision = Precision(thisGuessCats, thisGoldCats);
             double currentRecall    = Precision(thisGoldCats, thisGuessCats);
             double currentF1        = (currentPrecision > 0.0 && currentRecall > 0.0 ? 2.0 / (1.0 / currentPrecision + 1.0 / currentRecall) : 0.0);
             precisions.IncrementCount(cat, currentPrecision);
             recalls.IncrementCount(cat, currentRecall);
             f1s.IncrementCount(cat, currentF1);
             precisions2.IncrementCount(cat, thisGuessCats.Count * currentPrecision);
             pnums2.IncrementCount(cat, thisGuessCats.Count);
             recalls2.IncrementCount(cat, thisGoldCats.Count * currentRecall);
             rnums2.IncrementCount(cat, thisGoldCats.Count);
             if (lex != null)
             {
                 MeasureOOV(guess, gold);
             }
             if (pw != null && runningAverages)
             {
                 pw.Println(cat + "\tP: " + ((int)(currentPrecision * 10000)) / 100.0 + " (sent ave " + ((int)(precisions.GetCount(cat) * 10000 / num)) / 100.0 + ") (evalb " + ((int)(precisions2.GetCount(cat) * 10000 / pnums2.GetCount(cat))) / 100.0 + ")");
                 pw.Println("\tR: " + ((int)(currentRecall * 10000)) / 100.0 + " (sent ave " + ((int)(recalls.GetCount(cat) * 10000 / num)) / 100.0 + ") (evalb " + ((int)(recalls2.GetCount(cat) * 10000 / rnums2.GetCount(cat))) / 100.0 + ")");
                 double cF1  = 2.0 / (rnums2.GetCount(cat) / recalls2.GetCount(cat) + pnums2.GetCount(cat) / precisions2.GetCount(cat));
                 string emit = str + " F1: " + ((int)(currentF1 * 10000)) / 100.0 + " (sent ave " + ((int)(10000 * f1s.GetCount(cat) / num)) / 100.0 + ", evalb " + ((int)(10000 * cF1)) / 100.0 + ")";
                 pw.Println(emit);
             }
         }
         if (pw != null && runningAverages)
         {
             pw.Println("========================================");
         }
     }
 }
Example #8
        public virtual ICounter <CandidatePhrase> ChooseTopWords(ICounter <CandidatePhrase> newdt, TwoDimensionalCounter <CandidatePhrase, E> terms, ICounter <CandidatePhrase> useThresholdNumPatternsForTheseWords, ICollection <CandidatePhrase> ignoreWords
                                                                 , double thresholdWordExtract)
        {
            IEnumerator <CandidatePhrase> termIter   = Counters.ToPriorityQueue(newdt).GetEnumerator();
            ICounter <CandidatePhrase>    finalwords = new ClassicCounter <CandidatePhrase>();

            while (termIter.MoveNext())
            {
                if (finalwords.Size() >= constVars.numWordsToAdd)
                {
                    break;
                }
                CandidatePhrase w = termIter.Current;
                if (newdt.GetCount(w) < thresholdWordExtract)
                {
                    Redwood.Log(ConstantsAndVariables.extremedebug, "not adding word " + w + " and any later words because the score " + newdt.GetCount(w) + " is less than the threshold of  " + thresholdWordExtract);
                    break;
                }
                System.Diagnostics.Debug.Assert((newdt.GetCount(w) != double.PositiveInfinity));
                if (useThresholdNumPatternsForTheseWords.ContainsKey(w) && NumNonRedundantPatterns(terms, w) < constVars.thresholdNumPatternsApplied)
                {
                    Redwood.Log("extremePatDebug", "Not adding " + w + " because the number of non redundant patterns are below threshold of " + constVars.thresholdNumPatternsApplied + ":" + terms.GetCounter(w).KeySet());
                    continue;
                }
                CandidatePhrase matchedFuzzy = null;
                if (constVars.minLen4FuzzyForPattern > 0 && ignoreWords != null)
                {
                    matchedFuzzy = ConstantsAndVariables.ContainsFuzzy(ignoreWords, w, constVars.minLen4FuzzyForPattern);
                }
                if (matchedFuzzy == null)
                {
                    Redwood.Log("extremePatDebug", "adding word " + w);
                    finalwords.SetCount(w, newdt.GetCount(w));
                }
                else
                {
                    Redwood.Log("extremePatDebug", "not adding " + w + " because it matched " + matchedFuzzy + " in common English word");
                    ignoreWords.Add(w);
                }
            }
            string nextTen = string.Empty;
            int    n       = 0;

            while (termIter.MoveNext())
            {
                n++;
                if (n > 10)
                {
                    break;
                }
                CandidatePhrase w = termIter.Current;
                nextTen += ";\t" + w + ":" + newdt.GetCount(w);
            }
            Redwood.Log(Redwood.Dbg, "Next ten phrases were " + nextTen);
            return(finalwords);
        }
Example #9
        public virtual void TestIntersection()
        {
            ICounter <string> c3 = Counters.Intersection(c1, c2);

            NUnit.Framework.Assert.AreEqual(c3.GetCount("p"), 1.0);
            NUnit.Framework.Assert.AreEqual(c3.GetCount("q"), 2.0);
            NUnit.Framework.Assert.AreEqual(c3.GetCount("s"), 0.0);
            NUnit.Framework.Assert.AreEqual(c3.GetCount("t"), 0.0);
            NUnit.Framework.Assert.AreEqual(c3.TotalCount(), 6.0);
        }
Example #10
        public virtual void TestProduct()
        {
            ICounter <string> c3 = Counters.Product(c1, c2);

            NUnit.Framework.Assert.AreEqual(c3.GetCount("p"), 5.0);
            NUnit.Framework.Assert.AreEqual(c3.GetCount("q"), 12.0);
            NUnit.Framework.Assert.AreEqual(c3.GetCount("r"), 21.0);
            NUnit.Framework.Assert.AreEqual(c3.GetCount("s"), 0.0);
            NUnit.Framework.Assert.AreEqual(c3.GetCount("t"), 0.0);
        }
Example #11
        public async void TestCountNo()
        {
            //info.openid = "oXMto1LOSU0skZmaqLu3bAkQIDZE";
            info.deviceId = did;// "TestDeviceId---1";
            var m = await counter.Increment(counterID);

            Debug.Log(m);
            var b = await counter.GetCount(counterID);

            Debug.Log(b);
        }
Example #12
        public virtual void TestToTiedRankCounter()
        {
            SetUp();
            c1.SetCount("t", 1.0);
            c1.SetCount("u", 1.0);
            c1.SetCount("v", 2.0);
            c1.SetCount("z", 4.0);
            ICounter <string> rank = Counters.ToTiedRankCounter(c1);

            NUnit.Framework.Assert.AreEqual(1.5, rank.GetCount("z"));
            NUnit.Framework.Assert.AreEqual(7.0, rank.GetCount("t"));
        }
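        // Assuming the same base fixture as the other Counters tests (c1 = {p:1, q:2, r:3, s:4}), the values after
        // the SetCount calls sort descending as s:4, z:4, r:3, q:2, v:2, p:1, t:1, u:1. Tied entries share the
        // average of the ranks they occupy, so s and z get (1 + 2) / 2 = 1.5 and p, t, u get (6 + 7 + 8) / 3 = 7.0.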
Example #13
        public virtual void TestAddInPlaceCollection()
        {
            // initialize counter
            SetUp();
            IList <string> collection = new List <string>();

            collection.Add("p");
            collection.Add("p");
            collection.Add("s");
            Counters.AddInPlace(c1, collection);
            NUnit.Framework.Assert.AreEqual(3.0, c1.GetCount("p"));
            NUnit.Framework.Assert.AreEqual(5.0, c1.GetCount("s"));
        }
        public override void Evaluate(Tree guess, Tree gold, PrintWriter pw)
        {
            if (gold == null || guess == null)
            {
                System.Console.Error.Printf("%s: Cannot compare against a null gold or guess tree!\n", this.GetType().FullName);
                return;
            }
            IDictionary <CollinsRelation, ICollection <CollinsDependency> > guessDeps = MakeCollinsObjects(guess);
            IDictionary <CollinsRelation, ICollection <CollinsDependency> > goldDeps  = MakeCollinsObjects(gold);
            ICollection <CollinsRelation> relations = Generics.NewHashSet();

            Sharpen.Collections.AddAll(relations, guessDeps.Keys);
            Sharpen.Collections.AddAll(relations, goldDeps.Keys);
            num += 1.0;
            foreach (CollinsRelation rel in relations)
            {
                ICollection <CollinsDependency> thisGuessDeps = guessDeps[rel];
                ICollection <CollinsDependency> thisGoldDeps  = goldDeps[rel];
                if (thisGuessDeps == null)
                {
                    thisGuessDeps = Generics.NewHashSet();
                }
                if (thisGoldDeps == null)
                {
                    thisGoldDeps = Generics.NewHashSet();
                }
                double currentPrecision = Precision(thisGuessDeps, thisGoldDeps);
                double currentRecall    = Precision(thisGoldDeps, thisGuessDeps);
                double currentF1        = (currentPrecision > 0.0 && currentRecall > 0.0 ? 2.0 / (1.0 / currentPrecision + 1.0 / currentRecall) : 0.0);
                precisions.IncrementCount(rel, currentPrecision);
                recalls.IncrementCount(rel, currentRecall);
                f1s.IncrementCount(rel, currentF1);
                precisions2.IncrementCount(rel, thisGuessDeps.Count * currentPrecision);
                pnums2.IncrementCount(rel, thisGuessDeps.Count);
                recalls2.IncrementCount(rel, thisGoldDeps.Count * currentRecall);
                rnums2.IncrementCount(rel, thisGoldDeps.Count);
                if (pw != null && runningAverages)
                {
                    pw.Println(rel + "\tP: " + ((int)(currentPrecision * 10000)) / 100.0 + " (sent ave " + ((int)(precisions.GetCount(rel) * 10000 / num)) / 100.0 + ") (evalb " + ((int)(precisions2.GetCount(rel) * 10000 / pnums2.GetCount(rel))) / 100.0 + ")");
                    pw.Println("\tR: " + ((int)(currentRecall * 10000)) / 100.0 + " (sent ave " + ((int)(recalls.GetCount(rel) * 10000 / num)) / 100.0 + ") (evalb " + ((int)(recalls2.GetCount(rel) * 10000 / rnums2.GetCount(rel))) / 100.0 + ")");
                    double cF1  = 2.0 / (rnums2.GetCount(rel) / recalls2.GetCount(rel) + pnums2.GetCount(rel) / precisions2.GetCount(rel));
                    string emit = str + " F1: " + ((int)(currentF1 * 10000)) / 100.0 + " (sent ave " + ((int)(10000 * f1s.GetCount(rel) / num)) / 100.0 + ", evalb " + ((int)(10000 * cF1)) / 100.0 + ")";
                    pw.Println(emit);
                }
            }
            if (pw != null && runningAverages)
            {
                pw.Println("================================================================================");
            }
        }
Example #15
        public virtual void TestL2Normalize()
        {
            ClassicCounter <string> c = new ClassicCounter <string>();

            c.IncrementCount("a", 4.0);
            c.IncrementCount("b", 2.0);
            c.IncrementCount("c", 1.0);
            c.IncrementCount("d", 2.0);
            ICounter <string> d = Counters.L2Normalize(c);

            NUnit.Framework.Assert.AreEqual(d.GetCount("a"), 0.8, Tolerance);
            NUnit.Framework.Assert.AreEqual(d.GetCount("b"), 0.4, Tolerance);
            NUnit.Framework.Assert.AreEqual(d.GetCount("c"), 0.2, Tolerance);
            NUnit.Framework.Assert.AreEqual(d.GetCount("d"), 0.4, Tolerance);
        }
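        // The L2 norm of (4, 2, 1, 2) is sqrt(16 + 4 + 1 + 4) = sqrt(25) = 5, so the normalized counts are
        // 4/5 = 0.8, 2/5 = 0.4, 1/5 = 0.2 and 2/5 = 0.4, matching the assertions above.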
 internal virtual double GetPatTFIDFScore(CandidatePhrase word, ICounter <E> patsThatExtractedThis, ICounter <E> allSelectedPatterns)
 {
     if (Data.processedDataFreq.GetCount(word) == 0.0)
     {
         Redwood.Log(Redwood.Warn, "How come the processed corpus freq has count of " + word + " 0. The count in raw freq is " + Data.rawFreq.GetCount(word) + " and the Data.rawFreq size is " + Data.rawFreq.Size());
         return(0);
     }
     else
     {
         double          total = 0;
         ICollection <E> rem   = new HashSet <E>();
         foreach (KeyValuePair <E, double> en2 in patsThatExtractedThis.EntrySet())
         {
             double weight = 1.0;
             if (usePatternWeights)
             {
                 weight = allSelectedPatterns.GetCount(en2.Key);
                 if (weight == 0)
                 {
                     Redwood.Log(Redwood.Force, "Warning: Weight zero for " + en2.Key + ". May be pattern was removed when choosing other patterns (if subsumed by another pattern).");
                     rem.Add(en2.Key);
                 }
             }
             total += weight;
         }
         Counters.RemoveKeys(patsThatExtractedThis, rem);
         double score = total / Data.processedDataFreq.GetCount(word);
         return(score);
     }
 }
        /// <summary>Method to convert features from counts to L1-normalized TFIDF based features</summary>
        /// <param name="datum">with a collection of features.</param>
        /// <param name="featureDocCounts">a counter of doc-count for each feature.</param>
        /// <returns>RVFDatum with l1-normalized tf-idf features.</returns>
        public virtual RVFDatum <L, F> GetL1NormalizedTFIDFDatum(IDatum <L, F> datum, ICounter <F> featureDocCounts)
        {
            ICounter <F> tfidfFeatures = new ClassicCounter <F>();

            foreach (F feature in datum.AsFeatures())
            {
                if (featureDocCounts.ContainsKey(feature))
                {
                    tfidfFeatures.IncrementCount(feature, 1.0);
                }
            }
            double l1norm = 0;

            foreach (F feature_1 in tfidfFeatures.KeySet())
            {
                double idf = Math.Log(((double)(this.Size() + 1)) / (featureDocCounts.GetCount(feature_1) + 0.5));
                double tf  = tfidfFeatures.GetCount(feature_1);
                tfidfFeatures.SetCount(feature_1, tf * idf);
                l1norm += tf * idf;
            }
            foreach (F feature_2 in tfidfFeatures.KeySet())
            {
                double tfidf = tfidfFeatures.GetCount(feature_2);
                tfidfFeatures.SetCount(feature_2, tfidf / l1norm);
            }
            RVFDatum <L, F> rvfDatum = new RVFDatum <L, F>(tfidfFeatures, datum.Label());

            return(rvfDatum);
        }
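        // Restating the computation above: every feature of the datum that also appears in featureDocCounts gets
        // tf = its occurrence count in the datum and idf = log((N + 1) / (docCount + 0.5)) with N = this.Size(),
        // and the tf*idf values are then divided by their sum so the resulting feature vector is L1-normalized.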
Example #18
        public virtual ClassicCounter <L> ScoresOf(RVFDatum <L, F> example)
        {
            ClassicCounter <L> scores = new ClassicCounter <L>();

            Counters.AddInPlace(scores, priors);
            if (addZeroValued)
            {
                Counters.AddInPlace(scores, priorZero);
            }
            foreach (L l in labels)
            {
                double       score    = 0.0;
                ICounter <F> features = example.AsFeaturesCounter();
                foreach (F f in features.KeySet())
                {
                    int value = (int)features.GetCount(f);
                    // The count is already an int, so pass it through directly (the original int.Parse call expected a string).
                    score += Weight(l, f, value);
                    if (addZeroValued)
                    {
                        score -= Weight(l, f, zero);
                    }
                }
                scores.IncrementCount(l, score);
            }
            return(scores);
        }
Example #19
        public static Edu.Stanford.Nlp.Stats.Distribution <E> AbsolutelyDiscountedDistribution <E>(ICounter <E> counter, int numberOfKeys, double discount)
        {
            Edu.Stanford.Nlp.Stats.Distribution <E> norm = new Edu.Stanford.Nlp.Stats.Distribution <E>();
            norm.counter = new ClassicCounter <E>();
            double total        = counter.TotalCount();
            double reservedMass = 0.0;

            foreach (E key in counter.KeySet())
            {
                double count = counter.GetCount(key);
                if (count > discount)
                {
                    double newCount = (count - discount) / total;
                    norm.counter.SetCount(key, newCount);
                    // a positive count left over
                    //        System.out.println("seen: " + newCount);
                    reservedMass += discount;
                }
                else
                {
                    // count <= discount
                    reservedMass += count;
                }
            }
            // if the count <= discount, don't put key in counter, and we treat it as unseen!!
            norm.numberOfKeys = numberOfKeys;
            norm.reservedMass = reservedMass / total;
            //    System.out.println("UNSEEN: " + reservedMass / total / (numberOfKeys - counter.size()));
            return(norm);
        }
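        // Worked example (hypothetical counts, not from the original source): for counts {a:3, b:1} with
        // discount 0.5 and total 4, a gets (3 - 0.5) / 4 = 0.625 and b gets (1 - 0.5) / 4 = 0.125, while
        // reservedMass = (0.5 + 0.5) / 4 = 0.25 is held back for the unseen portion of the numberOfKeys keys.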
Example #20
        /// <summary>The examples are assumed to be a list of RFVDatum.</summary>
        /// <remarks>
        /// The examples are assumed to be a list of RFVDatum.
        /// The datums are assumed to not contain the zeroes and then they are added to each instance.
        /// </remarks>
        public virtual NaiveBayesClassifier <L, F> TrainClassifier(GeneralDataset <L, F> examples, ICollection <F> featureSet)
        {
            int numFeatures = featureSet.Count;

            // One row of feature counts per example; the original empty jagged array would overflow when filled below.
            int[][] data   = new int[examples.Size()][];
            int[]   labels = new int[examples.Size()];
            labelIndex   = new HashIndex <L>();
            featureIndex = new HashIndex <F>();
            foreach (F feat in featureSet)
            {
                featureIndex.Add(feat);
            }
            for (int d = 0; d < examples.Size(); d++)
            {
                RVFDatum <L, F> datum = examples.GetRVFDatum(d);
                ICounter <F>    c     = datum.AsFeaturesCounter();
                // Allocate this example's dense feature row before filling it in.
                data[d] = new int[numFeatures];
                foreach (F feature in c.KeySet())
                {
                    int fNo   = featureIndex.IndexOf(feature);
                    int value = (int)c.GetCount(feature);
                    data[d][fNo] = value;
                }
                labelIndex.Add(datum.Label());
                labels[d] = labelIndex.IndexOf(datum.Label());
            }
            int numClasses = labelIndex.Size();

            return(TrainClassifier(data, labels, numFeatures, numClasses, labelIndex, featureIndex));
        }
Example #21
        private double Weight(L label, F feature, Number val)
        {
            Pair <Pair <L, F>, Number> p = new Pair <Pair <L, F>, Number>(new Pair <L, F>(label, feature), val);
            double v = weights.GetCount(p);

            return(v);
        }
Example #22
        public void TrainUnannotated(IList <TaggedWord> sentence, double weight)
        {
            uwModelTrainer.IncrementTreesRead(weight);
            int loc = 0;

            foreach (TaggedWord tw in sentence)
            {
                string            baseTag = op.Langpack().BasicCategory(tw.Tag());
                ICounter <string> counts  = baseTagCounts[baseTag];
                if (counts == null)
                {
                    ++loc;
                    continue;
                }
                double totalCount = counts.TotalCount();
                if (totalCount == 0)
                {
                    ++loc;
                    continue;
                }
                foreach (string tag in counts.KeySet())
                {
                    TaggedWord newTW = new TaggedWord(tw.Word(), tag);
                    Train(newTW, loc, weight * counts.GetCount(tag) / totalCount);
                }
                ++loc;
            }
        }
        public virtual double GetPosteriorPredictiveProbability(ICounter <E> counts, E @object)
        {
            double numerator   = parameters.GetCount(@object) + counts.GetCount(@object);
            double denominator = parameters.TotalCount() + counts.TotalCount();

            return(numerator / denominator);
        }
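        // Worked example (hypothetical values, not from the original source): with prior parameters {a:1, b:1}
        // and observed counts {a:3, b:1}, the posterior predictive probability of a is (1 + 3) / (2 + 4) = 2/3.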
Example #24
        public virtual IList <string> AnnotateMulticlass(IList <IDatum <string, string> > testDatums)
        {
            IList <string> predictedLabels = new List <string>();

            foreach (IDatum <string, string> testDatum in testDatums)
            {
                string            label = ClassOf(testDatum, null);
                ICounter <string> probs = ProbabilityOf(testDatum);
                double            prob  = probs.GetCount(label);
                StringWriter      sw    = new StringWriter();
                PrintWriter       pw    = new PrintWriter(sw);
                if (logger.IsLoggable(Level.Fine))
                {
                    JustificationOf(testDatum, pw, label);
                }
                logger.Fine("JUSTIFICATION for label GOLD:" + testDatum.Label() + " SYS:" + label + " (prob:" + prob + "):\n" + sw.ToString() + "\nJustification done.");
                predictedLabels.Add(label);
                if (!testDatum.Label().Equals(label))
                {
                    logger.Info("Classification: found different type " + label + " for relation: " + testDatum);
                }
                else
                {
                    logger.Info("Classification: found similar type " + label + " for relation: " + testDatum);
                }
            }
            return(predictedLabels);
        }
Example #25
        public static void RunSequencesFinder(BestSequenceFinderTest.ITestSequenceModel tsm, KBestSequenceFinder sf)
        {
            ICounter <int[]>    bestLabelsCounter = sf.KBestSequences(tsm, K2nr);
            IList <int[]>       topValues         = Counters.ToSortedList(bestLabelsCounter);
            IEnumerator <int[]> iter = topValues.GetEnumerator();

            for (int i = 0; i < K2nr; i++)
            {
                // Advance the enumerator before reading Current (a step the direct port of iterator.next() dropped).
                iter.MoveNext();
                int[]  sequence    = iter.Current;
                string strSequence = Arrays.ToString(sequence);
                double score       = bestLabelsCounter.GetCount(sequence);
                // Deal with ties in the scoring ... only tied pairs handled.
                bool found = false;
                if (strSequence.Equals(test2nrAnswers[i]))
                {
                    found = true;
                }
                else
                {
                    if (i > 0 && Math.Abs(score - test2nrScores[i - 1]) < 1e-8 && strSequence.Equals(test2nrAnswers[i - 1]))
                    {
                        found = true;
                    }
                    else
                    {
                        if (i + 1 < test2nrScores.Length && Math.Abs(score - test2nrScores[i + 1]) < 1e-8 && strSequence.Equals(test2nrAnswers[i + 1]))
                        {
                            found = true;
                        }
                    }
                }
                NUnit.Framework.Assert.IsTrue("Best sequence is wrong. Correct: " + test2nrAnswers[i] + ", found: " + strSequence, found);
                NUnit.Framework.Assert.AreEqual("Best sequence score is wrong.", test2nrScores[i], score, 1e-8);
            }
        }
        private void AddFeatures(ICounter <F> features)
        {
            if (data.Length == size)
            {
                int[][]    newData   = new int[size * 2][];
                double[][] newValues = new double[size * 2][];
                lock (typeof(Runtime))
                {
                    System.Array.Copy(data, 0, newData, 0, size);
                    System.Array.Copy(values, 0, newValues, 0, size);
                }
                data   = newData;
                values = newValues;
            }
            IList <F> featureNames = new List <F>(features.KeySet());
            int       nFeatures    = featureNames.Count;

            data[size]   = new int[nFeatures];
            values[size] = new double[nFeatures];
            for (int i = 0; i < nFeatures; ++i)
            {
                F   feature = featureNames[i];
                int fID     = featureIndex.AddToIndex(feature);
                if (fID >= 0)
                {
                    data[size][i]   = fID;
                    values[size][i] = features.GetCount(feature);
                }
                else
                {
                    // Usually a feature present at test but not training time.
                    System.Diagnostics.Debug.Assert(featureIndex.IsLocked(), "Could not add feature to index: " + feature);
                }
            }
        }
        public override void Evaluate(Tree guess, Tree gold, PrintWriter pw)
        {
            if (gold == null || guess == null)
            {
                System.Console.Error.Printf("%s: Cannot compare against a null gold or guess tree!%n", this.GetType().FullName);
                return;
            }
            IDictionary <ILabel, ICollection <Constituent> > guessDeps = MakeObjectsByCat(guess);
            IDictionary <ILabel, ICollection <Constituent> > goldDeps  = MakeObjectsByCat(gold);
            ICollection <ILabel> cats = Generics.NewHashSet(guessDeps.Keys);

            Sharpen.Collections.AddAll(cats, goldDeps.Keys);
            if (pw != null && runningAverages)
            {
                pw.Println("========================================");
                pw.Println("Labeled Bracketed Evaluation by Category");
                pw.Println("========================================");
            }
            ++num;
            foreach (ILabel cat in cats)
            {
                ICollection <Constituent> thisGuessDeps = guessDeps.Contains(cat) ? guessDeps[cat] : Generics.NewHashSet <Constituent>();
                ICollection <Constituent> thisGoldDeps  = goldDeps.Contains(cat) ? goldDeps[cat] : Generics.NewHashSet <Constituent>();
                double currentPrecision = Precision(thisGuessDeps, thisGoldDeps);
                double currentRecall    = Precision(thisGoldDeps, thisGuessDeps);
                double currentF1        = (currentPrecision > 0.0 && currentRecall > 0.0 ? 2.0 / (1.0 / currentPrecision + 1.0 / currentRecall) : 0.0);
                precisions.IncrementCount(cat, currentPrecision);
                recalls.IncrementCount(cat, currentRecall);
                f1s.IncrementCount(cat, currentF1);
                precisions2.IncrementCount(cat, thisGuessDeps.Count * currentPrecision);
                pnums2.IncrementCount(cat, thisGuessDeps.Count);
                recalls2.IncrementCount(cat, thisGoldDeps.Count * currentRecall);
                rnums2.IncrementCount(cat, thisGoldDeps.Count);
                if (pw != null && runningAverages)
                {
                    pw.Println(cat + "\tP: " + ((int)(currentPrecision * 10000)) / 100.0 + " (sent ave " + ((int)(precisions.GetCount(cat) * 10000 / num)) / 100.0 + ") (evalb " + ((int)(precisions2.GetCount(cat) * 10000 / pnums2.GetCount(cat))) / 100.0 + ")");
                    pw.Println("\tR: " + ((int)(currentRecall * 10000)) / 100.0 + " (sent ave " + ((int)(recalls.GetCount(cat) * 10000 / num)) / 100.0 + ") (evalb " + ((int)(recalls2.GetCount(cat) * 10000 / rnums2.GetCount(cat))) / 100.0 + ")");
                    double cF1  = 2.0 / (rnums2.GetCount(cat) / recalls2.GetCount(cat) + pnums2.GetCount(cat) / precisions2.GetCount(cat));
                    string emit = str + " F1: " + ((int)(currentF1 * 10000)) / 100.0 + " (sent ave " + ((int)(10000 * f1s.GetCount(cat) / num)) / 100.0 + ", evalb " + ((int)(10000 * cF1)) / 100.0 + ")";
                    pw.Println(emit);
                }
            }
            if (pw != null && runningAverages)
            {
                pw.Println("========================================");
            }
        }
Example #28
        public virtual IEnumerator <IntTaggedWord> RuleIteratorByWord(int word, int loc, string featureSpec)
        {
            EnsureProbs(word);
            IList <IntTaggedWord> rules = new List <IntTaggedWord>();
            double max = Counters.Max(logProbs);

            for (int tag = 0; tag < tagIndex.Size(); tag++)
            {
                IntTaggedWord iTW   = new IntTaggedWord(word, tag);
                double        score = logProbs.GetCount(tagIndex.Get(tag));
                if (score > max - iteratorCutoffFactor)
                {
                    rules.Add(iTW);
                }
            }
            return(rules.GetEnumerator());
        }
Example #29
        public virtual void TestAbsoluteDifference()
        {
            ICounter <string> c3 = Counters.AbsoluteDifference(c1, c2);

            NUnit.Framework.Assert.AreEqual(c3.GetCount("p"), 4.0);
            NUnit.Framework.Assert.AreEqual(c3.GetCount("q"), 4.0);
            NUnit.Framework.Assert.AreEqual(c3.GetCount("r"), 4.0);
            NUnit.Framework.Assert.AreEqual(c3.GetCount("s"), 4.0);
            NUnit.Framework.Assert.AreEqual(c3.GetCount("t"), 8.0);
            ICounter <string> c4 = Counters.AbsoluteDifference(c2, c1);

            NUnit.Framework.Assert.AreEqual(c4.GetCount("p"), 4.0);
            NUnit.Framework.Assert.AreEqual(c4.GetCount("q"), 4.0);
            NUnit.Framework.Assert.AreEqual(c4.GetCount("r"), 4.0);
            NUnit.Framework.Assert.AreEqual(c4.GetCount("s"), 4.0);
            NUnit.Framework.Assert.AreEqual(c4.GetCount("t"), 8.0);
        }
        // Does L1 or L2 using FOBOS and lazy update, so L1 should not be handled in the
        // objective
        // Alternatively, you can handle other regularization in the objective,
        // but then, if the derivative is not sparse, this routine would not be very
        // efficient. However, might still be okay for CRFs
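        // Sketch of the lazy truncated update performed below (a restatement of the code, not extra behaviour):
        // the raw AdaGrad step for a feature is
        //   testupdate = x - eta / (sqrt(sumGradSquare) + soften) * gradient,
        // and the value actually stored is
        //   sign(testupdate) * max(0, |testupdate| - rate * lambdaL1) * (1 - lambdaL2)^rate,
        // where rate folds in the learning rate for the steps during which the feature was not touched.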
        public virtual ICounter <K> Minimize(F function, ICounter <K> x, int maxIterations)
        {
            Sayln("       Batch size of: " + batchSize);
            Sayln("       Data dimension of: " + function.DataSize());
            int numBatches = (function.DataSize() - 1) / this.batchSize + 1;

            Sayln("       Batches per pass through data:  " + numBatches);
            Sayln("       Number of passes is = " + numPasses);
            Sayln("       Max iterations is = " + maxIterations);
            ICounter <K> lastUpdated = new ClassicCounter <K>();
            int          timeStep    = 0;
            Timing       total       = new Timing();

            total.Start();
            for (int iter = 0; iter < numPasses; iter++)
            {
                double totalObjValue = 0;
                for (int j = 0; j < numBatches; j++)
                {
                    int[] selectedData = GetSample(function, this.batchSize);
                    // the core adagrad
                    ICounter <K> gradient = function.DerivativeAt(x, selectedData);
                    totalObjValue = totalObjValue + function.ValueAt(x, selectedData);
                    foreach (K feature in gradient.KeySet())
                    {
                        double gradf              = gradient.GetCount(feature);
                        double prevrate           = eta / (Math.Sqrt(sumGradSquare.GetCount(feature)) + soften);
                        double sgsValue           = sumGradSquare.IncrementCount(feature, gradf * gradf);
                        double currentrate        = eta / (Math.Sqrt(sgsValue) + soften);
                        double testupdate         = x.GetCount(feature) - (currentrate * gradient.GetCount(feature));
                        double lastUpdateTimeStep = lastUpdated.GetCount(feature);
                        double idleinterval       = timeStep - lastUpdateTimeStep - 1;
                        lastUpdated.SetCount(feature, (double)timeStep);
                        // does lazy update using idleinterval
                        double trunc      = Math.Max(0.0, (Math.Abs(testupdate) - (currentrate + prevrate * idleinterval) * this.lambdaL1));
                        double trunc2     = trunc * Math.Pow(1 - this.lambdaL2, currentrate + prevrate * idleinterval);
                        double realupdate = Math.Signum(testupdate) * trunc2;
                        if (realupdate < Eps)
                        {
                            x.Remove(feature);
                        }
                        else
                        {
                            x.SetCount(feature, realupdate);
                        }
                        // reporting
                        timeStep++;
                        if (timeStep > maxIterations)
                        {
                            Sayln("Stochastic Optimization complete.  Stopped after max iterations");
                            break;
                        }
                        Sayln(string.Format("Iter {0} \t batch: {1} \t time={2:F2} \t obj={3:F4}", iter, timeStep, total.Report() / 1000.0, totalObjValue));
                    }
                }
            }
            return(x);
        }