/// <summary>
/// Renders one tab-separated row per tag: the tag (or "No label" for a null tag), the
/// Correct / Incorrect / Skipped counts, then correct + incorrect and the precision.
/// </summary>
/// <param name="tagResults">Per-tag counters of evaluation outcomes.</param>
/// <param name="keyOrder">Tags in the order rows should be emitted; tags without results are skipped.</param>
/// <returns>The formatted rows, each terminated by a newline.</returns>
private static string OutputMapResultsDefaultKeys(IDictionary<string, ICounter<QuoteAttributionEvaluation.Result>> tagResults, string[] keyOrder)
{
    StringBuilder output = new StringBuilder();
    // These are hardcoded in the order we wish the results to be presented.
    QuoteAttributionEvaluation.Result[] order = new QuoteAttributionEvaluation.Result[]
    {
        QuoteAttributionEvaluation.Result.Correct,
        QuoteAttributionEvaluation.Result.Incorrect,
        QuoteAttributionEvaluation.Result.Skipped
    };
    foreach (string tag in keyOrder)
    {
        // The dictionary indexer throws KeyNotFoundException for an absent key, so the
        // "no results for this tag" case has to be probed with TryGetValue.
        ICounter<QuoteAttributionEvaluation.Result> resultsCounter;
        if (!tagResults.TryGetValue(tag, out resultsCounter) || resultsCounter == null)
        {
            continue;
        }
        if (tag == null)
        {
            output.Append("No label" + "\t");
        }
        else
        {
            output.Append(tag + "\t");
        }
        foreach (QuoteAttributionEvaluation.Result result in order)
        {
            output.Append(result.ToString() + "\t" + resultsCounter.GetCount(result) + "\t");
        }
        // Append total and precision; skipped items are excluded from the total.
        double numCorrect = resultsCounter.GetCount(QuoteAttributionEvaluation.Result.Correct);
        double numIncorrect = resultsCounter.GetCount(QuoteAttributionEvaluation.Result.Incorrect);
        double total = numCorrect + numIncorrect;
        double precision = (total == 0) ? 0 : numCorrect / total;
        output.Append(total + "\t" + precision + "\n");
    }
    return output.ToString();
}
/// <summary>
/// Prints a per-label table of correct / predicted / actual counts with precision, recall and F,
/// followed by a "Total" row that aggregates every label for which
/// RelationMention.IsUnrelatedLabel returns false.
/// </summary>
/// <param name="pw">Destination for the table.</param>
/// <param name="results">Counts keyed by (predicted, actual) label pairs.</param>
/// <param name="labelCount">Gold label counts; when null they are derived from <paramref name="results"/>.</param>
private void PrintResultsInternal(PrintWriter pw, ICounter<Pair<string, string>> results, ClassicCounter<string> labelCount)
{
    ClassicCounter<string> correct = new ClassicCounter<string>();
    ClassicCounter<string> predictionCount = new ClassicCounter<string>();
    bool countGoldLabels = false;
    if (labelCount == null)
    {
        labelCount = new ClassicCounter<string>();
        countGoldLabels = true;
    }
    foreach (Pair<string, string> predictedActual in results.KeySet())
    {
        string predicted = predictedActual.first;
        string actual = predictedActual.second;
        double pairCount = results.GetCount(predictedActual);
        if (predicted.Equals(actual))
        {
            correct.IncrementCount(actual, pairCount);
        }
        predictionCount.IncrementCount(predicted, pairCount);
        if (countGoldLabels)
        {
            labelCount.IncrementCount(actual, pairCount);
        }
    }
    DecimalFormat formatter = new DecimalFormat();
    formatter.SetMaximumFractionDigits(1);
    formatter.SetMinimumFractionDigits(1);
    double totalCount = 0;
    double totalCorrect = 0;
    double totalPredicted = 0;
    pw.Println("Label\tCorrect\tPredict\tActual\tPrecn\tRecall\tF");
    IList<string> labels = new List<string>(labelCount.KeySet());
    labels.Sort();
    foreach (string label in labels)
    {
        double numcorrect = correct.GetCount(label);
        double predicted = predictionCount.GetCount(label);
        double trueCount = labelCount.GetCount(label);
        double precision = (predicted > 0) ? (numcorrect / predicted) : 0;
        // Guarded like precision: a label with no gold instances would otherwise print NaN.
        double recall = (trueCount > 0) ? (numcorrect / trueCount) : 0;
        double f = (precision + recall > 0) ? 2 * precision * recall / (precision + recall) : 0.0;
        pw.Println(StringUtils.PadOrTrim(label, MaxLabelLength) + "\t" + numcorrect + "\t" + predicted + "\t" + trueCount + "\t" + formatter.Format(precision * 100) + "\t" + formatter.Format(100 * recall) + "\t" + formatter.Format(100 * f));
        if (!RelationMention.IsUnrelatedLabel(label))
        {
            totalCount += trueCount;
            totalCorrect += numcorrect;
            totalPredicted += predicted;
        }
    }
    double overallPrecision = (totalPredicted > 0) ? (totalCorrect / totalPredicted) : 0;
    // totalCount is zero when there are no labels or every label is an "unrelated" label.
    double overallRecall = (totalCount > 0) ? (totalCorrect / totalCount) : 0;
    double overallF = (totalPredicted > 0 && totalCorrect > 0) ? 2 * overallPrecision * overallRecall / (overallPrecision + overallRecall) : 0.0;
    pw.Println("Total\t" + totalCorrect + "\t" + totalPredicted + "\t" + totalCount + "\t" + formatter.Format(100 * overallPrecision) + "\t" + formatter.Format(100 * overallRecall) + "\t" + formatter.Format(100 * overallF));
}
/// <summary>Ratio of correct to predicted counts for the given relation.</summary>
/// <param name="relation">Relation label to look up in the counters.</param>
/// <returns>1.0 when the predicted count is zero; otherwise correct / predicted.</returns>
public virtual double Precision(string relation)
{
    var predicted = predictedCount.GetCount(relation);
    if (predicted != 0)
    {
        return correctCount.GetCount(relation) / predicted;
    }
    // Nothing was predicted for this relation.
    return 1.0;
}
/// <summary>Ratio of correct to gold counts for the given relation.</summary>
/// <param name="relation">Relation label to look up in the counters.</param>
/// <returns>0.0 when the gold count is zero; otherwise correct / gold.</returns>
public virtual double Recall(string relation)
{
    var gold = goldCount.GetCount(relation);
    if (gold != 0)
    {
        return correctCount.GetCount(relation) / gold;
    }
    // No gold instances of this relation.
    return 0.0;
}
/// <summary>Returns the value stored in <c>parameters</c> for the given object.</summary>
/// <param name="object">Must be one of the keys of <c>parameters</c>.</param>
/// <returns>The count recorded for <paramref name="object"/>.</returns>
/// <exception cref="System.ArgumentException">The object is not a key of <c>parameters</c>.</exception>
public virtual double ProbabilityOf(E @object)
{
    if (!parameters.KeySet().Contains(@object))
    {
        // A specific exception type instead of bare Exception; it derives from Exception,
        // so existing catch blocks keep working and the message is unchanged.
        throw new System.ArgumentException("Not a valid object for this multinomial!");
    }
    return parameters.GetCount(@object);
}
/// <summary>Checks selected counts and the total of Counters.Union(c1, c2).</summary>
public virtual void TestUnion()
{
    ICounter<string> c3 = Counters.Union(c1, c2);
    // NUnit's AreEqual takes (expected, actual); keeping that order makes failure messages accurate.
    NUnit.Framework.Assert.AreEqual(6.0, c3.GetCount("p"));
    NUnit.Framework.Assert.AreEqual(4.0, c3.GetCount("s"));
    NUnit.Framework.Assert.AreEqual(8.0, c3.GetCount("t"));
    NUnit.Framework.Assert.AreEqual(36.0, c3.TotalCount());
}
/// <summary>
/// Runs the base evaluation and, when doCatLevelEval is set, accumulates per-category
/// precision / recall / F1 statistics for the guess tree against the gold tree.
/// </summary>
/// <param name="guess">Tree under evaluation; a null tree aborts with a message on stderr.</param>
/// <param name="gold">Reference tree; a null tree aborts with a message on stderr.</param>
/// <param name="pw">Optional writer for running averages (only used when runningAverages is set).</param>
public override void Evaluate(Tree guess, Tree gold, PrintWriter pw)
{
    if (gold == null || guess == null)
    {
        System.Console.Error.Printf("%s: Cannot compare against a null gold or guess tree!\n", this.GetType().FullName);
        return;
    }
    // Do regular evaluation
    base.Evaluate(guess, gold, pw);
    if (doCatLevelEval)
    {
        IDictionary<string, ICollection<ILabel>> guessCats = MakeObjectsByCat(guess);
        IDictionary<string, ICollection<ILabel>> goldCats = MakeObjectsByCat(gold);
        ICollection<string> allCats = Generics.NewHashSet();
        Sharpen.Collections.AddAll(allCats, guessCats.Keys);
        Sharpen.Collections.AddAll(allCats, goldCats.Keys);
        foreach (string cat in allCats)
        {
            // A category may occur in only one of the two trees. The dictionary indexer would
            // throw KeyNotFoundException there, so look it up with TryGetValue and fall back
            // to an empty set.
            ICollection<ILabel> thisGuessCats;
            if (!guessCats.TryGetValue(cat, out thisGuessCats) || thisGuessCats == null)
            {
                thisGuessCats = Generics.NewHashSet();
            }
            ICollection<ILabel> thisGoldCats;
            if (!goldCats.TryGetValue(cat, out thisGoldCats) || thisGoldCats == null)
            {
                thisGoldCats = Generics.NewHashSet();
            }
            double currentPrecision = Precision(thisGuessCats, thisGoldCats);
            // Recall is precision with the roles of guess and gold swapped.
            double currentRecall = Precision(thisGoldCats, thisGuessCats);
            double currentF1 = (currentPrecision > 0.0 && currentRecall > 0.0 ? 2.0 / (1.0 / currentPrecision + 1.0 / currentRecall) : 0.0);
            precisions.IncrementCount(cat, currentPrecision);
            recalls.IncrementCount(cat, currentRecall);
            f1s.IncrementCount(cat, currentF1);
            precisions2.IncrementCount(cat, thisGuessCats.Count * currentPrecision);
            pnums2.IncrementCount(cat, thisGuessCats.Count);
            recalls2.IncrementCount(cat, thisGoldCats.Count * currentRecall);
            rnums2.IncrementCount(cat, thisGoldCats.Count);
            // NOTE(review): this runs once per category, not once per tree pair — confirm that is intended.
            if (lex != null)
            {
                MeasureOOV(guess, gold);
            }
            if (pw != null && runningAverages)
            {
                pw.Println(cat + "\tP: " + ((int)(currentPrecision * 10000)) / 100.0 + " (sent ave " + ((int)(precisions.GetCount(cat) * 10000 / num)) / 100.0 + ") (evalb " + ((int)(precisions2.GetCount(cat) * 10000 / pnums2.GetCount(cat))) / 100.0 + ")");
                pw.Println("\tR: " + ((int)(currentRecall * 10000)) / 100.0 + " (sent ave " + ((int)(recalls.GetCount(cat) * 10000 / num)) / 100.0 + ") (evalb " + ((int)(recalls2.GetCount(cat) * 10000 / rnums2.GetCount(cat))) / 100.0 + ")");
                double cF1 = 2.0 / (rnums2.GetCount(cat) / recalls2.GetCount(cat) + pnums2.GetCount(cat) / precisions2.GetCount(cat));
                string emit = str + " F1: " + ((int)(currentF1 * 10000)) / 100.0 + " (sent ave " + ((int)(10000 * f1s.GetCount(cat) / num)) / 100.0 + ", evalb " + ((int)(10000 * cF1)) / 100.0 + ")";
                pw.Println(emit);
            }
        }
        if (pw != null && runningAverages)
        {
            pw.Println("========================================");
        }
    }
}
/// <summary>
/// Walks the candidate phrases in priority-queue order and selects up to
/// constVars.numWordsToAdd of them, stopping at the first phrase scoring below
/// <paramref name="thresholdWordExtract"/>. The next (up to ten) phrases are logged for debugging.
/// </summary>
/// <param name="newdt">Scores of the candidate phrases.</param>
/// <param name="terms">Phrase-to-pattern counts, used for the non-redundant-pattern check.</param>
/// <param name="useThresholdNumPatternsForTheseWords">Phrases subject to the minimum-pattern-count check.</param>
/// <param name="ignoreWords">Phrases to fuzzy-match against; fuzzy-matched candidates are added to it. May be null.</param>
/// <param name="thresholdWordExtract">Minimum score a phrase needs to be selected.</param>
/// <returns>The selected phrases with their scores.</returns>
public virtual ICounter<CandidatePhrase> ChooseTopWords(ICounter<CandidatePhrase> newdt, TwoDimensionalCounter<CandidatePhrase, E> terms, ICounter<CandidatePhrase> useThresholdNumPatternsForTheseWords, ICollection<CandidatePhrase> ignoreWords, double thresholdWordExtract)
{
    IEnumerator<CandidatePhrase> termIter = Counters.ToPriorityQueue(newdt).GetEnumerator();
    ICounter<CandidatePhrase> finalwords = new ClassicCounter<CandidatePhrase>();
    // True when MoveNext() has already advanced to an element that the selection loop did not
    // look at; the logging loop below must report it instead of skipping over it.
    bool currentUnconsumed = false;
    while (termIter.MoveNext())
    {
        if (finalwords.Size() >= constVars.numWordsToAdd)
        {
            currentUnconsumed = true;
            break;
        }
        CandidatePhrase w = termIter.Current;
        if (newdt.GetCount(w) < thresholdWordExtract)
        {
            Redwood.Log(ConstantsAndVariables.extremedebug, "not adding word " + w + " and any later words because the score " + newdt.GetCount(w) + " is less than the threshold of " + thresholdWordExtract);
            break;
        }
        System.Diagnostics.Debug.Assert((newdt.GetCount(w) != double.PositiveInfinity));
        if (useThresholdNumPatternsForTheseWords.ContainsKey(w) && NumNonRedundantPatterns(terms, w) < constVars.thresholdNumPatternsApplied)
        {
            Redwood.Log("extremePatDebug", "Not adding " + w + " because the number of non redundant patterns are below threshold of " + constVars.thresholdNumPatternsApplied + ":" + terms.GetCounter(w).KeySet());
            continue;
        }
        CandidatePhrase matchedFuzzy = null;
        if (constVars.minLen4FuzzyForPattern > 0 && ignoreWords != null)
        {
            matchedFuzzy = ConstantsAndVariables.ContainsFuzzy(ignoreWords, w, constVars.minLen4FuzzyForPattern);
        }
        if (matchedFuzzy == null)
        {
            Redwood.Log("extremePatDebug", "adding word " + w);
            finalwords.SetCount(w, newdt.GetCount(w));
        }
        else
        {
            // matchedFuzzy is only ever set when ignoreWords is non-null, so this Add is safe.
            Redwood.Log("extremePatDebug", "not adding " + w + " because it matched " + matchedFuzzy + " in common English word");
            ignoreWords.Add(w);
        }
    }
    string nextTen = string.Empty;
    int n = 0;
    while (currentUnconsumed || termIter.MoveNext())
    {
        currentUnconsumed = false;
        n++;
        if (n > 10)
        {
            break;
        }
        CandidatePhrase w = termIter.Current;
        nextTen += ";\t" + w + ":" + newdt.GetCount(w);
    }
    Redwood.Log(Redwood.Dbg, "Next ten phrases were " + nextTen);
    return finalwords;
}
/// <summary>Checks selected counts and the total of Counters.Intersection(c1, c2).</summary>
public virtual void TestIntersection()
{
    ICounter<string> c3 = Counters.Intersection(c1, c2);
    // NUnit's AreEqual takes (expected, actual); keeping that order makes failure messages accurate.
    NUnit.Framework.Assert.AreEqual(1.0, c3.GetCount("p"));
    NUnit.Framework.Assert.AreEqual(2.0, c3.GetCount("q"));
    NUnit.Framework.Assert.AreEqual(0.0, c3.GetCount("s"));
    NUnit.Framework.Assert.AreEqual(0.0, c3.GetCount("t"));
    NUnit.Framework.Assert.AreEqual(6.0, c3.TotalCount());
}
/// <summary>Checks the per-key counts of Counters.Product(c1, c2).</summary>
public virtual void TestProduct()
{
    ICounter<string> c3 = Counters.Product(c1, c2);
    // NUnit's AreEqual takes (expected, actual); keeping that order makes failure messages accurate.
    NUnit.Framework.Assert.AreEqual(5.0, c3.GetCount("p"));
    NUnit.Framework.Assert.AreEqual(12.0, c3.GetCount("q"));
    NUnit.Framework.Assert.AreEqual(21.0, c3.GetCount("r"));
    NUnit.Framework.Assert.AreEqual(0.0, c3.GetCount("s"));
    NUnit.Framework.Assert.AreEqual(0.0, c3.GetCount("t"));
}
/// <summary>
/// Sets info.deviceId, then awaits counter.Increment(counterID) followed by
/// counter.GetCount(counterID), logging each result.
/// </summary>
// NOTE(review): async void means callers cannot await this method or observe its exceptions;
// the signature is kept because callers may bind to the void return type — confirm.
public async void TestCountNo()
{
    info.deviceId = did;

    var incrementResult = await counter.Increment(counterID);
    Debug.Log(incrementResult);

    var currentCount = await counter.GetCount(counterID);
    Debug.Log(currentCount);
}
/// <summary>
/// Adds four extra entries to the freshly set-up c1 and checks the tied ranks reported
/// for "z" and "t" by Counters.ToTiedRankCounter.
/// </summary>
public virtual void TestToTiedRankCounter()
{
    // Start from the fixture state, then add the entries this test relies on.
    SetUp();
    c1.SetCount("t", 1.0);
    c1.SetCount("u", 1.0);
    c1.SetCount("v", 2.0);
    c1.SetCount("z", 4.0);

    ICounter<string> tiedRanks = Counters.ToTiedRankCounter(c1);

    double rankOfZ = tiedRanks.GetCount("z");
    NUnit.Framework.Assert.AreEqual(1.5, rankOfZ);
    double rankOfT = tiedRanks.GetCount("t");
    NUnit.Framework.Assert.AreEqual(7.0, rankOfT);
}
/// <summary>
/// Adds the collection ["p", "p", "s"] to the freshly set-up c1 via Counters.AddInPlace
/// and checks the resulting counts of "p" and "s".
/// </summary>
public virtual void TestAddInPlaceCollection()
{
    // Initialize the counter fixture.
    SetUp();
    IList<string> additions = new List<string> { "p", "p", "s" };

    Counters.AddInPlace(c1, additions);

    double countOfP = c1.GetCount("p");
    NUnit.Framework.Assert.AreEqual(3.0, countOfP);
    double countOfS = c1.GetCount("s");
    NUnit.Framework.Assert.AreEqual(5.0, countOfS);
}
/// <summary>
/// Accumulates per-relation precision / recall / F1 statistics for the Collins dependencies
/// of the guess tree against those of the gold tree.
/// </summary>
/// <param name="guess">Tree under evaluation; a null tree aborts with a message on stderr.</param>
/// <param name="gold">Reference tree; a null tree aborts with a message on stderr.</param>
/// <param name="pw">Optional writer for running averages (only used when runningAverages is set).</param>
public override void Evaluate(Tree guess, Tree gold, PrintWriter pw)
{
    if (gold == null || guess == null)
    {
        System.Console.Error.Printf("%s: Cannot compare against a null gold or guess tree!\n", this.GetType().FullName);
        return;
    }
    IDictionary<CollinsRelation, ICollection<CollinsDependency>> guessDeps = MakeCollinsObjects(guess);
    IDictionary<CollinsRelation, ICollection<CollinsDependency>> goldDeps = MakeCollinsObjects(gold);
    ICollection<CollinsRelation> relations = Generics.NewHashSet();
    Sharpen.Collections.AddAll(relations, guessDeps.Keys);
    Sharpen.Collections.AddAll(relations, goldDeps.Keys);
    num += 1.0;
    foreach (CollinsRelation rel in relations)
    {
        // A relation may occur in only one of the two trees. The dictionary indexer would
        // throw KeyNotFoundException there, so look it up with TryGetValue and fall back
        // to an empty set.
        ICollection<CollinsDependency> thisGuessDeps;
        if (!guessDeps.TryGetValue(rel, out thisGuessDeps) || thisGuessDeps == null)
        {
            thisGuessDeps = Generics.NewHashSet();
        }
        ICollection<CollinsDependency> thisGoldDeps;
        if (!goldDeps.TryGetValue(rel, out thisGoldDeps) || thisGoldDeps == null)
        {
            thisGoldDeps = Generics.NewHashSet();
        }
        double currentPrecision = Precision(thisGuessDeps, thisGoldDeps);
        // Recall is precision with the roles of guess and gold swapped.
        double currentRecall = Precision(thisGoldDeps, thisGuessDeps);
        double currentF1 = (currentPrecision > 0.0 && currentRecall > 0.0 ? 2.0 / (1.0 / currentPrecision + 1.0 / currentRecall) : 0.0);
        precisions.IncrementCount(rel, currentPrecision);
        recalls.IncrementCount(rel, currentRecall);
        f1s.IncrementCount(rel, currentF1);
        precisions2.IncrementCount(rel, thisGuessDeps.Count * currentPrecision);
        pnums2.IncrementCount(rel, thisGuessDeps.Count);
        recalls2.IncrementCount(rel, thisGoldDeps.Count * currentRecall);
        rnums2.IncrementCount(rel, thisGoldDeps.Count);
        if (pw != null && runningAverages)
        {
            pw.Println(rel + "\tP: " + ((int)(currentPrecision * 10000)) / 100.0 + " (sent ave " + ((int)(precisions.GetCount(rel) * 10000 / num)) / 100.0 + ") (evalb " + ((int)(precisions2.GetCount(rel) * 10000 / pnums2.GetCount(rel))) / 100.0 + ")");
            pw.Println("\tR: " + ((int)(currentRecall * 10000)) / 100.0 + " (sent ave " + ((int)(recalls.GetCount(rel) * 10000 / num)) / 100.0 + ") (evalb " + ((int)(recalls2.GetCount(rel) * 10000 / rnums2.GetCount(rel))) / 100.0 + ")");
            double cF1 = 2.0 / (rnums2.GetCount(rel) / recalls2.GetCount(rel) + pnums2.GetCount(rel) / precisions2.GetCount(rel));
            string emit = str + " F1: " + ((int)(currentF1 * 10000)) / 100.0 + " (sent ave " + ((int)(10000 * f1s.GetCount(rel) / num)) / 100.0 + ", evalb " + ((int)(10000 * cF1)) / 100.0 + ")";
            pw.Println(emit);
        }
    }
    if (pw != null && runningAverages)
    {
        pw.Println("================================================================================");
    }
}
/// <summary>Checks Counters.L2Normalize on the counts a=4, b=2, c=1, d=2 (L2 norm 5).</summary>
public virtual void TestL2Normalize()
{
    ClassicCounter<string> c = new ClassicCounter<string>();
    c.IncrementCount("a", 4.0);
    c.IncrementCount("b", 2.0);
    c.IncrementCount("c", 1.0);
    c.IncrementCount("d", 2.0);
    ICounter<string> d = Counters.L2Normalize(c);
    // NUnit's AreEqual takes (expected, actual, delta); keeping that order makes failure messages accurate.
    NUnit.Framework.Assert.AreEqual(0.8, d.GetCount("a"), Tolerance);
    NUnit.Framework.Assert.AreEqual(0.4, d.GetCount("b"), Tolerance);
    NUnit.Framework.Assert.AreEqual(0.2, d.GetCount("c"), Tolerance);
    NUnit.Framework.Assert.AreEqual(0.4, d.GetCount("d"), Tolerance);
}
/// <summary>
/// Scores a phrase as the summed weight of the patterns that extracted it, divided by the
/// phrase's count in Data.processedDataFreq.
/// </summary>
/// <param name="word">Phrase being scored.</param>
/// <param name="patsThatExtractedThis">Patterns that extracted the phrase; zero-weight patterns are removed from it.</param>
/// <param name="allSelectedPatterns">Pattern weights, consulted only when usePatternWeights is set.</param>
/// <returns>0 when the processed frequency of the phrase is zero; otherwise the weighted score.</returns>
internal virtual double GetPatTFIDFScore(CandidatePhrase word, ICounter<E> patsThatExtractedThis, ICounter<E> allSelectedPatterns)
{
    if (Data.processedDataFreq.GetCount(word) == 0.0)
    {
        Redwood.Log(Redwood.Warn, "How come the processed corpus freq has count of " + word + " 0. The count in raw freq is " + Data.rawFreq.GetCount(word) + " and the Data.rawFreq size is " + Data.rawFreq.Size());
        return 0;
    }

    double weightSum = 0;
    ICollection<E> zeroWeightPatterns = new HashSet<E>();
    foreach (KeyValuePair<E, double> entry in patsThatExtractedThis.EntrySet())
    {
        // Every pattern counts as 1 unless pattern weights are enabled.
        double patternWeight = 1.0;
        if (usePatternWeights)
        {
            patternWeight = allSelectedPatterns.GetCount(entry.Key);
            if (patternWeight == 0)
            {
                Redwood.Log(Redwood.Force, "Warning: Weight zero for " + entry.Key + ". May be pattern was removed when choosing other patterns (if subsumed by another pattern).");
                zeroWeightPatterns.Add(entry.Key);
            }
        }
        weightSum += patternWeight;
    }

    // Removal is deferred until after the enumeration above has finished.
    Counters.RemoveKeys(patsThatExtractedThis, zeroWeightPatterns);
    return weightSum / Data.processedDataFreq.GetCount(word);
}
/// <summary>
/// Builds an RVFDatum whose feature values are tf * idf, each divided by the sum of all
/// tf * idf values of the datum.
/// </summary>
/// <param name="datum">Datum whose features are counted; features absent from <paramref name="featureDocCounts"/> are ignored.</param>
/// <param name="featureDocCounts">Document count per feature, used in the idf term.</param>
/// <returns>An RVFDatum carrying the normalized tf-idf features and the label of <paramref name="datum"/>.</returns>
public virtual RVFDatum<L, F> GetL1NormalizedTFIDFDatum(IDatum<L, F> datum, ICounter<F> featureDocCounts)
{
    ICounter<F> weighted = new ClassicCounter<F>();

    // Term frequency: one increment per occurrence of a known feature.
    foreach (F occurrence in datum.AsFeatures())
    {
        if (!featureDocCounts.ContainsKey(occurrence))
        {
            continue;
        }
        weighted.IncrementCount(occurrence, 1.0);
    }

    // Replace each term frequency by tf * idf and accumulate the normalizer.
    double normalizer = 0;
    foreach (F key in weighted.KeySet())
    {
        double inverseDocFreq = Math.Log(((double)(this.Size() + 1)) / (featureDocCounts.GetCount(key) + 0.5));
        double termFreq = weighted.GetCount(key);
        weighted.SetCount(key, termFreq * inverseDocFreq);
        normalizer += termFreq * inverseDocFreq;
    }

    // Scale every value by the accumulated sum.
    foreach (F key in weighted.KeySet())
    {
        double unscaled = weighted.GetCount(key);
        weighted.SetCount(key, unscaled / normalizer);
    }

    return new RVFDatum<L, F>(weighted, datum.Label());
}
/// <summary>
/// Computes a score per label: the prior (plus priorZero when addZeroValued is set) plus the
/// summed feature weights of the example.
/// </summary>
/// <param name="example">Datum whose feature counter supplies the feature values.</param>
/// <returns>A counter mapping each label to its score.</returns>
public virtual ClassicCounter<L> ScoresOf(RVFDatum<L, F> example)
{
    ClassicCounter<L> labelScores = new ClassicCounter<L>();
    Counters.AddInPlace(labelScores, priors);
    if (addZeroValued)
    {
        Counters.AddInPlace(labelScores, priorZero);
    }

    foreach (L label in labels)
    {
        double featureSum = 0.0;
        ICounter<F> featureCounts = example.AsFeaturesCounter();
        foreach (F feature in featureCounts.KeySet())
        {
            // Feature values are truncated to integers before the weight lookup.
            int value = (int)featureCounts.GetCount(feature);
            // NOTE(review): int.Parse on an int looks like a conversion artifact — confirm what Weight expects here.
            featureSum += Weight(label, feature, int.Parse(value));
            if (addZeroValued)
            {
                featureSum -= Weight(label, feature, zero);
            }
        }
        labelScores.IncrementCount(label, featureSum);
    }

    return labelScores;
}
/// <summary>
/// Builds a distribution by subtracting <paramref name="discount"/> from every count larger than
/// the discount and reserving the removed mass. Keys whose count does not exceed the discount
/// are left out of the counter and their whole count is reserved.
/// </summary>
/// <param name="counter">Observed counts.</param>
/// <param name="numberOfKeys">Stored on the resulting distribution as its number of keys.</param>
/// <param name="discount">Amount subtracted from each sufficiently large count.</param>
/// <returns>The discounted distribution, with reservedMass set to the reserved fraction of the total count.</returns>
public static Edu.Stanford.Nlp.Stats.Distribution<E> AbsolutelyDiscountedDistribution<E>(ICounter<E> counter, int numberOfKeys, double discount)
{
    Edu.Stanford.Nlp.Stats.Distribution<E> discounted = new Edu.Stanford.Nlp.Stats.Distribution<E>();
    discounted.counter = new ClassicCounter<E>();
    double total = counter.TotalCount();
    double reserved = 0.0;

    foreach (E key in counter.KeySet())
    {
        double count = counter.GetCount(key);
        if (!(count > discount))
        {
            // count <= discount: the key is not stored and is treated as unseen.
            reserved += count;
            continue;
        }
        // A positive count is left over after discounting.
        discounted.counter.SetCount(key, (count - discount) / total);
        reserved += discount;
    }

    discounted.numberOfKeys = numberOfKeys;
    discounted.reservedMass = reserved / total;
    return discounted;
}
/// <summary>Trains a classifier from a dataset whose examples are read as RVFDatum instances.</summary>
/// <remarks>
/// The datums are assumed not to contain zero-valued features; every feature of
/// <paramref name="featureSet"/> that a datum does not mention is given the value 0.
/// </remarks>
/// <param name="examples">Training examples.</param>
/// <param name="featureSet">All features to index; defines the width of each data row.</param>
/// <returns>The classifier produced by the array-based TrainClassifier overload.</returns>
public virtual NaiveBayesClassifier<L, F> TrainClassifier(GeneralDataset<L, F> examples, ICollection<F> featureSet)
{
    int numFeatures = featureSet.Count;
    int numExamples = examples.Size();
    // One row per example. Rows are allocated zero-filled below; starting from an empty
    // jagged array would make every data[d][fNo] write throw.
    int[][] data = new int[numExamples][];
    int[] labels = new int[numExamples];
    labelIndex = new HashIndex<L>();
    featureIndex = new HashIndex<F>();
    foreach (F feat in featureSet)
    {
        featureIndex.Add(feat);
    }
    for (int d = 0; d < numExamples; d++)
    {
        data[d] = new int[numFeatures];
        RVFDatum<L, F> datum = examples.GetRVFDatum(d);
        ICounter<F> c = datum.AsFeaturesCounter();
        foreach (F feature in c.KeySet())
        {
            int fNo = featureIndex.IndexOf(feature);
            // Feature values are truncated to integers.
            int value = (int)c.GetCount(feature);
            data[d][fNo] = value;
        }
        labelIndex.Add(datum.Label());
        labels[d] = labelIndex.IndexOf(datum.Label());
    }
    int numClasses = labelIndex.Size();
    return TrainClassifier(data, labels, numFeatures, numClasses, labelIndex, featureIndex);
}
/// <summary>Looks up the weight stored for the ((label, feature), value) key.</summary>
/// <param name="label">Label part of the key.</param>
/// <param name="feature">Feature part of the key.</param>
/// <param name="val">Feature value part of the key.</param>
/// <returns>The count recorded in <c>weights</c> for that key.</returns>
private double Weight(L label, F feature, Number val)
{
    Pair<L, F> labelAndFeature = new Pair<L, F>(label, feature);
    Pair<Pair<L, F>, Number> key = new Pair<Pair<L, F>, Number>(labelAndFeature, val);
    return weights.GetCount(key);
}
/// <summary>
/// Trains on a sentence by spreading each word's weight over the tags recorded in
/// baseTagCounts for the word's basic category, proportionally to their counts.
/// </summary>
/// <param name="sentence">Tagged words; the position of each word is passed on to Train.</param>
/// <param name="weight">Weight of the sentence, split across the candidate tags of each word.</param>
public void TrainUnannotated(IList<TaggedWord> sentence, double weight)
{
    uwModelTrainer.IncrementTreesRead(weight);

    // Advanced at the top of every iteration, so skipped words still consume a position.
    int loc = -1;
    foreach (TaggedWord tw in sentence)
    {
        ++loc;
        string baseTag = op.Langpack().BasicCategory(tw.Tag());
        // NOTE(review): if baseTagCounts is a standard dictionary, this indexer throws for an
        // unknown base tag instead of returning null — confirm.
        ICounter<string> tagDistribution = baseTagCounts[baseTag];
        if (tagDistribution == null)
        {
            continue;
        }
        double totalCount = tagDistribution.TotalCount();
        if (totalCount == 0)
        {
            continue;
        }
        foreach (string tag in tagDistribution.KeySet())
        {
            TaggedWord retagged = new TaggedWord(tw.Word(), tag);
            Train(retagged, loc, weight * tagDistribution.GetCount(tag) / totalCount);
        }
    }
}
/// <summary>
/// Combines the stored parameters with observed counts: the object's parameter plus its
/// observed count, divided by the total of all parameters plus the total of all counts.
/// </summary>
/// <param name="counts">Observed counts.</param>
/// <param name="object">Object whose probability is requested.</param>
/// <returns>The combined ratio for <paramref name="object"/>.</returns>
public virtual double GetPosteriorPredictiveProbability(ICounter<E> counts, E @object)
{
    double objectMass = parameters.GetCount(@object) + counts.GetCount(@object);
    double totalMass = parameters.TotalCount() + counts.TotalCount();
    return objectMass / totalMass;
}
/// <summary>
/// Classifies every datum, logs the justification and whether the predicted label matches the
/// datum's own label, and returns the predicted labels in input order.
/// </summary>
/// <param name="testDatums">Datums to classify.</param>
/// <returns>One predicted label per input datum.</returns>
public virtual IList<string> AnnotateMulticlass(IList<IDatum<string, string>> testDatums)
{
    IList<string> predictions = new List<string>();
    foreach (IDatum<string, string> datum in testDatums)
    {
        string predicted = ClassOf(datum, null);
        ICounter<string> distribution = ProbabilityOf(datum);
        double confidence = distribution.GetCount(predicted);

        // The justification is only produced when fine-level logging is enabled.
        StringWriter justificationText = new StringWriter();
        PrintWriter justificationWriter = new PrintWriter(justificationText);
        if (logger.IsLoggable(Level.Fine))
        {
            JustificationOf(datum, justificationWriter, predicted);
        }
        logger.Fine("JUSTIFICATION for label GOLD:" + datum.Label() + " SYS:" + predicted + " (prob:" + confidence + "):\n" + justificationText.ToString() + "\nJustification done.");

        predictions.Add(predicted);

        if (datum.Label().Equals(predicted))
        {
            logger.Info("Classification: found similar type " + predicted + " for relation: " + datum);
        }
        else
        {
            logger.Info("Classification: found different type " + predicted + " for relation: " + datum);
        }
    }
    return predictions;
}
/// <summary>
/// Runs the k-best sequence finder and checks the K2nr best sequences and their scores against
/// test2nrAnswers / test2nrScores, tolerating a swap between adjacent sequences with tied scores.
/// </summary>
/// <param name="tsm">Sequence model to search.</param>
/// <param name="sf">Finder under test.</param>
public static void RunSequencesFinder(BestSequenceFinderTest.ITestSequenceModel tsm, KBestSequenceFinder sf)
{
    ICounter<int[]> bestLabelsCounter = sf.KBestSequences(tsm, K2nr);
    IList<int[]> topValues = Counters.ToSortedList(bestLabelsCounter);
    IEnumerator<int[]> iter = topValues.GetEnumerator();
    for (int i = 0; i < K2nr; i++)
    {
        // Current is only defined after a successful MoveNext(); advance once per expected sequence.
        NUnit.Framework.Assert.IsTrue(iter.MoveNext(), "Expected " + K2nr + " sequences but found only " + i);
        int[] sequence = iter.Current;
        string strSequence = Arrays.ToString(sequence);
        double score = bestLabelsCounter.GetCount(sequence);
        // Deal with ties in the scoring ... only tied pairs handled.
        bool found = strSequence.Equals(test2nrAnswers[i])
            || (i > 0 && Math.Abs(score - test2nrScores[i - 1]) < 1e-8 && strSequence.Equals(test2nrAnswers[i - 1]))
            || (i + 1 < test2nrScores.Length && Math.Abs(score - test2nrScores[i + 1]) < 1e-8 && strSequence.Equals(test2nrAnswers[i + 1]));
        // NUnit takes the message last: IsTrue(condition, message), AreEqual(expected, actual, delta, message).
        NUnit.Framework.Assert.IsTrue(found, "Best sequence is wrong. Correct: " + test2nrAnswers[i] + ", found: " + strSequence);
        NUnit.Framework.Assert.AreEqual(test2nrScores[i], score, 1e-8, "Best sequence score is wrong.");
    }
}
/// <summary>
/// Stores the given features as row <c>size</c> of the data / values arrays, growing both arrays
/// first when they are full. The row index <c>size</c> itself is not advanced here.
/// </summary>
/// <param name="features">Feature values of the datum being added.</param>
private void AddFeatures(ICounter<F> features)
{
    if (data.Length == size)
    {
        // Double the capacity, but never to zero: with size == 0 a plain size * 2 would
        // allocate empty arrays and the write to data[size] below would throw.
        int newCapacity = System.Math.Max(1, size * 2);
        int[][] newData = new int[newCapacity][];
        double[][] newValues = new double[newCapacity][];
        lock (typeof(Runtime))
        {
            System.Array.Copy(data, 0, newData, 0, size);
            System.Array.Copy(values, 0, newValues, 0, size);
        }
        data = newData;
        values = newValues;
    }
    IList<F> featureNames = new List<F>(features.KeySet());
    int nFeatures = featureNames.Count;
    data[size] = new int[nFeatures];
    values[size] = new double[nFeatures];
    for (int i = 0; i < nFeatures; ++i)
    {
        F feature = featureNames[i];
        int fID = featureIndex.AddToIndex(feature);
        if (fID >= 0)
        {
            data[size][i] = fID;
            values[size][i] = features.GetCount(feature);
        }
        else
        {
            // Usually a feature present at test but not training time; its slot keeps the
            // default id 0 with value 0.0.
            System.Diagnostics.Debug.Assert(featureIndex.IsLocked(), "Could not add feature to index: " + feature);
        }
    }
}
/// <summary>
/// Accumulates per-category precision / recall / F1 statistics for the constituents of the
/// guess tree against those of the gold tree.
/// </summary>
/// <param name="guess">Tree under evaluation; a null tree aborts with a message on stderr.</param>
/// <param name="gold">Reference tree; a null tree aborts with a message on stderr.</param>
/// <param name="pw">Optional writer for running averages (only used when runningAverages is set).</param>
public override void Evaluate(Tree guess, Tree gold, PrintWriter pw)
{
    if (gold == null || guess == null)
    {
        System.Console.Error.Printf("%s: Cannot compare against a null gold or guess tree!%n", this.GetType().FullName);
        return;
    }
    IDictionary<ILabel, ICollection<Constituent>> guessDeps = MakeObjectsByCat(guess);
    IDictionary<ILabel, ICollection<Constituent>> goldDeps = MakeObjectsByCat(gold);
    ICollection<ILabel> cats = Generics.NewHashSet(guessDeps.Keys);
    Sharpen.Collections.AddAll(cats, goldDeps.Keys);
    if (pw != null && runningAverages)
    {
        pw.Println("========================================");
        pw.Println("Labeled Bracketed Evaluation by Category");
        pw.Println("========================================");
    }
    ++num;
    foreach (ILabel cat in cats)
    {
        // A category may occur in only one of the two trees. A single TryGetValue lookup
        // replaces the membership test followed by the indexer.
        ICollection<Constituent> thisGuessDeps;
        if (!guessDeps.TryGetValue(cat, out thisGuessDeps))
        {
            thisGuessDeps = Generics.NewHashSet<Constituent>();
        }
        ICollection<Constituent> thisGoldDeps;
        if (!goldDeps.TryGetValue(cat, out thisGoldDeps))
        {
            thisGoldDeps = Generics.NewHashSet<Constituent>();
        }
        double currentPrecision = Precision(thisGuessDeps, thisGoldDeps);
        // Recall is precision with the roles of guess and gold swapped.
        double currentRecall = Precision(thisGoldDeps, thisGuessDeps);
        double currentF1 = (currentPrecision > 0.0 && currentRecall > 0.0 ? 2.0 / (1.0 / currentPrecision + 1.0 / currentRecall) : 0.0);
        precisions.IncrementCount(cat, currentPrecision);
        recalls.IncrementCount(cat, currentRecall);
        f1s.IncrementCount(cat, currentF1);
        precisions2.IncrementCount(cat, thisGuessDeps.Count * currentPrecision);
        pnums2.IncrementCount(cat, thisGuessDeps.Count);
        recalls2.IncrementCount(cat, thisGoldDeps.Count * currentRecall);
        rnums2.IncrementCount(cat, thisGoldDeps.Count);
        if (pw != null && runningAverages)
        {
            pw.Println(cat + "\tP: " + ((int)(currentPrecision * 10000)) / 100.0 + " (sent ave " + ((int)(precisions.GetCount(cat) * 10000 / num)) / 100.0 + ") (evalb " + ((int)(precisions2.GetCount(cat) * 10000 / pnums2.GetCount(cat))) / 100.0 + ")");
            pw.Println("\tR: " + ((int)(currentRecall * 10000)) / 100.0 + " (sent ave " + ((int)(recalls.GetCount(cat) * 10000 / num)) / 100.0 + ") (evalb " + ((int)(recalls2.GetCount(cat) * 10000 / rnums2.GetCount(cat))) / 100.0 + ")");
            double cF1 = 2.0 / (rnums2.GetCount(cat) / recalls2.GetCount(cat) + pnums2.GetCount(cat) / precisions2.GetCount(cat));
            string emit = str + " F1: " + ((int)(currentF1 * 10000)) / 100.0 + " (sent ave " + ((int)(10000 * f1s.GetCount(cat) / num)) / 100.0 + ", evalb " + ((int)(10000 * cF1)) / 100.0 + ")";
            pw.Println(emit);
        }
    }
    if (pw != null && runningAverages)
    {
        pw.Println("========================================");
    }
}
/// <summary>
/// Enumerates a tagging for every tag whose score in logProbs is within iteratorCutoffFactor
/// of the maximum score.
/// </summary>
/// <param name="word">Word index; passed to EnsureProbs before the scores are read.</param>
/// <param name="loc">Not used by this implementation.</param>
/// <param name="featureSpec">Not used by this implementation.</param>
/// <returns>An enumerator over the retained IntTaggedWord instances, in tag-index order.</returns>
public virtual IEnumerator<IntTaggedWord> RuleIteratorByWord(int word, int loc, string featureSpec)
{
    EnsureProbs(word);

    IList<IntTaggedWord> candidates = new List<IntTaggedWord>();
    double best = Counters.Max(logProbs);
    int tagIdx = 0;
    while (tagIdx < tagIndex.Size())
    {
        IntTaggedWord tagging = new IntTaggedWord(word, tagIdx);
        double tagScore = logProbs.GetCount(tagIndex.Get(tagIdx));
        bool withinCutoff = tagScore > best - iteratorCutoffFactor;
        if (withinCutoff)
        {
            candidates.Add(tagging);
        }
        tagIdx++;
    }
    return candidates.GetEnumerator();
}
/// <summary>
/// Checks Counters.AbsoluteDifference in both argument orders; the expected values are the
/// same either way.
/// </summary>
public virtual void TestAbsoluteDifference()
{
    // NUnit's AreEqual takes (expected, actual); keeping that order makes failure messages accurate.
    ICounter<string> c3 = Counters.AbsoluteDifference(c1, c2);
    NUnit.Framework.Assert.AreEqual(4.0, c3.GetCount("p"));
    NUnit.Framework.Assert.AreEqual(4.0, c3.GetCount("q"));
    NUnit.Framework.Assert.AreEqual(4.0, c3.GetCount("r"));
    NUnit.Framework.Assert.AreEqual(4.0, c3.GetCount("s"));
    NUnit.Framework.Assert.AreEqual(8.0, c3.GetCount("t"));
    ICounter<string> c4 = Counters.AbsoluteDifference(c2, c1);
    NUnit.Framework.Assert.AreEqual(4.0, c4.GetCount("p"));
    NUnit.Framework.Assert.AreEqual(4.0, c4.GetCount("q"));
    NUnit.Framework.Assert.AreEqual(4.0, c4.GetCount("r"));
    NUnit.Framework.Assert.AreEqual(4.0, c4.GetCount("s"));
    NUnit.Framework.Assert.AreEqual(8.0, c4.GetCount("t"));
}
// Does L1 or L2 using FOBOS and lazy update, so L1 should not be handled in the objective.
// Alternatively, other regularization can be handled in the objective, but then, if the
// derivative is not sparse, this routine would not be very efficient. It might still be
// okay for CRFs.
/// <summary>
/// Runs numPasses passes of batched updates over the data, adjusting <paramref name="x"/> in place
/// with per-feature learning rates derived from the accumulated squared gradients.
/// </summary>
/// <param name="function">Objective providing the data size, value and derivative on a sample.</param>
/// <param name="x">Initial weights; modified in place and returned.</param>
/// <param name="maxIterations">Limit compared against the per-feature update counter.</param>
/// <returns>The same counter instance passed as <paramref name="x"/>.</returns>
public virtual ICounter<K> Minimize(F function, ICounter<K> x, int maxIterations)
{
    Sayln(" Batch size of: " + batchSize);
    Sayln(" Data dimension of: " + function.DataSize());
    int numBatches = (function.DataSize() - 1) / this.batchSize + 1;
    Sayln(" Batches per pass through data: " + numBatches);
    Sayln(" Number of passes is = " + numPasses);
    Sayln(" Max iterations is = " + maxIterations);

    ICounter<K> lastUpdated = new ClassicCounter<K>();
    int timeStep = 0;
    Timing total = new Timing();
    total.Start();

    for (int iter = 0; iter < numPasses; iter++)
    {
        double totalObjValue = 0;
        for (int batch = 0; batch < numBatches; batch++)
        {
            int[] sample = GetSample(function, this.batchSize);
            // The core adagrad step.
            ICounter<K> gradient = function.DerivativeAt(x, sample);
            totalObjValue += function.ValueAt(x, sample);
            foreach (K feature in gradient.KeySet())
            {
                double gradf = gradient.GetCount(feature);
                // Rate before and after folding this gradient into the squared-gradient sum.
                double prevrate = eta / (Math.Sqrt(sumGradSquare.GetCount(feature)) + soften);
                double sgsValue = sumGradSquare.IncrementCount(feature, gradf * gradf);
                double currentrate = eta / (Math.Sqrt(sgsValue) + soften);
                double testupdate = x.GetCount(feature) - (currentrate * gradient.GetCount(feature));
                double lastUpdateTimeStep = lastUpdated.GetCount(feature);
                double idleinterval = timeStep - lastUpdateTimeStep - 1;
                lastUpdated.SetCount(feature, (double)timeStep);

                // Lazy update: regularization for the steps this feature sat idle is applied now.
                double accumulatedRate = currentrate + prevrate * idleinterval;
                double trunc = Math.Max(0.0, (Math.Abs(testupdate) - accumulatedRate * this.lambdaL1));
                double trunc2 = trunc * Math.Pow(1 - this.lambdaL2, accumulatedRate);
                double realupdate = Math.Signum(testupdate) * trunc2;
                // NOTE(review): this comparison is signed, so every negative weight is removed
                // as well — confirm whether Math.Abs(realupdate) was intended.
                if (realupdate < Eps)
                {
                    x.Remove(feature);
                }
                else
                {
                    x.SetCount(feature, realupdate);
                }

                // Reporting.
                timeStep++;
                if (timeStep > maxIterations)
                {
                    Sayln("Stochastic Optimization complete. Stopped after max iterations");
                    // NOTE(review): this leaves only the per-feature loop; the batch and pass
                    // loops keep running — confirm that is intended.
                    break;
                }
                Sayln(System.Console.Out.Format("Iter %d \t batch: %d \t time=%.2f \t obj=%.4f", iter, timeStep, total.Report() / 1000.0, totalObjValue).ToString());
            }
        }
    }
    return x;
}