/// <summary>
/// Scores the classifier on the dataset by tallying per-class true positives,
/// false positives and false negatives (the negative label is excluded from
/// the counts) and returning the resulting F-measure.
/// </summary>
/// <param name="classifier">The classifier whose predictions are evaluated.</param>
/// <param name="data">The dataset providing datums and gold labels.</param>
/// <returns>The F-measure computed by <c>GetFMeasure()</c> over the tallied counts.</returns>
public virtual double Score <F>(IClassifier <L, F> classifier, GeneralDataset <L, F> data)
{
    // First pass: record the classifier's prediction for every datum.
    IList <L> guesses = new List <L>();
    IList <L> labels = new List <L>();
    for (int i = 0; i < data.Size(); i++)
    {
        IDatum <L, F> d = data.GetRVFDatum(i);
        guesses.Add(classifier.ClassOf(d));
    }
    // Second pass: materialize the gold labels from the dataset's label array,
    // using the dataset's own label index to decode them.
    int[] goldIndices = data.GetLabelsArray();
    labelIndex = data.labelIndex;
    for (int j = 0; j < data.Size(); j++)
    {
        labels.Add(labelIndex.Get(goldIndices[j]));
    }
    // Rebuild the label index as the union of dataset and classifier labels so
    // every label seen on either side has a slot in the count arrays.
    labelIndex = new HashIndex <L>();
    labelIndex.AddAll(data.LabelIndex().ObjectsList());
    labelIndex.AddAll(classifier.Labels());
    int numClasses = labelIndex.Size();
    tpCount = new int[numClasses];
    fpCount = new int[numClasses];
    fnCount = new int[numClasses];
    negIndex = labelIndex.IndexOf(negLabel);
    // Tally TP/FP/FN per class, skipping the designated negative class.
    for (int k = 0; k < guesses.Count; ++k)
    {
        int guessed = labelIndex.IndexOf(guesses[k]);
        int gold = labelIndex.IndexOf(labels[k]);
        if (guessed == gold)
        {
            if (guessed != negIndex)
            {
                tpCount[guessed]++;
            }
        }
        else
        {
            if (guessed != negIndex)
            {
                fpCount[guessed]++;
            }
            if (gold != negIndex)
            {
                fnCount[gold]++;
            }
        }
    }
    return GetFMeasure();
}
/// <summary>
/// Scores the classifier on the dataset: resets the internal counts, feeds each
/// (predicted, gold) label pair to <c>AddGuess</c>, finalizes the counts, and
/// returns the resulting F-measure.
/// </summary>
/// <param name="classifier">The classifier whose predictions are evaluated.</param>
/// <param name="data">The dataset providing datums and gold labels.</param>
/// <returns>The F-measure computed by <c>GetFMeasure()</c>.</returns>
public override double Score <F>(IClassifier <L, F> classifier, GeneralDataset <L, F> data)
{
    // Union of classifier labels and dataset labels, so both prediction and
    // gold label are always resolvable.
    labelIndex = new HashIndex <L>();
    labelIndex.AddAll(classifier.Labels());
    labelIndex.AddAll(data.labelIndex.ObjectsList());
    ClearCounts();
    int[] goldLabels = data.GetLabelsArray();
    for (int i = 0; i < data.Size(); i++)
    {
        L predicted = classifier.ClassOf(data.GetRVFDatum(i));
        AddGuess(predicted, labelIndex.Get(goldLabels[i]));
    }
    FinalizeCounts();
    return GetFMeasure();
}
/// <summary>A helper function for dumping the accuracy of the trained classifier.</summary>
/// <param name="classifier">The classifier to evaluate.</param>
/// <param name="dataset">The dataset to evaluate the classifier on.</param>
public static void DumpAccuracy(IClassifier <ClauseSplitter.ClauseClassifierLabel, string> classifier, GeneralDataset <ClauseSplitter.ClauseClassifierLabel, string> dataset)
{
    DecimalFormat df = new DecimalFormat("0.00%");
    Redwood.Log("size: " + dataset.Size());
    // NOTE(review): both "count" lines below pass a null predicate to Filter(), so they
    // cannot actually restrict the stream to split/interm examples. This looks like a
    // Java-to-C# translation artifact where the original lambdas (presumably
    // label == ClauseSplit and label == ClauseInterm respectively) were dropped —
    // TODO confirm against the Java source and restore the predicates.
    Redwood.Log("split count: " + StreamSupport.Stream(dataset.Spliterator(), false).Filter(null).Collect(Collectors.ToList()).Count);
    Redwood.Log("interm count: " + StreamSupport.Stream(dataset.Spliterator(), false).Filter(null).Collect(Collectors.ToList()).Count);
    // Precision/recall/F1 for the CLAUSE_SPLIT label.
    Pair <double, double> pr = classifier.EvaluatePrecisionAndRecall(dataset, ClauseSplitter.ClauseClassifierLabel.ClauseSplit);
    Redwood.Log("p (split): " + df.Format(pr.first));
    Redwood.Log("r (split): " + df.Format(pr.second));
    // NOTE(review): F1 evaluates to NaN when p + r == 0 (0.0/0.0 in double arithmetic);
    // tolerable for a debug dump but worth knowing.
    Redwood.Log("f1 (split): " + df.Format(2 * pr.first * pr.second / (pr.first + pr.second)));
    // Precision/recall/F1 for the CLAUSE_INTERM label.
    pr = classifier.EvaluatePrecisionAndRecall(dataset, ClauseSplitter.ClauseClassifierLabel.ClauseInterm);
    Redwood.Log("p (interm): " + df.Format(pr.first));
    Redwood.Log("r (interm): " + df.Format(pr.second));
    Redwood.Log("f1 (interm): " + df.Format(2 * pr.first * pr.second / (pr.first + pr.second)));
}
/// <summary>
/// Initializes multi-class confidence statistics: classifies every datum, accumulates
/// overall accuracy and log-likelihood, and fills the <c>scores</c>/<c>isCorrect</c>
/// parallel arrays ordered by descending guess confidence (via a priority queue keyed
/// on the negated guess score).
/// </summary>
/// <param name="classifier">The probabilistic classifier to evaluate.</param>
/// <param name="data">The dataset providing datums and gold labels.</param>
public virtual void InitMC <F>(IProbabilisticClassifier <L, F> classifier, GeneralDataset <L, F> data)
{
    // Priority = -guessScore, so the most confident guesses sort first.
    IPriorityQueue <Pair <int, Pair <double, bool> > > q = new BinaryHeapPriorityQueue <Pair <int, Pair <double, bool> > >();
    total = 0;
    correct = 0;
    logLikelihood = 0.0;
    for (int i = 0; i < data.Size(); i++)
    {
        IDatum <L, F> d = data.GetRVFDatum(i);
        ICounter <L> scores = classifier.LogProbabilityOf(d);
        L guess = Counters.Argmax(scores);
        L correctLab = d.Label();
        double guessScore = scores.GetCount(guess);
        double correctScore = scores.GetCount(correctLab);
        int guessInd = data.LabelIndex().IndexOf(guess);
        int correctInd = data.LabelIndex().IndexOf(correctLab);
        total++;
        if (guessInd == correctInd)
        {
            correct++;
        }
        // Log-likelihood accumulates the log-probability of the *gold* label.
        logLikelihood += correctScore;
        // BUGFIX: the converted code called int.Parse(i) and bool.ValueOf(...), which
        // are invalid C# (int.Parse takes a string; bool has no ValueOf member). These
        // were Java boxing calls (Integer.valueOf / Boolean.valueOf) that are
        // unnecessary in C# — the raw values are used directly.
        q.Add(new Pair <int, Pair <double, bool> >(i, new Pair <double, bool>(guessScore, guessInd == correctInd)), -guessScore);
    }
    // NOTE: yields NaN for an empty dataset (0.0 / 0.0 in double arithmetic).
    accuracy = (double)correct / (double)total;
    // Drain the queue into confidence-sorted parallel arrays.
    IList <Pair <int, Pair <double, bool> > > sorted = q.ToSortedList();
    scores = new double[sorted.Count];
    isCorrect = new bool[sorted.Count];
    for (int j = 0; j < sorted.Count; j++)
    {
        Pair <double, bool> next = sorted[j].Second();
        scores[j] = next.First();
        isCorrect[j] = next.Second();
    }
}
/// <summary>
/// Scores the classifier by building a precision-recall curve over the positive-label
/// probabilities, then caching the derived statistics (confidence-weighted accuracy,
/// optimal accuracy, log-likelihood, and the CWA arrays) on this instance.
/// </summary>
/// <param name="classifier">The probabilistic classifier to evaluate.</param>
/// <param name="data">The dataset providing datums and gold labels.</param>
/// <returns>The plain accuracy from the PR curve.</returns>
public virtual double Score <F>(IProbabilisticClassifier <L, F> classifier, GeneralDataset <L, F> data)
{
    // Pair each datum's posterior probability of the positive label with a
    // 0/1 indicator for whether the gold label is the positive label.
    List <Pair <double, int> > dataScores = new List <Pair <double, int> >();
    for (int i = 0; i < data.Size(); i++)
    {
        IDatum <L, F> d = data.GetRVFDatum(i);
        ICounter <L> logProbs = classifier.LogProbabilityOf(d);
        double posProb = Math.Exp(logProbs.GetCount(posLabel));
        int goldIsPos = d.Label().Equals(posLabel) ? 1 : 0;
        dataScores.Add(new Pair <double, int>(posProb, goldIsPos));
    }
    // Derive all cached statistics from the resulting PR curve.
    PRCurve prc = new PRCurve(dataScores);
    confWeightedAccuracy = prc.Cwa();
    accuracy = prc.Accuracy();
    optAccuracy = prc.OptimalAccuracy();
    optConfWeightedAccuracy = prc.OptimalCwa();
    logLikelihood = prc.LogLikelihood();
    accrecall = prc.CwaArray();
    optaccrecall = prc.OptimalCwaArray();
    return accuracy;
}