/// <summary>
/// Builds the curve for one class of interest. A ThresholdCurve is computed
/// over the same predictions first; every row of that curve then contributes
/// two rows to the result:
/// <list type="bullet">
/// <item><description>{ 0, false positive rate, threshold }</description></item>
/// <item><description>{ 1, 1 - true positive rate, threshold }</description></item>
/// </list>
/// Both rows are added with weight 1.0. The attribute names of the three
/// columns come from makeHeader(), which is defined elsewhere.
/// </summary>
/// <param name="predictions">a FastVector whose elements are NominalPrediction
/// objects.
/// </param>
/// <param name="classIndex">index of the class of interest.
/// </param>
/// <returns> datapoints as a set of instances, or null when there are no
/// predictions or when the first prediction's distribution has no entry at
/// classIndex.
/// </returns>
public virtual Instances getCurve(FastVector predictions, int classIndex)
{
    if (predictions.size() == 0)
    {
        return null;
    }

    // Only the first prediction is inspected to decide whether classIndex is usable.
    NominalPrediction firstPrediction = (NominalPrediction) predictions.elementAt(0);
    if (classIndex >= firstPrediction.distribution().Length)
    {
        return null;
    }

    Instances thresholdData = new ThresholdCurve().getCurve(predictions, classIndex);
    Instances result = makeHeader();

    // Column positions of the values we need inside the threshold curve.
    int fpColumn = thresholdData.attribute(ThresholdCurve.FP_RATE_NAME).index();
    int tpColumn = thresholdData.attribute(ThresholdCurve.TP_RATE_NAME).index();
    int thresholdColumn = thresholdData.attribute(ThresholdCurve.THRESHOLD_NAME).index();

    for (int row = 0; row < thresholdData.numInstances(); row++)
    {
        Instance source = thresholdData.instance(row);
        double fpRate = source.value_Renamed(fpColumn);
        double tpRate = source.value_Renamed(tpColumn);
        double threshold = source.value_Renamed(thresholdColumn);

        // Each threshold yields the two end points of one line segment (x = 0 and x = 1).
        result.add(new Instance(1.0, new double[] { 0, fpRate, threshold }));
        result.add(new Instance(1.0, new double[] { 1, 1.0 - tpRate, threshold }));
    }

    return result;
}
/// <summary>
/// Calculates the cumulative margin distribution for a set of predictions
/// and returns it as a set of Instances. Each row is produced by
/// makeInstance(margin, current, cumulative):
/// <list type="bullet">
/// <item><description><b>Margin</b>: the margin value (x-coordinate).</description></item>
/// <item><description><b>Current</b>: the weight of the prediction at this
/// row, truncated to an integer.</description></item>
/// <item><description><b>Cumulative</b>: the running total of weights up to
/// and including this row, truncated to an integer after every addition.</description></item>
/// </list>
/// The first row is always (margin -1, current 0, cumulative 0); after it
/// there is exactly one row per prediction, in the order given by
/// Utils.sort over the margins.
/// </summary>
/// <param name="predictions">a FastVector whose elements are NominalPrediction
/// objects.
/// </param>
/// <returns> datapoints as a set of instances, null if no predictions
/// have been made.
/// </returns>
public virtual Instances getCurve(FastVector predictions)
{
    if (predictions.size() == 0)
    {
        return null;
    }

    Instances curve = makeHeader();
    double[] marginValues = getMargins(predictions);
    int[] order = Utils.sort(marginValues);

    // Starting point of the curve: nothing counted yet at margin -1.
    curve.add(makeInstance(-1, 0, 0));

    int runningTotal = 0;
    foreach (int predictionIndex in order)
    {
        NominalPrediction prediction = (NominalPrediction) predictions.elementAt(predictionIndex);
        double weight = prediction.weight();

        // Counts are kept as ints, so the fractional part of the sum is
        // discarded at every step (not just once at the end).
        runningTotal = (int) (runningTotal + weight);

        // A row is emitted for every prediction, so the per-row bin always
        // starts from zero and holds just this prediction's truncated weight.
        int binCount = (int) weight;

        curve.add(makeInstance(marginValues[predictionIndex], binCount, runningTotal));
    }

    return curve;
}
/// <summary>
/// Convenience overload: computes the curve for the last class, i.e. the
/// class whose index is one less than the length of the first prediction's
/// distribution, by delegating to getCurve(predictions, classIndex).
/// <para>
/// Per the original documentation the resulting Instances contain
/// <b>Probability Cost Function</b>, <b>Normalized Expected Cost</b> and
/// <b>Threshold</b> (the probability threshold that gives rise to the
/// previous performance values); the actual layout is decided by the
/// two-argument overload.
/// </para>
/// </summary>
/// <seealso cref="TwoClassStats">
/// </seealso>
/// <param name="predictions">a FastVector whose elements are NominalPrediction
/// objects.
/// </param>
/// <returns> datapoints as a set of instances, null if no predictions
/// have been made.
/// </returns>
public virtual Instances getCurve(FastVector predictions)
{
    if (predictions.size() == 0)
    {
        return null;
    }

    // The first prediction determines how many classes there are.
    NominalPrediction firstPrediction = (NominalPrediction) predictions.elementAt(0);
    int lastClassIndex = firstPrediction.distribution().Length - 1;

    return getCurve(predictions, lastClassIndex);
}
/// <summary>
/// Adds every element of a prediction vector to the confusion matrix by
/// passing each one, in vector order, to addPrediction.
/// </summary>
/// <param name="predictions">a FastVector containing the NominalPredictions
/// to include; every element is cast to NominalPrediction.
/// </param>
/// <exception cref="Exception">if no valid prediction was made (i.e.
/// unclassified). Presumably raised by addPrediction; confirm there.
/// </exception>
public virtual void addPredictions(FastVector predictions)
{
    int position = 0;
    while (position < predictions.size())
    {
        NominalPrediction next = (NominalPrediction) predictions.elementAt(position);
        addPrediction(next);
        position++;
    }
}
/// <summary>
/// Collects the margin() of every prediction into an array. The array keeps
/// the order of the input vector; no sorting happens here.
/// </summary>
/// <param name="predictions">a FastVector containing NominalPredictions
/// </param>
/// <returns> an array with one margin value per prediction, at the same
/// index as the prediction it came from.
/// </returns>
private double[] getMargins(FastVector predictions)
{
    int count = predictions.size();
    double[] result = new double[count];

    for (int idx = 0; idx < count; idx++)
    {
        result[idx] = ((NominalPrediction) predictions.elementAt(idx)).margin();
    }

    return result;
}
/// <summary>
/// Calculates the performance stats for the desired class and returns the
/// results as a set of Instances, one per distinct probability threshold.
/// <para>
/// The work is done in two passes. The first pass totals the weight of
/// predictions whose actual class is classIndex (positives) and of all
/// others (negatives). The second pass visits the predictions in the order
/// given by Utils.sort over their classIndex probabilities and, each time
/// the probability rises above the current threshold, moves the weight seen
/// since the previous threshold out of the true/false positive counts and
/// into the false/true negative counts before emitting a row via
/// makeInstance.
/// </para>
/// <para>
/// Predictions with a missing actual class value or a negative weight are
/// reported on standard error and left out of all counts.
/// </para>
/// </summary>
/// <param name="predictions">a FastVector whose elements are NominalPrediction
/// objects.
/// </param>
/// <param name="classIndex">index of the class of interest.
/// </param>
/// <returns> datapoints as a set of instances, or null when there are no
/// predictions or when the first prediction's distribution has no entry at
/// classIndex.
/// </returns>
public virtual Instances getCurve(FastVector predictions, int classIndex)
{
    if (predictions.size() == 0)
    {
        return null;
    }

    // Only the first prediction is inspected to decide whether classIndex is usable.
    NominalPrediction firstPrediction = (NominalPrediction) predictions.elementAt(0);
    if (firstPrediction.distribution().Length <= classIndex)
    {
        return null;
    }

    double[] classProbs = getProbabilities(predictions, classIndex);

    // Pass 1: total weight of positives and negatives.
    double positiveWeight = 0;
    double negativeWeight = 0;
    for (int p = 0; p < classProbs.Length; p++)
    {
        NominalPrediction candidate = (NominalPrediction) predictions.elementAt(p);

        // NOTE(review): this is a plain == test; if MISSING_VALUE is defined as
        // NaN it can never be true. Confirm against Prediction_Fields.
        if (candidate.actual() == weka.classifiers.evaluation.Prediction_Fields.MISSING_VALUE)
        {
            System.Console.Error.WriteLine(GetType().FullName + " Skipping prediction with missing class value");
        }
        else if (candidate.weight() < 0)
        {
            System.Console.Error.WriteLine(GetType().FullName + " Skipping prediction with negative weight");
        }
        else if (candidate.actual() == classIndex)
        {
            positiveWeight += candidate.weight();
        }
        else
        {
            negativeWeight += candidate.weight();
        }
    }

    // Pass 2: walk the predictions in sorted-probability order.
    Instances curve = makeHeader();
    int[] order = Utils.sort(classProbs);

    // Initially every positive counts as a true positive and every negative
    // as a false positive.
    TwoClassStats stats = new TwoClassStats(positiveWeight, negativeWeight, 0, 0);

    double threshold = 0;
    double pendingPos = 0; // positive weight seen since the last emitted row
    double pendingNeg = 0; // negative weight seen since the last emitted row
    int lastRank = order.Length - 1;

    for (int rank = 0; rank < order.Length; rank++)
    {
        int predictionIndex = order[rank];
        double probability = classProbs[predictionIndex];

        if (rank == 0 || probability > threshold)
        {
            // Everything accumulated below the new threshold is now
            // classified as negative.
            stats.TruePositive -= pendingPos;
            stats.FalseNegative += pendingPos;
            stats.FalsePositive -= pendingNeg;
            stats.TrueNegative += pendingNeg;

            threshold = probability;
            curve.add(makeInstance(stats, threshold));

            pendingPos = 0;
            pendingNeg = 0;

            // A row emitted for the final prediction ends the walk; that
            // prediction's own weight is never accumulated.
            if (rank == lastRank)
            {
                break;
            }
        }

        NominalPrediction current = (NominalPrediction) predictions.elementAt(predictionIndex);
        if (current.actual() == weka.classifiers.evaluation.Prediction_Fields.MISSING_VALUE)
        {
            System.Console.Error.WriteLine(GetType().FullName + " Skipping prediction with missing class value");
        }
        else if (current.weight() < 0)
        {
            System.Console.Error.WriteLine(GetType().FullName + " Skipping prediction with negative weight");
        }
        else if (current.actual() == classIndex)
        {
            pendingPos += current.weight();
        }
        else
        {
            pendingNeg += current.weight();
        }
    }

    return curve;
}
/// <summary>
/// Collects, for every prediction, the probability its distribution assigns
/// to the given class. The array keeps the order of the input vector; no
/// sorting happens here.
/// </summary>
/// <param name="predictions">a FastVector whose elements are NominalPrediction
/// objects.
/// </param>
/// <param name="classIndex">position within each prediction's distribution
/// to read.
/// </param>
/// <returns> an array with one probability per prediction, at the same index
/// as the prediction it came from.
/// </returns>
private double[] getProbabilities(FastVector predictions, int classIndex)
{
    int count = predictions.size();
    double[] result = new double[count];

    for (int idx = 0; idx < count; idx++)
    {
        double[] distribution = ((NominalPrediction) predictions.elementAt(idx)).distribution();
        result[idx] = distribution[classIndex];
    }

    return result;
}