/// <summary>
///   Creates a new test for two ROC curves.
/// </summary>
///
/// <param name="curve1">The first ROC curve.</param>
/// <param name="curve2">The second ROC curve.</param>
/// <param name="hypothesizedDifference">The hypothesized difference between the two areas.</param>
/// <param name="alternate">The alternative hypothesis (research hypothesis) to test.</param>
///
public TwoReceiverOperatingCurveTest(ReceiverOperatingCharacteristic curve1,
    ReceiverOperatingCharacteristic curve2, double hypothesizedDifference = 0,
    TwoSampleHypothesis alternate = TwoSampleHypothesis.ValuesAreDifferent)
{
    this.Curve1 = curve1;
    this.Curve2 = curve2;

    // Covariance between the two curves, from the per-observation
    // accuracy components exposed by each curve.
    double[] negatives1 = curve1.NegativeAccuracies;
    double[] positives1 = curve1.PositiveAccuracies;
    double[] negatives2 = curve2.NegativeAccuracies;
    double[] positives2 = curve2.PositiveAccuracies;

    double negativeCovariance = Measures.Covariance(negatives1, negatives2);
    double positiveCovariance = Measures.Covariance(positives1, positives2);
    double covariance = negativeCovariance / negatives1.Length
                      + positiveCovariance / positives1.Length;

    this.EstimatedValue1 = curve1.Area;
    this.EstimatedValue2 = curve2.Area;
    this.ObservedDifference = EstimatedValue1 - EstimatedValue2;
    this.HypothesizedDifference = hypothesizedDifference;

    this.Variance1 = curve1.Variance;
    this.Variance2 = curve2.Variance;
    this.OverallVariance = Variance1 + Variance2 - 2 * covariance;
    this.StandardError = System.Math.Sqrt(OverallVariance);

    // Z statistic for the difference between the two areas.
    double z = (ObservedDifference - HypothesizedDifference) / StandardError;

    Compute(z, alternate);
}
private void buttonROC_Click(object sender, EventArgs e)
{
    // Ask how many cut-off points the curve should use.
    // NOTE(review): when the input box is left empty, numPoints stays -1
    // and is passed through to Compute — confirm that is the intended
    // "use default" convention.
    int numPoints = -1;
    string numPointsStr = "";
    Utility.InputBox("ROC Points", "How many points should be used?", ref numPointsStr);

    if (numPointsStr != "")
    {
        if (!Int32.TryParse(numPointsStr, out numPoints))
        {
            MessageBox.Show("Your input was invalid. Please try again.");
            return;
        }
    }

    // Pick the ROC source according to the active voting scheme.
    ReceiverOperatingCharacteristic roc = null;
    switch (_scheme)
    {
        case VotingScheme.NONE:
            roc = _voter.getROC();
            break;
        case VotingScheme.MAJORITY_VOTE:
            roc = _voter.getMajorityVote().getROC();
            break;
        case VotingScheme.ADDITIVE_PREDICTIONS:
            roc = _voter.getSumPredictions().getROC();
            break;
        default:
            break;
    }

    if (roc == null)
    {
        // The selected scheme has no ROC of its own; offer the generic one.
        if (MessageBox.Show(this,
            "This voter does not offer ROC computation.\nWould you like to compute a general (non-vote) ROC from the data?",
            "ROC Computation", MessageBoxButtons.YesNo) != System.Windows.Forms.DialogResult.Yes)
        {
            return;
        }
        roc = _voter.getROC();
    }

    roc.Compute(numPoints);
    FormDataView<double> view = new FormDataView<double>(roc);
    view.Show();
}
/// <summary>
///   Creates a visualization form showing the scatter plot of the given
///   ROC curve (curve points plus the random-classifier reference line).
/// </summary>
///
/// <param name="roc">The ROC curve to visualize.</param>
/// <param name="windowTitle">The title to display on this window.</param>
///
public VisualizationForm(ReceiverOperatingCharacteristic roc, String windowTitle)
{
    InitializeComponent();

    // Fix: the windowTitle argument was previously accepted but ignored.
    this.Text = windowTitle;

    ScatterPlotForm sp = new ScatterPlotForm(roc.GetScatterplot(true));
    sp.Show();
}
/// <summary>
///   Builds a ROC curve from the voter's accumulated expected labels and
///   predicted confidence values. The curve is returned uncomputed;
///   callers invoke Compute on it themselves.
/// </summary>
internal ReceiverOperatingCharacteristic getROC()
{
    return new ReceiverOperatingCharacteristic(
        voterExpected.ToArray(), voterPredictedConf.ToArray());
}
/// <summary>
///   Creates a new <see cref="ReceiverOperatingCurveTest"/>.
/// </summary>
///
/// <param name="curve">The curve to be tested.</param>
/// <param name="hypothesizedValue">The hypothesized value for the ROC area.</param>
/// <param name="alternate">The alternative hypothesis (research hypothesis) to test.</param>
///
public ReceiverOperatingCurveTest(ReceiverOperatingCharacteristic curve,
    double hypothesizedValue = 0.5,
    OneSampleHypothesis alternate = OneSampleHypothesis.ValueIsDifferentFromHypothesis)
{
    this.Curve = curve;

    // Test the curve's estimated area against the hypothesized value,
    // using the curve's own standard error.
    Compute(curve.Area, hypothesizedValue, curve.StandardError, alternate);
}
/// <summary>
///   Computes the area under the ROC curve for the given expected labels
///   and predicted scores.
/// </summary>
public static double Auc(double[] expected, double[] predicted)
{
    var curve = new ReceiverOperatingCharacteristic(expected, predicted);
    curve.Compute(predicted); // cut-off points taken from the prediction values themselves
    return curve.Area;
}
static void Main(string[] args)
{
    // Training instances: the first eight rows belong to class -1,
    // the last eight to class +1.
    double[][] inputs =
    {
        // Those are from class -1
        new double[] { 2, 4, 0 },
        new double[] { 5, 5, 1 },
        new double[] { 4, 5, 0 },
        new double[] { 2, 5, 5 },
        new double[] { 4, 5, 1 },
        new double[] { 4, 5, 0 },
        new double[] { 6, 2, 0 },
        new double[] { 4, 1, 0 },

        // Those are from class +1
        new double[] { 1, 4, 5 },
        new double[] { 7, 5, 1 },
        new double[] { 2, 6, 0 },
        new double[] { 7, 4, 7 },
        new double[] { 4, 5, 0 },
        new double[] { 6, 2, 9 },
        new double[] { 4, 1, 6 },
        new double[] { 7, 2, 9 },
    };

    int[] outputs =
    {
        -1, -1, -1, -1, -1, -1, -1, -1, // first eight from class -1
        +1, +1, +1, +1, +1, +1, +1, +1  // last eight from class +1
    };

    // Linear Support Vector Machine over the three input dimensions.
    SupportVectorMachine svm = new SupportVectorMachine(inputs: 3);

    // Sequential minimal optimization learning algorithm.
    var teacher = new SequentialMinimalOptimization(svm, inputs, outputs);

    // Train the machine.
    double error = teacher.Run();

    // Collect the labels assigned by the trained machine.
    double[] scores = new double[inputs.Length];
    for (int i = 0; i < scores.Length; i++)
    {
        scores[i] = svm.Compute(inputs[i]);
    }

    // Assess the model with a 100-point ROC curve over the
    // true labels versus the machine's assigned labels.
    var roc = new ReceiverOperatingCharacteristic(outputs, scores);
    roc.Compute(100);
    roc.GetScatterplot(true);

    Console.WriteLine(roc.Area.ToString());
    Console.Write(roc.StandardError.ToString());
}
public void ReceiverOperatingCharacteristicConstructorTest2()
{
    double[] measurement = { 0, 0, 0, 0, 0, 1, 1, 1 };
    double[] prediction = { 0, 0, 0.5, 0.5, 1, 1, 1, 1 };

    ReceiverOperatingCharacteristic target =
        new ReceiverOperatingCharacteristic(measurement, prediction);

    // With the origin forced, the 0.5-increment curve has four points,
    // running from (1,1) down to (0,0) in ROC space.
    target.Compute(0.5, true);
    Assert.AreEqual(target.Points.Count, 4);

    var first = target.Points[0];
    var last = target.Points[3];

    Assert.AreEqual(first.Sensitivity, 1);
    Assert.AreEqual(1 - first.Specificity, 1);
    Assert.AreEqual(last.Sensitivity, 0);
    Assert.AreEqual(1 - last.Specificity, 0);

    // Without the forced origin, only three points remain.
    target.Compute(0.5, false);
    Assert.AreEqual(target.Points.Count, 3);

    // Explicit cut-off values.
    target.Compute(new double[] { 0.0, 0.4, 0.6, 1.0 });
    Assert.AreEqual(target.Points.Count, 4);

    Assert.AreEqual(target.Negatives, 5);
    Assert.AreEqual(target.Positives, 3);
    Assert.AreEqual(target.Observations, 8);

    foreach (var point in target.Points)
    {
        Assert.AreEqual(point.Samples, 8);
        Assert.AreEqual(point.ActualNegatives, 5);
        Assert.AreEqual(point.ActualPositives, 3);

        if (point.Cutoff == 0.0)
        {
            Assert.AreEqual(point.PredictedNegatives, 0);
            Assert.AreEqual(point.PredictedPositives, 8);
        }
        else if (point.Cutoff == 0.4)
        {
            Assert.AreEqual(point.PredictedNegatives, 2);
            Assert.AreEqual(point.PredictedPositives, 6);
        }
        else
        {
            Assert.AreEqual(point.PredictedNegatives, 4);
            Assert.AreEqual(point.PredictedPositives, 4);
        }
    }

    Assert.AreEqual(target.Area, 0.8);
    // Assert.AreEqual(target.StandardError, 0.1821680136170595); // HanleyMcNeil
    Assert.AreEqual(0.1, target.StandardError); // De Long
}
public void ReceiverOperatingCharacteristicConstructorZeroIncrementThrowsTest()
{
    // A zero increment cannot generate any cut-off points,
    // so Compute must reject it.
    double[] measurement = { 1 };
    double[] prediction = { 1 };
    double zeroIncrement = 0d;
    const bool forceOrigin = true;

    var target = new ReceiverOperatingCharacteristic(measurement, prediction);

    Assert.Throws<ArgumentException>(() => target.Compute(zeroIncrement, forceOrigin));
}
private void visualize()
{
    // Nothing to show if no analysis result has been loaded; every
    // branch below required this check, so it is hoisted here.
    if (_visualizationSource == null)
    {
        return;
    }

    switch (_visualizationType)
    {
        case VisualizationType.COMPONENTS_CUMULATIVE:
            // Exact-type comparison (not 'is'): only a plain
            // PrincipalComponentAnalysis qualifies.
            if (_visualizationSource.GetType() == typeof(PrincipalComponentAnalysis))
            {
                PrincipalComponentAnalysis pca = (PrincipalComponentAnalysis)_visualizationSource;
                VisualizationForm form = new VisualizationForm(pca.Components, true, "Cumulative Component Distribution");
                form.Show();
            }
            break;

        case VisualizationType.COMPONENTS_DISTRIBUTION:
            if (_visualizationSource.GetType() == typeof(PrincipalComponentAnalysis))
            {
                PrincipalComponentAnalysis pca = (PrincipalComponentAnalysis)_visualizationSource;
                VisualizationForm form = new VisualizationForm(pca.Components, false, "Component Distribution");
                form.Show();
            }
            break;

        case VisualizationType.ROC_PLOT_POINTS:
            if (_visualizationSource.GetType() == typeof(ReceiverOperatingCharacteristic))
            {
                ReceiverOperatingCharacteristic roc = (ReceiverOperatingCharacteristic)_visualizationSource;
                ScatterPlotForm plot = new ScatterPlotForm(roc.GetScatterplot(true));
                plot.Show();
            }
            break;

        default:
            break;
    }
}
/// <summary>
///   Draws one-vs-rest ROC curves for a multi-class classifier — one
///   curve per class — on a single plot shown in a modal form.
/// </summary>
///
/// <param name="actual">The ground-truth class label of each sample.</param>
/// <param name="preds">The predicted class label of each sample.</param>
/// <param name="numClass">The number of distinct classes.</param>
/// <param name="modelName">Model name used in the plot title.</param>
///
private static void DrawROCCurve(int[] actual, int[] preds, int numClass, string modelName)
{
    ScatterplotView spv = new ScatterplotView();
    spv.Dock = DockStyle.Fill;
    spv.LinesVisible = true;

    Color[] colors = new Color[] {
        Color.Blue, Color.Red, Color.Orange, Color.Yellow, Color.Green,
        Color.Gray, Color.LightSalmon, Color.LightSkyBlue, Color.Black, Color.Pink
    };

    for (int i = 0; i < numClass; i++)
    {
        // Build a one-vs-rest ROC for class i.
        bool[] expected = actual.Select(x => x == i).ToArray();
        int[] predicted = preds.Select(x => x == i ? 1 : 0).ToArray();

        var trainRoc = new ReceiverOperatingCharacteristic(expected, predicted);
        trainRoc.Compute(1000);

        double auc = trainRoc.Area;

        double[] xVals = trainRoc.Points.Select(x => 1 - x.Specificity).ToArray();
        double[] yVals = trainRoc.Points.Select(x => x.Sensitivity).ToArray();

        // Fix: wrap around the palette instead of throwing
        // IndexOutOfRangeException when numClass > colors.Length.
        spv.Graph.GraphPane.AddCurve(
            String.Format("Digit: {0} - AUC: {1:0.00}", i, auc),
            xVals, yVals, colors[i % colors.Length], SymbolType.None);

        spv.Graph.GraphPane.AxisChange();
    }

    spv.Graph.GraphPane.Title.Text = String.Format("{0} ROC - One vs. Rest", modelName);

    Form f1 = new Form();
    f1.Width = 700;
    f1.Height = 500;
    f1.Controls.Add(spv);
    f1.ShowDialog(); // blocks until the user closes the plot
}
private void btnRunAnalysis_Click(object sender, EventArgs e)
{
    if (sourceTable == null)
    {
        MessageBox.Show("Please load some data before attempting to plot a curve.");
        return;
    }

    // Commit any cell edit still in progress before reading the table.
    dgvSource.EndEdit();

    // Copy the two source columns into plain arrays:
    // column 0 holds the actual values, column 1 the test outcomes.
    int rowCount = sourceTable.Rows.Count;
    double[] realData = new double[rowCount];
    double[] testData = new double[rowCount];
    for (int row = 0; row < rowCount; row++)
    {
        realData[row] = (double)sourceTable.Rows[row][0];
        testData[row] = (double)sourceTable.Rows[row][1];
    }

    // Build the curve, then compute it either with a fixed number of
    // points or with a fixed threshold increment, per the UI choice.
    rocCurve = new ReceiverOperatingCharacteristic(realData, testData);
    if (rbNumPoints.Checked)
    {
        rocCurve.Compute((int)numPoints.Value);
    }
    else
    {
        rocCurve.Compute((float)numIncrement.Value);
    }

    // Update graphs
    CreateCurveGraph(zedGraph1);

    // Show point details
    dgvPointDetails.DataSource =
        new SortableBindingList<ReceiverOperatingCharacteristicPoint>(rocCurve.Points);

    // Show area and error
    tbArea.Text = rocCurve.Area.ToString();
    tbError.Text = rocCurve.Error.ToString();
}
/// <summary>
///   Demonstrates building a ROC curve from 20 random binary labels and
///   20 random scores, printing each curve point and the area under it.
/// </summary>
public static void Test()
{
    var realData = Util.ArrayInit(20, d => Bernoulli.Sample(0.5) ? 1.0 : 0.0);
    var testData = Util.ArrayInit(20, d => Beta.Sample(1, 1));

    // Fix: the curve must compare the predicted scores against the
    // labels; previously realData was passed twice and testData was
    // generated but never used.
    var rocCurve = new ReceiverOperatingCharacteristic(realData, testData);

    // Compute the ROC curve with 20 points
    rocCurve.Compute(20);

    for (int i = 0; i < rocCurve.Points.Count; i++)
    {
        // Fix: print Sensitivity as the true positive rate — the old code
        // printed Specificity under the "true positive rate" label.
        Console.WriteLine(
            "ROC curve at point {0}: false positive rate {1:0.000}, true positive rate {2:0.000}, accuracy {3:0.000}",
            i,
            1 - rocCurve.Points[i].Specificity,
            rocCurve.Points[i].Sensitivity,
            rocCurve.Points[i].Accuracy);
    }

    Console.WriteLine("Area under the ROC curve: {0:0.000}", rocCurve.Area);
}
/// <summary>
///   Runs the trained classifier over the "Evaluation" worksheet and
///   collects per-stage timings, a confusion matrix and a ROC curve.
/// </summary>
public EvaluationResult Evaluate()
{
    var result = new EvaluationResult();

    // Load the evaluation data from the spreadsheet.
    result.StartMeasure(EvaluationResult.RecordType.LoadDataset);
    var reader = new ExcelReader(Helpers.DatasetPath);
    DataTable dataStore = reader.GetWorksheet("Evaluation");
    int[] labels = dataStore.ToVector<int>("Label");
    string[] learnData = dataStore.ToVector<string>("Sentiment");
    result.StopMeasure();

    // Tokenize every sentiment string.
    result.StartMeasure(EvaluationResult.RecordType.Tokenization);
    string[][] tokenized = learnData.Select(x => _preprocessor.Process(x)).ToArray();
    result.StopMeasure();

    // Featurize with the bag-of-words model.
    result.StartMeasure(EvaluationResult.RecordType.Featurization);
    int[][] learnTokenized = _bagOfWords.Transform(tokenized).ToInt32();
    result.StopMeasure();

    // Classify with the Bayes model.
    result.StartMeasure(EvaluationResult.RecordType.Classification);
    int[] testResult = _bayes.Decide(learnTokenized);
    result.StopMeasure();

    // Derive statistics from the decisions (200-point ROC curve).
    result.StartMeasure(EvaluationResult.RecordType.Statistics);
    var mat = new ConfusionMatrix(testResult, labels);
    var roc = new ReceiverOperatingCharacteristic(labels, testResult.ToDouble());
    roc.Compute(200);
    result.StopMeasure();

    result.Matrix = mat;
    result.Roc = roc;
    return result;
}
private void btnRunAnalysis_Click(object sender, EventArgs e)
{
    if (sourceTable == null)
    {
        MessageBox.Show("Please load some data before attempting to plot a curve.");
        return;
    }

    // Commit any pending edit so the table matches what the user sees.
    dgvSource.EndEdit();

    // Extract the actual values (column 0) and test outcomes (column 1).
    int count = sourceTable.Rows.Count;
    double[] realData = new double[count];
    double[] testData = new double[count];
    for (int i = 0; i < count; i++)
    {
        var row = sourceTable.Rows[i];
        realData[i] = (double)row[0];
        testData[i] = (double)row[1];
    }

    // Create and compute the ROC curve, by point count or by increment.
    rocCurve = new ReceiverOperatingCharacteristic(realData, testData);
    if (rbNumPoints.Checked)
    {
        rocCurve.Compute((int)numPoints.Value);
    }
    else
    {
        rocCurve.Compute((float)numIncrement.Value);
    }

    scatterplotView1.Scatterplot = rocCurve.GetScatterplot(true);

    // Show point details
    dgvPointDetails.DataSource =
        new SortableBindingList<ReceiverOperatingCharacteristicPoint>(rocCurve.Points);

    // Show area and standard error
    tbArea.Text = rocCurve.Area.ToString();
    tbError.Text = rocCurve.StandardError.ToString();
}
internal FormDataView(ReceiverOperatingCharacteristic roc)
{
    InitializeComponent();

    // Summary table with a single row describing the curve.
    DataTable data = new DataTable();
    foreach (string column in new[] { "Observations", "Negatives", "Positives", "Area", "Std Error", "Variance" })
    {
        data.Columns.Add(column, typeof(String));
    }
    data.Rows.Add(roc.Observations, roc.Negatives, roc.Positives,
        roc.Area, roc.StandardError, roc.Variance);

    dataGridView1.DataSource = data;
    this.dataGridView1.AutoResizeColumns();

    // Only the ROC point plot is available for this view.
    this._availableVisualizations = new List<VisualizationType> { VisualizationType.ROC_PLOT_POINTS };
    enableVisualization(roc);
    fitHeight();
}
/// <summary>
///   Builds a ROC curve from the raw predictions accumulated for every
///   classified item. The curve is returned uncomputed.
/// </summary>
internal ReceiverOperatingCharacteristic getROC()
{
    List<int> expectedValues = new List<int>();
    List<double> predictedValues = new List<double>();

    // Flatten each item's raw predictions into parallel
    // label/score lists, preserving enumeration order.
    foreach (var kvp in _totalClassification)
    {
        foreach (double prediction in kvp.Value.RawPredictions)
        {
            expectedValues.Add(kvp.Value.ActualClass);
            predictedValues.Add(prediction);
        }
    }

    return new ReceiverOperatingCharacteristic(
        expectedValues.ToArray(), predictedValues.ToArray());
}
/// <summary>
///   Builds one delimited export row summarizing the cross-validation run
///   under the given voting scheme. Column order: feature model, scheme,
///   kernel, note, runs, folds, elapsed ms, memory (bytes then GB), sample
///   count, ROC area, classification statistics, then raw confusion-matrix
///   counts. Returns an empty string when the statistics required for the
///   requested scheme are unavailable.
/// </summary>
private String getExportString(String delimiter, String note, VotingScheme votingScheme)
{
    StringBuilder s = new StringBuilder();
    String featureModel = this._featureModel;

    // Kernel description: an explicit SVM configuration wins over the mode name.
    String kernel = _cMode.ToString();
    if (_svmConfig != null)
    {
        kernel = this._svmConfig.Kernel.ToString();
    }

    ConfusionMatrix cm = null;
    ReceiverOperatingCharacteristic roc = null;
    SchemeSumPredictions sumPredictions = null;
    SchemeMajorityVote majorityVote = null;

    // Select the statistics sources for the requested scheme. Note that
    // for the two voting schemes cm is deliberately left null, so only
    // the matching branch below can run.
    switch (votingScheme)
    {
        case VotingScheme.NONE:
        {
            cm = _voter.AggregatedConfusionMatrix;
            roc = _voter.getROC();
            break;
        }
        case VotingScheme.ADDITIVE_PREDICTIONS:
        {
            sumPredictions = _voter.getSumPredictions();
            cm = null;
            roc = sumPredictions.getROC();
            break;
        }
        case VotingScheme.MAJORITY_VOTE:
        {
            majorityVote = _voter.getMajorityVote();
            cm = null;
            roc = majorityVote.getROC();
            break;
        }
    }

    if (votingScheme == VotingScheme.NONE && roc != null && cm != null)
    {
        // No voting: statistics come from the aggregated confusion matrix;
        // the ROC area comes from the voter's curve (100 cut-off points).
        roc.Compute(100);
        s.Append(featureModel + delimiter);
        s.Append(votingScheme + delimiter);
        s.Append(kernel + delimiter);
        s.Append(note + delimiter);
        s.Append(this._numRuns + delimiter);
        s.Append(this._numFolds + delimiter);
        s.Append(this._timeElapsedMS + delimiter);
        s.Append(this._memoryUsedBytes + delimiter);
        s.Append(Utility.formatNumber(Utility.BytesToGB(this._memoryUsedBytes)) + delimiter);
        s.Append(cm.Samples + delimiter);
        s.Append(Utility.formatNumber(roc.Area) + delimiter);
        s.Append(Utility.formatNumber(cm.Sensitivity) + delimiter);
        s.Append(Utility.formatNumber(cm.Specificity) + delimiter);
        s.Append(Utility.formatNumber(cm.FalsePositiveRate) + delimiter);
        s.Append(Utility.formatNumber(cm.FalseDiscoveryRate) + delimiter);
        s.Append(Utility.formatNumber(cm.Accuracy) + delimiter);
        s.Append(Utility.formatNumber(cm.PositivePredictiveValue) + delimiter);
        s.Append(Utility.formatNumber(cm.Precision) + delimiter);
        s.Append(Utility.formatNumber(cm.Recall) + delimiter);
        s.Append(Utility.formatNumber(cm.FScore) + delimiter);
        s.Append(cm.ActualPositives + delimiter);
        s.Append(cm.ActualNegatives + delimiter);
        s.Append(cm.TruePositives + delimiter);
        s.Append(cm.TrueNegatives + delimiter);
        s.Append(cm.FalsePositives + delimiter);
        s.Append(cm.FalseNegatives);
    }
    else if (votingScheme == VotingScheme.ADDITIVE_PREDICTIONS && sumPredictions != null && roc != null)
    {
        // Additive-predictions voting: all statistics come from the scheme
        // object itself (including ROCAreaVoter).
        // NOTE(review): roc.Compute(100) is called but roc's values are not
        // exported in this branch — confirm the call is still needed.
        roc.Compute(100);
        s.Append(featureModel + delimiter);
        s.Append(votingScheme + delimiter);
        s.Append(kernel + delimiter);
        s.Append(note + delimiter);
        s.Append(this._numRuns + delimiter);
        s.Append(this._numFolds + delimiter);
        s.Append(this._timeElapsedMS + delimiter);
        s.Append(this._memoryUsedBytes + delimiter);
        s.Append(Utility.formatNumber(Utility.BytesToGB(this._memoryUsedBytes)) + delimiter);
        s.Append(sumPredictions.NumSamples + delimiter);
        s.Append(Utility.formatNumber(sumPredictions.ROCAreaVoter) + delimiter);
        s.Append(Utility.formatNumber(sumPredictions.Sensitivity) + delimiter);
        s.Append(Utility.formatNumber(sumPredictions.Specificity) + delimiter);
        s.Append(Utility.formatNumber(sumPredictions.FalsePositiveRate) + delimiter);
        s.Append(Utility.formatNumber(sumPredictions.FalseDiscoveryRate) + delimiter);
        s.Append(Utility.formatNumber(sumPredictions.Accuracy) + delimiter);
        s.Append(Utility.formatNumber(sumPredictions.PositivePredictiveValue) + delimiter);
        s.Append(Utility.formatNumber(sumPredictions.Precision) + delimiter);
        s.Append(Utility.formatNumber(sumPredictions.Recall) + delimiter);
        s.Append(Utility.formatNumber(sumPredictions.FScore) + delimiter);
        s.Append(sumPredictions.ActualPositives + delimiter);
        s.Append(sumPredictions.ActualNegatives + delimiter);
        s.Append(sumPredictions.TruePositives + delimiter);
        s.Append(sumPredictions.TrueNegatives + delimiter);
        s.Append(sumPredictions.FalsePositives + delimiter);
        s.Append(sumPredictions.FalseNegatives);
    }
    else if (votingScheme == VotingScheme.MAJORITY_VOTE && majorityVote != null && roc != null)
    {
        // Majority voting: all statistics come from the scheme object.
        // NOTE(review): as above, roc.Compute(100)'s result is unused here.
        roc.Compute(100);
        s.Append(featureModel + delimiter);
        s.Append(votingScheme + delimiter);
        s.Append(kernel + delimiter);
        s.Append(note + delimiter);
        s.Append(this._numRuns + delimiter);
        s.Append(this._numFolds + delimiter);
        s.Append(this._timeElapsedMS + delimiter);
        s.Append(this._memoryUsedBytes + delimiter);
        s.Append(Utility.formatNumber(Utility.BytesToGB(this._memoryUsedBytes)) + delimiter);
        s.Append(majorityVote.NumSamples + delimiter);
        s.Append(Utility.formatNumber(majorityVote.ROCAreaVoter) + delimiter);
        s.Append(Utility.formatNumber(majorityVote.Sensitivity) + delimiter);
        s.Append(Utility.formatNumber(majorityVote.Specificity) + delimiter);
        s.Append(Utility.formatNumber(majorityVote.FalsePositiveRate) + delimiter);
        s.Append(Utility.formatNumber(majorityVote.FalseDiscoveryRate) + delimiter);
        s.Append(Utility.formatNumber(majorityVote.Accuracy) + delimiter);
        s.Append(Utility.formatNumber(majorityVote.PositivePredictiveValue) + delimiter);
        s.Append(Utility.formatNumber(majorityVote.Precision) + delimiter);
        s.Append(Utility.formatNumber(majorityVote.Recall) + delimiter);
        s.Append(Utility.formatNumber(majorityVote.FScore) + delimiter);
        s.Append(majorityVote.ActualPositives + delimiter);
        s.Append(majorityVote.ActualNegatives + delimiter);
        s.Append(majorityVote.TruePositives + delimiter);
        s.Append(majorityVote.TrueNegatives + delimiter);
        s.Append(majorityVote.FalsePositives + delimiter);
        s.Append(majorityVote.FalseNegatives);
    }

    return(s.ToString());
}
/// <summary>
///   Updates the accuracy using the current results.
/// </summary>
protected virtual void UpdateAccuracy()
{
    // Cap for the negative log-probability of a single item, so one very
    // confident wrong answer cannot dominate the average.
    double nlpdThreshold = -Math.Log(0.001);
    int labelCount = FullMapping.LabelCount;
    var confusionMatrix = Util.ArrayInit(labelCount, labelCount, (i, j) => 0.0);
    int correct = 0;
    double logProb = 0.0;
    int goldX = 0; // number of items with a gold label

    // Only for binary labels
    List<double> trueBinaryLabelList = null;
    List<double> probTrueLabelList = null;
    if (Mapping.LabelCount == 2)
    {
        trueBinaryLabelList = new List<double>();
        probTrueLabelList = new List<double>();
    }

    foreach (var kvp in GoldLabels)
    {
        if (kvp.Value == null)
        {
            continue;
        }

        // We have a gold label
        goldX++;
        Discrete trueLabel = null;
        if (TrueLabel.ContainsKey(kvp.Key))
        {
            trueLabel = TrueLabel[kvp.Key];
        }

        // Items without an inferred label are scored against a uniform
        // distribution rather than skipped.
        if (trueLabel == null)
        {
            trueLabel = Discrete.Uniform(Mapping.LabelCount);
            //continue; // No inferred label
        }

        var probs = trueLabel.GetProbs();
        double max = probs.Max();
        // All labels tied at the maximum probability; ties are broken
        // uniformly at random.
        var predictedLabels = probs.Select((p, i) => new { prob = p, idx = i }).Where(a => a.prob == max).Select(a => a.idx).ToArray();
        int predictedLabel = predictedLabels.Length == 1 ? predictedLabels[0] : predictedLabels[Rand.Int(predictedLabels.Length)];
        this.PredictedLabel[kvp.Key] = predictedLabel;

        int goldLabel = kvp.Value.Value;
        if (goldLabel == predictedLabel)
        {
            correct++;
        }

        confusionMatrix[goldLabel, predictedLabel] = confusionMatrix[goldLabel, predictedLabel] + 1.0;

        // Clamped negative log-probability of the gold label.
        var nlp = -trueLabel.GetLogProb(goldLabel);
        if (nlp > nlpdThreshold)
        {
            nlp = nlpdThreshold;
        }
        logProb += nlp;

        // Binary case: record the gold label and the probability the model
        // assigned to it, for the ROC curve built at the end.
        if (trueBinaryLabelList != null)
        {
            trueBinaryLabelList.Add(goldLabel);
            probTrueLabelList.Add(probs[goldLabel]);
        }
    }

    // NOTE(review): when no gold labels exist, goldX is 0 and these
    // divisions yield NaN — confirm callers tolerate that.
    Accuracy = correct / (double)goldX;
    NegativeLogProb = logProb / (double)goldX;
    ModelConfusionMatrix = confusionMatrix;

    // Average recall
    double sumRec = 0;
    for (int i = 0; i < labelCount; i++)
    {
        double classSum = 0;
        for (int j = 0; j < labelCount; j++)
        {
            classSum += confusionMatrix[i, j];
        }

        // NOTE(review): classSum can be 0 for a class absent from the gold
        // set, making this term (and AvgRecall) NaN.
        sumRec += confusionMatrix[i, i] / classSum;
    }
    AvgRecall = sumRec / labelCount;

    // WorkerLabelAccuracy: Perc. agreement between worker label and gold label
    int sumAcc = 0;
    var LabelSet = Mapping.DataWithGold;
    int numLabels = LabelSet.Count();
    foreach (var datum in LabelSet)
    {
        sumAcc += datum.WorkerLabel == datum.GoldLabel ? 1 : 0;
    }
    WorkerLabelAccuracy = (double)sumAcc / (double)numLabels;

    // Binary case: 10,000-point ROC over (gold label, probability assigned
    // to the gold label), plus a 2x2 confusion matrix built from the
    // counts accumulated above.
    if (trueBinaryLabelList != null)
    {
        RocCurve = new ReceiverOperatingCharacteristic(trueBinaryLabelList.ToArray(), probTrueLabelList.ToArray());
        RocCurve.Compute(10000);
        ResultsConfusionMatrixForBinaryLabels = new ConfusionMatrix((int)confusionMatrix[1, 1], (int)confusionMatrix[0, 0], (int)confusionMatrix[0, 1], (int)confusionMatrix[1, 0]);
    }
}
public void ComputeTest()
{
    // Example from
    // http://faculty.vassar.edu/lowry/roc1.html
    //
    // The original table is a long list of (rating, outcome) pairs; here
    // the same data is expanded, in the same order, from run-length form:
    // { rating, outcome, count }.
    double[,] runs =
    {
        { 4, 1, 18 }, { 4, 0,  1 },
        { 6, 1,  7 }, { 6, 0, 17 },
        { 8, 1,  4 }, { 8, 0, 36 },
        { 9, 1,  3 }, { 9, 0, 39 },
    };

    int total = 0;
    for (int r = 0; r < runs.GetLength(0); r++)
    {
        total += (int)runs[r, 2];
    }

    double[] measurement = new double[total];
    double[] prediction = new double[total];
    int k = 0;
    for (int r = 0; r < runs.GetLength(0); r++)
    {
        for (int c = 0; c < (int)runs[r, 2]; c++, k++)
        {
            prediction[k] = runs[r, 0];
            measurement[k] = runs[r, 1];
        }
    }

    var roc = new ReceiverOperatingCharacteristic(measurement, prediction);

    double[] cutpoints = { 5, 7, 9, double.PositiveInfinity };
    roc.Compute(cutpoints);

    Assert.AreEqual(32, roc.Positives);
    Assert.AreEqual(93, roc.Negatives);
    Assert.AreEqual(4, roc.Points.Count);

    var p1 = roc.Points[0];
    var p2 = roc.Points[1];
    var p3 = roc.Points[2];
    var p4 = roc.Points[3];

    Assert.AreEqual(18, p1.FalseNegatives);
    Assert.AreEqual(18 + 7, p2.FalseNegatives);
    Assert.AreEqual(18 + 7 + 4, p3.FalseNegatives);
    Assert.AreEqual(18 + 7 + 4 + 3, p4.FalseNegatives);

    Assert.AreEqual(1, p1.TrueNegatives);
    Assert.AreEqual(1 + 17, p2.TrueNegatives);
    Assert.AreEqual(1 + 17 + 36, p3.TrueNegatives);
    Assert.AreEqual(1 + 17 + 36 + 39, p4.TrueNegatives);

    double area = roc.Area;
    double error = roc.StandardError;

    // Area should be near 0.87
    Assert.AreEqual(0.87, area, 0.011);
    Assert.IsFalse(Double.IsNaN(area));

    // Assert.AreEqual(0.043781206163219656, error); // HanleyMcNeil
    Assert.AreEqual(0.04485087617325112, error); // DeLong estimate
}
public void ComputeTest()
{
    // Example from
    // http://faculty.vassar.edu/lowry/roc1.html
    //
    // The original table is a long list of (rating, outcome) pairs; here
    // the same data is expanded, in the same order, from run-length form:
    // { rating, outcome, count }.
    double[,] runs =
    {
        { 4, 1, 18 }, { 4, 0,  1 },
        { 6, 1,  7 }, { 6, 0, 17 },
        { 8, 1,  4 }, { 8, 0, 36 },
        { 9, 1,  3 }, { 9, 0, 39 },
    };

    int total = 0;
    for (int r = 0; r < runs.GetLength(0); r++)
    {
        total += (int)runs[r, 2];
    }

    double[] measurement = new double[total];
    double[] prediction = new double[total];
    int k = 0;
    for (int r = 0; r < runs.GetLength(0); r++)
    {
        for (int c = 0; c < (int)runs[r, 2]; c++, k++)
        {
            prediction[k] = runs[r, 0];
            measurement[k] = runs[r, 1];
        }
    }

    var roc = new ReceiverOperatingCharacteristic(measurement, prediction);

    double[] cutpoints = { 5, 7, 9, double.PositiveInfinity };
    roc.Compute(cutpoints);

    Assert.AreEqual(32, roc.Positives);
    Assert.AreEqual(93, roc.Negatives);
    Assert.AreEqual(4, roc.Points.Count);

    var p1 = roc.Points[0];
    var p2 = roc.Points[1];
    var p3 = roc.Points[2];
    var p4 = roc.Points[3];

    Assert.AreEqual(18, p1.FalseNegatives);
    Assert.AreEqual(18 + 7, p2.FalseNegatives);
    Assert.AreEqual(18 + 7 + 4, p3.FalseNegatives);
    Assert.AreEqual(18 + 7 + 4 + 3, p4.FalseNegatives);

    Assert.AreEqual(1, p1.TrueNegatives);
    Assert.AreEqual(1 + 17, p2.TrueNegatives);
    Assert.AreEqual(1 + 17 + 36, p3.TrueNegatives);
    Assert.AreEqual(1 + 17 + 36 + 39, p4.TrueNegatives);

    double area = roc.Area;
    double error = roc.Error;

    // Area should be near 0.87
    Assert.IsTrue(System.Math.Abs(area - 0.875) < roc.Error);
}
public void ReceiverOperatingCharacteristicConstructorTest3()
{
    // Measures the accuracy of a binary classifier with a ROC curve,
    // using a linear SVM trained on the instances below.
    double[][] inputs =
    {
        // Those are from class -1
        new double[] { 2, 4, 0 },
        new double[] { 5, 5, 1 },
        new double[] { 4, 5, 0 },
        new double[] { 2, 5, 5 },
        new double[] { 4, 5, 1 },
        new double[] { 4, 5, 0 },
        new double[] { 6, 2, 0 },
        new double[] { 4, 1, 0 },

        // Those are from class +1
        new double[] { 1, 4, 5 },
        new double[] { 7, 5, 1 },
        new double[] { 2, 6, 0 },
        new double[] { 7, 4, 7 },
        new double[] { 4, 5, 0 },
        new double[] { 6, 2, 9 },
        new double[] { 4, 1, 6 },
        new double[] { 7, 2, 9 },
    };

    int[] outputs =
    {
        -1, -1, -1, -1, -1, -1, -1, -1, // first eight from class -1
        +1, +1, +1, +1, +1, +1, +1, +1  // last eight from class +1
    };

    // Linear SVM over three inputs, taught by SMO with C = 1.
    var machine = new SupportVectorMachine(inputs: 3);
    var learn = new SequentialMinimalOptimization(machine, inputs, outputs)
    {
        Complexity = 1
    };

    // Run the learning algorithm
    double error = learn.Run();

    // Score every training instance with the learned machine.
    double[] predicted = new double[inputs.Length];
    for (int i = 0; i < predicted.Length; i++)
    {
        predicted[i] = machine.Score(inputs[i]);
    }

    // Assess the machine's scores against the true labels with a
    // 100-point ROC curve.
    var roc = new ReceiverOperatingCharacteristic(outputs, predicted);
    roc.Compute(100);

    Assert.AreEqual(0.25, error);
    Assert.AreEqual(0.78125, roc.Area);
    // Assert.AreEqual(0.1174774, roc.StandardError, 1e-6); // HanleyMcNeil
    // Assert.AreEqual(0.11958120746409709, roc.StandardError, 1e-6);
    Assert.AreEqual(0.132845321574701, roc.StandardError, 1e-6);
}
/// <summary>
///   Checks the DeLong variance estimate of the AUC against the worked example
///   in "Sampling Variability of Nonparametric Estimates of the Areas under
///   Receiver Operating Characteristic Curves: An Update".
/// </summary>
public void DeLongVarianceTest()
{
    bool yes = true;
    bool no = false;

    // Ground truth and rater scores for the fifteen cases of the example.
    bool[] expected =
    {
        /* 1*/ yes, /* 2*/ no,  /* 3*/ yes, /* 4*/ no,  /* 5*/ no,
        /* 6*/ yes, /* 7*/ yes, /* 8*/ no,  /* 9*/ no,  /*10*/ yes,
        /*11*/ no,  /*12*/ no,  /*13*/ yes, /*14*/ no,  /*15*/ no
    };

    int[] actual =
    {
        /* 1*/ 1, /* 2*/ 2, /* 3*/ 5, /* 4*/ 1, /* 5*/ 1,
        /* 6*/ 1, /* 7*/ 2, /* 8*/ 1, /* 9*/ 2, /*10*/ 2,
        /*11*/ 1, /*12*/ 1, /*13*/ 5, /*14*/ 1, /*15*/ 1
    };

    var curve = new ReceiverOperatingCharacteristic(expected, actual);
    curve.Compute(10);

    // Ratings assigned to the truly-positive and truly-negative cases.
    int[] expectedPositiveResults = { 1, 5, 1, 2, 2, 5 };
    Assert.AreEqual(expectedPositiveResults.Length, curve.PositiveResults.Length);
    for (int i = 0; i < expectedPositiveResults.Length; i++)
        Assert.AreEqual(expectedPositiveResults[i], curve.PositiveResults[i]);

    int[] expectedNegativeResults = { 2, 1, 1, 1, 2, 1, 1, 1, 1 };
    Assert.AreEqual(expectedNegativeResults.Length, curve.NegativeResults.Length);
    for (int i = 0; i < expectedNegativeResults.Length; i++)
        Assert.AreEqual(expectedNegativeResults[i], curve.NegativeResults[i]);

    // DeLong pseudo-accuracy (structural) components for each case.
    double[] expectedPositiveAccuracies = { 0.3888, 1.0000, 0.3888, 0.8888, 0.8888, 1.0000 };
    Assert.AreEqual(expectedPositiveAccuracies.Length, curve.PositiveAccuracies.Length);
    for (int i = 0; i < expectedPositiveAccuracies.Length; i++)
        Assert.AreEqual(expectedPositiveAccuracies[i], curve.PositiveAccuracies[i], 1e-4);

    double[] expectedNegativeAccuracies = { 0.5000, 0.8333, 0.8333, 0.8333, 0.5000, 0.8333, 0.8333, 0.8333, 0.8333 };
    Assert.AreEqual(expectedNegativeAccuracies.Length, curve.NegativeAccuracies.Length);
    for (int i = 0; i < expectedNegativeAccuracies.Length; i++)
        Assert.AreEqual(expectedNegativeAccuracies[i], curve.NegativeAccuracies[i], 1e-4);

    Assert.IsFalse(curve.NegativeAccuracies.HasNaN());
    Assert.IsFalse(curve.PositiveAccuracies.HasNaN());

    // The variance and standard error of the estimated area.
    Assert.AreEqual(0.1285, curve.StandardError, 1e-4);
    Assert.AreEqual(0.0165, curve.Variance, 1e-4);
    Assert.IsFalse(Double.IsNaN(curve.StandardError));
    Assert.IsFalse(Double.IsNaN(curve.Variance));
}
/// <summary>
///   Updates the accuracy using the current results.
///   Recomputes Accuracy, NegativeLogProb, ModelConfusionMatrix, AvgRecall and
///   WorkerLabelAccuracy from the inferred labels versus the gold labels, and —
///   for binary tasks only — the ROC curve and the binary confusion matrix.
/// </summary>
protected virtual void UpdateAccuracy()
{
    // Cap on the per-item negative log-probability, so a single confident
    // mistake cannot dominate the average.
    double nlpdThreshold = -Math.Log(0.001);
    int labelCount = FullMapping.LabelCount;
    var confusionMatrix = Util.ArrayInit(labelCount, labelCount, (i, j) => 0.0);
    int correct = 0;
    double logProb = 0.0;
    int goldX = 0;   // number of items that actually have a gold label

    List<double> trueBinaryLabelList = null;
    List<double> probTrueLabelList = null;

    // Only for binary labels: collect (gold label, P(gold label)) pairs
    // so a ROC curve can be computed at the end.
    if (Mapping.LabelCount == 2)
    {
        trueBinaryLabelList = new List<double>();
        probTrueLabelList = new List<double>();
    }

    foreach (var kvp in GoldLabels)
    {
        if (kvp.Value == null)
            continue;

        // We have a gold label
        goldX++;
        Discrete trueLabel = null;
        if (TrueLabel.ContainsKey(kvp.Key))
            trueLabel = TrueLabel[kvp.Key];
        if (trueLabel == null)
        {
            // No inferred posterior for this item: fall back to a uniform
            // distribution instead of skipping the item.
            trueLabel = Discrete.Uniform(Mapping.LabelCount);
            //continue; // No inferred label
        }

        var probs = trueLabel.GetProbs();
        double max = probs.Max();
        // All labels tied at the maximum posterior probability; if there is
        // more than one, break the tie uniformly at random.
        var predictedLabels = probs.Select((p, i) => new { prob = p, idx = i }).Where(a => a.prob == max).Select(a => a.idx).ToArray();
        int predictedLabel = predictedLabels.Length == 1 ? predictedLabels[0] : predictedLabels[Rand.Int(predictedLabels.Length)];

        this.PredictedLabel[kvp.Key] = predictedLabel;

        int goldLabel = kvp.Value.Value;
        if (goldLabel == predictedLabel)
            correct++;

        confusionMatrix[goldLabel, predictedLabel] = confusionMatrix[goldLabel, predictedLabel] + 1.0;

        // Clamp the negative log-probability of the gold label at the threshold.
        var nlp = -trueLabel.GetLogProb(goldLabel);
        if (nlp > nlpdThreshold)
            nlp = nlpdThreshold;
        logProb += nlp;

        if (trueBinaryLabelList != null)
        {
            trueBinaryLabelList.Add(goldLabel);
            probTrueLabelList.Add(probs[goldLabel]);
        }
    }

    // NOTE(review): if there are no gold labels (goldX == 0), both of these
    // divisions produce NaN — confirm callers guarantee at least one gold label.
    Accuracy = correct / (double)goldX;
    NegativeLogProb = logProb / (double)goldX;
    ModelConfusionMatrix = confusionMatrix;

    // Average recall: mean over classes of (diagonal / row sum).
    // NOTE(review): a gold class that never occurs gives classSum == 0 and a
    // division by zero (NaN propagates into AvgRecall) — verify inputs.
    double sumRec = 0;
    for (int i = 0; i < labelCount; i++)
    {
        double classSum = 0;
        for (int j = 0; j < labelCount; j++)
        {
            classSum += confusionMatrix[i, j];
        }
        sumRec += confusionMatrix[i, i] / classSum;
    }
    AvgRecall = sumRec / labelCount;

    // WorkerLabelAccuracy: Perc. agreement between worker label and gold label
    int sumAcc = 0;
    var LabelSet = Mapping.DataWithGold;
    int numLabels = LabelSet.Count();
    foreach (var datum in LabelSet)
    {
        sumAcc += datum.WorkerLabel == datum.GoldLabel ? 1 : 0;
    }
    WorkerLabelAccuracy = (double) sumAcc / (double) numLabels;

    if (trueBinaryLabelList != null)
    {
        // Binary task: ROC over P(gold label) with 10000 cut-off points, plus
        // a 2x2 confusion matrix (TP, TN, FP, FN taken from the matrix above).
        RocCurve = new ReceiverOperatingCharacteristic(trueBinaryLabelList.ToArray(), probTrueLabelList.ToArray());
        RocCurve.Compute(10000);
        ResultsConfusionMatrixForBinaryLabels = new ConfusionMatrix((int)confusionMatrix[1, 1], (int)confusionMatrix[0, 0], (int)confusionMatrix[0, 1], (int)confusionMatrix[1, 0]);
    }
}
/// <summary>
///   Measures the accuracy of a binary classifier (a linear SVM trained with
///   SMO) by building a 100-point ROC curve from its outputs.
/// </summary>
public void ReceiverOperatingCharacteristicConstructorTest3()
{
    // This example shows how to measure the accuracy of a
    // binary classifier using a ROC curve. For this example,
    // we will be creating a Support Vector Machine trained
    // on the following instances:

    double[][] inputs =
    {
        // Those are from class -1
        new double[] { 2, 4, 0 },
        new double[] { 5, 5, 1 },
        new double[] { 4, 5, 0 },
        new double[] { 2, 5, 5 },
        new double[] { 4, 5, 1 },
        new double[] { 4, 5, 0 },
        new double[] { 6, 2, 0 },
        new double[] { 4, 1, 0 },

        // Those are from class +1
        new double[] { 1, 4, 5 },
        new double[] { 7, 5, 1 },
        new double[] { 2, 6, 0 },
        new double[] { 7, 4, 7 },
        new double[] { 4, 5, 0 },
        new double[] { 6, 2, 9 },
        new double[] { 4, 1, 6 },
        new double[] { 7, 2, 9 },
    };

    int[] outputs =
    {
        -1, -1, -1, -1, -1, -1, -1, -1, // first eight from class -1
        +1, +1, +1, +1, +1, +1, +1, +1  // last eight from class +1
    };

    // Create a linear Support Vector Machine with 3 inputs
    SupportVectorMachine machine = new SupportVectorMachine(inputs: 3);

    // Create the sequential minimal optimization teacher
    SequentialMinimalOptimization learn = new SequentialMinimalOptimization(machine, inputs, outputs);

    // Run the learning algorithm (the training error is not asserted here)
    double error = learn.Run();

    // Extract the input labels predicted by the machine
    double[] predicted = new double[inputs.Length];
    for (int i = 0; i < predicted.Length; i++)
        predicted[i] = machine.Compute(inputs[i]);

    // Create a new ROC curve to assess the performance of the model
    var roc = new ReceiverOperatingCharacteristic(outputs, predicted);
    roc.Compute(100); // Compute a ROC curve with 100 points

    /*
    // Generate a connected scatter plot for the ROC curve and show it on-screen
    ScatterplotBox.Show(roc.GetScatterplot(includeRandom: true), nonBlocking: true)

    .SetSymbolSize(0) // do not display data points
    .SetLinesVisible(true) // show lines connecting points
    .SetScaleTight(true) // tighten the scale to points
    .WaitForClose();
    */

    Assert.AreEqual(0.7890625, roc.Area);
    // Expected value under the older Hanley-McNeil estimate, kept for reference:
    // Assert.AreEqual(0.1174774, roc.StandardError, 1e-6); HanleyMcNeil
    Assert.AreEqual(0.11958120746409709, roc.StandardError, 1e-6);
}
/// <summary>
///   Runs the lesson: loads the California-housing data set, trains a
///   logistic-regression classifier that predicts whether the median house
///   value is "high" (>= 265,000), and evaluates it with a probability
///   histogram, a confusion matrix and a ROC curve.
/// </summary>
public static void Run()
{
    // get data
    Console.WriteLine("Loading data....");
    var path = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"..\..\..\..\california_housing.csv"));
    var housing = Frame.ReadCsv(path, separators: ",");
    // drop the rows clipped at the census cap of 500,000
    housing = housing.Where(kv => ((decimal)kv.Value["median_house_value"]) < 500000);

    // create the median_high_house_value label: 1.0 when the house value is high
    housing.AddColumn("median_high_house_value", housing["median_house_value"].Select(v => v.Value >= 265000 ? 1.0 : 0.0));

    // shuffle the frame
    var rnd = new Random();
    var indices = Enumerable.Range(0, housing.Rows.KeyCount).OrderBy(v => rnd.NextDouble());
    housing = housing.IndexRowsWith(indices).SortRowsByKey();

    // create training, validation, and test frames
    var training = housing.Rows[Enumerable.Range(0, 12000)];
    var validation = housing.Rows[Enumerable.Range(12000, 2500)];
    var test = housing.Rows[Enumerable.Range(14500, 2500)]; // reserved; not evaluated in this lesson

    // build the list of features we're going to use
    var columns = new string[] { "latitude", "longitude", "housing_median_age", "total_rooms", "total_bedrooms", "population", "households", "median_income" };

    // train the model using a logistic regressor
    var learner = new IterativeReweightedLeastSquares <LogisticRegression>()
    {
        MaxIterations = 100
    };
    var regression = learner.Learn(
        training.Columns[columns].ToArray2D <double>().ToJagged(),
        training["median_high_house_value"].Values.ToArray());

    // get probabilities on the validation set
    var features_validation = validation.Columns[columns].ToArray2D <double>().ToJagged();
    var label_validation = validation["median_high_house_value"].Values.ToArray();
    var probabilities = regression.Probability(features_validation);

    // calculate the histogram of probabilities
    var histogram = new Histogram();
    histogram.Compute(probabilities, 0.05);

    // draw the histogram
    Plot(histogram, "Probability histogram", "prediction", "count");

    // get predictions and actuals
    var predictions = regression.Decide(features_validation);
    var actuals = label_validation.Select(v => v == 1.0 ? true : false).ToArray();

    // create confusion matrix
    var confusion = new ConfusionMatrix(predictions, actuals);

    // display classification scores
    Console.WriteLine($"True Positives: {confusion.TruePositives}");
    Console.WriteLine($"True Negatives: {confusion.TrueNegatives}");
    Console.WriteLine($"False Positives: {confusion.FalsePositives}");
    Console.WriteLine($"False Negatives: {confusion.FalseNegatives}");
    Console.WriteLine();

    // display accuracy, precision, and recall
    Console.WriteLine($"Accuracy: {confusion.Accuracy}");
    Console.WriteLine($"Precision: {confusion.Precision}");
    Console.WriteLine($"Recall: {confusion.Recall}");
    Console.WriteLine();

    // display TPR and FPR
    Console.WriteLine($"TPR: {confusion.Sensitivity}");
    Console.WriteLine($"FPR: {confusion.FalsePositiveRate}");
    Console.WriteLine();

    // calculate roc curve
    // FIX: build the ROC from the predicted probabilities rather than the
    // thresholded 0/1 decisions — a ROC over hard decisions collapses to a
    // degenerate three-point curve and misreports the model's AUC.
    var roc = new ReceiverOperatingCharacteristic(label_validation, probabilities);
    roc.Compute(100);

    // generate the scatter plot
    var rocPlot = roc.GetScatterplot(true);

    // show roc curve
    Plot(rocPlot);

    // show the auc
    Console.WriteLine($"AUC: {roc.Area}");
}
/// <summary>
///   Compares the areas under two correlated ROC curves with the DeLong test,
///   following the example in "Sampling Variability of Nonparametric Estimates
///   of the Areas under Receiver Operating Characteristic Curves: An Update".
/// </summary>
public void DeLongComparisonTest()
{
    bool yes = true;
    bool no = false;

    // Ground truth for the fifteen cases of the example.
    bool[] expected =
    {
        /* 1*/ yes, /* 2*/ no,  /* 3*/ yes, /* 4*/ no,  /* 5*/ no,
        /* 6*/ yes, /* 7*/ yes, /* 8*/ no,  /* 9*/ no,  /*10*/ yes,
        /*11*/ no,  /*12*/ no,  /*13*/ yes, /*14*/ no,  /*15*/ no
    };

    // Ratings produced by the two raters being compared (cases 1..15).
    int[] actual1 = { 1, 2, 5, 1, 1, 1, 2, 1, 2, 2, 1, 1, 5, 1, 1 };
    int[] actual2 = { 1, 1, 5, 1, 1, 1, 4, 1, 2, 2, 1, 1, 5, 1, 1 };

    var curveA = new ReceiverOperatingCharacteristic(expected, actual1);
    var curveB = new ReceiverOperatingCharacteristic(expected, actual2);
    curveA.Compute(10);
    curveB.Compute(10);

    // DeLong's paired test for the difference between the two areas.
    var test = new TwoReceiverOperatingCurveTest(curveA, curveB);

    Assert.AreEqual(-1.1351915229662422, test.Statistic);
}
/// <summary>
///   Entry point: loads model outputs, ground-truth targets and per-sample
///   errors from the paths given in AnalysClassifier.config and, depending on
///   the configured flags, plots a ROC curve, histograms, and a confusion
///   matrix with its summary statistics.
/// </summary>
static void Main(string[] args)
{
    // Force '.' as the decimal separator so double.Parse reads the data
    // files the same way regardless of the OS culture.
    System.Globalization.CultureInfo customCulture = (System.Globalization.CultureInfo)System.Threading.Thread.CurrentThread.CurrentCulture.Clone();
    customCulture.NumberFormat.NumberDecimalSeparator = ".";
    int nBufferWidth = Console.BufferWidth;
    Console.SetBufferSize(nBufferWidth, 1000);
    System.Threading.Thread.CurrentThread.CurrentCulture = customCulture;

    Config cfg = new Config("AnalysClassifier.config");
    log("info", "Конфиг:" + cfg.ToString());

    // output = model predictions, target = ground-truth labels,
    // error = per-sample errors (one value per line in each file).
    double[] output = File.ReadAllLines(cfg.OutputPath)
        .Select(x => double.Parse(x)).ToArray();
    double[] target = File.ReadAllLines(cfg.TargetPath)
        .Select(x => double.Parse(x)).ToArray();
    double[] error = File.ReadAllLines(cfg.ErrorPath)
        .Select(x => double.Parse(x)).ToArray();

    if (cfg.Plotroc == true)
    {
        Console.WriteLine("Модуль Plotroc");
        Console.WriteLine("Расчет ROC");
        // FIX: the measurement (ground truth, 'target') comes first and the
        // prediction ('output') second — the arguments were swapped before.
        var roc = new ReceiverOperatingCharacteristic(target, output);
        roc.Compute(100); // Compute a ROC curve with 100 cut-off points
        Console.WriteLine("ROC расчитана");
        ScatterplotBox.Show(roc.GetScatterplot(includeRandom: true))
            .SetSymbolSize(0)      // do not display data points
            .SetLinesVisible(true) // show lines connecting points
            .SetScaleTight(true);  // tighten the scale to points
    }

    if (cfg.Plothist == true)
    {
        Console.WriteLine("Модуль Plothist");
        HistoframShow(error, "ошибок");
        HistoframShow(target, "эталонных выходов");
        HistoframShow(output, "выходов модели");
    }

    if (cfg.Plotconfucion == true)
    {
        Console.WriteLine("Модуль Plotconfucion");
        Console.WriteLine("Расчет ConfusionMatrix");
        // FIX: 'expected' must be the ground truth (target) and 'predicted'
        // the model output — they were previously swapped. Values are
        // binarized at the 0.5 threshold.
        var cm = new GeneralConfusionMatrix(classes: 2,
            expected: target.Select(x => x > 0.5 ? 1 : 0).ToArray(),
            predicted: output.Select(x => x > 0.5 ? 1 : 0).ToArray());
        Console.WriteLine("ConfusionMatrix расчитана");
        Console.WriteLine("Confusion Matrix:");

        // Pretty-print the matrix with column totals as a header row and
        // row totals appended at the end of each row.
        string[][] outMat = cm.Matrix.
            ToJagged().
            Select(x => x.Select(y => IntToStringFormatted(y)).ToArray()).
            ToArray();
        foreach (var it in cm.ColumnTotals)
        {
            Console.Write($"{IntToStringFormatted(it)}");
        }
        Console.WriteLine("|");
        Console.WriteLine(new string('_', 9 * cm.ColumnTotals.Length));
        int i = 0;
        foreach (var it in outMat)
        {
            foreach (var it2 in it)
            {
                Console.Write(it2);
                Console.Write(" ");
            }
            Console.Write($"| {cm.RowTotals[i++]}");
            Console.WriteLine();
        }
        Console.WriteLine();

        // We can get more information about our problem as well:
        Console.WriteLine("Дополнительная информация:");
        Console.WriteLine($"Классов: {cm.NumberOfClasses}:");
        Console.WriteLine($"Примеров: {cm.NumberOfSamples}:");
        Console.WriteLine($"Точность: {cm.Accuracy}:");
        Console.WriteLine($"Ошибка: {cm.Error}:");
        Console.WriteLine($"chanceAgreement: {cm.ChanceAgreement}:");
        // FIX: this line previously printed cm.Error under the
        // "geommetricAgreement" label.
        Console.WriteLine($"geometricAgreement: {cm.GeometricAgreement}:");
        Console.WriteLine($"pearson: {cm.Pearson}:");
        Console.WriteLine($"kappa: {cm.Kappa}:");
        Console.WriteLine($"tau: {cm.Tau}:");
        Console.WriteLine($"chiSquare: {cm.ChiSquare}:");
        // FIX: this line previously printed cm.Kappa itself; report the
        // standard error of the Kappa coefficient instead.
        Console.WriteLine($"kappaStdErr: {Math.Sqrt(cm.Variance)}:");
    }
}
/// <summary>
///   Aggregates per-node SVM predictions into per-prediction ("base") and
///   per-node ("voter") statistics: fills the expected/predicted/confidence
///   lists, updates the TP/TN/FP/FN counters for both views, populates the
///   details table, and finally computes confusion matrices and ROC areas.
/// </summary>
/// <param name="nodeClassifications">Map from node key to its classification
///   result (actual class and the raw predictions of each base model).</param>
private void compute(Dictionary <string, NodeClassification> nodeClassifications)
{
    baseExpected = new List <int>();
    basePredicted = new List <int>();
    basePredictedConf = new List <double>();
    voterExpected = new List <int>();
    voterPredicted = new List <int>();
    voterPredictedConf = new List <double>();

    // NOTE(review): predictionCount is assigned but never used below.
    int predictionCount = nodeClassifications.First().Value.RawPredictions.Count;

    foreach (var kvp in nodeClassifications)
    {
        // The sum of the raw predictions serves as the voter's confidence score.
        double sum = kvp.Value.RawPredictions.Sum();

        // Voter
        VSamples++;
        voterExpected.Add(kvp.Value.ActualClass);
        voterPredictedConf.Add(sum);
        if (sum >= 0)
        {
            voterPredicted.Add(1);
        }
        else
        {
            voterPredicted.Add(-1);
        }

        // Voter-level confusion counters.
        // NOTE(review): this branch tests ActualClass >= 0 while the base-level
        // loop below uses > 0; the two agree only when classes are strictly ±1.
        if (kvp.Value.ActualClass >= 0) // malware
        {
            VAP++;
            if (sum >= 0) // malware predicted
            {
                VTP++;
            }
            else // goodware predicted
            {
                VFN++;
            }
        }
        else if (kvp.Value.ActualClass < 0)
        {
            VAN++;
            if (sum < 0) // goodware predicted
            {
                VTN++;
            }
            else // malware predicted
            {
                VFP++;
            }
        }

        // sum += offset;

        // Human-readable outcome of the sum vote and the majority vote for the table.
        String sumVote = ((sum >= 0 && kvp.Value.ActualClass > 0) || (sum < 0 && kvp.Value.ActualClass < 0)) ? "Success" : "Fail";
        String majorityVote = (kvp.Value.CorrectPredictions > kvp.Value.FalsePredictions) ? "Success" : "Fail";
        String variancePrediction = Accord.Statistics.Tools.Variance(kvp.Value.RawPredictions.ToArray()).ToString();
        String meanPrediction = Accord.Statistics.Tools.Mean(kvp.Value.RawPredictions.ToArray()).ToString();
        String sumPredictions = sum.ToString();

        // Base-level statistics: one entry per raw prediction of this node.
        foreach (double prediction in kvp.Value.RawPredictions)
        {
            baseExpected.Add(kvp.Value.ActualClass);
            basePredictedConf.Add(prediction);
            if (prediction >= 0)
            {
                basePredicted.Add(1);
            }
            else
            {
                basePredicted.Add(-1);
            }
            Samples++;
            if (kvp.Value.ActualClass > 0) // malware
            {
                AP++;
                if (prediction >= 0) // predicted as malware
                {
                    TP++;
                }
                else // predicted as goodware
                {
                    FN++;
                }
            }
            else if (kvp.Value.ActualClass < 0) // goodware
            {
                AN++;
                if (prediction >= 0) // predicted as malware
                {
                    FP++;
                }
                else // predicted as goodware
                {
                    TN++;
                }
            }
        }

        /*
         * addItem(kvp.Key,
         * kvp.Value.TotalPredictions.ToString(),
         * kvp.Value.CorrectPredictions.ToString(),
         * kvp.Value.FalsePredictions.ToString(),
         * kvp.Value.RawPredictions[0].ToString(),
         * kvp.Value.RawPredictions[1].ToString(),
         * majorityVote,
         * sumVote,
         * meanPrediction,
         * variancePrediction,
         * sumPredictions);*/

        // Row for the details table: key, prediction counts, every raw
        // prediction, then the vote summaries.
        List <object> vals = new List <object>();
        vals.Add(kvp.Key);
        vals.Add(kvp.Value.TotalPredictions.ToString());
        vals.Add(kvp.Value.CorrectPredictions.ToString());
        vals.Add(kvp.Value.FalsePredictions.ToString());
        for (int i = 0; i < kvp.Value.RawPredictions.Count; i++)
        {
            vals.Add(kvp.Value.RawPredictions[i].ToString());
        }
        vals.Add(majorityVote);
        vals.Add(sumVote);
        vals.Add(meanPrediction);
        vals.Add(variancePrediction);
        vals.Add(sumPredictions);
        // DataRow n = new DataRow();
        _tableDetails.Rows.Add(vals.ToArray());

        // NOTE(review): sumVoteSuccess duplicates the sumVote logic above and
        // is never read afterwards.
        bool sumVoteSuccess = ((sum >= 0 && kvp.Value.ActualClass > 0) || (sum < 0 && kvp.Value.ActualClass < 0)) ? true : false;

        this._numNodes++;
        this._numPredictionsPerNode = kvp.Value.RawPredictions.Count;
        this._numPredictions += kvp.Value.RawPredictions.Count;
    }

    // calculate confusion matrix statistics
    //_Sensitivity = ((double)_NumTruePositive / (double)(_NumTruePositive + _NumFalseNegative));
    //_Specificity = ((double)_NumTrueNegative / (double)(_NumTrueNegative + _NumFalsePositive));
    //_Precision = ((double)_NumTruePositive / (double)(_NumTruePositive + _NumFalsePositive));
    //_FPR = ((double)_NumFalsePositive / (double)(_NumFalsePositive + _NumTrueNegative));
    //_Accuracy = (((double)(_NumTruePositive + _NumTrueNegative)) / ((double)(_numNodes)));
    //_FDR = ((double)_NumFalsePositive / (double)(_NumFalsePositive + _NumTruePositive));

    // Confusion matrices: positive class = 1, negative class = -1.
    _baseMatrix = new ConfusionMatrix(basePredicted.ToArray <int>(), baseExpected.ToArray <int>(), 1, -1);
    _voterMatrix = new ConfusionMatrix(voterPredicted.ToArray <int>(), voterExpected.ToArray <int>(), 1, -1);

    // ROC curves (100 cut-off points) over the confidence scores for both views.
    ReceiverOperatingCharacteristic rocBase = new ReceiverOperatingCharacteristic(baseExpected.ToArray(), basePredictedConf.ToArray());
    ReceiverOperatingCharacteristic rocVoter = new ReceiverOperatingCharacteristic(voterExpected.ToArray(), voterPredictedConf.ToArray());
    rocBase.Compute(100);
    rocVoter.Compute(100);
    rocAreaBase = rocBase.Area;
    rocAreaVoter = rocVoter.Area;
    // Utility.writeToConsole<int>(voterExpected.ToArray<int>());
}
/// <summary>
///   Plots one-vs-rest ROC curves (train and test) for the given class,
///   together with the random-guess diagonal, in a modal WinForms window.
/// </summary>
private static void DrawROCCurve(int[] trainActual, int[] trainPreds, int[] testActual, int[] testPreds, int predClass, int minNumOccurrences, string modelName)
{
    string predClassStr = predClass == 0 ? "Neutral" : predClass == 1 ? "Positive" : "Negative";

    Console.WriteLine("* Building ROC curve for {0} vs. Rest", predClassStr);

    // --- Train set: one-vs-rest ROC with 1000 cut-off points ---
    var trainIsClass = trainActual.Select(label => label == predClass).ToArray();
    var trainOneVsRest = trainPreds.Select(label => label == predClass ? 1 : 0).ToArray();
    var trainRoc = new ReceiverOperatingCharacteristic(trainIsClass, trainOneVsRest);
    trainRoc.Compute(1000);
    double trainAUC = trainRoc.Area;
    var trainXValues = trainRoc.Points.Select(p => 1 - p.Specificity).ToArray();
    var trainYValues = trainRoc.Points.Select(p => p.Sensitivity).ToArray();

    // --- Test set: one-vs-rest ROC with 1000 cut-off points ---
    var testIsClass = testActual.Select(label => label == predClass).ToArray();
    var testOneVsRest = testPreds.Select(label => label == predClass ? 1 : 0).ToArray();
    var testRoc = new ReceiverOperatingCharacteristic(testIsClass, testOneVsRest);
    testRoc.Compute(1000);
    double testAUC = testRoc.Area;
    var testXValues = testRoc.Points.Select(p => 1 - p.Specificity).ToArray();
    var testYValues = testRoc.Points.Select(p => p.Sensitivity).ToArray();

    // --- Draw both curves plus the random-guess baseline ---
    var spv = new ScatterplotView
    {
        Dock = DockStyle.Fill,
        LinesVisible = true
    };
    spv.Graph.GraphPane.AddCurve(
        String.Format("Train (AUC: {0:0.00})", trainAUC),
        trainXValues, trainYValues, Color.Green, SymbolType.None);
    spv.Graph.GraphPane.AddCurve(
        String.Format("Test (AUC: {0:0.00})", testAUC),
        testXValues, testYValues, Color.Blue, SymbolType.None);
    spv.Graph.GraphPane.AddCurve("Random", testXValues, testXValues, Color.Red, SymbolType.None);
    spv.Graph.GraphPane.Title.Text = String.Format(
        "{0} ROC - {1} vs. Rest (# occurrences >= {2})",
        modelName, predClassStr, minNumOccurrences);
    spv.Graph.GraphPane.AxisChange();

    // Host the plot in a modal form.
    Form f1 = new Form
    {
        Width = 700,
        Height = 500
    };
    f1.Controls.Add(spv);
    f1.ShowDialog();
}