/// <summary>
        ///   Initializes a hypothesis test that compares the areas of two ROC curves.
        /// </summary>
        ///
        /// <param name="curve1">The first ROC curve.</param>
        /// <param name="curve2">The second ROC curve.</param>
        /// <param name="hypothesizedDifference">The difference between the two areas assumed under the null hypothesis.</param>
        /// <param name="alternate">The alternative (research) hypothesis to test.</param>
        ///
        public TwoReceiverOperatingCurveTest(ReceiverOperatingCharacteristic curve1, ReceiverOperatingCharacteristic curve2,
                                             double hypothesizedDifference = 0, TwoSampleHypothesis alternate = TwoSampleHypothesis.ValuesAreDifferent)
        {
            this.Curve1 = curve1;
            this.Curve2 = curve2;

            // Accuracy vectors of each curve, read in the order: curve1 (negative, positive), curve2 (negative, positive).
            double[] negatives1 = curve1.NegativeAccuracies;
            double[] positives1 = curve1.PositiveAccuracies;
            double[] negatives2 = curve2.NegativeAccuracies;
            double[] positives2 = curve2.PositiveAccuracies;

            // Covariance between the curves, each term scaled by the length of the first curve's vector.
            double negativeCovariance = Measures.Covariance(negatives1, negatives2);
            double positiveCovariance = Measures.Covariance(positives1, positives2);
            double covariance = negativeCovariance / negatives1.Length
                              + positiveCovariance / positives1.Length;

            this.EstimatedValue1 = curve1.Area;
            this.EstimatedValue2 = curve2.Area;
            this.ObservedDifference = EstimatedValue1 - EstimatedValue2;
            this.HypothesizedDifference = hypothesizedDifference;

            this.Variance1 = curve1.Variance;
            this.Variance2 = curve2.Variance;

            // Variance of the difference: sum of both variances minus twice the covariance.
            this.OverallVariance = Variance1 + Variance2 - 2 * covariance;
            this.StandardError = System.Math.Sqrt(OverallVariance);

            // Z statistic: deviation of the observed difference from the hypothesized one, in standard errors.
            Compute((ObservedDifference - HypothesizedDifference) / StandardError, alternate);
        }
예제 #2
0
        /// <summary>
        ///   Asks the user for a number of ROC points, obtains the ROC curve that matches
        ///   the current voting scheme, computes it and shows it in a data view.
        /// </summary>
        private void buttonROC_Click(object sender, EventArgs e)
        {
            // Stays at -1 when the input box is left empty.
            int    requestedPoints = -1;
            string answer          = "";

            Utility.InputBox("ROC Points", "How many points should be used?", ref answer);
            if (answer != "" && !Int32.TryParse(answer, out requestedPoints))
            {
                MessageBox.Show("Your input was invalid. Please try again.");
                return;
            }

            // Pick the curve source according to the voting scheme; unknown schemes leave it null.
            ReceiverOperatingCharacteristic curve = null;
            switch (_scheme)
            {
            case VotingScheme.NONE:
                curve = _voter.getROC();
                break;

            case VotingScheme.MAJORITY_VOTE:
                curve = _voter.getMajorityVote().getROC();
                break;

            case VotingScheme.ADDITIVE_PREDICTIONS:
                curve = _voter.getSumPredictions().getROC();
                break;

            default:
                break;
            }

            if (curve == null)
            {
                // No scheme-specific curve: offer the general (non-vote) curve instead.
                if (MessageBox.Show(this, "This voter does not offer ROC computation.\nWould you like to compute a general (non-vote) ROC from the data?", "ROC Computation", MessageBoxButtons.YesNo) != System.Windows.Forms.DialogResult.Yes)
                {
                    return;
                }

                curve = _voter.getROC();
            }

            curve.Compute(requestedPoints);
            FormDataView <double> view = new FormDataView <double>(curve);
            view.Show();
        }
        /// <summary>
        ///   Builds a test for the difference between the areas of two ROC curves.
        /// </summary>
        /// 
        /// <param name="curve1">The first ROC curve.</param>
        /// <param name="curve2">The second ROC curve.</param>
        /// <param name="hypothesizedDifference">The difference between the two areas under the null hypothesis.</param>
        /// <param name="alternate">The alternative hypothesis (research hypothesis) to test.</param>
        ///
        public TwoReceiverOperatingCurveTest(ReceiverOperatingCharacteristic curve1, ReceiverOperatingCharacteristic curve2,
            double hypothesizedDifference = 0, TwoSampleHypothesis alternate = TwoSampleHypothesis.ValuesAreDifferent)
        {
            Curve1 = curve1;
            Curve2 = curve2;

            double[] firstNegatives = curve1.NegativeAccuracies;
            double[] firstPositives = curve1.PositiveAccuracies;

            double[] secondNegatives = curve2.NegativeAccuracies;
            double[] secondPositives = curve2.PositiveAccuracies;

            // Cross-curve covariances of the negative and positive accuracy vectors.
            double covNegatives = Measures.Covariance(firstNegatives, secondNegatives);
            double covPositives = Measures.Covariance(firstPositives, secondPositives);

            // Each covariance is divided by the length of the first curve's corresponding vector.
            double pooledCovariance = covNegatives / firstNegatives.Length + covPositives / firstPositives.Length;

            EstimatedValue1 = curve1.Area;
            EstimatedValue2 = curve2.Area;
            ObservedDifference = EstimatedValue1 - EstimatedValue2;
            HypothesizedDifference = hypothesizedDifference;

            Variance1 = curve1.Variance;
            Variance2 = curve2.Variance;

            OverallVariance = Variance1 + Variance2 - 2 * pooledCovariance;
            StandardError = System.Math.Sqrt(OverallVariance);

            // The Z statistic measures how far the observed difference lies from the hypothesized one.
            double statistic = (ObservedDifference - HypothesizedDifference) / StandardError;

            Compute(statistic, alternate);
        }
예제 #4
0
        /// <summary>
        ///   Creates the form and immediately opens a separate scatter plot window for the given ROC curve.
        /// </summary>
        /// <param name="roc">The ROC curve whose scatter plot is shown.</param>
        /// <param name="windowTitle">NOTE(review): accepted but not used by this constructor.</param>
        public VisualizationForm(ReceiverOperatingCharacteristic roc, String windowTitle)
        {
            InitializeComponent();

            // The plot is shown in its own ScatterPlotForm rather than inside this form.
            new ScatterPlotForm(roc.GetScatterplot(true)).Show();
        }
예제 #5
0
        /// <summary>
        ///   Builds a ROC curve object from the voter's expected values and predicted confidences.
        /// </summary>
        /// <returns>A newly constructed curve; this method does not call Compute on it.</returns>
        internal ReceiverOperatingCharacteristic getROC()
        {
            var expected  = voterExpected.ToArray();
            var predicted = voterPredictedConf.ToArray();

            return new ReceiverOperatingCharacteristic(expected, predicted);
        }
        /// <summary>
        ///   Initializes a new <see cref="ReceiverOperatingCurveTest"/> for a single ROC curve.
        /// </summary>
        /// 
        /// <param name="curve">The curve whose area is tested.</param>
        /// <param name="hypothesizedValue">The ROC area assumed under the null hypothesis.</param>
        /// <param name="alternate">The alternative hypothesis (research hypothesis) to test.</param>
        ///
        public ReceiverOperatingCurveTest(ReceiverOperatingCharacteristic curve, double hypothesizedValue = 0.5,
            OneSampleHypothesis alternate = OneSampleHypothesis.ValueIsDifferentFromHypothesis)
        {
            this.Curve = curve;

            // The curve's area is the estimate; its standard error is passed alongside it.
            var observedArea  = curve.Area;
            var standardError = curve.StandardError;

            Compute(observedArea, hypothesizedValue, standardError, alternate);
        }
예제 #7
0
        /// <summary>
        ///   Returns the area under the ROC curve for the given expected and predicted values.
        /// </summary>
        /// <param name="expected">The expected (actual) values.</param>
        /// <param name="predicted">The predicted values; also passed to Compute as its array argument.</param>
        /// <returns>The <c>Area</c> of the computed curve.</returns>
        public static double Auc(double[] expected, double[] predicted)
        {
            ReceiverOperatingCharacteristic curve = new ReceiverOperatingCharacteristic(expected, predicted);

            // NOTE(review): the array overload of Compute is presumably a list of cut-off values — confirm.
            curve.Compute(predicted);

            return curve.Area;
        }
예제 #8
0
        /// <summary>
        ///   Constructs a <see cref="ReceiverOperatingCurveTest"/> that tests the area of one ROC curve.
        /// </summary>
        ///
        /// <param name="curve">The curve to be tested.</param>
        /// <param name="hypothesizedValue">The value of the ROC area under the null hypothesis.</param>
        /// <param name="alternate">The alternative hypothesis (research hypothesis) to test.</param>
        ///
        public ReceiverOperatingCurveTest(ReceiverOperatingCharacteristic curve, double hypothesizedValue = 0.5,
                                          OneSampleHypothesis alternate = OneSampleHypothesis.ValueIsDifferentFromHypothesis)
        {
            Curve = curve;

            // Estimate and its standard error both come from the curve itself.
            var area  = curve.Area;
            var error = curve.StandardError;

            Compute(area, hypothesizedValue, error, alternate);
        }
예제 #9
0
        /// <summary>
        ///   Trains a linear support vector machine on a small two-class sample and
        ///   prints the area and standard error of the resulting ROC curve.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Sixteen samples with three features each.
            double[][] inputs =
            {
                // Those are from class -1
                new double[] { 2, 4, 0 },
                new double[] { 5, 5, 1 },
                new double[] { 4, 5, 0 },
                new double[] { 2, 5, 5 },
                new double[] { 4, 5, 1 },
                new double[] { 4, 5, 0 },
                new double[] { 6, 2, 0 },
                new double[] { 4, 1, 0 },

                // Those are from class +1
                new double[] { 1, 4, 5 },
                new double[] { 7, 5, 1 },
                new double[] { 2, 6, 0 },
                new double[] { 7, 4, 7 },
                new double[] { 4, 5, 0 },
                new double[] { 6, 2, 9 },
                new double[] { 4, 1, 6 },
                new double[] { 7, 2, 9 },
            };

            int[] outputs =
            {
                -1, -1, -1, -1, -1, -1, -1, -1, // first eight from class -1
                +1, +1, +1, +1, +1, +1, +1, +1  // last eight from class +1
            };

            // Next, we create a linear Support Vector Machine with 3 inputs
            // (one per feature in each sample above).
            SupportVectorMachine machine = new SupportVectorMachine(inputs: 3);

            // Create the sequential minimal optimization learning algorithm
            var smo = new SequentialMinimalOptimization(machine, inputs, outputs);

            // Train the machine; the value returned by Run() is not needed here.
            smo.Run();

            // Collect the machine's output for every training sample
            double[] predicted = new double[inputs.Length];
            for (int i = 0; i < predicted.Length; i++)
            {
                predicted[i] = machine.Compute(inputs[i]);
            }

            // At this point, the outputs vector holds the expected labels and
            // the predicted vector holds what the machine produced for each sample.

            // Create a new ROC curve to assess the performance of the model
            var roc = new ReceiverOperatingCharacteristic(outputs, predicted);

            roc.Compute(100); // Compute a ROC curve with 100 cut-off points

            // NOTE(review): the scatter plot is built but its result is discarded.
            roc.GetScatterplot(true);

            Console.WriteLine(roc.Area.ToString());
            Console.Write(roc.StandardError.ToString());
        }
        /// <summary>
        ///   Checks point counts, per-point confusion counts, area and standard error of a
        ///   ROC curve computed by increment (with and without forced origin) and by explicit cut-offs.
        /// </summary>
        public void ReceiverOperatingCharacteristicConstructorTest2()
        {
            // Five negative and three positive observations.
            double[] measurement = { 0, 0, 0, 0, 0, 1, 1, 1 };
            double[] prediction  = { 0, 0, 0.5, 0.5, 1, 1, 1, 1 };
            ReceiverOperatingCharacteristic target = new ReceiverOperatingCharacteristic(measurement, prediction);

            // All assertions below use the (expected, actual) argument order so that
            // failure messages report the right value as "expected".
            target.Compute(0.5, true);
            Assert.AreEqual(4, target.Points.Count);
            var first = target.Points[0];
            var last  = target.Points[3];

            Assert.AreEqual(1, first.Sensitivity);
            Assert.AreEqual(1, 1 - first.Specificity);
            Assert.AreEqual(0, last.Sensitivity);
            Assert.AreEqual(0, 1 - last.Specificity);

            target.Compute(0.5, false);
            Assert.AreEqual(3, target.Points.Count);


            target.Compute(new double[] { 0.0, 0.4, 0.6, 1.0 });

            Assert.AreEqual(4, target.Points.Count);
            Assert.AreEqual(5, target.Negatives);
            Assert.AreEqual(3, target.Positives);
            Assert.AreEqual(8, target.Observations);

            foreach (var point in target.Points)
            {
                Assert.AreEqual(8, point.Samples);
                Assert.AreEqual(5, point.ActualNegatives);
                Assert.AreEqual(3, point.ActualPositives);

                // The cut-offs are the exact literals passed to Compute above,
                // so exact floating-point comparison is intended here.
                if (point.Cutoff == 0.0)
                {
                    Assert.AreEqual(0, point.PredictedNegatives);
                    Assert.AreEqual(8, point.PredictedPositives);
                }
                else if (point.Cutoff == 0.4)
                {
                    Assert.AreEqual(2, point.PredictedNegatives);
                    Assert.AreEqual(6, point.PredictedPositives);
                }
                else
                {
                    // Cut-offs 0.6 and 1.0 are expected to yield the same split.
                    Assert.AreEqual(4, point.PredictedNegatives);
                    Assert.AreEqual(4, point.PredictedPositives);
                }
            }

            Assert.AreEqual(0.8, target.Area);
            // Assert.AreEqual(0.1821680136170595, target.StandardError); // HanleyMcNeil
            Assert.AreEqual(0.1, target.StandardError); // De Long
        }
예제 #11
0
        /// <summary>
        ///   Verifies that computing the curve with an increment of zero throws an <see cref="ArgumentException"/>.
        /// </summary>
        public void ReceiverOperatingCharacteristicConstructorZeroIncrementThrowsTest()
        {
            // A single observation is enough; only the increment argument matters here.
            var target = new ReceiverOperatingCharacteristic(new double[] { 1 }, new double[] { 1 });

            Assert.Throws <ArgumentException>(() => target.Compute(0d, true));
        }
예제 #12
0
        /// <summary>
        ///   Opens the window that matches the selected visualization type, provided the
        ///   visualization source is set and is exactly of the type that visualization expects.
        /// </summary>
        private void visualize()
        {
            // Every visualization needs a source; without one there is nothing to show.
            if (_visualizationSource == null)
            {
                return;
            }

            bool cumulative = _visualizationType == VisualizationType.COMPONENTS_CUMULATIVE;

            if (cumulative || _visualizationType == VisualizationType.COMPONENTS_DISTRIBUTION)
            {
                // Exact type match (not "is"): subclasses are deliberately not accepted.
                if (_visualizationSource.GetType() != typeof(PrincipalComponentAnalysis))
                {
                    return;
                }

                string title = cumulative ? "Cumulative Component Distribution" : "Component Distribution";
                PrincipalComponentAnalysis analysis = (PrincipalComponentAnalysis)_visualizationSource;

                VisualizationForm componentsForm = new VisualizationForm(analysis.Components, cumulative, title);
                componentsForm.Show();
            }
            else if (_visualizationType == VisualizationType.ROC_PLOT_POINTS)
            {
                if (_visualizationSource.GetType() != typeof(ReceiverOperatingCharacteristic))
                {
                    return;
                }

                ReceiverOperatingCharacteristic curve = (ReceiverOperatingCharacteristic)_visualizationSource;

                ScatterPlotForm plotForm = new ScatterPlotForm(curve.GetScatterplot(true));
                plotForm.Show();
            }

            // Any other visualization type is ignored.
        }
예제 #13
0
        /// <summary>
        ///   Plots one-vs-rest ROC curves, one per class, in a modal window.
        /// </summary>
        /// <param name="actual">The actual class label of each sample.</param>
        /// <param name="preds">The predicted class label of each sample.</param>
        /// <param name="numClass">The number of classes; labels 0 .. numClass-1 are plotted.</param>
        /// <param name="modelName">The model name used in the plot title.</param>
        private static void DrawROCCurve(int[] actual, int[] preds, int numClass, string modelName)
        {
            ScatterplotView spv = new ScatterplotView();

            spv.Dock         = DockStyle.Fill;
            spv.LinesVisible = true;

            Color[] colors = new Color[] {
                Color.Blue, Color.Red, Color.Orange, Color.Yellow, Color.Green,
                Color.Gray, Color.LightSalmon, Color.LightSkyBlue, Color.Black, Color.Pink
            };

            for (int i = 0; i < numClass; i++)
            {
                // Copy the loop variable so the lambdas below capture a per-iteration value.
                int classLabel = i;

                // One-vs-rest: the current class is the positive class, everything else negative.
                bool[] expected  = actual.Select(x => x == classLabel).ToArray();
                int[]  predicted = preds.Select(x => x == classLabel ? 1 : 0).ToArray();

                var classRoc = new ReceiverOperatingCharacteristic(expected, predicted);
                classRoc.Compute(1000);

                // Area under the curve plus the (1 - specificity, sensitivity) coordinates
                double   auc   = classRoc.Area;
                double[] xVals = classRoc.Points.Select(x => 1 - x.Specificity).ToArray();
                double[] yVals = classRoc.Points.Select(x => x.Sensitivity).ToArray();

                // Draw ROC Curve. The palette is cycled so that more classes than
                // palette entries no longer indexes past the end of the array.
                spv.Graph.GraphPane.AddCurve(
                    String.Format(
                        "Digit: {0} - AUC: {1:0.00}",
                        i, auc
                        ),
                    xVals, yVals, colors[i % colors.Length], SymbolType.None
                    );
                spv.Graph.GraphPane.AxisChange();
            }

            spv.Graph.GraphPane.Title.Text = String.Format(
                "{0} ROC - One vs. Rest",
                modelName
                );

            // A form shown with ShowDialog is not disposed when it closes, so dispose
            // it explicitly; disposing the form also disposes its child controls.
            using (Form f1 = new Form())
            {
                f1.Width  = 700;
                f1.Height = 500;
                f1.Controls.Add(spv);
                f1.ShowDialog();
            }
        }
예제 #14
0
        /// <summary>
        ///   Builds a ROC curve from the first two columns of the loaded table, computes it
        ///   by point count or by increment, and refreshes the graph, point grid and summary boxes.
        /// </summary>
        private void btnRunAnalysis_Click(object sender, EventArgs e)
        {
            if (sourceTable == null)
            {
                MessageBox.Show("Please load some data before attempting to plot a curve.");
                return;
            }


            // Finishes and save any pending changes to the given data
            dgvSource.EndEdit();

            // Copies the first two columns of the table into plain arrays.
            // The casts require both cells to hold boxed doubles.
            int n = sourceTable.Rows.Count;

            double[] realData = new double[n];
            double[] testData = new double[n];
            for (int i = 0; i < n; i++)
            {
                realData[i] = (double)sourceTable.Rows[i][0];
                testData[i] = (double)sourceTable.Rows[i][1];
            }

            // Creates the Receiver Operating Curve of the given source
            rocCurve = new ReceiverOperatingCharacteristic(realData, testData);

            // Compute the ROC curve
            if (rbNumPoints.Checked)
            {
                rocCurve.Compute((int)numPoints.Value);
            }
            else
            {
                // Convert straight to double: going through float first would round
                // the increment to single precision before it is widened again.
                rocCurve.Compute((double)numIncrement.Value);
            }

            // Update graphs
            CreateCurveGraph(zedGraph1);

            // Show point details
            dgvPointDetails.DataSource = new SortableBindingList <ReceiverOperatingCharacteristicPoint>(rocCurve.Points);

            // Show area and error
            tbArea.Text  = rocCurve.Area.ToString();
            tbError.Text = rocCurve.Error.ToString();
        }
        /// <summary>
        ///   Builds a ROC curve from randomly sampled labels and scores, then prints
        ///   every point of the curve and the area under it.
        /// </summary>
        public static void Test()
        {
            // Random 0/1 labels and random scores drawn from Beta(1, 1).
            var realData = Util.ArrayInit(20, d => Bernoulli.Sample(0.5) ? 1.0 : 0.0);
            var testData = Util.ArrayInit(20, d => Beta.Sample(1, 1));

            // Creates the Receiver Operating Curve of the given source.
            // The scores (testData) are the predictions; passing the labels for both
            // arguments would compare the labels against themselves.
            var rocCurve = new ReceiverOperatingCharacteristic(realData, testData);

            // Compute the ROC curve with 20 points
            rocCurve.Compute(20);

            for (int i = 0; i < rocCurve.Points.Count; i++)
            {
                var point = rocCurve.Points[i];

                // False positive rate is 1 - specificity; true positive rate is the sensitivity.
                Console.WriteLine("ROC curve at point {0}: false positive rate {1:0.000}, true positive rate {2:0.000}, accuracy {3:0.000}", i, 1 - point.Specificity, point.Sensitivity, point.Accuracy);
            }

            Console.WriteLine("Area under the ROC curve: {0:0.000}", rocCurve.Area);
        }
예제 #16
0
        /// <summary>
        ///   Runs the evaluation pipeline (load, tokenize, featurize, classify, statistics),
        ///   wrapping each stage in a StartMeasure/StopMeasure pair on the result.
        /// </summary>
        /// <returns>The result holding the confusion matrix and the computed ROC curve.</returns>
        public EvaluationResult Evaluate()
        {
            EvaluationResult evaluation = new EvaluationResult();

            // Stage 1: read labels and sentences from the "Evaluation" worksheet
            evaluation.StartMeasure(EvaluationResult.RecordType.LoadDataset);
            ExcelReader workbook = new ExcelReader(Helpers.DatasetPath);
            DataTable   sheet    = workbook.GetWorksheet("Evaluation");

            int[]    expectedLabels = sheet.ToVector <int>("Label");
            string[] sentences      = sheet.ToVector <string>("Sentiment");
            evaluation.StopMeasure();

            // Stage 2: run every sentence through the preprocessor
            evaluation.StartMeasure(EvaluationResult.RecordType.Tokenization);
            string[][] tokens = sentences.Select(sentence => _preprocessor.Process(sentence)).ToArray();
            evaluation.StopMeasure();

            // Stage 3: bag-of-words transform of the tokens
            evaluation.StartMeasure(EvaluationResult.RecordType.Featurization);
            int[][] features = _bagOfWords.Transform(tokens).ToInt32();
            evaluation.StopMeasure();

            // Stage 4: classify the feature vectors
            evaluation.StartMeasure(EvaluationResult.RecordType.Classification);
            int[] predictedLabels = _bayes.Decide(features);
            evaluation.StopMeasure();

            // Stage 5: confusion matrix and a ROC curve with 200 points
            evaluation.StartMeasure(EvaluationResult.RecordType.Statistics);
            ConfusionMatrix confusion = new ConfusionMatrix(predictedLabels, expectedLabels);
            ReceiverOperatingCharacteristic curve = new ReceiverOperatingCharacteristic(expectedLabels, predictedLabels.ToDouble());

            curve.Compute(200);
            evaluation.StopMeasure();

            // Attach the metrics to the result
            evaluation.Matrix = confusion;
            evaluation.Roc    = curve;

            return evaluation;
        }
예제 #17
0
        /// <summary>
        ///   Builds a ROC curve from the first two columns of the loaded table, computes it
        ///   by point count or by increment, and refreshes the scatter plot, point grid and summary boxes.
        /// </summary>
        private void btnRunAnalysis_Click(object sender, EventArgs e)
        {
            if (sourceTable == null)
            {
                MessageBox.Show("Please load some data before attempting to plot a curve.");
                return;
            }


            // Finishes and save any pending changes to the given data
            dgvSource.EndEdit();

            // Copies the first two columns of the table into plain arrays.
            // The casts require both cells to hold boxed doubles.
            int n = sourceTable.Rows.Count;

            double[] realData = new double[n];
            double[] testData = new double[n];
            for (int i = 0; i < n; i++)
            {
                realData[i] = (double)sourceTable.Rows[i][0];
                testData[i] = (double)sourceTable.Rows[i][1];
            }

            // Creates the Receiver Operating Curve of the given source
            rocCurve = new ReceiverOperatingCharacteristic(realData, testData);

            // Compute the ROC curve
            if (rbNumPoints.Checked)
            {
                rocCurve.Compute((int)numPoints.Value);
            }
            else
            {
                // Convert straight to double: going through float first would round
                // the increment to single precision before it is widened again.
                rocCurve.Compute((double)numIncrement.Value);
            }

            scatterplotView1.Scatterplot = rocCurve.GetScatterplot(true);

            // Show point details
            dgvPointDetails.DataSource = new SortableBindingList<ReceiverOperatingCharacteristicPoint>(rocCurve.Points);

            // Show area and error
            tbArea.Text = rocCurve.Area.ToString();
            tbError.Text = rocCurve.StandardError.ToString();
        }
예제 #18
0
        /// <summary>
        ///   Creates a data view showing a one-row summary of the given ROC curve and
        ///   enables the ROC point plot as its only visualization.
        /// </summary>
        /// <param name="roc">The ROC curve whose summary values are displayed.</param>
        internal FormDataView(ReceiverOperatingCharacteristic roc)
        {
            InitializeComponent();

            // One string-typed column per summary value, in display order.
            DataTable summary = new DataTable();
            string[]  headers = { "Observations", "Negatives", "Positives", "Area", "Std Error", "Variance" };

            foreach (string header in headers)
            {
                summary.Columns.Add(header, typeof(String));
            }

            // Single row; value order matches the column order above.
            summary.Rows.Add(roc.Observations, roc.Negatives, roc.Positives, roc.Area, roc.StandardError, roc.Variance);

            dataGridView1.DataSource = summary;
            this.dataGridView1.AutoResizeColumns();

            List <VisualizationType> visualizations = new List <VisualizationType>();
            visualizations.Add(VisualizationType.ROC_PLOT_POINTS);
            this._availableVisualizations = visualizations;

            enableVisualization(roc);

            fitHeight();
        }
예제 #19
0
        /// <summary>
        ///   Builds a ROC curve object from every raw prediction of every classification entry,
        ///   pairing each prediction with the actual class of the entry it belongs to.
        /// </summary>
        /// <returns>A newly constructed curve; this method does not call Compute on it.</returns>
        internal ReceiverOperatingCharacteristic getROC()
        {
            List <int>    actualClasses = new List <int>();
            List <double> rawScores     = new List <double>();

            // Flatten entry -> predictions into two parallel lists, preserving iteration order.
            foreach (var entry in _totalClassification)
            {
                foreach (var score in entry.Value.RawPredictions)
                {
                    actualClasses.Add(entry.Value.ActualClass);
                    rawScores.Add(score);
                }
            }

            return new ReceiverOperatingCharacteristic(actualClasses.ToArray(), rawScores.ToArray());
        }
        /// <summary>
        ///   Checks point counts, per-point confusion counts, area and standard error of a
        ///   ROC curve computed by increment (with and without forced origin) and by explicit cut-offs.
        /// </summary>
        public void ReceiverOperatingCharacteristicConstructorTest2()
        {
            // Five negative and three positive observations.
            double[] measurement = { 0, 0, 0, 0, 0, 1, 1, 1 };
            double[] prediction = { 0, 0, 0.5, 0.5, 1, 1, 1, 1 };
            ReceiverOperatingCharacteristic target = new ReceiverOperatingCharacteristic(measurement, prediction);

            // Assertions are written as (expected, actual) so that a failure
            // message labels the two values correctly.
            target.Compute(0.5, true);
            Assert.AreEqual(4, target.Points.Count);
            var firstPoint = target.Points[0];
            var lastPoint = target.Points[3];

            Assert.AreEqual(1, firstPoint.Sensitivity);
            Assert.AreEqual(1, 1 - firstPoint.Specificity);
            Assert.AreEqual(0, lastPoint.Sensitivity);
            Assert.AreEqual(0, 1 - lastPoint.Specificity);

            target.Compute(0.5, false);
            Assert.AreEqual(3, target.Points.Count);


            target.Compute(new double[] { 0.0, 0.4, 0.6, 1.0 });

            Assert.AreEqual(4, target.Points.Count);
            Assert.AreEqual(5, target.Negatives);
            Assert.AreEqual(3, target.Positives);
            Assert.AreEqual(8, target.Observations);

            foreach (var point in target.Points)
            {
                Assert.AreEqual(8, point.Samples);
                Assert.AreEqual(5, point.ActualNegatives);
                Assert.AreEqual(3, point.ActualPositives);

                // The cut-offs are the exact literals passed to Compute above,
                // so exact floating-point comparison is intended here.
                if (point.Cutoff == 0.0)
                {
                    Assert.AreEqual(0, point.PredictedNegatives);
                    Assert.AreEqual(8, point.PredictedPositives);
                }
                else if (point.Cutoff == 0.4)
                {
                    Assert.AreEqual(2, point.PredictedNegatives);
                    Assert.AreEqual(6, point.PredictedPositives);
                }
                else
                {
                    // Cut-offs 0.6 and 1.0 are expected to yield the same split.
                    Assert.AreEqual(4, point.PredictedNegatives);
                    Assert.AreEqual(4, point.PredictedPositives);
                }

            }

            Assert.AreEqual(0.8, target.Area);
            // Assert.AreEqual(0.1821680136170595, target.StandardError); // HanleyMcNeil
            Assert.AreEqual(0.1, target.StandardError); // De Long

        }
예제 #21
0
        /// <summary>
        ///   Builds one delimited export line for the given voting scheme.
        /// </summary>
        /// <param name="delimiter">The text placed between consecutive columns.</param>
        /// <param name="note">A free-form note written as the fourth column.</param>
        /// <param name="votingScheme">The voting scheme whose metrics are exported.</param>
        /// <returns>
        ///   The export line, or an empty string when the scheme is not handled or
        ///   the objects needed for that scheme are null.
        /// </returns>
        private String getExportString(String delimiter, String note, VotingScheme votingScheme)
        {
            StringBuilder s = new StringBuilder();

            String featureModel = this._featureModel;
            String kernel       = _cMode.ToString();

            // An SVM configuration, when present, overrides the kernel column.
            if (_svmConfig != null)
            {
                kernel = this._svmConfig.Kernel.ToString();
            }

            switch (votingScheme)
            {
            case VotingScheme.NONE:
            {
                // Metrics come from the aggregated confusion matrix; the area comes from the ROC curve.
                ConfusionMatrix cm = _voter.AggregatedConfusionMatrix;
                ReceiverOperatingCharacteristic roc = _voter.getROC();

                if (roc != null && cm != null)
                {
                    roc.Compute(100);
                    appendExportPrefix(s, delimiter, note, votingScheme, featureModel, kernel);
                    s.Append(cm.Samples + delimiter);
                    s.Append(Utility.formatNumber(roc.Area) + delimiter);
                    s.Append(Utility.formatNumber(cm.Sensitivity) + delimiter);
                    s.Append(Utility.formatNumber(cm.Specificity) + delimiter);
                    s.Append(Utility.formatNumber(cm.FalsePositiveRate) + delimiter);
                    s.Append(Utility.formatNumber(cm.FalseDiscoveryRate) + delimiter);
                    s.Append(Utility.formatNumber(cm.Accuracy) + delimiter);
                    s.Append(Utility.formatNumber(cm.PositivePredictiveValue) + delimiter);
                    s.Append(Utility.formatNumber(cm.Precision) + delimiter);
                    s.Append(Utility.formatNumber(cm.Recall) + delimiter);
                    s.Append(Utility.formatNumber(cm.FScore) + delimiter);
                    s.Append(cm.ActualPositives + delimiter);
                    s.Append(cm.ActualNegatives + delimiter);
                    s.Append(cm.TruePositives + delimiter);
                    s.Append(cm.TrueNegatives + delimiter);
                    s.Append(cm.FalsePositives + delimiter);
                    s.Append(cm.FalseNegatives);
                }

                break;
            }

            case VotingScheme.ADDITIVE_PREDICTIONS:
            {
                SchemeSumPredictions sumPredictions = _voter.getSumPredictions();
                ReceiverOperatingCharacteristic roc = sumPredictions.getROC();

                if (sumPredictions != null && roc != null)
                {
                    // The curve is still computed, although the exported area is ROCAreaVoter.
                    roc.Compute(100);
                    appendExportPrefix(s, delimiter, note, votingScheme, featureModel, kernel);
                    s.Append(sumPredictions.NumSamples + delimiter);
                    s.Append(Utility.formatNumber(sumPredictions.ROCAreaVoter) + delimiter);
                    s.Append(Utility.formatNumber(sumPredictions.Sensitivity) + delimiter);
                    s.Append(Utility.formatNumber(sumPredictions.Specificity) + delimiter);
                    s.Append(Utility.formatNumber(sumPredictions.FalsePositiveRate) + delimiter);
                    s.Append(Utility.formatNumber(sumPredictions.FalseDiscoveryRate) + delimiter);
                    s.Append(Utility.formatNumber(sumPredictions.Accuracy) + delimiter);
                    s.Append(Utility.formatNumber(sumPredictions.PositivePredictiveValue) + delimiter);
                    s.Append(Utility.formatNumber(sumPredictions.Precision) + delimiter);
                    s.Append(Utility.formatNumber(sumPredictions.Recall) + delimiter);
                    s.Append(Utility.formatNumber(sumPredictions.FScore) + delimiter);
                    s.Append(sumPredictions.ActualPositives + delimiter);
                    s.Append(sumPredictions.ActualNegatives + delimiter);
                    s.Append(sumPredictions.TruePositives + delimiter);
                    s.Append(sumPredictions.TrueNegatives + delimiter);
                    s.Append(sumPredictions.FalsePositives + delimiter);
                    s.Append(sumPredictions.FalseNegatives);
                }

                break;
            }

            case VotingScheme.MAJORITY_VOTE:
            {
                SchemeMajorityVote majorityVote = _voter.getMajorityVote();
                ReceiverOperatingCharacteristic roc = majorityVote.getROC();

                if (majorityVote != null && roc != null)
                {
                    // The curve is still computed, although the exported area is ROCAreaVoter.
                    roc.Compute(100);
                    appendExportPrefix(s, delimiter, note, votingScheme, featureModel, kernel);
                    s.Append(majorityVote.NumSamples + delimiter);
                    s.Append(Utility.formatNumber(majorityVote.ROCAreaVoter) + delimiter);
                    s.Append(Utility.formatNumber(majorityVote.Sensitivity) + delimiter);
                    s.Append(Utility.formatNumber(majorityVote.Specificity) + delimiter);
                    s.Append(Utility.formatNumber(majorityVote.FalsePositiveRate) + delimiter);
                    s.Append(Utility.formatNumber(majorityVote.FalseDiscoveryRate) + delimiter);
                    s.Append(Utility.formatNumber(majorityVote.Accuracy) + delimiter);
                    s.Append(Utility.formatNumber(majorityVote.PositivePredictiveValue) + delimiter);
                    s.Append(Utility.formatNumber(majorityVote.Precision) + delimiter);
                    s.Append(Utility.formatNumber(majorityVote.Recall) + delimiter);
                    s.Append(Utility.formatNumber(majorityVote.FScore) + delimiter);
                    s.Append(majorityVote.ActualPositives + delimiter);
                    s.Append(majorityVote.ActualNegatives + delimiter);
                    s.Append(majorityVote.TruePositives + delimiter);
                    s.Append(majorityVote.TrueNegatives + delimiter);
                    s.Append(majorityVote.FalsePositives + delimiter);
                    s.Append(majorityVote.FalseNegatives);
                }

                break;
            }
            }

            return(s.ToString());
        }

        /// <summary>
        ///   Appends the nine leading columns shared by every export line: feature model,
        ///   voting scheme, kernel, note, run count, fold count, elapsed time, memory in
        ///   bytes and memory in gigabytes, each followed by the delimiter.
        /// </summary>
        private void appendExportPrefix(StringBuilder s, String delimiter, String note, VotingScheme votingScheme, String featureModel, String kernel)
        {
            s.Append(featureModel + delimiter);
            s.Append(votingScheme + delimiter);
            s.Append(kernel + delimiter);
            s.Append(note + delimiter);
            s.Append(this._numRuns + delimiter);
            s.Append(this._numFolds + delimiter);
            s.Append(this._timeElapsedMS + delimiter);
            s.Append(this._memoryUsedBytes + delimiter);
            s.Append(Utility.formatNumber(Utility.BytesToGB(this._memoryUsedBytes)) + delimiter);
        }
예제 #22
0
        /// <summary>
        /// Updates the accuracy using the current results.
        /// </summary>
        /// <remarks>
        /// Recomputes <c>Accuracy</c>, <c>NegativeLogProb</c>, <c>ModelConfusionMatrix</c>,
        /// <c>AvgRecall</c> and <c>WorkerLabelAccuracy</c>, and stores the predicted label of
        /// every gold-labelled entry in <c>PredictedLabel</c>. When the mapping has exactly two
        /// labels, <c>RocCurve</c> and <c>ResultsConfusionMatrixForBinaryLabels</c> are rebuilt too.
        /// All ratios are plain floating-point divisions, so they evaluate to NaN when there is
        /// nothing to average over (no gold labels, or a class without gold-labelled entries
        /// in the case of the average recall).
        /// </remarks>
        protected virtual void UpdateAccuracy()
        {
            // Upper bound applied to each entry's negative log probability before it is summed.
            double nlpdThreshold   = -Math.Log(0.001);
            int    labelCount      = FullMapping.LabelCount;
            var    confusionMatrix = Util.ArrayInit(labelCount, labelCount, (i, j) => 0.0);
            int    correct         = 0;
            double logProb         = 0.0;

            // Number of entries that actually carry a gold label.
            int goldX = 0;

            List<double> trueBinaryLabelList = null;
            List<double> probTrueLabelList   = null;

            // The ROC inputs are only collected for binary labels.
            if (Mapping.LabelCount == 2)
            {
                trueBinaryLabelList = new List<double>();
                probTrueLabelList   = new List<double>();
            }

            foreach (var kvp in GoldLabels)
            {
                if (kvp.Value == null)
                {
                    continue;
                }

                // We have a gold label
                goldX++;

                // Single dictionary lookup; trueLabel stays null when the key is absent.
                Discrete trueLabel;
                TrueLabel.TryGetValue(kvp.Key, out trueLabel);

                if (trueLabel == null)
                {
                    // No inferred label: score the entry against a uniform distribution
                    // instead of skipping it.
                    trueLabel = Discrete.Uniform(Mapping.LabelCount);
                }

                // Predict the most probable label; ties are broken at random.
                var    probs           = trueLabel.GetProbs();
                double max             = probs.Max();
                var    predictedLabels = probs.Select((p, i) => new
                {
                    prob = p,
                    idx  = i
                }).Where(a => a.prob == max).Select(a => a.idx).ToArray();

                int predictedLabel = predictedLabels.Length == 1 ? predictedLabels[0] : predictedLabels[Rand.Int(predictedLabels.Length)];

                this.PredictedLabel[kvp.Key] = predictedLabel;

                int goldLabel = kvp.Value.Value;

                if (goldLabel == predictedLabel)
                {
                    correct++;
                }

                // Rows are indexed by gold label, columns by predicted label.
                confusionMatrix[goldLabel, predictedLabel] += 1.0;

                var nlp = -trueLabel.GetLogProb(goldLabel);
                if (nlp > nlpdThreshold)
                {
                    nlp = nlpdThreshold;
                }
                logProb += nlp;

                if (trueBinaryLabelList != null)
                {
                    trueBinaryLabelList.Add(goldLabel);

                    // The ROC score has to be the probability of the positive class (label 1)
                    // for every entry. Scoring with probs[goldLabel] would give a confidently
                    // correct negative the same high score as a confidently correct positive.
                    probTrueLabelList.Add(probs[1]);
                }
            }

            Accuracy             = correct / (double)goldX;
            NegativeLogProb      = logProb / (double)goldX;
            ModelConfusionMatrix = confusionMatrix;

            // Average recall: mean over the classes of (diagonal entry / row total).
            double sumRec = 0;

            for (int i = 0; i < labelCount; i++)
            {
                double classSum = 0;
                for (int j = 0; j < labelCount; j++)
                {
                    classSum += confusionMatrix[i, j];
                }

                sumRec += confusionMatrix[i, i] / classSum;
            }
            AvgRecall = sumRec / labelCount;

            // WorkerLabelAccuracy: Perc. agreement between worker label and gold label.
            // Counted in one pass so the sequence is enumerated only once.
            int sumAcc    = 0;
            int numLabels = 0;

            foreach (var datum in Mapping.DataWithGold)
            {
                numLabels++;
                sumAcc += datum.WorkerLabel == datum.GoldLabel ? 1 : 0;
            }
            WorkerLabelAccuracy = (double)sumAcc / (double)numLabels;

            if (trueBinaryLabelList != null)
            {
                RocCurve = new ReceiverOperatingCharacteristic(trueBinaryLabelList.ToArray(), probTrueLabelList.ToArray());
                RocCurve.Compute(10000);

                // NOTE(review): the cells are passed as [1,1], [0,0], [0,1], [1,0]; confirm this
                // matches the parameter order of the ConfusionMatrix constructor in use.
                ResultsConfusionMatrixForBinaryLabels = new ConfusionMatrix((int)confusionMatrix[1, 1], (int)confusionMatrix[0, 0], (int)confusionMatrix[0, 1], (int)confusionMatrix[1, 0]);
            }
        }
        public void ComputeTest()
        {
            // Example from
            // http://faculty.vassar.edu/lowry/roc1.html

            // Each row is { rating, condition, number of cases }, where condition 1
            // marks a positive case and 0 a negative one.
            int[,] groups =
            {
                { 4, 1, 18 }, { 4, 0,  1 },
                { 6, 1,  7 }, { 6, 0, 17 },
                { 8, 1,  4 }, { 8, 0, 36 },
                { 9, 1,  3 }, { 9, 0, 39 },
            };

            int total = 0;
            for (int g = 0; g < groups.GetLength(0); g++)
            {
                total += groups[g, 2];
            }

            // Expand the groups into one { rating, condition } row per case,
            // keeping the group order shown above.
            double[,] data = new double[total, 2];
            int row = 0;
            for (int g = 0; g < groups.GetLength(0); g++)
            {
                for (int k = 0; k < groups[g, 2]; k++, row++)
                {
                    data[row, 0] = groups[g, 0];
                    data[row, 1] = groups[g, 1];
                }
            }

            double[] measurement = data.GetColumn(1);
            double[] prediction  = data.GetColumn(0);

            var roc = new ReceiverOperatingCharacteristic(measurement, prediction);

            double[] cutpoints = { 5, 7, 9, double.PositiveInfinity };

            roc.Compute(cutpoints);

            Assert.AreEqual(32, roc.Positives);
            Assert.AreEqual(93, roc.Negatives);

            Assert.AreEqual(4, roc.Points.Count);
            var p1 = roc.Points[0];
            var p2 = roc.Points[1];
            var p3 = roc.Points[2];
            var p4 = roc.Points[3];

            // At each successive cutpoint the positives of one more rating group
            // are counted as false negatives...
            Assert.AreEqual(18, p1.FalseNegatives);
            Assert.AreEqual(18 + 7, p2.FalseNegatives);
            Assert.AreEqual(18 + 7 + 4, p3.FalseNegatives);
            Assert.AreEqual(18 + 7 + 4 + 3, p4.FalseNegatives);

            // ...and the negatives of that group as true negatives.
            Assert.AreEqual(1, p1.TrueNegatives);
            Assert.AreEqual(1 + 17, p2.TrueNegatives);
            Assert.AreEqual(1 + 17 + 36, p3.TrueNegatives);
            Assert.AreEqual(1 + 17 + 36 + 39, p4.TrueNegatives);

            double area  = roc.Area;
            double error = roc.StandardError;

            // Area should be near 0.87
            Assert.AreEqual(0.87, area, 0.011);
            Assert.IsFalse(Double.IsNaN(area));

            // DeLong estimate (the Hanley & McNeil estimate would be 0.043781206163219656).
            // Compared with a tolerance rather than exact floating-point equality.
            Assert.AreEqual(0.04485087617325112, error, 1e-10);
        }
        public void ComputeTest()
        {
            // Example from
            // http://faculty.vassar.edu/lowry/roc1.html

            // Each row is { rating, condition, number of cases }, where condition 1
            // marks a positive case and 0 a negative one.
            int[,] groups =
            {
                { 4, 1, 18 }, { 4, 0,  1 },
                { 6, 1,  7 }, { 6, 0, 17 },
                { 8, 1,  4 }, { 8, 0, 36 },
                { 9, 1,  3 }, { 9, 0, 39 },
            };

            int total = 0;
            for (int g = 0; g < groups.GetLength(0); g++)
            {
                total += groups[g, 2];
            }

            // Expand the groups into one { rating, condition } row per case,
            // keeping the group order shown above.
            double[,] data = new double[total, 2];
            int row = 0;
            for (int g = 0; g < groups.GetLength(0); g++)
            {
                for (int k = 0; k < groups[g, 2]; k++, row++)
                {
                    data[row, 0] = groups[g, 0];
                    data[row, 1] = groups[g, 1];
                }
            }

            double[] measurement = data.GetColumn(1);
            double[] prediction  = data.GetColumn(0);

            var roc = new ReceiverOperatingCharacteristic(measurement, prediction);

            double[] cutpoints = { 5, 7, 9, double.PositiveInfinity };

            roc.Compute(cutpoints);

            Assert.AreEqual(32, roc.Positives);
            Assert.AreEqual(93, roc.Negatives);

            Assert.AreEqual(4, roc.Points.Count);
            var p1 = roc.Points[0];
            var p2 = roc.Points[1];
            var p3 = roc.Points[2];
            var p4 = roc.Points[3];

            // At each successive cutpoint the positives of one more rating group
            // are counted as false negatives...
            Assert.AreEqual(18, p1.FalseNegatives);
            Assert.AreEqual(18 + 7, p2.FalseNegatives);
            Assert.AreEqual(18 + 7 + 4, p3.FalseNegatives);
            Assert.AreEqual(18 + 7 + 4 + 3, p4.FalseNegatives);

            // ...and the negatives of that group as true negatives.
            Assert.AreEqual(1, p1.TrueNegatives);
            Assert.AreEqual(1 + 17, p2.TrueNegatives);
            Assert.AreEqual(1 + 17 + 36, p3.TrueNegatives);
            Assert.AreEqual(1 + 17 + 36 + 39, p4.TrueNegatives);

            double area  = roc.Area;
            double error = roc.Error;

            // The area must lie within one reported error of 0.875.
            Assert.IsTrue(System.Math.Abs(area - 0.875) < error,
                "Area " + area + " is not within " + error + " of 0.875.");
        }
        public void ReceiverOperatingCharacteristicConstructorTest3()
        {
            // Demonstrates how a ROC curve is used to assess a binary classifier:
            // a linear Support Vector Machine is trained on the samples below and
            // its scores are then compared against the known class labels.

            double[][] samples =
            {
                // Class -1
                new double[] { 2, 4, 0 },
                new double[] { 5, 5, 1 },
                new double[] { 4, 5, 0 },
                new double[] { 2, 5, 5 },
                new double[] { 4, 5, 1 },
                new double[] { 4, 5, 0 },
                new double[] { 6, 2, 0 },
                new double[] { 4, 1, 0 },

                // Class +1
                new double[] { 1, 4, 5 },
                new double[] { 7, 5, 1 },
                new double[] { 2, 6, 0 },
                new double[] { 7, 4, 7 },
                new double[] { 4, 5, 0 },
                new double[] { 6, 2, 9 },
                new double[] { 4, 1, 6 },
                new double[] { 7, 2, 9 },
            };

            int[] labels =
            {
                -1, -1, -1, -1, -1, -1, -1, -1, // the eight class -1 samples
                +1, +1, +1, +1, +1, +1, +1, +1  // the eight class +1 samples
            };

            // Linear Support Vector Machine taking 3 inputs
            SupportVectorMachine svm = new SupportVectorMachine(inputs: 3);

            // Sequential minimal optimization teacher, with its complexity set to 1
            SequentialMinimalOptimization teacher = new SequentialMinimalOptimization(svm, samples, labels);
            teacher.Complexity = 1;

            // Train the machine and keep the error reported by the teacher
            double trainingError = teacher.Run();

            // Score every training sample with the trained machine
            double[] scores = new double[samples.Length];
            int index = 0;
            foreach (double[] sample in samples)
            {
                scores[index] = svm.Score(sample);
                index++;
            }

            // Build the ROC curve from the known labels and the machine's scores,
            // evaluated at 100 points
            ReceiverOperatingCharacteristic roc = new ReceiverOperatingCharacteristic(labels, scores);
            roc.Compute(100);

            // The curve could be displayed on-screen as a connected scatter plot
            // (left disabled here):
            //
            //     ScatterplotBox.Show(roc.GetScatterplot(includeRandom: true), nonBlocking: true)
            //         .SetSymbolSize(0)      // do not display data points
            //         .SetLinesVisible(true) // show lines connecting points
            //         .SetScaleTight(true)   // tighten the scale to points
            //         .WaitForClose();

            Assert.AreEqual(0.25, trainingError);
            Assert.AreEqual(0.78125, roc.Area);

            // Standard-error values previously expected here, kept for reference:
            //   0.1174774 (HanleyMcNeil) and 0.11958120746409709.
            Assert.AreEqual(0.132845321574701, roc.StandardError, 1e-6);
        }
        public void ComputeTest()
        {
            // Example from
            // http://faculty.vassar.edu/lowry/roc1.html

            // Each row is { rating, condition, number of cases }, where condition 1
            // marks a positive case and 0 a negative one.
            int[,] groups =
            {
                { 4, 1, 18 }, { 4, 0,  1 },
                { 6, 1,  7 }, { 6, 0, 17 },
                { 8, 1,  4 }, { 8, 0, 36 },
                { 9, 1,  3 }, { 9, 0, 39 },
            };

            int total = 0;
            for (int g = 0; g < groups.GetLength(0); g++)
            {
                total += groups[g, 2];
            }

            // Expand the groups into one { rating, condition } row per case,
            // keeping the group order shown above.
            double[,] data = new double[total, 2];
            int row = 0;
            for (int g = 0; g < groups.GetLength(0); g++)
            {
                for (int k = 0; k < groups[g, 2]; k++, row++)
                {
                    data[row, 0] = groups[g, 0];
                    data[row, 1] = groups[g, 1];
                }
            }

            double[] measurement = data.GetColumn(1);
            double[] prediction = data.GetColumn(0);

            var roc = new ReceiverOperatingCharacteristic(measurement, prediction);
            double[] cutpoints = { 5, 7, 9, double.PositiveInfinity };

            roc.Compute(cutpoints);

            Assert.AreEqual(32, roc.Positives);
            Assert.AreEqual(93, roc.Negatives);

            Assert.AreEqual(4, roc.Points.Count);
            var p1 = roc.Points[0];
            var p2 = roc.Points[1];
            var p3 = roc.Points[2];
            var p4 = roc.Points[3];

            // At each successive cutpoint the positives of one more rating group
            // are counted as false negatives...
            Assert.AreEqual(18, p1.FalseNegatives);
            Assert.AreEqual(18 + 7, p2.FalseNegatives);
            Assert.AreEqual(18 + 7 + 4, p3.FalseNegatives);
            Assert.AreEqual(18 + 7 + 4 + 3, p4.FalseNegatives);

            // ...and the negatives of that group as true negatives.
            Assert.AreEqual(1, p1.TrueNegatives);
            Assert.AreEqual(1 + 17, p2.TrueNegatives);
            Assert.AreEqual(1 + 17 + 36, p3.TrueNegatives);
            Assert.AreEqual(1 + 17 + 36 + 39, p4.TrueNegatives);

            double area = roc.Area;
            double error = roc.StandardError;

            // Area should be near 0.87
            Assert.AreEqual(0.87, area, 0.011);
            Assert.IsFalse(Double.IsNaN(area));

            // DeLong estimate (the Hanley & McNeil estimate would be 0.043781206163219656).
            // Compared with a tolerance rather than exact floating-point equality.
            Assert.AreEqual(0.04485087617325112, error, 1e-10);
        }
        public void DeLongVarianceTest()
        {
            // Data taken from "Sampling Variability of Nonparametric Estimates of the
            // Areas under Receiver Operating Characteristic Curves: An Update".

            // Outcome of each of the 15 cases (true = positive case)...
            bool[] outcomes =
            {
                true,  false, true,  false, false,
                true,  true,  false, false, true,
                false, false, true,  false, false
            };

            // ...and the rating given to the same case.
            int[] ratings =
            {
                1, 2, 5, 1, 1,
                1, 2, 1, 2, 2,
                1, 1, 5, 1, 1
            };

            ReceiverOperatingCharacteristic curve = new ReceiverOperatingCharacteristic(outcomes, ratings);

            curve.Compute(10);

            // Ratings of the positive cases, in input order
            int[] positiveResults = { 1, 5, 1, 2, 2, 5 };
            Assert.AreEqual(positiveResults.Length, curve.PositiveResults.Length);
            for (int i = 0; i < positiveResults.Length; i++)
            {
                Assert.AreEqual(positiveResults[i], curve.PositiveResults[i]);
            }

            // Ratings of the negative cases, in input order
            int[] negativeResults = { 2, 1, 1, 1, 2, 1, 1, 1, 1 };
            Assert.AreEqual(negativeResults.Length, curve.NegativeResults.Length);
            for (int i = 0; i < negativeResults.Length; i++)
            {
                Assert.AreEqual(negativeResults[i], curve.NegativeResults[i]);
            }

            // Accuracy value of each positive case, to four decimal places
            double[] positiveAccuracies = { 0.3888, 1.0000, 0.3888, 0.8888, 0.8888, 1.0000 };
            Assert.AreEqual(positiveAccuracies.Length, curve.PositiveAccuracies.Length);
            for (int i = 0; i < positiveAccuracies.Length; i++)
            {
                Assert.AreEqual(positiveAccuracies[i], curve.PositiveAccuracies[i], 1e-4);
            }

            // Accuracy value of each negative case, to four decimal places
            double[] negativeAccuracies =
            {
                0.5000, 0.8333, 0.8333, 0.8333, 0.5000, 0.8333, 0.8333, 0.8333, 0.8333
            };
            Assert.AreEqual(negativeAccuracies.Length, curve.NegativeAccuracies.Length);
            for (int i = 0; i < negativeAccuracies.Length; i++)
            {
                Assert.AreEqual(negativeAccuracies[i], curve.NegativeAccuracies[i], 1e-4);
            }

            Assert.IsFalse(curve.NegativeAccuracies.HasNaN());
            Assert.IsFalse(curve.PositiveAccuracies.HasNaN());

            // Standard error and variance of the area estimate
            Assert.AreEqual(0.1285, curve.StandardError, 1e-4);
            Assert.AreEqual(0.0165, curve.Variance, 1e-4);

            Assert.IsFalse(Double.IsNaN(curve.StandardError));
            Assert.IsFalse(Double.IsNaN(curve.Variance));
        }
예제 #28
0
        /// <summary>
        /// Updates the accuracy using the current results.
        /// </summary>
        /// <remarks>
        /// Recomputes <c>Accuracy</c>, <c>NegativeLogProb</c>, <c>ModelConfusionMatrix</c>,
        /// <c>AvgRecall</c> and <c>WorkerLabelAccuracy</c>, and stores the predicted label of
        /// every gold-labelled entry in <c>PredictedLabel</c>. When the mapping has exactly two
        /// labels, <c>RocCurve</c> and <c>ResultsConfusionMatrixForBinaryLabels</c> are rebuilt too.
        /// All ratios are plain floating-point divisions, so they evaluate to NaN when there is
        /// nothing to average over (no gold labels, or a class without gold-labelled entries
        /// in the case of the average recall).
        /// </remarks>
        protected virtual void UpdateAccuracy()
        {
            // Upper bound applied to each entry's negative log probability before it is summed.
            double nlpdThreshold = -Math.Log(0.001);
            int labelCount = FullMapping.LabelCount;
            var confusionMatrix = Util.ArrayInit(labelCount, labelCount, (i, j) => 0.0);
            int correct = 0;
            double logProb = 0.0;

            // Number of entries that actually carry a gold label.
            int goldX = 0;

            List<double> trueBinaryLabelList = null;
            List<double> probTrueLabelList = null;

            // The ROC inputs are only collected for binary labels.
            if (Mapping.LabelCount == 2)
            {
                trueBinaryLabelList = new List<double>();
                probTrueLabelList = new List<double>();
            }

            foreach (var kvp in GoldLabels)
            {
                if (kvp.Value == null)
                {
                    continue;
                }

                // We have a gold label
                goldX++;

                // Single dictionary lookup; trueLabel stays null when the key is absent.
                Discrete trueLabel;
                TrueLabel.TryGetValue(kvp.Key, out trueLabel);

                if (trueLabel == null)
                {
                    // No inferred label: score the entry against a uniform distribution
                    // instead of skipping it.
                    trueLabel = Discrete.Uniform(Mapping.LabelCount);
                }

                // Predict the most probable label; ties are broken at random.
                var probs = trueLabel.GetProbs();
                double max = probs.Max();
                var predictedLabels = probs.Select((p, i) => new
                {
                    prob = p,
                    idx = i
                }).Where(a => a.prob == max).Select(a => a.idx).ToArray();

                int predictedLabel = predictedLabels.Length == 1 ? predictedLabels[0] : predictedLabels[Rand.Int(predictedLabels.Length)];

                this.PredictedLabel[kvp.Key] = predictedLabel;

                int goldLabel = kvp.Value.Value;

                if (goldLabel == predictedLabel)
                {
                    correct++;
                }

                // Rows are indexed by gold label, columns by predicted label.
                confusionMatrix[goldLabel, predictedLabel] += 1.0;

                var nlp = -trueLabel.GetLogProb(goldLabel);
                if (nlp > nlpdThreshold)
                {
                    nlp = nlpdThreshold;
                }
                logProb += nlp;

                if (trueBinaryLabelList != null)
                {
                    trueBinaryLabelList.Add(goldLabel);

                    // The ROC score has to be the probability of the positive class (label 1)
                    // for every entry. Scoring with probs[goldLabel] would give a confidently
                    // correct negative the same high score as a confidently correct positive.
                    probTrueLabelList.Add(probs[1]);
                }
            }

            Accuracy = correct / (double)goldX;
            NegativeLogProb = logProb / (double)goldX;
            ModelConfusionMatrix = confusionMatrix;

            // Average recall: mean over the classes of (diagonal entry / row total).
            double sumRec = 0;
            for (int i = 0; i < labelCount; i++)
            {
                double classSum = 0;
                for (int j = 0; j < labelCount; j++)
                {
                    classSum += confusionMatrix[i, j];
                }

                sumRec += confusionMatrix[i, i] / classSum;
            }
            AvgRecall = sumRec / labelCount;

            // WorkerLabelAccuracy: Perc. agreement between worker label and gold label.
            // Counted in one pass so the sequence is enumerated only once.
            int sumAcc = 0;
            int numLabels = 0;
            foreach (var datum in Mapping.DataWithGold)
            {
                numLabels++;
                sumAcc += datum.WorkerLabel == datum.GoldLabel ? 1 : 0;
            }
            WorkerLabelAccuracy = (double)sumAcc / (double)numLabels;

            if (trueBinaryLabelList != null)
            {
                RocCurve = new ReceiverOperatingCharacteristic(trueBinaryLabelList.ToArray(), probTrueLabelList.ToArray());
                RocCurve.Compute(10000);

                // NOTE(review): the cells are passed as [1,1], [0,0], [0,1], [1,0]; confirm this
                // matches the parameter order of the ConfusionMatrix constructor in use.
                ResultsConfusionMatrixForBinaryLabels = new ConfusionMatrix((int)confusionMatrix[1, 1], (int)confusionMatrix[0, 0], (int)confusionMatrix[0, 1], (int)confusionMatrix[1, 0]);
            }
        }
        public void ReceiverOperatingCharacteristicConstructorTest3()
        {
            // This example shows how to measure the accuracy of a
            // binary classifier using a ROC curve. For this example,
            // we will be creating a Support Vector Machine trained
            // on the following instances:

            double[][] inputs =
            {
                // Those are from class -1
                new double[] { 2, 4, 0 },
                new double[] { 5, 5, 1 },
                new double[] { 4, 5, 0 },
                new double[] { 2, 5, 5 },
                new double[] { 4, 5, 1 },
                new double[] { 4, 5, 0 },
                new double[] { 6, 2, 0 },
                new double[] { 4, 1, 0 },

                // Those are from class +1
                new double[] { 1, 4, 5 },
                new double[] { 7, 5, 1 },
                new double[] { 2, 6, 0 },
                new double[] { 7, 4, 7 },
                new double[] { 4, 5, 0 },
                new double[] { 6, 2, 9 },
                new double[] { 4, 1, 6 },
                new double[] { 7, 2, 9 },
            };

            int[] outputs =
            {
                -1, -1, -1, -1, -1, -1, -1, -1, // first eight from class -1
                +1, +1, +1, +1, +1, +1, +1, +1  // last eight from class +1
            };

            // Create a linear Support Vector Machine with 3 inputs
            SupportVectorMachine machine = new SupportVectorMachine(inputs: 3);

            // Create the sequential minimal optimization teacher
            SequentialMinimalOptimization learn = new SequentialMinimalOptimization(machine, inputs, outputs);

            // Run the learning algorithm; the value it returns is not checked by this test
            learn.Run();

            // Compute the machine's output for every training input
            double[] predicted = new double[inputs.Length];
            for (int i = 0; i < predicted.Length; i++)
            {
                predicted[i] = machine.Compute(inputs[i]);
            }

            // Create a new ROC curve to assess the performance of the model
            var roc = new ReceiverOperatingCharacteristic(outputs, predicted);

            roc.Compute(100); // Compute a ROC curve with 100 points

            // The curve could be displayed on-screen as a connected scatter plot
            // (left disabled here):
            //
            //     ScatterplotBox.Show(roc.GetScatterplot(includeRandom: true), nonBlocking: true)
            //         .SetSymbolSize(0)      // do not display data points
            //         .SetLinesVisible(true) // show lines connecting points
            //         .SetScaleTight(true)   // tighten the scale to points
            //         .WaitForClose();

            Assert.AreEqual(0.7890625, roc.Area);

            // A HanleyMcNeil value of 0.1174774 was expected here previously.
            Assert.AreEqual(0.11958120746409709, roc.StandardError, 1e-6);
        }
        public void ComputeTest()
        {
            // Worked example taken from
            // http://faculty.vassar.edu/lowry/roc1.html

            // Each row describes one group of identical observations:
            // { rating, actual outcome (1 or 0), number of repetitions }.
            int[,] groups =
            {
                { 4, 1, 18 }, { 4, 0,  1 },
                { 6, 1,  7 }, { 6, 0, 17 },
                { 8, 1,  4 }, { 8, 0, 36 },
                { 9, 1,  3 }, { 9, 0, 39 },
            };

            int groupCount = groups.GetLength(0);

            int total = 0;
            for (int g = 0; g < groupCount; g++)
            {
                total += groups[g, 2];
            }

            // Expand the groups into one { rating, outcome } row per observation,
            // keeping the order in which the groups are listed above.
            double[,] data = new double[total, 2];
            int row = 0;
            for (int g = 0; g < groupCount; g++)
            {
                for (int k = 0; k < groups[g, 2]; k++)
                {
                    data[row, 0] = groups[g, 0];
                    data[row, 1] = groups[g, 1];
                    row++;
                }
            }

            // Column 1 holds the actual outcome, column 0 the rating used as the prediction.
            double[] measurement = data.GetColumn(1);
            double[] prediction = data.GetColumn(0);

            double[] cutpoints = new double[] { 5, 7, 9, double.PositiveInfinity };

            var roc = new ReceiverOperatingCharacteristic(measurement, prediction);
            roc.Compute(cutpoints);

            Assert.AreEqual(32, roc.Positives);
            Assert.AreEqual(93, roc.Negatives);

            Assert.AreEqual(4, roc.Points.Count);

            // Expected counts accumulate from one cut-off point to the next.
            int[] expectedFalseNegatives = { 18, 18 + 7, 18 + 7 + 4, 18 + 7 + 4 + 3 };
            int[] expectedTrueNegatives = { 1, 1 + 17, 1 + 17 + 36, 1 + 17 + 36 + 39 };

            for (int i = 0; i < expectedFalseNegatives.Length; i++)
            {
                Assert.AreEqual(expectedFalseNegatives[i], roc.Points[i].FalseNegatives);
            }

            for (int i = 0; i < expectedTrueNegatives.Length; i++)
            {
                Assert.AreEqual(expectedTrueNegatives[i], roc.Points[i].TrueNegatives);
            }

            // The area should be near 0.87, within the curve's own reported error.
            double area = roc.Area;
            double tolerance = roc.Error;

            Assert.IsTrue(System.Math.Abs(area - 0.875) < tolerance);
        }
예제 #31
0
        /// <summary>
        /// Run the lesson: load the California housing data set, train a logistic
        /// regression that predicts whether a block has a high median house value,
        /// and report its classification metrics and ROC curve on the validation rows.
        /// </summary>
        public static void Run()
        {
            // get data
            Console.WriteLine("Loading data....");
            var path    = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, @"..\..\..\..\california_housing.csv"));
            var housing = Frame.ReadCsv(path, separators: ",");

            // keep only the rows whose median house value is below 500,000
            housing = housing.Where(kv => ((decimal)kv.Value["median_house_value"]) < 500000);

            // create the median_high_house_value feature (1.0 when the value is at least 265,000)
            housing.AddColumn("median_high_house_value",
                              housing["median_house_value"].Select(v => v.Value >= 265000 ? 1.0 : 0.0));

            // shuffle the frame: assign a random permutation of 0..N-1 as row keys, then sort by key
            var rnd     = new Random();
            var indices = Enumerable.Range(0, housing.Rows.KeyCount).OrderBy(v => rnd.NextDouble());

            housing = housing.IndexRowsWith(indices).SortRowsByKey();

            // create training, validation, and test frames
            var training   = housing.Rows[Enumerable.Range(0, 12000)];
            var validation = housing.Rows[Enumerable.Range(12000, 2500)];
            var test       = housing.Rows[Enumerable.Range(14500, 2500)];

            // build the list of features we're going to use
            var columns = new string[] {
                "latitude",
                "longitude",
                "housing_median_age",
                "total_rooms",
                "total_bedrooms",
                "population",
                "households",
                "median_income"
            };

            // train the model using a logistic regressor
            var learner = new IterativeReweightedLeastSquares <LogisticRegression>()
            {
                MaxIterations = 100
            };
            var regression = learner.Learn(
                training.Columns[columns].ToArray2D <double>().ToJagged(),
                training["median_high_house_value"].Values.ToArray());

            // get probabilities
            var features_validation = validation.Columns[columns].ToArray2D <double>().ToJagged();
            var label_validation    = validation["median_high_house_value"].Values.ToArray();
            var probabilities       = regression.Probability(features_validation);

            // calculate the histogram of probabilities
            var histogram = new Histogram();

            histogram.Compute(probabilities, 0.05);

            // draw the histogram
            Plot(histogram, "Probability histogram", "prediction", "count");

            // get predictions and actuals
            var predictions = regression.Decide(features_validation);
            var actuals     = label_validation.Select(v => v == 1.0).ToArray();

            // create confusion matrix
            var confusion = new ConfusionMatrix(predictions, actuals);

            // display classification scores
            Console.WriteLine($"True Positives:  {confusion.TruePositives}");
            Console.WriteLine($"True Negatives:  {confusion.TrueNegatives}");
            Console.WriteLine($"False Positives: {confusion.FalsePositives}");
            Console.WriteLine($"False Negatives: {confusion.FalseNegatives}");
            Console.WriteLine();

            // display accuracy, precision, and recall
            Console.WriteLine($"Accuracy:        {confusion.Accuracy}");
            Console.WriteLine($"Precision:       {confusion.Precision}");
            Console.WriteLine($"Recall:          {confusion.Recall}");
            Console.WriteLine();

            // display TPR and FPR
            Console.WriteLine($"TPR:             {confusion.Sensitivity}");
            Console.WriteLine($"FPR:             {confusion.FalsePositiveRate}");
            Console.WriteLine();

            // calculate roc curve from the continuous probabilities rather than
            // the thresholded decisions: with 0/1 scores every cut-off yields the
            // same operating point, so the curve and its area would be degenerate
            var roc = new ReceiverOperatingCharacteristic(actuals, probabilities);

            roc.Compute(100);

            // generate the scatter plot
            var rocPlot = roc.GetScatterplot(true);

            // show roc curve
            Plot(rocPlot);

            // show the auc
            Console.WriteLine($"AUC:             {roc.Area}");
        }
        public void DeLongComparisonTest()
        {
            // Example from Sampling Variability of Nonparametric Estimates of the
            // Areas under Receiver Operating Characteristic Curves: An Update

            // Expected outcome of each of the 15 cases (true = positive).
            bool[] expected =
            {
                true,  false, true,  false, false, // cases  1 -  5
                true,  true,  false, false, true,  // cases  6 - 10
                false, false, true,  false, false  // cases 11 - 15
            };

            // Scores given to the same 15 cases by the first curve's source.
            int[] actual1 =
            {
                1, 2, 5, 1, 1, // cases  1 -  5
                1, 2, 1, 2, 2, // cases  6 - 10
                1, 1, 5, 1, 1  // cases 11 - 15
            };

            // Scores given to the same 15 cases by the second curve's source.
            int[] actual2 =
            {
                1, 1, 5, 1, 1, // cases  1 -  5
                1, 4, 1, 2, 2, // cases  6 - 10
                1, 1, 5, 1, 1  // cases 11 - 15
            };

            ReceiverOperatingCharacteristic a = new ReceiverOperatingCharacteristic(expected, actual1);
            ReceiverOperatingCharacteristic b = new ReceiverOperatingCharacteristic(expected, actual2);

            a.Compute(10);
            b.Compute(10);

            TwoReceiverOperatingCurveTest test = new TwoReceiverOperatingCurveTest(a, b);

            // The statistic is the result of floating-point arithmetic (covariances,
            // a square root and a division), so compare with a small tolerance
            // instead of requiring bit-for-bit equality.
            Assert.AreEqual(-1.1351915229662422, test.Statistic, 1e-10);
        }
예제 #33
0
        /// <summary>
        /// Entry point: reads the model outputs, the reference targets and the errors
        /// from the files named in "AnalysClassifier.config" and, depending on the
        /// configuration flags, shows a ROC plot, histograms and a confusion matrix report.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Work on a copy of the current culture that uses "." as decimal separator;
            // it is installed below, before any of the input files is parsed.
            System.Globalization.CultureInfo customCulture = (System.Globalization.CultureInfo)System.Threading.Thread.CurrentThread.CurrentCulture.Clone();
            customCulture.NumberFormat.NumberDecimalSeparator = ".";

            int nBufferWidth = Console.BufferWidth;

            Console.SetBufferSize(nBufferWidth, 1000);

            System.Threading.Thread.CurrentThread.CurrentCulture = customCulture;

            Config cfg = new Config("AnalysClassifier.config");

            log("info", "Конфиг:" + cfg.ToString());

            // One number per line: model outputs, reference (target) outputs and errors.
            double[] output = File.ReadAllLines(cfg.OutputPath)
                              .Select(x => double.Parse(x)).ToArray();
            double[] target = File.ReadAllLines(cfg.TargetPath)
                              .Select(x => double.Parse(x)).ToArray();
            double[] error = File.ReadAllLines(cfg.ErrorPath)
                             .Select(x => double.Parse(x)).ToArray();

            if (cfg.Plotroc == true)
            {
                Console.WriteLine("Модуль Plotroc");
                Console.WriteLine("Расчет ROC");

                // The constructor takes the expected values first and the scores second:
                // the reference targets are the ground truth, the model outputs are the scores.
                var roc = new ReceiverOperatingCharacteristic(target, output);
                roc.Compute(100); // Compute a ROC curve with 100 cut-off points
                Console.WriteLine("ROC расчитана");
                ScatterplotBox.Show(roc.GetScatterplot(includeRandom: true))
                .SetSymbolSize(0)          // do not display data points
                .SetLinesVisible(true)     // show lines connecting points
                .SetScaleTight(true);      // tighten the scale to points
            }
            if (cfg.Plothist == true)
            {
                Console.WriteLine("Модуль Plothist");
                HistoframShow(error, "ошибок");
                HistoframShow(target, "эталонных выходов");
                HistoframShow(output, "выходов модели");
            }
            if (cfg.Plotconfucion == true)
            {
                Console.WriteLine("Модуль Plotconfucion");
                Console.WriteLine("Расчет ConfusionMatrix");

                // Both series are thresholded at 0.5; the reference targets are the
                // expected classes and the model outputs are the predicted classes.
                var cm = new GeneralConfusionMatrix(classes: 2,
                                                    expected: target.Select(x => x > 0.5 ? 1 : 0).ToArray(),
                                                    predicted: output.Select(x => x > 0.5 ? 1 : 0).ToArray());
                Console.WriteLine("ConfusionMatrix расчитана");
                Console.WriteLine("Confusion Matrix:");
                string[][] outMat = cm.Matrix.
                                    ToJagged().
                                    Select(x => x.Select(y => IntToStringFormatted(y)).ToArray()).
                                    ToArray();

                // Header line: the column totals, followed by a separator rule.
                foreach (var it in cm.ColumnTotals)
                {
                    Console.Write($"{IntToStringFormatted(it)}");
                }
                Console.WriteLine("|");
                Console.WriteLine(new string('_', 9 * cm.ColumnTotals.Length));

                // One line per matrix row, terminated by that row's total.
                int i = 0;
                foreach (var it in outMat)
                {
                    foreach (var it2 in it)
                    {
                        Console.Write(it2);
                        Console.Write(" ");
                    }
                    Console.Write($"| {cm.RowTotals[i++]}");
                    Console.WriteLine();
                }
                Console.WriteLine();

                // We can get more information about our problem as well:
                Console.WriteLine("Дополнительная информация:");
                Console.WriteLine($"Классов: {cm.NumberOfClasses}:");
                Console.WriteLine($"Примеров: {cm.NumberOfSamples}:");
                Console.WriteLine($"Точность: {cm.Accuracy}:");
                Console.WriteLine($"Ошибка: {cm.Error}:");
                Console.WriteLine($"chanceAgreement: {cm.ChanceAgreement}:");
                Console.WriteLine($"geommetricAgreement: {cm.GeometricAgreement}:");
                Console.WriteLine($"pearson: {cm.Pearson}:");
                Console.WriteLine($"kappa: {cm.Kappa}:");
                Console.WriteLine($"tau: {cm.Tau}:");
                Console.WriteLine($"chiSquare: {cm.ChiSquare}:");
                Console.WriteLine($"kappaStdErr: {cm.StandardError}:");
            }
        }
예제 #34
0
        /// <summary>
        /// Rebuilds all evaluation statistics from the given node classifications:
        /// the per-prediction ("base") and per-node ("voter") counters, the rows of the
        /// details table, both confusion matrices and the area under both ROC curves.
        /// </summary>
        /// <param name="nodeClassifications">Classification results, keyed by node name.</param>
        /// <exception cref="ArgumentNullException"><paramref name="nodeClassifications"/> is null.</exception>
        /// <exception cref="InvalidOperationException"><paramref name="nodeClassifications"/> is empty.</exception>
        private void compute(Dictionary <string, NodeClassification> nodeClassifications)
        {
            baseExpected       = new List <int>();
            basePredicted      = new List <int>();
            basePredictedConf  = new List <double>();
            voterExpected      = new List <int>();
            voterPredicted     = new List <int>();
            voterPredictedConf = new List <double>();

            // Explicit input checks. They throw the same exception types that an
            // earlier, otherwise unused, First() call on the dictionary used to raise.
            if (nodeClassifications == null)
            {
                throw new ArgumentNullException("nodeClassifications");
            }

            if (nodeClassifications.Count == 0)
            {
                throw new InvalidOperationException("At least one node classification is required.");
            }

            foreach (var kvp in nodeClassifications)
            {
                var node           = kvp.Value;
                var rawPredictions = node.RawPredictions.ToArray();

                double sum = node.RawPredictions.Sum();

                // Voter level: one sample per node, decided by the sign of the summed predictions.
                VSamples++;
                voterExpected.Add(node.ActualClass);
                voterPredictedConf.Add(sum);
                voterPredicted.Add(sum >= 0 ? 1 : -1);

                // NOTE(review): a class of exactly 0 is counted as positive here (>= 0) but is
                // ignored by the per-prediction counters and the sum vote below (> 0) - confirm
                // that the labels are always +1 / -1.
                if (node.ActualClass >= 0) // malware
                {
                    VAP++;

                    if (sum >= 0) // malware predicted
                    {
                        VTP++;
                    }
                    else // goodware predicted
                    {
                        VFN++;
                    }
                }
                else // goodware
                {
                    VAN++;

                    if (sum < 0) // goodware predicted
                    {
                        VTN++;
                    }
                    else // malware predicted
                    {
                        VFP++;
                    }
                }

                // Per-node summary values shown in the details table.
                bool sumAgrees = (sum >= 0 && node.ActualClass > 0) || (sum < 0 && node.ActualClass < 0);

                String sumVote      = sumAgrees ? "Success" : "Fail";
                String majorityVote = (node.CorrectPredictions > node.FalsePredictions) ? "Success" : "Fail";

                String variancePrediction = Accord.Statistics.Tools.Variance(rawPredictions).ToString();
                String meanPrediction     = Accord.Statistics.Tools.Mean(rawPredictions).ToString();
                String sumPredictions     = sum.ToString();

                // Base level: every raw prediction of the node is one sample.
                foreach (double prediction in node.RawPredictions)
                {
                    baseExpected.Add(node.ActualClass);
                    basePredictedConf.Add(prediction);
                    basePredicted.Add(prediction >= 0 ? 1 : -1);

                    Samples++;

                    if (node.ActualClass > 0) // malware
                    {
                        AP++;

                        if (prediction >= 0) // predicted as malware
                        {
                            TP++;
                        }
                        else // predicted as goodware
                        {
                            FN++;
                        }
                    }
                    else if (node.ActualClass < 0) // goodware
                    {
                        AN++;

                        if (prediction >= 0) // predicted as malware
                        {
                            FP++;
                        }
                        else // predicted as goodware
                        {
                            TN++;
                        }
                    }
                }

                // Details row: key, prediction counts, every raw prediction, then the summaries.
                List <object> vals = new List <object>();
                vals.Add(kvp.Key);
                vals.Add(node.TotalPredictions.ToString());
                vals.Add(node.CorrectPredictions.ToString());
                vals.Add(node.FalsePredictions.ToString());

                for (int i = 0; i < node.RawPredictions.Count; i++)
                {
                    vals.Add(node.RawPredictions[i].ToString());
                }

                vals.Add(majorityVote);
                vals.Add(sumVote);
                vals.Add(meanPrediction);
                vals.Add(variancePrediction);
                vals.Add(sumPredictions);

                _tableDetails.Rows.Add(vals.ToArray());

                this._numNodes++;
                this._numPredictionsPerNode = node.RawPredictions.Count;
                this._numPredictions       += node.RawPredictions.Count;
            }

            // Confusion matrices take (predicted, expected, positive value, negative value).
            _baseMatrix  = new ConfusionMatrix(basePredicted.ToArray(), baseExpected.ToArray(), 1, -1);
            _voterMatrix = new ConfusionMatrix(voterPredicted.ToArray(), voterExpected.ToArray(), 1, -1);

            // ROC curves use the raw confidences (not the thresholded +1 / -1 labels) as scores.
            ReceiverOperatingCharacteristic rocBase  = new ReceiverOperatingCharacteristic(baseExpected.ToArray(), basePredictedConf.ToArray());
            ReceiverOperatingCharacteristic rocVoter = new ReceiverOperatingCharacteristic(voterExpected.ToArray(), voterPredictedConf.ToArray());

            rocBase.Compute(100);
            rocVoter.Compute(100);
            rocAreaBase  = rocBase.Area;
            rocAreaVoter = rocVoter.Area;
        }
        public void DeLongVarianceTest()
        {
            // Example from Sampling Variability of Nonparametric Estimates of the
            // Areas under Receiver Operating Characteristic Curves: An Update

            // Expected outcome of each of the 15 cases (true = positive).
            bool[] expected =
            {
                true,  false, true,  false, false, // cases  1 -  5
                true,  true,  false, false, true,  // cases  6 - 10
                false, false, true,  false, false  // cases 11 - 15
            };

            // Score given to each of the 15 cases.
            int[] actual =
            {
                1, 2, 5, 1, 1, // cases  1 -  5
                1, 2, 1, 2, 2, // cases  6 - 10
                1, 1, 5, 1, 1  // cases 11 - 15
            };

            ReceiverOperatingCharacteristic curve = new ReceiverOperatingCharacteristic(expected, actual);

            curve.Compute(10);

            // Scores of the positive cases, in case order.
            int[] positiveResults = { 1, 5, 1, 2, 2, 5 };

            Assert.AreEqual(6, curve.PositiveResults.Length);
            for (int i = 0; i < positiveResults.Length; i++)
            {
                Assert.AreEqual(positiveResults[i], curve.PositiveResults[i]);
            }

            // Scores of the negative cases, in case order.
            int[] negativeResults = { 2, 1, 1, 1, 2, 1, 1, 1, 1 };

            Assert.AreEqual(9, curve.NegativeResults.Length);
            for (int i = 0; i < negativeResults.Length; i++)
            {
                Assert.AreEqual(negativeResults[i], curve.NegativeResults[i]);
            }

            // Per-case accuracy components, checked to four decimal places.
            double[] positiveAccuracies = { 0.3888, 1.0000, 0.3888, 0.8888, 0.8888, 1.0000 };

            Assert.AreEqual(6, curve.PositiveAccuracies.Length);
            for (int i = 0; i < positiveAccuracies.Length; i++)
            {
                Assert.AreEqual(positiveAccuracies[i], curve.PositiveAccuracies[i], 1e-4);
            }

            double[] negativeAccuracies =
            {
                0.5000, 0.8333, 0.8333, 0.8333, 0.5000, 0.8333, 0.8333, 0.8333, 0.8333
            };

            Assert.AreEqual(9, curve.NegativeAccuracies.Length);
            for (int i = 0; i < negativeAccuracies.Length; i++)
            {
                Assert.AreEqual(negativeAccuracies[i], curve.NegativeAccuracies[i], 1e-4);
            }

            Assert.IsFalse(curve.NegativeAccuracies.HasNaN());
            Assert.IsFalse(curve.PositiveAccuracies.HasNaN());

            // Resulting standard error and variance of the area.
            Assert.AreEqual(0.1285, curve.StandardError, 1e-4);
            Assert.AreEqual(0.0165, curve.Variance, 1e-4);

            Assert.IsFalse(Double.IsNaN(curve.StandardError));
            Assert.IsFalse(Double.IsNaN(curve.Variance));
        }
        /// <summary>
        /// Builds one-vs-rest ROC curves for the train and test sets and shows both,
        /// together with a diagonal "Random" reference line, in a modal window.
        /// </summary>
        /// <param name="trainActual">Actual class of each training sample.</param>
        /// <param name="trainPreds">Predicted class of each training sample.</param>
        /// <param name="testActual">Actual class of each test sample.</param>
        /// <param name="testPreds">Predicted class of each test sample.</param>
        /// <param name="predClass">Class treated as positive; every other class is "rest".</param>
        /// <param name="minNumOccurrences">Value shown in the chart title only.</param>
        /// <param name="modelName">Model name shown in the chart title.</param>
        private static void DrawROCCurve(int[] trainActual, int[] trainPreds, int[] testActual, int[] testPreds, int predClass, int minNumOccurrences, string modelName)
        {
            // Label used in the console message and in the chart title.
            string predClassStr = predClass == 0 ? "Neutral" : predClass == 1 ? "Positive" : "Negative";

            Console.WriteLine(
                "* Building ROC curve for {0} vs. Rest",
                predClassStr
                );

            // Build ROC for Train Set (one-vs-rest: the selected class against all others)
            bool[] trainExpectedClass  = trainActual.Select(x => x == predClass).ToArray();
            int[]  trainPredictedClass = trainPreds.Select(x => x == predClass ? 1 : 0).ToArray();

            var trainRoc = new ReceiverOperatingCharacteristic(trainExpectedClass, trainPredictedClass);

            trainRoc.Compute(1000);

            // Get Train AUC
            double trainAUC = trainRoc.Area;

            // X axis: 1 - specificity, Y axis: sensitivity
            double[] trainXValues = trainRoc.Points.Select(x => 1 - x.Specificity).ToArray();
            double[] trainYValues = trainRoc.Points.Select(x => x.Sensitivity).ToArray();

            // Build ROC for Test Set
            bool[] testExpectedClass  = testActual.Select(x => x == predClass).ToArray();
            int[]  testPredictedClass = testPreds.Select(x => x == predClass ? 1 : 0).ToArray();

            var testRoc = new ReceiverOperatingCharacteristic(testExpectedClass, testPredictedClass);

            testRoc.Compute(1000);

            // Get Test AUC
            double testAUC = testRoc.Area;

            double[] testXValues = testRoc.Points.Select(x => 1 - x.Specificity).ToArray();
            double[] testYValues = testRoc.Points.Select(x => x.Sensitivity).ToArray();

            // Draw ROC Curve with both Train & Test ROC
            ScatterplotView spv = new ScatterplotView();

            spv.Dock         = DockStyle.Fill;
            spv.LinesVisible = true;

            spv.Graph.GraphPane.AddCurve(
                String.Format("Train (AUC: {0:0.00})", trainAUC),
                trainXValues, trainYValues, Color.Green, SymbolType.None
                );
            spv.Graph.GraphPane.AddCurve(
                String.Format("Test (AUC: {0:0.00})", testAUC),
                testXValues, testYValues, Color.Blue, SymbolType.None
                );

            // Plotting x against itself draws the diagonal of a random classifier.
            spv.Graph.GraphPane.AddCurve("Random", testXValues, testXValues, Color.Red, SymbolType.None);

            spv.Graph.GraphPane.Title.Text = String.Format(
                "{0} ROC - {1} vs. Rest (# occurrences >= {2})",
                modelName, predClassStr, minNumOccurrences
                );
            spv.Graph.GraphPane.AxisChange();

            // A form shown with ShowDialog is only hidden when the user closes it,
            // so it has to be disposed explicitly; disposing the form also disposes
            // the controls it contains.
            using (Form f1 = new Form())
            {
                f1.Width  = 700;
                f1.Height = 500;
                f1.Controls.Add(spv);
                f1.ShowDialog();
            }
        }
        public void DeLongComparisonTest()
        {
            // Example from Sampling Variability of Nonparametric Estimates of the
            // Areas under Receiver Operating Characteristic Curves: An Update

            // Expected outcome of each of the 15 cases (true = positive).
            bool[] expected =
            {
                true,  false, true,  false, false, // cases  1 -  5
                true,  true,  false, false, true,  // cases  6 - 10
                false, false, true,  false, false  // cases 11 - 15
            };

            // Scores given to the same 15 cases by the first curve's source.
            int[] actual1 =
            {
                1, 2, 5, 1, 1, // cases  1 -  5
                1, 2, 1, 2, 2, // cases  6 - 10
                1, 1, 5, 1, 1  // cases 11 - 15
            };

            // Scores given to the same 15 cases by the second curve's source.
            int[] actual2 =
            {
                1, 1, 5, 1, 1, // cases  1 -  5
                1, 4, 1, 2, 2, // cases  6 - 10
                1, 1, 5, 1, 1  // cases 11 - 15
            };

            ReceiverOperatingCharacteristic a = new ReceiverOperatingCharacteristic(expected, actual1);
            ReceiverOperatingCharacteristic b = new ReceiverOperatingCharacteristic(expected, actual2);

            a.Compute(10);
            b.Compute(10);

            TwoReceiverOperatingCurveTest test = new TwoReceiverOperatingCurveTest(a, b);

            // The statistic is the result of floating-point arithmetic (covariances,
            // a square root and a division), so compare with a small tolerance
            // instead of requiring bit-for-bit equality.
            Assert.AreEqual(-1.1351915229662422, test.Statistic, 1e-10);
        }