Example #1
        public static void Main(string[] args)
        {
            Evaluation evaluation = new Evaluation();

            evaluation.addEvaluator(new ErrorEvaluator(new MeanSquaredError()));

            string[] classNames = new string[] { "Virginica", "Setosa", "Versicolor" };


            MultiLayerPerceptron neuralNet = (MultiLayerPerceptron)NeuralNetwork.createFromFile("irisNet.nnet");
            DataSet dataSet = DataSet.createFromFile("data_sets/iris_data_normalised.txt", 4, 3, ",");

            evaluation.addEvaluator(new ClassifierEvaluator.MultiClass(classNames));
            evaluation.evaluateDataSet(neuralNet, dataSet);

            ClassifierEvaluator evaluator       = (ClassifierEvaluator.MultiClass)evaluation.getEvaluator(typeof(ClassifierEvaluator.MultiClass));
            ConfusionMatrix     confusionMatrix = evaluator.Result;

            Console.WriteLine("Confusion matrrix:\r\n");
            Console.WriteLine(confusionMatrix.ToString() + "\r\n\r\n");
            Console.WriteLine("Classification metrics\r\n");
            ClassificationMetrics[]     metrics = ClassificationMetrics.createFromMatrix(confusionMatrix);
            ClassificationMetrics.Stats average = ClassificationMetrics.average(metrics);
            foreach (ClassificationMetrics cm in metrics)
            {
                Console.WriteLine(cm.ToString() + "\r\n");
            }
            Console.WriteLine(average.ToString());
        }
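If the summary should be persisted rather than only printed, a minimal sketch that could replace the console output inside Main, reusing the confusionMatrix, metrics, and average variables above (the report file name is an assumption):

            // Hypothetical follow-up (placed inside Main, after the console output above);
            // the report file name is an assumption.
            using (var writer = new System.IO.StreamWriter("iris_evaluation_report.txt"))
            {
                writer.WriteLine("Confusion matrix:");
                writer.WriteLine(confusionMatrix.ToString());
                writer.WriteLine("Classification metrics:");
                foreach (ClassificationMetrics cm in metrics)
                {
                    writer.WriteLine(cm.ToString());
                }
                writer.WriteLine("Average: " + average.ToString());
            }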
Example #2
    /// <summary>
    /// Writes the precision-recall curve to the file with the specified name.
    /// </summary>
    /// <param name="fileName">The name of the file to write the precision-recall curve to.</param>
    /// <param name="evaluator">The classifier evaluator.</param>
    /// <param name="x">The x vector of the ground truth.</param>
    /// <param name="y">The y of the ground truth.</param>
    /// <param name="yPredicDistrib">The predictive distributions.</param>
    /// <param name="positiveClassLabel">The label of the positive class.</param>
    /// <remarks>Adapted from MicrosoftResearch.Infer.Learners</remarks>
    private void WritePrecisionRecallCurve(
        string fileName,
        ClassifierEvaluator <IList <Vector>, int, IList <string>, string> evaluator,
        Vector[] x,
        IList <string> y,
        IEnumerable <IDictionary <string, double> > yPredicDistrib,
        string positiveClassLabel)
    {
        Debug.Assert(yPredicDistrib != null, "The predictive distributions must not be null.");
        Debug.Assert(yPredicDistrib.Count() > 0, "The predictive distributions must not be empty.");
        Debug.Assert(positiveClassLabel != null, "The label of the positive class must not be null.");

        var precisionRecallCurve = evaluator.PrecisionRecallCurve(positiveClassLabel, x, y, yPredicDistrib);

        using (var writer = new StreamWriter(fileName))
        {
            writer.WriteLine("# Precision-recall curve");
            writer.WriteLine("#");
            writer.WriteLine("# Class '" + positiveClassLabel + "'     (versus the rest)");
            writer.WriteLine("#");
            writer.WriteLine("# Recall (R), precision (P)");
            foreach (var point in precisionRecallCurve)
            {
                writer.WriteLine("{0}, {1}", point.First, point.Second);
            }
        }
    }
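The writer above emits a comment header followed by one recall/precision pair per line. A minimal sketch that parses such a file back into points, e.g. for plotting; the helper name is hypothetical and the usual System, System.Collections.Generic and System.IO usings are assumed:

    // Hypothetical reader for a curve file written in the format above.
    private static List<Tuple<double, double>> ReadCurvePoints(string fileName)
    {
        var points = new List<Tuple<double, double>>();
        foreach (var line in File.ReadLines(fileName))
        {
            if (line.StartsWith("#") || string.IsNullOrWhiteSpace(line))
            {
                continue; // skip the comment header and blank lines
            }

            var parts = line.Split(',');
            points.Add(Tuple.Create(double.Parse(parts[0]), double.Parse(parts[1])));
        }

        return points;
    }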
Example #3
        /// <summary>
        /// Writes the receiver operating characteristic curve to the file with the specified name.
        /// </summary>
        /// <param name="fileName">The name of the file to write the receiver operating characteristic curve to.</param>
        /// <param name="evaluator">The classifier evaluator.</param>
        /// <param name="groundTruth">The ground truth.</param>
        /// <param name="predictiveDistributions">The predictive distributions.</param>
        /// <param name="positiveClassLabel">The label of the positive class.</param>
        private void WriteRocCurve(
            string fileName,
            ClassifierEvaluator <IList <LabeledFeatureValues>, LabeledFeatureValues, IList <LabelDistribution>, string> evaluator,
            IList <LabeledFeatureValues> groundTruth,
            IList <IDictionary <string, double> > predictiveDistributions,
            string positiveClassLabel)
        {
            Debug.Assert(predictiveDistributions != null, "The predictive distributions must not be null.");
            Debug.Assert(predictiveDistributions.Count > 0, "The predictive distributions must not be empty.");
            Debug.Assert(positiveClassLabel != null, "The label of the positive class must not be null.");

            var rocCurve = evaluator.ReceiverOperatingCharacteristicCurve(positiveClassLabel, groundTruth, predictiveDistributions);

            using (var writer = new StreamWriter(fileName))
            {
                writer.WriteLine("# Receiver operating characteristic (ROC) curve");
                writer.WriteLine("#");
                writer.WriteLine("# Class '" + positiveClassLabel + "'     (versus the rest)");
                writer.WriteLine("#");
                writer.WriteLine("# False positive rate (FPR), True positive rate (TPR)");
                foreach (var point in rocCurve)
                {
                    writer.WriteLine(point);
                }
            }
        }
Example #4
    public BPM(
        string[] labels,
        double sparsityApproxThresh)
    {
        Debug.Assert(labels != null, "The labels must not be null.");
        Debug.Assert(labels.Length == 2, "The labels must have two possible values.");
        Debug.Assert(sparsityApproxThresh >= 0, "The sparsityApproxThresh must be greater than or equal to zero.");

        // Initialise the validations
        _validate = new Validate();

        // Initialise the BPM
        _engine = new Dictionary <DistributionType, InferenceEngine>();
        _w      = new Dictionary <DistributionType, Variable <Vector> >();
        _w[DistributionType.Prior]     = null;
        _w[DistributionType.Posterior] = null;
        _d = new Dictionary <DistributionType, DistributionName>();
        _yPredicDistrib = Enumerable.Empty <IDictionary <string, double> >();
        _yPredicLabel   = new string[] { };

        _mapping = new GenericClassifierMapping(labels);
        // TO DO

        // Evaluator mapping
        var evaluatorMapping = _mapping.ForEvaluation();

        _evaluator = new ClassifierEvaluator
                     <IList <Vector>, int, IList <string>, string>(evaluatorMapping);

        // Other initialisations
        _availableDatasetName = new DatasetName();
        _numObservations      = 0;
        _numFeatures          = 0;
    }
Example #5
        /// <summary>
        /// Writes the probability calibration plot to the file with the specified name.
        /// </summary>
        /// <param name="fileName">The name of the file to write the calibration plot to.</param>
        /// <param name="evaluator">The classifier evaluator.</param>
        /// <param name="groundTruth">The ground truth.</param>
        /// <param name="predictiveDistributions">The predictive distributions.</param>
        /// <param name="positiveClassLabel">The label of the positive class.</param>
        private void WriteCalibrationCurve(
            string fileName,
            ClassifierEvaluator <IList <LabeledFeatureValues>, LabeledFeatureValues, IList <LabelDistribution>, string> evaluator,
            IList <LabeledFeatureValues> groundTruth,
            IList <IDictionary <string, double> > predictiveDistributions,
            string positiveClassLabel)
        {
            Debug.Assert(predictiveDistributions != null, "The predictive distributions must not be null.");
            Debug.Assert(predictiveDistributions.Count > 0, "The predictive distributions must not be empty.");
            Debug.Assert(positiveClassLabel != null, "The label of the positive class must not be null.");

            var    calibrationCurve = evaluator.CalibrationCurve(positiveClassLabel, groundTruth, predictiveDistributions);
            double calibrationError = calibrationCurve.Select(i => Metrics.AbsoluteError(i.EmpiricalProbability, i.PredictedProbability)).Average();

            using (var writer = new StreamWriter(fileName))
            {
                writer.WriteLine("# Empirical probability calibration plot");
                writer.WriteLine("#");
                writer.WriteLine("# Class '" + positiveClassLabel + "'     (versus the rest)");
                writer.WriteLine("# Calibration error = {0}     (mean absolute error)", calibrationError);
                writer.WriteLine("#");
                writer.WriteLine("# Predicted probability, empirical probability");
                foreach (var point in calibrationCurve)
                {
                    writer.WriteLine(point);
                }
            }
        }
Example #6
        /// <summary>
        /// Writes the precision-recall curve to the file with the specified name.
        /// </summary>
        /// <param name="fileName">The name of the file to write the precision-recall curve to.</param>
        /// <param name="evaluator">The classifier evaluator.</param>
        /// <param name="groundTruth">The ground truth.</param>
        /// <param name="predictiveDistributions">The predictive distributions.</param>
        /// <param name="positiveClassLabel">The label of the positive class.</param>
        private void WritePrecisionRecallCurve(
            string fileName,
            ClassifierEvaluator <IList <LabeledFeatureValues>, LabeledFeatureValues, IList <LabelDistribution>, string> evaluator,
            IList <LabeledFeatureValues> groundTruth,
            IList <IDictionary <string, double> > predictiveDistributions,
            string positiveClassLabel)
        {
            Debug.Assert(predictiveDistributions != null, "The predictive distributions must not be null.");
            Debug.Assert(predictiveDistributions.Count > 0, "The predictive distributions must not be empty.");
            Debug.Assert(positiveClassLabel != null, "The label of the positive class must not be null.");

            var precisionRecallCurve = evaluator.PrecisionRecallCurve(positiveClassLabel, groundTruth, predictiveDistributions);

            using (var writer = new StreamWriter(fileName))
            {
                writer.WriteLine("# Precision-recall curve");
                writer.WriteLine("#");
                writer.WriteLine("# Class '" + positiveClassLabel + "'     (versus the rest)");
                writer.WriteLine("#");
                writer.WriteLine("# precision (P), Recall (R)");
                foreach (var point in precisionRecallCurve)
                {
                    writer.WriteLine(point);
                }
            }
        }
Example #7
    /// <summary>
    /// Writes the receiver operating characteristic curve to the file with the specified name.
    /// </summary>
    /// <param name="fileName">The name of the file to write the receiver operating characteristic curve to.</param>
    /// <param name="evaluator">The classifier evaluator.</param>
    /// <param name="x">The x vector of the ground truth.</param>
    /// <param name="y">The y of the ground truth.</param>
    /// <param name="yPredicDistrib">The predictive distributions.</param>
    /// <param name="positiveClassLabel">The label of the positive class.</param>
    /// <remarks>Adapted from MicrosoftResearch.Infer.Learners</remarks>
    private void WriteRocCurve(
        string fileName,
        ClassifierEvaluator <IList <Vector>, int, IList <string>, string> evaluator,
        Vector[] x,
        IList <string> y,
        IEnumerable <IDictionary <string, double> > yPredicDistrib,
        string positiveClassLabel)
    {
        Debug.Assert(yPredicDistrib != null, "The predictive distributions must not be null.");
        Debug.Assert(yPredicDistrib.Count() > 0, "The predictive distributions must not be empty.");
        Debug.Assert(positiveClassLabel != null, "The label of the positive class must not be null.");

        var rocCurve = evaluator.ReceiverOperatingCharacteristicCurve(positiveClassLabel, x, y, yPredicDistrib);

        using (var writer = new StreamWriter(fileName))
        {
            writer.WriteLine("# Receiver operating characteristic (ROC) curve");
            writer.WriteLine("#");
            writer.WriteLine("# Class '" + positiveClassLabel + "'     (versus the rest)");
            writer.WriteLine("#");
            writer.WriteLine("# False positive rate (FPR), true positive rate (TPR)");
            foreach (var point in rocCurve)
            {
                writer.WriteLine("{0}, {1}", point.First, point.Second);
            }
        }
    }
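Given the (FPR, TPR) pairs written above, the area under the curve can be recovered with the trapezoidal rule. A small standalone sketch; the helper and its input list are hypothetical, and the points are assumed sorted by increasing FPR:

    // Hypothetical helper: trapezoidal AUC over ROC points sorted by FPR.
    private static double TrapezoidalAuc(IList<Tuple<double, double>> rocPoints)
    {
        double auc = 0.0;
        for (int i = 1; i < rocPoints.Count; i++)
        {
            double deltaFpr = rocPoints[i].Item1 - rocPoints[i - 1].Item1;   // width of the segment
            double meanTpr  = 0.5 * (rocPoints[i].Item2 + rocPoints[i - 1].Item2); // average height
            auc += deltaFpr * meanTpr;
        }

        return auc;
    }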
Example #8
    /// <summary>
    /// Writes the probability calibration plot to the file with the specified name.
    /// </summary>
    /// <param name="fileName">The name of the file to write the calibration plot to.</param>
    /// <param name="evaluator">The classifier evaluator.</param>
    /// <param name="x">The x vector of the ground truth.</param>
    /// <param name="y">The y of the ground truth.</param>
    /// <param name="yPredicDistrib">The predictive distributions.</param>
    /// <param name="positiveClassLabel">The label of the positive class.</param>
    /// <remarks>Adapted from MicrosoftResearch.Infer.Learners</remarks>
    private void WriteCalibrationCurve(
        string fileName,
        ClassifierEvaluator <IList <Vector>, int, IList <string>, string> evaluator,
        Vector[] x,
        IList <string> y,
        IEnumerable <IDictionary <string, double> > yPredicDistrib,
        string positiveClassLabel)
    {
        Debug.Assert(yPredicDistrib != null, "The predictive distributions must not be null.");
        Debug.Assert(yPredicDistrib.Count() > 0, "The predictive distributions must not be empty.");
        Debug.Assert(positiveClassLabel != null, "The label of the positive class must not be null.");

        var    calibrationCurve = evaluator.CalibrationCurve(positiveClassLabel, x, y, yPredicDistrib);
        double calibrationError = calibrationCurve.Select(v => Metrics.AbsoluteError(v.First, v.Second)).Average();

        using (var writer = new StreamWriter(fileName))
        {
            writer.WriteLine("# Empirical probability calibration plot");
            writer.WriteLine("#");
            writer.WriteLine("# Class '" + positiveClassLabel + "'     (versus the rest)");
            writer.WriteLine("# Calibration error = {0}     (mean absolute error)", calibrationError);
            writer.WriteLine("#");
            writer.WriteLine("# Predicted probability, empirical probability");
            foreach (var point in calibrationCurve)
            {
                writer.WriteLine("{0}, {1}", point.First, point.Second);
            }
        }
    }
Example #9
        // TODO: don't just write to the console - store these results somewhere so they can be displayed
        //
        private void testNetwork(NeuralNetwork neuralNetwork, DataSet testSet)
        {
            evaluation.evaluateDataSet(neuralNetwork, testSet);
            // works for binary classification - what if we have multiple classes? how to get results for multiple classes here?
            //      results.add(evaluation.getEvaluator(ClassificationMetricsEvaluator.class).getResult()[0]); // MUST BE FIXED!!!!! get all and add them all to results

            System.Console.WriteLine("##############################################################################");
            System.Console.WriteLine("MeanSquare Error: " + ((ErrorEvaluator)evaluation.getEvaluator(typeof(ErrorEvaluator))).Result);
            System.Console.WriteLine("##############################################################################");

            // TODO: deal with BinaryClassifiers too here
            ClassifierEvaluator evaluator       = ((ClassifierEvaluator.MultiClass)evaluation.getEvaluator(typeof(ClassifierEvaluator.MultiClass)));
            ConfusionMatrix     confusionMatrix = evaluator.Result;

            System.Console.WriteLine("Confusion Matrix: \r\n" + confusionMatrix.ToString());

            System.Console.WriteLine("##############################################################################");
            System.Console.WriteLine("Classification metrics: ");
            ClassificationMetrics[] metrics = ClassificationMetrics.createFromMatrix(confusionMatrix);             // add all of these to result

            foreach (ClassificationMetrics cm in metrics)
            {
                System.Console.WriteLine(cm.ToString());
            }

            System.Console.WriteLine("##############################################################################");
        }
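Example #1 above shows that the per-class metrics can also be collapsed into a single averaged summary. A minimal, hypothetical addition at the end of testNetwork, reusing ClassificationMetrics.average and ClassificationMetrics.Stats exactly as in Example #1:

            // Hypothetical follow-up: also print the averaged classification metrics.
            ClassificationMetrics.Stats average = ClassificationMetrics.average(metrics);
            System.Console.WriteLine("Average metrics: " + average.ToString());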
Example #10
    /// <summary>
    /// Computes all per-label AUCs as well as the micro- and macro-averaged AUCs.
    /// </summary>
    /// <param name="confusionMatrix">The confusion matrix.</param>
    /// <param name="evaluator">The classifier evaluator.</param>
    /// <param name="x">The x vector of the ground truth.</param>
    /// <param name="y">The y of the ground truth.</param>
    /// <param name="yPredicDistrib">The predictive distributions.</param>
    /// <param name="microAuc">The micro-averaged area under the receiver operating characteristic curve.</param>
    /// <param name="macroAuc">The macro-averaged area under the receiver operating characteristic curve.</param>
    /// <param name="macroAucClassLabelCount">The number of class labels for which the AUC if defined.</param>
    /// <returns>The area under the receiver operating characteristic curve for each class label.</returns>
    /// <remarks>Adapted from MicrosoftResearch.Infer.Learners</remarks>
    private IDictionary <string, double> ComputeLabelAuc(
        ConfusionMatrix <string> confusionMatrix,
        ClassifierEvaluator <IList <Vector>, int, IList <string>, string> evaluator,
        Vector[] x,
        IList <string> y,
        IEnumerable <IDictionary <string, double> > yPredicDistrib,
        out double microAuc,
        out double macroAuc,
        out int macroAucClassLabelCount)
    {
        int instanceCount   = yPredicDistrib.Count();
        var classLabels     = confusionMatrix.ClassLabelSet.Elements.ToArray();
        int classLabelCount = classLabels.Length;
        var labelAuc        = new Dictionary <string, double>();

        // Compute per-label AUC
        macroAucClassLabelCount = classLabelCount;
        foreach (var classLabel in classLabels)
        {
            // One versus rest
            double auc;
            try
            {
                auc = evaluator.AreaUnderRocCurve(classLabel, x, y, yPredicDistrib);
            }
            catch (ArgumentException)
            {
                auc = double.NaN;
                macroAucClassLabelCount--;
            }

            labelAuc.Add(classLabel, auc);
        }

        // Compute micro- and macro-averaged AUC
        microAuc = 0;
        macroAuc = 0;
        foreach (var label in classLabels)
        {
            if (double.IsNaN(labelAuc[label]))
            {
                continue;
            }

            microAuc += confusionMatrix.TrueLabelCount(label) * labelAuc[label] / instanceCount;
            macroAuc += labelAuc[label] / macroAucClassLabelCount;
        }

        return(labelAuc);
    }
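The two averages differ in their weighting: the micro-average weights each label's AUC by its true-label count, while the macro-average weights every label with a defined AUC equally. A small standalone illustration of the same averaging over a hypothetical per-label AUC dictionary and per-label instance counts (helper name and inputs are assumptions; System.Linq is assumed):

    // Hypothetical standalone illustration of the micro/macro averaging used above.
    private static void AverageAucs(
        IDictionary<string, double> labelAuc,      // per-label AUC, NaN where undefined
        IDictionary<string, int> trueLabelCounts,  // number of instances per true label
        out double microAuc,
        out double macroAuc)
    {
        int instanceCount = trueLabelCounts.Values.Sum();
        int definedLabels = labelAuc.Values.Count(a => !double.IsNaN(a));

        microAuc = 0;
        macroAuc = 0;
        foreach (var pair in labelAuc)
        {
            if (double.IsNaN(pair.Value))
            {
                continue; // labels without a defined AUC are skipped, as above
            }

            microAuc += trueLabelCounts[pair.Key] * pair.Value / instanceCount; // weighted by class frequency
            macroAuc += pair.Value / definedLabels;                             // each class weighted equally
        }
    }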
Example #11
        /// <summary>
        /// Computes all per-label AUCs as well as the micro- and macro-averaged AUCs.
        /// </summary>
        /// <param name="confusionMatrix">The confusion matrix.</param>
        /// <param name="evaluator">The classifier evaluator.</param>
        /// <param name="groundTruth">The ground truth.</param>
        /// <param name="predictiveDistributions">The predictive distributions.</param>
        /// <param name="microAuc">The micro-averaged area under the receiver operating characteristic curve.</param>
        /// <param name="macroAuc">The macro-averaged area under the receiver operating characteristic curve.</param>
        /// <param name="macroAucClassLabelCount">The number of class labels for which the AUC if defined.</param>
        /// <returns>The area under the receiver operating characteristic curve for each class label.</returns>
        private IDictionary <string, double> ComputeLabelAuc(
            ConfusionMatrix <string> confusionMatrix,
            ClassifierEvaluator <IList <LabeledFeatureValues>, LabeledFeatureValues, IList <LabelDistribution>, string> evaluator,
            IList <LabeledFeatureValues> groundTruth,
            ICollection <IDictionary <string, double> > predictiveDistributions,
            out double microAuc,
            out double macroAuc,
            out int macroAucClassLabelCount)
        {
            int instanceCount   = predictiveDistributions.Count;
            var classLabels     = confusionMatrix.ClassLabelSet.Elements.ToArray();
            int classLabelCount = classLabels.Length;
            var labelAuc        = new Dictionary <string, double>();

            // Compute per-label AUC
            macroAucClassLabelCount = classLabelCount;
            foreach (var classLabel in classLabels)
            {
                // One versus rest
                double auc;
                try
                {
                    auc = evaluator.AreaUnderRocCurve(classLabel, groundTruth, predictiveDistributions);
                }
                catch (ArgumentException)
                {
                    auc = double.NaN;
                    macroAucClassLabelCount--;
                }

                labelAuc.Add(classLabel, auc);
            }

            // Compute micro- and macro-averaged AUC
            microAuc = 0;
            macroAuc = 0;
            foreach (var label in classLabels)
            {
                if (double.IsNaN(labelAuc[label]))
                {
                    continue;
                }

                microAuc += confusionMatrix.TrueLabelCount(label) * labelAuc[label] / instanceCount;
                macroAuc += labelAuc[label] / macroAucClassLabelCount;
            }

            return(labelAuc);
        }
Example #12
    /// <summary>
    /// Writes the evaluation results to the specified writer.
    /// </summary>
    /// <param name="writer">The writer to write the evaluation report to.</param>
    /// <param name="evaluator">The classifier evaluator.</param>
    /// <param name="x">The x vector of the ground truth.</param>
    /// <param name="y">The y of the ground truth.</param>
    /// <param name="yPredicDistrib">The predictive distributions.</param>
    /// <param name="yPredicLabel">The predicted labels.</param>
    /// <remarks>Adapted from MicrosoftResearch.Infer.Learners</remarks>
    private void WriteReport(
        StreamWriter writer,
        ClassifierEvaluator <IList <Vector>, int, IList <string>, string> evaluator,
        Vector[] x,
        IList <string> y,
        IEnumerable <IDictionary <string, double> > yPredicDistrib,
        IEnumerable <string> yPredicLabel)
    {
        // Compute confusion matrix
        var confusionMatrix = evaluator.ConfusionMatrix(x, y, yPredicLabel);

        // Compute mean negative log probability
        double meanNegativeLogProbability =
            evaluator.Evaluate(x, y, yPredicDistrib, Metrics.NegativeLogProbability) / yPredicDistrib.Count();

        // Compute M-measure (averaged pairwise AUC)
        IDictionary <string, IDictionary <string, double> > aucMatrix;
        double auc = evaluator.AreaUnderRocCurve(x, y, yPredicDistrib, out aucMatrix);

        // Compute per-label AUC as well as micro- and macro-averaged AUC
        double microAuc;
        double macroAuc;
        int    macroAucClassLabelCount;
        var    labelAuc = this.ComputeLabelAuc(
            confusionMatrix,
            evaluator,
            x,
            y,
            yPredicDistrib,
            out microAuc,
            out macroAuc,
            out macroAucClassLabelCount);

        // Instance-averaged performance
        this.WriteInstanceAveragedPerformance(writer, confusionMatrix, meanNegativeLogProbability, microAuc);

        // Class-averaged performance
        this.WriteClassAveragedPerformance(writer, confusionMatrix, auc, macroAuc, macroAucClassLabelCount);

        // Performance on individual classes
        this.WriteIndividualClassPerformance(writer, confusionMatrix, labelAuc);

        // Confusion matrix
        this.WriteConfusionMatrix(writer, confusionMatrix);

        // Pairwise AUC
        this.WriteAucMatrix(writer, aucMatrix);
    }
Example #13
        /// <summary>
        /// Writes the evaluation results to the specified writer.
        /// </summary>
        /// <param name="writer">The writer to write the evaluation report to.</param>
        /// <param name="evaluator">The classifier evaluator.</param>
        /// <param name="groundTruth">The ground truth.</param>
        /// <param name="predictiveDistributions">The predictive distributions.</param>
        /// <param name="predictedLabels">The predicted labels.</param>
        private void WriteReport(
            StreamWriter writer,
            ClassifierEvaluator <IList <LabeledFeatureValues>, LabeledFeatureValues, IList <LabelDistribution>, string> evaluator,
            IList <LabeledFeatureValues> groundTruth,
            ICollection <IDictionary <string, double> > predictiveDistributions,
            IEnumerable <string> predictedLabels)
        {
            // Compute confusion matrix
            var confusionMatrix = evaluator.ConfusionMatrix(groundTruth, predictedLabels);

            // Compute mean negative log probability
            double meanNegativeLogProbability =
                evaluator.Evaluate(groundTruth, predictiveDistributions, Metrics.NegativeLogProbability) / predictiveDistributions.Count;

            // Compute M-measure (averaged pairwise AUC)
            IDictionary <string, IDictionary <string, double> > aucMatrix;
            double auc = evaluator.AreaUnderRocCurve(groundTruth, predictiveDistributions, out aucMatrix);

            // Compute per-label AUC as well as micro- and macro-averaged AUC
            double microAuc;
            double macroAuc;
            int    macroAucClassLabelCount;
            var    labelAuc = this.ComputeLabelAuc(
                confusionMatrix,
                evaluator,
                groundTruth,
                predictiveDistributions,
                out microAuc,
                out macroAuc,
                out macroAucClassLabelCount);

            // Instance-averaged performance
            this.WriteInstanceAveragedPerformance(writer, confusionMatrix, meanNegativeLogProbability, microAuc);

            // Class-averaged performance
            this.WriteClassAveragedPerformance(writer, confusionMatrix, auc, macroAuc, macroAucClassLabelCount);

            // Performance on individual classes
            this.WriteIndividualClassPerformance(writer, confusionMatrix, labelAuc);

            // Confusion matrix
            this.WriteConfusionMatrix(writer, confusionMatrix);

            // Pairwise AUC
            this.WriteAucMatrix(writer, aucMatrix);
        }
Example #14
        /// <summary>
        /// Prepares the environment before each test.
        /// </summary>
        public ClassifierEvaluatorTests()
        {
            // Ground truth labels (no uncertainty)
            this.groundTruth    = new LabelDistribution[5];
            this.groundTruth[0] = new Dictionary <string, double> {
                { LabelSet[0], 1 }, { LabelSet[1], 0 }, { LabelSet[2], 0 }
            };
            this.groundTruth[1] = new Dictionary <string, double> {
                { LabelSet[0], 0 }, { LabelSet[1], 1 }, { LabelSet[2], 0 }
            };
            this.groundTruth[2] = new Dictionary <string, double> {
                { LabelSet[0], 0 }, { LabelSet[1], 0 }, { LabelSet[2], 1 }
            };
            this.groundTruth[3] = new Dictionary <string, double> {
                { LabelSet[0], 1 }, { LabelSet[1], 0 }, { LabelSet[2], 0 }
            };
            this.groundTruth[4] = new Dictionary <string, double> {
                { LabelSet[0], 1 }, { LabelSet[1], 0 }, { LabelSet[2], 0 }
            };

            // Predictions
            this.predictions    = new LabelDistribution[5];
            this.predictions[0] = new Dictionary <string, double> {
                { LabelSet[0], 0 }, { LabelSet[1], 0 }, { LabelSet[2], 1 }
            };
            this.predictions[1] = new Dictionary <string, double> {
                { LabelSet[0], 0 }, { LabelSet[1], 1 }, { LabelSet[2], 0 }
            };
            this.predictions[2] = new Dictionary <string, double> {
                { LabelSet[0], 1 }, { LabelSet[1], 0 }, { LabelSet[2], 0 }
            };
            this.predictions[3] = new Dictionary <string, double> {
                { LabelSet[0], 1 / 6.0 }, { LabelSet[1], 2 / 3.0 }, { LabelSet[2], 1 / 6.0 }
            };
            this.predictions[4] = new Dictionary <string, double> {
                { LabelSet[0], 1 / 8.0 }, { LabelSet[1], 1 / 8.0 }, { LabelSet[2], 3 / 4.0 }
            };

            // Classifier evaluator
            var classifierMapping = new ClassifierMapping();
            var evaluatorMapping  = classifierMapping.ForEvaluation();

            this.evaluator = new ClassifierEvaluator <IEnumerable <LabelDistribution>, LabelDistribution, IEnumerable <LabelDistribution>, string>(evaluatorMapping);
        }
Example #15
    public BPMMapped(
        string[] labels)
    {
        Debug.Assert(labels != null, "The labels must not be null.");
        Debug.Assert(labels.Length == 2, "The labels must have two possible values.");

        // Initialise the validations
        _validate = new Validate();

        // Create a BPM from the mapping
        _mapping    = new GenericClassifierMapping(labels);
        _classifier = BayesPointMachineClassifier.CreateBinaryClassifier(_mapping);

        // Evaluator mapping
        var evaluatorMapping = _mapping.ForEvaluation();

        _evaluator = new ClassifierEvaluator
                     <IList <Vector>, int, IList <string>, string>(evaluatorMapping);

        // Other initialisations
        _availableDatasetName = new DatasetName();
        _numObservations      = 0;
        _numFeatures          = 0;
    }
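The constructor only wires up the mapping, the binary classifier and the evaluator. A hypothetical continuation showing how those fields might be used together; the method itself and the field usage are assumptions, while Train, PredictDistribution and the three-argument AreaUnderRocCurve follow the calls seen in Examples #17 and #18:

    // Hypothetical usage of the fields initialised above (x: feature vectors, y: labels).
    public double TrainAndScore(Vector[] x, IList <string> y)
    {
        // Train the binary Bayes point machine on the mapped data.
        _classifier.Train(x, y);

        // Predict label distributions for the same instances.
        var yPredicDistrib = _classifier.PredictDistribution(x);

        // M-measure (averaged pairwise AUC), as in Examples #17 and #18.
        return _evaluator.AreaUnderRocCurve(x, y, yPredicDistrib);
    }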
Example #16
        /// <summary>
        /// Runs the module.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false otherwise.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            string groundTruthFileName          = string.Empty;
            string predictionsFileName          = string.Empty;
            string reportFileName               = string.Empty;
            string calibrationCurveFileName     = string.Empty;
            string rocCurveFileName             = string.Empty;
            string precisionRecallCurveFileName = string.Empty;
            string positiveClassLabel           = string.Empty;

            var parser = new CommandLineParser();

            parser.RegisterParameterHandler("--ground-truth", "FILE", "File with ground truth labels", v => groundTruthFileName = v, CommandLineParameterType.Required);
            parser.RegisterParameterHandler("--predictions", "FILE", "File with label predictions", v => predictionsFileName    = v, CommandLineParameterType.Required);
            parser.RegisterParameterHandler("--report", "FILE", "File to store the evaluation report", v => reportFileName      = v, CommandLineParameterType.Optional);
            parser.RegisterParameterHandler("--calibration-curve", "FILE", "File to store the empirical calibration curve", v => calibrationCurveFileName     = v, CommandLineParameterType.Optional);
            parser.RegisterParameterHandler("--roc-curve", "FILE", "File to store the receiver operating characteristic curve", v => rocCurveFileName         = v, CommandLineParameterType.Optional);
            parser.RegisterParameterHandler("--precision-recall-curve", "FILE", "File to store the precision-recall curve", v => precisionRecallCurveFileName = v, CommandLineParameterType.Optional);
            parser.RegisterParameterHandler("--positive-class", "STRING", "Label of the positive class to use in curves", v => positiveClassLabel             = v, CommandLineParameterType.Optional);
            if (!parser.TryParse(args, usagePrefix))
            {
                return(false);
            }

            // Read ground truth
            var groundTruth = ClassifierPersistenceUtils.LoadLabeledFeatureValues(groundTruthFileName);

            // Read predictions using ground truth label dictionary
            var predictions = ClassifierPersistenceUtils.LoadLabelDistributions(predictionsFileName, groundTruth.First().LabelDistribution.LabelSet);

            // Check that there are at least two distinct class labels
            if (predictions.First().LabelSet.Count < 2)
            {
                throw new InvalidFileFormatException("Ground truth and predictions must contain at least two distinct class labels.");
            }

            // Distill distributions and point estimates
            var predictiveDistributions  = predictions.Select(i => i.ToDictionary()).ToList();
            var predictivePointEstimates = predictions.Select(i => i.GetMode()).ToList();

            // Create evaluator
            var evaluatorMapping = Mappings.Classifier.ForEvaluation();
            var evaluator        = new ClassifierEvaluator <IList <LabeledFeatureValues>, LabeledFeatureValues, IList <LabelDistribution>, string>(evaluatorMapping);

            // Write evaluation report
            if (!string.IsNullOrEmpty(reportFileName))
            {
                using (var writer = new StreamWriter(reportFileName))
                {
                    this.WriteReportHeader(writer, groundTruthFileName, predictionsFileName);
                    this.WriteReport(writer, evaluator, groundTruth, predictiveDistributions, predictivePointEstimates);
                }
            }

            // Compute and write the empirical probability calibration curve
            positiveClassLabel = this.CheckPositiveClassLabel(groundTruth, positiveClassLabel);
            if (!string.IsNullOrEmpty(calibrationCurveFileName))
            {
                this.WriteCalibrationCurve(calibrationCurveFileName, evaluator, groundTruth, predictiveDistributions, positiveClassLabel);
            }

            // Compute and write the precision-recall curve
            if (!string.IsNullOrEmpty(precisionRecallCurveFileName))
            {
                this.WritePrecisionRecallCurve(precisionRecallCurveFileName, evaluator, groundTruth, predictiveDistributions, positiveClassLabel);
            }

            // Compute and write the receiver operating characteristic curve
            if (!string.IsNullOrEmpty(rocCurveFileName))
            {
                this.WriteRocCurve(rocCurveFileName, evaluator, groundTruth, predictiveDistributions, positiveClassLabel);
            }

            return(true);
        }
Example #17
        /// <summary>
        /// Runs the module.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false otherwise.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            string dataSetFile = string.Empty;
            string resultsFile = string.Empty;
            int    crossValidationFoldCount = 5;
            int    iterationCount           = BayesPointMachineClassifierTrainingSettings.IterationCountDefault;
            int    batchCount           = BayesPointMachineClassifierTrainingSettings.BatchCountDefault;
            bool   computeModelEvidence = BayesPointMachineClassifierTrainingSettings.ComputeModelEvidenceDefault;

            var parser = new CommandLineParser();

            parser.RegisterParameterHandler("--data-set", "FILE", "File with training data", v => dataSetFile           = v, CommandLineParameterType.Required);
            parser.RegisterParameterHandler("--results", "FILE", "File with cross-validation results", v => resultsFile = v, CommandLineParameterType.Required);
            parser.RegisterParameterHandler("--folds", "NUM", "Number of cross-validation folds (defaults to " + crossValidationFoldCount + ")", v => crossValidationFoldCount = v, CommandLineParameterType.Optional);
            parser.RegisterParameterHandler("--iterations", "NUM", "Number of training algorithm iterations (defaults to " + iterationCount + ")", v => iterationCount         = v, CommandLineParameterType.Optional);
            parser.RegisterParameterHandler("--batches", "NUM", "Number of batches to split the training data into (defaults to " + batchCount + ")", v => batchCount          = v, CommandLineParameterType.Optional);
            parser.RegisterParameterHandler("--compute-evidence", "Compute model evidence (defaults to " + computeModelEvidence + ")", () => computeModelEvidence = true);

            if (!parser.TryParse(args, usagePrefix))
            {
                return(false);
            }

            // Load and shuffle data
            var dataSet = ClassifierPersistenceUtils.LoadLabeledFeatureValues(dataSetFile);

            BayesPointMachineClassifierModuleUtilities.WriteDataSetInfo(dataSet);

            Rand.Restart(562);
            Rand.Shuffle(dataSet);

            // Create evaluator
            var evaluatorMapping = Mappings.Classifier.ForEvaluation();
            var evaluator        = new ClassifierEvaluator <IList <LabeledFeatureValues>, LabeledFeatureValues, IList <LabelDistribution>, string>(evaluatorMapping);

            // Create performance metrics
            var accuracy = new List <double>();
            var negativeLogProbability = new List <double>();
            var auc             = new List <double>();
            var evidence        = new List <double>();
            var iterationCounts = new List <double>();
            var trainingTime    = new List <double>();

            // Run cross-validation
            int validationSetSize = dataSet.Count / crossValidationFoldCount;

            Console.WriteLine("Running {0}-fold cross-validation on {1}", crossValidationFoldCount, dataSetFile);

            // TODO: Use chained mapping to implement cross-validation
            for (int fold = 0; fold < crossValidationFoldCount; fold++)
            {
                // Construct training and validation sets for fold
                int validationSetStart = fold * validationSetSize;
                int validationSetEnd   = (fold + 1 == crossValidationFoldCount)
                                           ? dataSet.Count
                                           : (fold + 1) * validationSetSize;

                var trainingSet   = new List <LabeledFeatureValues>();
                var validationSet = new List <LabeledFeatureValues>();

                for (int instance = 0; instance < dataSet.Count; instance++)
                {
                    if (validationSetStart <= instance && instance < validationSetEnd)
                    {
                        validationSet.Add(dataSet[instance]);
                    }
                    else
                    {
                        trainingSet.Add(dataSet[instance]);
                    }
                }

                // Print info
                Console.WriteLine("   Fold {0} [validation set instances {1} - {2}]", fold + 1, validationSetStart, validationSetEnd - 1);

                // Create classifier
                var classifier = BayesPointMachineClassifier.CreateBinaryClassifier(Mappings.Classifier);
                classifier.Settings.Training.IterationCount       = iterationCount;
                classifier.Settings.Training.BatchCount           = batchCount;
                classifier.Settings.Training.ComputeModelEvidence = computeModelEvidence;

                int currentIterationCount = 0;
                classifier.IterationChanged += (sender, eventArgs) => { currentIterationCount = eventArgs.CompletedIterationCount; };

                // Train classifier
                var stopWatch = new Stopwatch();
                stopWatch.Start();
                classifier.Train(trainingSet);
                stopWatch.Stop();

                // Produce predictions
                var predictions     = classifier.PredictDistribution(validationSet).ToList();
                var predictedLabels = predictions.Select(
                    prediction => prediction.Aggregate((aggregate, next) => next.Value > aggregate.Value ? next : aggregate).Key).ToList();

                // Iteration count
                iterationCounts.Add(currentIterationCount);

                // Training time
                trainingTime.Add(stopWatch.ElapsedMilliseconds);

                // Compute accuracy
                accuracy.Add(1 - (evaluator.Evaluate(validationSet, predictedLabels, Metrics.ZeroOneError) / predictions.Count));

                // Compute mean negative log probability
                negativeLogProbability.Add(evaluator.Evaluate(validationSet, predictions, Metrics.NegativeLogProbability) / predictions.Count);

                // Compute M-measure (averaged pairwise AUC)
                auc.Add(evaluator.AreaUnderRocCurve(validationSet, predictions));

                // Compute log evidence if desired
                evidence.Add(computeModelEvidence ? classifier.LogModelEvidence : double.NaN);

                // Persist performance metrics
                Console.WriteLine(
                    "      Accuracy = {0,5:0.0000}   NegLogProb = {1,5:0.0000}   AUC = {2,5:0.0000}{3}   Iterations = {4}   Training time = {5}",
                    accuracy[fold],
                    negativeLogProbability[fold],
                    auc[fold],
                    computeModelEvidence ? string.Format("   Log evidence = {0,5:0.0000}", evidence[fold]) : string.Empty,
                    iterationCounts[fold],
                    BayesPointMachineClassifierModuleUtilities.FormatElapsedTime(trainingTime[fold]));

                BayesPointMachineClassifierModuleUtilities.SavePerformanceMetrics(
                    resultsFile, accuracy, negativeLogProbability, auc, evidence, iterationCounts, trainingTime);
            }

            return(true);
        }
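After the cross-validation loop, the per-fold lists still hold every metric. A minimal sketch of a summary that could be placed just before return(true), assuming the usual System.Linq using; the formatting of the message is an assumption:

            // Hypothetical summary across folds (placed just before "return(true);").
            double meanAccuracy = accuracy.Average();
            double stdAccuracy  = Math.Sqrt(accuracy.Select(a => (a - meanAccuracy) * (a - meanAccuracy)).Average());
            Console.WriteLine(
                "Cross-validated accuracy = {0:0.0000} +/- {1:0.0000} over {2} folds",
                meanAccuracy,
                stdAccuracy,
                crossValidationFoldCount);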
Example #18
    /// <summary>
    /// CrossValidate diagnosis
    /// </summary>
    /// <param name="x"></param>
    /// <param name="y"></param>
    /// <param name="mapping"></param>
    /// <param name="reportFileName"></param>
    /// <param name="crossValidationFoldCount"></param>
    /// <param name="iterationCount"></param>
    /// <param name="computeModelEvidence"></param>
    /// <param name="batchCount"></param>
    /// <remarks>Adapted from MicrosoftResearch.Infer.Learners</remarks>
    public CrossValidateMapped(
        Vector[] x,
        IList <string> y,
        GenericClassifierMapping mapping,
        string reportFileName,
        int crossValidationFoldCount, //folds
        int iterationCount,
        bool computeModelEvidence,
        int batchCount)
    {
        Debug.Assert(x != null, "The feature vector must not be null.");
        Debug.Assert(y != null, "The target variable must not be null.");
        Debug.Assert(mapping != null, "The classifier map must not be null.");
        Debug.Assert(!string.IsNullOrEmpty(reportFileName), "The report file name must not be null/empty.");
        Debug.Assert(iterationCount > 0, "The iteration count must be greater than zero.");
        Debug.Assert(batchCount > 0, "The batch count must be greater than zero.");

        // Shuffle dataset (note: x and y must be shuffled with the same permutation so features stay aligned with their labels)
        shuffleVector(x);

        // Create evaluator
        var evaluatorMapping = mapping.ForEvaluation();
        var evaluator        = new ClassifierEvaluator <
            IList <Vector>,         // the type of the instance source,
            int,                    // the type of an instance
            IList <string>,         // the type of the label source
            string>(                // the type of a label.
            evaluatorMapping);


        // Create performance metrics
        var accuracy = new List <double>();
        var negativeLogProbability = new List <double>();
        var auc             = new List <double>();
        var evidence        = new List <double>();
        var iterationCounts = new List <double>();
        var trainingTime    = new List <double>();

        // Run cross-validation
        int validationSetSize     = x.Length / crossValidationFoldCount;
        int trainingSetSize       = x.Length - validationSetSize;
        int validationFoldSetSize = 0;
        int trainingFoldSetSize   = 0;

        Console.WriteLine(
            "Running {0}-fold cross-validation", crossValidationFoldCount);

        if (validationSetSize == 0 || trainingSetSize == 0)
        {
            Console.WriteLine("Invalid number of folds");
            Console.ReadKey();
            System.Environment.Exit(1);
        }

        for (int fold = 0; fold < crossValidationFoldCount; fold++)
        {
            // Construct training and validation sets for fold
            int validationSetStart = fold * validationSetSize;
            int validationSetEnd   = (fold + 1 == crossValidationFoldCount)
                                       ? x.Length
                                       : (fold + 1) * validationSetSize;


            validationFoldSetSize = validationSetEnd - validationSetStart;
            trainingFoldSetSize   = x.Length - validationFoldSetSize;

            Vector[]       trainingSet         = new Vector[trainingFoldSetSize];
            Vector[]       validationSet       = new Vector[validationFoldSetSize];
            IList <string> trainingSetLabels   = new List <string>();
            IList <string> validationSetLabels = new List <string>();

            for (int instance = 0, iv = 0, it = 0; instance < x.Length; instance++)
            {
                if (validationSetStart <= instance && instance < validationSetEnd)
                {
                    validationSet[iv++] = x[instance];
                    validationSetLabels.Add(y[instance]);
                }
                else
                {
                    trainingSet[it++] = x[instance];
                    trainingSetLabels.Add(y[instance]);
                }
            }

            // Print info
            Console.WriteLine("   Fold {0} [validation set instances {1} - {2}]", fold + 1, validationSetStart, validationSetEnd - 1);

            // Create classifier
            var classifier = BayesPointMachineClassifier.CreateBinaryClassifier(mapping);
            classifier.Settings.Training.IterationCount       = iterationCount;
            classifier.Settings.Training.BatchCount           = batchCount;
            classifier.Settings.Training.ComputeModelEvidence = computeModelEvidence;

            int currentIterationCount = 0;
            classifier.IterationChanged += (sender, eventArgs) => { currentIterationCount = eventArgs.CompletedIterationCount; };

            // Train classifier
            var stopWatch = new Stopwatch();
            stopWatch.Start();
            classifier.Train(trainingSet, trainingSetLabels);
            stopWatch.Stop();

            // Produce predictions (materialised once so prediction is not re-run on each enumeration below)
            IList <IDictionary <string, double> > predictions =
                classifier.PredictDistribution(validationSet).ToList();
            var predictedLabels = classifier.Predict(validationSet);

            // Iteration count
            iterationCounts.Add(currentIterationCount);

            // Training time
            trainingTime.Add(stopWatch.ElapsedMilliseconds);

            // Compute accuracy
            accuracy.Add(1 - (evaluator.Evaluate(validationSet, validationSetLabels, predictedLabels, Metrics.ZeroOneError) / predictions.Count()));

            // Compute mean negative log probability
            negativeLogProbability.Add(evaluator.Evaluate(validationSet, validationSetLabels, predictions, Metrics.NegativeLogProbability) / predictions.Count());

            // Compute M-measure (averaged pairwise AUC)
            auc.Add(evaluator.AreaUnderRocCurve(validationSet, validationSetLabels, predictions));

            // Compute log evidence if desired
            evidence.Add(computeModelEvidence ? classifier.LogModelEvidence : double.NaN);

            // Persist performance metrics
            Console.WriteLine(
                "      Accuracy = {0,5:0.0000}   NegLogProb = {1,5:0.0000}   AUC = {2,5:0.0000}{3}   Iterations = {4}   Training time = {5}",
                accuracy[fold],
                negativeLogProbability[fold],
                auc[fold],
                computeModelEvidence ? string.Format("   Log evidence = {0,5:0.0000}", evidence[fold]) : string.Empty,
                iterationCounts[fold],
                FormatElapsedTime(trainingTime[fold]));

            SavePerformanceMetrics(
                reportFileName, accuracy, negativeLogProbability, auc, evidence, iterationCounts, trainingTime);
        }
    }
Example #19
    /// <summary>
    /// Creates the evaluation reports for a mapped Bayes point machine classifier: evaluation report,
    /// prediction distributions, sampled weights, calibration, precision-recall and ROC curves.
    /// </summary>
    /// <param name="classifier">The Bayes point machine classifier.</param>
    /// <param name="evaluator">The classifier evaluator.</param>
    /// <param name="x">The x vector of the ground truth.</param>
    /// <param name="y">The y vector of the ground truth (the labels).</param>
    /// <param name="yPredicDistrib">The predictive distributions.</param>
    /// <param name="yPredicLabel">The predicted labels.</param>
    /// <param name="reportFileName">The name of the file to write the evaluation report to.</param>
    /// <param name="positiveClassLabel">The label of the positive class.</param>
    /// <param name="groundTruthFileName">The name of the file containing the ground truth.</param>
    /// <param name="predictionsFileName">The name of the file to write the prediction distributions to.</param>
    /// <param name="weightsFileName">The name of the file to write the sampled weights to.</param>
    /// <param name="calibrationCurveFileName">The name of the file to write the calibration plot to.</param>
    /// <param name="precisionRecallCurveFileName">The name of the file to write the precision-recall curve to.</param>
    /// <param name="rocCurveFileName">The name of the file to write the receiver operating characteristic curve to.</param>
    public EvaluationReportsMapped(
        IBayesPointMachineClassifier <
            IList <Vector>,
            int,
            IList <string>,
            string,
            IDictionary <string, double>,
            BayesPointMachineClassifierTrainingSettings,
            BinaryBayesPointMachineClassifierPredictionSettings <string> > classifier,
        ClassifierEvaluator <IList <Vector>, int, IList <string>, string> evaluator,
        Vector[] x,
        IList <string> y,
        IEnumerable <IDictionary <string, double> > yPredicDistrib,
        IEnumerable <string> yPredicLabel,
        string reportFileName,
        string positiveClassLabel,
        string groundTruthFileName          = "",
        string predictionsFileName          = "",
        string weightsFileName              = "",
        string calibrationCurveFileName     = "",
        string precisionRecallCurveFileName = "",
        string rocCurveFileName             = "")
    {
        Debug.Assert(classifier != null, "The classifier must not be null.");
        Debug.Assert(evaluator != null, "The evaluator must not be null.");
        Debug.Assert(x != null, "The feature vector must not be null.");
        Debug.Assert(y != null, "The target variable must not be null.");
        Debug.Assert(yPredicDistrib != null, "The predictive distribution must not be null.");
        Debug.Assert(yPredicLabel != null, "The predicted labels must not be null.");
        Debug.Assert(!string.IsNullOrEmpty(reportFileName), "The report file name must not be null/empty.");
        Debug.Assert(!string.IsNullOrEmpty(positiveClassLabel), "The positive class label must not be null/empty.");

        // Write evaluation report header information
        if (!string.IsNullOrEmpty(reportFileName))
        {
            using (var writer = new StreamWriter(reportFileName))
            {
                this.WriteReportHeader(writer, groundTruthFileName, predictionsFileName);
                this.WriteReport(writer, evaluator, x, y, yPredicDistrib, yPredicLabel);
            }
        }

        // Write the prediction distribution for all labels
        if (!string.IsNullOrEmpty(predictionsFileName))
        {
            SaveLabelDistributions(predictionsFileName, yPredicDistrib);
        }

        // Compute and write the empirical probability calibration curve
        if (!string.IsNullOrEmpty(calibrationCurveFileName))
        {
            this.WriteCalibrationCurve(calibrationCurveFileName, evaluator, x, y, yPredicDistrib, positiveClassLabel);
        }

        // Compute and write the precision-recall curve
        if (!string.IsNullOrEmpty(precisionRecallCurveFileName))
        {
            this.WritePrecisionRecallCurve(precisionRecallCurveFileName, evaluator, x, y, yPredicDistrib, positiveClassLabel);
        }

        // Compute and write the receiver operating characteristic curve
        if (!string.IsNullOrEmpty(rocCurveFileName))
        {
            this.WriteRocCurve(rocCurveFileName, evaluator, x, y, yPredicDistrib, positiveClassLabel);
        }

        // Compute and write the weights
        if (!string.IsNullOrEmpty(weightsFileName))
        {
            this.SampleWeights(weightsFileName, classifier);
        }
    }