/// <summary>
        /// Runs the module.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false otherwise.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            // Values filled in by the command-line handlers registered below; the two
            // counts keep the training-settings defaults unless overridden.
            var dataPath = string.Empty;
            var parameterChangesPath = string.Empty;
            var modelPath = string.Empty;
            int iterations = BayesPointMachineClassifierTrainingSettings.IterationCountDefault;
            int batches = BayesPointMachineClassifierTrainingSettings.BatchCountDefault;

            var commandLine = new CommandLineParser();
            commandLine.RegisterParameterHandler(
                "--training-set",
                "FILE",
                "File with training data",
                value => dataPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--results",
                "FILE",
                "File to store the maximum parameter differences",
                value => parameterChangesPath = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--model",
                "FILE",
                "File to store the trained binary Bayes point machine model",
                value => modelPath = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--iterations",
                "NUM",
                "Number of training algorithm iterations (defaults to " + iterations + ")",
                value => iterations = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--batches",
                "NUM",
                "Number of batches to split the training data into (defaults to " + batches + ")",
                value => batches = value,
                CommandLineParameterType.Optional);

            // Nothing is loaded or diagnosed unless the arguments parse.
            bool parsed = commandLine.TryParse(args, usagePrefix);
            if (!parsed)
            {
                return false;
            }

            var labeledData = ClassifierPersistenceUtils.LoadLabeledFeatureValues(dataPath);
            BayesPointMachineClassifierModuleUtilities.WriteDataSetInfo(labeledData);

            // Binary classifier configured with the requested iteration and batch counts.
            var bpm = BayesPointMachineClassifier.CreateBinaryClassifier(Mappings.Classifier);
            var trainingSettings = bpm.Settings.Training;
            trainingSettings.IterationCount = iterations;
            trainingSettings.BatchCount = batches;

            BayesPointMachineClassifierModuleUtilities.DiagnoseClassifier(
                bpm, labeledData, parameterChangesPath, modelPath);

            return true;
        }
        /// <summary>
        /// Runs the module.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false otherwise.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            // All three options are required; the handlers below fill these in.
            var testDataPath = string.Empty;
            var modelPath = string.Empty;
            var outputPath = string.Empty;

            var commandLine = new CommandLineParser();
            commandLine.RegisterParameterHandler(
                "--test-set",
                "FILE",
                "File with test data",
                value => testDataPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--model",
                "FILE",
                "File with a trained multi-class Bayes point machine model",
                value => modelPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--predictions",
                "FILE",
                "File to store predictions for the test data",
                value => outputPath = value,
                CommandLineParameterType.Required);

            bool parsed = commandLine.TryParse(args, usagePrefix);
            if (!parsed)
            {
                return false;
            }

            var testData = ClassifierPersistenceUtils.LoadLabeledFeatureValues(testDataPath);
            BayesPointMachineClassifierModuleUtilities.WriteDataSetInfo(testData);

            // Load the previously trained multi-class model from disk.
            var bpm = BayesPointMachineClassifier.LoadMulticlassClassifier<
                IList<LabeledFeatureValues>,
                LabeledFeatureValues,
                IList<LabelDistribution>,
                string,
                IDictionary<string, double>>(modelPath);

            // Predict a label distribution per test instance and write them to the output file.
            var labelDistributions = bpm.PredictDistribution(testData);
            ClassifierPersistenceUtils.SaveLabelDistributions(outputPath, labelDistributions);

            return true;
        }
Exemplo n.º 3
0
    /// <summary>
    /// Validates the evaluation arguments, selects the classifier to evaluate and
    /// constructs an <c>EvaluationReportsMapped</c> for it.
    /// </summary>
    /// <param name="inputModelFileName">
    /// File name of a saved binary classifier. When it is null or empty, the
    /// classifier held by this instance is evaluated instead.
    /// </param>
    /// <param name="reportFileName">Forwarded to the validator and to the evaluation reports.</param>
    /// <param name="positiveClassLabel">Forwarded to the evaluation reports.</param>
    /// <param name="groundTruthFileName">Forwarded to the validator and to the evaluation reports.</param>
    /// <param name="predictionsFileName">Forwarded to the validator and to the evaluation reports.</param>
    /// <param name="weightsFileName">Forwarded to the validator and to the evaluation reports.</param>
    /// <param name="calibrationCurveFileName">Forwarded to the validator and to the evaluation reports.</param>
    /// <param name="precisionRecallCurveFileName">Forwarded to the validator and to the evaluation reports.</param>
    /// <param name="rocCurveFileName">Forwarded to the validator and to the evaluation reports.</param>
    public override void Evaluate(
        string inputModelFileName,
        string reportFileName,
        string positiveClassLabel,
        string groundTruthFileName,
        string predictionsFileName,
        string weightsFileName,
        string calibrationCurveFileName,
        string precisionRecallCurveFileName,
        string rocCurveFileName)
    {
        IBayesPointMachineClassifier<
            IList<Vector>, int, IList<string>, string, IDictionary<string, double>,
            BayesPointMachineClassifierTrainingSettings,
            BinaryBayesPointMachineClassifierPredictionSettings<string>> classifier = null;

        // Validate
        _validate.Evaluate(
            inputModelFileName: inputModelFileName,
            reportFileName: reportFileName,
            groundTruthFileName: groundTruthFileName,
            predictionsFileName: predictionsFileName,
            weightsFileName: weightsFileName,
            calibrationCurveFileName: calibrationCurveFileName,
            precisionRecallCurveFileName: precisionRecallCurveFileName,
            rocCurveFileName: rocCurveFileName);

        // Load the model from disk only when a file name was actually supplied;
        // otherwise fall back to the classifier already held by this instance.
        // (The original test was inverted and tried to load from a null/empty name.)
        if (!string.IsNullOrEmpty(inputModelFileName))
        {
            classifier =
                BayesPointMachineClassifier.LoadBinaryClassifier<
                    IList<Vector>, int, IList<string>, string, IDictionary<string, double>>
                    (inputModelFileName);
        }
        else
        {
            classifier = _classifier;
        }

        // The local is never read afterwards; the object is constructed for whatever
        // work its constructor performs (presumably producing the reports - not visible here).
        EvaluationReportsMapped evaluationReports = new EvaluationReportsMapped(
            classifier,
            _evaluator,
            _x,
            _y,
            _yPredicDistrib,
            _yPredicLabel,
            reportFileName,
            positiveClassLabel,
            groundTruthFileName,
            predictionsFileName,
            weightsFileName,
            calibrationCurveFileName,
            precisionRecallCurveFileName,
            rocCurveFileName);
    }
        /// <summary>
        /// Runs the module.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false otherwise.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            // Values filled in by the command-line handlers; optional ones keep the
            // training-settings defaults unless overridden.
            var dataPath = string.Empty;
            var modelPath = string.Empty;
            int iterations = BayesPointMachineClassifierTrainingSettings.IterationCountDefault;
            int batches = BayesPointMachineClassifierTrainingSettings.BatchCountDefault;
            bool wantEvidence = BayesPointMachineClassifierTrainingSettings.ComputeModelEvidenceDefault;

            var commandLine = new CommandLineParser();
            commandLine.RegisterParameterHandler(
                "--training-set",
                "FILE",
                "File with training data",
                value => dataPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--model",
                "FILE",
                "File to store the trained multi-class Bayes point machine model",
                value => modelPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--iterations",
                "NUM",
                "Number of training algorithm iterations (defaults to " + iterations + ")",
                value => iterations = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--batches",
                "NUM",
                "Number of batches to split the training data into (defaults to " + batches + ")",
                value => batches = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--compute-evidence",
                "Compute model evidence (defaults to " + wantEvidence + ")",
                () => wantEvidence = true);

            bool parsed = commandLine.TryParse(args, usagePrefix);
            if (!parsed)
            {
                return false;
            }

            var labeledData = ClassifierPersistenceUtils.LoadLabeledFeatureValues(dataPath);
            BayesPointMachineClassifierModuleUtilities.WriteDataSetInfo(labeledData);

            // The mapping is built from the feature set of the first instance,
            // or from null when the training set is empty.
            var features = labeledData.Count <= 0 ? null : labeledData.First().FeatureSet;
            var bpm = BayesPointMachineClassifier.CreateMulticlassClassifier(new ClassifierMapping(features));

            bpm.Settings.Training.IterationCount = iterations;
            bpm.Settings.Training.BatchCount = batches;
            bpm.Settings.Training.ComputeModelEvidence = wantEvidence;

            bpm.Train(labeledData);

            // Report the log evidence only if the classifier was configured to compute it.
            if (bpm.Settings.Training.ComputeModelEvidence)
            {
                Console.WriteLine("Log evidence = {0,10:0.0000}", bpm.LogModelEvidence);
            }

            bpm.Save(modelPath);
            return true;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Trains a multi-class classifier on the newest PK10 data and shows the label
        /// predicted for a single one-dimensional test vector in a message box.
        /// </summary>
        void TestML()
        {
            // Request the newest data using a date 17 days before now.
            // Original author's note: at least 180*16 days plus the current day's record count > 1000.
            var reader = new PK10ExpectReader();
            ExpectList history = reader.ReadNewestData(DateTime.Now.AddDays(-17));

            var bpm = BayesPointMachineClassifier.CreateMulticlassClassifier(new DaXiao_Mapping());

            FeatureLabeItems samples = new PKDataListSetFactory(history).OccurFeatureAndLabels();
            bpm.Train(samples.FeatureVectors, samples.Labels);

            // A single test instance: a length-1 vector whose only element is 1.
            Vector probe = Vector.Zero(1);
            probe[0] = 1;
            List<Vector> probes = new List<Vector> { probe };

            // The distribution is requested but its result is not used afterwards.
            var distributions = bpm.PredictDistribution(probes);

            // Predict the label of instance 0 and display it.
            string predictedLabel = bpm.Predict(0, probes);
            MessageBox.Show(predictedLabel);
        }
Exemplo n.º 6
0
    /// <summary>
    /// Predicts the label distribution of a single instance of the stored feature data.
    /// </summary>
    /// <param name="inputModelFileName">
    /// File name of a saved binary classifier. When it is null or empty, the
    /// classifier held by this instance is used instead.
    /// </param>
    /// <param name="distributionName">Not used by this implementation (a warning is logged).</param>
    /// <param name="inferenceEngineAlgorithm">Not used by this implementation (a warning is logged).</param>
    /// <param name="instance">The instance passed to the classifier together with the stored feature data.</param>
    /// <param name="noise">Not used by this implementation (a warning is logged).</param>
    /// <returns>The predicted distribution over labels for the instance.</returns>
    public override IDictionary<string, double> PredictInstance(
        string inputModelFileName,
        DistributionName distributionName,
        InferenceAlgorithm inferenceEngineAlgorithm,
        int instance,
        double noise)
    {
        TraceListeners.Log(TraceEventType.Warning, 0,
                           "Advanced setting will not be used: " +
                           "distributionName, inferenceEngineAlgorithm  & noise.", false, true);

        // Validate
        _validate.PredictInstance(
            inputModelFileName: inputModelFileName,
            instance: instance,
            numObservations: _numObservations);

        IBayesPointMachineClassifier<
            IList<Vector>, int, IList<string>, string, IDictionary<string, double>,
            BayesPointMachineClassifierTrainingSettings,
            BinaryBayesPointMachineClassifierPredictionSettings<string>> classifier = null;

        // Load the model from disk only when a file name was actually supplied;
        // otherwise fall back to the classifier already held by this instance.
        // (The original test was inverted and tried to load from a null/empty name.)
        if (!string.IsNullOrEmpty(inputModelFileName))
        {
            classifier =
                BayesPointMachineClassifier.LoadBinaryClassifier<
                    IList<Vector>, int, IList<string>, string, IDictionary<string, double>>
                    (inputModelFileName);
        }
        else
        {
            classifier = _classifier;
        }

        IDictionary<string, double> yPredicted =
            classifier.PredictDistribution(instance, _x);

        return yPredicted;
    }
Exemplo n.º 7
0
        /// <summary>
        /// Runs the module.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false otherwise.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            // Both options are required; the handlers below fill these in.
            var modelPath = string.Empty;
            var samplesPath = string.Empty;

            var commandLine = new CommandLineParser();
            commandLine.RegisterParameterHandler(
                "--model",
                "FILE",
                "File with a trained binary Bayes point machine model",
                value => modelPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--samples",
                "FILE",
                "File to store samples of the weights",
                value => samplesPath = value,
                CommandLineParameterType.Required);

            bool parsed = commandLine.TryParse(args, usagePrefix);
            if (!parsed)
            {
                return false;
            }

            // Load the trained binary model and hand it to the weight-sampling utility.
            var bpm = BayesPointMachineClassifier.LoadBinaryClassifier<
                IList<LabeledFeatureValues>,
                LabeledFeatureValues,
                IList<LabelDistribution>,
                string,
                IDictionary<string, double>>(modelPath);

            BayesPointMachineClassifierModuleUtilities.SampleWeights(bpm, samplesPath);

            return true;
        }
Exemplo n.º 8
0
    /// <summary>
    /// Loads a saved binary classifier, continues training it on the stored
    /// features and labels, and saves the result.
    /// </summary>
    /// <param name="inputModelFileName">File name of the saved classifier to load.</param>
    /// <param name="outputModelFileName">File name the updated classifier is saved to.</param>
    /// <param name="iterationCount">Value assigned to the training iteration count setting.</param>
    /// <param name="computeModelEvidence">Value assigned to the compute-model-evidence training setting.</param>
    /// <param name="batchCount">Value assigned to the training batch count setting.</param>
    /// <param name="distributionName">Not used by this implementation (a warning is logged).</param>
    /// <param name="inferenceEngineAlgorithm">Not used by this implementation (a warning is logged).</param>
    /// <param name="noise">Not used by this implementation (a warning is logged).</param>
    public override void TrainIncremental(
        string inputModelFileName,
        string outputModelFileName,
        int iterationCount,
        bool computeModelEvidence,
        int batchCount,
        DistributionName distributionName,
        InferenceAlgorithm inferenceEngineAlgorithm,
        double noise)
    {
        // Warn that the advanced arguments are ignored here.
        TraceListeners.Log(
            TraceEventType.Warning,
            0,
            "Advanced setting will not be used: distributionName, inferenceEngineAlgorithm & noise.",
            false,
            true);

        // Check the arguments before touching the model file.
        _validate.TrainIncremental(
            inputModelFileName: inputModelFileName,
            outputModelFileName: outputModelFileName,
            iterationCount: iterationCount,
            batchCount: batchCount);

        // Restore the previously saved model.
        IBayesPointMachineClassifier<
            IList<Vector>, int, IList<string>, string, IDictionary<string, double>,
            BayesPointMachineClassifierTrainingSettings,
            BinaryBayesPointMachineClassifierPredictionSettings<string>> restored =
            BayesPointMachineClassifier.LoadBinaryClassifier<
                IList<Vector>, int, IList<string>, string, IDictionary<string, double>>(inputModelFileName);

        // Apply the requested training settings.
        var training = restored.Settings.Training;
        training.ComputeModelEvidence = computeModelEvidence;
        training.IterationCount = iterationCount;
        training.BatchCount = batchCount;

        // Continue training on the stored data, then persist the updated model.
        restored.TrainIncremental(_x, _y);
        restored.Save(outputModelFileName);
    }
Exemplo n.º 9
0
        /// <summary>
        /// Runs the module.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false otherwise.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            // Values filled in by the command-line handlers; the two counts keep the
            // training-settings defaults unless overridden.
            var dataPath = string.Empty;
            var sourceModelPath = string.Empty;
            var targetModelPath = string.Empty;
            int iterations = BayesPointMachineClassifierTrainingSettings.IterationCountDefault;
            int batches = BayesPointMachineClassifierTrainingSettings.BatchCountDefault;

            var commandLine = new CommandLineParser();
            commandLine.RegisterParameterHandler(
                "--training-set",
                "FILE",
                "File with training data",
                value => dataPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--input-model",
                "FILE",
                "File with the trained multi-class Bayes point machine model",
                value => sourceModelPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--model",
                "FILE",
                "File to store the incrementally trained multi-class Bayes point machine model",
                value => targetModelPath = value,
                CommandLineParameterType.Required);
            commandLine.RegisterParameterHandler(
                "--iterations",
                "NUM",
                "Number of training algorithm iterations (defaults to " + iterations + ")",
                value => iterations = value,
                CommandLineParameterType.Optional);
            commandLine.RegisterParameterHandler(
                "--batches",
                "NUM",
                "Number of batches to split the training data into (defaults to " + batches + ")",
                value => batches = value,
                CommandLineParameterType.Optional);

            bool parsed = commandLine.TryParse(args, usagePrefix);
            if (!parsed)
            {
                return false;
            }

            var labeledData = ClassifierPersistenceUtils.LoadLabeledFeatureValues(dataPath);
            BayesPointMachineClassifierModuleUtilities.WriteDataSetInfo(labeledData);

            // Restore the existing multi-class model that training continues from.
            var bpm = BayesPointMachineClassifier.LoadMulticlassClassifier<
                IList<LabeledFeatureValues>,
                LabeledFeatureValues,
                IList<LabelDistribution>,
                string,
                IDictionary<string, double>>(sourceModelPath);

            var trainingSettings = bpm.Settings.Training;
            trainingSettings.IterationCount = iterations;
            trainingSettings.BatchCount = batches;

            // Train further on the new data and write the result to the output file.
            bpm.TrainIncremental(labeledData);
            bpm.Save(targetModelPath);

            return true;
        }
Exemplo n.º 10
0
    /// <summary>
    /// Creates the wrapper: builds a classifier mapping from the given labels, a binary
    /// Bayes point machine classifier on top of it, and a matching evaluator.
    /// </summary>
    /// <param name="labels">The class labels; asserted (debug builds) to be non-null with exactly two entries.</param>
    public BPMMapped(string[] labels)
    {
        Debug.Assert(labels != null, "The labels must not be null.");
        Debug.Assert(labels.Length == 2, "The labels must have two possible values.");

        // Argument validator used by the public operations.
        _validate = new Validate();

        // Mapping and the binary classifier created from it.
        _mapping = new GenericClassifierMapping(labels);
        _classifier = BayesPointMachineClassifier.CreateBinaryClassifier(_mapping);

        // Evaluator working on the evaluation view of the same mapping.
        _evaluator = new ClassifierEvaluator<IList<Vector>, int, IList<string>, string>(
            _mapping.ForEvaluation());

        // No data set has been registered yet.
        _availableDatasetName = new DatasetName();
        _numObservations = 0;
        _numFeatures = 0;
    }
Exemplo n.º 11
0
    /// <summary>
    /// Predicts label distributions and labels for the stored feature data and keeps
    /// both results in the instance fields.
    /// </summary>
    /// <param name="inputModelFileName">
    /// File name of a saved binary classifier. When it is null or empty, the
    /// classifier held by this instance is used instead.
    /// </param>
    /// <param name="distributionName">Not used by this implementation (a warning is logged).</param>
    /// <param name="inferenceEngineAlgorithm">Not used by this implementation (a warning is logged).</param>
    /// <param name="noise">Not used by this implementation (a warning is logged).</param>
    public override void Predict(
        string inputModelFileName,
        DistributionName distributionName,
        InferenceAlgorithm inferenceEngineAlgorithm,
        double noise)
    {
        TraceListeners.Log(TraceEventType.Warning, 0,
                           "Advanced setting will not be used: " +
                           "distributionName, inferenceEngineAlgorithm & noise.", false, true);

        // Validate
        _validate.Predict(inputModelFileName);

        // Define the classifier
        IBayesPointMachineClassifier<
            IList<Vector>, int, IList<string>, string, IDictionary<string, double>,
            BayesPointMachineClassifierTrainingSettings,
            BinaryBayesPointMachineClassifierPredictionSettings<string>> classifier = null;

        // Load the model from disk only when a file name was actually supplied;
        // otherwise fall back to the classifier already held by this instance.
        // (The original test was inverted and tried to load from a null/empty name.)
        if (!string.IsNullOrEmpty(inputModelFileName))
        {
            classifier =
                BayesPointMachineClassifier.LoadBinaryClassifier<
                    IList<Vector>, int, IList<string>, string, IDictionary<string, double>>
                    (inputModelFileName);
        }
        else
        {
            classifier = _classifier;
        }

        // NOTE(review): _x is passed for both arguments; confirm against
        // ValidatePredict's signature that this is intended.
        _validate.ValidatePredict(_x, _x);
        _yPredicDistrib = classifier.PredictDistribution(_x);
        _yPredicLabel   = classifier.Predict(_x);
    }
Exemplo n.º 12
0
        /// <summary>
        /// Runs the module.
        /// </summary>
        /// <param name="args">The command line arguments for the module.</param>
        /// <param name="usagePrefix">The prefix to print before the usage string.</param>
        /// <returns>True if the run was successful, false otherwise.</returns>
        public override bool Run(string[] args, string usagePrefix)
        {
            string dataSetFile = string.Empty;
            string resultsFile = string.Empty;
            int    crossValidationFoldCount = 5;
            int    iterationCount           = BayesPointMachineClassifierTrainingSettings.IterationCountDefault;
            int    batchCount               = BayesPointMachineClassifierTrainingSettings.BatchCountDefault;
            bool   computeModelEvidence     = BayesPointMachineClassifierTrainingSettings.ComputeModelEvidenceDefault;

            var parser = new CommandLineParser();

            parser.RegisterParameterHandler("--data-set", "FILE", "File with training data", v => dataSetFile = v, CommandLineParameterType.Required);
            parser.RegisterParameterHandler("--results", "FILE", "File with cross-validation results", v => resultsFile = v, CommandLineParameterType.Required);
            parser.RegisterParameterHandler("--folds", "NUM", "Number of cross-validation folds (defaults to " + crossValidationFoldCount + ")", v => crossValidationFoldCount = v, CommandLineParameterType.Optional);
            parser.RegisterParameterHandler("--iterations", "NUM", "Number of training algorithm iterations (defaults to " + iterationCount + ")", v => iterationCount = v, CommandLineParameterType.Optional);
            parser.RegisterParameterHandler("--batches", "NUM", "Number of batches to split the training data into (defaults to " + batchCount + ")", v => batchCount = v, CommandLineParameterType.Optional);
            parser.RegisterParameterHandler("--compute-evidence", "Compute model evidence (defaults to " + computeModelEvidence + ")", () => computeModelEvidence = true);

            if (!parser.TryParse(args, usagePrefix))
            {
                return false;
            }

            // A fold count of zero would divide by zero below, and a negative one would
            // silently do nothing; reject both up front.
            if (crossValidationFoldCount < 1)
            {
                Console.WriteLine("Invalid number of folds: {0}. The fold count must be at least 1.", crossValidationFoldCount);
                return false;
            }

            // Load and shuffle data
            var dataSet = ClassifierPersistenceUtils.LoadLabeledFeatureValues(dataSetFile);

            BayesPointMachineClassifierModuleUtilities.WriteDataSetInfo(dataSet);

            // Fixed seed, so the shuffle (and therefore the fold assignment) is repeatable.
            Rand.Restart(562);
            Rand.Shuffle(dataSet);

            // Create evaluator
            var evaluatorMapping = Mappings.Classifier.ForEvaluation();
            var evaluator        = new ClassifierEvaluator<IList<LabeledFeatureValues>, LabeledFeatureValues, IList<LabelDistribution>, string>(evaluatorMapping);

            // Create performance metrics (one entry per completed fold)
            var accuracy               = new List<double>();
            var negativeLogProbability = new List<double>();
            var auc                    = new List<double>();
            var evidence               = new List<double>();
            var iterationCounts        = new List<double>();
            var trainingTime           = new List<double>();

            // Run cross-validation
            int validationSetSize = dataSet.Count / crossValidationFoldCount;

            // With more folds than instances every fold but the last would have an empty
            // validation set, and the per-fold averages would divide by a zero count.
            if (validationSetSize == 0)
            {
                Console.WriteLine(
                    "Invalid number of folds: {0} folds requested, but the data set has only {1} instances.",
                    crossValidationFoldCount,
                    dataSet.Count);
                return false;
            }

            Console.WriteLine("Running {0}-fold cross-validation on {1}", crossValidationFoldCount, dataSetFile);

            // TODO: Use chained mapping to implement cross-validation
            for (int fold = 0; fold < crossValidationFoldCount; fold++)
            {
                // Construct training and validation sets for fold.
                // The last fold also takes the remainder left by the integer division.
                int validationSetStart = fold * validationSetSize;
                int validationSetEnd   = (fold + 1 == crossValidationFoldCount)
                                           ? dataSet.Count
                                           : (fold + 1) * validationSetSize;

                var trainingSet   = new List<LabeledFeatureValues>();
                var validationSet = new List<LabeledFeatureValues>();

                for (int instance = 0; instance < dataSet.Count; instance++)
                {
                    if (validationSetStart <= instance && instance < validationSetEnd)
                    {
                        validationSet.Add(dataSet[instance]);
                    }
                    else
                    {
                        trainingSet.Add(dataSet[instance]);
                    }
                }

                // Print info
                Console.WriteLine("   Fold {0} [validation set instances {1} - {2}]", fold + 1, validationSetStart, validationSetEnd - 1);

                // Create classifier
                var classifier = BayesPointMachineClassifier.CreateBinaryClassifier(Mappings.Classifier);
                classifier.Settings.Training.IterationCount       = iterationCount;
                classifier.Settings.Training.BatchCount           = batchCount;
                classifier.Settings.Training.ComputeModelEvidence = computeModelEvidence;

                // Track the number of iterations the training run actually completed.
                int currentIterationCount = 0;
                classifier.IterationChanged += (sender, eventArgs) => { currentIterationCount = eventArgs.CompletedIterationCount; };

                // Train classifier
                var stopWatch = new Stopwatch();
                stopWatch.Start();
                classifier.Train(trainingSet);
                stopWatch.Stop();

                // Produce predictions; the predicted label is the one with the highest probability.
                var predictions     = classifier.PredictDistribution(validationSet).ToList();
                var predictedLabels = predictions.Select(
                    prediction => prediction.Aggregate((aggregate, next) => next.Value > aggregate.Value ? next : aggregate).Key).ToList();

                // Iteration count
                iterationCounts.Add(currentIterationCount);

                // Training time
                trainingTime.Add(stopWatch.ElapsedMilliseconds);

                // Compute accuracy
                accuracy.Add(1 - (evaluator.Evaluate(validationSet, predictedLabels, Metrics.ZeroOneError) / predictions.Count));

                // Compute mean negative log probability
                negativeLogProbability.Add(evaluator.Evaluate(validationSet, predictions, Metrics.NegativeLogProbability) / predictions.Count);

                // Compute M-measure (averaged pairwise AUC)
                auc.Add(evaluator.AreaUnderRocCurve(validationSet, predictions));

                // Compute log evidence if desired
                evidence.Add(computeModelEvidence ? classifier.LogModelEvidence : double.NaN);

                // Persist performance metrics
                Console.WriteLine(
                    "      Accuracy = {0,5:0.0000}   NegLogProb = {1,5:0.0000}   AUC = {2,5:0.0000}{3}   Iterations = {4}   Training time = {5}",
                    accuracy[fold],
                    negativeLogProbability[fold],
                    auc[fold],
                    computeModelEvidence ? string.Format("   Log evidence = {0,5:0.0000}", evidence[fold]) : string.Empty,
                    iterationCounts[fold],
                    BayesPointMachineClassifierModuleUtilities.FormatElapsedTime(trainingTime[fold]));

                // Saved after every fold, with the metrics accumulated so far.
                BayesPointMachineClassifierModuleUtilities.SavePerformanceMetrics(
                    resultsFile, accuracy, negativeLogProbability, auc, evidence, iterationCounts, trainingTime);
            }

            return true;
        }
Exemplo n.º 13
0
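    /// <summary>
    /// Runs k-fold cross-validation of a binary Bayes point machine classifier,
    /// printing the performance metrics of each fold and saving them to a report file.
    /// </summary>
    /// <param name="x">The feature vectors, one per instance.</param>
    /// <param name="y">The labels; <c>y[i]</c> is used as the label of <c>x[i]</c>.</param>
    /// <param name="mapping">The mapping used to create both the classifier and the evaluator.</param>
    /// <param name="reportFileName">The file name the performance metrics are saved to.</param>
    /// <param name="crossValidationFoldCount">The number of cross-validation folds.</param>
    /// <param name="iterationCount">The value assigned to the training iteration count setting.</param>
    /// <param name="computeModelEvidence">Whether the log model evidence is computed and reported for each fold.</param>
    /// <param name="batchCount">The value assigned to the training batch count setting.</param>
    /// <remarks>Adapted from MicrosoftResearch.Infer.Learners</remarks>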
    public CrossValidateMapped(
        Vector[] x,
        IList<string> y,
        GenericClassifierMapping mapping,
        string reportFileName,
        int crossValidationFoldCount, //folds
        int iterationCount,
        bool computeModelEvidence,
        int batchCount)
    {
        Debug.Assert(x != null, "The feature vector must not be null.");
        Debug.Assert(y != null, "The targe variable must not be null.");
        Debug.Assert(mapping != null, "The classifier map must not be null.");
        Debug.Assert(!string.IsNullOrEmpty(reportFileName), "The report file name must not be null/empty.");
        Debug.Assert(iterationCount > 0, "The iteration count must be greater than zero.");
        Debug.Assert(batchCount > 0, "The batch count must be greater than zero.");

        // Shuffle dataset
        // NOTE(review): only x is handed to shuffleVector while the labels in y are
        // later paired with x by index - confirm the helper keeps the two aligned.
        shuffleVector(x);

        // Create evaluator
        var evaluatorMapping = mapping.ForEvaluation();
        var evaluator        = new ClassifierEvaluator<
            IList<Vector>,          // the type of the instance source,
            int,                    // the type of an instance
            IList<string>,          // the type of the label source
            string>(                // the type of a label.
            evaluatorMapping);

        // Create performance metrics (one entry per completed fold)
        var accuracy               = new List<double>();
        var negativeLogProbability = new List<double>();
        var auc                    = new List<double>();
        var evidence               = new List<double>();
        var iterationCounts        = new List<double>();
        var trainingTime           = new List<double>();

        // Run cross-validation.
        // A non-positive fold count is mapped to a validation size of zero so that it is
        // rejected by the check below instead of raising DivideByZeroException (count of 0)
        // or silently skipping the loop (negative count).
        int validationSetSize = crossValidationFoldCount > 0
                                    ? x.Length / crossValidationFoldCount
                                    : 0;
        int trainingSetSize   = x.Length - validationSetSize;

        Console.WriteLine(
            "Running {0}-fold cross-validation", crossValidationFoldCount);

        if (validationSetSize == 0 || trainingSetSize == 0)
        {
            Console.WriteLine("Invalid number of folds");
            Console.ReadKey();
            System.Environment.Exit(1);
        }

        for (int fold = 0; fold < crossValidationFoldCount; fold++)
        {
            // Construct training and validation sets for fold.
            // The last fold also takes the remainder left by the integer division.
            int validationSetStart = fold * validationSetSize;
            int validationSetEnd   = (fold + 1 == crossValidationFoldCount)
                                       ? x.Length
                                       : (fold + 1) * validationSetSize;

            int validationFoldSetSize = validationSetEnd - validationSetStart;
            int trainingFoldSetSize   = x.Length - validationFoldSetSize;

            Vector[]      trainingSet         = new Vector[trainingFoldSetSize];
            Vector[]      validationSet       = new Vector[validationFoldSetSize];
            IList<string> trainingSetLabels   = new List<string>();
            IList<string> validationSetLabels = new List<string>();

            for (int instance = 0, iv = 0, it = 0; instance < x.Length; instance++)
            {
                if (validationSetStart <= instance && instance < validationSetEnd)
                {
                    validationSet[iv++] = x[instance];
                    validationSetLabels.Add(y[instance]);
                }
                else
                {
                    trainingSet[it++] = x[instance];
                    trainingSetLabels.Add(y[instance]);
                }
            }

            // Print info
            Console.WriteLine("   Fold {0} [validation set instances {1} - {2}]", fold + 1, validationSetStart, validationSetEnd - 1);

            // Create classifier
            var classifier = BayesPointMachineClassifier.CreateBinaryClassifier(mapping);
            classifier.Settings.Training.IterationCount       = iterationCount;
            classifier.Settings.Training.BatchCount           = batchCount;
            classifier.Settings.Training.ComputeModelEvidence = computeModelEvidence;

            // Track the number of iterations the training run actually completed.
            int currentIterationCount = 0;
            classifier.IterationChanged += (sender, eventArgs) => { currentIterationCount = eventArgs.CompletedIterationCount; };

            // Train classifier
            var stopWatch = new Stopwatch();
            stopWatch.Start();
            classifier.Train(trainingSet, trainingSetLabels);
            stopWatch.Stop();

            // Produce predictions. The sequence is materialised once because it is
            // consumed several times below (two counts and three evaluator calls);
            // the declared type is kept so the evaluator overloads resolve as before.
            IEnumerable<IDictionary<string, double>> predictions =
                classifier.PredictDistribution(validationSet).ToList();
            int predictionCount = predictions.Count();
            var predictedLabels = classifier.Predict(validationSet);

            // Iteration count
            iterationCounts.Add(currentIterationCount);

            // Training time
            trainingTime.Add(stopWatch.ElapsedMilliseconds);

            // Compute accuracy
            accuracy.Add(1 - (evaluator.Evaluate(validationSet, validationSetLabels, predictedLabels, Metrics.ZeroOneError) / predictionCount));

            // Compute mean negative log probability
            negativeLogProbability.Add(evaluator.Evaluate(validationSet, validationSetLabels, predictions, Metrics.NegativeLogProbability) / predictionCount);

            // Compute M-measure (averaged pairwise AUC)
            auc.Add(evaluator.AreaUnderRocCurve(validationSet, validationSetLabels, predictions));

            // Compute log evidence if desired
            evidence.Add(computeModelEvidence ? classifier.LogModelEvidence : double.NaN);

            // Persist performance metrics
            Console.WriteLine(
                "      Accuracy = {0,5:0.0000}   NegLogProb = {1,5:0.0000}   AUC = {2,5:0.0000}{3}   Iterations = {4}   Training time = {5}",
                accuracy[fold],
                negativeLogProbability[fold],
                auc[fold],
                computeModelEvidence ? string.Format("   Log evidence = {0,5:0.0000}", evidence[fold]) : string.Empty,
                iterationCounts[fold],
                FormatElapsedTime(trainingTime[fold]));

            // Saved after every fold, with the metrics accumulated so far.
            SavePerformanceMetrics(
                reportFileName, accuracy, negativeLogProbability, auc, evidence, iterationCounts, trainingTime);
        }
    }
Exemplo n.º 14
0
    /// <summary>
    /// Diagnoses the Bayes point machine classifier on the specified data set: trains a binary
    /// classifier and, after every training iteration, logs the parameter change reported by
    /// <c>MaxDiff</c> between the current weight posteriors and the previous iteration's snapshot.
    /// </summary>
    /// <param name="x">The feature vectors to train on.</param>
    /// <param name="y">The labels passed to training together with <paramref name="x"/>.</param>
    /// <param name="mapping">The mapping used to create the binary classifier.</param>
    /// <param name="outputModelFileName">The name of the file to store the trained Bayes point machine model. Nothing is saved when null or empty.</param>
    /// <param name="reportFileName">
    /// The name of the file to store the maximum parameter differences. Debug builds assert that it is
    /// non-empty; when it is null or empty no report is written and only console output is produced.
    /// </param>
    /// <param name="iterationCount">The training iteration count assigned to the classifier settings. Must be greater than zero.</param>
    /// <param name="computeModelEvidence">Whether model evidence is computed; when true the log evidence is written to the console after training.</param>
    /// <param name="batchCount">The training batch count assigned to the classifier settings. Must be greater than zero.</param>
    /// <remarks>Adapted from MicrosoftResearch.Infer.Learners</remarks>
    public void DiagnoseClassifier(
        Vector[] x,
        IList <string> y,
        GenericClassifierMapping mapping,
        string outputModelFileName,
        string reportFileName,
        int iterationCount,
        bool computeModelEvidence,
        int batchCount)
    {
        Debug.Assert(x != null, "The feature vector must not be null.");
        Debug.Assert(y != null, "The targe variable must not be null.");
        Debug.Assert(mapping != null, "The classifier map must not be null.");
        Debug.Assert(!string.IsNullOrEmpty(reportFileName), "The report file name must not be null/empty.");
        Debug.Assert(iterationCount > 0, "The iteration count must be greater than zero.");
        Debug.Assert(batchCount > 0, "The batch count must be greater than zero.");

        // create a BPM from the mapping
        var classifier = BayesPointMachineClassifier.CreateBinaryClassifier(mapping);

        classifier.Settings.Training.ComputeModelEvidence = computeModelEvidence;
        classifier.Settings.Training.IterationCount       = iterationCount;
        classifier.Settings.Training.BatchCount           = batchCount;

        // Create prior distributions over weights.
        // The snapshot holds one Gaussian per feature, so it is sized by the dimensionality of a
        // feature vector rather than by the number of training instances (x.Length).
        // NOTE(review): assumes the mapping exposes the vectors in x unchanged as the classifier's
        // feature vectors - confirm against GenericClassifierMapping.
        int classCount               = 2;
        int featureCount             = x.Length > 0 ? x[0].Count : 0;
        var priorWeightDistributions = Util.ArrayInit(classCount, c => Util.ArrayInit(featureCount, f => new Gaussian(0.0, 1.0)));

        // Create IterationChanged handler
        var watch = new Stopwatch();

        classifier.IterationChanged += (sender, eventArgs) =>
        {
            // Pause timing so that reporting overhead is not attributed to the iteration.
            watch.Stop();

            // Only needed within a single callback, hence local to the handler.
            Dictionary <int, double[]> maxMean;
            Dictionary <int, double[]> maxVar;
            double maxParameterChange = MaxDiff(eventArgs.WeightPosteriorDistributions, priorWeightDistributions, out maxMean, out maxVar);

            if (!string.IsNullOrEmpty(reportFileName))
            {
                SaveMaximumParameterDifference(
                    reportFileName,
                    eventArgs.CompletedIterationCount,
                    maxParameterChange,
                    watch.ElapsedMilliseconds,
                    maxMean,
                    maxVar);
            }

            Console.WriteLine(
                "[{0}] Iteration {1,-4}   dp = {2,-20}   dt = {3,5}ms",
                DateTime.Now.ToLongTimeString(),
                eventArgs.CompletedIterationCount,
                maxParameterChange,
                watch.ElapsedMilliseconds);

            // Copy weight marginals so the next iteration is compared against this one
            for (int c = 0; c < eventArgs.WeightPosteriorDistributions.Count; c++)
            {
                for (int f = 0; f < eventArgs.WeightPosteriorDistributions[c].Count; f++)
                {
                    priorWeightDistributions[c][f] = eventArgs.WeightPosteriorDistributions[c][f];
                }
            }

            // Start timing the next iteration from zero.
            watch.Restart();
        };

        // Write file header (creates the report file, replacing any existing content)
        if (!string.IsNullOrEmpty(reportFileName))
        {
            using (var writer = new StreamWriter(reportFileName))
            {
                writer.WriteLine("# time, # iteration, " +
                                 "# maximum absolute parameter difference, " +
                                 "# iteration time in milliseconds, " +
                                 "# Max Mean, # Max Var.");
            }
        }

        // Train the Bayes point machine classifier
        Console.WriteLine("[{0}] Starting training...", DateTime.Now.ToLongTimeString());
        watch.Start();

        classifier.Train(x, y);

        // Compute evidence
        if (classifier.Settings.Training.ComputeModelEvidence)
        {
            Console.WriteLine("Log evidence = {0,10:0.0000}", classifier.LogModelEvidence);
        }

        // Save trained model
        if (!string.IsNullOrEmpty(outputModelFileName))
        {
            classifier.Save(outputModelFileName);
        }
    }
Exemplo n.º 15
0
 /// <summary>
 /// Constructs a <c>JiOu_Mapping</c> and creates a multi-class Bayes point machine classifier from it.
 /// </summary>
 /// <remarks>The created classifier is not used any further inside this method.</remarks>
 void ProcessData()
 {
     // The mapping is only needed to build the classifier, so it is created inline.
     var bpmClassifier = BayesPointMachineClassifier.CreateMulticlassClassifier(new JiOu_Mapping());
 }