Example #1
        private void CrossValidationOn(string dataPath)
        {
            // Create a new environment for ML.NET operations. It can be used for exception tracking and logging,
            // as well as the source of randomness.
            var env = new LocalEnvironment();

            // We know that this is a classification task, so we create a multiclass classification context: it will give us the algorithms
            // we need, as well as the evaluation procedure.
            var classification = new MulticlassClassificationContext(env);

            // Step one: read the data as an IDataView.
            // First, we define the reader: specify the data columns and where to find them in the text file.
            var reader = TextLoader.CreateReader(env, ctx => (
                                                     // The four features of the Iris dataset.
                                                     SepalLength: ctx.LoadFloat(0),
                                                     SepalWidth: ctx.LoadFloat(1),
                                                     PetalLength: ctx.LoadFloat(2),
                                                     PetalWidth: ctx.LoadFloat(3),
                                                     // Label: kind of iris.
                                                     Label: ctx.LoadText(4)
                                                     ),
                                                 // The default separator is tab, but this dataset uses commas.
                                                 separator: ',');

            // Read the data.
            var data = reader.Read(new MultiFileSource(dataPath));

            // Build the training pipeline.
            var learningPipeline = reader.MakeNewEstimator()
                                   .Append(r => (
                                               // Convert string label to a key.
                                               Label: r.Label.ToKey(),
                                               // Concatenate all the features together into one column 'Features'.
                                               Features: r.SepalLength.ConcatWith(r.SepalWidth, r.PetalLength, r.PetalWidth)))
                                   .Append(r => (
                                               r.Label,
                                               // Train the multi-class SDCA model to predict the label using features.
                                               Predictions: classification.Trainers.Sdca(r.Label, r.Features)));

            // Split the data 90:10 into train and test sets, train and evaluate.
            var (trainData, testData) = classification.TrainTestSplit(data, testFraction: 0.1);

            // Train the model.
            var model = learningPipeline.Fit(trainData);
            // Compute quality metrics on the test set.
            var metrics = classification.Evaluate(model.Transform(testData), r => r.Label, r => r.Predictions);

            Console.WriteLine(metrics.AccuracyMicro);

            // Now run the 5-fold cross-validation experiment, using the same pipeline.
            var cvResults = classification.CrossValidate(data, learningPipeline, r => r.Label, numFolds: 5);

            // The results object is an array of 5 elements. For each of the 5 folds, we have metrics, model and scored test data.
            // Let's compute the average micro-accuracy.
            var microAccuracies = cvResults.Select(r => r.metrics.AccuracyMicro);

            Console.WriteLine(microAccuracies.Average());
        }
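
A hedged follow-up sketch (not part of the original sample): besides the average, the spread of the per-fold micro-accuracies is often worth reporting. This uses only the cvResults shape shown above, plus LINQ and System.Math; the variable names are illustrative.

            // Sketch: report mean and standard deviation of the per-fold micro-accuracy.
            var accs = cvResults.Select(r => r.metrics.AccuracyMicro).ToArray();
            var mean = accs.Average();
            var std  = Math.Sqrt(accs.Select(a => (a - mean) * (a - mean)).Average());
            Console.WriteLine($"Micro-accuracy: {mean:F4} +/- {std:F4}");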
Example #2
        public void CrossValidate()
        {
            var env        = new MLContext(seed: 0);
            var dataPath   = GetDataPath(TestDatasets.iris.trainFilename);
            var dataSource = new MultiFileSource(dataPath);

            var ctx    = new MulticlassClassificationContext(env);
            var reader = TextLoader.CreateReader(env,
                                                 c => (label: c.LoadText(0), features: c.LoadFloat(1, 4)));

            var est = reader.MakeNewEstimator()
                      .Append(r => (label: r.label.ToKey(), r.features))
                      .Append(r => (r.label, preds: ctx.Trainers.Sdca(
                                        r.label,
                                        r.features,
                                        maxIterations: 2)));

            var results = ctx.CrossValidate(reader.Read(dataSource), est, r => r.label)
                          .Select(x => x.metrics).ToArray();

            Assert.Equal(5, results.Length);
            Assert.True(results.All(x => x.LogLoss > 0));
        }
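
A possible extension of this test, sketched under the assumption that each per-fold metrics object exposes the same AccuracyMicro property used in Example #1:

            // Sketch: sanity-check that micro-accuracy lies in [0, 1] on every fold.
            Assert.True(results.All(m => m.AccuracyMicro >= 0 && m.AccuracyMicro <= 1));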
Example #3
        static void Main(string[] args)
        {
            var dataPath = @"Data/creditcard.csv";

            // Create a new environment for ML.NET operations.
            // It can be used for exception tracking and logging,
            // as well as the source of randomness.
            // Setting a fixed seed makes the environment deterministic.
            var env = new LocalEnvironment(seed: 1);

            // Step one: read the data as an IDataView.

            // Create the reader: define the data columns
            // and where to find them in the text file.
            var reader = TextLoader.CreateReader(env,
                                                 ctx => (
                                                     Features: ctx.LoadFloat(1, 29), // V1...V28 + Amount
                                                     Label: ctx.LoadText(30)),       // Class
                                                 separator: ',', hasHeader: true);

            // Now read the file
            // (remember though, readers are lazy, so the actual
            //  reading will happen when the data is accessed).
            var data = reader.Read(new MultiFileSource(dataPath));

            // 'data' is a 'promise' of data. Let's actually read a few rows.
            var someRows = data.AsDynamic
                           // Convert to an enumerable of user-defined type.
                           .AsEnumerable<TransactionData>(env, reuseRowObject: false)
                           // Take a couple values as an array.
                           .Take(4).ToArray();

            ConsoleHelpers.ConsoleWriteHeader("Show 4");
            foreach (var viewRow in someRows)
            {
                Console.WriteLine($"Label: {viewRow.Label}");
                Console.WriteLine($"Features: [0] {viewRow.Features[0]} [1] {viewRow.Features[1]} [2] {viewRow.Features[2]} ... [28] {viewRow.Features[28]}");
                //Console.WriteLine($"Features Normalized: [0] {viewRow.FeaturesNormalizedByMeanVar[0]} [1] {viewRow.FeaturesNormalizedByMeanVar[1]} [2] {viewRow.FeaturesNormalizedByMeanVar[2]} ... [28] {viewRow.FeaturesNormalizedByMeanVar[28]}");
            }
            Console.WriteLine("");

            // Step two: define the learning pipeline.

            // We know that this is a classification task, so we create a multiclass classification context: it will give us the algorithms
            // we need, as well as the evaluation procedure.
            var classification = new MulticlassClassificationContext(env);


            // Start creating our processing pipeline.
            var learningPipeline = reader.MakeNewEstimator()
                                   .Append(row => (
                                               FeaturesNormalizedByMeanVar: row.Features.NormalizeByMeanVar(), // normalize values
                                               Label: row.Label.ToKey()))
                                   .Append(row => (
                                               row.Label,
                                                Predictions: classification.Trainers.Sdca(row.Label, features: row.FeaturesNormalizedByMeanVar)));

            // Split the data 80:20 into train and test sets, train and evaluate.
            var (trainData, testData) = classification.TrainTestSplit(data, testFraction: 0.2);

            // Step three: Train the model.
            var model = learningPipeline.Fit(trainData);
            // Compute quality metrics on the test set.
            var metrics = classification.Evaluate(model.Transform(testData), row => row.Label, row => row.Predictions);


            ConsoleHelpers.ConsoleWriteHeader("Train Metrics (80/20) :");
            Console.WriteLine($"Acuracy Macro: {metrics.AccuracyMacro}");
            Console.WriteLine($"Acuracy Micro: {metrics.AccuracyMicro}");
            Console.WriteLine($"Log Loss: {metrics.LogLoss}");
            Console.WriteLine($"Log Loss Reduction: {metrics.LogLossReduction}");



            // Now run the 5-fold cross-validation experiment, using the same pipeline.
            var cvResults = classification.CrossValidate(data, learningPipeline, r => r.Label, numFolds: 5);

            // The results object is an array of 5 elements. For each of the 5 folds, we have metrics, model and scored test data.
            // Let's print the quality metrics for each fold.

            var cvmetrics = cvResults.Select(r => r.metrics);
            int count     = 1;

            foreach (var metric in cvmetrics)
            {
                ConsoleHelpers.ConsoleWriteHeader($"Cross-Validation Metrics [{count}/5]:");
                Console.WriteLine($"Accuracy Macro: {metric.AccuracyMacro}");
                Console.WriteLine($"Accuracy Micro: {metric.AccuracyMicro}");
                Console.WriteLine($"Log Loss: {metric.LogLoss}");
                Console.WriteLine($"Log Loss Reduction: {metric.LogLossReduction}");
                Console.WriteLine("");
                count++;
            }
        }
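
Example #3 refers to a TransactionData type and a ConsoleHelpers class that are not shown. A minimal sketch of what they might look like, inferred from the reader definition above (the exact declarations are not part of the original source):

        // Hypothetical declarations matching the columns loaded by the reader.
        public class TransactionData
        {
            // V1..V28 plus Amount: the 29 float columns loaded as one vector.
            public float[] Features { get; set; }

            // The 'Class' column, loaded as text.
            public string Label { get; set; }
        }

        public static class ConsoleHelpers
        {
            // Prints a blank line, the header text, and a separator row.
            public static void ConsoleWriteHeader(string header)
            {
                Console.WriteLine();
                Console.WriteLine(header);
                Console.WriteLine(new string('#', header.Length));
            }
        }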