/// <summary>
        /// Builds an image-classification pipeline on top of a TensorFlow model, fits it on the
        /// training file, prints the predictions and log-loss metrics obtained on that same
        /// training data, and saves the fitted model to <c>outputModelLocation</c>.
        /// </summary>
        /// <remarks>
        /// The pipeline maps the label to a key, loads and resizes the images, extracts pixels into
        /// the "input" column, scores the TensorFlow model to obtain "softmax2_pre_activation",
        /// trains a logistic-regression classifier on that column and finally maps the predicted
        /// key back to its original label value.
        /// The metrics are computed on the training set, so they only indicate how well the model
        /// fits the data it has already seen.
        /// </remarks>
        public void BuildAndTrain()
        {
            var featurizerModelLocation = inputModelLocation;

            ConsoleWriteHeader("Read model");
            Console.WriteLine($"Model location: {featurizerModelLocation}");
            Console.WriteLine($"Images folder: {imagesFolder}");
            Console.WriteLine($"Training file: {dataLocation}");
            Console.WriteLine($"Default parameters: image size=({ImageNetSettings.imageWidth},{ImageNetSettings.imageHeight}), image mean: {ImageNetSettings.mean}");

            // The training file is read without a header row.
            var data = mlContext.Data.ReadFromTextFile<ImageNetData>(path: dataLocation, hasHeader: false);

            var pipeline = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: LabelTokey, inputColumnName: DefaultColumnNames.Label)
                           .Append(mlContext.Transforms.LoadImages(imagesFolder, (ImageReal, nameof(ImageNetData.ImagePath))))
                           .Append(mlContext.Transforms.Resize(outputColumnName: ImageReal, imageWidth: ImageNetSettings.imageWidth, imageHeight: ImageNetSettings.imageHeight, inputColumnName: ImageReal))
                           .Append(mlContext.Transforms.ExtractPixels(new ImagePixelExtractorTransformer.ColumnInfo(name: "input", inputColumnName: ImageReal, interleave: ImageNetSettings.channelsLast, offset: ImageNetSettings.mean)))
                           .Append(mlContext.Transforms.ScoreTensorFlowModel(modelLocation: featurizerModelLocation, outputColumnNames: new[] { "softmax2_pre_activation" }, inputColumnNames: new[] { "input" }))
                           .Append(mlContext.MulticlassClassification.Trainers.LogisticRegression(labelColumn: LabelTokey, featureColumn: "softmax2_pre_activation"))
                           .Append(mlContext.Transforms.Conversion.MapKeyToValue((PredictedLabelValue, DefaultColumnNames.PredictedLabel)));

            // Train the model
            ConsoleWriteHeader("Training classification model");
            ITransformer model = pipeline.Fit(data);

            // Process the training data through the model.
            // This is an optional step, but it's useful for debugging issues.
            var trainData = model.Transform(data);
            var trainPredictions = mlContext.CreateEnumerable<ImageNetPipeline>(trainData, false, true).ToList();

            // Report each image with its predicted label and the highest score.
            foreach (var prediction in trainPredictions)
            {
                ConsoleWriteImagePrediction(prediction.ImagePath, prediction.PredictedLabelValue, prediction.Score.Max());
            }

            // Get some performance metric on the model using training data
            var classificationContext = new MulticlassClassificationCatalog(mlContext);

            ConsoleWriteHeader("Classification metrics");
            var metrics = classificationContext.Evaluate(trainData, label: LabelTokey, predictedLabel: DefaultColumnNames.PredictedLabel);

            Console.WriteLine($"LogLoss is: {metrics.LogLoss}");
            Console.WriteLine($"PerClassLogLoss is: {String.Join(" , ", metrics.PerClassLogLoss.Select(c => c.ToString()))}");

            // Save the model to assets/outputs
            ConsoleWriteHeader("Save model to local file");
            ModelHelpers.DeleteAssets(outputModelLocation);
            using (var f = new FileStream(outputModelLocation, FileMode.Create))
            {
                mlContext.Model.Save(model, f);
            }

            Console.WriteLine($"Model saved: {outputModelLocation}");
        }
        /// <summary>
        /// Runs permutation feature importance for a multiclass predictor and returns one row of
        /// metric statistics (mean and standard error) per named feature slot.
        /// </summary>
        /// <param name="env">Host environment used to create the prediction transformer and catalogs.</param>
        /// <param name="predictor">Trained predictor; it is used as an <c>IPredictorProducing&lt;VBuffer&lt;float&gt;&gt;</c>.</param>
        /// <param name="roleMappedData">Data together with the schema that names the feature and label columns.</param>
        /// <param name="input">Permutation settings (feature-weight filter, number of examples, permutation count).</param>
        /// <returns>A data view built from the collected per-feature metrics.</returns>
        /// <exception cref="System.InvalidOperationException">
        /// The role-mapped schema has no feature or label column, or no feature slot with a
        /// non-blank name produced metrics.
        /// </exception>
        private static IDataView GetMulticlassMetrics(
            IHostEnvironment env,
            IPredictor predictor,
            RoleMappedData roleMappedData,
            PermutationFeatureImportanceArguments input)
        {
            var roles             = roleMappedData.Schema.GetColumnRoleNames();
            var featureColumnName = roles.First(x => x.Key.Value == RoleMappedSchema.ColumnRole.Feature.Value).Value;
            var labelColumnName   = roles.First(x => x.Key.Value == RoleMappedSchema.ColumnRole.Label.Value).Value;

            // NOTE(review): the 'as' cast yields null when the predictor is of another type and that
            // null is handed to the transformer constructor - confirm the constructor validates it.
            var pred = new MulticlassPredictionTransformer<IPredictorProducing<VBuffer<float>>>(
                env, predictor as IPredictorProducing<VBuffer<float>>, roleMappedData.Data.Schema, featureColumnName, labelColumnName);
            var multiclassCatalog  = new MulticlassClassificationCatalog(env);
            var permutationMetrics = multiclassCatalog
                                     .PermutationFeatureImportance(pred,
                                                                   roleMappedData.Data,
                                                                   labelColumnName: labelColumnName,
                                                                   useFeatureWeightFilter: input.UseFeatureWeightFilter,
                                                                   numberOfExamplesToUse: input.NumberOfExamplesToUse,
                                                                   permutationCount: input.PermutationCount);

            var slotNames = GetSlotNames(roleMappedData.Schema);

            Contracts.Assert(slotNames.Length == permutationMetrics.Length,
                             "Mismatch between number of feature slots and number of features permuted.");

            var metrics = new List<MulticlassMetrics>();

            for (int i = 0; i < permutationMetrics.Length; i++)
            {
                // Slots without a usable name are left out of the result.
                if (string.IsNullOrWhiteSpace(slotNames[i]))
                {
                    continue;
                }

                var pMetric = permutationMetrics[i];
                metrics.Add(new MulticlassMetrics
                {
                    FeatureName            = slotNames[i],
                    MacroAccuracy          = pMetric.MacroAccuracy.Mean,
                    MacroAccuracyStdErr    = pMetric.MacroAccuracy.StandardError,
                    MicroAccuracy          = pMetric.MicroAccuracy.Mean,
                    MicroAccuracyStdErr    = pMetric.MicroAccuracy.StandardError,
                    LogLoss                = pMetric.LogLoss.Mean,
                    LogLossStdErr          = pMetric.LogLoss.StandardError,
                    LogLossReduction       = pMetric.LogLossReduction.Mean,
                    LogLossReductionStdErr = pMetric.LogLossReduction.StandardError,
                    TopKAccuracy           = pMetric.TopKAccuracy.Mean,
                    TopKAccuracyStdErr     = pMetric.TopKAccuracy.StandardError,
                    PerClassLogLoss        = pMetric.PerClassLogLoss.Select(x => x.Mean).ToArray(),
                    PerClassLogLossStdErr  = pMetric.PerClassLogLoss.Select(x => x.StandardError).ToArray()
                });
            }

            // The vector sizes below are taken from the first entry, so at least one entry is
            // required. Fail with a descriptive message instead of "Sequence contains no elements".
            if (metrics.Count == 0)
            {
                throw new System.InvalidOperationException(
                    "Permutation feature importance produced no metrics: no feature slot has a non-blank name.");
            }

            // Convert unknown size vectors to known size.
            var metric = metrics[0];
            SchemaDefinition schema = SchemaDefinition.Create(typeof(MulticlassMetrics));

            ConvertVectorToKnownSize(nameof(metric.PerClassLogLoss), metric.PerClassLogLoss.Length, ref schema);
            ConvertVectorToKnownSize(nameof(metric.PerClassLogLossStdErr), metric.PerClassLogLossStdErr.Length, ref schema);

            var dataOps = new DataOperationsCatalog(env);
            return dataOps.LoadFromEnumerable(metrics, schema);
        }
        // Example no. 3
        // </SnippetInceptionSettings>

        // Build and train model
        /// <summary>
        /// Reuses a TensorFlow model as a featurizer, trains a logistic-regression classifier on
        /// top of it, prints the predictions and log-loss metrics obtained on the training data,
        /// and saves the fitted model.
        /// </summary>
        /// <param name="mlContext">ML.NET context used to load data, build the pipeline and save the model.</param>
        /// <param name="dataLocation">Path of the training file; it is read with a header row.</param>
        /// <param name="imagesFolder">Folder from which the images named in the training file are loaded.</param>
        /// <param name="inputModelLocation">Path of the TensorFlow model that is scored to produce features.</param>
        /// <param name="outputModelLocation">Path of the file the trained model is written to (created or overwritten).</param>
        public static void ReuseAndTuneInceptionModel(MLContext mlContext, string dataLocation, string imagesFolder, string inputModelLocation, string outputModelLocation)
        {
            Console.WriteLine("Read model");
            Console.WriteLine($"Model location: {inputModelLocation}");
            Console.WriteLine($"Images folder: {imagesFolder}");
            Console.WriteLine($"Training file: {dataLocation}");
            Console.WriteLine($"Default parameters: image size=({InceptionSettings.ImageWidth},{InceptionSettings.ImageHeight}), image mean: {InceptionSettings.Mean}");

            // <SnippetLoadData>
            var data = mlContext.Data.ReadFromTextFile<ImageData>(path: dataLocation, hasHeader: true);
            // </SnippetLoadData>

            // <SnippetMapValueToKey1>
            var estimator = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: LabelTokey, inputColumnName: DefaultColumnNames.Label)
                            // </SnippetMapValueToKey1>
                            // The image transforms transform the images into the model's expected format.
                            // The images are loaded from the folder passed in by the caller.
                            // <SnippetImageTransforms>
                            .Append(mlContext.Transforms.LoadImages(imagesFolder, (ImageReal, nameof(ImageData.ImagePath))))
                            .Append(mlContext.Transforms.Resize(outputColumnName: ImageReal, imageWidth: InceptionSettings.ImageWidth, imageHeight: InceptionSettings.ImageHeight, inputColumnName: ImageReal))
                            .Append(mlContext.Transforms.ExtractPixels(new ImagePixelExtractorTransformer.ColumnInfo(name: "input", inputColumnName: ImageReal, interleave: InceptionSettings.ChannelsLast, offset: InceptionSettings.Mean)))
                            // </SnippetImageTransforms>
                            // The ScoreTensorFlowModel transform scores the TensorFlow model; its
                            // "softmax2_pre_activation" output is the feature column of the trainer below.
                            // <SnippetScoreTensorFlowModel>
                            .Append(mlContext.Transforms.ScoreTensorFlowModel(modelLocation: inputModelLocation, outputColumnNames: new[] { "softmax2_pre_activation" }, inputColumnNames: new[] { "input" }))
                            // </SnippetScoreTensorFlowModel>
                            // <SnippetAddTrainer>
                            .Append(mlContext.MulticlassClassification.Trainers.LogisticRegression(labelColumn: LabelTokey, featureColumn: "softmax2_pre_activation"))
                            // </SnippetAddTrainer>
                            // <SnippetMapValueToKey2>
                            .Append(mlContext.Transforms.Conversion.MapKeyToValue((PredictedLabelValue, DefaultColumnNames.PredictedLabel)));

            // </SnippetMapValueToKey2>

            // Train the model
            Console.WriteLine("=============== Training classification model ===============");
            // Create and train the model based on the dataset that has been loaded, transformed.
            // <SnippetTrainModel>
            ITransformer model = estimator.Fit(data);
            // </SnippetTrainModel>

            // Process the training data through the model
            // This is an optional step, but it's useful for debugging issues
            // <SnippetTransformData>
            var predictions = model.Transform(data);
            // </SnippetTransformData>

            // Create enumerables for both the ImageData and ImagePrediction DataViews
            // for displaying results
            // <SnippetEnumerateDataViews>
            var imageData           = mlContext.CreateEnumerable<ImageData>(data, false, true);
            var imagePredictionData = mlContext.CreateEnumerable<ImagePrediction>(predictions, false, true);

            // </SnippetEnumerateDataViews>

            // Hand the input rows and their predictions to the display helper.
            // <SnippetCallPairAndDisplayResults1>
            PairAndDisplayResults(imageData, imagePredictionData);
            // </SnippetCallPairAndDisplayResults1>

            // Get some performance metrics on the model using training data
            Console.WriteLine("=============== Classification metrics ===============");

            // <SnippetEvaluate>
            var classificationContext = new MulticlassClassificationCatalog(mlContext);
            var metrics               = classificationContext.Evaluate(predictions, label: LabelTokey, predictedLabel: DefaultColumnNames.PredictedLabel);

            // </SnippetEvaluate>

            //<SnippetDisplayMetrics>
            Console.WriteLine($"LogLoss is: {metrics.LogLoss}");
            Console.WriteLine($"PerClassLogLoss is: {String.Join(" , ", metrics.PerClassLogLoss.Select(c => c.ToString()))}");
            //</SnippetDisplayMetrics>

            // Save the model to assets/outputs
            Console.WriteLine("=============== Save model to local file ===============");

            // <SnippetSaveModel>
            using (var fileStream = new FileStream(outputModelLocation, FileMode.Create))
            {
                mlContext.Model.Save(model, fileStream);
            }
            // </SnippetSaveModel>

            Console.WriteLine($"Model saved: {outputModelLocation}");
        }