/// <summary>
/// Builds the image-classification pipeline on top of the TensorFlow featurizer model,
/// fits it on the training file, prints the predictions and metrics obtained on that
/// same training data, and saves the fitted model to <c>outputModelLocation</c>.
/// </summary>
/// <remarks>
/// Takes no arguments: it uses <c>mlContext</c>, <c>inputModelLocation</c>, <c>imagesFolder</c>,
/// <c>dataLocation</c> and <c>outputModelLocation</c> from the enclosing type.
/// </remarks>
public void BuildAndTrain()
{
    var featurizerModelLocation = inputModelLocation;

    ConsoleWriteHeader("Read model");
    Console.WriteLine($"Model location: {featurizerModelLocation}");
    Console.WriteLine($"Images folder: {imagesFolder}");
    Console.WriteLine($"Training file: {dataLocation}");
    Console.WriteLine($"Default parameters: image size=({ImageNetSettings.imageWidth},{ImageNetSettings.imageHeight}), image mean: {ImageNetSettings.mean}");

    var data = mlContext.Data.ReadFromTextFile<ImageNetData>(path: dataLocation, hasHeader: false);

    // Label -> key, load/resize/extract pixels, score the TensorFlow model, train the
    // classifier on the "softmax2_pre_activation" output, then map the predicted key back to its value.
    var pipeline = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: LabelTokey, inputColumnName: DefaultColumnNames.Label)
        .Append(mlContext.Transforms.LoadImages(imagesFolder, (ImageReal, nameof(ImageNetData.ImagePath))))
        .Append(mlContext.Transforms.Resize(outputColumnName: ImageReal, imageWidth: ImageNetSettings.imageWidth, imageHeight: ImageNetSettings.imageHeight, inputColumnName: ImageReal))
        .Append(mlContext.Transforms.ExtractPixels(new ImagePixelExtractorTransformer.ColumnInfo(name: "input", inputColumnName: ImageReal, interleave: ImageNetSettings.channelsLast, offset: ImageNetSettings.mean)))
        .Append(mlContext.Transforms.ScoreTensorFlowModel(modelLocation: featurizerModelLocation, outputColumnNames: new[] { "softmax2_pre_activation" }, inputColumnNames: new[] { "input" }))
        .Append(mlContext.MulticlassClassification.Trainers.LogisticRegression(labelColumn: LabelTokey, featureColumn: "softmax2_pre_activation"))
        .Append(mlContext.Transforms.Conversion.MapKeyToValue((PredictedLabelValue, DefaultColumnNames.PredictedLabel)));

    // Train the model
    ConsoleWriteHeader("Training classification model");
    ITransformer model = pipeline.Fit(data);

    // Process the training data through the model.
    // This is an optional step, but it's useful for debugging issues.
    var trainData = model.Transform(data);

    // Materialize every prediction before printing any of them.
    var trainPredictions = mlContext.CreateEnumerable<ImageNetPipeline>(trainData, false, true).ToList();
    foreach (var prediction in trainPredictions)
    {
        ConsoleWriteImagePrediction(prediction.ImagePath, prediction.PredictedLabelValue, prediction.Score.Max());
    }

    // Get some performance metric on the model using training data
    var classificationContext = new MulticlassClassificationCatalog(mlContext);
    ConsoleWriteHeader("Classification metrics");
    var metrics = classificationContext.Evaluate(trainData, label: LabelTokey, predictedLabel: DefaultColumnNames.PredictedLabel);
    Console.WriteLine($"LogLoss is: {metrics.LogLoss}");
    Console.WriteLine($"PerClassLogLoss is: {String.Join(" , ", metrics.PerClassLogLoss.Select(c => c.ToString()))}");

    // Save the model to assets/outputs
    ConsoleWriteHeader("Save model to local file");
    ModelHelpers.DeleteAssets(outputModelLocation);
    using (var modelStream = new FileStream(outputModelLocation, FileMode.Create))
    {
        mlContext.Model.Save(model, modelStream);
    }

    Console.WriteLine($"Model saved: {outputModelLocation}");
}
/// <summary>
/// Runs permutation feature importance for a multiclass predictor and returns the
/// resulting per-feature metrics as an <see cref="IDataView"/>.
/// </summary>
/// <param name="env">Host environment used to create the prediction transformer and the catalogs.</param>
/// <param name="predictor">Predictor to evaluate; it is handed on as an <c>IPredictorProducing&lt;VBuffer&lt;float&gt;&gt;</c>.</param>
/// <param name="roleMappedData">Data plus role-mapped schema; the feature and label column names are read from its roles.</param>
/// <param name="input">Supplies <c>UseFeatureWeightFilter</c>, <c>NumberOfExamplesToUse</c> and <c>PermutationCount</c>.</param>
/// <returns>A data view built from one <c>MulticlassMetrics</c> entry per feature slot whose name is not blank.</returns>
/// <exception cref="System.InvalidOperationException">
/// No feature or label role is present in the schema, or no feature slot has a non-blank name.
/// </exception>
private static IDataView GetMulticlassMetrics(
    IHostEnvironment env,
    IPredictor predictor,
    RoleMappedData roleMappedData,
    PermutationFeatureImportanceArguments input)
{
    var roles = roleMappedData.Schema.GetColumnRoleNames();
    var featureColumnName = roles.First(x => x.Key.Value == RoleMappedSchema.ColumnRole.Feature.Value).Value;
    var labelColumnName = roles.First(x => x.Key.Value == RoleMappedSchema.ColumnRole.Label.Value).Value;

    // NOTE(review): 'as' yields null when the predictor is of another kind; this relies on
    // the transformer constructor to reject null - confirm.
    var pred = new MulticlassPredictionTransformer<IPredictorProducing<VBuffer<float>>>(
        env,
        predictor as IPredictorProducing<VBuffer<float>>,
        roleMappedData.Data.Schema,
        featureColumnName,
        labelColumnName);

    var multiclassCatalog = new MulticlassClassificationCatalog(env);
    var permutationMetrics = multiclassCatalog
        .PermutationFeatureImportance(
            pred,
            roleMappedData.Data,
            labelColumnName: labelColumnName,
            useFeatureWeightFilter: input.UseFeatureWeightFilter,
            numberOfExamplesToUse: input.NumberOfExamplesToUse,
            permutationCount: input.PermutationCount);

    var slotNames = GetSlotNames(roleMappedData.Schema);
    Contracts.Assert(slotNames.Length == permutationMetrics.Length,
        "Mismatch between number of feature slots and number of features permuted.");

    var metrics = new List<MulticlassMetrics>();
    for (int i = 0; i < permutationMetrics.Length; i++)
    {
        // Slots without a usable name are left out of the result.
        if (string.IsNullOrWhiteSpace(slotNames[i]))
        {
            continue;
        }

        var pMetric = permutationMetrics[i];
        metrics.Add(new MulticlassMetrics
        {
            FeatureName = slotNames[i],
            MacroAccuracy = pMetric.MacroAccuracy.Mean,
            MacroAccuracyStdErr = pMetric.MacroAccuracy.StandardError,
            MicroAccuracy = pMetric.MicroAccuracy.Mean,
            MicroAccuracyStdErr = pMetric.MicroAccuracy.StandardError,
            LogLoss = pMetric.LogLoss.Mean,
            LogLossStdErr = pMetric.LogLoss.StandardError,
            LogLossReduction = pMetric.LogLossReduction.Mean,
            LogLossReductionStdErr = pMetric.LogLossReduction.StandardError,
            TopKAccuracy = pMetric.TopKAccuracy.Mean,
            TopKAccuracyStdErr = pMetric.TopKAccuracy.StandardError,
            PerClassLogLoss = pMetric.PerClassLogLoss.Select(x => x.Mean).ToArray(),
            PerClassLogLossStdErr = pMetric.PerClassLogLoss.Select(x => x.StandardError).ToArray()
        });
    }

    // The vector sizes below are taken from the first entry, so at least one is required.
    // Same exception type as the previous metrics.First() call, with an actionable message.
    if (metrics.Count == 0)
    {
        throw new System.InvalidOperationException(
            "Permutation feature importance produced no metrics: no feature slot has a non-blank name.");
    }

    // Convert unknown size vectors to known size.
    var metric = metrics[0];
    SchemaDefinition schema = SchemaDefinition.Create(typeof(MulticlassMetrics));
    ConvertVectorToKnownSize(nameof(metric.PerClassLogLoss), metric.PerClassLogLoss.Length, ref schema);
    ConvertVectorToKnownSize(nameof(metric.PerClassLogLossStdErr), metric.PerClassLogLossStdErr.Length, ref schema);

    var dataOps = new DataOperationsCatalog(env);
    return dataOps.LoadFromEnumerable(metrics, schema);
}
// </SnippetInceptionSettings>

/// <summary>
/// Builds and trains an image classifier on top of the Inception TensorFlow model,
/// prints the predictions and metrics obtained on the training data, and saves the
/// fitted model.
/// </summary>
/// <param name="mlContext">ML.NET context used for loading data, building transforms, training and saving.</param>
/// <param name="dataLocation">Path of the training text file (read with a header row).</param>
/// <param name="imagesFolder">Folder passed to <c>LoadImages</c> to locate the image files.</param>
/// <param name="inputModelLocation">Path of the TensorFlow model to score.</param>
/// <param name="outputModelLocation">Path the fitted model is written to (created or overwritten).</param>
public static void ReuseAndTuneInceptionModel(MLContext mlContext, string dataLocation, string imagesFolder, string inputModelLocation, string outputModelLocation)
{
    Console.WriteLine("Read model");
    Console.WriteLine($"Model location: {inputModelLocation}");
    // Report and use the folder the caller passed in, not the class-level field.
    Console.WriteLine($"Images folder: {imagesFolder}");
    Console.WriteLine($"Training file: {dataLocation}");
    Console.WriteLine($"Default parameters: image size=({InceptionSettings.ImageWidth},{InceptionSettings.ImageHeight}), image mean: {InceptionSettings.Mean}");

    // <SnippetLoadData>
    var data = mlContext.Data.ReadFromTextFile<ImageData>(path: dataLocation, hasHeader: true);
    // </SnippetLoadData>

    // <SnippetMapValueToKey1>
    var estimator = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: LabelTokey, inputColumnName: DefaultColumnNames.Label)
        // </SnippetMapValueToKey1>
        // The image transforms transform the images into the model's expected format.
        // <SnippetImageTransforms>
        .Append(mlContext.Transforms.LoadImages(imagesFolder, (ImageReal, nameof(ImageData.ImagePath))))
        .Append(mlContext.Transforms.Resize(outputColumnName: ImageReal, imageWidth: InceptionSettings.ImageWidth, imageHeight: InceptionSettings.ImageHeight, inputColumnName: ImageReal))
        .Append(mlContext.Transforms.ExtractPixels(new ImagePixelExtractorTransformer.ColumnInfo(name: "input", inputColumnName: ImageReal, interleave: InceptionSettings.ChannelsLast, offset: InceptionSettings.Mean)))
        // </SnippetImageTransforms>
        // The ScoreTensorFlowModel transform scores the TensorFlow model.
        // <SnippetScoreTensorFlowModel>
        .Append(mlContext.Transforms.ScoreTensorFlowModel(modelLocation: inputModelLocation, outputColumnNames: new[] { "softmax2_pre_activation" }, inputColumnNames: new[] { "input" }))
        // </SnippetScoreTensorFlowModel>
        // <SnippetAddTrainer>
        .Append(mlContext.MulticlassClassification.Trainers.LogisticRegression(labelColumn: LabelTokey, featureColumn: "softmax2_pre_activation"))
        // </SnippetAddTrainer>
        // <SnippetMapValueToKey2>
        .Append(mlContext.Transforms.Conversion.MapKeyToValue((PredictedLabelValue, DefaultColumnNames.PredictedLabel)));
    // </SnippetMapValueToKey2>

    // Train the model
    Console.WriteLine("=============== Training classification model ===============");
    // Create and train the model based on the dataset that has been loaded, transformed.
    // <SnippetTrainModel>
    ITransformer model = estimator.Fit(data);
    // </SnippetTrainModel>

    // Process the training data through the model
    // This is an optional step, but it's useful for debugging issues
    // <SnippetTransformData>
    var predictions = model.Transform(data);
    // </SnippetTransformData>

    // Create enumerables for both the ImageData and ImagePrediction DataViews
    // for displaying results
    // <SnippetEnumerateDataViews>
    var imageData = mlContext.CreateEnumerable<ImageData>(data, false, true);
    var imagePredictionData = mlContext.CreateEnumerable<ImagePrediction>(predictions, false, true);
    // </SnippetEnumerateDataViews>

    // Hand both enumerables to PairAndDisplayResults for display.
    // <SnippetCallPairAndDisplayResults1>
    PairAndDisplayResults(imageData, imagePredictionData);
    // </SnippetCallPairAndDisplayResults1>

    // Get some performance metrics on the model using training data
    Console.WriteLine("=============== Classification metrics ===============");

    // <SnippetEvaluate>
    var classificationContext = new MulticlassClassificationCatalog(mlContext);
    var metrics = classificationContext.Evaluate(predictions, label: LabelTokey, predictedLabel: DefaultColumnNames.PredictedLabel);
    // </SnippetEvaluate>

    //<SnippetDisplayMetrics>
    Console.WriteLine($"LogLoss is: {metrics.LogLoss}");
    Console.WriteLine($"PerClassLogLoss is: {String.Join(" , ", metrics.PerClassLogLoss.Select(c => c.ToString()))}");
    //</SnippetDisplayMetrics>

    // Save the model to assets/outputs
    Console.WriteLine("=============== Save model to local file ===============");

    // <SnippetSaveModel>
    using (var fileStream = new FileStream(outputModelLocation, FileMode.Create))
    {
        mlContext.Model.Save(model, fileStream);
    }
    // </SnippetSaveModel>

    Console.WriteLine($"Model saved: {outputModelLocation}");
}