Ejemplo n.º 1
0
        /// <summary>
        /// Trains a FastTree regression pipeline on the housing training data, exports the fitted model
        /// to an ONNX file, loads that file back as a transform, and verifies that the original model and
        /// the ONNX model produce matching scores for the first five rows.
        /// </summary>
        public void SaveOnnxModelLoadAndScoreFastTree()
        {
            var mlContext = new MLContext(seed: 1);

            // Load the housing training set (the file has a header row).
            var trainingPath = TestCommon.GetDataPath(DataDir, TestDatasets.housing.trainFilename);
            var data = mlContext.Data.LoadFromTextFile<HousingRegression>(trainingPath, hasHeader: true);

            // Assemble the training pipeline: concatenate features, min-max normalize, cache, then FastTree.
            var trainerOptions = new FastTreeRegressionTrainer.Options
            {
                NumberOfThreads = 1,
                NumberOfTrees = 10
            };
            var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
                .Append(mlContext.Transforms.NormalizeMinMax("Features"))
                .AppendCacheCheckpoint(mlContext)
                .Append(mlContext.Regression.Trainers.FastTree(trainerOptions));

            // Train.
            var model = pipeline.Fit(data);

            // Export the fitted pipeline as an ONNX file. The stream is closed before the file is re-read below.
            var modelFileName = "SaveOnnxLoadAndScoreFastTreeModel.onnx";
            var modelPath = TestCommon.DeleteOutputPath(OutDir, modelFileName);
            using (var onnxStream = File.Create(modelPath))
            {
                mlContext.Model.ConvertToOnnx(model, data, onnxStream);
            }

            // Re-load the ONNX file as a transform.
            // Saving an ML.NET model as ONNX changes column names and types: ONNX does not allow an input and
            // an output to share a name, so names are kept but get a number appended. Here the ONNX output is
            // Score0, which is copied back to Score with CopyColumns.
            // ONNX also works on tensors, so a single float comes back as a [1,1] tensor. That is why the ONNX
            // prediction engine uses VectorScoreColumn (whose Score is a float[]) as its output type.
            // See #2980 and #2981 for more information.
            var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath)
                .Append(mlContext.Transforms.CopyColumns("Score", "Score0"));
            var onnxModel = onnxEstimator.Fit(data);

            // One prediction engine per model.
            var originalPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, ScoreColumn>(model);
            var onnxPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, VectorScoreColumn>(onnxModel);

            // Score the first five rows with both engines and compare.
            var firstRows = mlContext.Data.TakeRows(data, 5);
            var samples = mlContext.Data.CreateEnumerable<HousingRegression>(firstRows, false);

            foreach (var sample in samples)
            {
                var expected = originalPredictionEngine.Predict(sample);
                var actual = onnxPredictionEngine.Predict(sample);

                // Deliberately low-precision comparison: the ONNX score is element 0 of a vector.
                Assert.Equal(expected.Score, actual.Score[0], precision: 4);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Trains a KMeans clustering pipeline on the housing training data, exports the fitted model to
        /// an ONNX file, loads that file back as a transform, and verifies that the original model and the
        /// ONNX model produce matching score vectors for the first five rows.
        /// </summary>
        public void SaveOnnxModelLoadAndScoreKMeans()
        {
            var mlContext = new MLContext(seed: 1);

            // Load the housing training set (the file has a header row).
            var trainingPath = TestCommon.GetDataPath(DataDir, TestDatasets.housing.trainFilename);
            var data = mlContext.Data.LoadFromTextFile<HousingRegression>(trainingPath, hasHeader: true);

            // Assemble the training pipeline: concatenate features, min-max normalize, cache, then KMeans.
            var trainerOptions = new KMeansTrainer.Options
            {
                NumberOfThreads = 1,
                MaximumNumberOfIterations = 10
            };
            var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
                .Append(mlContext.Transforms.NormalizeMinMax("Features"))
                .AppendCacheCheckpoint(mlContext)
                .Append(mlContext.Clustering.Trainers.KMeans(trainerOptions));

            // Train.
            var model = pipeline.Fit(data);

            // Export the fitted pipeline as an ONNX file. The stream is closed before the file is re-read below.
            var modelFileName = "SaveOnnxLoadAndScoreKMeansModel.onnx";
            var modelPath = TestCommon.DeleteOutputPath(OutDir, modelFileName);
            using (var onnxStream = File.Create(modelPath))
            {
                mlContext.Model.ConvertToOnnx(model, data, onnxStream);
            }

            // Re-load the ONNX file as a transform and fit it on the same data.
            var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(modelPath);
            var onnxModel = onnxEstimator.Fit(data);

            // TODO #2980: ONNX outputs don't match the outputs of the model, so Score0 is copied to Score by hand.
            // TODO #2981: ONNX models cannot be fit as part of a pipeline, so the copy step is fitted separately
            //             on the ONNX-transformed data and then appended to the fitted ONNX transformer.
            var scoreRenamer = mlContext.Transforms.CopyColumns("Score", "Score0");
            var onnxScoredData = onnxModel.Transform(data);
            var onnxWorkaroundPipeline = onnxModel.Append(scoreRenamer.Fit(onnxScoredData));

            // One prediction engine per model; both expose Score as a vector here.
            var originalPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, VectorScoreColumn>(model);
            // TODO #2982: ONNX produces vector types and not the original output type.
            var onnxPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, VectorScoreColumn>(onnxWorkaroundPipeline);

            // Score the first five rows with both engines and compare.
            var firstRows = mlContext.Data.TakeRows(data, 5);
            var samples = mlContext.Data.CreateEnumerable<HousingRegression>(firstRows, false);

            foreach (var sample in samples)
            {
                var expected = originalPredictionEngine.Predict(sample);
                var actual = onnxPredictionEngine.Predict(sample);

                // Deliberately low-precision comparison of the two score vectors.
                Common.AssertEqual(expected.Score, actual.Score, precision: 4);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Trains a FastTree regression pipeline on the housing training data, exports the fitted model
        /// to an ONNX file, loads that file back as a transform (passing the configured GPU device id and
        /// CPU-fallback flag), and verifies that both models produce matching scores for the first five rows.
        /// </summary>
        public void SaveOnnxModelLoadAndScoreFastTree()
        {
            var mlContext = new MLContext(seed: 1);

            // Load the housing training set (the file has a header row).
            var trainingPath = TestCommon.GetDataPath(DataDir, TestDatasets.housing.trainFilename);
            var data = mlContext.Data.LoadFromTextFile<HousingRegression>(trainingPath, hasHeader: true);

            // Assemble the training pipeline: concatenate features, min-max normalize, cache, then FastTree.
            var trainerOptions = new FastTreeRegressionTrainer.Options
            {
                NumberOfThreads = 1,
                NumberOfTrees = 10
            };
            var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
                .Append(mlContext.Transforms.NormalizeMinMax("Features"))
                .AppendCacheCheckpoint(mlContext)
                .Append(mlContext.Regression.Trainers.FastTree(trainerOptions));

            // Train.
            var model = pipeline.Fit(data);

            // Export the fitted pipeline as an ONNX file. The stream is closed before the file is re-read below.
            var modelFileName = "SaveOnnxLoadAndScoreFastTreeModel.onnx";
            var modelPath = TestCommon.DeleteOutputPath(OutDir, modelFileName);
            using (var onnxStream = File.Create(modelPath))
            {
                mlContext.Model.ConvertToOnnx(model, data, onnxStream);
            }

            // Re-load the ONNX file as a transform.
            // ONNX works on tensors, so a single float comes back as a tensor of dimension [1,1]. That is why
            // the ONNX prediction engine uses VectorScoreColumn (whose Score is a float[]) as its output type.
            // See #2980 and #2981 for more information.
            var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(
                modelPath,
                gpuDeviceId: _gpuDeviceId,
                fallbackToCpu: _fallbackToCpu);
            var onnxModel = onnxEstimator.Fit(data);

            // One prediction engine per model.
            var originalPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, ScoreColumn>(model);
            var onnxPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, VectorScoreColumn>(onnxModel);

            // Score the first five rows with both engines and compare.
            var firstRows = mlContext.Data.TakeRows(data, 5);
            var samples = mlContext.Data.CreateEnumerable<HousingRegression>(firstRows, false);

            foreach (var sample in samples)
            {
                var expected = originalPredictionEngine.Predict(sample);
                var actual = onnxPredictionEngine.Predict(sample);

                // The ONNX score is element 0 of a vector; compare at precision 4.
                Assert.Equal(expected.Score, actual.Score[0], precision: 4);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Trains a FastTree regression pipeline on the housing training data, saves the fitted model to
        /// a zip file, loads it back, checks that the loaded schema matches the input schema, and verifies
        /// that the original and reloaded models give equal scores for the first five rows.
        /// </summary>
        public void FitPipelineSaveModelAndPredict()
        {
            var mlContext = new MLContext(seed: 1);

            // Load the housing training set (the file has a header row).
            var trainingPath = TestCommon.GetDataPath(DataDir, TestDatasets.housing.trainFilename);
            var data = mlContext.Data.LoadFromTextFile<HousingRegression>(trainingPath, hasHeader: true);

            // Assemble the training pipeline: concatenate features, then FastTree.
            var trainerOptions = new FastTreeRegressionTrainer.Options
            {
                NumberOfThreads = 1,
                NumberOfTrees = 10
            };
            var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
                .Append(mlContext.Regression.Trainers.FastTree(trainerOptions));

            // Train.
            var model = pipeline.Fit(data);

            // Persist the model together with the input schema.
            var modelPath = TestCommon.DeleteOutputPath(OutDir, "fitPipelineSaveModelAndPredict.zip");
            mlContext.Model.Save(model, data.Schema, modelPath);

            // Read the model back and confirm the stored schema equals the one it was saved with.
            ITransformer serializedModel;
            using (var modelStream = File.OpenRead(modelPath))
            {
                serializedModel = mlContext.Model.Load(modelStream, out var loadedSchema);
                TestCommon.CheckSameSchemas(data.Schema, loadedSchema);
            }

            // One prediction engine per model.
            var originalPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, ScoreColumn>(model);
            var serializedPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, ScoreColumn>(serializedModel);

            // Score the first five rows with both engines and compare.
            var firstRows = mlContext.Data.TakeRows(data, 5);
            var samples = mlContext.Data.CreateEnumerable<HousingRegression>(firstRows, false);

            foreach (var sample in samples)
            {
                var expected = originalPredictionEngine.Predict(sample);
                var actual = serializedPredictionEngine.Predict(sample);

                // The reloaded model is expected to give exactly the same score.
                Assert.Equal(expected.Score, actual.Score);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Trains an SDCA regression pipeline on the housing training data, exports the fitted model to
        /// an ONNX file, loads that file back as a transform (passing the configured GPU device id and
        /// CPU-fallback flag), and verifies that both models produce matching scores for the first five rows.
        /// </summary>
        public void SaveOnnxModelLoadAndScoreSDCA()
        {
            var mlContext = new MLContext(seed: 1);

            // Load the housing training set (the file has a header row).
            var trainingPath = TestCommon.GetDataPath(DataDir, TestDatasets.housing.trainFilename);
            var data = mlContext.Data.LoadFromTextFile<HousingRegression>(trainingPath, hasHeader: true);

            // Assemble the training pipeline: concatenate features, min-max normalize, cache, then SDCA.
            var trainerOptions = new SdcaRegressionTrainer.Options
            {
                NumberOfThreads = 1,
                MaximumNumberOfIterations = 10
            };
            var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
                .Append(mlContext.Transforms.NormalizeMinMax("Features"))
                .AppendCacheCheckpoint(mlContext)
                .Append(mlContext.Regression.Trainers.Sdca(trainerOptions));

            // Train.
            var model = pipeline.Fit(data);

            // Export the fitted pipeline as an ONNX file. The stream is closed before the file is re-read below.
            var modelFileName = "SaveOnnxLoadAndScoreSdcaModel.onnx";
            var modelPath = TestCommon.DeleteOutputPath(OutDir, modelFileName);
            using (var onnxStream = File.Create(modelPath))
            {
                mlContext.Model.ConvertToOnnx(model, data, onnxStream);
            }

            // Re-load the ONNX file as a transform and fit it on the same data.
            var onnxEstimator = mlContext.Transforms.ApplyOnnxModel(
                modelPath,
                gpuDeviceId: _gpuDeviceId,
                fallbackToCpu: _fallbackToCpu);
            var onnxModel = onnxEstimator.Fit(data);

            // One prediction engine per model.
            var originalPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, ScoreColumn>(model);
            // TODO #2982: ONNX produces vector types and not the original output type.
            var onnxPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, VectorScoreColumn>(onnxModel);

            // Score the first five rows with both engines and compare.
            var firstRows = mlContext.Data.TakeRows(data, 5);
            var samples = mlContext.Data.CreateEnumerable<HousingRegression>(firstRows, false);

            foreach (var sample in samples)
            {
                var expected = originalPredictionEngine.Predict(sample);
                var actual = onnxPredictionEngine.Predict(sample);

                // The ONNX score is element 0 of a vector; compare at precision 4.
                Assert.Equal(expected.Score, actual.Score[0], precision: 4);
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Trains a FastTree regression pipeline on the housing training data, saves the fitted model to
        /// a zip file, and checks that the archive contains a TrainingInfo/Version.txt entry whose first
        /// line contains a major.minor.patch version number.
        /// </summary>
        public void DetermineNugetVersionFromModel()
        {
            var mlContext = new MLContext(seed: 1);

            // Get the dataset.
            var data = mlContext.Data.LoadFromTextFile<HousingRegression>(TestCommon.GetDataPath(DataDir, TestDatasets.housing.trainFilename), hasHeader: true);

            // Create a pipeline to train on the housing data.
            var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
                           .Append(mlContext.Regression.Trainers.FastTree(
                                       new FastTreeRegressionTrainer.Options
                                       {
                                           NumberOfThreads = 1,
                                           NumberOfTrees = 10
                                       }));

            // Fit the pipeline.
            var model = pipeline.Fit(data);

            // Save model to a file.
            var modelPath = TestCommon.DeleteOutputPath(OutDir, "determineNugetVersionFromModel.zip");

            mlContext.Model.Save(model, data.Schema, modelPath);

            // Check that the version can be extracted from the model.
            var versionFileName = @"TrainingInfo" + Path.DirectorySeparatorChar + "Version.txt";

            using (ZipArchive archive = ZipFile.OpenRead(modelPath))
            {
                // The version of the entire model is kept in the version file.
                // FirstOrDefault (rather than First) lets a missing entry surface through the
                // assertion below instead of an InvalidOperationException thrown by LINQ.
                var versionPath = archive.Entries.FirstOrDefault(x => x.FullName == versionFileName);
                Assert.NotNull(versionPath);

                using (var stream = versionPath.Open())
                using (var reader = new StreamReader(stream))
                {
                    // The only line in the file is the version of the model.
                    var line = reader.ReadLine();

                    // major.minor.patch, optionally followed by -dev, -ci or -preview.N.N.N.
                    // The suffix is written as a real alternation group; square brackets here would
                    // form a character class matching a single character. The pattern is intentionally
                    // left unanchored, so any text around the version number is tolerated.
                    Assert.Matches(new Regex(@"(\d+)\.(\d+)\.(\d+)(-(dev|ci|preview\.(\d+)\.(\d+)\.(\d+))){0,1}"), line);
                }
            }
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Forwards to <c>TestCommon.DeleteOutputPath</c> using this instance's <c>OutDir</c> as the directory.
 /// </summary>
 /// <param name="name">File name passed through unchanged to the helper.</param>
 /// <returns>Whatever <c>TestCommon.DeleteOutputPath</c> returns (presumably the output file's path; confirm against the helper).</returns>
 protected string DeleteOutputPath(string name) => TestCommon.DeleteOutputPath(OutDir, name);