예제 #1
0
        public void TestSaveAndLoadTreeFeaturizer()
        {
            int dataPointCount = 200;
            var data           = SamplesUtils.DatasetUtils.GenerateFloatLabelFloatFeatureVectorSamples(dataPointCount).ToList();
            var dataView       = ML.Data.LoadFromEnumerable(data);

            dataView = ML.Data.Cache(dataView);

            var trainerOptions = new FastForestRegressionTrainer.Options
            {
                NumberOfThreads            = 1,
                NumberOfTrees              = 10,
                NumberOfLeaves             = 4,
                MinimumExampleCountPerLeaf = 10,
                FeatureColumnName          = "Features",
                LabelColumnName            = "Label"
            };

            var options = new FastForestRegressionFeaturizationEstimator.Options()
            {
                InputColumnName  = "Features",
                TreesColumnName  = "Trees",
                LeavesColumnName = "Leaves",
                PathsColumnName  = "Paths",
                TrainerOptions   = trainerOptions
            };

            var pipeline = ML.Transforms.FeaturizeByFastForestRegression(options)
                           .Append(ML.Transforms.Concatenate("CombinedFeatures", "Features", "Trees", "Leaves", "Paths"))
                           .Append(ML.Regression.Trainers.Sdca("Label", "CombinedFeatures"));
            var model      = pipeline.Fit(dataView);
            var prediction = model.Transform(dataView);
            var metrics    = ML.Regression.Evaluate(prediction);

            Assert.True(metrics.MeanAbsoluteError < 0.25);
            Assert.True(metrics.MeanSquaredError < 0.1);

            // Save the trained model into file.
            ITransformer loadedModel = null;
            var          tempPath    = Path.GetTempFileName();

            using (var file = new SimpleFileHandle(Env, tempPath, true, true))
            {
                using (var fs = file.CreateWriteStream())
                    ML.Model.Save(model, null, fs);

                using (var fs = file.OpenReadStream())
                    loadedModel = ML.Model.Load(fs, out var schema);
            }
            var loadedPrediction = loadedModel.Transform(dataView);
            var loadedMetrics    = ML.Regression.Evaluate(loadedPrediction);

            Assert.Equal(metrics.MeanAbsoluteError, loadedMetrics.MeanAbsoluteError);
            Assert.Equal(metrics.MeanSquaredError, loadedMetrics.MeanSquaredError);
        }
예제 #2
0
        public void TestEstimatorSaveLoad()
        {
            IHostEnvironment env = new MLContext(1);
            var dataFile         = GetDataPath("images/images.tsv");
            var imageFolder      = Path.GetDirectoryName(dataFile);
            var data             = TextLoader.Create(env, new TextLoader.Options()
            {
                Columns = new[]
                {
                    new TextLoader.Column("ImagePath", DataKind.String, 0),
                    new TextLoader.Column("Name", DataKind.String, 1),
                }
            }, new MultiFileSource(dataFile));

            var pipe = new ImageLoadingEstimator(env, imageFolder, ("ImageReal", "ImagePath"))
                       .Append(new ImageResizingEstimator(env, "ImageReal", 100, 100, "ImageReal"))
                       .Append(new ImagePixelExtractingEstimator(env, "ImagePixels", "ImageReal"))
                       .Append(new ImageGrayscalingEstimator(env, ("ImageGray", "ImageReal")));

            pipe.GetOutputSchema(SchemaShape.Create(data.Schema));
            var model = pipe.Fit(data);

            var tempPath = Path.GetTempFileName();

            using (var file = new SimpleFileHandle(env, tempPath, true, true))
            {
                using (var fs = file.CreateWriteStream())
                    ML.Model.Save(model, null, fs);
                ITransformer model2;
                using (var fs = file.OpenReadStream())
                    model2 = ML.Model.Load(fs, out var schema);

                var transformerChain = model2 as TransformerChain <ITransformer>;
                Assert.NotNull(transformerChain);

                var newCols = ((ImageLoadingTransformer)transformerChain.First()).Columns;
                var oldCols = ((ImageLoadingTransformer)model.First()).Columns;
                Assert.True(newCols
                            .Zip(oldCols, (x, y) => x == y)
                            .All(x => x));
            }
            Done();
        }
예제 #3
0
        public void TestEstimatorSaveLoad()
        {
            IHostEnvironment env = new MLContext();
            var dataFile         = GetDataPath("images/images.tsv");
            var imageFolder      = Path.GetDirectoryName(dataFile);
            var data             = TextLoader.Create(env, new TextLoader.Arguments()
            {
                Column = new[]
                {
                    new TextLoader.Column("ImagePath", DataKind.TX, 0),
                    new TextLoader.Column("Name", DataKind.TX, 1),
                }
            }, new MultiFileSource(dataFile));

            var pipe = new ImageLoadingEstimator(env, imageFolder, ("ImagePath", "ImageReal"))
                       .Append(new ImageResizingEstimator(env, "ImageReal", "ImageReal", 100, 100))
                       .Append(new ImagePixelExtractingEstimator(env, "ImageReal", "ImagePixels"))
                       .Append(new ImageGrayscalingEstimator(env, ("ImageReal", "ImageGray")));

            pipe.GetOutputSchema(Core.Data.SchemaShape.Create(data.Schema));
            var model = pipe.Fit(data);

            var tempPath = Path.GetTempFileName();

            using (var file = new SimpleFileHandle(env, tempPath, true, true))
            {
                using (var fs = file.CreateWriteStream())
                    model.SaveTo(env, fs);
                var model2 = TransformerChain.LoadFrom(env, file.OpenReadStream());

                var newCols = ((ImageLoaderTransform)model2.First()).Columns;
                var oldCols = ((ImageLoaderTransform)model.First()).Columns;
                Assert.True(newCols
                            .Zip(oldCols, (x, y) => x == y)
                            .All(x => x));
            }
            Done();
        }
예제 #4
0
        public void TestOnnxTransformSaveAndLoadWithCustomShapes()
        {
            // The loaded model has input shape [-1, 3] and output shape [-1].
            var modelFile = Path.Combine(Directory.GetCurrentDirectory(), "unknowndimensions", "test_unknowndimensions_float.onnx");

            var dataPoints = new InputWithCustomShape[]
            {
                // It's a flattened 3-by-3 tensor.
                // [1.1, 1.3, 1.2]
                // |1.9, 1.3, 1.2|
                // [1.1, 1.3, 1.8]
                new InputWithCustomShape()
                {
                    input = new float[] { 1.1f, 1.3f, 1.2f, 1.9f, 1.3f, 1.2f, 1.1f, 1.3f, 1.8f }
                },
                // It's a flattened 3-by-3 tensor.
                // [0, 0, 1]
                // |1, 0, 0|
                // [1, 0, 0]
                new InputWithCustomShape()
                {
                    input = new float[] { 0f, 0f, 1f, 1f, 0f, 0f, 1f, 0f, 0f }
                }
            };

            var shapeDictionary = new Dictionary <string, int[]>()
            {
                { nameof(InputWithCustomShape.input), new int[] { 3, 3 } }
            };

            var dataView = ML.Data.LoadFromEnumerable(dataPoints);

            var pipeline = ML.Transforms.ApplyOnnxModel(nameof(PredictionWithCustomShape.argmax),
                                                        nameof(InputWithCustomShape.input), modelFile, shapeDictionary);

            var model = pipeline.Fit(dataView);

            // Save the trained ONNX transformer into file and then load it back.
            ITransformer loadedModel = null;
            var          tempPath    = Path.GetTempFileName();

            using (var file = new SimpleFileHandle(Env, tempPath, true, true))
            {
                // Save.
                using (var fs = file.CreateWriteStream())
                    ML.Model.Save(model, null, fs);

                // Load.
                using (var fs = file.OpenReadStream())
                    loadedModel = ML.Model.Load(fs, out var schema);
            }

            var transformedDataView = loadedModel.Transform(dataView);

            // Conduct the same check for all the 3 called public APIs.
            var transformedDataPoints = ML.Data.CreateEnumerable <PredictionWithCustomShape>(transformedDataView, false).ToList();

            // One data point generates one transformed data point.
            Assert.Equal(dataPoints.Count(), transformedDataPoints.Count);

            // Check result numbers. They are results of applying ONNX argmax along the second axis; for example
            // [1.1, 1.3, 1.2] ---> [1] because 1.3 (indexed by 1) is the largest element.
            // |1.9, 1.3, 1.2| ---> |0|         1.9             0
            // [1.1, 1.3, 1.8] ---> [2]         1.8             2
            var expectedResults = new long[][]
            {
                new long[] { 1, 0, 2 },
                new long[] { 2, 0, 0 }
            };

            for (int i = 0; i < transformedDataPoints.Count; ++i)
            {
                Assert.Equal(transformedDataPoints[i].argmax, expectedResults[i]);
            }

            (model as IDisposable)?.Dispose();
            (loadedModel as IDisposable)?.Dispose();
        }
예제 #5
0
        public void TestSaveAndLoadDoubleTreeFeaturizer()
        {
            int dataPointCount = 200;
            var data           = SamplesUtils.DatasetUtils.GenerateFloatLabelFloatFeatureVectorSamples(dataPointCount).ToList();
            var dataView       = ML.Data.LoadFromEnumerable(data);

            dataView = ML.Data.Cache(dataView);

            var trainerOptions = new FastForestRegressionTrainer.Options
            {
                NumberOfThreads            = 1,
                NumberOfTrees              = 10,
                NumberOfLeaves             = 4,
                MinimumExampleCountPerLeaf = 10,
                FeatureColumnName          = "Features",
                LabelColumnName            = "Label"
            };

            // Trains tree featurization on "Features" and applies on "CopiedFeatures".
            var options = new FastForestRegressionFeaturizationEstimator.Options()
            {
                InputColumnName  = "CopiedFeatures",
                TrainerOptions   = trainerOptions,
                TreesColumnName  = "OhMyTrees",
                LeavesColumnName = "OhMyLeaves",
                PathsColumnName  = "OhMyPaths"
            };

            var pipeline = ML.Transforms.CopyColumns("CopiedFeatures", "Features")
                           .Append(ML.Transforms.FeaturizeByFastForestRegression(options))
                           .Append(ML.Transforms.Concatenate("CombinedFeatures", "Features", "OhMyTrees", "OhMyLeaves", "OhMyPaths"))
                           .Append(ML.Regression.Trainers.Sdca("Label", "CombinedFeatures"));
            var model      = pipeline.Fit(dataView);
            var prediction = model.Transform(dataView);
            var metrics    = ML.Regression.Evaluate(prediction);

            Assert.True(metrics.MeanAbsoluteError < 0.25);
            Assert.True(metrics.MeanSquaredError < 0.1);

            // Save the trained model into file and then load it back.
            ITransformer loadedModel = null;
            var          tempPath    = Path.GetTempFileName();

            using (var file = new SimpleFileHandle(Env, tempPath, true, true))
            {
                using (var fs = file.CreateWriteStream())
                    ML.Model.Save(model, null, fs);

                using (var fs = file.OpenReadStream())
                    loadedModel = ML.Model.Load(fs, out var schema);
            }

            // Compute prediction using the loaded model.
            var loadedPrediction = loadedModel.Transform(dataView);
            var loadedMetrics    = ML.Regression.Evaluate(loadedPrediction);

            // Check if the loaded model produces the same result as the trained model.
            Assert.Equal(metrics.MeanAbsoluteError, loadedMetrics.MeanAbsoluteError);
            Assert.Equal(metrics.MeanSquaredError, loadedMetrics.MeanSquaredError);

            var secondPipeline = ML.Transforms.CopyColumns("CopiedFeatures", "Features")
                                 .Append(ML.Transforms.NormalizeBinning("CopiedFeatures"))
                                 .Append(ML.Transforms.FeaturizeByFastForestRegression(options))
                                 .Append(ML.Transforms.Concatenate("CombinedFeatures", "Features", "OhMyTrees", "OhMyLeaves", "OhMyPaths"))
                                 .Append(ML.Regression.Trainers.Sdca("Label", "CombinedFeatures"));
            var secondModel      = secondPipeline.Fit(dataView);
            var secondPrediction = secondModel.Transform(dataView);
            var secondMetrics    = ML.Regression.Evaluate(secondPrediction);

            // The second pipeline trains a tree featurizer on a bin-based normalized feature, so the second pipeline
            // is different from the first pipeline.
            Assert.NotEqual(metrics.MeanAbsoluteError, secondMetrics.MeanAbsoluteError);
            Assert.NotEqual(metrics.MeanSquaredError, secondMetrics.MeanSquaredError);
        }