Example #1
        /// <summary>
        /// This method compares two pipelines to make sure they are identical. The first pipeline is passed
        /// as a <see cref="RoleMappedData"/>, and the second as an array of byte arrays plus a string array,
        /// both obtained by calling <see cref="SerializeRoleMappedData"/> on the second pipeline.
        /// The comparison is done by saving <paramref name="dataToCompare"/> as an in-memory <see cref="ZipArchive"/>,
        /// and for each entry in it, comparing its name and byte sequence to the corresponding entries in
        /// <paramref name="dataZipEntryNames"/> and <paramref name="dataSerialized"/>.
        /// This method throws if for any of the entries the name/byte sequence are not identical.
        /// </summary>
        internal static void CheckSamePipeline(IHostEnvironment env, IChannel ch,
                                               RoleMappedData dataToCompare, byte[][] dataSerialized, string[] dataZipEntryNames)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(ch, nameof(ch));
            ch.CheckValue(dataToCompare, nameof(dataToCompare));
            ch.CheckValue(dataSerialized, nameof(dataSerialized));
            ch.CheckValue(dataZipEntryNames, nameof(dataZipEntryNames));
            if (dataZipEntryNames.Length != dataSerialized.Length)
            {
                throw ch.ExceptParam(nameof(dataSerialized),
                                     $"The length of {nameof(dataSerialized)} must be equal to the length of {nameof(dataZipEntryNames)}");
            }

            using (var ms = new MemoryStream())
            {
                // REVIEW: This can be done more efficiently by adding a custom type of repository that
                // doesn't actually save the data, but upon stream closure compares the results to the given repository
                // and then discards it. Currently, however, this cannot be done because ModelSaveContext does not use
                // an abstract class/interface, but rather the RepositoryWriter class.
                TrainUtils.SaveModel(env, ch, ms, null, dataToCompare);

                string errorMsg = "Models contain different pipelines, cannot ensemble them.";
                var    zip      = new ZipArchive(ms);
                var    entries  = zip.Entries.OrderBy(e => e.FullName).ToArray();
                ch.Check(dataSerialized.Length == Utils.Size(entries), errorMsg);
                byte[] buffer = null;
                for (int i = 0; i < dataSerialized.Length; i++)
                {
                    ch.Check(dataZipEntryNames[i] == entries[i].FullName, errorMsg);
                    int len = dataSerialized[i].Length;
                    if (Utils.Size(buffer) < len)
                    {
                        buffer = new byte[len];
                    }
                    using (var s = entries[i].Open())
                    {
                        // Stream.Read may return fewer bytes than requested, so read in a loop.
                        int totalRead = 0;
                        while (totalRead < len)
                        {
                            int read = s.Read(buffer, totalRead, len - totalRead);
                            if (read <= 0)
                                break;
                            totalRead += read;
                        }
                        ch.Check(totalRead == len, errorMsg);
                        for (int j = 0; j < len; j++)
                        {
                            ch.Check(buffer[j] == dataSerialized[i][j], errorMsg);
                        }
                        if (s.Read(buffer, 0, 1) > 0)
                        {
                            throw env.Except(errorMsg);
                        }
                    }
                }
            }
        }
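
In practice this check consumes the arrays produced by SerializeRoleMappedData on a reference pipeline. A minimal hedged sketch, assuming env, ch, a candidate RoleMappedData named candidate, and previously captured refBytes/refNames are in scope:

            // Hedged sketch: refBytes and refNames were captured earlier from the
            // reference pipeline (see SerializeRoleMappedData below). Throws if any
            // zip entry name or byte sequence differs between the two pipelines.
            CheckSamePipeline(env, ch, candidate, refBytes, refNames);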
        public void TrainSaveModelAndPredict()
        {
            var dataPath     = GetDataPath(SentimentDataPath);
            var testDataPath = GetDataPath(SentimentTestPath);

            using (var env = new LocalEnvironment(seed: 1, conc: 1))
            {
                // Pipeline
                var loader = TextLoader.ReadFile(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath));

                var trans = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader);

                // Train
                var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments
                {
                    NumThreads = 1
                });

                var cached     = new CacheDataView(env, trans, prefetch: null);
                var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features");
                var predictor  = trainer.Train(new Runtime.TrainContext(trainRoles));

                PredictionEngine <SentimentData, SentimentPrediction> model;
                using (var file = env.CreateTempFile())
                {
                    // Save model.
                    var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features");
                    using (var ch = env.Start("saving"))
                        TrainUtils.SaveModel(env, ch, file, predictor, scoreRoles);

                    // Load model.
                    using (var fs = file.OpenReadStream())
                        model = env.CreatePredictionEngine <SentimentData, SentimentPrediction>(fs);
                }

                // Take a couple examples out of the test data and run predictions on top.
                var testLoader = TextLoader.ReadFile(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(testDataPath));
                var testData   = testLoader.AsEnumerable <SentimentData>(env, false);
                foreach (var input in testData.Take(5))
                {
                    var prediction = model.Predict(input);
                    // Verify that predictions match and scores are separated from zero.
                    Assert.Equal(input.Sentiment, prediction.Sentiment);
                    Assert.True(input.Sentiment && prediction.Score > 1 || !input.Sentiment && prediction.Score < -1);
                }
            }
        }
        private IDataScorerTransform GetScorer(IHostEnvironment env, IDataView transforms, IPredictor pred, string testDataPath = null)
        {
            using (var ch = env.Start("Saving model"))
                using (var memoryStream = new MemoryStream())
                {
                    var trainRoles = new RoleMappedData(transforms, label: "Label", feature: "Features");

                    // Model cannot be saved with CacheDataView
                    TrainUtils.SaveModel(env, ch, memoryStream, pred, trainRoles);
                    memoryStream.Position = 0;
                    using (var rep = RepositoryReader.Open(memoryStream, ch))
                    {
                        IDataLoader    testPipe  = ModelFileUtils.LoadLoader(env, rep, new MultiFileSource(testDataPath), true);
                        RoleMappedData testRoles = new RoleMappedData(testPipe, label: "Label", feature: "Features");
                        return ScoreUtils.GetScorer(pred, testRoles, env, testRoles.Schema);
                    }
                }
        }
        public void TestGcnNormOldSavingAndLoading()
        {
            string dataSource = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
            var    dataView   = TextLoader.CreateReader(ML,
                                                        c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
                                                        separator: ';', hasHeader: true)
                                .Read(dataSource).AsDynamic;
            var pipe = new GlobalContrastNormalizingEstimator(ML, "features", "whitened");

            var result      = pipe.Fit(dataView).Transform(dataView);
            var resultRoles = new RoleMappedData(result);

            using (var ms = new MemoryStream())
            {
                TrainUtils.SaveModel(ML, Env.Start("saving"), ms, null, resultRoles);
                ms.Position = 0;
                var loadedView = ModelFileUtils.LoadTransforms(ML, dataView, ms);
            }
        }
        internal override void Save(IHostEnvironment env, Stream stream)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(stream, nameof(stream));
            using (var ch = env.Start("Saving predictor model"))
            {
                // REVIEW: address the asymmetry in the way we're loading and saving the model.
                // Effectively, we have methods to load the transform model from a model.zip, but don't have
                // methods to compose the model.zip out of transform model, predictor and role mappings
                // (we use the TrainUtils.SaveModel that does all three).

                // Create the chain of transforms for saving.
                IDataView data = new EmptyDataView(env, TransformModel.InputSchema);
                data = TransformModel.Apply(env, data);
                var roleMappedData = new RoleMappedData(data, _roleMappings, opt: true);

                TrainUtils.SaveModel(env, ch, stream, Predictor, roleMappedData);
            }
        }
Example #6
 void TestOldSavingAndLoading()
 {
     var data = new[] { new TestClass() { A = 1, B = 2, C = 3, }, new TestClass() { A = 4, B = 5, C = 6 } };
     var dataView = ML.Data.ReadFromEnumerable(data);
     var est = new ValueToKeyMappingEstimator(Env, new[]{
             new ValueToKeyMappingTransformer.ColumnInfo("A", "TermA"),
             new ValueToKeyMappingTransformer.ColumnInfo("B", "TermB"),
             new ValueToKeyMappingTransformer.ColumnInfo("C", "TermC")
         });
     var transformer = est.Fit(dataView);
     var result = transformer.Transform(dataView);
     var resultRoles = new RoleMappedData(result);
     using (var ms = new MemoryStream())
     {
         TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
         ms.Position = 0;
         var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);
         ValidateTermTransformer(loadedView);
     }
 }
        public void TestWhiteningOldSavingAndLoading()
        {
            var    env        = new ConsoleEnvironment(seed: 0);
            string dataSource = GetDataPath("generated_regression_dataset.csv");
            var    dataView   = TextLoader.CreateReader(env,
                                                        c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
                                                        separator: ';', hasHeader: true)
                                .Read(dataSource).AsDynamic;
            var pipe = new VectorWhiteningEstimator(env, "features", "whitened");

            var result      = pipe.Fit(dataView).Transform(dataView);
            var resultRoles = new RoleMappedData(result);

            using (var ms = new MemoryStream())
            {
                TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
                ms.Position = 0;
                var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);
            }
            Done();
        }
Example #8
        /// <summary>
        /// This method takes a <see cref="RoleMappedData"/> as input, saves it as an in-memory <see cref="ZipArchive"/>
        /// and returns two arrays indexed by the entries in the zip:
        /// 1. An array of byte arrays, containing the byte sequences of each entry.
        /// 2. An array of strings, containing the name of each entry.
        ///
        /// This method is used for comparing pipelines. Its outputs can be passed to <see cref="CheckSamePipeline"/>
        /// to check if this pipeline is identical to another pipeline.
        /// </summary>
        public static void SerializeRoleMappedData(IHostEnvironment env, IChannel ch, RoleMappedData data,
                                                   out byte[][] dataSerialized, out string[] dataZipEntryNames)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(ch, nameof(ch));
            ch.CheckValue(data, nameof(data));

            using (var ms = new MemoryStream())
            {
                TrainUtils.SaveModel(env, ch, ms, null, data);
                var zip     = new ZipArchive(ms);
                var entries = zip.Entries.OrderBy(e => e.FullName).ToArray();
                dataSerialized    = new byte[Utils.Size(entries)][];
                dataZipEntryNames = new string[Utils.Size(entries)];
                for (int i = 0; i < Utils.Size(entries); i++)
                {
                    dataZipEntryNames[i] = entries[i].FullName;
                    dataSerialized[i]    = new byte[entries[i].Length];
                    using (var s = entries[i].Open())
                    {
                        // Stream.Read may return fewer bytes than requested; loop until the entry is fully read.
                        int offset = 0;
                        while (offset < dataSerialized[i].Length)
                        {
                            int read = s.Read(dataSerialized[i], offset, dataSerialized[i].Length - offset);
                            if (read <= 0)
                                break;
                            offset += read;
                        }
                    }
                }
            }
        }
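
The producing side of the same comparison, as a hedged sketch assuming a reference RoleMappedData named reference is in scope:

            // Hedged sketch: capture the reference pipeline's zip entry names and
            // byte sequences for a later CheckSamePipeline call.
            SerializeRoleMappedData(env, ch, reference, out byte[][] refBytes, out string[] refNames);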
        public void TestOldSavingAndLoading()
        {
            var modelFile = "model_matmul/frozen_saved_model.pb";

            var dataView = ML.Data.LoadFromEnumerable(
                new List <TestData>(new TestData[] {
                new TestData()
                {
                    a = new[] { 1.0f, 2.0f, 3.0f, 4.0f },
                    b = new[] { 1.0f, 2.0f, 3.0f, 4.0f }
                },
                new TestData()
                {
                    a = new[] { 2.0f, 2.0f, 2.0f, 2.0f },
                    b = new[] { 3.0f, 3.0f, 3.0f, 3.0f }
                },
                new TestData()
                {
                    a = new[] { 5.0f, 6.0f, 10.0f, 12.0f },
                    b = new[] { 10.0f, 8.0f, 6.0f, 6.0f }
                }
            }));

            using var model = ML.Model.LoadTensorFlowModel(modelFile);
            var est         = model.ScoreTensorFlowModel(new[] { "c" }, new[] { "a", "b" });
            var transformer = est.Fit(dataView);
            var result      = transformer.Transform(dataView);
            var resultRoles = new RoleMappedData(result);

            using (var ms = new MemoryStream())
            {
                TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
                ms.Position = 0;
                var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);
                ValidateTensorFlowTransformer(loadedView);
            }
        }
        /// <summary>
        /// Saves the pipeline in a stream.
        /// </summary>
        /// <param name="fs">opened stream</param>
        /// <param name="removeFirstTransform">remove the first transform which is a PassThroughTransform</param>
        public void Save(Stream fs, bool removeFirstTransform = false)
        {
            RoleMappedData roleMap = null;

            if (removeFirstTransform)
            {
                var source = _transforms.First().transform;
                if (!(source is PassThroughTransform))
                {
                    throw Contracts.ExceptNotSupp("The first transform should be of type PassThroughTransform.");
                }
                var replace = (source as PassThroughTransform).Source;
                var last    = _transforms.Last().transform;
                var newPipe = ApplyTransformUtils.ApplyAllTransformsToData(_env, last, replace, source);
                var roles   = _predictor.roleMapData.Schema.GetColumnRoleNames().ToArray();
                roleMap = new RoleMappedData(newPipe, roles);
            }
            else
            {
                roleMap = _predictor.roleMapData;
            }
            using (var ch = _env.Start("Save Predictor"))
                TrainUtils.SaveModel(_env, ch, fs, _predictor.predictor, roleMap);
        }
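
A hedged usage sketch of this Save overload; the pipeline instance and output path are assumptions for illustration:

            // Hypothetical usage: drop the leading PassThroughTransform before persisting.
            using (var fs = File.Create("pipeline.zip"))
                pipeline.Save(fs, removeFirstTransform: true);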
Example #11
        public void TestOldSavingAndLoading()
        {
            var data = new[] { new TestClass()
                               {
                                   A = "This is a good sentence.", B = new string[2] {
                                       "Much words", "Wow So Cool"
                                   }
                               } };

            var dataView = ML.Data.ReadFromEnumerable(data);
            var pipe     = new WordTokenizingEstimator(Env, new[] {
                new WordTokenizingEstimator.ColumnInfo("TokenizeA", "A"),
                new WordTokenizingEstimator.ColumnInfo("TokenizeB", "B"),
            });
            var result      = pipe.Fit(dataView).Transform(dataView);
            var resultRoles = new RoleMappedData(result);

            using (var ms = new MemoryStream())
            {
                TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
                ms.Position = 0;
                var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);
            }
        }
Example #12
        public void TestOldSavingAndLoading()
        {
            var data = new[] {
                new TestClass()
                {
                    A = 1, B = new int[2] {
                        2, 3
                    }, C = new int[2] {
                        3, 4
                    }
                },
                new TestClass()
                {
                    A = 4, B = new int[2] {
                        2, 4
                    }, C = new int[3] {
                        2, 4, 3
                    }
                }
            };
            var dataView = ML.Data.LoadFromEnumerable(data);
            var pipe     = ML.Transforms.Categorical.OneHotEncoding(new[] {
                new OneHotEncodingEstimator.ColumnOptions("CatA", "A"),
                new OneHotEncodingEstimator.ColumnOptions("CatB", "B"),
                new OneHotEncodingEstimator.ColumnOptions("CatC", "C")
            });
            var result      = pipe.Fit(dataView).Transform(dataView);
            var resultRoles = new RoleMappedData(result);

            using (var ms = new MemoryStream())
            {
                TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
                ms.Position = 0;
                var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);
            }
        }
        [ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // x86 fails with "An attempt was made to load a program with an incorrect format."
        void TestOldSavingAndLoading()
        {
            if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                return;
            }

            var modelFile = "squeezenet/00000001/model.onnx";

            var samplevector = GetSampleArrayData();

            var dataView = ComponentCreation.CreateDataView(Env,
                                                            new TestData[] {
                new TestData()
                {
                    data_0 = samplevector
                }
            });

            var inputNames  = new[] { "data_0" };
            var outputNames = new[] { "softmaxout_1" };
            var est         = new OnnxScoringEstimator(Env, modelFile, inputNames, outputNames);
            var transformer = est.Fit(dataView);
            var result      = transformer.Transform(dataView);
            var resultRoles = new RoleMappedData(result);

            using (var ms = new MemoryStream())
            {
                TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
                ms.Position = 0;
                var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);

                loadedView.Schema.TryGetColumnIndex(outputNames[0], out int softMaxOut1);
                using (var cursor = loadedView.GetRowCursor(col => col == softMaxOut1))
                {
                    VBuffer <float> softMaxValue  = default;
                    var             softMaxGetter = cursor.GetGetter <VBuffer <float> >(softMaxOut1);
                    float           sum           = 0f;
                    int             i             = 0;
                    while (cursor.MoveNext())
                    {
                        softMaxGetter(ref softMaxValue);
                        var values = softMaxValue.DenseValues();
                        foreach (var val in values)
                        {
                            sum += val;
                            if (i == 0)
                            {
                                Assert.InRange(val, 0.00004, 0.00005);
                            }
                            if (i == 1)
                            {
                                Assert.InRange(val, 0.003844, 0.003845);
                            }
                            if (i == 999)
                            {
                                Assert.InRange(val, 0.0029566, 0.0029567);
                            }
                            i++;
                        }
                    }
                    Assert.InRange(sum, 1.0, 1.00001);
                }
            }
        }
Example #14
        public void TestBackAndForthConversionWithoutAlphaNoInterleaveNoOffset()
        {
            IHostEnvironment env         = new MLContext();
            const int        imageHeight = 100;
            const int        imageWidth  = 130;
            var dataFile    = GetDataPath("images/images.tsv");
            var imageFolder = Path.GetDirectoryName(dataFile);
            var data        = TextLoader.Create(env, new TextLoader.Arguments()
            {
                Columns = new[]
                {
                    new TextLoader.Column("ImagePath", DataKind.TX, 0),
                    new TextLoader.Column("Name", DataKind.TX, 1),
                }
            }, new MultiFileSource(dataFile));
            var images  = new ImageLoaderTransformer(env, imageFolder, ("ImageReal", "ImagePath")).Transform(data);
            var cropped = new ImageResizerTransformer(env, "ImageCropped", imageWidth, imageHeight, "ImageReal").Transform(images);
            var pixels  = new ImagePixelExtractorTransformer(env, "ImagePixels", "ImageCropped").Transform(cropped);

            IDataView backToBitmaps = new VectorToImageTransform(env, new VectorToImageTransform.Arguments()
            {
                InterleaveArgb = false,
                Columns        = new VectorToImageTransform.Column[1] {
                    new VectorToImageTransform.Column()
                    {
                        Name = "ImageRestored", Source = "ImagePixels", ImageHeight = imageHeight, ImageWidth = imageWidth, ContainsAlpha = false
                    }
                }
            }, pixels);

            var fname = nameof(TestBackAndForthConversionWithoutAlphaNoInterleaveNoOffset) + "_model.zip";

            var fh = env.CreateOutputFile(fname);

            using (var ch = env.Start("save"))
                TrainUtils.SaveModel(env, ch, fh, null, new RoleMappedData(backToBitmaps));

            backToBitmaps = ModelFileUtils.LoadPipeline(env, fh.OpenReadStream(), new MultiFileSource(dataFile));
            DeleteOutputPath(fname);


            backToBitmaps.Schema.TryGetColumnIndex("ImageRestored", out int bitmapColumn);
            backToBitmaps.Schema.TryGetColumnIndex("ImageCropped", out int cropBitmapColumn);
            using (var cursor = backToBitmaps.GetRowCursorForAllColumns())
            {
                var    bitmapGetter   = cursor.GetGetter <Bitmap>(bitmapColumn);
                Bitmap restoredBitmap = default;

                var    bitmapCropGetter = cursor.GetGetter <Bitmap>(cropBitmapColumn);
                Bitmap croppedBitmap    = default;
                while (cursor.MoveNext())
                {
                    bitmapGetter(ref restoredBitmap);
                    Assert.NotNull(restoredBitmap);
                    bitmapCropGetter(ref croppedBitmap);
                    Assert.NotNull(croppedBitmap);
                    for (int x = 0; x < imageWidth; x++)
                    {
                        for (int y = 0; y < imageHeight; y++)
                        {
                            var c = croppedBitmap.GetPixel(x, y);
                            var r = restoredBitmap.GetPixel(x, y);
                            Assert.True(c.R == r.R && c.G == r.G && c.B == r.B);
                        }
                    }
                }
                Done();
            }
        }
Example #15
        IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input, out IDataView sourceCtx)
        {
            sourceCtx = input;
            Contracts.CheckValue(env, "env");
            env.CheckValue(args, "args");
            env.CheckValue(input, "input");
            env.CheckValue(args.tag, "tag is empty");
            env.CheckValue(args.trainer, "trainer",
                           "Trainer cannot be null. If your model is already trained, please use ScoreTransform instead.");

            var views = TagHelper.EnumerateTaggedView(true, input).Where(c => c.Item1 == args.tag);

            if (views.Any())
            {
                throw env.Except("Tag '{0}' is already used.", args.tag);
            }

            var host = env.Register("TagTrainOrScoreTransform");

            using (var ch = host.Start("Train"))
            {
                ch.Trace("Constructing trainer");
                var trainerSett = ScikitSubComponent <ITrainer, SignatureTrainer> .AsSubComponent(args.trainer);

                ITrainer trainer    = trainerSett.CreateInstance(host);
                var      customCols = TrainUtils.CheckAndGenerateCustomColumns(env, args.CustomColumn);

                string             feat;
                string             group;
                var                data       = CreateDataFromArgs(_host, ch, new OpaqueDataView(input), args, out feat, out group);
                ICalibratorTrainer calibrator = args.calibrator == null
                                    ? null
                                    : ScikitSubComponent <ICalibratorTrainer, SignatureCalibrator> .AsSubComponent(args.calibrator).CreateInstance(host);

                var nameTrainer = args.trainer.ToString().Replace("{", "").Replace("}", "").Replace(" ", "").Replace("=", "").Replace("+", "Y").Replace("-", "N");
                var extTrainer  = new ExtendedTrainer(trainer, nameTrainer);
                _predictor = extTrainer.Train(host, ch, data, null, calibrator, args.maxCalibrationExamples);

                if (!string.IsNullOrEmpty(args.outputModel))
                {
                    ch.Info("Saving model into '{0}'", args.outputModel);
                    using (var fs = File.Create(args.outputModel))
                        TrainUtils.SaveModel(env, ch, fs, _predictor, data);
                    ch.Info("Done.");
                }

                if (_cali != null)
                {
                    throw ch.ExceptNotImpl("Calibrator is not implemented yet.");
                }

                ch.Trace("Scoring");
                if (_args.scorer != null)
                {
                    var mapper   = new SchemaBindablePredictorWrapper(_predictor);
                    var roles    = new RoleMappedSchema(input.Schema, null, feat, group: group);
                    var bound    = mapper.Bind(_host, roles);
                    var scorPars = ScikitSubComponent <IDataScorerTransform, SignatureDataScorer> .AsSubComponent(_args.scorer);

                    _scorer = scorPars.CreateInstance(_host, input, bound, roles);
                }
                else
                {
                    _scorer = PredictorHelper.CreateDefaultScorer(_host, input, feat, group, _predictor);
                }

                ch.Info("Tagging with tag '{0}'.", args.tag);

                var ar = new TagViewTransform.Arguments {
                    tag = args.tag
                };
                var res = new TagViewTransform(env, ar, _scorer, _predictor);
                return res;
            }
        }
Example #16
        public void TestOldSavingAndLoading()
        {
            var data = new[] {
                new TestClass()
                {
                    A = 1, B = 3, C = new float[2] {
                        1, 2
                    }, D = new double[2] {
                        3, 4
                    }
                },
                new TestClass()
                {
                    A = float.NaN, B = double.NaN, C = new float[2] {
                        float.NaN, float.NaN
                    }, D = new double[2] {
                        double.NaN, double.NaN
                    }
                },
                new TestClass()
                {
                    A = float.NegativeInfinity, B = double.NegativeInfinity, C = new float[2] {
                        float.NegativeInfinity, float.NegativeInfinity
                    }, D = new double[2] {
                        double.NegativeInfinity, double.NegativeInfinity
                    }
                },
                new TestClass()
                {
                    A = float.PositiveInfinity, B = double.PositiveInfinity, C = new float[2] {
                        float.PositiveInfinity, float.PositiveInfinity,
                    }, D = new double[2] {
                        double.PositiveInfinity, double.PositiveInfinity
                    }
                },
                new TestClass()
                {
                    A = 2, B = 1, C = new float[2] {
                        3, 4
                    }, D = new double[2] {
                        5, 6
                    }
                },
            };

            var dataView = ComponentCreation.CreateDataView(Env, data);
            var pipe     = new NAReplaceEstimator(Env,
                                                  new NAReplaceTransform.ColumnInfo("A", "NAA", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean),
                                                  new NAReplaceTransform.ColumnInfo("B", "NAB", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean),
                                                  new NAReplaceTransform.ColumnInfo("C", "NAC", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean),
                                                  new NAReplaceTransform.ColumnInfo("D", "NAD", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean));

            var result      = pipe.Fit(dataView).Transform(dataView);
            var resultRoles = new RoleMappedData(result);

            using (var ms = new MemoryStream())
            {
                TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
                ms.Position = 0;
                var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);
            }
        }
        /// <summary>
        /// Finalizes the test on a predictor, calls the predictor with a scorer,
        /// saves the data, saves the models, loads it back, saves the data again,
        /// checks the output is the same.
        /// </summary>
        /// <param name="env">environment</param>
        /// <param name="outModelFilePath">output filename</param>
        /// <param name="predictor">predictor</param>
        /// <param name="roles">label, feature, ...</param>
        /// <param name="outData">first output data</param>
        /// <param name="outData2">second output data</param>
        /// <param name="kind">prediction kind</param>
        /// <param name="checkError">checks errors</param>
        /// <param name="ratio">check the error is below that threshold (if checkError is true)</param>
        /// <param name="ratioReadSave">check the predictions difference after reloading the model are below this threshold</param>
        public static void FinalizeSerializationTest(IHostEnvironment env,
                                                     string outModelFilePath, IPredictor predictor,
                                                     RoleMappedData roles, string outData, string outData2,
                                                     PredictionKind kind, bool checkError = true,
                                                     float ratio    = 0.8f, float ratioReadSave = 0.06f,
                                                     bool checkType = true)
        {
            string labelColumn = kind != PredictionKind.Clustering ? roles.Schema.Label.Value.Name : null;

            #region save, reading, running

            // Saves model.
            using (var ch = env.Start("Save"))
                using (var fs = File.Create(outModelFilePath))
                    TrainUtils.SaveModel(env, ch, fs, predictor, roles);
            if (!File.Exists(outModelFilePath))
            {
                throw new FileNotFoundException(outModelFilePath);
            }

            // Loads the model back.
            using (var fs = File.OpenRead(outModelFilePath))
            {
                var pred_local = ModelFileUtils.LoadPredictorOrNull(env, fs);
                if (pred_local == null)
                {
                    throw new Exception(string.Format("Unable to load '{0}'", outModelFilePath));
                }
                if (checkType && predictor.GetType() != pred_local.GetType())
                {
                    throw new Exception(string.Format("Type mismatch {0} != {1}", predictor.GetType(), pred_local.GetType()));
                }
            }

            // Checks the outputs.
            var sch1   = SchemaHelper.ToString(roles.Schema.Schema);
            var scorer = PredictorHelper.CreateDefaultScorer(env, roles, predictor);

            var sch2 = SchemaHelper.ToString(scorer.Schema);
            if (string.IsNullOrEmpty(sch1) || string.IsNullOrEmpty(sch2))
            {
                throw new Exception("Empty schemas");
            }

            var saver   = env.CreateSaver("Text");
            var columns = new int[scorer.Schema.Count];
            for (int i = 0; i < columns.Length; ++i)
            {
                columns[i] = saver.IsColumnSavable(scorer.Schema[i].Type) ? i : -1;
            }
            columns = columns.Where(c => c >= 0).ToArray();
            using (var fs2 = File.Create(outData))
                saver.SaveData(fs2, scorer, columns);

            if (!File.Exists(outData))
            {
                throw new FileNotFoundException(outData);
            }

            // Check we have the same output.
            using (var fs = File.OpenRead(outModelFilePath))
            {
                var model = ModelFileUtils.LoadPredictorOrNull(env, fs);
                scorer = PredictorHelper.CreateDefaultScorer(env, roles, model);
                saver  = env.CreateSaver("Text");
                using (var fs2 = File.Create(outData2))
                    saver.SaveData(fs2, scorer, columns);
            }

            var t1 = File.ReadAllLines(outData);
            var t2 = File.ReadAllLines(outData2);
            if (t1.Length != t2.Length)
            {
                throw new Exception(string.Format("Not the same number of lines: {0} != {1}", t1.Length, t2.Length));
            }
            var linesN = new List <int>();
            for (int i = 0; i < t1.Length; ++i)
            {
                if (t1[i] != t2[i])
                {
                    linesN.Add(i);
                }
            }
            if (linesN.Count > (int)(t1.Length * ratioReadSave))
            {
                var rows = linesN.Select(i => string.Format("1-Mismatch on line {0}/{3}:\n{1}\n{2}", i, t1[i], t2[i], t1.Length)).ToList();
                rows.Add($"Number of differences: {linesN.Count}/{t1.Length}");
                throw new Exception(string.Join("\n", rows));
            }

            #endregion

            #region clustering

            if (kind == PredictionKind.Clustering)
            {
                // Nothing to do here.
                return;
            }

            #endregion

            #region supervised

            string expectedOutput = kind == PredictionKind.Regression ? "Score" : "PredictedLabel";

            // Get label and basic checking about performance.
            using (var cursor = scorer.GetRowCursor(scorer.Schema))
            {
                int ilabel, ipred;
                ilabel = SchemaHelper.GetColumnIndex(cursor.Schema, labelColumn);
                ipred  = SchemaHelper.GetColumnIndex(cursor.Schema, expectedOutput);
                var ty1   = cursor.Schema[ilabel].Type;
                var ty2   = cursor.Schema[ipred].Type;
                var dist1 = new Dictionary <int, int>();
                var dist2 = new Dictionary <int, int>();
                var conf  = new Dictionary <Tuple <int, int>, long>();

                if (kind == PredictionKind.MulticlassClassification)
                {
                    #region Multiclass

                    if (!ty2.IsKey())
                    {
                        throw new Exception(string.Format("Label='{0}' Predicted={1}'\nSchema: {2}", ty1, ty2, SchemaHelper.ToString(cursor.Schema)));
                    }

                    if (ty1.RawKind() == DataKind.Single)
                    {
                        var   lgetter = cursor.GetGetter <float>(SchemaHelper._dc(ilabel, cursor));
                        var   pgetter = cursor.GetGetter <uint>(SchemaHelper._dc(ipred, cursor));
                        float ans     = 0;
                        uint  pre     = 0;
                        while (cursor.MoveNext())
                        {
                            lgetter(ref ans);
                            pgetter(ref pre);

                            // The scorer +1 to the argmax.
                            ++ans;

                            var key = new Tuple <int, int>((int)pre, (int)ans);
                            if (!conf.ContainsKey(key))
                            {
                                conf[key] = 1;
                            }
                            else
                            {
                                ++conf[key];
                            }
                            if (!dist1.ContainsKey((int)ans))
                            {
                                dist1[(int)ans] = 1;
                            }
                            else
                            {
                                ++dist1[(int)ans];
                            }
                            if (!dist2.ContainsKey((int)pre))
                            {
                                dist2[(int)pre] = 1;
                            }
                            else
                            {
                                ++dist2[(int)pre];
                            }
                        }
                    }
                    else if (ty1.RawKind() == DataKind.UInt32 && ty1.IsKey())
                    {
                        var  lgetter = cursor.GetGetter <uint>(SchemaHelper._dc(ilabel, cursor));
                        var  pgetter = cursor.GetGetter <uint>(SchemaHelper._dc(ipred, cursor));
                        uint ans     = 0;
                        uint pre     = 0;
                        while (cursor.MoveNext())
                        {
                            lgetter(ref ans);
                            pgetter(ref pre);

                            var key = new Tuple <int, int>((int)pre, (int)ans);
                            if (!conf.ContainsKey(key))
                            {
                                conf[key] = 1;
                            }
                            else
                            {
                                ++conf[key];
                            }
                            if (!dist1.ContainsKey((int)ans))
                            {
                                dist1[(int)ans] = 1;
                            }
                            else
                            {
                                ++dist1[(int)ans];
                            }
                            if (!dist2.ContainsKey((int)pre))
                            {
                                dist2[(int)pre] = 1;
                            }
                            else
                            {
                                ++dist2[(int)pre];
                            }
                        }
                    }
                    else
                    {
                        throw new NotImplementedException(string.Format("Not implemented for type {0}", ty1.ToString()));
                    }
                    #endregion
                }
                else if (kind == PredictionKind.BinaryClassification)
                {
                    #region binary classification

                    if (ty2.RawKind() != DataKind.Boolean)
                    {
                        throw new Exception(string.Format("Label='{0}' Predicted={1}'\nSchema: {2}", ty1, ty2, SchemaHelper.ToString(cursor.Schema)));
                    }

                    if (ty1.RawKind() == DataKind.Single)
                    {
                        var   lgetter = cursor.GetGetter <float>(SchemaHelper._dc(ilabel, cursor));
                        var   pgetter = cursor.GetGetter <bool>(SchemaHelper._dc(ipred, cursor));
                        float ans     = 0;
                        bool  pre     = default(bool);
                        while (cursor.MoveNext())
                        {
                            lgetter(ref ans);
                            pgetter(ref pre);

                            if (ans != 0 && ans != 1)
                            {
                                throw Contracts.Except("The problem is not binary, expected answer is {0}", ans);
                            }

                            var key = new Tuple <int, int>(pre ? 1 : 0, (int)ans);
                            if (!conf.ContainsKey(key))
                            {
                                conf[key] = 1;
                            }
                            else
                            {
                                ++conf[key];
                            }
                            if (!dist1.ContainsKey((int)ans))
                            {
                                dist1[(int)ans] = 1;
                            }
                            else
                            {
                                ++dist1[(int)ans];
                            }
                            if (!dist2.ContainsKey(pre ? 1 : 0))
                            {
                                dist2[pre ? 1 : 0] = 1;
                            }
                            else
                            {
                                ++dist2[pre ? 1 : 0];
                            }
                        }
                    }
                    else if (ty1.RawKind() == DataKind.UInt32)
                    {
                        var  lgetter = cursor.GetGetter <uint>(SchemaHelper._dc(ilabel, cursor));
                        var  pgetter = cursor.GetGetter <bool>(SchemaHelper._dc(ipred, cursor));
                        uint ans     = 0;
                        bool pre     = default(bool);
                        while (cursor.MoveNext())
                        {
                            lgetter(ref ans);
                            pgetter(ref pre);
                            if (ty1.IsKey())
                            {
                                --ans;
                            }

                            if (ans != 0 && ans != 1)
                            {
                                throw Contracts.Except("The problem is not binary, expected answer is {0}", ans);
                            }

                            var key = new Tuple <int, int>(pre ? 1 : 0, (int)ans);
                            if (!conf.ContainsKey(key))
                            {
                                conf[key] = 1;
                            }
                            else
                            {
                                ++conf[key];
                            }
                            if (!dist1.ContainsKey((int)ans))
                            {
                                dist1[(int)ans] = 1;
                            }
                            else
                            {
                                ++dist1[(int)ans];
                            }
                            if (!dist2.ContainsKey(pre ? 1 : 0))
                            {
                                dist2[pre ? 1 : 0] = 1;
                            }
                            else
                            {
                                ++dist2[pre ? 1 : 0];
                            }
                        }
                    }
                    else if (ty1.RawKind() == DataKind.Boolean)
                    {
                        var  lgetter = cursor.GetGetter <bool>(SchemaHelper._dc(ilabel, cursor));
                        var  pgetter = cursor.GetGetter <bool>(SchemaHelper._dc(ipred, cursor));
                        bool ans     = default(bool);
                        bool pre     = default(bool);
                        while (cursor.MoveNext())
                        {
                            lgetter(ref ans);
                            pgetter(ref pre);

                            var key = new Tuple <int, int>(pre ? 1 : 0, ans ? 1 : 0);
                            if (!conf.ContainsKey(key))
                            {
                                conf[key] = 1;
                            }
                            else
                            {
                                ++conf[key];
                            }

                            if (!dist1.ContainsKey(ans ? 1 : 0))
                            {
                                dist1[ans ? 1 : 0] = 1;
                            }
                            else
                            {
                                ++dist1[ans ? 1 : 0];
                            }
                            if (!dist2.ContainsKey(pre ? 1 : 0))
                            {
                                dist2[pre ? 1 : 0] = 1;
                            }
                            else
                            {
                                ++dist2[pre ? 1 : 0];
                            }
                        }
                    }
                    else
                    {
                        throw new NotImplementedException(string.Format("Not implemented for type {0}", ty1));
                    }

                    #endregion
                }
                else if (kind == PredictionKind.Regression)
                {
                    #region regression

                    if (ty1.RawKind() != DataKind.Single)
                    {
                        throw new Exception(string.Format("Label='{0}' Predicted={1}'\nSchema: {2}", ty1, ty2, SchemaHelper.ToString(cursor.Schema)));
                    }
                    if (ty2.RawKind() != DataKind.Single)
                    {
                        throw new Exception(string.Format("Label='{0}' Predicted={1}'\nSchema: {2}", ty1, ty2, SchemaHelper.ToString(cursor.Schema)));
                    }

                    var   lgetter = cursor.GetGetter <float>(SchemaHelper._dc(ilabel, cursor));
                    var   pgetter = cursor.GetGetter <float>(SchemaHelper._dc(ipred, cursor));
                    float ans     = 0;
                    float pre     = 0f;
                    float error   = 0f;
                    while (cursor.MoveNext())
                    {
                        lgetter(ref ans);
                        pgetter(ref pre);
                        error += (ans - pre) * (ans - pre);
                        if (!dist1.ContainsKey((int)ans))
                        {
                            dist1[(int)ans] = 1;
                        }
                        else
                        {
                            ++dist1[(int)ans];
                        }
                        if (!dist2.ContainsKey((int)pre))
                        {
                            dist2[(int)pre] = 1;
                        }
                        else
                        {
                            ++dist2[(int)pre];
                        }
                    }

                    if (float.IsNaN(error) || float.IsInfinity(error))
                    {
                        throw new Exception("Regression wen wrong. Error is infinite.");
                    }

                    #endregion
                }
                else
                {
                    throw new NotImplementedException(string.Format("Not implemented for kind {0}", kind));
                }

                var nbError = conf.Where(c => c.Key.Item1 != c.Key.Item2).Select(c => c.Value).Sum();
                var nbTotal = conf.Select(c => c.Value).Sum();

                if (checkError && (nbError * 1.0 > nbTotal * ratio || dist2.Count <= 1))
                {
                    var sconf = string.Join("\n", conf.OrderBy(c => c.Key)
                                            .Select(c => string.Format("pred={0} exp={1} count={2}", c.Key.Item1, c.Key.Item2, c.Value)));
                    var sdist1 = string.Join("\n", dist1.OrderBy(c => c.Key)
                                             .Select(c => string.Format("label={0} count={1}", c.Key, c.Value)));
                    var sdist2 = string.Join("\n", dist2.OrderBy(c => c.Key).Take(20)
                                             .Select(c => string.Format("pred={0} count={1}", c.Key, c.Value)));
                    throw new Exception(string.Format("Too many errors {0}/{1}={7}\n###########\nConfusion:\n{2}\n########\nDIST1\n{3}\n###########\nDIST2\n{4}\nOutput:\n{5}\n...\n{6}",
                                                      nbError, nbTotal,
                                                      sconf, sdist1, sdist2,
                                                      string.Join("\n", t1.Take(Math.Min(30, t1.Length))),
                                                      string.Join("\n", t1.Skip(Math.Max(0, t1.Length - 30))),
                                                      nbError * 1.0 / nbTotal));
                }
            }

            #endregion
        }
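
A minimal sketch of invoking the helper above; the environment, trained predictor, roles, and output paths are assumptions for illustration:

            // Hedged example call: pred and trainRoles come from a prior training step.
            FinalizeSerializationTest(env, "model.zip", pred, trainRoles,
                                      "predictions_first.txt", "predictions_second.txt",
                                      PredictionKind.BinaryClassification,
                                      checkError: true, ratio: 0.8f, ratioReadSave: 0.06f);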
Example #18
        public void TestOldSavingAndLoading()
        {
            if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                return;
            }


            var samplevector = getSampleArrayData();

            var dataView = ComponentCreation.CreateDataView(Env,
                                                            new TestData[] {
                new TestData()
                {
                    data_0 = samplevector
                }
            });

            var inputNames  = "data_0";
            var outputNames = "output_1";
            var est         = new DnnImageFeaturizerEstimator(Env, m => m.ModelSelector.ResNet18(m.Environment, m.InputColumn, m.OutputColumn), inputNames, outputNames);
            var transformer = est.Fit(dataView);
            var result      = transformer.Transform(dataView);
            var resultRoles = new RoleMappedData(result);

            using (var ms = new MemoryStream())
            {
                TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
                ms.Position = 0;
                var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);

                loadedView.Schema.TryGetColumnIndex(outputNames, out int softMaxOut1);
                using (var cursor = loadedView.GetRowCursor(col => col == softMaxOut1))
                {
                    VBuffer <float> softMaxValue  = default;
                    var             softMaxGetter = cursor.GetGetter <VBuffer <float> >(softMaxOut1);
                    float           sum           = 0f;
                    int             i             = 0;
                    while (cursor.MoveNext())
                    {
                        softMaxGetter(ref softMaxValue);
                        var values = softMaxValue.DenseValues();
                        foreach (var val in values)
                        {
                            sum += val;
                            if (i == 0)
                            {
                                Assert.InRange(val, 0.0, 0.00001);
                            }
                            if (i == 7)
                            {
                                Assert.InRange(val, 0.62935, 0.62940);
                            }
                            if (i == 500)
                            {
                                Assert.InRange(val, 0.15521, 0.155225);
                            }
                            i++;
                        }
                    }
                    Assert.InRange(sum, 83.50, 84.50);
                }
            }
        }
        public void TestGreyscaleTransformImages()
        {
            using (var env = new ConsoleEnvironment())
            {
                var imageHeight = 150;
                var imageWidth  = 100;
                var dataFile    = GetDataPath("images/images.tsv");
                var imageFolder = Path.GetDirectoryName(dataFile);
                var data        = TextLoader.Create(env, new TextLoader.Arguments()
                {
                    Column = new[]
                    {
                        new TextLoader.Column("ImagePath", DataKind.TX, 0),
                        new TextLoader.Column("Name", DataKind.TX, 1),
                    }
                }, new MultiFileSource(dataFile));
                var images = ImageLoaderTransform.Create(env, new ImageLoaderTransform.Arguments()
                {
                    Column = new ImageLoaderTransform.Column[1]
                    {
                        new ImageLoaderTransform.Column()
                        {
                            Source = "ImagePath", Name = "ImageReal"
                        }
                    },
                    ImageFolder = imageFolder
                }, data);
                var cropped = ImageResizerTransform.Create(env, new ImageResizerTransform.Arguments()
                {
                    Column = new ImageResizerTransform.Column[1] {
                        new ImageResizerTransform.Column()
                        {
                            Name = "ImageCropped", Source = "ImageReal", ImageHeight = imageHeight, ImageWidth = imageWidth, Resizing = ImageResizerTransform.ResizingKind.IsoCrop
                        }
                    }
                }, images);

                IDataView grey = ImageGrayscaleTransform.Create(env, new ImageGrayscaleTransform.Arguments()
                {
                    Column = new ImageGrayscaleTransform.Column[1] {
                        new ImageGrayscaleTransform.Column()
                        {
                            Name = "ImageGrey", Source = "ImageCropped"
                        }
                    }
                }, cropped);

                var fname = nameof(TestGreyscaleTransformImages) + "_model.zip";

                var fh = env.CreateOutputFile(fname);
                using (var ch = env.Start("save"))
                    TrainUtils.SaveModel(env, ch, fh, null, new RoleMappedData(grey));

                grey = ModelFileUtils.LoadPipeline(env, fh.OpenReadStream(), new MultiFileSource(dataFile));
                DeleteOutputPath(fname);

                grey.Schema.TryGetColumnIndex("ImageGrey", out int greyColumn);
                using (var cursor = grey.GetRowCursor((x) => true))
                {
                    var    bitmapGetter = cursor.GetGetter <Bitmap>(greyColumn);
                    Bitmap bitmap       = default;
                    while (cursor.MoveNext())
                    {
                        bitmapGetter(ref bitmap);
                        Assert.NotNull(bitmap);
                        for (int x = 0; x < imageWidth; x++)
                        {
                            for (int y = 0; y < imageHeight; y++)
                            {
                                var pixel = bitmap.GetPixel(x, y);
                                // A greyscale image has the same values for R, G and B.
                                Assert.True(pixel.R == pixel.G && pixel.G == pixel.B);
                            }
                        }
                    }
                }
            }
            Done();
        }
        public void TestOldSavingAndLoading()
        {
            var data = new[] {
                new TestClass()
                {
                    A = 1, B = 3, C = new float[2] {
                        1, 2
                    }, D = new double[2] {
                        3, 4
                    }
                },
                new TestClass()
                {
                    A = float.NaN, B = double.NaN, C = new float[2] {
                        float.NaN, float.NaN
                    }, D = new double[2] {
                        double.NaN, double.NaN
                    }
                },
                new TestClass()
                {
                    A = float.NegativeInfinity, B = double.NegativeInfinity, C = new float[2] {
                        float.NegativeInfinity, float.NegativeInfinity
                    }, D = new double[2] {
                        double.NegativeInfinity, double.NegativeInfinity
                    }
                },
                new TestClass()
                {
                    A = float.PositiveInfinity, B = double.PositiveInfinity, C = new float[2] {
                        float.PositiveInfinity, float.PositiveInfinity,
                    }, D = new double[2] {
                        double.PositiveInfinity, double.PositiveInfinity
                    }
                },
                new TestClass()
                {
                    A = 2, B = 1, C = new float[2] {
                        3, 4
                    }, D = new double[2] {
                        5, 6
                    }
                },
            };

            var dataView = ML.Data.LoadFromEnumerable(data);
            var pipe     = ML.Transforms.ReplaceMissingValues(
                new MissingValueReplacingEstimator.ColumnOptions("NAA", "A", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean),
                new MissingValueReplacingEstimator.ColumnOptions("NAB", "B", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean),
                new MissingValueReplacingEstimator.ColumnOptions("NAC", "C", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean),
                new MissingValueReplacingEstimator.ColumnOptions("NAD", "D", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean));

            var result      = pipe.Fit(dataView).Transform(dataView);
            var resultRoles = new RoleMappedData(result);

            using (var ms = new MemoryStream())
            {
                TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
                ms.Position = 0;
                var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);
            }
        }
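The MemoryStream round trip above goes through the low-level TrainUtils/ModelFileUtils pair. A minimal sketch of the same idea via the higher-level MLContext persistence API, assuming Microsoft.ML 1.x (here the fitted transformer is kept in its own variable, unlike above):

            // Hedged sketch, assuming the Microsoft.ML 1.x Model.Save/Load API.
            var transformer = pipe.Fit(dataView);
            using (var ms = new MemoryStream())
            {
                ML.Model.Save(transformer, dataView.Schema, ms);      // serialize transformer + input schema
                ms.Position = 0;
                var loaded = ML.Model.Load(ms, out var inputSchema);  // load it back
                var reloadedView = loaded.Transform(dataView);
            }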
        public void TestOldSavingAndLoading()
        {
            // Skip running on x86, as this test uses too much memory (over the 2GB limit on x86)
            // and is very likely to hit memory-related issues when running on CI.
            // TODO: optimize memory usage in the related code and enable the x86 run.
            if (!Environment.Is64BitProcess)
            {
                return;
            }

            var samplevector = GetSampleArrayData();

            var dataView = ML.Data.LoadFromEnumerable(
                new TestData[] {
                new TestData()
                {
                    data_0 = samplevector
                }
            });

            var inputNames  = "data_0";
            var outputNames = "output_1";
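            // DnnFeaturizeImage takes a factory that selects a pretrained backbone;
            // here ResNet18 featurizes the "data_0" column into "output_1".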
            var est         = ML.Transforms.DnnFeaturizeImage(outputNames, m => m.ModelSelector.ResNet18(m.Environment, m.OutputColumn, m.InputColumn), inputNames);
            var transformer = est.Fit(dataView);
            var result      = transformer.Transform(dataView);
            var resultRoles = new RoleMappedData(result);

            using (var ms = new MemoryStream())
            {
                TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
                ms.Position = 0;
                var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);

                using (var cursor = loadedView.GetRowCursor(loadedView.Schema[outputNames]))
                {
                    VBuffer <float> softMaxValue  = default;
                    var             softMaxGetter = cursor.GetGetter <VBuffer <float> >(loadedView.Schema[outputNames]);
                    float           sum           = 0f;
                    int             i             = 0;
                    while (cursor.MoveNext())
                    {
                        softMaxGetter(ref softMaxValue);
                        var values = softMaxValue.DenseValues();
                        foreach (var val in values)
                        {
                            sum += val;
                            if (i == 0)
                            {
                                Assert.InRange(val, 0.0, 0.00001);
                            }
                            if (i == 7)
                            {
                                Assert.InRange(val, 0.62935, 0.62940);
                            }
                            if (i == 500)
                            {
                                Assert.InRange(val, 0.15521, 0.155225);
                            }
                            i++;
                        }
                    }
                    Assert.InRange(sum, 83.50, 84.50);
                }
            }
        }
        public void TestBackAndForthConversionWithDifferentOrder()
        {
            IHostEnvironment env         = new MLContext();
            const int        imageHeight = 100;
            const int        imageWidth  = 130;
            var dataFile    = GetDataPath("images/images.tsv");
            var imageFolder = Path.GetDirectoryName(dataFile);
            var data        = TextLoader.Create(env, new TextLoader.Options()
            {
                Columns = new[]
                {
                    new TextLoader.Column("ImagePath", DataKind.String, 0),
                    new TextLoader.Column("Name", DataKind.String, 1),
                }
            }, new MultiFileSource(dataFile));
            var images  = new ImageLoadingTransformer(env, imageFolder, ("ImageReal", "ImagePath")).Transform(data);
            var cropped = new ImageResizingTransformer(env, "ImageCropped", imageWidth, imageHeight, "ImageReal").Transform(images);

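            // Extraction and reconstruction below use the same channel order (ABRG);
            // a mismatched ColorsOrder would swap channels and the per-pixel check would fail.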
            var       pixels        = new ImagePixelExtractingTransformer(env, "ImagePixels", "ImageCropped", ImagePixelExtractingEstimator.ColorBits.All, order: ImagePixelExtractingEstimator.ColorsOrder.ABRG).Transform(cropped);
            IDataView backToBitmaps = new VectorToImageConvertingTransformer(env, "ImageRestored", imageHeight, imageWidth, "ImagePixels",
                                                                             ImagePixelExtractingEstimator.ColorBits.All, order: ImagePixelExtractingEstimator.ColorsOrder.ABRG).Transform(pixels);

            var fname = nameof(TestBackAndForthConversionWithDifferentOrder) + "_model.zip";

            var fh = env.CreateOutputFile(fname);

            using (var ch = env.Start("save"))
                TrainUtils.SaveModel(env, ch, fh, null, new RoleMappedData(backToBitmaps));

            backToBitmaps = ModelFileUtils.LoadPipeline(env, fh.OpenReadStream(), new MultiFileSource(dataFile));
            DeleteOutputPath(fname);

            using (var cursor = backToBitmaps.GetRowCursorForAllColumns())
            {
                var    bitmapGetter   = cursor.GetGetter <Bitmap>(backToBitmaps.Schema["ImageRestored"]);
                Bitmap restoredBitmap = default;

                var    bitmapCropGetter = cursor.GetGetter <Bitmap>(backToBitmaps.Schema["ImageCropped"]);
                Bitmap croppedBitmap    = default;
                while (cursor.MoveNext())
                {
                    bitmapGetter(ref restoredBitmap);
                    Assert.NotNull(restoredBitmap);
                    bitmapCropGetter(ref croppedBitmap);
                    Assert.NotNull(croppedBitmap);
                    for (int x = 0; x < imageWidth; x++)
                    {
                        for (int y = 0; y < imageHeight; y++)
                        {
                            var c = croppedBitmap.GetPixel(x, y);
                            var r = restoredBitmap.GetPixel(x, y);
                            Assert.True(c == r);
                        }
                    }
                }
            }
            Done();
        }
Beispiel #23
0
        public void TestOldSavingAndLoading(int? gpuDeviceId, bool fallbackToCpu)
        {
            var modelFile    = "squeezenet/00000001/model.onnx";
            var samplevector = GetSampleArrayData();

            var dataView = ML.Data.LoadFromEnumerable(
                new TestData[] {
                new TestData()
                {
                    data_0 = samplevector
                }
            });

            var inputNames  = new[] { "data_0" };
            var outputNames = new[] { "softmaxout_1" };
            var est         = ML.Transforms.ApplyOnnxModel(outputNames, inputNames, modelFile, gpuDeviceId, fallbackToCpu);
            var transformer = est.Fit(dataView);
            var result      = transformer.Transform(dataView);
            var resultRoles = new RoleMappedData(result);

            using (var ms = new MemoryStream())
            {
                TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
                ms.Position = 0;
                var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);

                var softMaxOut1Col = loadedView.Schema[outputNames[0]];

                using (var cursor = loadedView.GetRowCursor(softMaxOut1Col))
                {
                    VBuffer <float> softMaxValue  = default;
                    var             softMaxGetter = cursor.GetGetter <VBuffer <float> >(softMaxOut1Col);
                    float           sum           = 0f;
                    int             i             = 0;
                    while (cursor.MoveNext())
                    {
                        softMaxGetter(ref softMaxValue);
                        var values = softMaxValue.DenseValues();
                        foreach (var val in values)
                        {
                            sum += val;
                            if (i == 0)
                            {
                                Assert.InRange(val, 0.00004, 0.00005);
                            }
                            if (i == 1)
                            {
                                Assert.InRange(val, 0.003844, 0.003845);
                            }
                            if (i == 999)
                            {
                                Assert.InRange(val, 0.0029566, 0.0029567);
                            }
                            i++;
                        }
                    }
                    Assert.InRange(sum, 0.99999, 1.00001);
                }
                (transformer as IDisposable)?.Dispose();
            }
        }
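The loaded ONNX scorer can also be exercised through a prediction engine rather than a cursor. A hedged sketch, assuming Microsoft.ML 1.x; the OutputData class is hypothetical (declared alongside TestData) and only illustrates binding the "softmaxout_1" column:

            // Hypothetical binding class, not from the original test.
            public class OutputData
            {
                [ColumnName("softmaxout_1")]
                public float[] SoftmaxOut { get; set; }
            }

            var engine = ML.Model.CreatePredictionEngine<TestData, OutputData>(transformer);
            var prediction = engine.Predict(new TestData() { data_0 = samplevector });
            // prediction.SoftmaxOut holds the class probabilities; per the assertion above they sum to ~1.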
        public void TestBackAndForthConversionWithAlphaNoInterleave()
        {
            using (var env = new ConsoleEnvironment())
            {
                var imageHeight = 100;
                var imageWidth  = 130;
                var dataFile    = GetDataPath("images/images.tsv");
                var imageFolder = Path.GetDirectoryName(dataFile);
                var data        = TextLoader.Create(env, new TextLoader.Arguments()
                {
                    Column = new[]
                    {
                        new TextLoader.Column("ImagePath", DataKind.TX, 0),
                        new TextLoader.Column("Name", DataKind.TX, 1),
                    }
                }, new MultiFileSource(dataFile));
                var images = ImageLoaderTransform.Create(env, new ImageLoaderTransform.Arguments()
                {
                    Column = new ImageLoaderTransform.Column[1]
                    {
                        new ImageLoaderTransform.Column()
                        {
                            Source = "ImagePath", Name = "ImageReal"
                        }
                    },
                    ImageFolder = imageFolder
                }, data);
                var cropped = ImageResizerTransform.Create(env, new ImageResizerTransform.Arguments()
                {
                    Column = new ImageResizerTransform.Column[1] {
                        new ImageResizerTransform.Column()
                        {
                            Source = "ImageReal", Name = "ImageCropped", ImageHeight = imageHeight, ImageWidth = imageWidth, Resizing = ImageResizerTransform.ResizingKind.IsoCrop
                        }
                    }
                }, images);

                var pixels = ImagePixelExtractorTransform.Create(env, new ImagePixelExtractorTransform.Arguments()
                {
                    InterleaveArgb = false,
                    Offset         = 127.5f,
                    Scale          = 2f / 255,
                    Column         = new ImagePixelExtractorTransform.Column[1] {
                        new ImagePixelExtractorTransform.Column()
                        {
                            Source = "ImageCropped", Name = "ImagePixels", UseAlpha = true
                        }
                    }
                }, cropped);

                IDataView backToBitmaps = new VectorToImageTransform(env, new VectorToImageTransform.Arguments()
                {
                    InterleaveArgb = false,
                    Offset         = -1f,
                    Scale          = 255f / 2,
                    Column         = new VectorToImageTransform.Column[1] {
                        new VectorToImageTransform.Column()
                        {
                            Source = "ImagePixels", Name = "ImageRestored", ImageHeight = imageHeight, ImageWidth = imageWidth, ContainsAlpha = true
                        }
                    }
                }, pixels);

                var fname = nameof(TestBackAndForthConversionWithAlphaNoInterleave) + "_model.zip";

                var fh = env.CreateOutputFile(fname);
                using (var ch = env.Start("save"))
                    TrainUtils.SaveModel(env, ch, fh, null, new RoleMappedData(backToBitmaps));

                backToBitmaps = ModelFileUtils.LoadPipeline(env, fh.OpenReadStream(), new MultiFileSource(dataFile));
                DeleteOutputPath(fname);


                backToBitmaps.Schema.TryGetColumnIndex("ImageRestored", out int bitmapColumn);
                backToBitmaps.Schema.TryGetColumnIndex("ImageCropped", out int cropBitmapColumn);
                using (var cursor = backToBitmaps.GetRowCursor((x) => true))
                {
                    var    bitmapGetter   = cursor.GetGetter <Bitmap>(bitmapColumn);
                    Bitmap restoredBitmap = default;

                    var    bitmapCropGetter = cursor.GetGetter <Bitmap>(cropBitmapColumn);
                    Bitmap croppedBitmap    = default;
                    while (cursor.MoveNext())
                    {
                        bitmapGetter(ref restoredBitmap);
                        Assert.NotNull(restoredBitmap);
                        bitmapCropGetter(ref croppedBitmap);
                        Assert.NotNull(croppedBitmap);
                        for (int x = 0; x < imageWidth; x++)
                        {
                            for (int y = 0; y < imageHeight; y++)
                            {
                                var c = croppedBitmap.GetPixel(x, y);
                                var r = restoredBitmap.GetPixel(x, y);
                                Assert.True(c == r);
                            }
                        }
                    }
                }
            }
            Done();
        }
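Note how the VectorToImageTransform arguments invert the pixel extractor's mapping. Assuming both old transforms apply value' = (value - Offset) * Scale (an assumption inferred from the parameter pairs used here, not from documentation), extraction maps [0, 255] onto [-1, 1] and restoration maps it back exactly:

                // Hedged check of the inverse mapping; assumes value' = (value - offset) * scale.
                float Extract(float p) => (p - 127.5f) * (2f / 255);
                float Restore(float v) => (v - (-1f)) * (255f / 2);
                Console.WriteLine(Restore(Extract(0f)));    // 0
                Console.WriteLine(Restore(Extract(255f)));  // 255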
Beispiel #25
0
        public void TestBackAndForthConversionWithoutAlphaNoInterleave()
        {
            if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
            {
                return;
            }

            IHostEnvironment env         = new MLContext();
            const int        imageHeight = 100;
            const int        imageWidth  = 130;
            var dataFile    = GetDataPath("images/images.tsv");
            var imageFolder = Path.GetDirectoryName(dataFile);
            var data        = TextLoader.Create(env, new TextLoader.Options()
            {
                Columns = new[]
                {
                    new TextLoader.Column("ImagePath", DataKind.String, 0),
                    new TextLoader.Column("Name", DataKind.String, 1),
                }
            }, new MultiFileSource(dataFile));
            var images  = new ImageLoadingTransformer(env, imageFolder, ("ImageReal", "ImagePath")).Transform(data);
            var cropped = new ImageResizingTransformer(env, "ImageCropped", imageWidth, imageHeight, "ImageReal").Transform(images);
            var pixels  = new ImagePixelExtractingTransformer(env, "ImagePixels", "ImageCropped", scaleImage: 2f / 19, offsetImage: 30).Transform(cropped);

            IDataView backToBitmaps = new VectorToImageConvertingTransformer(env, "ImageRestored", imageHeight, imageWidth, "ImagePixels",
                                                                             scaleImage: 19 / 2f, offsetImage: -30).Transform(pixels);

            var fname = nameof(TestBackAndForthConversionWithoutAlphaNoInterleave) + "_model.zip";

            var fh = env.CreateOutputFile(fname);

            using (var ch = env.Start("save"))
                TrainUtils.SaveModel(env, ch, fh, null, new RoleMappedData(backToBitmaps));

            backToBitmaps = ModelFileUtils.LoadPipeline(env, fh.OpenReadStream(), new MultiFileSource(dataFile));
            DeleteOutputPath(fname);

            using (var cursor = backToBitmaps.GetRowCursorForAllColumns())
            {
                var    bitmapGetter   = cursor.GetGetter <Bitmap>(backToBitmaps.Schema["ImageRestored"]);
                Bitmap restoredBitmap = default;

                var    bitmapCropGetter = cursor.GetGetter <Bitmap>(backToBitmaps.Schema["ImageCropped"]);
                Bitmap croppedBitmap    = default;
                while (cursor.MoveNext())
                {
                    bitmapGetter(ref restoredBitmap);
                    Assert.NotNull(restoredBitmap);
                    bitmapCropGetter(ref croppedBitmap);
                    Assert.NotNull(croppedBitmap);
                    for (int x = 0; x < imageWidth; x++)
                    {
                        for (int y = 0; y < imageHeight; y++)
                        {
                            var c = croppedBitmap.GetPixel(x, y);
                            var r = restoredBitmap.GetPixel(x, y);
                            Assert.True(c.R == r.R && c.G == r.G && c.B == r.B);
                        }
                    }
                }
            }
            Done();
        }
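Here, too, the reconstruction arguments undo the extractor's scaling, but with the estimator API's convention. The pairs (scaleImage: 2f / 19, offsetImage: 30) and (scaleImage: 19 / 2f, offsetImage: -30) only cancel if extraction computes (pixel + offsetImage) * scaleImage and reconstruction computes value * scaleImage + offsetImage; under that assumption (not verified against the documentation):

            // Hedged check; assumes extract: (p + offset) * scale and restore: v * scale + offset.
            float Extract(float p) => (p + 30f) * (2f / 19);
            float Restore(float v) => v * (19f / 2) - 30f;
            Console.WriteLine(Restore(Extract(0f)));    // 0
            Console.WriteLine(Restore(Extract(255f)));  // 255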
Beispiel #26
0
        /// <summary>
        /// Finalizes the test of a transform: applies the transform,
        /// saves the data, saves the model, loads the model back, saves the data again,
        /// and checks that the two outputs are identical.
        /// </summary>
        /// <param name="env">environment</param>
        /// <param name="outModelFilePath">model filename</param>
        /// <param name="transform">transform to test</param>
        /// <param name="source">source (view before applying the transform)</param>
        /// <param name="outData">first data file</param>
        /// <param name="outData2">second data file</param>
        /// <param name="startsWith">if true, only check that each line of the first output starts with the corresponding line of the second, instead of requiring exact equality</param>
        /// <param name="skipDoubleQuote">skip lines containing empty quoted fields when comparing the outputs</param>
        /// <param name="forceDense">save the text output in dense format</param>
        public static void SerializationTestTransform(IHostEnvironment env,
                                                      string outModelFilePath, IDataTransform transform,
                                                      IDataView source, string outData, string outData2,
                                                      bool startsWith = false, bool skipDoubleQuote = false,
                                                      bool forceDense = false)
        {
            // Saves model.
            var roles = env.CreateExamples(transform, null);

            using (var ch = env.Start("SaveModel"))
                using (var fs = File.Create(outModelFilePath))
                    TrainUtils.SaveModel(env, ch, fs, null, roles);
            if (!File.Exists(outModelFilePath))
            {
                throw new FileNotFoundException(outModelFilePath);
            }

            // We load it again.
            using (var fs = File.OpenRead(outModelFilePath))
            {
                var tr2 = env.LoadTransforms(fs, source);
                if (tr2 == null)
                {
                    throw new Exception(string.Format("Unable to load '{0}'", outModelFilePath));
                }
                if (transform.GetType() != tr2.GetType())
                {
                    throw new Exception(string.Format("Type mismatch {0} != {1}", transform.GetType(), tr2.GetType()));
                }
            }

            // Checks the outputs.
            var saver   = env.CreateSaver(forceDense ? "Text{dense=+}" : "Text");
            var columns = new int[transform.Schema.Count];

            for (int i = 0; i < columns.Length; ++i)
            {
                columns[i] = i;
            }
            using (var fs2 = File.Create(outData))
                saver.SaveData(fs2, transform, columns);

            if (!File.Exists(outData))
            {
                throw new FileNotFoundException(outData);
            }

            // Check we have the same output.
            using (var fs = File.OpenRead(outModelFilePath))
            {
                var tr = env.LoadTransforms(fs, source);
                saver = env.CreateSaver(forceDense ? "Text{dense=+}" : "Text");
                using (var fs2 = File.Create(outData2))
                    saver.SaveData(fs2, tr, columns);
            }

            var t1 = File.ReadAllLines(outData);
            var t2 = File.ReadAllLines(outData2);

            if (t1.Length != t2.Length)
            {
                throw new Exception(string.Format("Not the same number of lines: {0} != {1}", t1.Length, t2.Length));
            }
            for (int i = 0; i < t1.Length; ++i)
            {
                if (skipDoubleQuote && (t1[i].Contains("\"\"\t\"\"") || t2[i].Contains("\"\"\t\"\"")))
                {
                    continue;
                }
                if ((startsWith && !t1[i].StartsWith(t2[i])) || (!startsWith && t1[i] != t2[i]))
                {
                    if (t1[i].EndsWith("\t5\t0:\"\""))
                    {
                        var a = t1[i].Substring(0, t1[i].Length - "\t5\t0:\"\"".Length);
                        a += "\t\"\"\t\"\"\t\"\"\t\"\"\t\"\"";
                        var b = t2[i];
                        if ((startsWith && !a.StartsWith(b)) || (!startsWith && a != b))
                        {
                            throw new Exception(string.Format("2-Mismatch on line {0}/{3}:\n{1}\n{2}", i, t1[i], t2[i], t1.Length));
                        }
                    }
                    else
                    {
                        // The test might fail because one side is dense and the other is sparse.
                        throw new Exception(string.Format("3-Mismatch on line {0}/{3}:\n{1}\n{2}", i, t1[i], t2[i], t1.Length));
                    }
                }
            }
        }
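A hedged usage sketch (not from the original file): the helper takes the environment, the transform under test and its source view, plus the model path and two output paths. GetSourceView and MakeTransform stand in for whatever setup the caller uses; the file names are illustrative.

            // Hedged usage sketch; GetSourceView, MakeTransform and the paths are hypothetical.
            using (var env = new ConsoleEnvironment())
            {
                IDataView source = GetSourceView(env);                  // view before the transform
                IDataTransform transform = MakeTransform(env, source);  // transform under test
                SerializationTestTransform(env, "model.zip", transform, source,
                                           "output1.txt", "output2.txt", startsWith: false);
            }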