/// <summary>
/// This method compares two pipelines to make sure they are identical. The first pipeline is passed
/// as a <see cref="RoleMappedData"/>, and the second as a double byte array and a string array. The double
/// byte array and the string array are obtained by calling <see cref="SerializeRoleMappedData"/> on the
/// second pipeline.
/// The comparison is done by saving <paramref name="dataToCompare"/> as an in-memory <see cref="ZipArchive"/>,
/// and for each entry in it, comparing its name and byte sequence to the corresponding entries in
/// <paramref name="dataZipEntryNames"/> and <paramref name="dataSerialized"/>.
/// This method throws if, for any of the entries, the name or byte sequence are not identical.
/// </summary>
internal static void CheckSamePipeline(IHostEnvironment env, IChannel ch,
    RoleMappedData dataToCompare, byte[][] dataSerialized, string[] dataZipEntryNames)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ch, nameof(ch));
    ch.CheckValue(dataToCompare, nameof(dataToCompare));
    ch.CheckValue(dataSerialized, nameof(dataSerialized));
    ch.CheckValue(dataZipEntryNames, nameof(dataZipEntryNames));
    if (dataZipEntryNames.Length != dataSerialized.Length)
    {
        throw ch.ExceptParam(nameof(dataSerialized),
            $"The length of {nameof(dataSerialized)} must be equal to the length of {nameof(dataZipEntryNames)}");
    }

    using (var ms = new MemoryStream())
    {
        // REVIEW: This can be done more efficiently by adding a custom type of repository that
        // doesn't actually save the data, but upon stream closure compares the results to the given repository
        // and then discards it. Currently, however, this cannot be done because ModelSaveContext does not use
        // an abstract class/interface, but rather the RepositoryWriter class.
        TrainUtils.SaveModel(env, ch, ms, null, dataToCompare);
        string errorMsg = "Models contain different pipelines, cannot ensemble them.";
        var zip = new ZipArchive(ms);
        var entries = zip.Entries.OrderBy(e => e.FullName).ToArray();
        ch.Check(dataSerialized.Length == Utils.Size(entries));
        byte[] buffer = null;
        for (int i = 0; i < dataSerialized.Length; i++)
        {
            ch.Check(dataZipEntryNames[i] == entries[i].FullName, errorMsg);
            int len = dataSerialized[i].Length;
            if (Utils.Size(buffer) < len)
                buffer = new byte[len];
            using (var s = entries[i].Open())
            {
                int bytesRead = s.Read(buffer, 0, len);
                ch.Check(bytesRead == len, errorMsg);
                for (int j = 0; j < len; j++)
                    ch.Check(buffer[j] == dataSerialized[i][j], errorMsg);
                if (s.Read(buffer, 0, 1) > 0)
                    throw env.Except(errorMsg);
            }
        }
    }
}
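A minimal usage sketch matching the ensemble scenario hinted at by the error message above: serialize the first model's pipeline once, then verify every other model was trained on an identical pipeline. The `models` array and its `Data` property are illustrative assumptions, not part of the source.

// Hypothetical: models[i].Data is the RoleMappedData each model was trained on.
SerializeRoleMappedData(env, ch, models[0].Data, out byte[][] dataSerialized, out string[] dataZipEntryNames);
for (int i = 1; i < models.Length; i++)
{
    // Throws "Models contain different pipelines, cannot ensemble them." on any mismatch.
    CheckSamePipeline(env, ch, models[i].Data, dataSerialized, dataZipEntryNames);
}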
public void TrainSaveModelAndPredict()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new LocalEnvironment(seed: 1, conc: 1))
    {
        // Pipeline
        var loader = TextLoader.ReadFile(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(dataPath));
        var trans = TextTransform.Create(env, MakeSentimentTextTransformArgs(), loader);

        // Train
        var trainer = new LinearClassificationTrainer(env, new LinearClassificationTrainer.Arguments
        {
            NumThreads = 1
        });

        var cached = new CacheDataView(env, trans, prefetch: null);
        var trainRoles = new RoleMappedData(cached, label: "Label", feature: "Features");
        var predictor = trainer.Train(new Runtime.TrainContext(trainRoles));

        PredictionEngine<SentimentData, SentimentPrediction> model;
        using (var file = env.CreateTempFile())
        {
            // Save model.
            var scoreRoles = new RoleMappedData(trans, label: "Label", feature: "Features");
            using (var ch = env.Start("saving"))
                TrainUtils.SaveModel(env, ch, file, predictor, scoreRoles);

            // Load model.
            using (var fs = file.OpenReadStream())
                model = env.CreatePredictionEngine<SentimentData, SentimentPrediction>(fs);
        }

        // Take a couple examples out of the test data and run predictions on top.
        var testLoader = TextLoader.ReadFile(env, MakeSentimentTextLoaderArgs(), new MultiFileSource(testDataPath));
        var testData = testLoader.AsEnumerable<SentimentData>(env, false);
        foreach (var input in testData.Take(5))
        {
            var prediction = model.Predict(input);
            // Verify that predictions match and scores are separated from zero.
            Assert.Equal(input.Sentiment, prediction.Sentiment);
            Assert.True(input.Sentiment && prediction.Score > 1 || !input.Sentiment && prediction.Score < -1);
        }
    }
}
private IDataScorerTransform GetScorer(IHostEnvironment env, IDataView transforms, IPredictor pred, string testDataPath = null)
{
    using (var ch = env.Start("Saving model"))
    using (var memoryStream = new MemoryStream())
    {
        var trainRoles = new RoleMappedData(transforms, label: "Label", feature: "Features");

        // Model cannot be saved with CacheDataView
        TrainUtils.SaveModel(env, ch, memoryStream, pred, trainRoles);
        memoryStream.Position = 0;
        using (var rep = RepositoryReader.Open(memoryStream, ch))
        {
            IDataLoader testPipe = ModelFileUtils.LoadLoader(env, rep, new MultiFileSource(testDataPath), true);
            RoleMappedData testRoles = new RoleMappedData(testPipe, label: "Label", feature: "Features");
            return ScoreUtils.GetScorer(pred, testRoles, env, testRoles.Schema);
        }
    }
}
public void TestGcnNormOldSavingAndLoading()
{
    string dataSource = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
    var dataView = TextLoader.CreateReader(ML,
        c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
        separator: ';', hasHeader: true)
        .Read(dataSource).AsDynamic;
    var pipe = new GlobalContrastNormalizingEstimator(ML, "features", "whitened");

    var result = pipe.Fit(dataView).Transform(dataView);
    var resultRoles = new RoleMappedData(result);
    using (var ms = new MemoryStream())
    {
        TrainUtils.SaveModel(ML, Env.Start("saving"), ms, null, resultRoles);
        ms.Position = 0;
        var loadedView = ModelFileUtils.LoadTransforms(ML, dataView, ms);
    }
}
internal override void Save(IHostEnvironment env, Stream stream)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(stream, nameof(stream));
    using (var ch = env.Start("Saving predictor model"))
    {
        // REVIEW: address the asymmetry in the way we're loading and saving the model.
        // Effectively, we have methods to load the transform model from a model.zip, but don't have
        // methods to compose the model.zip out of transform model, predictor and role mappings
        // (we use the TrainUtils.SaveModel that does all three).

        // Create the chain of transforms for saving.
        IDataView data = new EmptyDataView(env, TransformModel.InputSchema);
        data = TransformModel.Apply(env, data);
        var roleMappedData = new RoleMappedData(data, _roleMappings, opt: true);

        TrainUtils.SaveModel(env, ch, stream, Predictor, roleMappedData);
    }
}
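The REVIEW comment above notes the load/save asymmetry; here is a hedged sketch of the reading side, assuming `stream` holds the model.zip produced by `Save` and `inputSchema` is the schema the transforms expect (both names are illustrative). The two `ModelFileUtils` calls are the ones used elsewhere in this listing.

stream.Position = 0;
var predictor = ModelFileUtils.LoadPredictorOrNull(env, stream);  // predictor part
stream.Position = 0;
// Transform part, re-applied on top of an empty view with the expected input schema.
var pipeline = ModelFileUtils.LoadTransforms(env, new EmptyDataView(env, inputSchema), stream);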
void TestOldSavingAndLoading()
{
    var data = new[]
    {
        new TestClass() { A = 1, B = 2, C = 3 },
        new TestClass() { A = 4, B = 5, C = 6 }
    };
    var dataView = ML.Data.ReadFromEnumerable(data);
    var est = new ValueToKeyMappingEstimator(Env, new[]
    {
        new ValueToKeyMappingTransformer.ColumnInfo("A", "TermA"),
        new ValueToKeyMappingTransformer.ColumnInfo("B", "TermB"),
        new ValueToKeyMappingTransformer.ColumnInfo("C", "TermC")
    });
    var transformer = est.Fit(dataView);
    var result = transformer.Transform(dataView);
    var resultRoles = new RoleMappedData(result);
    using (var ms = new MemoryStream())
    {
        TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
        ms.Position = 0;
        var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);
        ValidateTermTransformer(loadedView);
    }
}
public void TestWhiteningOldSavingAndLoading()
{
    var env = new ConsoleEnvironment(seed: 0);
    string dataSource = GetDataPath("generated_regression_dataset.csv");
    var dataView = TextLoader.CreateReader(env,
        c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
        separator: ';', hasHeader: true)
        .Read(dataSource).AsDynamic;
    var pipe = new VectorWhiteningEstimator(env, "features", "whitened");

    var result = pipe.Fit(dataView).Transform(dataView);
    var resultRoles = new RoleMappedData(result);
    using (var ms = new MemoryStream())
    {
        TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
        ms.Position = 0;
        var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);
    }
    Done();
}
/// <summary>
/// This method takes a <see cref="RoleMappedData"/> as input, saves it as an in-memory <see cref="ZipArchive"/>
/// and returns two arrays indexed by the entries in the zip:
/// 1. An array of byte arrays, containing the byte sequences of each entry.
/// 2. An array of strings, containing the name of each entry.
///
/// This method is used for comparing pipelines. Its outputs can be passed to <see cref="CheckSamePipeline"/>
/// to check if this pipeline is identical to another pipeline.
/// </summary>
public static void SerializeRoleMappedData(IHostEnvironment env, IChannel ch, RoleMappedData data,
    out byte[][] dataSerialized, out string[] dataZipEntryNames)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ch, nameof(ch));
    ch.CheckValue(data, nameof(data));

    using (var ms = new MemoryStream())
    {
        TrainUtils.SaveModel(env, ch, ms, null, data);
        var zip = new ZipArchive(ms);
        var entries = zip.Entries.OrderBy(e => e.FullName).ToArray();
        dataSerialized = new byte[Utils.Size(entries)][];
        dataZipEntryNames = new string[Utils.Size(entries)];
        for (int i = 0; i < Utils.Size(entries); i++)
        {
            dataZipEntryNames[i] = entries[i].FullName;
            dataSerialized[i] = new byte[entries[i].Length];
            using (var s = entries[i].Open())
                s.Read(dataSerialized[i], 0, (int)entries[i].Length);
        }
    }
}
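A short hedged example of consuming the two output arrays; `data` stands for any `RoleMappedData`, and the channel usage mirrors the callers above.

using (var ch = env.Start("serialize"))
{
    SerializeRoleMappedData(env, ch, data, out byte[][] bytes, out string[] names);
    // names[i] is the zip entry's FullName, bytes[i] its raw content; both arrays are
    // sorted by entry name, which is the ordering CheckSamePipeline relies on.
    ch.Info("Pipeline serialized into {0} zip entries.", names.Length);
}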
public void TestOldSavingAndLoading() { var modelFile = "model_matmul/frozen_saved_model.pb"; var dataView = ML.Data.LoadFromEnumerable( new List <TestData>(new TestData[] { new TestData() { a = new[] { 1.0f, 2.0f, 3.0f, 4.0f }, b = new[] { 1.0f, 2.0f, 3.0f, 4.0f } }, new TestData() { a = new[] { 2.0f, 2.0f, 2.0f, 2.0f }, b = new[] { 3.0f, 3.0f, 3.0f, 3.0f } }, new TestData() { a = new[] { 5.0f, 6.0f, 10.0f, 12.0f }, b = new[] { 10.0f, 8.0f, 6.0f, 6.0f } } })); using var model = ML.Model.LoadTensorFlowModel(modelFile); var est = model.ScoreTensorFlowModel(new[] { "c" }, new[] { "a", "b" }); var transformer = est.Fit(dataView); var result = transformer.Transform(dataView); var resultRoles = new RoleMappedData(result); using (var ms = new MemoryStream()) { TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles); ms.Position = 0; var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms); ValidateTensorFlowTransformer(loadedView); } }
/// <summary>
/// Saves the pipeline in a stream.
/// </summary>
/// <param name="fs">opened stream</param>
/// <param name="removeFirstTransform">remove the first transform, which is a PassThroughTransform</param>
public void Save(Stream fs, bool removeFirstTransform = false)
{
    RoleMappedData roleMap = null;
    if (removeFirstTransform)
    {
        var source = _transforms.First().transform;
        if (!(source is PassThroughTransform))
            throw Contracts.ExceptNotSupp("The first transform should be of type PassThroughTransform.");
        var replace = (source as PassThroughTransform).Source;
        var last = _transforms.Last().transform;
        var newPipe = ApplyTransformUtils.ApplyAllTransformsToData(_env, last, replace, source);
        var roles = _predictor.roleMapData.Schema.GetColumnRoleNames().ToArray();
        roleMap = new RoleMappedData(newPipe, roles);
    }
    else
        roleMap = _predictor.roleMapData;
    using (var ch = _env.Start("Save Predictor"))
        TrainUtils.SaveModel(_env, ch, fs, _predictor.predictor, roleMap);
}
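A hypothetical call site for the method above, assuming `pipeline` is an instance of the surrounding class; the file name is illustrative.

// Drop the leading PassThroughTransform before persisting; the call throws if the
// first transform is of any other type.
using (var fs = File.Create("pipeline.zip"))
    pipeline.Save(fs, removeFirstTransform: true);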
public void TestOldSavingAndLoading() { var data = new[] { new TestClass() { A = "This is a good sentence.", B = new string[2] { "Much words", "Wow So Cool" } } }; var dataView = ML.Data.ReadFromEnumerable(data); var pipe = new WordTokenizingEstimator(Env, new[] { new WordTokenizingEstimator.ColumnInfo("TokenizeA", "A"), new WordTokenizingEstimator.ColumnInfo("TokenizeB", "B"), }); var result = pipe.Fit(dataView).Transform(dataView); var resultRoles = new RoleMappedData(result); using (var ms = new MemoryStream()) { TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles); ms.Position = 0; var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms); } }
public void TestOldSavingAndLoading()
{
    var data = new[]
    {
        new TestClass() { A = 1, B = new int[2] { 2, 3 }, C = new int[2] { 3, 4 } },
        new TestClass() { A = 4, B = new int[2] { 2, 4 }, C = new int[3] { 2, 4, 3 } }
    };
    var dataView = ML.Data.LoadFromEnumerable(data);
    var pipe = ML.Transforms.Categorical.OneHotEncoding(new[]
    {
        new OneHotEncodingEstimator.ColumnOptions("CatA", "A"),
        new OneHotEncodingEstimator.ColumnOptions("CatB", "B"),
        new OneHotEncodingEstimator.ColumnOptions("CatC", "C")
    });
    var result = pipe.Fit(dataView).Transform(dataView);
    var resultRoles = new RoleMappedData(result);
    using (var ms = new MemoryStream())
    {
        TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
        ms.Position = 0;
        var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);
    }
}
[ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // x86 fails with "An attempt was made to load a program with an incorrect format."
void TestOldSavingAndLoading()
{
    if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        return;

    var modelFile = "squeezenet/00000001/model.onnx";
    var samplevector = GetSampleArrayData();

    var dataView = ComponentCreation.CreateDataView(Env,
        new TestData[] { new TestData() { data_0 = samplevector } });

    var inputNames = new[] { "data_0" };
    var outputNames = new[] { "softmaxout_1" };
    var est = new OnnxScoringEstimator(Env, modelFile, inputNames, outputNames);
    var transformer = est.Fit(dataView);
    var result = transformer.Transform(dataView);
    var resultRoles = new RoleMappedData(result);
    using (var ms = new MemoryStream())
    {
        TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
        ms.Position = 0;
        var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);

        loadedView.Schema.TryGetColumnIndex(outputNames[0], out int softMaxOut1);
        using (var cursor = loadedView.GetRowCursor(col => col == softMaxOut1))
        {
            VBuffer<float> softMaxValue = default;
            var softMaxGetter = cursor.GetGetter<VBuffer<float>>(softMaxOut1);
            float sum = 0f;
            int i = 0;
            while (cursor.MoveNext())
            {
                softMaxGetter(ref softMaxValue);
                var values = softMaxValue.DenseValues();
                foreach (var val in values)
                {
                    sum += val;
                    if (i == 0)
                        Assert.InRange(val, 0.00004, 0.00005);
                    if (i == 1)
                        Assert.InRange(val, 0.003844, 0.003845);
                    if (i == 999)
                        Assert.InRange(val, 0.0029566, 0.0029567);
                    i++;
                }
            }
            Assert.InRange(sum, 1.0, 1.00001);
        }
    }
}
public void TestBackAndForthConversionWithoutAlphaNoInterleaveNoOffset()
{
    IHostEnvironment env = new MLContext();
    const int imageHeight = 100;
    const int imageWidth = 130;
    var dataFile = GetDataPath("images/images.tsv");
    var imageFolder = Path.GetDirectoryName(dataFile);
    var data = TextLoader.Create(env, new TextLoader.Arguments()
    {
        Columns = new[]
        {
            new TextLoader.Column("ImagePath", DataKind.TX, 0),
            new TextLoader.Column("Name", DataKind.TX, 1),
        }
    }, new MultiFileSource(dataFile));
    var images = new ImageLoaderTransformer(env, imageFolder, ("ImageReal", "ImagePath")).Transform(data);
    var cropped = new ImageResizerTransformer(env, "ImageCropped", imageWidth, imageHeight, "ImageReal").Transform(images);
    var pixels = new ImagePixelExtractorTransformer(env, "ImagePixels", "ImageCropped").Transform(cropped);

    IDataView backToBitmaps = new VectorToImageTransform(env, new VectorToImageTransform.Arguments()
    {
        InterleaveArgb = false,
        Columns = new VectorToImageTransform.Column[1]
        {
            new VectorToImageTransform.Column()
            {
                Name = "ImageRestored",
                Source = "ImagePixels",
                ImageHeight = imageHeight,
                ImageWidth = imageWidth,
                ContainsAlpha = false
            }
        }
    }, pixels);

    var fname = nameof(TestBackAndForthConversionWithoutAlphaNoInterleaveNoOffset) + "_model.zip";
    var fh = env.CreateOutputFile(fname);
    using (var ch = env.Start("save"))
        TrainUtils.SaveModel(env, ch, fh, null, new RoleMappedData(backToBitmaps));

    backToBitmaps = ModelFileUtils.LoadPipeline(env, fh.OpenReadStream(), new MultiFileSource(dataFile));
    DeleteOutputPath(fname);

    backToBitmaps.Schema.TryGetColumnIndex("ImageRestored", out int bitmapColumn);
    backToBitmaps.Schema.TryGetColumnIndex("ImageCropped", out int cropBitmapColumn);
    using (var cursor = backToBitmaps.GetRowCursorForAllColumns())
    {
        var bitmapGetter = cursor.GetGetter<Bitmap>(bitmapColumn);
        Bitmap restoredBitmap = default;

        var bitmapCropGetter = cursor.GetGetter<Bitmap>(cropBitmapColumn);
        Bitmap croppedBitmap = default;
        while (cursor.MoveNext())
        {
            bitmapGetter(ref restoredBitmap);
            Assert.NotNull(restoredBitmap);
            bitmapCropGetter(ref croppedBitmap);
            Assert.NotNull(croppedBitmap);
            for (int x = 0; x < imageWidth; x++)
            {
                for (int y = 0; y < imageHeight; y++)
                {
                    var c = croppedBitmap.GetPixel(x, y);
                    var r = restoredBitmap.GetPixel(x, y);
                    Assert.True(c.R == r.R && c.G == r.G && c.B == r.B);
                }
            }
        }
        Done();
    }
}
IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input, out IDataView sourceCtx)
{
    sourceCtx = input;
    Contracts.CheckValue(env, "env");
    env.CheckValue(args, "args");
    env.CheckValue(input, "input");
    env.CheckValue(args.tag, "tag is empty");
    env.CheckValue(args.trainer, "trainer",
        "Trainer cannot be null. If your model is already trained, please use ScoreTransform instead.");

    var views = TagHelper.EnumerateTaggedView(true, input).Where(c => c.Item1 == args.tag);
    if (views.Any())
        throw env.Except("Tag '{0}' is already used.", args.tag);

    var host = env.Register("TagTrainOrScoreTransform");
    using (var ch = host.Start("Train"))
    {
        ch.Trace("Constructing trainer");
        var trainerSett = ScikitSubComponent<ITrainer, SignatureTrainer>.AsSubComponent(args.trainer);
        ITrainer trainer = trainerSett.CreateInstance(host);
        var customCols = TrainUtils.CheckAndGenerateCustomColumns(env, args.CustomColumn);
        string feat;
        string group;
        var data = CreateDataFromArgs(_host, ch, new OpaqueDataView(input), args, out feat, out group);
        ICalibratorTrainer calibrator = args.calibrator == null
            ? null
            : ScikitSubComponent<ICalibratorTrainer, SignatureCalibrator>.AsSubComponent(args.calibrator).CreateInstance(host);
        var nameTrainer = args.trainer.ToString().Replace("{", "").Replace("}", "").Replace(" ", "")
            .Replace("=", "").Replace("+", "Y").Replace("-", "N");
        var extTrainer = new ExtendedTrainer(trainer, nameTrainer);
        _predictor = extTrainer.Train(host, ch, data, null, calibrator, args.maxCalibrationExamples);

        if (!string.IsNullOrEmpty(args.outputModel))
        {
            ch.Info("Saving model into '{0}'", args.outputModel);
            using (var fs = File.Create(args.outputModel))
                TrainUtils.SaveModel(env, ch, fs, _predictor, data);
            ch.Info("Done.");
        }

        if (_cali != null)
            throw ch.ExceptNotImpl("Calibrator is not implemented yet.");

        ch.Trace("Scoring");
        if (_args.scorer != null)
        {
            var mapper = new SchemaBindablePredictorWrapper(_predictor);
            var roles = new RoleMappedSchema(input.Schema, null, feat, group: group);
            var bound = mapper.Bind(_host, roles);
            var scorPars = ScikitSubComponent<IDataScorerTransform, SignatureDataScorer>.AsSubComponent(_args.scorer);
            _scorer = scorPars.CreateInstance(_host, input, bound, roles);
        }
        else
            _scorer = PredictorHelper.CreateDefaultScorer(_host, input, feat, group, _predictor);

        ch.Info("Tagging with tag '{0}'.", args.tag);

        var ar = new TagViewTransform.Arguments { tag = args.tag };
        var res = new TagViewTransform(env, ar, _scorer, _predictor);
        return res;
    }
}
public void TestOldSavingAndLoading()
{
    var data = new[]
    {
        new TestClass() { A = 1, B = 3, C = new float[2] { 1, 2 }, D = new double[2] { 3, 4 } },
        new TestClass() { A = float.NaN, B = double.NaN, C = new float[2] { float.NaN, float.NaN }, D = new double[2] { double.NaN, double.NaN } },
        new TestClass() { A = float.NegativeInfinity, B = double.NegativeInfinity, C = new float[2] { float.NegativeInfinity, float.NegativeInfinity }, D = new double[2] { double.NegativeInfinity, double.NegativeInfinity } },
        new TestClass() { A = float.PositiveInfinity, B = double.PositiveInfinity, C = new float[2] { float.PositiveInfinity, float.PositiveInfinity }, D = new double[2] { double.PositiveInfinity, double.PositiveInfinity } },
        new TestClass() { A = 2, B = 1, C = new float[2] { 3, 4 }, D = new double[2] { 5, 6 } },
    };
    var dataView = ComponentCreation.CreateDataView(Env, data);
    var pipe = new NAReplaceEstimator(Env,
        new NAReplaceTransform.ColumnInfo("A", "NAA", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean),
        new NAReplaceTransform.ColumnInfo("B", "NAB", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean),
        new NAReplaceTransform.ColumnInfo("C", "NAC", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean),
        new NAReplaceTransform.ColumnInfo("D", "NAD", NAReplaceTransform.ColumnInfo.ReplacementMode.Mean));
    var result = pipe.Fit(dataView).Transform(dataView);
    var resultRoles = new RoleMappedData(result);
    using (var ms = new MemoryStream())
    {
        TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
        ms.Position = 0;
        var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);
    }
}
/// <summary>
/// Finalizes the test on a predictor: calls the predictor with a scorer,
/// saves the data, saves the model, loads it back, saves the data again,
/// and checks that the output is the same.
/// </summary>
/// <param name="env">environment</param>
/// <param name="outModelFilePath">output model filename</param>
/// <param name="predictor">predictor</param>
/// <param name="roles">label, feature, ...</param>
/// <param name="outData">first output data</param>
/// <param name="outData2">second output data</param>
/// <param name="kind">prediction kind</param>
/// <param name="checkError">checks errors</param>
/// <param name="ratio">check the error is below that threshold (if checkError is true)</param>
/// <param name="ratioReadSave">check the prediction differences after reloading the model are below this threshold</param>
/// <param name="checkType">check that the reloaded predictor has the same type</param>
public static void FinalizeSerializationTest(IHostEnvironment env, string outModelFilePath, IPredictor predictor,
    RoleMappedData roles, string outData, string outData2, PredictionKind kind,
    bool checkError = true, float ratio = 0.8f, float ratioReadSave = 0.06f, bool checkType = true)
{
    string labelColumn = kind != PredictionKind.Clustering ? roles.Schema.Label.Value.Name : null;

    #region save, reading, running

    // Saves the model.
    using (var ch = env.Start("Save"))
    using (var fs = File.Create(outModelFilePath))
        TrainUtils.SaveModel(env, ch, fs, predictor, roles);
    if (!File.Exists(outModelFilePath))
        throw new FileNotFoundException(outModelFilePath);

    // Loads the model back.
    using (var fs = File.OpenRead(outModelFilePath))
    {
        var pred_local = ModelFileUtils.LoadPredictorOrNull(env, fs);
        if (pred_local == null)
            throw new Exception(string.Format("Unable to load '{0}'", outModelFilePath));
        if (checkType && predictor.GetType() != pred_local.GetType())
            throw new Exception(string.Format("Type mismatch {0} != {1}", predictor.GetType(), pred_local.GetType()));
    }

    // Checks the outputs.
    var sch1 = SchemaHelper.ToString(roles.Schema.Schema);
    var scorer = PredictorHelper.CreateDefaultScorer(env, roles, predictor);
    var sch2 = SchemaHelper.ToString(scorer.Schema);
    if (string.IsNullOrEmpty(sch1) || string.IsNullOrEmpty(sch2))
        throw new Exception("Empty schemas");

    var saver = env.CreateSaver("Text");
    var columns = new int[scorer.Schema.Count];
    for (int i = 0; i < columns.Length; ++i)
        columns[i] = saver.IsColumnSavable(scorer.Schema[i].Type) ? i : -1;
    columns = columns.Where(c => c >= 0).ToArray();
    using (var fs2 = File.Create(outData))
        saver.SaveData(fs2, scorer, columns);
    if (!File.Exists(outData))
        throw new FileNotFoundException(outData);

    // Check we have the same output after reloading the model.
    using (var fs = File.OpenRead(outModelFilePath))
    {
        var model = ModelFileUtils.LoadPredictorOrNull(env, fs);
        scorer = PredictorHelper.CreateDefaultScorer(env, roles, model);
        saver = env.CreateSaver("Text");
        using (var fs2 = File.Create(outData2))
            saver.SaveData(fs2, scorer, columns);
    }

    var t1 = File.ReadAllLines(outData);
    var t2 = File.ReadAllLines(outData2);
    if (t1.Length != t2.Length)
        throw new Exception(string.Format("Not the same number of lines: {0} != {1}", t1.Length, t2.Length));
    var linesN = new List<int>();
    for (int i = 0; i < t1.Length; ++i)
    {
        if (t1[i] != t2[i])
            linesN.Add(i);
    }
    if (linesN.Count > (int)(t1.Length * ratioReadSave))
    {
        var rows = linesN.Select(i => string.Format("1-Mismatch on line {0}/{3}:\n{1}\n{2}", i, t1[i], t2[i], t1.Length)).ToList();
        rows.Add($"Number of differences: {linesN.Count}/{t1.Length}");
        throw new Exception(string.Join("\n", rows));
    }

    #endregion

    #region clustering

    if (kind == PredictionKind.Clustering)
    {
        // Nothing to do here.
        return;
    }

    #endregion

    #region supervised

    string expectedOutput = kind == PredictionKind.Regression ? "Score" : "PredictedLabel";

    // Get label and basic checking about performance.
    using (var cursor = scorer.GetRowCursor(scorer.Schema))
    {
        int ilabel = SchemaHelper.GetColumnIndex(cursor.Schema, labelColumn);
        int ipred = SchemaHelper.GetColumnIndex(cursor.Schema, expectedOutput);
        var ty1 = cursor.Schema[ilabel].Type;
        var ty2 = cursor.Schema[ipred].Type;
        var dist1 = new Dictionary<int, int>();
        var dist2 = new Dictionary<int, int>();
        var conf = new Dictionary<Tuple<int, int>, long>();

        if (kind == PredictionKind.MulticlassClassification)
        {
            #region multiclass

            if (!ty2.IsKey())
                throw new Exception(string.Format("Label='{0}' Predicted={1}'\nSchema: {2}", ty1, ty2, SchemaHelper.ToString(cursor.Schema)));
            if (ty1.RawKind() == DataKind.Single)
            {
                var lgetter = cursor.GetGetter<float>(SchemaHelper._dc(ilabel, cursor));
                var pgetter = cursor.GetGetter<uint>(SchemaHelper._dc(ipred, cursor));
                float ans = 0;
                uint pre = 0;
                while (cursor.MoveNext())
                {
                    lgetter(ref ans);
                    pgetter(ref pre);
                    // The scorer adds +1 to the argmax.
                    ++ans;
                    var key = new Tuple<int, int>((int)pre, (int)ans);
                    if (!conf.ContainsKey(key)) conf[key] = 1; else ++conf[key];
                    if (!dist1.ContainsKey((int)ans)) dist1[(int)ans] = 1; else ++dist1[(int)ans];
                    if (!dist2.ContainsKey((int)pre)) dist2[(int)pre] = 1; else ++dist2[(int)pre];
                }
            }
            else if (ty1.RawKind() == DataKind.UInt32 && ty1.IsKey())
            {
                var lgetter = cursor.GetGetter<uint>(SchemaHelper._dc(ilabel, cursor));
                var pgetter = cursor.GetGetter<uint>(SchemaHelper._dc(ipred, cursor));
                uint ans = 0;
                uint pre = 0;
                while (cursor.MoveNext())
                {
                    lgetter(ref ans);
                    pgetter(ref pre);
                    var key = new Tuple<int, int>((int)pre, (int)ans);
                    if (!conf.ContainsKey(key)) conf[key] = 1; else ++conf[key];
                    if (!dist1.ContainsKey((int)ans)) dist1[(int)ans] = 1; else ++dist1[(int)ans];
                    if (!dist2.ContainsKey((int)pre)) dist2[(int)pre] = 1; else ++dist2[(int)pre];
                }
            }
            else
                throw new NotImplementedException(string.Format("Not implemented for type {0}", ty1.ToString()));

            #endregion
        }
        else if (kind == PredictionKind.BinaryClassification)
        {
            #region binary classification

            if (ty2.RawKind() != DataKind.Boolean)
                throw new Exception(string.Format("Label='{0}' Predicted={1}'\nSchema: {2}", ty1, ty2, SchemaHelper.ToString(cursor.Schema)));
            if (ty1.RawKind() == DataKind.Single)
            {
                var lgetter = cursor.GetGetter<float>(SchemaHelper._dc(ilabel, cursor));
                var pgetter = cursor.GetGetter<bool>(SchemaHelper._dc(ipred, cursor));
                float ans = 0;
                bool pre = default(bool);
                while (cursor.MoveNext())
                {
                    lgetter(ref ans);
                    pgetter(ref pre);
                    if (ans != 0 && ans != 1)
                        throw Contracts.Except("The problem is not binary, expected answer is {0}", ans);
                    var key = new Tuple<int, int>(pre ? 1 : 0, (int)ans);
                    if (!conf.ContainsKey(key)) conf[key] = 1; else ++conf[key];
                    if (!dist1.ContainsKey((int)ans)) dist1[(int)ans] = 1; else ++dist1[(int)ans];
                    if (!dist2.ContainsKey(pre ? 1 : 0)) dist2[pre ? 1 : 0] = 1; else ++dist2[pre ? 1 : 0];
                }
            }
            else if (ty1.RawKind() == DataKind.UInt32)
            {
                var lgetter = cursor.GetGetter<uint>(SchemaHelper._dc(ilabel, cursor));
                var pgetter = cursor.GetGetter<bool>(SchemaHelper._dc(ipred, cursor));
                uint ans = 0;
                bool pre = default(bool);
                while (cursor.MoveNext())
                {
                    lgetter(ref ans);
                    pgetter(ref pre);
                    if (ty1.IsKey())
                        --ans;
                    if (ans != 0 && ans != 1)
                        throw Contracts.Except("The problem is not binary, expected answer is {0}", ans);
                    var key = new Tuple<int, int>(pre ? 1 : 0, (int)ans);
                    if (!conf.ContainsKey(key)) conf[key] = 1; else ++conf[key];
                    if (!dist1.ContainsKey((int)ans)) dist1[(int)ans] = 1; else ++dist1[(int)ans];
                    if (!dist2.ContainsKey(pre ? 1 : 0)) dist2[pre ? 1 : 0] = 1; else ++dist2[pre ? 1 : 0];
                }
            }
            else if (ty1.RawKind() == DataKind.Boolean)
            {
                var lgetter = cursor.GetGetter<bool>(SchemaHelper._dc(ilabel, cursor));
                var pgetter = cursor.GetGetter<bool>(SchemaHelper._dc(ipred, cursor));
                bool ans = default(bool);
                bool pre = default(bool);
                while (cursor.MoveNext())
                {
                    lgetter(ref ans);
                    pgetter(ref pre);
                    var key = new Tuple<int, int>(pre ? 1 : 0, ans ? 1 : 0);
                    if (!conf.ContainsKey(key)) conf[key] = 1; else ++conf[key];
                    if (!dist1.ContainsKey(ans ? 1 : 0)) dist1[ans ? 1 : 0] = 1; else ++dist1[ans ? 1 : 0];
                    if (!dist2.ContainsKey(pre ? 1 : 0)) dist2[pre ? 1 : 0] = 1; else ++dist2[pre ? 1 : 0];
                }
            }
            else
                throw new NotImplementedException(string.Format("Not implemented for type {0}", ty1));

            #endregion
        }
        else if (kind == PredictionKind.Regression)
        {
            #region regression

            if (ty1.RawKind() != DataKind.Single)
                throw new Exception(string.Format("Label='{0}' Predicted={1}'\nSchema: {2}", ty1, ty2, SchemaHelper.ToString(cursor.Schema)));
            if (ty2.RawKind() != DataKind.Single)
                throw new Exception(string.Format("Label='{0}' Predicted={1}'\nSchema: {2}", ty1, ty2, SchemaHelper.ToString(cursor.Schema)));
            var lgetter = cursor.GetGetter<float>(SchemaHelper._dc(ilabel, cursor));
            var pgetter = cursor.GetGetter<float>(SchemaHelper._dc(ipred, cursor));
            float ans = 0;
            float pre = 0f;
            float error = 0f;
            while (cursor.MoveNext())
            {
                lgetter(ref ans);
                pgetter(ref pre);
                error += (ans - pre) * (ans - pre);
                if (!dist1.ContainsKey((int)ans)) dist1[(int)ans] = 1; else ++dist1[(int)ans];
                if (!dist2.ContainsKey((int)pre)) dist2[(int)pre] = 1; else ++dist2[(int)pre];
            }
            if (float.IsNaN(error) || float.IsInfinity(error))
                throw new Exception("Regression went wrong. Error is infinite.");

            #endregion
        }
        else
            throw new NotImplementedException(string.Format("Not implemented for kind {0}", kind));

        var nbError = conf.Where(c => c.Key.Item1 != c.Key.Item2).Select(c => c.Value).Sum();
        var nbTotal = conf.Select(c => c.Value).Sum();
        if (checkError && (nbError * 1.0 > nbTotal * ratio || dist2.Count <= 1))
        {
            var sconf = string.Join("\n", conf.OrderBy(c => c.Key)
                .Select(c => string.Format("pred={0} exp={1} count={2}", c.Key.Item1, c.Key.Item2, c.Value)));
            // dist1 holds the label distribution, dist2 the prediction distribution.
            var sdist1 = string.Join("\n", dist1.OrderBy(c => c.Key)
                .Select(c => string.Format("label={0} count={1}", c.Key, c.Value)));
            var sdist2 = string.Join("\n", dist2.OrderBy(c => c.Key).Take(20)
                .Select(c => string.Format("label={0} count={1}", c.Key, c.Value)));
            throw new Exception(string.Format(
                "Too many errors {0}/{1}={7}\n###########\nConfusion:\n{2}\n########\nDIST1\n{3}\n###########\nDIST2\n{4}\nOutput:\n{5}\n...\n{6}",
                nbError, nbTotal, sconf, sdist1, sdist2,
                string.Join("\n", t1.Take(Math.Min(30, t1.Length))),
                string.Join("\n", t1.Skip(Math.Max(0, t1.Length - 30))),
                nbError * 1.0 / nbTotal));
        }
    }

    #endregion
}
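A hedged usage sketch of FinalizeSerializationTest, assuming `env`, `predictor` and `roles` come out of a completed training run; the file names are illustrative.

FinalizeSerializationTest(env, "model.zip", predictor, roles,
    "predictions1.txt", "predictions2.txt", PredictionKind.BinaryClassification,
    checkError: true, ratio: 0.8f, ratioReadSave: 0.06f);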
public void TestOldSavingAndLoading()
{
    if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
        return;

    var samplevector = getSampleArrayData();

    var dataView = ComponentCreation.CreateDataView(Env,
        new TestData[] { new TestData() { data_0 = samplevector } });

    var inputNames = "data_0";
    var outputNames = "output_1";
    var est = new DnnImageFeaturizerEstimator(Env,
        m => m.ModelSelector.ResNet18(m.Environment, m.InputColumn, m.OutputColumn), inputNames, outputNames);
    var transformer = est.Fit(dataView);
    var result = transformer.Transform(dataView);
    var resultRoles = new RoleMappedData(result);
    using (var ms = new MemoryStream())
    {
        TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
        ms.Position = 0;
        var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);

        loadedView.Schema.TryGetColumnIndex(outputNames, out int softMaxOut1);
        using (var cursor = loadedView.GetRowCursor(col => col == softMaxOut1))
        {
            VBuffer<float> softMaxValue = default;
            var softMaxGetter = cursor.GetGetter<VBuffer<float>>(softMaxOut1);
            float sum = 0f;
            int i = 0;
            while (cursor.MoveNext())
            {
                softMaxGetter(ref softMaxValue);
                var values = softMaxValue.DenseValues();
                foreach (var val in values)
                {
                    sum += val;
                    if (i == 0)
                        Assert.InRange(val, 0.0, 0.00001);
                    if (i == 7)
                        Assert.InRange(val, 0.62935, 0.62940);
                    if (i == 500)
                        Assert.InRange(val, 0.15521, 0.155225);
                    i++;
                }
            }
            Assert.InRange(sum, 83.50, 84.50);
        }
    }
}
public void TestGreyscaleTransformImages()
{
    using (var env = new ConsoleEnvironment())
    {
        var imageHeight = 150;
        var imageWidth = 100;
        var dataFile = GetDataPath("images/images.tsv");
        var imageFolder = Path.GetDirectoryName(dataFile);
        var data = TextLoader.Create(env, new TextLoader.Arguments()
        {
            Column = new[]
            {
                new TextLoader.Column("ImagePath", DataKind.TX, 0),
                new TextLoader.Column("Name", DataKind.TX, 1),
            }
        }, new MultiFileSource(dataFile));
        var images = ImageLoaderTransform.Create(env, new ImageLoaderTransform.Arguments()
        {
            Column = new ImageLoaderTransform.Column[1]
            {
                new ImageLoaderTransform.Column() { Source = "ImagePath", Name = "ImageReal" }
            },
            ImageFolder = imageFolder
        }, data);
        var cropped = ImageResizerTransform.Create(env, new ImageResizerTransform.Arguments()
        {
            Column = new ImageResizerTransform.Column[1]
            {
                new ImageResizerTransform.Column()
                {
                    Name = "ImageCropped",
                    Source = "ImageReal",
                    ImageHeight = imageHeight,
                    ImageWidth = imageWidth,
                    Resizing = ImageResizerTransform.ResizingKind.IsoCrop
                }
            }
        }, images);
        IDataView grey = ImageGrayscaleTransform.Create(env, new ImageGrayscaleTransform.Arguments()
        {
            Column = new ImageGrayscaleTransform.Column[1]
            {
                new ImageGrayscaleTransform.Column() { Name = "ImageGrey", Source = "ImageCropped" }
            }
        }, cropped);

        var fname = nameof(TestGreyscaleTransformImages) + "_model.zip";
        var fh = env.CreateOutputFile(fname);
        using (var ch = env.Start("save"))
            TrainUtils.SaveModel(env, ch, fh, null, new RoleMappedData(grey));

        grey = ModelFileUtils.LoadPipeline(env, fh.OpenReadStream(), new MultiFileSource(dataFile));
        DeleteOutputPath(fname);

        grey.Schema.TryGetColumnIndex("ImageGrey", out int greyColumn);
        using (var cursor = grey.GetRowCursor((x) => true))
        {
            var bitmapGetter = cursor.GetGetter<Bitmap>(greyColumn);
            Bitmap bitmap = default;
            while (cursor.MoveNext())
            {
                bitmapGetter(ref bitmap);
                Assert.NotNull(bitmap);
                for (int x = 0; x < imageWidth; x++)
                {
                    for (int y = 0; y < imageHeight; y++)
                    {
                        var pixel = bitmap.GetPixel(x, y);
                        // A greyscale image has the same values for R, G and B.
                        Assert.True(pixel.R == pixel.G && pixel.G == pixel.B);
                    }
                }
            }
        }
    }
    Done();
}
public void TestOldSavingAndLoading()
{
    var data = new[]
    {
        new TestClass() { A = 1, B = 3, C = new float[2] { 1, 2 }, D = new double[2] { 3, 4 } },
        new TestClass() { A = float.NaN, B = double.NaN, C = new float[2] { float.NaN, float.NaN }, D = new double[2] { double.NaN, double.NaN } },
        new TestClass() { A = float.NegativeInfinity, B = double.NegativeInfinity, C = new float[2] { float.NegativeInfinity, float.NegativeInfinity }, D = new double[2] { double.NegativeInfinity, double.NegativeInfinity } },
        new TestClass() { A = float.PositiveInfinity, B = double.PositiveInfinity, C = new float[2] { float.PositiveInfinity, float.PositiveInfinity }, D = new double[2] { double.PositiveInfinity, double.PositiveInfinity } },
        new TestClass() { A = 2, B = 1, C = new float[2] { 3, 4 }, D = new double[2] { 5, 6 } },
    };
    var dataView = ML.Data.LoadFromEnumerable(data);
    var pipe = ML.Transforms.ReplaceMissingValues(
        new MissingValueReplacingEstimator.ColumnOptions("NAA", "A", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean),
        new MissingValueReplacingEstimator.ColumnOptions("NAB", "B", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean),
        new MissingValueReplacingEstimator.ColumnOptions("NAC", "C", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean),
        new MissingValueReplacingEstimator.ColumnOptions("NAD", "D", MissingValueReplacingEstimator.ColumnOptions.ReplacementMode.Mean));
    var result = pipe.Fit(dataView).Transform(dataView);
    var resultRoles = new RoleMappedData(result);
    using (var ms = new MemoryStream())
    {
        TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
        ms.Position = 0;
        var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);
    }
}
public void TestOldSavingAndLoading()
{
    // Skip running for x86 as this test uses too much memory (over the 2GB limit on x86)
    // and is very likely to hit memory-related issues when running on CI.
    // TODO: optimize memory usage in the related code and enable the x86 run.
    if (!Environment.Is64BitProcess)
        return;

    var samplevector = GetSampleArrayData();

    var dataView = ML.Data.LoadFromEnumerable(
        new TestData[] { new TestData() { data_0 = samplevector } });

    var inputNames = "data_0";
    var outputNames = "output_1";
    var est = ML.Transforms.DnnFeaturizeImage(outputNames,
        m => m.ModelSelector.ResNet18(m.Environment, m.OutputColumn, m.InputColumn), inputNames);
    var transformer = est.Fit(dataView);
    var result = transformer.Transform(dataView);
    var resultRoles = new RoleMappedData(result);
    using (var ms = new MemoryStream())
    {
        TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
        ms.Position = 0;
        var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);
        using (var cursor = loadedView.GetRowCursor(loadedView.Schema[outputNames]))
        {
            VBuffer<float> softMaxValue = default;
            var softMaxGetter = cursor.GetGetter<VBuffer<float>>(loadedView.Schema[outputNames]);
            float sum = 0f;
            int i = 0;
            while (cursor.MoveNext())
            {
                softMaxGetter(ref softMaxValue);
                var values = softMaxValue.DenseValues();
                foreach (var val in values)
                {
                    sum += val;
                    if (i == 0)
                        Assert.InRange(val, 0.0, 0.00001);
                    if (i == 7)
                        Assert.InRange(val, 0.62935, 0.62940);
                    if (i == 500)
                        Assert.InRange(val, 0.15521, 0.155225);
                    i++;
                }
            }
            Assert.InRange(sum, 83.50, 84.50);
        }
    }
}
public void TestBackAndForthConversionWithDifferentOrder()
{
    IHostEnvironment env = new MLContext();
    const int imageHeight = 100;
    const int imageWidth = 130;
    var dataFile = GetDataPath("images/images.tsv");
    var imageFolder = Path.GetDirectoryName(dataFile);
    var data = TextLoader.Create(env, new TextLoader.Options()
    {
        Columns = new[]
        {
            new TextLoader.Column("ImagePath", DataKind.String, 0),
            new TextLoader.Column("Name", DataKind.String, 1),
        }
    }, new MultiFileSource(dataFile));
    var images = new ImageLoadingTransformer(env, imageFolder, ("ImageReal", "ImagePath")).Transform(data);
    var cropped = new ImageResizingTransformer(env, "ImageCropped", imageWidth, imageHeight, "ImageReal").Transform(images);
    var pixels = new ImagePixelExtractingTransformer(env, "ImagePixels", "ImageCropped",
        ImagePixelExtractingEstimator.ColorBits.All, order: ImagePixelExtractingEstimator.ColorsOrder.ABRG).Transform(cropped);

    IDataView backToBitmaps = new VectorToImageConvertingTransformer(env, "ImageRestored", imageHeight, imageWidth,
        "ImagePixels", ImagePixelExtractingEstimator.ColorBits.All, order: ImagePixelExtractingEstimator.ColorsOrder.ABRG).Transform(pixels);

    var fname = nameof(TestBackAndForthConversionWithDifferentOrder) + "_model.zip";
    var fh = env.CreateOutputFile(fname);
    using (var ch = env.Start("save"))
        TrainUtils.SaveModel(env, ch, fh, null, new RoleMappedData(backToBitmaps));

    backToBitmaps = ModelFileUtils.LoadPipeline(env, fh.OpenReadStream(), new MultiFileSource(dataFile));
    DeleteOutputPath(fname);

    using (var cursor = backToBitmaps.GetRowCursorForAllColumns())
    {
        var bitmapGetter = cursor.GetGetter<Bitmap>(backToBitmaps.Schema["ImageRestored"]);
        Bitmap restoredBitmap = default;

        var bitmapCropGetter = cursor.GetGetter<Bitmap>(backToBitmaps.Schema["ImageCropped"]);
        Bitmap croppedBitmap = default;
        while (cursor.MoveNext())
        {
            bitmapGetter(ref restoredBitmap);
            Assert.NotNull(restoredBitmap);
            bitmapCropGetter(ref croppedBitmap);
            Assert.NotNull(croppedBitmap);
            for (int x = 0; x < imageWidth; x++)
            {
                for (int y = 0; y < imageHeight; y++)
                {
                    var c = croppedBitmap.GetPixel(x, y);
                    var r = restoredBitmap.GetPixel(x, y);
                    Assert.True(c == r);
                }
            }
        }
    }
    Done();
}
public void TestOldSavingAndLoading(int? gpuDeviceId, bool fallbackToCpu)
{
    var modelFile = "squeezenet/00000001/model.onnx";
    var samplevector = GetSampleArrayData();
    var dataView = ML.Data.LoadFromEnumerable(
        new TestData[] { new TestData() { data_0 = samplevector } });

    var inputNames = new[] { "data_0" };
    var outputNames = new[] { "softmaxout_1" };
    var est = ML.Transforms.ApplyOnnxModel(outputNames, inputNames, modelFile, gpuDeviceId, fallbackToCpu);
    var transformer = est.Fit(dataView);
    var result = transformer.Transform(dataView);
    var resultRoles = new RoleMappedData(result);
    using (var ms = new MemoryStream())
    {
        TrainUtils.SaveModel(Env, Env.Start("saving"), ms, null, resultRoles);
        ms.Position = 0;
        var loadedView = ModelFileUtils.LoadTransforms(Env, dataView, ms);

        var sofMaxOut1Col = loadedView.Schema[outputNames[0]];

        using (var cursor = loadedView.GetRowCursor(sofMaxOut1Col))
        {
            VBuffer<float> softMaxValue = default;
            var softMaxGetter = cursor.GetGetter<VBuffer<float>>(sofMaxOut1Col);
            float sum = 0f;
            int i = 0;
            while (cursor.MoveNext())
            {
                softMaxGetter(ref softMaxValue);
                var values = softMaxValue.DenseValues();
                foreach (var val in values)
                {
                    sum += val;
                    if (i == 0)
                        Assert.InRange(val, 0.00004, 0.00005);
                    if (i == 1)
                        Assert.InRange(val, 0.003844, 0.003845);
                    if (i == 999)
                        Assert.InRange(val, 0.0029566, 0.0029567);
                    i++;
                }
            }
            Assert.InRange(sum, 0.99999, 1.00001);
        }

        (transformer as IDisposable)?.Dispose();
    }
}
public void TestBackAndForthConversionWithAlphaNoInterleave()
{
    using (var env = new ConsoleEnvironment())
    {
        var imageHeight = 100;
        var imageWidth = 130;
        var dataFile = GetDataPath("images/images.tsv");
        var imageFolder = Path.GetDirectoryName(dataFile);
        var data = TextLoader.Create(env, new TextLoader.Arguments()
        {
            Column = new[]
            {
                new TextLoader.Column("ImagePath", DataKind.TX, 0),
                new TextLoader.Column("Name", DataKind.TX, 1),
            }
        }, new MultiFileSource(dataFile));
        var images = ImageLoaderTransform.Create(env, new ImageLoaderTransform.Arguments()
        {
            Column = new ImageLoaderTransform.Column[1]
            {
                new ImageLoaderTransform.Column() { Source = "ImagePath", Name = "ImageReal" }
            },
            ImageFolder = imageFolder
        }, data);
        var cropped = ImageResizerTransform.Create(env, new ImageResizerTransform.Arguments()
        {
            Column = new ImageResizerTransform.Column[1]
            {
                new ImageResizerTransform.Column()
                {
                    Source = "ImageReal",
                    Name = "ImageCropped",
                    ImageHeight = imageHeight,
                    ImageWidth = imageWidth,
                    Resizing = ImageResizerTransform.ResizingKind.IsoCrop
                }
            }
        }, images);
        var pixels = ImagePixelExtractorTransform.Create(env, new ImagePixelExtractorTransform.Arguments()
        {
            InterleaveArgb = false,
            Offset = 127.5f,
            Scale = 2f / 255,
            Column = new ImagePixelExtractorTransform.Column[1]
            {
                new ImagePixelExtractorTransform.Column() { Source = "ImageCropped", Name = "ImagePixels", UseAlpha = true }
            }
        }, cropped);
        IDataView backToBitmaps = new VectorToImageTransform(env, new VectorToImageTransform.Arguments()
        {
            InterleaveArgb = false,
            Offset = -1f,
            Scale = 255f / 2,
            Column = new VectorToImageTransform.Column[1]
            {
                new VectorToImageTransform.Column()
                {
                    Source = "ImagePixels",
                    Name = "ImageRestored",
                    ImageHeight = imageHeight,
                    ImageWidth = imageWidth,
                    ContainsAlpha = true
                }
            }
        }, pixels);

        var fname = nameof(TestBackAndForthConversionWithAlphaNoInterleave) + "_model.zip";
        var fh = env.CreateOutputFile(fname);
        using (var ch = env.Start("save"))
            TrainUtils.SaveModel(env, ch, fh, null, new RoleMappedData(backToBitmaps));

        backToBitmaps = ModelFileUtils.LoadPipeline(env, fh.OpenReadStream(), new MultiFileSource(dataFile));
        DeleteOutputPath(fname);

        backToBitmaps.Schema.TryGetColumnIndex("ImageRestored", out int bitmapColumn);
        backToBitmaps.Schema.TryGetColumnIndex("ImageCropped", out int cropBitmapColumn);
        using (var cursor = backToBitmaps.GetRowCursor((x) => true))
        {
            var bitmapGetter = cursor.GetGetter<Bitmap>(bitmapColumn);
            Bitmap restoredBitmap = default;

            var bitmapCropGetter = cursor.GetGetter<Bitmap>(cropBitmapColumn);
            Bitmap croppedBitmap = default;
            while (cursor.MoveNext())
            {
                bitmapGetter(ref restoredBitmap);
                Assert.NotNull(restoredBitmap);
                bitmapCropGetter(ref croppedBitmap);
                Assert.NotNull(croppedBitmap);
                for (int x = 0; x < imageWidth; x++)
                {
                    for (int y = 0; y < imageHeight; y++)
                    {
                        var c = croppedBitmap.GetPixel(x, y);
                        var r = restoredBitmap.GetPixel(x, y);
                        Assert.True(c == r);
                    }
                }
            }
        }
    }
    Done();
}
public void TestBackAndForthConversionWithoutAlphaNoInterleave()
{
    if (RuntimeInformation.IsOSPlatform(OSPlatform.OSX))
        return;

    IHostEnvironment env = new MLContext();
    const int imageHeight = 100;
    const int imageWidth = 130;
    var dataFile = GetDataPath("images/images.tsv");
    var imageFolder = Path.GetDirectoryName(dataFile);
    var data = TextLoader.Create(env, new TextLoader.Options()
    {
        Columns = new[]
        {
            new TextLoader.Column("ImagePath", DataKind.String, 0),
            new TextLoader.Column("Name", DataKind.String, 1),
        }
    }, new MultiFileSource(dataFile));
    var images = new ImageLoadingTransformer(env, imageFolder, ("ImageReal", "ImagePath")).Transform(data);
    var cropped = new ImageResizingTransformer(env, "ImageCropped", imageWidth, imageHeight, "ImageReal").Transform(images);
    var pixels = new ImagePixelExtractingTransformer(env, "ImagePixels", "ImageCropped", scaleImage: 2f / 19, offsetImage: 30).Transform(cropped);

    IDataView backToBitmaps = new VectorToImageConvertingTransformer(env, "ImageRestored", imageHeight, imageWidth,
        "ImagePixels", scaleImage: 19 / 2f, offsetImage: -30).Transform(pixels);

    var fname = nameof(TestBackAndForthConversionWithoutAlphaNoInterleave) + "_model.zip";
    var fh = env.CreateOutputFile(fname);
    using (var ch = env.Start("save"))
        TrainUtils.SaveModel(env, ch, fh, null, new RoleMappedData(backToBitmaps));

    backToBitmaps = ModelFileUtils.LoadPipeline(env, fh.OpenReadStream(), new MultiFileSource(dataFile));
    DeleteOutputPath(fname);

    using (var cursor = backToBitmaps.GetRowCursorForAllColumns())
    {
        var bitmapGetter = cursor.GetGetter<Bitmap>(backToBitmaps.Schema["ImageRestored"]);
        Bitmap restoredBitmap = default;

        var bitmapCropGetter = cursor.GetGetter<Bitmap>(backToBitmaps.Schema["ImageCropped"]);
        Bitmap croppedBitmap = default;
        while (cursor.MoveNext())
        {
            bitmapGetter(ref restoredBitmap);
            Assert.NotNull(restoredBitmap);
            bitmapCropGetter(ref croppedBitmap);
            Assert.NotNull(croppedBitmap);
            for (int x = 0; x < imageWidth; x++)
            {
                for (int y = 0; y < imageHeight; y++)
                {
                    var c = croppedBitmap.GetPixel(x, y);
                    var r = restoredBitmap.GetPixel(x, y);
                    Assert.True(c.R == r.R && c.G == r.G && c.B == r.B);
                }
            }
        }
    }
    Done();
}
/// <summary>
/// Finalizes the test on a transform: applies the transform,
/// saves the data, saves the model, loads it back, saves the data again,
/// and checks that the output is the same.
/// </summary>
/// <param name="env">environment</param>
/// <param name="outModelFilePath">model filename</param>
/// <param name="transform">transform to test</param>
/// <param name="source">source (view before applying the transform)</param>
/// <param name="outData">first data file</param>
/// <param name="outData2">second data file</param>
/// <param name="startsWith">check that each output line on disk starts with the corresponding line written after the model was serialized, instead of requiring exact equality</param>
public static void SerializationTestTransform(IHostEnvironment env,
    string outModelFilePath, IDataTransform transform, IDataView source,
    string outData, string outData2, bool startsWith = false,
    bool skipDoubleQuote = false, bool forceDense = false)
{
    // Saves the model.
    var roles = env.CreateExamples(transform, null);
    using (var ch = env.Start("SaveModel"))
    using (var fs = File.Create(outModelFilePath))
        TrainUtils.SaveModel(env, ch, fs, null, roles);
    if (!File.Exists(outModelFilePath))
        throw new FileNotFoundException(outModelFilePath);

    // We load it again.
    using (var fs = File.OpenRead(outModelFilePath))
    {
        var tr2 = env.LoadTransforms(fs, source);
        if (tr2 == null)
            throw new Exception(string.Format("Unable to load '{0}'", outModelFilePath));
        if (transform.GetType() != tr2.GetType())
            throw new Exception(string.Format("Type mismatch {0} != {1}", transform.GetType(), tr2.GetType()));
    }

    // Checks the outputs.
    var saver = env.CreateSaver(forceDense ? "Text{dense=+}" : "Text");
    var columns = new int[transform.Schema.Count];
    for (int i = 0; i < columns.Length; ++i)
        columns[i] = i;
    using (var fs2 = File.Create(outData))
        saver.SaveData(fs2, transform, columns);
    if (!File.Exists(outData))
        throw new FileNotFoundException(outData);

    // Check we have the same output.
    using (var fs = File.OpenRead(outModelFilePath))
    {
        var tr = env.LoadTransforms(fs, source);
        saver = env.CreateSaver(forceDense ? "Text{dense=+}" : "Text");
        using (var fs2 = File.Create(outData2))
            saver.SaveData(fs2, tr, columns);
    }

    var t1 = File.ReadAllLines(outData);
    var t2 = File.ReadAllLines(outData2);
    if (t1.Length != t2.Length)
        throw new Exception(string.Format("Not the same number of lines: {0} != {1}", t1.Length, t2.Length));
    for (int i = 0; i < t1.Length; ++i)
    {
        if (skipDoubleQuote && (t1[i].Contains("\"\"\t\"\"") || t2[i].Contains("\"\"\t\"\"")))
            continue;
        if ((startsWith && !t1[i].StartsWith(t2[i])) || (!startsWith && t1[i] != t2[i]))
        {
            if (t1[i].EndsWith("\t5\t0:\"\""))
            {
                var a = t1[i].Substring(0, t1[i].Length - "\t5\t0:\"\"".Length);
                a += "\t\"\"\t\"\"\t\"\"\t\"\"\t\"\"";
                var b = t2[i];
                if ((startsWith && !a.StartsWith(b)) || (!startsWith && a != b))
                    throw new Exception(string.Format("2-Mismatch on line {0}/{3}:\n{1}\n{2}", i, t1[i], t2[i], t1.Length));
            }
            else
            {
                // The test might fail because one side is dense and the other is sparse.
                throw new Exception(string.Format("3-Mismatch on line {0}/{3}:\n{1}\n{2}", i, t1[i], t2[i], t1.Length));
            }
        }
    }
}
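A hedged usage sketch of SerializationTestTransform, assuming `transform` was built on top of `source`; the file names are illustrative.

SerializationTestTransform(env, "transform_model.zip", transform, source,
    "output1.txt", "output2.txt", startsWith: false);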