/// <summary>
/// Trains a "km" trainer on two iris columns read through a DataFrame, saves the model to disk,
/// reloads it, scores the training data and checks the shape of the scored DataFrame.
/// </summary>
public void TestI_Q_KMeansInnerAPIWithDataFrame()
{
    var methodName = System.Reflection.MethodBase.GetCurrentMethod().Name;
    var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
    var iris = FileHelper.GetTestFile("iris.txt");

    using (var env = new ConsoleEnvironment(conc: 1))
    {
        ComponentHelper.AddStandardComponents(env);

        // Build the training view: read the tab-separated file, then concatenate two columns into "Feature".
        var df = Scikit.ML.DataManipulation.DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
        var conc = env.CreateTransform("Concat{col=Feature:Sepal_length,Sepal_width}", df);
        var roleMap = env.CreateExamples(conc, "Feature", label: "Label");
        var trainer = CreateTrainer(env, "km");

        IPredictor model;
        using (var ch = env.Start("test"))
        {
            model = TrainUtils.Train(env, ch, roleMap, trainer, null, 0);
        }

        using (var ch = env.Start("Save"))
        using (var fs = File.Create(outModelFilePath))
        {
            TrainUtils.SaveModel(env, ch, fs, model, roleMap);
        }

        Predictor pred;
        using (var fs = File.OpenRead(outModelFilePath))
        {
            pred = env.LoadPredictorOrNull(fs);
        }

        // LoadPredictorOrNull may return null; fail with a clear message instead of a NullReferenceException.
        Assert.IsNotNull(pred, "The saved model could not be loaded back as a predictor.");

#pragma warning disable CS0618
        // The 'as' cast yields null when the loaded object is not an IPredictor; check it before scoring.
        var loadedPredictor = pred.GetPredictorObject() as IPredictor;
        Assert.IsNotNull(loadedPredictor, "The loaded predictor object is not an IPredictor.");
        var scorer = ScoreUtils.GetScorer(loadedPredictor, roleMap, env, null);
#pragma warning restore CS0618

        var dfout = Scikit.ML.DataManipulation.DataFrameIO.ReadView(scorer);

        // Expected value goes first so that a failure message reports expected/actual correctly.
        Assert.AreEqual(new Tuple<int, int>(150, 13), dfout.Shape);
    }
}
/// <summary>
/// Fits a word-bag + LDA pipeline on the wikipedia-detox sample file, writes the "topics" column of
/// the transformed data (passed through <c>TakeFilter</c> with a count of 4) to "ldatopics.tsv",
/// and checks that the first column of the saved view is a vector type of size 10.
/// </summary>
public void LdaWorkout()
{
    // The environment is disposable (other tests wrap it in 'using'); dispose it here as well.
    using (var env = new ConsoleEnvironment(seed: 42, conc: 1))
    {
        string sentimentDataPath = GetDataPath("wikipedia-detox-250-line-data.tsv");

        var data = TextLoader.CreateReader(env, ctx => (
                label: ctx.LoadBool(0),
                text: ctx.LoadText(1)), hasHeader: true)
            .Read(sentimentDataPath);

        // Same file, but the text column is loaded as float; kept for the disabled TestEstimatorCore call below.
        var invalidData = TextLoader.CreateReader(env, ctx => (
                label: ctx.LoadBool(0),
                text: ctx.LoadFloat(1)), hasHeader: true)
            .Read(sentimentDataPath);

        var est = new WordBagEstimator(env, "text", "bag_of_words").
            Append(new LdaEstimator(env, "bag_of_words", "topics", 10, advancedSettings: s =>
            {
                s.NumIterations = 10;
                s.ResetRandomGenerator = true;
            }));

        // The following call fails because of the following issue
        // https://github.com/dotnet/machinelearning/issues/969
        // TestEstimatorCore(est, data.AsDynamic, invalidInput: invalidData.AsDynamic);

        var outputPath = GetOutputPath("Text", "ldatopics.tsv");
        using (var ch = env.Start("save"))
        {
            var saver = new TextSaver(env, new TextSaver.Arguments { Silent = true, OutputHeader = false, Dense = true });
            IDataView savedData = TakeFilter.Create(env, est.Fit(data.AsDynamic).Transform(data.AsDynamic), 4);
            savedData = ColumnSelectingTransformer.CreateKeep(env, savedData, new[] { "topics" });

            using (var fs = File.Create(outputPath))
            {
                DataSaverUtils.SaveDataView(ch, saver, savedData, fs, keepHidden: true);
            }

            Assert.Equal(10, (savedData.Schema.GetColumnType(0) as VectorType)?.Size);
        }
    }

    // Disabling this check due to the following issue with consistency of output.
    // `seed` specified in ConsoleEnvironment has no effect.
    // https://github.com/dotnet/machinelearning/issues/1004
    // On single box, setting `s.ResetRandomGenerator = true` works but fails on build server
    // CheckEquality("Text", "ldatopics.tsv");
    Done();
}
/// <summary>
/// Fits a rank-5 <c>PcaEstimator</c> on the "features" column, saves the resulting "pca" column
/// (after <c>TakeFilter</c> with a count of 4) to "pca.tsv" and compares it with the baseline
/// using 4 digits of precision.
/// </summary>
public void TestPcaEstimator()
{
    var reader = TextLoader.CreateReader(
        _env,
        ctx => (label: ctx.LoadFloat(11), features: ctx.LoadFloat(0, 10)),
        separator: ';',
        hasHeader: true);
    var data = reader.Read(_dataSource);

    var pcaEstimator = new PcaEstimator(_env, "features", "pca", rank: 5, seed: 1);
    var pcaOutputFile = GetOutputPath("PCA", "pca.tsv");

    using (var channel = _env.Start("save"))
    {
        // Fit, transform, then keep only the "pca" column of the filtered view.
        var fitted = pcaEstimator.Fit(data.AsDynamic);
        var transformed = fitted.Transform(data.AsDynamic);
        IDataView filtered = TakeFilter.Create(_env, transformed, 4);
        IDataView pcaOnly = new ChooseColumnsTransform(_env, filtered, "pca");

        using (var output = File.Create(pcaOutputFile))
        {
            DataSaverUtils.SaveDataView(channel, _saver, pcaOnly, output, keepHidden: true);
        }
    }

    CheckEquality("PCA", "pca.tsv", digitsOfPrecision: 4);
    Done();
}
/// <summary>
/// Fits a rank-5 <c>PcaEstimator</c> on the generated regression dataset, saves the resulting "pca"
/// column (after <c>TakeFilter</c> with a count of 4) to "pca.tsv" and compares it with the baseline.
/// </summary>
public void PcaWorkout()
{
    // The environment is disposable (other tests wrap it in 'using'); dispose it here as well.
    using (var env = new ConsoleEnvironment(seed: 1, conc: 1))
    {
        string dataSource = GetDataPath("generated_regression_dataset.csv");

        var data = TextLoader.CreateReader(env,
                c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
                separator: ';', hasHeader: true)
            .Read(new MultiFileSource(dataSource));

        // Same file, but the feature columns are loaded as text; kept for the disabled TestEstimatorCore call below.
        var invalidData = TextLoader.CreateReader(env,
                c => (label: c.LoadFloat(11), features: c.LoadText(0, 10)),
                separator: ';', hasHeader: true)
            .Read(new MultiFileSource(dataSource));

        var est = new PcaEstimator(env, "features", "pca", rank: 5, advancedSettings: s =>
        {
            s.Seed = 1;
        });

        // The following call fails because of the following issue
        // https://github.com/dotnet/machinelearning/issues/969
        // TestEstimatorCore(est, data.AsDynamic, invalidInput: invalidData.AsDynamic);

        var outputPath = GetOutputPath("PCA", "pca.tsv");
        using (var ch = env.Start("save"))
        {
            var saver = new TextSaver(env, new TextSaver.Arguments { Silent = true, OutputHeader = false });
            IDataView savedData = TakeFilter.Create(env, est.Fit(data.AsDynamic).Transform(data.AsDynamic), 4);
            savedData = new ChooseColumnsTransform(env, savedData, "pca");

            using (var fs = File.Create(outputPath))
            {
                DataSaverUtils.SaveDataView(ch, saver, savedData, fs, keepHidden: true);
            }
        }
    }

    CheckEquality("PCA", "pca.tsv");
    Done();
}
/// <summary>
/// Loads images, resizes them, extracts pixels (no alpha, not interleaved), converts the pixel
/// vectors back to bitmaps, round-trips the pipeline through a saved model file, and checks that
/// every pixel of the restored bitmap has the same R, G and B values as the cropped bitmap.
/// </summary>
public void TestBackAndForthConversionWithoutAlphaNoInterleaveNoOffset()
{
    using (var env = new ConsoleEnvironment())
    {
        var imageHeight = 100;
        var imageWidth = 130;
        var dataFile = GetDataPath("images/images.tsv");
        var imageFolder = Path.GetDirectoryName(dataFile);

        var data = TextLoader.Create(env, new TextLoader.Arguments()
        {
            Column = new[]
            {
                new TextLoader.Column("ImagePath", DataKind.TX, 0),
                new TextLoader.Column("Name", DataKind.TX, 1),
            }
        }, new MultiFileSource(dataFile));

        var images = ImageLoaderTransform.Create(env, new ImageLoaderTransform.Arguments()
        {
            Column = new ImageLoaderTransform.Column[1]
            {
                new ImageLoaderTransform.Column() { Source = "ImagePath", Name = "ImageReal" }
            },
            ImageFolder = imageFolder
        }, data);

        var cropped = ImageResizerTransform.Create(env, new ImageResizerTransform.Arguments()
        {
            Column = new ImageResizerTransform.Column[1]
            {
                new ImageResizerTransform.Column()
                {
                    Source = "ImageReal",
                    Name = "ImageCropped",
                    ImageHeight = imageHeight,
                    ImageWidth = imageWidth,
                    Resizing = ImageResizerTransform.ResizingKind.IsoCrop
                }
            }
        }, images);

        var pixels = ImagePixelExtractorTransform.Create(env, new ImagePixelExtractorTransform.Arguments()
        {
            InterleaveArgb = false,
            Column = new ImagePixelExtractorTransform.Column[1]
            {
                new ImagePixelExtractorTransform.Column() { Source = "ImageCropped", Name = "ImagePixels", UseAlpha = false }
            }
        }, cropped);

        IDataView backToBitmaps = new VectorToImageTransform(env, new VectorToImageTransform.Arguments()
        {
            InterleaveArgb = false,
            Column = new VectorToImageTransform.Column[1]
            {
                new VectorToImageTransform.Column()
                {
                    Source = "ImagePixels",
                    Name = "ImageRestored",
                    ImageHeight = imageHeight,
                    ImageWidth = imageWidth,
                    ContainsAlpha = false
                }
            }
        }, pixels);

        // Round-trip the pipeline through a model file.
        var fname = nameof(TestBackAndForthConversionWithoutAlphaNoInterleaveNoOffset) + "_model.zip";
        var fh = env.CreateOutputFile(fname);
        using (var ch = env.Start("save"))
        {
            TrainUtils.SaveModel(env, ch, fh, null, new RoleMappedData(backToBitmaps));
        }

        // Close the read stream before deleting the model file so no handle is left open on it.
        using (var modelStream = fh.OpenReadStream())
        {
            backToBitmaps = ModelFileUtils.LoadPipeline(env, modelStream, new MultiFileSource(dataFile));
        }
        DeleteOutputPath(fname);

        // A missing column would otherwise silently resolve to index 0; fail explicitly instead.
        Assert.True(backToBitmaps.Schema.TryGetColumnIndex("ImageRestored", out int bitmapColumn),
            "Column 'ImageRestored' was not found in the reloaded pipeline.");
        Assert.True(backToBitmaps.Schema.TryGetColumnIndex("ImageCropped", out int cropBitmapColumn),
            "Column 'ImageCropped' was not found in the reloaded pipeline.");

        using (var cursor = backToBitmaps.GetRowCursor((x) => true))
        {
            var bitmapGetter = cursor.GetGetter<Bitmap>(bitmapColumn);
            Bitmap restoredBitmap = default;

            var bitmapCropGetter = cursor.GetGetter<Bitmap>(cropBitmapColumn);
            Bitmap croppedBitmap = default;

            while (cursor.MoveNext())
            {
                bitmapGetter(ref restoredBitmap);
                Assert.NotNull(restoredBitmap);

                bitmapCropGetter(ref croppedBitmap);
                Assert.NotNull(croppedBitmap);

                for (int x = 0; x < imageWidth; x++)
                {
                    for (int y = 0; y < imageHeight; y++)
                    {
                        var c = croppedBitmap.GetPixel(x, y);
                        var r = restoredBitmap.GetPixel(x, y);
                        Assert.True(c.R == r.R && c.G == r.G && c.B == r.B);
                    }
                }
            }
        }
    }

    Done();
}
/// <summary>
/// Loads images, resizes them, applies the grayscale transform, round-trips the pipeline through a
/// saved model file, and checks that every pixel of the "ImageGrey" bitmaps has equal R, G and B values.
/// </summary>
public void TestGreyscaleTransformImages()
{
    using (var env = new ConsoleEnvironment())
    {
        var imageHeight = 150;
        var imageWidth = 100;
        var dataFile = GetDataPath("images/images.tsv");
        var imageFolder = Path.GetDirectoryName(dataFile);

        var data = TextLoader.Create(env, new TextLoader.Arguments()
        {
            Column = new[]
            {
                new TextLoader.Column("ImagePath", DataKind.TX, 0),
                new TextLoader.Column("Name", DataKind.TX, 1),
            }
        }, new MultiFileSource(dataFile));

        var images = ImageLoaderTransform.Create(env, new ImageLoaderTransform.Arguments()
        {
            Column = new ImageLoaderTransform.Column[1]
            {
                new ImageLoaderTransform.Column() { Source = "ImagePath", Name = "ImageReal" }
            },
            ImageFolder = imageFolder
        }, data);

        var cropped = ImageResizerTransform.Create(env, new ImageResizerTransform.Arguments()
        {
            Column = new ImageResizerTransform.Column[1]
            {
                new ImageResizerTransform.Column()
                {
                    Name = "ImageCropped",
                    Source = "ImageReal",
                    ImageHeight = imageHeight,
                    ImageWidth = imageWidth,
                    Resizing = ImageResizerTransform.ResizingKind.IsoCrop
                }
            }
        }, images);

        IDataView grey = ImageGrayscaleTransform.Create(env, new ImageGrayscaleTransform.Arguments()
        {
            Column = new ImageGrayscaleTransform.Column[1]
            {
                new ImageGrayscaleTransform.Column() { Name = "ImageGrey", Source = "ImageCropped" }
            }
        }, cropped);

        // Round-trip the pipeline through a model file.
        var fname = nameof(TestGreyscaleTransformImages) + "_model.zip";
        var fh = env.CreateOutputFile(fname);
        using (var ch = env.Start("save"))
        {
            TrainUtils.SaveModel(env, ch, fh, null, new RoleMappedData(grey));
        }

        // Close the read stream before deleting the model file so no handle is left open on it.
        using (var modelStream = fh.OpenReadStream())
        {
            grey = ModelFileUtils.LoadPipeline(env, modelStream, new MultiFileSource(dataFile));
        }
        DeleteOutputPath(fname);

        // A missing column would otherwise silently resolve to index 0; fail explicitly instead.
        Assert.True(grey.Schema.TryGetColumnIndex("ImageGrey", out int greyColumn),
            "Column 'ImageGrey' was not found in the reloaded pipeline.");

        using (var cursor = grey.GetRowCursor((x) => true))
        {
            var bitmapGetter = cursor.GetGetter<Bitmap>(greyColumn);
            Bitmap bitmap = default;

            while (cursor.MoveNext())
            {
                bitmapGetter(ref bitmap);
                Assert.NotNull(bitmap);

                for (int x = 0; x < imageWidth; x++)
                {
                    for (int y = 0; y < imageHeight; y++)
                    {
                        var pixel = bitmap.GetPixel(x, y);
                        // greyscale image has same values for R,G and B
                        Assert.True(pixel.R == pixel.G && pixel.G == pixel.B);
                    }
                }
            }
        }
    }

    Done();
}
/// <summary>
/// Exercises <c>ResourceManagerUtils.Instance.EnsureResourceAsync</c> in three configurations of the
/// custom resources URL environment variable: an unreachable host, a reachable host with a missing
/// page, and the original value. The first two must throw <see cref="NotSupportedException"/> and
/// leave no file behind; the last must produce "sentiment.emd" in the "copyto" output directory.
/// Both environment variables touched by the test are restored in the <c>finally</c> block.
/// </summary>
public async Task TestDownloadError()
{
    var envVarOld = Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable);
    var resourcePathVarOld = Environment.GetEnvironmentVariable(Utils.CustomSearchDirEnvVariable);
    Environment.SetEnvironmentVariable(Utils.CustomSearchDirEnvVariable, null);

    try
    {
        var saveToDir = GetOutputPath("copyto");
        DeleteOutputPath("copyto", "breast-cancer.txt");
        var sbOut = new StringBuilder();
        var sbErr = new StringBuilder();

        // Bad url.
        if (!Uri.TryCreate("https://fake-website/fake-model.model/", UriKind.Absolute, out var badUri))
        {
            Fail("Uri could not be created");
        }

        Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, badUri.AbsoluteUri);
        var envVar = Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable);
        if (envVar != badUri.AbsoluteUri)
        {
            Fail("Environment variable not set properly");
        }

        DeleteOutputPath("copyto", "ResNet_18_Updated.model");
        sbOut.Clear();
        sbErr.Clear();
        using (var outWriter = new StringWriter(sbOut))
        using (var errWriter = new StringWriter(sbErr))
        // The environment is disposable; release it before the writers it was given are closed.
        using (var env = new ConsoleEnvironment(42, outWriter: outWriter, errWriter: errWriter))
        using (var ch = env.Start("Downloading"))
        {
            var fileName = "test_bad_url.model";
            await Assert.ThrowsAsync<NotSupportedException>(() => ResourceManagerUtils.Instance.EnsureResourceAsync(env, ch, "Image/ResNet_18_Updated.model", fileName, saveToDir, 10 * 1000));

            Log("Bad url");
            Log($"out: {sbOut.ToString()}");
            Log($"error: {sbErr.ToString()}");

            if (File.Exists(Path.Combine(saveToDir, fileName)))
            {
                Fail($"File '{Path.Combine(saveToDir, fileName)}' should have been deleted.");
            }
        }

        // Good url, bad page.
        if (!Uri.TryCreate("https://cnn.com/", UriKind.Absolute, out var cnn))
        {
            Fail("Uri could not be created");
        }

        Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, cnn.AbsoluteUri);
        envVar = Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable);
        if (envVar != cnn.AbsoluteUri)
        {
            Fail("Environment variable not set properly");
        }

        DeleteOutputPath("copyto", "ResNet_18_Updated.model");
        sbOut.Clear();
        sbErr.Clear();
        using (var outWriter = new StringWriter(sbOut))
        using (var errWriter = new StringWriter(sbErr))
        using (var env = new ConsoleEnvironment(42, outWriter: outWriter, errWriter: errWriter))
        using (var ch = env.Start("Downloading"))
        {
            var fileName = "test_cnn_page_does_not_exist.model";
            await Assert.ThrowsAsync<NotSupportedException>(() => ResourceManagerUtils.Instance.EnsureResourceAsync(env, ch, "Image/ResNet_18_Updated.model", fileName, saveToDir, 10 * 1000));

            Log("Good url, bad page");
            Log($"out: {sbOut.ToString()}");
            Log($"error: {sbErr.ToString()}");

            if (File.Exists(Path.Combine(saveToDir, fileName)))
            {
                Fail($"File '{Path.Combine(saveToDir, fileName)}' should have been deleted.");
            }
        }

        // Good url, good page: restore the original custom URL value and expect the download to succeed.
        Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, envVarOld);
        envVar = Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable);
        if (envVar != envVarOld)
        {
            Fail("Environment variable not set properly");
        }

        DeleteOutputPath("copyto", "sentiment.emd");
        sbOut.Clear();
        sbErr.Clear();
        using (var outWriter = new StringWriter(sbOut))
        using (var errWriter = new StringWriter(sbErr))
        using (var env = new ConsoleEnvironment(42, outWriter: outWriter, errWriter: errWriter))
        using (var ch = env.Start("Downloading"))
        {
            var fileName = "sentiment.emd";
            var t = ResourceManagerUtils.Instance.EnsureResourceAsync(env, ch, "text/Sswe/sentiment.emd", fileName, saveToDir, 1 * 60 * 1000);
            var results = await t;

            if (results.ErrorMessage != null)
            {
                Fail(String.Format("Expected zero length error string. Received error: {0}", results.ErrorMessage));
            }

            if (t.Status != TaskStatus.RanToCompletion)
            {
                Fail("Download did not complete succesfully");
            }

            if (!File.Exists(GetOutputPath("copyto", "sentiment.emd")))
            {
                Fail($"File '{GetOutputPath("copyto", "sentiment.emd")}' does not exist. " +
                    $"File was downloaded to '{results.FileName}' instead." +
                    $"MICROSOFTML_RESOURCE_PATH is set to {Environment.GetEnvironmentVariable(Utils.CustomSearchDirEnvVariable)}");
            }
        }

        Done();
    }
    finally
    {
        // Set environment variable back to its old value.
        Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, envVarOld);
        Environment.SetEnvironmentVariable(Utils.CustomSearchDirEnvVariable, resourcePathVarOld);
    }
}