Example #1
0
        public void TestI_Q_KMeansInnerAPIWithDataFrame()
        {
            var methodName       = System.Reflection.MethodBase.GetCurrentMethod().Name;
            var outModelFilePath = FileHelper.GetOutputFile("outModelFilePath.zip", methodName);
            var iris             = FileHelper.GetTestFile("iris.txt");

            using (var env = new ConsoleEnvironment(conc: 1))
            {
                ComponentHelper.AddStandardComponents(env);

                var        df      = Scikit.ML.DataManipulation.DataFrameIO.ReadCsv(iris, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
                var        conc    = env.CreateTransform("Concat{col=Feature:Sepal_length,Sepal_width}", df);
                var        roleMap = env.CreateExamples(conc, "Feature", label: "Label");
                var        trainer = CreateTrainer(env, "km");
                IPredictor model;
                using (var ch = env.Start("test"))
                    model = TrainUtils.Train(env, ch, roleMap, trainer, null, 0);

                using (var ch = env.Start("Save"))
                    using (var fs = File.Create(outModelFilePath))
                        TrainUtils.SaveModel(env, ch, fs, model, roleMap);

                Predictor pred;
                using (var fs = File.OpenRead(outModelFilePath))
                    pred = env.LoadPredictorOrNull(fs);

#pragma warning disable CS0618
                var scorer = ScoreUtils.GetScorer(pred.GetPredictorObject() as IPredictor, roleMap, env, null);
#pragma warning restore CS0618
                var dfout = Scikit.ML.DataManipulation.DataFrameIO.ReadView(scorer);
                Assert.AreEqual(dfout.Shape, new Tuple <int, int>(150, 13));
            }
        }
        public void LdaWorkout()
        {
            var    env = new ConsoleEnvironment(seed: 42, conc: 1);
            string sentimentDataPath = GetDataPath("wikipedia-detox-250-line-data.tsv");
            var    data = TextLoader.CreateReader(env, ctx => (
                                                      label: ctx.LoadBool(0),
                                                      text: ctx.LoadText(1)), hasHeader: true)
                          .Read(sentimentDataPath);

            var invalidData = TextLoader.CreateReader(env, ctx => (
                                                          label: ctx.LoadBool(0),
                                                          text: ctx.LoadFloat(1)), hasHeader: true)
                              .Read(sentimentDataPath);

            var est = new WordBagEstimator(env, "text", "bag_of_words").
                      Append(new LdaEstimator(env, "bag_of_words", "topics", 10, advancedSettings: s => {
                s.NumIterations        = 10;
                s.ResetRandomGenerator = true;
            }));

            // The following call fails because of the following issue
            // https://github.com/dotnet/machinelearning/issues/969
            // TestEstimatorCore(est, data.AsDynamic, invalidInput: invalidData.AsDynamic);

            var outputPath = GetOutputPath("Text", "ldatopics.tsv");

            using (var ch = env.Start("save"))
            {
                var saver = new TextSaver(env, new TextSaver.Arguments {
                    Silent = true, OutputHeader = false, Dense = true
                });
                IDataView savedData = TakeFilter.Create(env, est.Fit(data.AsDynamic).Transform(data.AsDynamic), 4);
                savedData = ColumnSelectingTransformer.CreateKeep(env, savedData, new[] { "topics" });

                using (var fs = File.Create(outputPath))
                    DataSaverUtils.SaveDataView(ch, saver, savedData, fs, keepHidden: true);

                Assert.Equal(10, (savedData.Schema.GetColumnType(0) as VectorType)?.Size);
            }

            // Diabling this check due to the following issue with consitency of output.
            // `seed` specified in ConsoleEnvironment has no effect.
            // https://github.com/dotnet/machinelearning/issues/1004
            // On single box, setting `s.ResetRandomGenerator = true` works but fails on build server
            // CheckEquality("Text", "ldatopics.tsv");
            Done();
        }
Example #3
0
        public void TestPcaEstimator()
        {
            var data = TextLoader.CreateReader(_env,
                                               c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
                                               separator: ';', hasHeader: true)
                       .Read(_dataSource);

            var est        = new PcaEstimator(_env, "features", "pca", rank: 5, seed: 1);
            var outputPath = GetOutputPath("PCA", "pca.tsv");

            using (var ch = _env.Start("save"))
            {
                IDataView savedData = TakeFilter.Create(_env, est.Fit(data.AsDynamic).Transform(data.AsDynamic), 4);
                savedData = new ChooseColumnsTransform(_env, savedData, "pca");

                using (var fs = File.Create(outputPath))
                    DataSaverUtils.SaveDataView(ch, _saver, savedData, fs, keepHidden: true);
            }

            CheckEquality("PCA", "pca.tsv", digitsOfPrecision: 4);
            Done();
        }
Example #4
0
        public void PcaWorkout()
        {
            var    env        = new ConsoleEnvironment(seed: 1, conc: 1);
            string dataSource = GetDataPath("generated_regression_dataset.csv");
            var    data       = TextLoader.CreateReader(env,
                                                        c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
                                                        separator: ';', hasHeader: true)
                                .Read(new MultiFileSource(dataSource));

            var invalidData = TextLoader.CreateReader(env,
                                                      c => (label: c.LoadFloat(11), features: c.LoadText(0, 10)),
                                                      separator: ';', hasHeader: true)
                              .Read(new MultiFileSource(dataSource));

            var est = new PcaEstimator(env, "features", "pca", rank: 5, advancedSettings: s => {
                s.Seed = 1;
            });

            // The following call fails because of the following issue
            // https://github.com/dotnet/machinelearning/issues/969
            // TestEstimatorCore(est, data.AsDynamic, invalidInput: invalidData.AsDynamic);

            var outputPath = GetOutputPath("PCA", "pca.tsv");

            using (var ch = env.Start("save"))
            {
                var saver = new TextSaver(env, new TextSaver.Arguments {
                    Silent = true, OutputHeader = false
                });
                IDataView savedData = TakeFilter.Create(env, est.Fit(data.AsDynamic).Transform(data.AsDynamic), 4);
                savedData = new ChooseColumnsTransform(env, savedData, "pca");

                using (var fs = File.Create(outputPath))
                    DataSaverUtils.SaveDataView(ch, saver, savedData, fs, keepHidden: true);
            }

            CheckEquality("PCA", "pca.tsv");
            Done();
        }
        public void TestBackAndForthConversionWithoutAlphaNoInterleaveNoOffset()
        {
            using (var env = new ConsoleEnvironment())
            {
                var imageHeight = 100;
                var imageWidth  = 130;
                var dataFile    = GetDataPath("images/images.tsv");
                var imageFolder = Path.GetDirectoryName(dataFile);
                var data        = TextLoader.Create(env, new TextLoader.Arguments()
                {
                    Column = new[]
                    {
                        new TextLoader.Column("ImagePath", DataKind.TX, 0),
                        new TextLoader.Column("Name", DataKind.TX, 1),
                    }
                }, new MultiFileSource(dataFile));
                var images = ImageLoaderTransform.Create(env, new ImageLoaderTransform.Arguments()
                {
                    Column = new ImageLoaderTransform.Column[1]
                    {
                        new ImageLoaderTransform.Column()
                        {
                            Source = "ImagePath", Name = "ImageReal"
                        }
                    },
                    ImageFolder = imageFolder
                }, data);
                var cropped = ImageResizerTransform.Create(env, new ImageResizerTransform.Arguments()
                {
                    Column = new ImageResizerTransform.Column[1] {
                        new ImageResizerTransform.Column()
                        {
                            Source = "ImageReal", Name = "ImageCropped", ImageHeight = imageHeight, ImageWidth = imageWidth, Resizing = ImageResizerTransform.ResizingKind.IsoCrop
                        }
                    }
                }, images);

                var pixels = ImagePixelExtractorTransform.Create(env, new ImagePixelExtractorTransform.Arguments()
                {
                    InterleaveArgb = false,
                    Column         = new ImagePixelExtractorTransform.Column[1] {
                        new ImagePixelExtractorTransform.Column()
                        {
                            Source = "ImageCropped", Name = "ImagePixels", UseAlpha = false
                        }
                    }
                }, cropped);

                IDataView backToBitmaps = new VectorToImageTransform(env, new VectorToImageTransform.Arguments()
                {
                    InterleaveArgb = false,
                    Column         = new VectorToImageTransform.Column[1] {
                        new VectorToImageTransform.Column()
                        {
                            Source = "ImagePixels", Name = "ImageRestored", ImageHeight = imageHeight, ImageWidth = imageWidth, ContainsAlpha = false
                        }
                    }
                }, pixels);

                var fname = nameof(TestBackAndForthConversionWithoutAlphaNoInterleaveNoOffset) + "_model.zip";

                var fh = env.CreateOutputFile(fname);
                using (var ch = env.Start("save"))
                    TrainUtils.SaveModel(env, ch, fh, null, new RoleMappedData(backToBitmaps));

                backToBitmaps = ModelFileUtils.LoadPipeline(env, fh.OpenReadStream(), new MultiFileSource(dataFile));
                DeleteOutputPath(fname);


                backToBitmaps.Schema.TryGetColumnIndex("ImageRestored", out int bitmapColumn);
                backToBitmaps.Schema.TryGetColumnIndex("ImageCropped", out int cropBitmapColumn);
                using (var cursor = backToBitmaps.GetRowCursor((x) => true))
                {
                    var    bitmapGetter   = cursor.GetGetter <Bitmap>(bitmapColumn);
                    Bitmap restoredBitmap = default;

                    var    bitmapCropGetter = cursor.GetGetter <Bitmap>(cropBitmapColumn);
                    Bitmap croppedBitmap    = default;
                    while (cursor.MoveNext())
                    {
                        bitmapGetter(ref restoredBitmap);
                        Assert.NotNull(restoredBitmap);
                        bitmapCropGetter(ref croppedBitmap);
                        Assert.NotNull(croppedBitmap);
                        for (int x = 0; x < imageWidth; x++)
                        {
                            for (int y = 0; y < imageHeight; y++)
                            {
                                var c = croppedBitmap.GetPixel(x, y);
                                var r = restoredBitmap.GetPixel(x, y);
                                Assert.True(c.R == r.R && c.G == r.G && c.B == r.B);
                            }
                        }
                    }
                }
            }
            Done();
        }
        public void TestGreyscaleTransformImages()
        {
            using (var env = new ConsoleEnvironment())
            {
                var imageHeight = 150;
                var imageWidth  = 100;
                var dataFile    = GetDataPath("images/images.tsv");
                var imageFolder = Path.GetDirectoryName(dataFile);
                var data        = TextLoader.Create(env, new TextLoader.Arguments()
                {
                    Column = new[]
                    {
                        new TextLoader.Column("ImagePath", DataKind.TX, 0),
                        new TextLoader.Column("Name", DataKind.TX, 1),
                    }
                }, new MultiFileSource(dataFile));
                var images = ImageLoaderTransform.Create(env, new ImageLoaderTransform.Arguments()
                {
                    Column = new ImageLoaderTransform.Column[1]
                    {
                        new ImageLoaderTransform.Column()
                        {
                            Source = "ImagePath", Name = "ImageReal"
                        }
                    },
                    ImageFolder = imageFolder
                }, data);
                var cropped = ImageResizerTransform.Create(env, new ImageResizerTransform.Arguments()
                {
                    Column = new ImageResizerTransform.Column[1] {
                        new ImageResizerTransform.Column()
                        {
                            Name = "ImageCropped", Source = "ImageReal", ImageHeight = imageHeight, ImageWidth = imageWidth, Resizing = ImageResizerTransform.ResizingKind.IsoCrop
                        }
                    }
                }, images);

                IDataView grey = ImageGrayscaleTransform.Create(env, new ImageGrayscaleTransform.Arguments()
                {
                    Column = new ImageGrayscaleTransform.Column[1] {
                        new ImageGrayscaleTransform.Column()
                        {
                            Name = "ImageGrey", Source = "ImageCropped"
                        }
                    }
                }, cropped);

                var fname = nameof(TestGreyscaleTransformImages) + "_model.zip";

                var fh = env.CreateOutputFile(fname);
                using (var ch = env.Start("save"))
                    TrainUtils.SaveModel(env, ch, fh, null, new RoleMappedData(grey));

                grey = ModelFileUtils.LoadPipeline(env, fh.OpenReadStream(), new MultiFileSource(dataFile));
                DeleteOutputPath(fname);

                grey.Schema.TryGetColumnIndex("ImageGrey", out int greyColumn);
                using (var cursor = grey.GetRowCursor((x) => true))
                {
                    var    bitmapGetter = cursor.GetGetter <Bitmap>(greyColumn);
                    Bitmap bitmap       = default;
                    while (cursor.MoveNext())
                    {
                        bitmapGetter(ref bitmap);
                        Assert.NotNull(bitmap);
                        for (int x = 0; x < imageWidth; x++)
                        {
                            for (int y = 0; y < imageHeight; y++)
                            {
                                var pixel = bitmap.GetPixel(x, y);
                                // greyscale image has same values for R,G and B
                                Assert.True(pixel.R == pixel.G && pixel.G == pixel.B);
                            }
                        }
                    }
                }
            }
            Done();
        }
        public async Task TestDownloadError()
        {
            var envVarOld          = Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable);
            var resourcePathVarOld = Environment.GetEnvironmentVariable(Utils.CustomSearchDirEnvVariable);

            Environment.SetEnvironmentVariable(Utils.CustomSearchDirEnvVariable, null);

            try
            {
                var envVar    = Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable);
                var saveToDir = GetOutputPath("copyto");
                DeleteOutputPath("copyto", "breast-cancer.txt");
                var sbOut = new StringBuilder();
                var sbErr = new StringBuilder();

                // Bad url.
                if (!Uri.TryCreate("https://fake-website/fake-model.model/", UriKind.Absolute, out var badUri))
                {
                    Fail("Uri could not be created");
                }

                Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, badUri.AbsoluteUri);
                envVar = Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable);
                if (envVar != badUri.AbsoluteUri)
                {
                    Fail("Environment variable not set properly");
                }

                DeleteOutputPath("copyto", "ResNet_18_Updated.model");
                sbOut.Clear();
                sbErr.Clear();
                using (var outWriter = new StringWriter(sbOut))
                    using (var errWriter = new StringWriter(sbErr))
                    {
                        var env = new ConsoleEnvironment(42, outWriter: outWriter, errWriter: errWriter);
                        using (var ch = env.Start("Downloading"))
                        {
                            var fileName = "test_bad_url.model";
                            await Assert.ThrowsAsync <NotSupportedException>(() => ResourceManagerUtils.Instance.EnsureResourceAsync(env, ch, "Image/ResNet_18_Updated.model", fileName, saveToDir, 10 * 1000));

                            Log("Bad url");
                            Log($"out: {sbOut.ToString()}");
                            Log($"error: {sbErr.ToString()}");

                            if (File.Exists(Path.Combine(saveToDir, fileName)))
                            {
                                Fail($"File '{Path.Combine(saveToDir, fileName)}' should have been deleted.");
                            }
                        }
                    }

                // Good url, bad page.
                if (!Uri.TryCreate("https://cnn.com/", UriKind.Absolute, out var cnn))
                {
                    Fail("Uri could not be created");
                }
                Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, cnn.AbsoluteUri);
                envVar = Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable);
                if (envVar != cnn.AbsoluteUri)
                {
                    Fail("Environment variable not set properly");
                }

                DeleteOutputPath("copyto", "ResNet_18_Updated.model");
                sbOut.Clear();
                sbErr.Clear();
                using (var outWriter = new StringWriter(sbOut))
                    using (var errWriter = new StringWriter(sbErr))
                    {
                        var env = new ConsoleEnvironment(42, outWriter: outWriter, errWriter: errWriter);
                        using (var ch = env.Start("Downloading"))
                        {
                            var fileName = "test_cnn_page_does_not_exist.model";
                            await Assert.ThrowsAsync <NotSupportedException>(() => ResourceManagerUtils.Instance.EnsureResourceAsync(env, ch, "Image/ResNet_18_Updated.model", fileName, saveToDir, 10 * 1000));

                            Log("Good url, bad page");
                            Log($"out: {sbOut.ToString()}");
                            Log($"error: {sbErr.ToString()}");

                            if (File.Exists(Path.Combine(saveToDir, fileName)))
                            {
                                Fail($"File '{Path.Combine(saveToDir, fileName)}' should have been deleted.");
                            }
                        }
                    }

                //Good url, good page
                Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, envVarOld);
                envVar = Environment.GetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable);
                if (envVar != envVarOld)
                {
                    Fail("Environment variable not set properly");
                }

                DeleteOutputPath("copyto", "sentiment.emd");
                sbOut.Clear();
                sbErr.Clear();
                using (var outWriter = new StringWriter(sbOut))
                    using (var errWriter = new StringWriter(sbErr))
                    {
                        var env = new ConsoleEnvironment(42, outWriter: outWriter, errWriter: errWriter);
                        using (var ch = env.Start("Downloading"))
                        {
                            var fileName = "sentiment.emd";
                            var t        = ResourceManagerUtils.Instance.EnsureResourceAsync(env, ch, "text/Sswe/sentiment.emd", fileName, saveToDir, 1 * 60 * 1000);
                            var results  = await t;

                            if (results.ErrorMessage != null)
                            {
                                Fail(String.Format("Expected zero length error string. Received error: {0}", results.ErrorMessage));
                            }
                            if (t.Status != TaskStatus.RanToCompletion)
                            {
                                Fail("Download did not complete succesfully");
                            }
                            if (!File.Exists(GetOutputPath("copyto", "sentiment.emd")))
                            {
                                Fail($"File '{GetOutputPath("copyto", "sentiment.emd")}' does not exist. " +
                                     $"File was downloaded to '{results.FileName}' instead." +
                                     $"MICROSOFTML_RESOURCE_PATH is set to {Environment.GetEnvironmentVariable(Utils.CustomSearchDirEnvVariable)}");
                            }
                        }
                    }
                Done();
            }
            finally
            {
                // Set environment variable back to its old value.
                Environment.SetEnvironmentVariable(ResourceManagerUtils.CustomResourcesUrlEnvVariable, envVarOld);
                Environment.SetEnvironmentVariable(Utils.CustomSearchDirEnvVariable, resourcePathVarOld);
            }
        }