        public void KeyToValue()
        {
            string dataPath = GetDataPath("breast-cancer.txt");
            var    data     = ML.Data.LoadFromTextFile(dataPath, new[] {
                new TextLoader.Column("ScalarString", DataKind.String, 0),
                new TextLoader.Column("VectorString", DataKind.String, 1, 4),
            });

            var transformedData = new ValueToKeyMappingEstimator(Env, new[] {
                new ValueToKeyMappingEstimator.ColumnOptions("A", "ScalarString"),
                new ValueToKeyMappingEstimator.ColumnOptions("B", "VectorString")
            }).Fit(data).Transform(data);

            var est = ML.Transforms.Conversion.MapKeyToValue("ScalarString", "A")
                      .Append(ML.Transforms.Conversion.MapKeyToValue("VectorString", "B"));

            TestEstimatorCore(est, transformedData, invalidInput: data);

            var data2Transformed = est.Fit(transformedData).Transform(transformedData);
            // Check that term and ToValue are round-trippable.
            var dataLeft  = ML.Transforms.SelectColumns(new[] { "ScalarString", "VectorString" }).Fit(data).Transform(data);
            var dataRight = ML.Transforms.SelectColumns(new[] { "ScalarString", "VectorString" }).Fit(data2Transformed).Transform(data2Transformed);

            TestCommon.CheckSameSchemas(dataLeft.Schema, dataRight.Schema);
            CheckSameValues(dataLeft, dataRight);
            Done();
        }
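
        // InputData, used by several tests below, is defined outside this excerpt. A minimal
        // sketch, assuming a two-column "Label"/"Features" view over the adult dataset; the
        // column indices and vector size are illustrative placeholders, not taken from the
        // original source.
        private class InputData
        {
            [LoadColumn(0)]
            public bool Label { get; set; }

            [LoadColumn(9, 14)]
            [VectorType(6)]
            public float[] Features { get; set; }
        }
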
        public void SaveLoaderAndTransformerAndLoad()
        {
            var mlContext = new MLContext();

            var file      = new MultiFileSource(TestCommon.GetDataPath(DataDir, TestDatasets.adult.trainFilename));
            var loader    = mlContext.Data.CreateTextLoader<InputData>(hasHeader: true, dataSample: file);
            var estimator = mlContext.Transforms.NormalizeMinMax("Features");
            var data      = loader.Load(file);
            var model     = estimator.Fit(data);

            // First get the input schema.
            var expectedInputSchema = loader.GetOutputSchema();

            Assert.Equal(2, expectedInputSchema.Count);
            Assert.NotNull(expectedInputSchema.GetColumnOrNull("Label"));
            Assert.NotNull(expectedInputSchema.GetColumnOrNull("Features"));
            Assert.True(expectedInputSchema["Features"].HasSlotNames());

            string modelPath = TestCommon.GetOutputPath(OutDir, FullTestName + "-model.zip");

            mlContext.Model.Save(model, loader, modelPath);

            // Reload the loader and schema.
            Load(mlContext, modelPath, out var loadedWithSchema, out var loadedInputSchema,
                 out var loadedWithLoader, out var loadedLoaderWithTransformer);
            Assert.IsType<NormalizingTransformer>(loadedWithSchema);
            Assert.IsType<NormalizingTransformer>(loadedWithLoader);
            Assert.IsType<TextLoader>(loadedLoaderWithTransformer);

            TestCommon.CheckSameSchemas(expectedInputSchema, loadedInputSchema);
            var reloadedLoaderInputSchema = loadedLoaderWithTransformer.GetOutputSchema();

            TestCommon.CheckSameSchemas(expectedInputSchema, reloadedLoaderInputSchema);
        }
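
        // The Load helper used above is not shown in this excerpt. A minimal sketch, assuming
        // it simply reads the saved model back in both supported forms, reopening the file for
        // each read:
        private static void Load(MLContext mlContext, string filename,
            out ITransformer loadedWithSchema, out DataViewSchema loadedSchema,
            out ITransformer loadedWithLoader,
            out IDataLoader<IMultiStreamSource> loadedLoaderWithTransformer)
        {
            // Read back as a transformer plus the input schema it was saved with.
            using (var fs = File.OpenRead(filename))
                loadedWithSchema = mlContext.Model.Load(fs, out loadedSchema);
            // Read back as a transformer plus its data loader.
            using (var fs = File.OpenRead(filename))
                loadedWithLoader = mlContext.Model.LoadWithDataLoader(fs, out loadedLoaderWithTransformer);
        }
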
        public void FitPipelineSaveModelAndPredict()
        {
            var mlContext = new MLContext(seed: 1);

            // Get the dataset.
            var data = mlContext.Data.LoadFromTextFile<HousingRegression>(TestCommon.GetDataPath(DataDir, TestDatasets.housing.trainFilename), hasHeader: true);

            // Create a pipeline to train on the housing data.
            var pipeline = mlContext.Transforms.Concatenate("Features", HousingRegression.Features)
                .Append(mlContext.Regression.Trainers.FastTree(
                    new FastTreeRegressionTrainer.Options { NumberOfThreads = 1, NumberOfTrees = 10 }));

            // Fit the pipeline.
            var model = pipeline.Fit(data);

            var modelPath = TestCommon.DeleteOutputPath(OutDir, "fitPipelineSaveModelAndPredict.zip");

            // Save model to a file.
            mlContext.Model.Save(model, data.Schema, modelPath);

            // Load model from a file.
            ITransformer serializedModel;

            using (var file = File.OpenRead(modelPath))
            {
                serializedModel = mlContext.Model.Load(file, out var serializedSchema);
                TestCommon.CheckSameSchemas(data.Schema, serializedSchema);
            }

            // Create prediction engine and test predictions.
            var originalPredictionEngine   = mlContext.Model.CreatePredictionEngine<HousingRegression, ScoreColumn>(model);
            var serializedPredictionEngine = mlContext.Model.CreatePredictionEngine<HousingRegression, ScoreColumn>(serializedModel);

            // Take a handful of examples out of the dataset and compute predictions.
            var dataEnumerator = mlContext.Data.CreateEnumerable<HousingRegression>(mlContext.Data.TakeRows(data, 5), false);

            foreach (var row in dataEnumerator)
            {
                var originalPrediction   = originalPredictionEngine.Predict(row);
                var serializedPrediction = serializedPredictionEngine.Predict(row);
                // Check that the predictions are identical.
                Assert.Equal(originalPrediction.Score, serializedPrediction.Score);
            }
        }
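
        // HousingRegression and ScoreColumn are defined outside this excerpt. Rough sketches,
        // assuming HousingRegression exposes the label plus a static array naming its feature
        // columns; the feature names and column indices below are illustrative placeholders.
        private class HousingRegression
        {
            // Must match the feature properties declared below.
            public static readonly string[] Features = { "CrimesPerCapita", "PercentResidental" };

            [LoadColumn(0), ColumnName("Label")]
            public float MedianHomeValue { get; set; }

            [LoadColumn(1)]
            public float CrimesPerCapita { get; set; }

            [LoadColumn(2)]
            public float PercentResidental { get; set; }
        }

        private class ScoreColumn
        {
            public float Score { get; set; }
        }
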
        public void SaveCompositeLoaderAndLoad()
        {
            var mlContext = new MLContext();

            var file      = new MultiFileSource(TestCommon.GetDataPath(DataDir, TestDatasets.adult.trainFilename));
            var loader    = mlContext.Data.CreateTextLoader<InputData>(hasHeader: true, dataSample: file);
            var composite = loader.Append(mlContext.Transforms.NormalizeMinMax("Features"));
            var loaderWithEmbeddedModel = composite.Fit(file);

            string modelPath = TestCommon.GetOutputPath(OutDir, FullTestName + "-model.zip");

            mlContext.Model.Save(null, loaderWithEmbeddedModel, modelPath);

            Load(mlContext, modelPath, out var loadedWithSchema, out var loadedSchema,
                 out var loadedWithLoader, out var loadedLoaderWithTransformer);
            // Because the transformer model was saved as part of the composite loader, with no
            // separate transforms, the loaded "model" should be an empty transformer chain: the
            // actual model has been folded into the loader.
            Assert.Empty(Assert.IsType<TransformerChain<ITransformer>>(loadedWithSchema));
            Assert.Empty(Assert.IsType<TransformerChain<ITransformer>>(loadedWithLoader));

            var expectedSchema = loaderWithEmbeddedModel.GetOutputSchema();

            Assert.Equal(3, expectedSchema.Count);
            Assert.NotNull(expectedSchema.GetColumnOrNull("Label"));
            Assert.NotNull(expectedSchema.GetColumnOrNull("Features"));
            Assert.True(expectedSchema["Features"].HasSlotNames());

            TestCommon.CheckSameSchemas(loaderWithEmbeddedModel.GetOutputSchema(), loadedSchema);
            var schemaFromLoadedLoader = loadedLoaderWithTransformer.GetOutputSchema();

            TestCommon.CheckSameSchemas(loaderWithEmbeddedModel.GetOutputSchema(), schemaFromLoadedLoader);

            // The type of the loader itself should be a composite data loader, and its single transformer
            // should be the normalizing transformer.
            var compositeLoader = Assert.IsType<CompositeDataLoader<IMultiStreamSource, ITransformer>>(loadedLoaderWithTransformer);
            var chainFromLoader = compositeLoader.Transformer;

            Assert.IsType<NormalizingTransformer>(Assert.Single(chainFromLoader));
        }
        public void LoadModelAndExtractPredictor()
        {
            var mlContext = new MLContext(1);

            var file   = new MultiFileSource(TestCommon.GetDataPath(DataDir, TestDatasets.adult.trainFilename));
            var loader = mlContext.Data.CreateTextLoader<InputData>(hasHeader: true, dataSample: file);
            var data   = loader.Load(file);

            // Pipeline.
            var pipeline = mlContext.BinaryClassification.Trainers.Gam();
            // Define the same pipeline starting with the loader.
            var pipeline1 = loader.Append(mlContext.BinaryClassification.Trainers.Gam());

            // Train.
            var transformerModel     = pipeline.Fit(data);
            var compositeLoaderModel = pipeline1.Fit(file);

            // Save and reload the "same" model with some differences in structure.

            // In this case we are saving the transformer model, but *not* the loader, just the schema from that loader.
            string modelAndSchemaPath = TestCommon.GetOutputPath(OutDir, FullTestName + "-model-schema.zip");

            mlContext.Model.Save(transformerModel, data.Schema, modelAndSchemaPath);

            // In this case we have combined the loader with the transformer model to form a "composite" loader, and are just
            // saving that one loader to this file.
            string compositeLoaderModelPath = TestCommon.GetOutputPath(OutDir, FullTestName + "-composite-model.zip");

            mlContext.Model.Save(null, compositeLoaderModel, compositeLoaderModelPath);

            // In this case we are saving the transformer model, as well as the associated data loader.
            string loaderAndTransformerModelPath = TestCommon.GetOutputPath(OutDir, FullTestName + "-loader-transformer.zip");

            mlContext.Model.Save(transformerModel, loader, loaderAndTransformerModelPath);

            ITransformer loadedTransformerModel;
            IDataLoader<IMultiStreamSource> loadedCompositeLoader;
            ITransformer loadedTransformerModel1;

            using (var fs = File.OpenRead(modelAndSchemaPath))
                loadedTransformerModel = mlContext.Model.Load(fs, out var loadedSchema);
            using (var fs = File.OpenRead(compositeLoaderModelPath))
            {
                // This model can be loaded either as a composite data loader,
                // a transformer model + an input schema, or a transformer model + a data loader.
                var t = mlContext.Model.LoadWithDataLoader(fs, out loadedCompositeLoader);
                // This is a bit unusual: it reloads a second model from the same stream,
                // continuing from where the first read left off. As far as I know that is not
                // a requirement of the design or the API, but we test it nonetheless. If this
                // winds up failing, we should not insist on it as a design requirement.
                var t1 = mlContext.Model.Load(fs, out var s);

                TestCommon.CheckSameSchemas(loadedCompositeLoader.GetOutputSchema(), s);
                // We combined the GAM with the loader, so the remaining chain should just be empty.
                Assert.Empty(Assert.IsType<TransformerChain<ITransformer>>(t));
                Assert.Empty(Assert.IsType<TransformerChain<ITransformer>>(t1));
            }
            using (var fs = File.OpenRead(loaderAndTransformerModelPath))
            {
                // This model can be loaded either as a composite data loader,
                // a transformer model + an input schema, or a transformer model + a data loader.
                var t = mlContext.Model.Load(fs, out var s);
                TestCommon.CheckSameSchemas(loader.GetOutputSchema(), s);

                loadedTransformerModel1 = mlContext.Model.LoadWithDataLoader(fs, out var l);
            }
        }
        public void TestSvmLightLoaderAndSaverWithTermMapping()
        {
            // Test with a term mapping, instead of the actual SVM^light format that
            // requires positive integers. Also check that qid works here.
            var path = CreateDataset("-data.txt", new string[] {
                "1 qid:1 aurora:3.14159 beachwood:123",
                "-1 qid:5 beachwood:345 chagrin:-21",
            });

            var model = ML.Data.CreateSvmLightLoaderWithFeatureNames(dataSample: new MultiFileSource(path));
            var data  = model.Load(path);

            Assert.True(data.Schema["Features"].Type.GetValueCount() == 3);

            var schemaDef = SchemaDefinition.Create(typeof(SvmLightOutput));

            schemaDef["Features"].ColumnType = new VectorDataViewType(NumberDataViewType.Single, 3);
            schemaDef["Features"].AddAnnotation(
                AnnotationUtils.Kinds.SlotNames, new VBuffer <ReadOnlyMemory <char> >(3, new[] { "aurora".AsMemory(), "beachwood".AsMemory(), "chagrin".AsMemory() }),
                new VectorDataViewType(TextDataViewType.Instance, 3));
            var expectedData = ML.Data.LoadFromEnumerable(new SvmLightOutput[]
            {
                new SvmLightOutput()
                {
                    Label = 1, Weight = 1, GroupId = 1, Features = new VBuffer<float>(3, 2, new[] { 3.14159f, 123f }, new[] { 0, 1 })
                },
                new SvmLightOutput()
                {
                    Label = -1, Weight = 1, GroupId = 5, Features = new VBuffer<float>(3, 2, new[] { 345f, -21f }, new[] { 1, 2 })
                },
            }, schemaDef);

            CheckSameValues(data, expectedData, checkId: false);
            TestCommon.CheckSameSchemas(data.Schema, expectedData.Schema);

            // Save, reload and compare dataviews again.
            var outputPath = DeleteOutputPath(TestName + "-saved-data.txt");

            using (var stream = File.Create(outputPath))
                ML.Data.SaveInSvmLightFormat(expectedData, stream, zeroBasedIndexing: true, rowGroupColumnName: "GroupId");
            data = ML.Data.LoadFromSvmLightFile(outputPath, zeroBased: true);
            CheckSameValues(data, expectedData, checkId: false);

            // We reload the model, but on a new set of data. The "euclid" key should be
            // ignored as it would not have been detected by the term transform.
            path = CreateDataset("-data2.txt", new string[] {
                "-1 aurora:1 chagrin:2",
                "1 chagrin:3 euclid:4"
            });
            data = model.Load(path);
            Assert.True(data.Schema["Features"].Type.GetValueCount() == 3);

            expectedData = ML.Data.LoadFromEnumerable(new SvmLightOutput[]
            {
                new SvmLightOutput()
                {
                    Label = -1, Weight = 1, Features = new VBuffer<float>(3, 2, new[] { 1f, 2f }, new[] { 0, 2 })
                },
                new SvmLightOutput()
                {
                    Label = 1, Weight = 1, Features = new VBuffer<float>(3, 1, new[] { 3f }, new[] { 2 })
                },
            }, schemaDef);
            CheckSameValues(data, expectedData, checkId: false);

            // Save, reload and compare dataviews again.
            outputPath = DeleteOutputPath(TestName + "-saved-data2.txt");
            using (var stream = File.Create(outputPath))
                ML.Data.SaveInSvmLightFormat(expectedData, stream);
            data = ML.Data.LoadFromSvmLightFile(outputPath);
            CheckSameValues(data, expectedData, checkId: false);
        }
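
        // SvmLightOutput and CreateDataset are defined outside this excerpt. Minimal sketches,
        // assuming SvmLightOutput mirrors the columns the SVM^light loader produces and
        // CreateDataset writes the given lines to a per-test output file:
        private sealed class SvmLightOutput
        {
            public float Label { get; set; }
            public float Weight { get; set; }
            [KeyType(ulong.MaxValue - 1)]
            public ulong GroupId { get; set; }
            public VBuffer<float> Features { get; set; }
        }

        private string CreateDataset(string suffix, string[] lines)
        {
            var path = DeleteOutputPath(TestName + suffix);
            File.WriteAllLines(path, lines);
            return path;
        }
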
        public void SaveAndLoadModelWithLoader()
        {
            var mlContext = new MLContext();

            var file   = new MultiFileSource(TestCommon.GetDataPath(DataDir, TestDatasets.adult.trainFilename));
            var loader = mlContext.Data.CreateTextLoader<InputData>(hasHeader: true, dataSample: file);
            var data   = loader.Load(file);

            // Pipeline.
            var pipeline = mlContext.BinaryClassification.Trainers.Gam();

            // Train.
            var model = pipeline.Fit(data);

            // Save and reload.
            string modelPath = TestCommon.GetOutputPath(OutDir, FullTestName + "-model.zip");

            mlContext.Model.Save(model, loader, modelPath);

            IDataLoader<IMultiStreamSource> loadedLoader;
            ITransformer   loadedModelWithoutLoader;
            ITransformer   loadedModelWithLoader;
            DataViewSchema loadedSchema;

            using (var fs = File.OpenRead(modelPath))
            {
                loadedModelWithLoader = mlContext.Model.LoadWithDataLoader(fs, out loadedLoader);
                Assert.IsAssignableFrom<ISingleFeaturePredictionTransformer<object>>(loadedModelWithLoader);
                loadedModelWithoutLoader = mlContext.Model.Load(fs, out loadedSchema);
                Assert.IsAssignableFrom<ISingleFeaturePredictionTransformer<object>>(loadedModelWithoutLoader);

                TestCommon.CheckSameSchemas(loadedLoader.GetOutputSchema(), loadedSchema);
            }

            // When using a data source not derived from the loader, we will not have
            // the slot names.
            data = mlContext.Data.LoadFromEnumerable(new[] { new InputData() });
            data = loadedModelWithoutLoader.Transform(data);
            Assert.False(data.Schema["Features"].HasSlotNames());
            // When we plumb the loaded schema through the transformer though, we should have slot names.
            var noLoaderTransformedSchema = loadedModelWithoutLoader.GetOutputSchema(loadedSchema);

            Assert.True(noLoaderTransformedSchema["Features"].HasSlotNames());

            data = loadedLoader.Load(file);
            Assert.True(data.Schema["Features"].HasSlotNames());
            VBuffer<ReadOnlyMemory<char>> slotNames = default;

            data.Schema["Features"].GetSlotNames(ref slotNames);
            var ageIndex = FindIndex(slotNames.GetValues(), "age");
            var singleFeaturePredictionTransformer = loadedModelWithLoader as ISingleFeaturePredictionTransformer<object>;

            Assert.NotNull(singleFeaturePredictionTransformer);
            var calibratedModelParameters = singleFeaturePredictionTransformer.Model as CalibratedModelParametersBase;

            Assert.NotNull(calibratedModelParameters);
            var gamModel = calibratedModelParameters.SubModel as GamBinaryModelParameters;

            Assert.NotNull(gamModel);
            var ageBinUpperBounds = gamModel.GetBinUpperBounds(ageIndex);
            var ageBinEffects     = gamModel.GetBinEffects(ageIndex);
        }
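
        // FindIndex, used above to locate the "age" slot, is not shown in this excerpt. A
        // minimal sketch, assuming a linear scan of the slot names for an exact match:
        private static int FindIndex(ReadOnlySpan<ReadOnlyMemory<char>> values, string value)
        {
            for (int i = 0; i < values.Length; i++)
            {
                // Compare the slot name's characters against the search string.
                if (values[i].Span.SequenceEqual(value.AsSpan()))
                    return i;
            }
            return -1;
        }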