Exemplo n.º 1
0
        /// <summary>
        /// Creates a text loader from the inferred loader options and uses it to populate
        /// <c>TrainDataView</c> and <c>TestDataView</c> from <c>TrainDataPath</c> and <c>TestDataPath</c>.
        /// </summary>
        /// <param name="mlContext">Context whose data catalog creates the text loader.</param>
        /// <param name="columnInference">Inference results supplying the text loader options.</param>
        private static void LoadData(MLContext mlContext, ColumnInferenceResults columnInference)
        {
            TextLoader loader = mlContext.Data.CreateTextLoader(columnInference.TextLoaderOptions);

            // One loader instance is applied to both files, so both views are read with the same options.
            TrainDataView = loader.Load(TrainDataPath);
            TestDataView  = loader.Load(TestDataPath);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Re-fits the best pipeline of the experiment on the train, validation and test files
        /// combined, evaluates the re-fitted model against the test file and saves it.
        /// </summary>
        /// <param name="mlContext">Context used to create the text loader; also handed to the evaluation and save helpers.</param>
        /// <param name="experimentResult">Experiment result whose <c>BestRun.Estimator</c> is re-fitted.</param>
        /// <param name="columnInference">Inference results supplying the text loader options.</param>
        /// <param name="paths">Provides <c>TrainPath</c>, <c>ValidatePath</c>, <c>TestPath</c> and <c>FinalModelPath</c>.</param>
        /// <param name="fixedBug">Currently ignored; retained so existing call sites keep compiling.</param>
        /// <returns>The model fitted on the combined data.</returns>
        public static ITransformer Retrain(MLContext mlContext, ExperimentResult <MulticlassClassificationMetrics> experimentResult,
                                           ColumnInferenceResults columnInference, DataFilePaths paths, bool fixedBug = false)
        {
            ConsoleHelper.ConsoleWriteHeader("=============== Re-fitting best pipeline ===============");
            var textLoader       = mlContext.Data.CreateTextLoader(columnInference.TextLoaderOptions);
            var combinedDataView = textLoader.Load(new MultiFileSource(paths.TrainPath, paths.ValidatePath, paths.TestPath));
            var bestRun          = experimentResult.BestRun;

            // TODO: retry the experiment-level retrain below. It gave an error when first tried, but the
            // failure may have come from EvaluateTrainedModelAndPrintMetrics rather than from this call:
            //refitModel = MulticlassExperiment.Retrain(experimentResult,
            //    "final model",
            //    new MultiFileSource(paths.TrainPath, paths.ValidatePath, paths.FittedPath),
            //    paths.TestPath,
            //    paths.FinalPath, textLoader, mlContext);
            var refitModel = bestRun.Estimator.Fit(combinedDataView);

            // Load the test file once and share the view between evaluation and saving.
            var testDataView = textLoader.Load(paths.TestPath);

            EvaluateTrainedModelAndPrintMetrics(mlContext, refitModel, "production model", testDataView);
            // Save the re-fit model to a .ZIP file
            SaveModel(mlContext, refitModel, paths.FinalModelPath, testDataView);

            // Trace.WriteLine(string, string) treats its second argument as a category rather than a
            // format argument, so the path has to be interpolated into the message itself.
            Trace.WriteLine($"The model is saved to {paths.FinalModelPath}");
            return refitModel;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Renders the ModelInput template twice, first with class labels generated without a column
        /// mapping and then with <paramref name="mapping"/>, and approval-verifies each rendering.
        /// </summary>
        /// <param name="pipeline">Not used by this helper.</param>
        /// <param name="columnInference">Inference results the class labels are generated from.</param>
        /// <param name="mapping">Column mapping applied in the second rendering.</param>
        /// <param name="info">Prefix for the approval file's additional information.</param>
        private void TestModelInput(Pipeline pipeline, ColumnInferenceResults columnInference,
                                    IDictionary <string, CodeGeneratorSettings.ColumnMapping> mapping, string info)
        {
            // Case 1: labels generated without a column mapping.
            var unmappedLabels   = Utils.GenerateClassLabels(columnInference);
            var unmappedTemplate = new ModelInputClass()
            {
                Namespace   = "test",
                ClassLabels = unmappedLabels,
                Target      = GenerateTarget.Cli,
            };
            var unmappedFile = new CSharpCodeFile()
            {
                File = unmappedTemplate.TransformText(),
                Name = "ModelInput.cs",
            };

            NamerFactory.AdditionalInformation = info + "_null_map";
            Approvals.Verify(unmappedFile.File);

            // Case 2: labels generated with the supplied column mapping.
            var mappedLabels   = Utils.GenerateClassLabels(columnInference, mapping);
            var mappedTemplate = new ModelInputClass()
            {
                Namespace   = "test",
                ClassLabels = mappedLabels,
                Target      = GenerateTarget.Cli,
            };
            var mappedFile = new CSharpCodeFile()
            {
                File = mappedTemplate.TransformText(),
                Name = "ModelInput.cs",
            };

            NamerFactory.AdditionalInformation = info + "_map";
            Approvals.Verify(mappedFile.File);
        }
Exemplo n.º 4
0
        // Checks the two lines generated for a single Boolean "Label" column loaded from index 0.
        public void ClassLabelGenerationBasicTest()
        {
            // Arrange
            var labelColumn = new TextLoader.Column()
            {
                Name     = "Label",
                Source   = new TextLoader.Range[] { new TextLoader.Range(0) },
                DataKind = DataKind.Boolean
            };
            var loaderOptions = new TextLoader.Options()
            {
                Columns        = new TextLoader.Column[] { labelColumn },
                AllowQuoting   = false,
                AllowSparse    = false,
                Separators     = new[] { ',' },
                HasHeader      = true,
                TrimWhitespace = true
            };
            var inference = new ColumnInferenceResults()
            {
                TextLoaderOptions = loaderOptions,
                ColumnInformation = new ColumnInformation()
            };
            var generator = new CodeGenerator(null, inference, null);

            // Act
            var labels = generator.GenerateClassLabels();

            // Assert: first the attribute line, then the property declaration.
            Assert.Equal("[ColumnName(\"Label\"), LoadColumn(0)]", labels[0]);
            Assert.Equal("public bool Label{get; set;}", labels[1]);
        }
Exemplo n.º 5
0
        /// <summary>
        /// Runs AutoML column inference over <c>TrainDataPath</c> and prints the outcome.
        /// </summary>
        /// <param name="mlContext">Context whose AutoML catalog performs the inference.</param>
        /// <returns>The column inference results.</returns>
        private static ColumnInferenceResults InferColumns(MLContext mlContext)
        {
            ConsoleHelper.ConsoleWriteHeader("=============== Inferring columns in dataset ===============");

            // groupColumns is passed as false explicitly for this inference call.
            ColumnInferenceResults inferred = mlContext.Auto()
                                                       .InferColumns(TrainDataPath, LabelColumnName, groupColumns: false);

            ConsoleHelper.Print(inferred);
            return inferred;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Runs AutoML column inference over the given file and writes the result to the console.
        /// </summary>
        /// <param name="mlContext">Context whose AutoML catalog performs the inference.</param>
        /// <param name="TrainDataPath">Path of the file to infer columns from.</param>
        /// <returns>The column inference results.</returns>
        private static ColumnInferenceResults InferColumns(MLContext mlContext, string TrainDataPath)
        {
            Console.WriteLine("=============== Inferring columns in dataset ===============");

            ColumnInferenceResults inferred = mlContext.Auto()
                                                       .InferColumns(TrainDataPath, LabelColumnName, groupColumns: false);

            // NOTE(review): the results object itself is handed to Console.WriteLine, so the output is
            // whatever its ToString() yields - confirm that is the intended display.
            Console.WriteLine(inferred);
            return inferred;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Generates the source lines of the model-input class members. For every text-loader
        /// column three entries are appended to the result, in this order: the attribute line, the
        /// property declaration, and a "\r\n" separator.
        /// </summary>
        /// <param name="columnInferenceResults">Inference results whose <c>TextLoaderOptions.Columns</c> are enumerated.</param>
        /// <param name="columnMapping">
        /// Optional overrides keyed by column name. When a column is present, its mapped type is used
        /// for the property type and its mapped name for the <c>ColumnName</c> attribute.
        /// </param>
        /// <returns>The generated lines, three entries per column.</returns>
        internal static IList <string> GenerateClassLabels(ColumnInferenceResults columnInferenceResults, IDictionary <string, CodeGeneratorSettings.ColumnMapping> columnMapping = default)
        {
            IList <string> result      = new List <string>();
            List <string>  columnNames = new List <string>();

            foreach (var column in columnInferenceResults.TextLoaderOptions.Columns)
            {
                StringBuilder sb = new StringBuilder();

                // Max - Min is a nullable difference; .Value throws if it has no value.
                int range = (column.Source[0].Max - column.Source[0].Min).Value;
                sb.Append(Symbols.PublicSymbol);
                sb.Append(Symbols.Space);

                // if column is in columnMapping, use the type and name in that
                DataKind dataKind;
                string   columnName;

                // Single lookup instead of ContainsKey followed by two indexer reads.
                if (columnMapping != null && columnMapping.TryGetValue(column.Name, out var mappedColumn))
                {
                    dataKind   = mappedColumn.ColumnType;
                    columnName = mappedColumn.ColumnName;
                }
                else
                {
                    dataKind   = column.DataKind;
                    columnName = column.Name;
                }
                sb.Append(GetSymbolOfDataKind(dataKind));

                // Accommodate VectorType (array) columns
                if (range > 0)
                {
                    // Attributes within one [...] section must be comma-separated; without the comma
                    // before VectorType the generated class does not compile.
                    result.Add($"[ColumnName(\"{columnName}\"),LoadColumn({column.Source[0].Min}, {column.Source[0].Max}), VectorType({(range + 1)})]");
                    sb.Append("[]");
                }
                else
                {
                    result.Add($"[ColumnName(\"{columnName}\"), LoadColumn({column.Source[0].Min})]");
                }
                sb.Append(" ");

                // The property name is derived from the loader column's own name, not the mapped one.
                columnNames.Add(column.Name);
                result.Add(sb.ToString());
                result.Add("\r\n");
            }
            // Get normalized and unique column names. If there are duplicate column names, the
            // differentiator suffix '_col_x' will be added to each column name, where 'x' is
            // the load order for a given column.
            List <string> normalizedColumnNames = GenerateColumnNames(columnNames);

            // Entries come in groups of three per column; index 1 of each group is the property
            // declaration, which still lacks its name and accessors at this point.
            for (int i = 1; i < result.Count; i += 3)
            {
                // Get normalized column name for correctly typed class property name
                result[i] += normalizedColumnNames[i / 3];
                result[i] += "{get; set;}";
            }
            return result;
        }
        /// <summary>
        /// Builds a hand-written image pipeline (load, resize, extract pixels, normalize, ONNX,
        /// label mapping) together with matching column inference results for use in tests.
        /// </summary>
        /// <returns>The mocked pipeline and its column inference results.</returns>
        private (Pipeline, ColumnInferenceResults) GetMockedAzureImagePipelineAndInference()
        {
            // --- pipeline nodes ---
            var onnxProperties = new Dictionary <string, object>()
            {
                { "outputColumnNames", "output1" },
                { "inputColumnNames", "input1" },
            };
            var resizeProperties = new Dictionary <string, object>()
            {
                { "imageWidth", 224 },
                { "imageHeight", 224 },
            };

            var applyOnnx     = new PipelineNode(nameof(SpecialTransformer.ApplyOnnxModel), PipelineNodeType.Transform, new[] { "input.1" }, new[] { "output.1" }, onnxProperties);
            var loadImage     = new PipelineNode(EstimatorName.ImageLoading.ToString(), PipelineNodeType.Transform, "ImageSource", "ImageSource_featurized");
            var resizeImage   = new PipelineNode(nameof(SpecialTransformer.ResizeImage), PipelineNodeType.Transform, "ImageSource_featurized", "ImageSource_featurized", resizeProperties);
            var extractPixels = new PipelineNode(nameof(SpecialTransformer.ExtractPixel), PipelineNodeType.Transform, "ImageSource_featurized", "ImageSource_featurized");
            var normalize     = new PipelineNode(nameof(SpecialTransformer.NormalizeMapping), PipelineNodeType.Transform, string.Empty, string.Empty);
            var labelMapping  = new PipelineNode(nameof(SpecialTransformer.LabelMapping), PipelineNodeType.Transform, string.Empty, string.Empty);

            // Node order defines the pipeline order.
            var orderedNodes = new PipelineNode[]
            {
                loadImage,
                resizeImage,
                extractPixels,
                normalize,
                applyOnnx,
                labelMapping,
            };
            var mockedPipeline = new Pipeline(orderedNodes);

            // --- column inference ---
            var loaderOptions = new TextLoader.Options()
            {
                Columns = new[] {
                    new TextLoader.Column("Label", DataKind.String, 0),
                    // NOTE(review): the original author left a "0?" remark on this index - confirm it.
                    new TextLoader.Column("ImageSource", DataKind.String, 1),
                },
                AllowQuoting = true,
                AllowSparse  = true,
                HasHeader    = true,
                Separators   = new[] { '\t' }
            };
            var inference = new ColumnInferenceResults()
            {
                TextLoaderOptions = loaderOptions,
                ColumnInformation = new ColumnInformation()
                {
                    LabelColumnName = "Label"
                }
            };

            return (mockedPipeline, inference);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Fits the estimator of the experiment's best run on the train and test files combined.
        /// </summary>
        /// <param name="mlContext">Context whose data catalog creates the text loader.</param>
        /// <param name="experimentResult">Experiment result providing <c>BestRun</c>.</param>
        /// <param name="columnInference">Inference results supplying the text loader options.</param>
        /// <returns>The model fitted on the combined data.</returns>
        private static ITransformer RefitBestPipeline(MLContext mlContext, ExperimentResult <RegressionMetrics> experimentResult,
                                                      ColumnInferenceResults columnInference)
        {
            ConsoleHelper.ConsoleWriteHeader("=============== Re-fitting best pipeline ===============");

            var loader = mlContext.Data.CreateTextLoader(columnInference.TextLoaderOptions);

            // Both files are read as a single multi-file source.
            var allData = loader.Load(new MultiFileSource(TrainDataPath, TestDataPath));

            return experimentResult.BestRun.Estimator.Fit(allData);
        }
        /// <summary>
        /// Returns a hand-written matrix-factorization pipeline and matching column inference
        /// results. Both are built on the first call, stored in <c>_mockedPipeline</c> and
        /// <c>_columnInference</c>, and reused while <c>_mockedPipeline</c> is non-null.
        /// </summary>
        /// <returns>The mocked pipeline and its column inference results.</returns>
        private (Pipeline, ColumnInferenceResults) GetMockedRecommendationPipelineAndInference()
        {
            if (_mockedPipeline == null)
            {
                // The pipeline is assembled directly from PipelineNode objects, so no MLContext is
                // needed here (the previously constructed instance was never used).
                var hyperParam = new Dictionary <string, object>()
                {
                    { "MatrixColumnIndexColumnName", "userId" },
                    { "MatrixRowIndexColumnName", "movieId" },
                    { "LabelColumnName", "Label" },
                    { nameof(MatrixFactorizationTrainer.Options.NumberOfIterations), 10 },
                    { nameof(MatrixFactorizationTrainer.Options.LearningRate), 0.01f },
                    { nameof(MatrixFactorizationTrainer.Options.ApproximationRank), 8 },
                    { nameof(MatrixFactorizationTrainer.Options.Lambda), 0.01f },
                    { nameof(MatrixFactorizationTrainer.Options.LossFunction), MatrixFactorizationTrainer.LossFunctionType.SquareLossRegression },
                    { nameof(MatrixFactorizationTrainer.Options.Alpha), 1f },
                    { nameof(MatrixFactorizationTrainer.Options.C), 0.00001f },
                };
                var valueToKeyPipelineNode1 = new PipelineNode(nameof(EstimatorName.ValueToKeyMapping), PipelineNodeType.Transform, "userId", "userId");
                var valueToKeyPipelineNode2 = new PipelineNode(nameof(EstimatorName.ValueToKeyMapping), PipelineNodeType.Transform, "movieId", "movieId");
                var matrixPipelineNode      = new PipelineNode(nameof(TrainerName.MatrixFactorization), PipelineNodeType.Trainer, "Features", "Score", hyperParam);

                // Two key-mapping transforms followed by the trainer node.
                _mockedPipeline = new Pipeline(new PipelineNode[]
                {
                    valueToKeyPipelineNode1,
                    valueToKeyPipelineNode2,
                    matrixPipelineNode
                });

                var textLoaderArgs = new TextLoader.Options()
                {
                    Columns = new[] {
                        new TextLoader.Column("Label", DataKind.String, 0),
                        new TextLoader.Column("userId", DataKind.String, 1),
                        new TextLoader.Column("movieId", DataKind.String, 2),
                    },
                    AllowQuoting = true,
                    AllowSparse  = true,
                    HasHeader    = true,
                    Separators   = new[] { ',' }
                };

                this._columnInference = new ColumnInferenceResults()
                {
                    TextLoaderOptions = textLoaderArgs,
                    ColumnInformation = new ColumnInformation()
                    {
                        LabelColumnName  = "Label",
                        UserIdColumnName = "userId",
                        ItemIdColumnName = "movieId"
                    }
                };
            }
            return (_mockedPipeline, _columnInference);
        }
        /// <summary>
        /// Stores the inputs and creates the console-app and model code generators, both of which
        /// receive the normalized output name as their namespace value.
        /// </summary>
        /// <param name="pipeline">Pipeline forwarded to both child generators.</param>
        /// <param name="columnInferenceResults">Column inference results forwarded to both child generators.</param>
        /// <param name="options">Settings; <c>OutputName</c> supplies <c>Name</c> and the namespace value.</param>
        public AzureAttachCodeGenenrator(Pipeline pipeline, ColumnInferenceResults columnInferenceResults, CodeGeneratorSettings options)
        {
            _pipeline              = pipeline;
            _settings              = options;
            _columnInferenceResult = columnInferenceResults;

            var outputName = _settings.OutputName;
            Name = outputName;

            var normalizedNamespace = Utilities.Utils.Normalize(outputName);

            AzureAttachConsoleApp = new AzureAttachConsoleAppCodeGenerator(_pipeline, _columnInferenceResult, _settings, normalizedNamespace);
            AzureAttachModel      = new AzureAttachModelCodeGenerator(_pipeline, _columnInferenceResult, _settings, normalizedNamespace);
        }
Exemplo n.º 12
0
        /// <summary>
        /// Runs an AutoML regression experiment on <c>TrainDataView</c> with a custom pre-featurizer
        /// and adjusted column information, then prints a summary and the top models.
        /// </summary>
        /// <param name="mlContext">Context used to build the pre-featurizer and the experiment.</param>
        /// <param name="columnInference">Inference results whose <c>ColumnInformation</c> is adjusted and passed to the experiment.</param>
        /// <returns>The result of the executed experiment.</returns>
        private static ExperimentResult <RegressionMetrics> RunAutoMLExperiment(MLContext mlContext,
                                                                                ColumnInferenceResults columnInference)
        {
            // STEP 1: Show the first rows of the training data.
            ConsoleHelper.ShowDataViewInConsole(mlContext, TrainDataView);

            // STEP 2: Build the pre-featurizer handed to the AutoML experiment.
            // (Internally, AutoML uses one or more train/validation data splits to
            // evaluate the models it produces. The pre-featurizer is fit only on the
            // training data split to produce a trained transform. Then, the trained transform
            // is applied to both the train and validation data splits.)
            var cashMapping = new[] { new KeyValuePair <string, bool>("CSH", true) };
            IEstimator <ITransformer> preFeaturizer = mlContext.Transforms.Conversion.MapValue("is_cash", cashMapping, "payment_type");

            // STEP 3: Adjust the column information returned by the InferColumns API:
            // "payment_type" moves from the categorical columns to the ignored columns.
            ColumnInformation columnInfo = columnInference.ColumnInformation;

            columnInfo.CategoricalColumnNames.Remove("payment_type");
            columnInfo.IgnoredColumnNames.Add("payment_type");

            // STEP 4: Cancellation token source used to stop the experiment.
            var cancellationSource = new CancellationTokenSource();

            // STEP 5: User-defined progress handler that AutoML invokes after each
            // model it produces and evaluates.
            var handler = new RegressionExperimentProgressHandler();

            // STEP 6: Experiment settings.
            var settings = CreateExperimentSettings(mlContext, cancellationSource);

            // STEP 7: Create and run the AutoML regression experiment.
            var regressionExperiment = mlContext.Auto().CreateRegressionExperiment(settings);

            ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
            Console.WriteLine("Running AutoML regression experiment...");
            var timer = Stopwatch.StartNew();

            // Cancel experiment after the user presses any key
            CancelExperimentAfterAnyKeyPress(cancellationSource);
            ExperimentResult <RegressionMetrics> outcome = regressionExperiment.Execute(TrainDataView, columnInfo, preFeaturizer, handler);

            Console.WriteLine($"{outcome.RunDetails.Count()} models were returned after {timer.Elapsed.TotalSeconds:0.00} seconds{Environment.NewLine}");

            // Print top models found by AutoML
            PrintTopModels(outcome);

            return outcome;
        }
        /// <summary>
        /// Returns a hand-written ranking pipeline (hashing transform followed by a LightGBM ranking
        /// trainer) and matching column inference results. Both are built on the first call, stored
        /// in <c>_mockedPipeline</c> and <c>_columnInference</c>, and reused while
        /// <c>_mockedPipeline</c> is non-null.
        /// </summary>
        /// <returns>The mocked pipeline and its column inference results.</returns>
        private (Pipeline, ColumnInferenceResults) GetMockedRankingPipelineAndInference()
        {
            if (_mockedPipeline == null)
            {
                // The pipeline is assembled directly from PipelineNode objects, so no MLContext is
                // needed here (the previously constructed instance was never used).
                var hyperParam = new Dictionary <string, object>()
                {
                    { "rowGroupColumnName", "GroupId" },
                    { "LabelColumnName", "Label" },
                };
                var hashPipelineNode     = new PipelineNode(nameof(EstimatorName.Hashing), PipelineNodeType.Transform, "GroupId", "GroupId");
                var lightGbmPipelineNode = new PipelineNode(nameof(TrainerName.LightGbmRanking), PipelineNodeType.Trainer, "Features", "Score", hyperParam);

                _mockedPipeline = new Pipeline(new PipelineNode[]
                {
                    hashPipelineNode,
                    lightGbmPipelineNode
                });

                var textLoaderArgs = new TextLoader.Options()
                {
                    Columns = new[] {
                        new TextLoader.Column("Label", DataKind.Boolean, 0),
                        new TextLoader.Column("GroupId", DataKind.Single, 1),
                        new TextLoader.Column("col1", DataKind.Single, 0),
                        new TextLoader.Column("col2", DataKind.String, 0),
                        new TextLoader.Column("col3", DataKind.Int32, 0),
                        new TextLoader.Column("col4", DataKind.UInt32, 0),
                    },
                    AllowQuoting = true,
                    AllowSparse  = true,
                    HasHeader    = true,
                    Separators   = new[] { ',' }
                };

                this._columnInference = new ColumnInferenceResults()
                {
                    TextLoaderOptions = textLoaderArgs,
                    ColumnInformation = new ColumnInformation()
                    {
                        LabelColumnName = "Label", GroupIdColumnName = "GroupId"
                    }
                };
            }

            return (_mockedPipeline, _columnInference);
        }
        /// <summary>
        /// Returns a suggested-pipeline based mock (column concatenation plus a FastForest OVA
        /// trainer with <c>NumLeaves</c> = 2) and matching column inference results. Both are built
        /// on the first call and reused while <c>_mockedOvaPipeline</c> is non-null.
        /// </summary>
        /// <returns>The mocked pipeline and its column inference results.</returns>
        private (Pipeline, ColumnInferenceResults) GetMockedOvaPipelineAndInference()
        {
            // Already built: hand back the stored instances.
            if (_mockedOvaPipeline != null)
            {
                return (_mockedOvaPipeline, _columnInference);
            }

            MLContext mlContext = new MLContext();

            // Trainer with a single explicit hyperparameter.
            var parameterValues = new List <Microsoft.ML.AutoML.IParameterValue>()
            {
                new LongParameterValue("NumLeaves", 2)
            };
            var hyperparameters = new Microsoft.ML.AutoML.ParameterSet(parameterValues);
            var ovaTrainer      = new SuggestedTrainer(mlContext, new FastForestOvaExtension(), new ColumnInformation(), hyperparameters);

            var preTransforms = new List <SuggestedTransform>()
            {
                ColumnConcatenatingExtension.CreateSuggestedTransform(mlContext, new[] { "In" }, "Out")
            };
            var suggestedPipeline = new SuggestedPipeline(preTransforms, new List <SuggestedTransform>(), ovaTrainer, mlContext, true);

            this._mockedOvaPipeline = suggestedPipeline.ToPipeline();

            var loaderOptions = new TextLoader.Options()
            {
                Columns = new[] {
                    new TextLoader.Column("Label", DataKind.Boolean, 0),
                    new TextLoader.Column("col1", DataKind.Single, 1),
                    new TextLoader.Column("col2", DataKind.Single, 0),
                    new TextLoader.Column("col3", DataKind.String, 0),
                    new TextLoader.Column("col4", DataKind.Int32, 0),
                    new TextLoader.Column("col5", DataKind.UInt32, 0),
                },
                AllowQuoting = true,
                AllowSparse  = true,
                HasHeader    = true,
                Separators   = new[] { ',' }
            };

            this._columnInference = new ColumnInferenceResults()
            {
                TextLoaderOptions = loaderOptions,
                ColumnInformation = new ColumnInformation()
                {
                    LabelColumnName = "Label"
                }
            };

            return (_mockedOvaPipeline, _columnInference);
        }
        /// <summary>
        /// Returns a suggested-pipeline based mock (column concatenation plus a matrix-factorization
        /// trainer without hyperparameters) and matching column inference results. Both are built on
        /// the first call and reused while <c>mockedPipeline</c> is non-null.
        /// </summary>
        /// <returns>The mocked pipeline and its column inference results.</returns>
        private (Pipeline, ColumnInferenceResults) GetMockedRecommendationPipelineAndInference()
        {
            // Already built: hand back the stored instances.
            if (mockedPipeline != null)
            {
                return (mockedPipeline, columnInference);
            }

            MLContext mlContext = new MLContext();

            var trainerColumns = new ColumnInformation()
            {
                LabelColumnName  = "Label",
                UserIdColumnName = "userId",
                ItemIdColumnName = "movieId",
            };
            var recommendationTrainer = new SuggestedTrainer(mlContext, new MatrixFactorizationExtension(), trainerColumns, hyperParamSet: null);

            var preTransforms = new List <SuggestedTransform>()
            {
                ColumnConcatenatingExtension.CreateSuggestedTransform(mlContext, new[] { "In" }, "Out")
            };
            var suggestedPipeline = new SuggestedPipeline(preTransforms, new List <SuggestedTransform>(), recommendationTrainer, mlContext, false);

            mockedPipeline = suggestedPipeline.ToPipeline();

            var loaderOptions = new TextLoader.Options()
            {
                Columns = new[] {
                    new TextLoader.Column("Label", DataKind.String, 0),
                    new TextLoader.Column("userId", DataKind.String, 1),
                    new TextLoader.Column("movieId", DataKind.String, 2),
                },
                AllowQuoting = true,
                AllowSparse  = true,
                HasHeader    = true,
                Separators   = new[] { ',' }
            };

            this.columnInference = new ColumnInferenceResults()
            {
                TextLoaderOptions = loaderOptions,
                ColumnInformation = new ColumnInformation()
                {
                    LabelColumnName  = "Label",
                    UserIdColumnName = "userId",
                    ItemIdColumnName = "movieId"
                }
            };

            return (mockedPipeline, columnInference);
        }
Exemplo n.º 16
0
        // Writes a two-line CSV whose header repeats some column names, loads it, and checks the
        // sample data generated for each column under its "_col_<index>" key.
        public async Task TestGenerateSampleDataAsyncDuplicateColumnNames()
        {
            var samplePath = "sample2.txt";

            // Arrange: write the header row and one data row; the writer is closed before the file is read.
            using (var writer = new StreamWriter(samplePath))
            {
                await writer.WriteLineAsync("Label,STR,STR,PATH,INT,DOUBLE,FLOAT,FLOAT,TrickySTR,SingleNan,SinglePositiveInfinity,SingleNegativeInfinity,SingleNegativeInfinity,EmptyString,One,T");

                await writer.WriteLineAsync("label1,feature1,feature2,/path/to/file,2,1.2,1.223E+10,1.223E+11,ab\"\';@#$%^&-++==,NaN,Infinity,-Infinity,-Infinity,,1,T");

                await writer.FlushAsync();
            }

            var mlContext = new MLContext();
            var loadedData = mlContext.Data.LoadFromTextFile <TestClassContainsDuplicates>(samplePath, separatorChar: ',', hasHeader: true);
            var inference = new ColumnInferenceResults()
            {
                ColumnInformation = new ColumnInformation()
                {
                    LabelColumnName = "Label_col_0"
                }
            };

            // Act
            var samples = Utils.GenerateSampleData(loadedData, inference);

            // Assert: strings
            Assert.Equal("@\"feature1\"", samples["STR_col_1"]);
            Assert.Equal("@\"feature2\"", samples["STR_col_2"]);
            Assert.Equal("@\"/path/to/file\"", samples["PATH_col_3"]);

            // Assert: numbers
            Assert.Equal("2", samples["INT_col_4"]);
            Assert.Equal("1.2", samples["DOUBLE_col_5"]);
            Assert.Equal("1.223E+10F", samples["FLOAT_col_6"]);
            Assert.Equal("1.223E+11F", samples["FLOAT_col_7"]);

            // Assert: string containing quotes and symbols
            Assert.Equal("@\"ab\\\"\';@#$%^&-++==\"", samples["TrickySTR_col_8"]);

            // Assert: special floating-point values
            Assert.Equal("Single.NaN", samples["SingleNan_col_9"]);
            Assert.Equal("Single.PositiveInfinity", samples["SinglePositiveInfinity_col_10"]);
            Assert.Equal("Single.NegativeInfinity", samples["SingleNegativeInfinity_col_11"]);
            Assert.Equal("Single.NegativeInfinity", samples["SingleNegativeInfinity_col_12"]);

            // Assert: empty string and the two columns expected to render as "true"
            Assert.Equal("@\"\"", samples["EmptyString_col_13"]);
            Assert.Equal("true", samples["One_col_14"]);
            Assert.Equal("true", samples["T_col_15"]);
        }
Exemplo n.º 17
0
        // For every column set in CodeGenTestData.inputColumns, generates the class labels and
        // compares them with the entry at the same index in CodeGenTestData.expectedLabels.
        public void ClassLabelGenerationTest()
        {
            // Both test-data lists are indexed in lockstep below, so they must be the same length.
            Assert.Equal(CodeGenTestData.inputColumns.Count, CodeGenTestData.expectedLabels.Count);
            for (int i = 0; i < CodeGenTestData.inputColumns.Count; i++)
            {
                var result = new ColumnInferenceResults()
                {
                    TextLoaderOptions = new TextLoader.Options()
                    {
                        Columns        = CodeGenTestData.inputColumns[i],
                        AllowQuoting   = false,
                        AllowSparse    = false,
                        Separators     = new[] { ',' },
                        HasHeader      = true,
                        TrimWhitespace = true
                    },
                    ColumnInformation = new ColumnInformation()
                };

                CodeGenerator codeGenerator = new CodeGenerator(null, result, null);
                var           actualLabels  = codeGenerator.GenerateClassLabels();

                // Assert.Equal takes (expected, actual); passing them in that order keeps the
                // "Expected"/"Actual" sections of a failure message pointing at the right values.
                Assert.Equal(CodeGenTestData.expectedLabels[i], actualLabels);
            }
        }
Exemplo n.º 18
0
 /// <summary>
 /// Stores the pipeline, column inference result and settings for later use by this generator.
 /// </summary>
 /// <param name="pipeline">Assigned to <c>_pipeline</c>.</param>
 /// <param name="columnInferenceResult">Assigned to <c>_columnInferenceResult</c>.</param>
 /// <param name="settings">Assigned to <c>_settings</c>.</param>
 internal CodeGenerator(Pipeline pipeline, ColumnInferenceResults columnInferenceResult, CodeGeneratorSettings settings)
 {
     // Plain field capture; no validation is performed on any argument.
     _settings              = settings;
     _columnInferenceResult = columnInferenceResult;
     _pipeline              = pipeline;
 }
        /// <summary>
        /// Stores the inputs and renders the three generated files of the console app:
        /// <c>ModelBuilder.cs</c>, the <c>.csproj</c> project file and <c>Program.cs</c>.
        /// </summary>
        /// <param name="pipeline">Pipeline whose pre-trainer transforms are emitted into the model builder.</param>
        /// <param name="columnInferenceResults">Supplies the text loader options and columns.</param>
        /// <param name="options">Settings such as output name, dataset path, target and package versions.</param>
        /// <param name="namespaceValue">Namespace written into every generated file.</param>
        public AzureAttachConsoleAppCodeGenerator(Pipeline pipeline, ColumnInferenceResults columnInferenceResults, CodeGeneratorSettings options, string namespaceValue)
        {
            _pipeline              = pipeline;
            _settings              = options;
            _columnInferenceResult = columnInferenceResults;
            _nameSpaceValue        = namespaceValue;
            Name = $"{_settings.OutputName}.ConsoleApp";

            // Only the third element (pre-trainer transforms) of the result is needed.
            var(_, _, preTrainerTransforms, _) = _pipeline.GenerateTransformsAndTrainers();

            // Loader settings shared by the model builder and the predict program.
            var loaderOptions = _columnInferenceResult.TextLoaderOptions;
            var separator     = loaderOptions.Separators.FirstOrDefault();

            // --- ModelBuilder.cs ---
            var modelBuilderTemplate = new AzureModelBuilder()
            {
                Path                 = _settings.TrainDataset,
                HasHeader            = loaderOptions.HasHeader,
                Separator            = separator,
                PreTrainerTransforms = preTrainerTransforms,
                AllowQuoting         = loaderOptions.AllowQuoting,
                AllowSparse          = loaderOptions.AllowSparse,
                Namespace            = _nameSpaceValue,
                Target               = _settings.Target,
                OnnxModelPath        = _settings.OnnxModelPath,
                MLNetModelpath       = _settings.ModelPath,
            };
            ModelBuilder = new CSharpCodeFile()
            {
                File = modelBuilderTemplate.TransformText(),
                Name = "ModelBuilder.cs",
            };

            // --- project file: only the ONNX package and, for image settings, the image transformer ---
            var projectTemplate = new PredictProject()
            {
                Namespace = _nameSpaceValue,
                IncludeMklComponentsPackage       = false,
                IncludeLightGBMPackage            = false,
                IncludeFastTreePackage            = false,
                IncludeImageTransformerPackage    = _settings.IsImage,
                IncludeImageClassificationPackage = false,
                IncludeOnnxPackage        = true,
                IncludeResNet18Package    = false,
                IncludeRecommenderPackage = false,
                StablePackageVersion      = _settings.StablePackageVersion,
                UnstablePackageVersion    = _settings.UnstablePackageVersion,
            };
            PredictProject = new CSharpProjectFile()
            {
                File = projectTemplate.TransformText(),
                Name = $"{_settings.OutputName}.ConsoleApp.csproj",
            };

            // --- Program.cs: features are all columns except the label ---
            var loaderColumns = loaderOptions.Columns;
            var featureNames  = loaderColumns.Where(column => column.Name != _settings.LabelName)
                                             .Select(column => column.Name)
                                             .ToList();
            var sampleData = Utils.GenerateSampleData(_settings.TrainDataset, _columnInferenceResult);

            var programTemplate = new PredictProgram()
            {
                TaskType     = _settings.MlTask.ToString(),
                LabelName    = _settings.LabelName,
                Namespace    = _nameSpaceValue,
                AllowQuoting = loaderOptions.AllowQuoting,
                AllowSparse  = loaderOptions.AllowSparse,
                HasHeader    = loaderOptions.HasHeader,
                Separator    = separator,
                Target       = _settings.Target,
                Features     = featureNames,
                SampleData   = sampleData,
            };
            PredictProgram = new CSharpCodeFile()
            {
                File = programTemplate.TransformText(),
                Name = "Program.cs",
            };
        }
Exemplo n.º 20
0
        /// <summary>
        /// Builds a mocked two-node pipeline (ONNX model application followed by label mapping),
        /// column inference results and a column mapping for a dataset whose column names are
        /// unsanitized: some differ only by case ("Model" / "model") and some contain spaces
        /// ("work category").
        /// </summary>
        /// <returns>A tuple of the mocked pipeline, the column inference results and the column mapping.</returns>
        private (Pipeline, ColumnInferenceResults, IDictionary<string, CodeGeneratorSettings.ColumnMapping>) GetMockedAzurePipelineAndInferenceUnsanitizedColumnNames()
        {
            // Small factory so every mapping entry below fits on one line.
            CodeGeneratorSettings.ColumnMapping MapTo(string mappedName, DataKind mappedType) =>
                new CodeGeneratorSettings.ColumnMapping()
                {
                    ColumnName = mappedName,
                    ColumnType = mappedType,
                };

            // Pipeline: ONNX transform + label-mapping transform.
            var onnxProperties = new Dictionary<string, object>()
            {
                { "outputColumnNames", "output1" },
                { "inputColumnNames", "input1" },
            };
            var onnxNode = new PipelineNode(
                nameof(SpecialTransformer.ApplyOnnxModel),
                PipelineNodeType.Transform,
                new[] { "input.1" },
                new[] { "output.1" },
                onnxProperties);
            var labelMapNode = new PipelineNode(
                nameof(SpecialTransformer.LabelMapping),
                PipelineNodeType.Transform,
                string.Empty,
                string.Empty);
            var pipeline = new Pipeline(new PipelineNode[] { onnxNode, labelMapNode });

            // Text loader options describing the eight source columns.
            var loaderOptions = new TextLoader.Options();
            loaderOptions.Columns = new[]
            {
                new TextLoader.Column("id", DataKind.Int32, 0),
                new TextLoader.Column("MsAssetNum", DataKind.Int32, 1),
                new TextLoader.Column("Make", DataKind.String, 2),
                new TextLoader.Column("Model", DataKind.String, 3),
                new TextLoader.Column("model", DataKind.Double, 4),
                new TextLoader.Column("work category", DataKind.String, 5),
                new TextLoader.Column("Work category", DataKind.Int32, 6),
                new TextLoader.Column("IsDetachable", DataKind.Boolean, 7),
            };
            loaderOptions.AllowQuoting = true;
            loaderOptions.AllowSparse  = true;
            loaderOptions.HasHeader    = true;
            loaderOptions.Separators   = new[] { ',' };

            var inference = new ColumnInferenceResults()
            {
                TextLoaderOptions = loaderOptions,
                ColumnInformation = new ColumnInformation() { LabelColumnName = "Label" },
            };

            // Mocked mapping from source column name to mapped column name/type.
            var columnMapping = new Dictionary<string, CodeGeneratorSettings.ColumnMapping>();
            columnMapping.Add("id", MapTo("input_0", DataKind.Int32));
            columnMapping.Add("MsAssetNum", MapTo("input_1", DataKind.Int32));
            columnMapping.Add("Make", MapTo("input_2", DataKind.String));
            columnMapping.Add("Model", MapTo("input_3", DataKind.String));
            columnMapping.Add("model", MapTo("input_4", DataKind.Double));
            columnMapping.Add("work category", MapTo("input_5", DataKind.String));
            // NOTE(review): this key is spelled "Work Category" while the loader column above is
            // "Work category"; with a case-sensitive lookup the two do not match. Confirm whether
            // the mismatch is intentional for this test.
            columnMapping.Add("Work Category", MapTo("input_6", DataKind.Int32));
            columnMapping.Add("IsDetachable", MapTo("input_7", DataKind.Boolean));

            return (pipeline, inference, columnMapping);
        }
Exemplo n.º 21
0
        /// <summary>
        /// Generates the source lines of the model-input class members, one member per text loader column.
        /// </summary>
        /// <remarks>
        /// For every column three entries are appended to the result, in this order: the attribute
        /// line (<c>ColumnName</c>/<c>LoadColumn</c>, plus <c>VectorType</c> for multi-index columns),
        /// the property declaration, and a <c>"\r\n"</c> separator. Only the first source range of a
        /// column (<c>column.Source[0]</c>) is considered.
        /// </remarks>
        /// <param name="columnInferenceResults">Inference results whose text loader columns are rendered.</param>
        /// <param name="columnMapping">
        /// Optional overrides keyed by column name. When a column has an entry, the entry's type and
        /// name are used for the C# type and the <c>ColumnName</c> attribute; the property identifier
        /// is still derived from the original column name.
        /// </param>
        /// <returns>The generated lines.</returns>
        /// <exception cref="ArgumentException">The effective data kind of a column is not supported.</exception>
        internal static IList<string> GenerateClassLabels(ColumnInferenceResults columnInferenceResults, IDictionary<string, CodeGeneratorSettings.ColumnMapping> columnMapping = default)
        {
            IList<string> result = new List<string>();

            foreach (var column in columnInferenceResults.TextLoaderOptions.Columns)
            {
                var  source  = column.Source[0];
                int  range   = (source.Max - source.Min).Value;
                bool isArray = range > 0;

                // Start from the column's own type/name and let the mapping override both.
                DataKind dataKind   = column.DataKind;
                string   columnName = column.Name;

                if (columnMapping != null && columnMapping.TryGetValue(column.Name, out var mapped))
                {
                    dataKind   = mapped.ColumnType;
                    columnName = mapped.ColumnName;
                }

                StringBuilder sb = new StringBuilder();
                sb.Append(Symbols.PublicSymbol);
                sb.Append(Symbols.Space);

                switch (dataKind)
                {
                case DataKind.String:
                    sb.Append(Symbols.StringSymbol);
                    break;

                case DataKind.Boolean:
                    sb.Append(Symbols.BoolSymbol);
                    break;

                case DataKind.Single:
                    sb.Append(Symbols.FloatSymbol);
                    break;

                case DataKind.Double:
                    sb.Append(Symbols.DoubleSymbol);
                    break;

                case DataKind.Int32:
                    sb.Append(Symbols.IntSymbol);
                    break;

                case DataKind.UInt32:
                    sb.Append(Symbols.UIntSymbol);
                    break;

                case DataKind.Int64:
                    sb.Append(Symbols.LongSymbol);
                    break;

                case DataKind.UInt64:
                    sb.Append(Symbols.UlongSymbol);
                    break;

                default:
                    // Report the kind that was actually evaluated (it may come from the mapping).
                    throw new ArgumentException($"The data type '{dataKind}' is not handled currently.");
                }

                if (isArray)
                {
                    // Attributes inside one [...] list must be comma separated; the comma before
                    // VectorType is required for the generated code to compile.
                    result.Add($"[ColumnName(\"{columnName}\"),LoadColumn({source.Min}, {source.Max}), VectorType({(range + 1)})]");
                    sb.Append("[]");
                }
                else
                {
                    result.Add($"[ColumnName(\"{columnName}\"), LoadColumn({source.Min})]");
                }

                sb.Append(" ");
                sb.Append(Utils.Normalize(column.Name));
                sb.Append("{get; set;}");
                result.Add(sb.ToString());
                result.Add("\r\n");
            }

            return result;
        }
        /// <summary>
        /// Builds a mocked two-node pipeline (ONNX model application followed by label mapping),
        /// column inference results for a fifteen-column dataset, and a column mapping that renames
        /// every column (for example "Age" to "input_0") and changes the type of the numeric ones.
        /// </summary>
        /// <returns>A tuple of the mocked pipeline, the column inference results and the column mapping.</returns>
        private (Pipeline, ColumnInferenceResults, IDictionary<string, CodeGeneratorSettings.ColumnMapping>) GetMockedAzurePipelineAndInference()
        {
            // Small factory so every mapping entry below fits on one line.
            CodeGeneratorSettings.ColumnMapping MapTo(string mappedName, DataKind mappedType) =>
                new CodeGeneratorSettings.ColumnMapping()
                {
                    ColumnName = mappedName,
                    ColumnType = mappedType,
                };

            // Pipeline: ONNX transform + label-mapping transform.
            var onnxProperties = new Dictionary<string, object>()
            {
                { "outputColumnNames", "output1" },
                { "inputColumnNames", "input1" },
            };
            var onnxNode = new PipelineNode(
                nameof(SpecialTransformer.ApplyOnnxModel),
                PipelineNodeType.Transform,
                new[] { "input.1" },
                new[] { "output.1" },
                onnxProperties);
            var labelMapNode = new PipelineNode(
                nameof(SpecialTransformer.LabelMapping),
                PipelineNodeType.Transform,
                string.Empty,
                string.Empty);
            var pipeline = new Pipeline(new PipelineNode[] { onnxNode, labelMapNode });

            // Text loader options describing the source columns.
            var loaderOptions = new TextLoader.Options();
            loaderOptions.Columns = new[]
            {
                new TextLoader.Column("Age", DataKind.Double, 0),
                // NOTE(review): the original carried an unexplained "0?" remark on this column's index.
                new TextLoader.Column("Workclass", DataKind.String, 1),
                new TextLoader.Column("Fnlwgt", DataKind.Double, 2),
                new TextLoader.Column("Education", DataKind.String, 3),
                new TextLoader.Column("Education_num", DataKind.Double, 4),
                new TextLoader.Column("Marital_status", DataKind.String, 5),
                new TextLoader.Column("Occupation", DataKind.String, 6),
                new TextLoader.Column("Relationship", DataKind.String, 7),
                new TextLoader.Column("Race", DataKind.String, 8),
                new TextLoader.Column("Sex", DataKind.String, 9),
                new TextLoader.Column("Capital_gain", DataKind.Double, 10),
                new TextLoader.Column("Capital_loss", DataKind.Double, 11),
                new TextLoader.Column("Hours_per_week", DataKind.Double, 12),
                new TextLoader.Column("Native_country", DataKind.String, 13),
                new TextLoader.Column("label", DataKind.Boolean, 14),
            };
            loaderOptions.AllowQuoting = true;
            loaderOptions.AllowSparse  = true;
            loaderOptions.HasHeader    = true;
            loaderOptions.Separators   = new[] { ',' };

            var inference = new ColumnInferenceResults()
            {
                TextLoaderOptions = loaderOptions,
                ColumnInformation = new ColumnInformation() { LabelColumnName = "Label" },
            };

            // Mocked mapping from source column name to mapped column name/type.
            var columnMapping = new Dictionary<string, CodeGeneratorSettings.ColumnMapping>();
            columnMapping.Add("Age", MapTo("input_0", DataKind.Int64));
            columnMapping.Add("Workclass", MapTo("input_1", DataKind.String));
            columnMapping.Add("Fnlwgt", MapTo("input_2", DataKind.Int64));
            columnMapping.Add("Education", MapTo("input_3", DataKind.String));
            columnMapping.Add("Education_num", MapTo("input_4", DataKind.Int64));
            columnMapping.Add("Marital_status", MapTo("input_5", DataKind.String));
            columnMapping.Add("Occupation", MapTo("input_6", DataKind.String));
            columnMapping.Add("Relationship", MapTo("input_7", DataKind.String));
            columnMapping.Add("Race", MapTo("input_8", DataKind.String));
            columnMapping.Add("Sex", MapTo("input_9", DataKind.String));
            columnMapping.Add("Capital_gain", MapTo("input_10", DataKind.Int64));
            columnMapping.Add("Capital_loss", MapTo("input_11", DataKind.Int64));
            columnMapping.Add("Hours_per_week", MapTo("input_12", DataKind.Int64));
            columnMapping.Add("Native_country", MapTo("input_13", DataKind.String));
            columnMapping.Add("label", MapTo("label(IsOver50K)", DataKind.Boolean));

            return (pipeline, inference, columnMapping);
        }
Exemplo n.º 23
0
        /// <summary>
        /// Builds a sample record from the first row of <paramref name="dataView"/>.
        /// </summary>
        /// <remarks>
        /// The label column and every ignored column named in
        /// <paramref name="columnInference"/> are excluded. Each remaining column starts with the
        /// placeholder text <c>"null"</c>; if the view has at least one row, the placeholder is
        /// replaced by the value read through <c>Utils.GetValueFromColumn</c>.
        /// </remarks>
        /// <param name="dataView">Data to sample from.</param>
        /// <param name="columnInference">Supplies the label column name and the ignored column names.</param>
        /// <returns>A dictionary keyed by normalized column name.</returns>
        internal static IDictionary<string, string> GenerateSampleData(IDataView dataView, ColumnInferenceResults columnInference)
        {
            // Materialize once: the filtered columns are consumed three times below, and a
            // deferred query would re-run the filter on every enumeration.
            var featureColumns = dataView.Schema.AsEnumerable()
                                 .Where(col => col.Name != columnInference.ColumnInformation.LabelColumnName &&
                                        !columnInference.ColumnInformation.IgnoredColumnNames.Contains(col.Name))
                                 .ToList();

            var sampleData = featureColumns.Select(column => new { key = Utils.Normalize(column.Name), val = "null" }).ToDictionary(x => x.key, x => x.val);

            // The cursor is disposable; release it as soon as the first row has been read.
            using (var rowCursor = dataView.GetRowCursor(featureColumns))
            {
                if (rowCursor.MoveNext())
                {
                    var getValueMethod = typeof(Utils).GetMethod(nameof(Utils.GetValueFromColumn), BindingFlags.Static | BindingFlags.Public | BindingFlags.NonPublic);

                    foreach (var column in featureColumns)
                    {
                        // GetValueFromColumn is generic over the column's raw CLR type, which is only
                        // known at run time, hence the reflection-based invocation.
                        var    typedGetValueMethod = getValueMethod.MakeGenericMethod(column.Type.RawType);
                        string val = typedGetValueMethod.Invoke(null, new object[] { rowCursor, column }) as string;
                        sampleData[Utils.Normalize(column.Name)] = val;
                    }
                }
            }

            return sampleData;
        }
Exemplo n.º 24
0
 /// <summary>
 /// Loads <paramref name="inputFile"/> with the text loader options from
 /// <paramref name="columnInference"/> and builds sample data from the loaded view.
 /// </summary>
 /// <param name="inputFile">Path to the input file.</param>
 /// <param name="columnInference">Column inference result.</param>
 /// <returns>
 /// A dictionary whose keys are sanitized column names and whose values come from the first line
 /// of data, or <c>null</c> when loading or sampling throws.
 /// </returns>
 internal static IDictionary<string, string> GenerateSampleData(string inputFile, ColumnInferenceResults columnInference)
 {
     IDictionary<string, string> sample;

     try
     {
         var context = new MLContext();
         var loader  = context.Data.CreateTextLoader(columnInference.TextLoaderOptions);
         sample = Utils.GenerateSampleData(loader.Load(inputFile), columnInference);
     }
     catch (Exception)
     {
         // Best effort: any failure is reported to the caller as "no sample data".
         sample = null;
     }

     return sample;
 }
Exemplo n.º 25
0
        /// <summary>
        /// Builds a sample record from the first row of <paramref name="dataView"/>.
        /// </summary>
        /// <remarks>
        /// The label column and every ignored column named in
        /// <paramref name="columnInference"/> are excluded. Keys are produced by
        /// <c>GenerateColumnNames</c>; each value starts as <c>null</c> and, if the view has at
        /// least one row, is replaced by the value read through <c>Utils.GetValueFromColumn</c>.
        /// </remarks>
        /// <param name="dataView">Data to sample from.</param>
        /// <param name="columnInference">Supplies the label column name and the ignored column names.</param>
        /// <returns>A dictionary keyed by the generated column names.</returns>
        internal static IDictionary<string, string> GenerateSampleData(IDataView dataView, ColumnInferenceResults columnInference)
        {
            var featureColumns = dataView.Schema.ToList().FindAll(
                col => col.Name != columnInference.ColumnInformation.LabelColumnName &&
                !columnInference.ColumnInformation.IgnoredColumnNames.Contains(col.Name));

            OrderedDictionary sampleData = new OrderedDictionary();
            // Get normalized and unique column names. If there are duplicate column names, the
            // differentiator suffix '_col_x' will be added to each column name, where 'x' is
            // the load order for a given column.
            List<string> normalizedColumnNames = GenerateColumnNames(featureColumns.Select(column => column.Name).ToList());

            foreach (string columnName in normalizedColumnNames)
            {
                sampleData[columnName] = null;
            }

            // The cursor is disposable; release it as soon as the first row has been read.
            using (var rowCursor = dataView.GetRowCursor(featureColumns))
            {
                if (rowCursor.MoveNext())
                {
                    var getValueMethod = typeof(Utils).GetMethod(nameof(Utils.GetValueFromColumn), BindingFlags.Static | BindingFlags.Public | BindingFlags.NonPublic);

                    // Address every entry by position rather than by name: the keys in sampleData may
                    // carry the '_col_x' suffix and so no longer equal the schema column names.
                    for (int i = 0; i < featureColumns.Count; i++)
                    {
                        // GetValueFromColumn is generic over the column's raw CLR type, which is only
                        // known at run time, hence the reflection-based invocation.
                        var    typedGetValueMethod = getValueMethod.MakeGenericMethod(featureColumns[i].Type.RawType);
                        string val = typedGetValueMethod.Invoke(null, new object[] { rowCursor, featureColumns[i] }) as string;
                        sampleData[i] = val;
                    }
                }
            }

            return sampleData.Cast<DictionaryEntry>().ToDictionary(k => (string)k.Key, v => (string)v.Value);
        }
Exemplo n.º 26
0
        /// <summary>
        /// Stores the supplied pipeline, settings, inference results and namespace, then renders
        /// every generated file of the model project: model input/output classes, normalize and
        /// label mapping classes, the project file and the consume-model class.
        /// </summary>
        /// <param name="pipeline">Pipeline kept in <c>_pipeline</c>.</param>
        /// <param name="columnInferenceResults">Column inference results used for class labels and the label type.</param>
        /// <param name="options">Code generator settings read by the templates.</param>
        /// <param name="namespaceValue">Namespace passed to the generated code files.</param>
        public AzureAttachModelCodeGenerator(Pipeline pipeline, ColumnInferenceResults columnInferenceResults, CodeGeneratorSettings options, string namespaceValue)
        {
            _pipeline              = pipeline;
            _settings              = options;
            _columnInferenceResult = columnInferenceResults;
            _nameSpaceValue        = namespaceValue;
            Name                   = $"{_settings.OutputName}.Model";

            // Model input class.
            var modelInputTemplate = new ModelInputClass()
            {
                Namespace   = _nameSpaceValue,
                ClassLabels = Utilities.Utils.GenerateClassLabels(_columnInferenceResult, _settings.OnnxInputMapping),
                Target      = _settings.Target
            };
            ModelInputClass = new CSharpCodeFile()
            {
                File = modelInputTemplate.TransformText(),
                Name = "ModelInput.cs",
            };

            // C# type of the label column; First() throws when no column carries the label name.
            var    labelColumn       = _columnInferenceResult.TextLoaderOptions.Columns.Where(col => col.Name == _settings.LabelName).First();
            Type   predictionType    = Utils.GetCSharpType(labelColumn.DataKind);
            string predictionTypeName = predictionType.Name;

            // Model output class.
            var modelOutputTemplate = new ModelOutputClass()
            {
                Namespace           = _nameSpaceValue,
                Target              = _settings.Target,
                TaskType            = _settings.MlTask.ToString(),
                PredictionLabelType = predictionTypeName,
            };
            ModelOutputClass = new CSharpCodeFile()
            {
                File = modelOutputTemplate.TransformText(),
                Name = "ModelOutput.cs",
            };

            // Normalize mapping.
            var normalizeTemplate = new NormalizeMapping()
            {
                Target    = _settings.Target,
                Namespace = _nameSpaceValue,
            };
            NormalizeMapping = new CSharpCodeFile()
            {
                File = normalizeTemplate.TransformText(),
                Name = "NormalizeMapping.cs",
            };

            // Model project file.
            var modelProjectTemplate = new ModelProject()
            {
                IncludeFastTreePackage            = false,
                IncludeImageClassificationPackage = false,
                IncludeImageTransformerPackage    = _settings.IsImage,
                IncludeLightGBMPackage            = false,
                IncludeMklComponentsPackage       = false,
                IncludeOnnxModel                  = true,
                IncludeRecommenderPackage         = false,
                StablePackageVersion              = _settings.StablePackageVersion,
                UnstablePackageVersion            = _settings.UnstablePackageVersion,
            };
            ModelProject = new CSharpProjectFile()
            {
                File = modelProjectTemplate.TransformText(),
                Name = $"{_settings.OutputName}.Model.csproj",
            };

            // Label mapping.
            var labelMappingTemplate = new LabelMapping()
            {
                Target                     = _settings.Target,
                Namespace                  = _nameSpaceValue,
                LabelMappingInputLabelType = typeof(Int64).Name,
                PredictionLabelType        = predictionTypeName,
                TaskType                   = _settings.MlTask.ToString(),
            };
            LabelMapping = new CSharpCodeFile()
            {
                File = labelMappingTemplate.TransformText(),
                Name = "LabelMapping.cs",
            };

            // Image label mapping; it is given the same file name as the label mapping above.
            var imageLabelMappingTemplate = new ImageLabelMapping()
            {
                Target    = _settings.Target,
                Namespace = _nameSpaceValue,
                Labels    = _settings.ClassificationLabel,
            };
            ImageLabelMapping = new CSharpCodeFile()
            {
                File = imageLabelMappingTemplate.TransformText(),
                Name = "LabelMapping.cs",
            };

            // Consume-model class.
            var consumeModelTemplate = new ConsumeModel()
            {
                Namespace           = _nameSpaceValue,
                Target              = _settings.Target,
                HasLabelMapping     = true,
                HasNormalizeMapping = _settings.IsImage,
                MLNetModelpath      = _settings.ModelPath,
            };
            ConsumeModel = new CSharpCodeFile()
            {
                File = consumeModelTemplate.TransformText(),
                Name = "ConsumeModel.cs",
            };
        }
Exemplo n.º 27
0
        /// <summary>
        /// Runs an AutoML binary classification experiment on <c>TrainDataView</c> and prints the
        /// top models it found.
        /// </summary>
        /// <param name="mlContext">ML.NET context used to create the experiment and its settings.</param>
        /// <param name="columnInference">Inferred columns; its column information is passed through
        /// <c>CorrectColumnTypes</c> before being handed to the experiment.</param>
        /// <returns>The result of the executed experiment.</returns>
        private static ExperimentResult<BinaryClassificationMetrics> RunAutoMLExperiment(MLContext mlContext,
                                                                                         ColumnInferenceResults columnInference)
        {
            // STEP 1: Display first few rows of the training data.

            // ConsoleHelper.ShowDataViewInConsole(mlContext, TrainDataView);

            // STEP 2: Build a pre-featurizer for use in the AutoML experiment.
            // (Internally, AutoML uses one or more train/validation data splits to
            // evaluate the models it produces. The pre-featurizer is fit only on the
            // training data split to produce a trained transform. Then, the trained transform
            // is applied to both the train and validation data splits.)
            //IEstimator<ITransformer> preFeaturizer = mlContext.Transforms.Conversion.MapValue("is_cash",
            //    new[] { new KeyValuePair<string, bool>("CSH", true) }, "payment_type");

            //IEstimator<ITransformer> preFeaturizer = mlContext.Transforms.Conversion.MapValue("fstrClassCategory",
            //    new[] { new KeyValuePair<float, String>(1, "First"), new KeyValuePair<float, String>(2, "Second"), new KeyValuePair<float, String>(3, "Third") }, "fstrClass").Append(mlContext.Transforms.Categorical.OneHotEncoding("fstrClassCategory", "fstrClassCategory")).Append(mlContext.Transforms.DropColumns("fstrClass"));

            // STEP 3: Customize column information returned by InferColumns API.
            ColumnInformation columnInformation = CorrectColumnTypes(columnInference.ColumnInformation);

            // columnInformation.NumericColumnNames.Remove("fstrClass");
            // columnInformation.CategoricalColumnNames.Add("fstrClass");
            // columnInformation.IgnoredColumnNames.Add("fstrClass");

            // STEP 4: Initialize a cancellation token source to stop the experiment.
            // NOTE(review): the source is handed to CancelExperimentAfterAnyKeyPress below and is
            // deliberately not disposed here; that helper presumably keeps using it after this
            // method returns. Confirm before adding a using/Dispose.
            var cts = new CancellationTokenSource();

            // STEP 5: Initialize our user-defined progress handler that AutoML will
            // invoke after each model it produces and evaluates.
            var progressHandler = new BinaryExperimentProgressHandler();

            // STEP 6: Create experiment settings
            var experimentSettings = CreateExperimentSettings(mlContext, cts);

            // STEP 7: Run AutoML Binary Classification experiment.
            var experiment = mlContext.Auto().CreateBinaryClassificationExperiment(experimentSettings);

            ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
            // The experiment created above is a binary classification one, so say so.
            Console.WriteLine("Running AutoML binary classification experiment...");
            var stopwatch = Stopwatch.StartNew();

            // Cancel experiment after the user presses any key.
            CancelExperimentAfterAnyKeyPress(cts);
            ExperimentResult<BinaryClassificationMetrics> experimentResult = experiment.Execute(trainData: TrainDataView, columnInformation: columnInformation, progressHandler: progressHandler);

            Console.WriteLine($"{experimentResult.RunDetails.Count()} models were returned after {stopwatch.Elapsed.TotalSeconds:0.00} seconds{Environment.NewLine}");

            // Print top models found by AutoML.
            PrintTopModels(experimentResult);
            // var featureNames = columnInformation.CategoricalColumnNames.Concat(columnInformation.ImagePathColumnNames).Concat(columnInformation.NumericColumnNames).Concat(columnInformation.TextColumnNames).ToList();
            // var permutationMetrics = mlContext.BinaryClassification.PermutationFeatureImportance(predictionTransformer: )
            // PrintContributions(featureNames, TrainDataView, experimentResult.RunDetails);

            // DatasetDimensionsUtil.GetTextColumnCardinality();

            return experimentResult;
        }
Exemplo n.º 28
0
 /// <summary>
 /// Writes a heading to the console, prints <paramref name="results"/> through a
 /// <c>ColumnInferencePrinter</c>, and ends with an empty line.
 /// </summary>
 /// <param name="results">Column inference results to print.</param>
 public static void Print(ColumnInferenceResults results)
 {
     Console.WriteLine("Inferred dataset columns --");

     var printer = new ColumnInferencePrinter(results);
     printer.Print();

     Console.WriteLine();
 }
Exemplo n.º 29
0
 /// <summary>
 /// Creates a printer bound to the given column inference results.
 /// </summary>
 /// <param name="results">Results stored in <c>_results</c>.</param>
 public ColumnInferencePrinter(ColumnInferenceResults results) => _results = results;
        /// <summary>
        /// Stores the supplied pipeline, settings, inference results and namespace, then renders
        /// every generated file of the model project: the model input class, the Azure image and
        /// object-detection output classes, the project file and the consume-model class.
        /// </summary>
        /// <param name="pipeline">Pipeline kept in <c>_pipeline</c>.</param>
        /// <param name="columnInferenceResults">Column inference results used for the class labels.</param>
        /// <param name="options">Code generator settings read by the templates.</param>
        /// <param name="namespaceValue">Namespace passed to the generated code files.</param>
        public AzureAttachModelCodeGenerator(Pipeline pipeline, ColumnInferenceResults columnInferenceResults, CodeGeneratorSettings options, string namespaceValue)
        {
            _pipeline              = pipeline;
            _settings              = options;
            _columnInferenceResult = columnInferenceResults;
            _nameSpaceValue        = namespaceValue;
            Name                   = $"{_settings.OutputName}.Model";

            // Model input class.
            var modelInputTemplate = new ModelInputClass()
            {
                Namespace   = _nameSpaceValue,
                ClassLabels = Utilities.Utils.GenerateClassLabels(_columnInferenceResult, _settings.OnnxInputMapping),
                Target      = _settings.Target
            };
            ModelInputClass = new CSharpCodeFile()
            {
                File = modelInputTemplate.TransformText(),
                Name = "ModelInput.cs",
            };

            // The label type is resolved but not consumed by any template below. The lookup is
            // kept because First() throws when no column carries the label name.
            var  labelColumn    = _columnInferenceResult.TextLoaderOptions.Columns.Where(col => col.Name == _settings.LabelName).First();
            Type predictionType = Utils.GetCSharpType(labelColumn.DataKind);

            // Output class for Azure image models.
            var imageOutputTemplate = new AzureImageModelOutputClass()
            {
                Namespace = _nameSpaceValue,
                Target    = _settings.Target,
                Labels    = _settings.ClassificationLabel,
            };
            AzureImageModelOutputClass = new CSharpCodeFile()
            {
                File = imageOutputTemplate.TransformText(),
                Name = "ModelOutput.cs",
            };

            // Output class for Azure object-detection models; same file name as the image one.
            var objectDetectionOutputTemplate = new AzureObjectDetectionModelOutputClass()
            {
                Namespace = _nameSpaceValue,
                Target    = _settings.Target,
                Labels    = _settings.ObjectLabel,
            };
            AzureObjectDetectionModelOutputClass = new CSharpCodeFile()
            {
                File = objectDetectionOutputTemplate.TransformText(),
                Name = "ModelOutput.cs",
            };

            // Model project file.
            var modelProjectTemplate = new ModelProject()
            {
                IncludeFastTreePackage            = false,
                IncludeImageClassificationPackage = false,
                IncludeImageTransformerPackage    = _settings.IsImage,
                IncludeLightGBMPackage            = false,
                IncludeMklComponentsPackage       = false,
                IncludeOnnxModel                  = true,
                IncludeOnnxRuntime                = _settings.IsObjectDetection,
                IncludeRecommenderPackage         = false,
                StablePackageVersion              = _settings.StablePackageVersion,
                UnstablePackageVersion            = _settings.UnstablePackageVersion,
                OnnxRuntimePackageVersion         = _settings.OnnxRuntimePacakgeVersion,
                Target                            = _settings.Target,
            };
            ModelProject = new CSharpProjectFile()
            {
                File = modelProjectTemplate.TransformText(),
                Name = $"{_settings.OutputName}.Model.csproj",
            };

            // Consume-model class.
            var consumeModelTemplate = new ConsumeModel()
            {
                Namespace              = _nameSpaceValue,
                Target                 = _settings.Target,
                MLNetModelName         = _settings.ModelName,
                OnnxModelName          = _settings.OnnxModelName,
                IsAzureImage           = _settings.IsAzureAttach && _settings.IsImage,
                IsAzureObjectDetection = _settings.IsObjectDetection && _settings.IsAzureAttach,
            };
            ConsumeModel = new CSharpCodeFile()
            {
                File = consumeModelTemplate.TransformText(),
                Name = "ConsumeModel.cs",
            };
        }