Exemple #1
0
        /// <summary>
        /// Fetches AAPL bars for the last month through <see cref="IYahooFinanceService"/>
        /// (requested with the "1h" argument), runs a 20-second AutoML regression experiment
        /// over them and prints the best run to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static async Task Main(string[] args)
        {
            // Both the DI container and the logger factory own disposable resources.
            // Scoping them with using statements releases them once the program is done,
            // instead of leaving them undisposed as before.
            using (var serviceProvider = new ServiceCollection()
                                         .AddLogging()
                                         .AddSingleton<IYahooFinanceService, YahooFinanceService>()
                                         .AddHttpClient()
                                         .BuildServiceProvider())
            using (var loggerFactory = LoggerFactory.Create(builder =>
            {
                builder.AddFilter("Microsoft", LogLevel.Warning)
                       .AddFilter("System", LogLevel.Warning)
                       .AddFilter("CandleStickMachineLearning.Program", LogLevel.Debug)
                       .AddConsole();
            }))
            {
                var logger = loggerFactory.CreateLogger<Program>();

                logger.LogInformation("Hello World!");

                // GetRequiredService throws a descriptive InvalidOperationException when the
                // service is not registered, rather than returning null and failing later
                // with a NullReferenceException.
                var yahooFinanceService = serviceProvider.GetRequiredService<IYahooFinanceService>();

                // Sample the clock once so both ends of the range derive from the same instant.
                var now      = DateTime.UtcNow;
                var barsList = await yahooFinanceService.GetBars("AAPL", now.AddMonths(-1), now, "1h");

                var context   = new MLContext();
                var trainData = context.Data.LoadFromEnumerable<Models.Bar>(barsList);

                var settings = new RegressionExperimentSettings
                {
                    MaxExperimentTimeInSeconds = 20,
                    OptimizingMetric           = RegressionMetric.MeanAbsoluteError
                };

                var labelColumnInfo = new ColumnInformation()
                {
                    LabelColumnName = "Label"
                };

                // Log every run that produced validation metrics.
                var progress = new Progress<RunDetail<RegressionMetrics>>(p =>
                {
                    if (p.ValidationMetrics != null)
                    {
                        logger.LogInformation($"Current Result - {p.TrainerName}, {p.ValidationMetrics.RSquared}, {p.ValidationMetrics.MeanAbsoluteError}");
                    }
                });

                var experiment = context.Auto().CreateRegressionExperiment(settings);
                var result     = experiment.Execute(trainData, labelColumnInfo, progressHandler: progress);

                Console.WriteLine(Environment.NewLine);
                Console.WriteLine("Best run:");
                Console.WriteLine($"Trainer name - {result.BestRun.TrainerName}");
                Console.WriteLine($"RSquared - {result.BestRun.ValidationMetrics.RSquared}");
                Console.WriteLine($"MAE - {result.BestRun.ValidationMetrics.MeanAbsoluteError}");

                // Wait inside the using scope so the logger factory stays alive until the user exits.
                Console.ReadLine();
            }
        }
        /// <summary>
        /// For every <c>TrainerName</c> except Ova, resolves the trainer extension; for all of
        /// those except ImageClassification, additionally checks that the sweep ranges, the
        /// created trainer instance and the created pipeline node are all non-null.
        /// </summary>
        public void TrainerExtensionInstanceTests()
        {
            var mlContext   = new MLContext(1);
            var columns     = new ColumnInformation();
            var allTrainers = Enum.GetValues(typeof(TrainerName)).Cast<TrainerName>();

            foreach (var name in allTrainers.Except(new[] { TrainerName.Ova }))
            {
                // The extension is looked up for every trainer, ImageClassification included.
                var trainerExtension = TrainerExtensionCatalog.GetTrainerExtension(name);

                if (name == TrainerName.ImageClassification)
                {
                    continue;
                }

                IEnumerable<SweepableParam> ranges = trainerExtension.GetHyperparamSweepRanges();
                Assert.NotNull(ranges);

                // Give every hyperparameter a concrete raw value before building the trainer.
                foreach (var range in ranges)
                {
                    range.RawValue = 1;
                }

                Assert.NotNull(trainerExtension.CreateInstance(mlContext, ranges, columns));
                Assert.NotNull(trainerExtension.CreatePipelineNode(null, columns));
            }
        }
        /// <summary>
        /// Checks the pipeline node that <c>FastTreeRankingExtension</c> builds for custom label
        /// and group-id column names against its expected JSON form.
        /// </summary>
        public void BuildFastTreeRankingPipelineNode()
        {
            const string expected = @"{
  ""Name"": ""FastTreeRanking"",
  ""NodeType"": ""Trainer"",
  ""InColumns"": [
    ""Features""
  ],
  ""OutColumns"": [
    ""Score""
  ],
  ""Properties"": {
    ""LabelColumnName"": ""L"",
    ""RowGroupColumnName"": ""GId""
  }
}";

            var rankingColumns = new ColumnInformation { LabelColumnName = "L", GroupIdColumnName = "GId" };

            // No sweep parameters are supplied (null).
            var node = new FastTreeRankingExtension().CreatePipelineNode(null, rankingColumns);

            Util.AssertObjectMatchesJson(expected, node);
        }
Exemple #4
0
        /// <summary>
        /// Checks that <c>ColumnInformationUtil.GetColumnNames</c> returns all eight names
        /// configured on the column information: five purpose columns, two categorical columns
        /// and one numeric column.
        /// </summary>
        public void GetColumnNamesTest()
        {
            var info = new ColumnInformation
            {
                LabelColumnName        = "Label",
                SamplingKeyColumnName  = "SamplingKey",
                UserIdColumnName       = "UserId",
                ItemIdColumnName       = "MovieId",
                GroupIdColumnName      = "GroupId",
                CategoricalColumnNames = { "Cat1", "Cat2" },
                NumericColumnNames     = { "Num" }
            };

            var names = ColumnInformationUtil.GetColumnNames(info);

            Assert.Equal(8, names.Count());

            var expectedNames = new[]
            {
                "Label", "SamplingKey", "UserId", "MovieId", "GroupId", "Cat1", "Cat2", "Num"
            };

            foreach (string expectedName in expectedNames)
            {
                Assert.Contains(expectedName, names);
            }
        }
        /// <summary>
        /// Checks the pipeline node that <c>FastForestBinaryExtension</c> builds from fast-forest
        /// sweep parameters (each with raw value 1) and custom label and weight column names.
        /// </summary>
        public void BuildPipelineNodeWithCustomColumns()
        {
            const string expected = @"{
  ""Name"": ""FastForestBinary"",
  ""NodeType"": ""Trainer"",
  ""InColumns"": [
    ""Features""
  ],
  ""OutColumns"": [
    ""Score""
  ],
  ""Properties"": {
    ""NumberOfLeaves"": 1,
    ""MinimumExampleCountPerLeaf"": 10,
    ""NumberOfTrees"": 100,
    ""LabelColumnName"": ""L"",
    ""ExampleWeightColumnName"": ""W""
  }
}";

            var customColumns = new ColumnInformation { LabelColumnName = "L", ExampleWeightColumnName = "W" };

            var forestParams = SweepableParams.BuildFastForestParams();
            foreach (var forestParam in forestParams)
            {
                forestParam.RawValue = 1;
            }

            var node = new FastForestBinaryExtension().CreatePipelineNode(forestParams, customColumns);

            Util.AssertObjectMatchesJson(expected, node);
        }
Exemple #6
0
        /// <summary>
        /// Checks that <c>ColumnInformationUtil.GetColumnPurpose</c> maps each configured column
        /// name to its purpose and returns null for a name that was never configured.
        /// </summary>
        public void GetColumnPurpose()
        {
            var info = new ColumnInformation
            {
                LabelColumnName         = "Label",
                ExampleWeightColumnName = "Weight",
                SamplingKeyColumnName   = "SamplingKey",
                UserIdColumnName        = "UserId",
                ItemIdColumnName        = "MovieId",
                GroupIdColumnName       = "GroupId",
                CategoricalColumnNames  = { "Cat" },
                NumericColumnNames      = { "Num" },
                TextColumnNames         = { "Text" },
                IgnoredColumnNames      = { "Ignored" }
            };

            // Expected purpose for each configured column name, checked in this order.
            var expectations = new (ColumnPurpose Purpose, string Column)[]
            {
                (ColumnPurpose.Label, "Label"),
                (ColumnPurpose.Weight, "Weight"),
                (ColumnPurpose.SamplingKey, "SamplingKey"),
                (ColumnPurpose.UserId, "UserId"),
                (ColumnPurpose.GroupId, "GroupId"),
                (ColumnPurpose.ItemId, "MovieId"),
                (ColumnPurpose.CategoricalFeature, "Cat"),
                (ColumnPurpose.NumericFeature, "Num"),
                (ColumnPurpose.TextFeature, "Text"),
                (ColumnPurpose.Ignore, "Ignored")
            };

            foreach (var (purpose, column) in expectations)
            {
                Assert.Equal(purpose, ColumnInformationUtil.GetColumnPurpose(info, column));
            }

            Assert.Null(ColumnInformationUtil.GetColumnPurpose(info, "NonExistent"));
        }
        /// <summary>
        /// Runs column inference on TestData/DatasetWithoutHeader.txt with label, user-id,
        /// item-id and ignored columns named col0, col1, col2 and col4, then checks the inferred
        /// column count, the data kinds of those four columns and the inferred categorical and
        /// text column sets.
        /// </summary>
        public void InferDatasetWithoutHeader()
        {
            var mlContext = new MLContext(1);
            var dataFile  = Path.Combine("TestData", "DatasetWithoutHeader.txt");
            var requested = new ColumnInformation
            {
                LabelColumnName    = "col0",
                UserIdColumnName   = "col1",
                ItemIdColumnName   = "col2",
                IgnoredColumnNames = { "col4" }
            };

            var inferred = ColumnInferenceApi.InferColumns(mlContext, dataFile, requested, ',', null, null, false, false, false);

            Assert.Equal(6, inferred.TextLoaderOptions.Columns.Count());

            // Resolve all four columns first; a missing column fails here, before any kind check.
            Func<string, TextLoader.Column> columnNamed =
                name => inferred.TextLoaderOptions.Columns.First(c => c.Name == name);

            var label   = columnNamed("col0");
            var user    = columnNamed("col1");
            var item    = columnNamed("col2");
            var ignored = columnNamed("col4");

            Assert.Equal(DataKind.String, label.DataKind);
            Assert.Equal(DataKind.Single, user.DataKind);
            Assert.Equal(DataKind.Single, item.DataKind);
            Assert.Equal(DataKind.Single, ignored.DataKind);

            Assert.Single(inferred.ColumnInformation.CategoricalColumnNames);
            Assert.Empty(inferred.ColumnInformation.TextColumnNames);
        }
        /// <summary>
        /// Checks that experiment-argument validation throws <see cref="ArgumentException"/> for
        /// a regression task when <c>DefaultColumnNames.Label</c> is added to the numeric columns
        /// of an otherwise default <c>ColumnInformation</c> (per the test name, a duplicate
        /// column entry).
        /// </summary>
        public void ValidateExperimentExecuteDuplicateCol()
        {
            var columnInfo = new ColumnInformation();

            columnInfo.NumericColumnNames.Add(DefaultColumnNames.Label);

            // Only the exception type is verified here, so the returned exception is not kept.
            Assert.Throws<ArgumentException>(() => UserInputValidationUtil.ValidateExperimentExecuteArgs(_data, columnInfo, null, TaskKind.Regression));
        }
Exemple #9
0
        /// <summary>
        /// Infers the schema of the training file, adjusts the inferred column purposes, runs a
        /// 10-second AutoML regression experiment, prints predictions of the best model on the
        /// training data, then refits the best estimator and saves the result to MODEL_FILEPATH.
        /// </summary>
        public void Start()
        {
            // Infer the columns of the training file ("next" is the label) and load it
            // with the inferred loader options.
            var inferred = mlContext.Auto().InferColumns(
                path: TRAIN_DATA_FILEPATH,
                labelColumnName: "next",
                groupColumns: false);

            trainData = mlContext.Data.CreateTextLoader(inferred.TextLoaderOptions).Load(TRAIN_DATA_FILEPATH);

            // Override the inferred purposes: productId and year become categorical,
            // units is ignored.
            columnInformation = inferred.ColumnInformation;

            foreach (var categoricalName in new[] { "productId", "year" })
            {
                columnInformation.CategoricalColumnNames.Add(categoricalName);
                columnInformation.NumericColumnNames.Remove(categoricalName);
            }

            columnInformation.NumericColumnNames.Remove("units");
            columnInformation.IgnoredColumnNames.Add("units");

            var settings = new RegressionExperimentSettings
            {
                MaxExperimentTimeInSeconds = 10,
                OptimizingMetric           = RegressionMetric.RootMeanSquaredError,
                CacheDirectory             = new DirectoryInfo(CACHE_DIRECTORY),
                CancellationToken          = cancelationTokenSource.Token
            };

            // Keep the Ols trainer out of the experiment.
            settings.Trainers.Remove(RegressionTrainer.Ols);

            var experimentResult = mlContext.Auto()
                                            .CreateRegressionExperiment(settings)
                                            .Execute(
                                                trainData: trainData,
                                                columnInformation: columnInformation,
                                                progressHandler: new RegressionProgressHandler(),
                                                preFeaturizer: null);

            var bestModel     = experimentResult.BestRun.Model;
            var bestEstimator = experimentResult.BestRun.Estimator;

            // Batch-score the training data with the best model and print the output two ways.
            var scored = bestModel.Transform(trainData);

            PrintPredictions(scored);
            PrintPredictionsEnumerable(scored);

            // Fit the best estimator on the training data again and save that fitted model.
            bestModel = bestEstimator.Fit(trainData);
            mlContext.Model.Save(bestModel, trainData.Schema, MODEL_FILEPATH);
            Console.WriteLine("Done");
        }
        /// <summary>
        /// Checks that experiment-argument validation rejects a null entry in the numeric
        /// column names with the expected <see cref="ArgumentException"/> message.
        /// </summary>
        public void ValidateExperimentExecuteNullNumericCol()
        {
            const string expectedMessage = "Null column string was specified as numeric in column information";

            var info = new ColumnInformation();
            info.NumericColumnNames.Add(null);

            var thrown = Assert.Throws<ArgumentException>(
                () => UserInputValidationUtil.ValidateExperimentExecuteArgs(_data, info, null, TaskKind.Regression));

            Assert.Equal(expectedMessage, thrown.Message);
        }
Exemple #11
0
        /// <summary>
        /// Adds numeric column "N" to the column information and checks that validating the
        /// experiment arguments against <c>Data</c> throws an <see cref="ArgumentException"/>
        /// with the expected message.
        /// </summary>
        public void ValidateExperimentExecuteNumericColNotInTrain()
        {
            // NOTE(review): the asserted message is about the label column type, not about
            // numeric column 'N' as the test name suggests — confirm this is intended for
            // the data set behind Data.
            const string expectedMessage = "Provided label column 'Label' was of type Boolean, but only type Single is allowed.";

            var info = new ColumnInformation();
            info.NumericColumnNames.Add("N");

            var thrown = Assert.Throws<ArgumentException>(
                () => UserInputValidationUtil.ValidateExperimentExecuteArgs(Data, info, null, TaskKind.Regression));

            Assert.Equal(expectedMessage, thrown.Message);
        }
        /// <summary>
        /// Checks that a <c>ColumnInformation</c> built from a <c>Field</c> returns the same
        /// int, long and string values from <c>Get</c> as the field itself.
        /// </summary>
        /// <param name="value">Integer stored in the field under test.</param>
        public void CheckGetValue(int value)
        {
            var field  = new Field("Hello", value);
            var column = new ColumnInformation(field, 13, "World", false);

            Assert.AreEqual(field.Get<int>(), column.Get<int>());
            Assert.AreEqual(field.Get<long>(), column.Get<long>());
            Assert.AreEqual(field.Get<string>(), column.Get<string>());
        }
        /// <summary>
        /// Checks that the ImageClassification trainer extension creates a non-null trainer
        /// instance and a non-null pipeline node when given no sweep parameters.
        /// </summary>
        public void TrainerExtensionTensorFlowInstanceTests()
        {
            var mlContext        = new MLContext(1);
            var columns          = new ColumnInformation();
            var imageClassifier  = TrainerExtensionCatalog.GetTrainerExtension(TrainerName.ImageClassification);

            Assert.NotNull(imageClassifier.CreateInstance(mlContext, null, columns));
            Assert.NotNull(imageClassifier.CreatePipelineNode(null, columns));
        }
        /// <summary>
        /// Checks that a <c>ColumnInformation</c> exposes the name and ordinal it was built with,
        /// and that the <c>Field</c> passed to it still reports its own name and value.
        /// </summary>
        public void CheckValuesAreSaved()
        {
            const string expectedName    = "World";
            const int    expectedOrdinal = 13;

            var field  = new Field("Hello", 42);
            var column = new ColumnInformation(field, expectedOrdinal, expectedName, false);

            // Column-level values.
            Assert.AreEqual(expectedName, column.Name);
            Assert.AreEqual(expectedOrdinal, column.Ordinal);

            // Field-level values.
            Assert.AreEqual("Hello", field.Name);
            Assert.AreEqual(42, field.Get<int>());
        }
        /// <summary>
        /// Checks that <c>ValidateSpecifiedColumnsExist</c> throws
        /// <see cref="ArgumentException"/> when the column information names a categorical
        /// column that the data view's schema (Features and Label only) does not contain.
        /// </summary>
        public void ValidateColumnNotContainedInData()
        {
            var builder = new DataViewSchema.Builder();
            builder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Single);
            builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);

            var data = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());
            var info = new ColumnInformation
            {
                CategoricalColumnNames = { "Categorical" }
            };

            Assert.Throws<ArgumentException>(
                () => ColumnInferenceValidationUtil.ValidateSpecifiedColumnsExist(info, data));
        }
Exemple #16
0
        /// <summary>
        /// Runs an AutoML regression experiment over <c>TrainDataView</c> with a pre-featurizer
        /// and customized column information, reports how many models were produced and how
        /// long it took, and prints the top models.
        /// </summary>
        /// <param name="mlContext">ML context used to build the pre-featurizer, the settings and the experiment.</param>
        /// <param name="columnInference">Column inference results whose column information is customized and passed to the experiment.</param>
        /// <returns>The result of the executed experiment.</returns>
        private static ExperimentResult<RegressionMetrics> RunAutoMLExperiment(MLContext mlContext,
                                                                               ColumnInferenceResults columnInference)
        {
            // Show the first few rows of the training data.
            ConsoleHelper.ShowDataViewInConsole(mlContext, TrainDataView);

            // Pre-featurizer: maps payment_type value "CSH" to true in a new is_cash column.
            // (Internally, AutoML evaluates models on one or more train/validation splits; the
            // pre-featurizer is fit on the training split only and the trained transform is then
            // applied to both splits.)
            var cashMapping = new[] { new KeyValuePair<string, bool>("CSH", true) };
            IEstimator<ITransformer> preFeaturizer =
                mlContext.Transforms.Conversion.MapValue("is_cash", cashMapping, "payment_type");

            // payment_type is consumed by the pre-featurizer, so move it from the categorical
            // columns to the ignored columns.
            var columns = columnInference.ColumnInformation;
            columns.CategoricalColumnNames.Remove("payment_type");
            columns.IgnoredColumnNames.Add("payment_type");

            // Token source used to stop the experiment, and the handler AutoML invokes after
            // each model it produces and evaluates.
            var cancellation = new CancellationTokenSource();
            var handler      = new RegressionExperimentProgressHandler();

            var settings   = CreateExperimentSettings(mlContext, cancellation);
            var experiment = mlContext.Auto().CreateRegressionExperiment(settings);

            ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
            Console.WriteLine("Running AutoML regression experiment...");
            var timer = Stopwatch.StartNew();

            // Let the user cancel the running experiment by pressing any key.
            CancelExperimentAfterAnyKeyPress(cancellation);
            var outcome = experiment.Execute(TrainDataView, columns, preFeaturizer, handler);

            Console.WriteLine($"{outcome.RunDetails.Count()} models were returned after {timer.Elapsed.TotalSeconds:0.00} seconds{Environment.NewLine}");

            // Print the top models found by AutoML.
            PrintTopModels(outcome);

            return outcome;
        }
Exemple #17
0
        /// <summary>
        /// Infers columns for the generated regression dataset from a <c>ColumnInformation</c>
        /// that only names the label, then checks the label's data kind and the inferred
        /// label, numeric and weight column information.
        /// </summary>
        public void InferColumnsColumnInfoParam()
        {
            var requested = new ColumnInformation { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel };

            var inferred = new MLContext().Auto().InferColumns(
                DatasetUtil.DownloadMlNetGeneratedRegressionDataset(),
                requested);

            var inferredInfo = inferred.ColumnInformation;
            var label        = inferred.TextLoaderOptions.Columns.First(c => c.Name == DatasetUtil.MlNetGeneratedRegressionLabel);

            Assert.Equal(DataKind.Single, label.DataKind);
            Assert.Equal(DatasetUtil.MlNetGeneratedRegressionLabel, inferredInfo.LabelColumnName);

            // Exactly one numeric column is expected, named after the default features column.
            Assert.Single(inferredInfo.NumericColumnNames);
            Assert.Equal(DefaultColumnNames.Features, inferredInfo.NumericColumnNames.First());
            Assert.Null(inferredInfo.ExampleWeightColumnName);
        }
Exemple #18
0
        /// <summary>
        /// Executes the command text supplied by
        /// <c>StoredProcedureExtensions.GetColumnInformation()</c> against the given database
        /// and materializes the rows into <c>ColumnInformation</c> models.
        /// </summary>
        /// <param name="connectionString">Connection string of the database to query.</param>
        /// <returns>The column information models read from the data reader, as a list.</returns>
        public async Task <IList <ColumnInformation> > GetColumnInformation(string connectionString)
        {
            using var connection = new SqlConnection(connectionString);

            // The command is disposable as well, so it is scoped like the connection and reader.
            using var command = new SqlCommand(StoredProcedureExtensions.GetColumnInformation(), connection)
            {
                CommandType = CommandType.Text
            };

            // Open asynchronously: a blocking Open() would tie up the calling thread inside an
            // otherwise async method.
            await connection.OpenAsync();

            using var dataReader = await command.ExecuteReaderAsync();

            var columnInformationModels = await ColumnInformation.FromDataReader(dataReader);

            return columnInformationModels.ToList();
        }
Exemple #19
0
        /// <summary>
        /// Checks that experiment-argument validation rejects a column declared as text whose
        /// schema type is Single, with the expected <see cref="ArgumentException"/> message.
        /// </summary>
        public void ValidateTextColumnNotText()
        {
            const string textColumn      = "TextColumn";
            const string expectedMessage = "Provided text column 'TextColumn' was of type Single, but only type String is allowed.";

            // Schema in which every column, including the one declared as text, is Single.
            var builder = new DataViewSchema.Builder();
            builder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Single);
            builder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
            builder.AddColumn(textColumn, NumberDataViewType.Single);

            var data = DataViewTestFixture.BuildDummyDataView(builder.ToSchema());
            var info = new ColumnInformation
            {
                TextColumnNames = { textColumn }
            };

            var thrown = Assert.Throws<ArgumentException>(
                () => UserInputValidationUtil.ValidateExperimentExecuteArgs(data, info, null, TaskKind.Regression));

            Assert.Equal(expectedMessage, thrown.Message);
        }
Exemple #20
0
    /// <summary>
    /// Reads the name, data type and maximum character length of every column of a table
    /// from the <c>information_schema.columns</c> view of the given database.
    /// </summary>
    /// <param name="dbName">
    /// Database whose information schema is queried. An identifier cannot be sent as a SQL
    /// parameter, so this value is concatenated into the command text and MUST come from a
    /// trusted source.
    /// </param>
    /// <param name="tableName">Table to describe; sent as the <c>@TableName</c> parameter.</param>
    /// <returns>
    /// One <c>ColumnInformation</c> per row returned by the query; an empty list when the query
    /// returns no rows. A NULL <c>character_maximum_length</c> is reported as 0.
    /// </returns>
    private IList <ColumnInformation> GetColumnInformationForTable(string dbName, string tableName)
    {
        List <ColumnInformation> columnInformations = new List <ColumnInformation>();

        // Only the database name is concatenated; the table name filter is parameterized.
        string selectCmdString = "SELECT column_name,data_type,character_maximum_length FROM " + dbName + ".information_schema.columns WHERE table_name = @TableName";

        // Prepare everything before opening the connection. The command and the reader are
        // scoped with using so they are released even when reading throws.
        using (SqlCommand cmd = new SqlCommand(selectCmdString, conn))
        {
            cmd.Parameters.Add(new SqlParameter("@TableName", tableName));

            try
            {
                // conn is shared at class level, so it is opened here and always closed in
                // the finally block below.
                conn.Open();

                using (SqlDataReader reader = cmd.ExecuteReader())
                {
                    // Resolve the ordinals once instead of looking columns up by name per row.
                    int nameOrdinal   = reader.GetOrdinal("column_name");
                    int typeOrdinal   = reader.GetOrdinal("data_type");
                    int lengthOrdinal = reader.GetOrdinal("character_maximum_length");

                    while (reader.Read())
                    {
                        // character_maximum_length is NULL for non-character columns. Test for
                        // DBNull explicitly; the previous code cast a string to int, which does
                        // not compile and could not represent NULL.
                        int maxLength = reader.IsDBNull(lengthOrdinal) ? 0 : reader.GetInt32(lengthOrdinal);

                        columnInformations.Add(new ColumnInformation(
                            reader[nameOrdinal].ToString(),
                            reader[typeOrdinal].ToString(),
                            maxLength));
                    }
                }
            }
            finally
            {
                conn.Close();
            }
        }

        return columnInformations;
    }
Exemple #21
0
        /// <summary>
        /// Checks that column information can be inferred from multiline.csv, an input file
        /// with escaped newlines inside quotes.
        /// </summary>
        public void InferColumnsFromMultilineInputFile()
        {
            var csvPath   = GetDataPath("multiline.csv");
            var context   = new MLContext();
            var requested = new ColumnInformation { LabelColumnName = "id" };

            var inferredInfo = context.Auto().InferColumns(csvPath, requested).ColumnInformation;

            // The file has 3 columns: "id", "description" and "animal".
            Assert.NotNull(inferredInfo.LabelColumnName);
            Assert.Equal(1, inferredInfo.TextColumnNames.Count);
            Assert.Equal(1, inferredInfo.CategoricalColumnNames.Count);

            Assert.Equal("id", inferredInfo.LabelColumnName);
            Assert.Equal("description", inferredInfo.TextColumnNames.First());
            Assert.Equal("animal", inferredInfo.CategoricalColumnNames.First());
        }
Exemple #22
0
        /// <summary>
        /// Moves every numeric column whose name contains "fstr" from the numeric columns to
        /// the categorical columns.
        /// </summary>
        /// <param name="columnInformation">Column information to adjust; it is modified in place.</param>
        /// <returns>The same <paramref name="columnInformation"/> instance.</returns>
        private static ColumnInformation CorrectColumnTypes(ColumnInformation columnInformation)
        {
            // Work from a snapshot so the numeric collection is not modified while it is
            // being enumerated.
            var misclassified = new List<String>(columnInformation.NumericColumnNames)
                .FindAll(name => name.Contains("fstr"));

            misclassified.ForEach(name =>
            {
                columnInformation.NumericColumnNames.Remove(name);
                columnInformation.CategoricalColumnNames.Add(name);
            });

            return columnInformation;
        }
        /// <summary>
        /// Builds a column description for every property of <c>_type</c> that carries a
        /// <c>UserDefinedTableTypeColumnAttribute</c>.
        /// </summary>
        /// <returns>The column descriptions, sorted by their <c>Order</c> value.</returns>
        private List<ColumnInformation> GetColumnInformation()
        {
            var described = new List<ColumnInformation>();

            foreach (PropertyInfo property in _type.GetProperties())
            {
                var columnAttribute = Attributes.GetAttribute<UserDefinedTableTypeColumnAttribute>(property);

                // Properties without the attribute are not columns.
                if (columnAttribute == null)
                {
                    continue;
                }

                described.Add(new ColumnInformation
                {
                    // Fall back to the property name when the attribute does not name the column.
                    Name     = columnAttribute.Name ?? property.Name,
                    Property = property,
                    Order    = columnAttribute.Order
                });
            }

            return described.OrderBy(column => column.Order).ToList();
        }
Exemple #24
0
        /// <summary>
        /// Loads the transactions into <c>_trainingDataView</c> and runs an AutoML multiclass
        /// classification experiment optimizing macro accuracy, with
        /// <c>Transaction.Category</c> as the label and <c>Transaction.Description</c> as a
        /// text column.
        /// </summary>
        /// <param name="trainingData">Transactions to train on.</param>
        /// <param name="maxTimeInSec">Maximum experiment time, in seconds.</param>
        /// <returns>The model of the best run.</returns>
        public ITransformer AutoTrain(IEnumerable<Transaction> trainingData, uint maxTimeInSec)
        {
            _trainingDataView = _mlContext.Data.LoadFromEnumerable(trainingData);

            var settings = new MulticlassExperimentSettings
            {
                MaxExperimentTimeInSeconds = maxTimeInSec,
                OptimizingMetric           = MulticlassClassificationMetric.MacroAccuracy
            };

            var experiment = _mlContext.Auto().CreateMulticlassClassificationExperiment(settings);

            var columns = new ColumnInformation
            {
                LabelColumnName = nameof(Transaction.Category),
                TextColumnNames = { nameof(Transaction.Description) }
            };

            return experiment.Execute(_trainingDataView, columns).BestRun.Model;
        }
Exemple #25
0
        /// <summary>
        /// Loads ./housing.csv, runs a 20-second AutoML regression experiment optimizing mean
        /// absolute error, prints each run that has validation metrics, then prints the best
        /// run and waits for a line of input.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var mlContext = new MLContext();
            var housing   = mlContext.Data.LoadFromTextFile<HousingData>("./housing.csv", hasHeader: true, separatorChar: ',');

            var experiment = mlContext.Auto().CreateRegressionExperiment(new RegressionExperimentSettings
            {
                MaxExperimentTimeInSeconds = 20,
                OptimizingMetric           = RegressionMetric.MeanAbsoluteError
            });

            var columns = new ColumnInformation { LabelColumnName = "Label" };

            // Report each run as it completes; runs without validation metrics are skipped.
            var reporter = new Progress<RunDetail<RegressionMetrics>>(run =>
            {
                if (run.ValidationMetrics == null)
                {
                    return;
                }

                Console.WriteLine($"Current Result - {run.TrainerName}, {run.ValidationMetrics.RSquared}, {run.ValidationMetrics.MeanAbsoluteError}");
            });

            var best = experiment.Execute(housing, columns, progressHandler: reporter).BestRun;

            Console.WriteLine(Environment.NewLine);
            Console.WriteLine("Best run:");
            Console.WriteLine($"Trainer name - {best.TrainerName}");
            Console.WriteLine($"RSquared - {best.ValidationMetrics.RSquared}");
            Console.WriteLine($"MAE - {best.ValidationMetrics.MeanAbsoluteError}");

            Console.ReadLine();
        }
Exemple #26
0
 /// <summary>
 /// Tells whether the provider supports obtaining the specified information.
 /// </summary>
 /// <param name="ci">Information flag(s) to test against <c>AnalyzerCapability</c>.</param>
 /// <returns>
 /// <c>true</c> when <paramref name="ci"/> shares at least one set bit with
 /// <c>AnalyzerCapability</c>; otherwise <c>false</c>.
 /// </returns>
 public bool HasCapability(ColumnInformation ci)
 {
     var supported = ci & AnalyzerCapability;

     return supported != 0;
 }
Exemple #27
0
        public void InferredPipelinesHashTest()
        {
            var context    = new MLContext();
            var columnInfo = new ColumnInformation();

            // test same learners with no hyperparams have the same hash code
            var trainer1          = new SuggestedTrainer(context, new LightGbmBinaryExtension(), columnInfo);
            var trainer2          = new SuggestedTrainer(context, new LightGbmBinaryExtension(), columnInfo);
            var transforms1       = new List <SuggestedTransform>();
            var transforms2       = new List <SuggestedTransform>();
            var inferredPipeline1 = new SuggestedPipeline(transforms1, new List <SuggestedTransform>(), trainer1, context, false);
            var inferredPipeline2 = new SuggestedPipeline(transforms2, new List <SuggestedTransform>(), trainer2, context, false);

            Assert.Equal(inferredPipeline1.GetHashCode(), inferredPipeline2.GetHashCode());

            // test same learners with hyperparams set vs empty hyperparams have different hash codes
            var hyperparams1 = new ParameterSet(new List <IParameterValue>()
            {
                new LongParameterValue("NumberOfLeaves", 2)
            });

            trainer1          = new SuggestedTrainer(context, new LightGbmBinaryExtension(), columnInfo, hyperparams1);
            trainer2          = new SuggestedTrainer(context, new LightGbmBinaryExtension(), columnInfo);
            inferredPipeline1 = new SuggestedPipeline(transforms1, new List <SuggestedTransform>(), trainer1, context, false);
            inferredPipeline2 = new SuggestedPipeline(transforms2, new List <SuggestedTransform>(), trainer2, context, false);
            Assert.NotEqual(inferredPipeline1.GetHashCode(), inferredPipeline2.GetHashCode());

            // same learners with different hyperparams
            hyperparams1 = new ParameterSet(new List <IParameterValue>()
            {
                new LongParameterValue("NumberOfLeaves", 2)
            });
            var hyperparams2 = new ParameterSet(new List <IParameterValue>()
            {
                new LongParameterValue("NumberOfLeaves", 6)
            });

            trainer1          = new SuggestedTrainer(context, new LightGbmBinaryExtension(), columnInfo, hyperparams1);
            trainer2          = new SuggestedTrainer(context, new LightGbmBinaryExtension(), columnInfo, hyperparams2);
            inferredPipeline1 = new SuggestedPipeline(transforms1, new List <SuggestedTransform>(), trainer1, context, false);
            inferredPipeline2 = new SuggestedPipeline(transforms2, new List <SuggestedTransform>(), trainer2, context, false);
            Assert.NotEqual(inferredPipeline1.GetHashCode(), inferredPipeline2.GetHashCode());

            // same learners with same transforms
            trainer1    = new SuggestedTrainer(context, new LightGbmBinaryExtension(), columnInfo);
            trainer2    = new SuggestedTrainer(context, new LightGbmBinaryExtension(), columnInfo);
            transforms1 = new List <SuggestedTransform>()
            {
                ColumnConcatenatingExtension.CreateSuggestedTransform(context, new[] { "In" }, "Out")
            };
            transforms2 = new List <SuggestedTransform>()
            {
                ColumnConcatenatingExtension.CreateSuggestedTransform(context, new[] { "In" }, "Out")
            };
            inferredPipeline1 = new SuggestedPipeline(transforms1, new List <SuggestedTransform>(), trainer1, context, false);
            inferredPipeline2 = new SuggestedPipeline(transforms2, new List <SuggestedTransform>(), trainer2, context, false);
            Assert.Equal(inferredPipeline1.GetHashCode(), inferredPipeline2.GetHashCode());

            // same transforms with different learners
            trainer1    = new SuggestedTrainer(context, new SdcaLogisticRegressionBinaryExtension(), columnInfo);
            trainer2    = new SuggestedTrainer(context, new LightGbmBinaryExtension(), columnInfo);
            transforms1 = new List <SuggestedTransform>()
            {
                ColumnConcatenatingExtension.CreateSuggestedTransform(context, new[] { "In" }, "Out")
            };
            transforms2 = new List <SuggestedTransform>()
            {
                ColumnConcatenatingExtension.CreateSuggestedTransform(context, new[] { "In" }, "Out")
            };
            inferredPipeline1 = new SuggestedPipeline(transforms1, new List <SuggestedTransform>(), trainer1, context, false);
            inferredPipeline2 = new SuggestedPipeline(transforms2, new List <SuggestedTransform>(), trainer2, context, false);
            Assert.NotEqual(inferredPipeline1.GetHashCode(), inferredPipeline2.GetHashCode());
        }
		/// <summary>
		/// Reports whether this provider can supply the requested kind of information.
		/// </summary>
		/// <param name="ci">The information flag(s) being asked about.</param>
		/// <returns>
		/// <c>true</c> when <paramref name="ci"/> shares at least one set bit with
		/// <c>AnalyzerCapability</c>; otherwise <c>false</c>.
		/// </returns>
		public bool HasCapability( ColumnInformation ci )
		{
			// No overlapping bits means none of the requested information is supported.
			if ( (ci & AnalyzerCapability) == 0 )
			{
				return false;
			}

			return true;
		}
 /// <summary>
 /// Calls the column-inference routine referenced by <c>InferColumnsMethodInfo</c> through
 /// reflection and returns its result cast to <see cref="ColumnInferenceResults"/>.
 /// </summary>
 /// <remarks>
 /// Exceptions raised through the reflective call arrive wrapped; the wrapper is removed so
 /// callers observe the underlying exception. The inner exception is rethrown via
 /// <c>ExceptionDispatchInfo</c> so its original stack trace is preserved. If the caught
 /// exception carries no inner exception, it is rethrown unchanged instead of throwing
 /// <c>null</c> (which would surface as an unrelated <see cref="NullReferenceException"/>).
 /// </remarks>
 /// <param name="context">Forwarded as the first argument of the target method.</param>
 /// <param name="path">Forwarded to the target method.</param>
 /// <param name="columnInfo">Forwarded to the target method.</param>
 /// <param name="separatorChar">Forwarded to the target method; may be <c>null</c>.</param>
 /// <param name="allowQuotedStrings">Forwarded to the target method; may be <c>null</c>.</param>
 /// <param name="supportSparse">Forwarded to the target method; may be <c>null</c>.</param>
 /// <param name="trimWhitespace">Forwarded to the target method.</param>
 /// <param name="groupColumns">Forwarded to the target method.</param>
 /// <param name="hasHeader">Forwarded to the target method; defaults to <c>true</c>.</param>
 /// <returns>The value returned by the target method.</returns>
 public static ColumnInferenceResults InferColumns(MLContext context, string path, ColumnInformation columnInfo,
                                                   char?separatorChar, bool?allowQuotedStrings, bool?supportSparse, bool trimWhitespace, bool groupColumns, bool hasHeader = true)
 {
     try
     {
         // NOTE(review): a null target implies InferColumnsMethodInfo refers to a static method - confirm at its declaration.
         return((ColumnInferenceResults)InferColumnsMethodInfo.Invoke(null, new object[] { context, path, columnInfo, separatorChar, allowQuotedStrings, supportSparse, trimWhitespace, groupColumns, hasHeader }));
     }
     catch (Exception ex)
     {
         if (ex.InnerException == null)
         {
             // Nothing to unwrap: keep the original exception and its stack trace.
             throw;
         }

         // Rethrow the real failure without resetting its stack trace.
         System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(ex.InnerException).Throw();

         // Unreachable: Throw() never returns, but the compiler cannot prove that.
         throw;
     }
 }
Exemple #30
0
        /// <summary>
        /// Runs an AutoML binary classification experiment over <c>TrainDataView</c>, reports
        /// progress and timing on the console, and prints the top models found.
        /// </summary>
        /// <param name="mlContext">ML.NET context used to create the experiment and its settings.</param>
        /// <param name="columnInference">
        /// Column inference results; its <c>ColumnInformation</c> is passed through
        /// <c>CorrectColumnTypes</c> before being handed to the experiment.
        /// </param>
        /// <returns>The result returned by executing the experiment.</returns>
        private static ExperimentResult <BinaryClassificationMetrics> RunAutoMLExperiment(MLContext mlContext,
                                                                                          ColumnInferenceResults columnInference)
        {
            // STEP 1: Display first few rows of the training data (currently disabled).
            // ConsoleHelper.ShowDataViewInConsole(mlContext, TrainDataView);

            // STEP 2: Build a pre-featurizer for use in the AutoML experiment (currently disabled).
            // (Internally, AutoML uses one or more train/validation data splits to
            // evaluate the models it produces. The pre-featurizer is fit only on the
            // training data split to produce a trained transform. Then, the trained transform
            // is applied to both the train and validation data splits.)
            //IEstimator<ITransformer> preFeaturizer = mlContext.Transforms.Conversion.MapValue("is_cash",
            //    new[] { new KeyValuePair<string, bool>("CSH", true) }, "payment_type");

            //IEstimator<ITransformer> preFeaturizer = mlContext.Transforms.Conversion.MapValue("fstrClassCategory",
            //    new[] { new KeyValuePair<float, String>(1, "First"), new KeyValuePair<float, String>(2, "Second"), new KeyValuePair<float, String>(3, "Third") }, "fstrClass").Append(mlContext.Transforms.Categorical.OneHotEncoding("fstrClassCategory", "fstrClassCategory")).Append(mlContext.Transforms.DropColumns("fstrClass"));

            // STEP 3: Customize column information returned by InferColumns API.
            ColumnInformation columnInformation = CorrectColumnTypes(columnInference.ColumnInformation);

            // Further manual adjustments can be made here, for example:
            // columnInformation.NumericColumnNames.Remove("fstrClass");
            // columnInformation.CategoricalColumnNames.Add("fstrClass");
            // columnInformation.IgnoredColumnNames.Add("fstrClass");

            // STEP 4: Initialize a cancellation token source to stop the experiment.
            // Not disposed here: it is handed to CancelExperimentAfterAnyKeyPress below,
            // which may still reference it after this method returns.
            var cts = new CancellationTokenSource();

            // STEP 5: Initialize our user-defined progress handler that AutoML will
            // invoke after each model it produces and evaluates.
            var progressHandler = new BinaryExperimentProgressHandler();

            // STEP 6: Create experiment settings
            var experimentSettings = CreateExperimentSettings(mlContext, cts);

            // STEP 7: Run AutoML Binary Classification experiment.
            var experiment = mlContext.Auto().CreateBinaryClassificationExperiment(experimentSettings);

            ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
            // The message previously said "regression", which did not match the experiment type created above.
            Console.WriteLine("Running AutoML binary classification experiment...");
            var stopwatch = Stopwatch.StartNew();

            // Cancel experiment after the user presses any key.
            CancelExperimentAfterAnyKeyPress(cts);
            ExperimentResult <BinaryClassificationMetrics> experimentResult = experiment.Execute(trainData: TrainDataView, columnInformation: columnInformation, progressHandler: progressHandler);

            Console.WriteLine($"{experimentResult.RunDetails.Count()} models were returned after {stopwatch.Elapsed.TotalSeconds:0.00} seconds{Environment.NewLine}");

            // Print top models found by AutoML.
            PrintTopModels(experimentResult);

            // Possible follow-up (not implemented): permutation feature importance over the
            // categorical, image-path, numeric and text column names of columnInformation.
            // var featureNames = columnInformation.CategoricalColumnNames.Concat(columnInformation.ImagePathColumnNames).Concat(columnInformation.NumericColumnNames).Concat(columnInformation.TextColumnNames).ToList();
            // PrintContributions(featureNames, TrainDataView, experimentResult.RunDetails);

            return(experimentResult);
        }