/// <summary>
/// Entry point: wires up DI, fetches one month of hourly AAPL bars from the
/// Yahoo Finance service, then runs a 20-second AutoML regression experiment
/// over them and prints the best run's metrics.
/// </summary>
static async Task Main(string[] args)
{
    // Setup our DI. ServiceProvider is IDisposable — dispose it when Main
    // exits so singletons (e.g. HttpClient handlers) are released.
    using var serviceProvider = new ServiceCollection()
        .AddLogging()
        .AddSingleton<IYahooFinanceService, YahooFinanceService>()
        .AddHttpClient()
        .BuildServiceProvider();

    // LoggerFactory is also IDisposable; disposing flushes the console sink.
    using var loggerFactory = LoggerFactory.Create(builder =>
    {
        builder.AddFilter("Microsoft", LogLevel.Warning)
            .AddFilter("System", LogLevel.Warning)
            .AddFilter("CandleStickMachineLearning.Program", LogLevel.Debug)
            .AddConsole();
    });
    var _logger = loggerFactory.CreateLogger<Program>();
    _logger.LogInformation("Hello World!");

    // Do the actual work here. GetRequiredService throws a descriptive
    // exception instead of returning null (the original used GetService,
    // which would surface a misregistration as a NullReferenceException).
    var yahooFinanceService = serviceProvider.GetRequiredService<IYahooFinanceService>();
    var barsList = await yahooFinanceService.GetBars("AAPL", DateTime.UtcNow.AddMonths(-1), DateTime.UtcNow, "1h");

    var context = new MLContext();
    var trainData = context.Data.LoadFromEnumerable<Models.Bar>(barsList);

    var settings = new RegressionExperimentSettings
    {
        MaxExperimentTimeInSeconds = 20,
        OptimizingMetric = RegressionMetric.MeanAbsoluteError
    };
    var labelColumnInfo = new ColumnInformation() { LabelColumnName = "Label" };

    // Log each candidate model's metrics as AutoML produces them.
    var progress = new Progress<RunDetail<RegressionMetrics>>(p =>
    {
        if (p.ValidationMetrics != null)
        {
            _logger.LogInformation($"Current Result - {p.TrainerName}, {p.ValidationMetrics.RSquared}, {p.ValidationMetrics.MeanAbsoluteError}");
        }
    });

    var experiment = context.Auto().CreateRegressionExperiment(settings);
    var result = experiment.Execute(trainData, labelColumnInfo, progressHandler: progress);

    Console.WriteLine(Environment.NewLine);
    Console.WriteLine("Best run:");
    Console.WriteLine($"Trainer name - {result.BestRun.TrainerName}");
    Console.WriteLine($"RSquared - {result.BestRun.ValidationMetrics.RSquared}");
    Console.WriteLine($"MAE - {result.BestRun.ValidationMetrics.MeanAbsoluteError}");
    Console.ReadLine();
}
/// <summary>
/// Verifies that every trainer extension (except OVA) exposes sweepable
/// hyperparameter ranges and can build both a trainer instance and a
/// pipeline node. ImageClassification has no sweepable hyperparameters
/// and is skipped.
/// </summary>
public void TrainerExtensionInstanceTests()
{
    var mlContext = new MLContext(1);
    var columnInformation = new ColumnInformation();
    var candidateTrainers = Enum.GetValues(typeof(TrainerName))
        .Cast<TrainerName>()
        .Except(new[] { TrainerName.Ova });

    foreach (var candidate in candidateTrainers)
    {
        var extension = TrainerExtensionCatalog.GetTrainerExtension(candidate);

        // ImageClassification exposes no sweep ranges; nothing to verify.
        if (candidate == TrainerName.ImageClassification)
        {
            continue;
        }

        IEnumerable<SweepableParam> sweepableParams = extension.GetHyperparamSweepRanges();
        Assert.NotNull(sweepableParams);

        // Pin every hyperparameter to a concrete value before instantiation.
        foreach (var param in sweepableParams)
        {
            param.RawValue = 1;
        }

        Assert.NotNull(extension.CreateInstance(mlContext, sweepableParams, columnInformation));
        Assert.NotNull(extension.CreatePipelineNode(null, columnInformation));
    }
}
/// <summary>
/// The FastTreeRanking pipeline node must carry the custom label column name
/// and map the group-id column onto RowGroupColumnName.
/// </summary>
public void BuildFastTreeRankingPipelineNode()
{
    var rankingColumnInfo = new ColumnInformation()
    {
        LabelColumnName = "L",
        GroupIdColumnName = "GId"
    };

    var node = new FastTreeRankingExtension().CreatePipelineNode(null, rankingColumnInfo);

    var expectedJson = @"{ ""Name"": ""FastTreeRanking"", ""NodeType"": ""Trainer"", ""InColumns"": [ ""Features"" ], ""OutColumns"": [ ""Score"" ], ""Properties"": { ""LabelColumnName"": ""L"", ""RowGroupColumnName"": ""GId"" } }";
    Util.AssertObjectMatchesJson(expectedJson, node);
}
/// <summary>
/// GetColumnNames must return every column declared in the column
/// information, across all purposes (label, sampling key, ids, categorical,
/// numeric).
/// </summary>
public void GetColumnNamesTest()
{
    var columnInfo = new ColumnInformation()
    {
        LabelColumnName = "Label",
        SamplingKeyColumnName = "SamplingKey",
        UserIdColumnName = "UserId",
        ItemIdColumnName = "MovieId",
        GroupIdColumnName = "GroupId"
    };
    columnInfo.CategoricalColumnNames.Add("Cat1");
    columnInfo.CategoricalColumnNames.Add("Cat2");
    columnInfo.NumericColumnNames.Add("Num");

    var columnNames = ColumnInformationUtil.GetColumnNames(columnInfo);

    Assert.Equal(8, columnNames.Count());
    foreach (var expected in new[] { "Label", "SamplingKey", "UserId", "MovieId", "GroupId", "Cat1", "Cat2", "Num" })
    {
        Assert.Contains(expected, columnNames);
    }
}
/// <summary>
/// The FastForestBinary pipeline node must carry the swept hyperparameter
/// values plus the custom label and example-weight column names.
/// </summary>
public void BuildPipelineNodeWithCustomColumns()
{
    var customColumnInfo = new ColumnInformation()
    {
        LabelColumnName = "L",
        ExampleWeightColumnName = "W"
    };

    // Pin every sweepable hyperparameter to a concrete value.
    var fastForestParams = SweepableParams.BuildFastForestParams();
    foreach (var param in fastForestParams)
    {
        param.RawValue = 1;
    }

    var node = new FastForestBinaryExtension().CreatePipelineNode(fastForestParams, customColumnInfo);

    var expectedJson = @"{ ""Name"": ""FastForestBinary"", ""NodeType"": ""Trainer"", ""InColumns"": [ ""Features"" ], ""OutColumns"": [ ""Score"" ], ""Properties"": { ""NumberOfLeaves"": 1, ""MinimumExampleCountPerLeaf"": 10, ""NumberOfTrees"": 100, ""LabelColumnName"": ""L"", ""ExampleWeightColumnName"": ""W"" } }";
    Util.AssertObjectMatchesJson(expectedJson, node);
}
/// <summary>
/// GetColumnPurpose must map every configured column name to its declared
/// purpose, and return null for a column never mentioned in the
/// configuration.
/// </summary>
public void GetColumnPurpose()
{
    var columnInfo = new ColumnInformation()
    {
        LabelColumnName = "Label",
        ExampleWeightColumnName = "Weight",
        SamplingKeyColumnName = "SamplingKey",
        UserIdColumnName = "UserId",
        ItemIdColumnName = "MovieId",
        GroupIdColumnName = "GroupId"
    };
    columnInfo.CategoricalColumnNames.Add("Cat");
    columnInfo.NumericColumnNames.Add("Num");
    columnInfo.TextColumnNames.Add("Text");
    columnInfo.IgnoredColumnNames.Add("Ignored");

    // Expected (purpose, column name) pairs, one per configured column.
    var expectations = new[]
    {
        (ColumnPurpose.Label, "Label"),
        (ColumnPurpose.Weight, "Weight"),
        (ColumnPurpose.SamplingKey, "SamplingKey"),
        (ColumnPurpose.UserId, "UserId"),
        (ColumnPurpose.GroupId, "GroupId"),
        (ColumnPurpose.ItemId, "MovieId"),
        (ColumnPurpose.CategoricalFeature, "Cat"),
        (ColumnPurpose.NumericFeature, "Num"),
        (ColumnPurpose.TextFeature, "Text"),
        (ColumnPurpose.Ignore, "Ignored"),
    };
    foreach (var (expectedPurpose, columnName) in expectations)
    {
        Assert.Equal(expectedPurpose, ColumnInformationUtil.GetColumnPurpose(columnInfo, columnName));
    }

    // Unknown columns have no purpose.
    Assert.Null(ColumnInformationUtil.GetColumnPurpose(columnInfo, "NonExistent"));
}
/// <summary>
/// Column inference on a headerless dataset must auto-name columns
/// (col0, col1, ...) and honor the purposes supplied via ColumnInformation.
/// </summary>
public void InferDatasetWithoutHeader()
{
    var mlContext = new MLContext(1);
    var dataPath = Path.Combine("TestData", "DatasetWithoutHeader.txt");
    var inputColumnInfo = new ColumnInformation()
    {
        LabelColumnName = "col0",
        UserIdColumnName = "col1",
        ItemIdColumnName = "col2",
    };
    inputColumnInfo.IgnoredColumnNames.Add("col4");

    var result = ColumnInferenceApi.InferColumns(mlContext, dataPath, inputColumnInfo, ',', null, null, false, false, false);

    Assert.Equal(6, result.TextLoaderOptions.Columns.Count());

    // Check the inferred data kind of each explicitly-purposed column.
    var expectedKinds = new[]
    {
        ("col0", DataKind.String),
        ("col1", DataKind.Single),
        ("col2", DataKind.Single),
        ("col4", DataKind.Single),
    };
    foreach (var (columnName, expectedKind) in expectedKinds)
    {
        var column = result.TextLoaderOptions.Columns.First(c => c.Name == columnName);
        Assert.Equal(expectedKind, column.DataKind);
    }

    Assert.Single(result.ColumnInformation.CategoricalColumnNames);
    Assert.Empty(result.ColumnInformation.TextColumnNames);
}
/// <summary>
/// A column declared both as the label (the default "Label") and as a numeric
/// feature must be rejected by experiment-argument validation.
/// </summary>
public void ValidateExperimentExecuteDuplicateCol()
{
    var columnInfo = new ColumnInformation();
    columnInfo.NumericColumnNames.Add(DefaultColumnNames.Label);

    // The thrown exception is not inspected further, so discard the result;
    // the original assigned it to an unused local `ex`.
    Assert.Throws<ArgumentException>(() => UserInputValidationUtil.ValidateExperimentExecuteArgs(
        _data, columnInfo, null, TaskKind.Regression));
}
/// <summary>
/// End-to-end sample: infers columns from the training file, adjusts the
/// inferred column purposes, runs a time-boxed AutoML regression experiment,
/// prints predictions from the best model, then refits the best pipeline on
/// the full training data and saves it.
/// </summary>
public void Start()
{
    //Infer columns and load train data
    var columnInferenceResult = mlContext.Auto().InferColumns(
        path: TRAIN_DATA_FILEPATH,
        labelColumnName: "next",
        groupColumns: false);
    TextLoader textLoader = mlContext.Data.CreateTextLoader(columnInferenceResult.TextLoaderOptions);
    trainData = textLoader.Load(TRAIN_DATA_FILEPATH);

    //Modify infered columns information
    // productId and year were inferred as numeric, but are treated here as
    // categorical (remove from one set, add to the other).
    columnInformation = columnInferenceResult.ColumnInformation;
    columnInformation.CategoricalColumnNames.Add("productId");
    columnInformation.NumericColumnNames.Remove("productId");
    columnInformation.CategoricalColumnNames.Add("year");
    columnInformation.NumericColumnNames.Remove("year");
    // "units" is excluded from featurization entirely.
    columnInformation.NumericColumnNames.Remove("units");
    columnInformation.IgnoredColumnNames.Add("units");

    // Time-boxed experiment (10s) optimizing RMSE, with an on-disk cache and
    // external cancellation support.
    var experimentSettings = new RegressionExperimentSettings()
    {
        MaxExperimentTimeInSeconds = 10,
        OptimizingMetric = RegressionMetric.RootMeanSquaredError,
        CacheDirectory = new DirectoryInfo(CACHE_DIRECTORY),
        CancellationToken = cancelationTokenSource.Token
    };

    //Exclude trainers from experiment
    experimentSettings.Trainers.Remove(RegressionTrainer.Ols);

    RegressionExperiment experiment = mlContext.Auto().CreateRegressionExperiment(experimentSettings);
    ExperimentResult<RegressionMetrics> experimentResult = experiment.Execute(
        trainData: trainData,
        columnInformation: columnInformation,
        progressHandler: new RegressionProgressHandler(),
        preFeaturizer: null);

    ITransformer model = experimentResult.BestRun.Model;
    IEstimator<ITransformer> estimator = experimentResult.BestRun.Estimator;

    //Make batch predictions
    IDataView predictionsDataView = model.Transform(trainData);
    PrintPredictions(predictionsDataView);
    PrintPredictionsEnumerable(predictionsDataView);

    // Refit the winning pipeline on the training data before persisting it.
    model = estimator.Fit(trainData);
    mlContext.Model.Save(model, trainData.Schema, MODEL_FILEPATH);
    Console.WriteLine("Done");
}
/// <summary>
/// A null entry in the numeric column name list must be rejected with a
/// descriptive error message.
/// </summary>
public void ValidateExperimentExecuteNullNumericCol()
{
    var columnInfo = new ColumnInformation();
    columnInfo.NumericColumnNames.Add(null);

    var thrown = Assert.Throws<ArgumentException>(
        () => UserInputValidationUtil.ValidateExperimentExecuteArgs(_data, columnInfo, null, TaskKind.Regression));
    Assert.Equal("Null column string was specified as numeric in column information", thrown.Message);
}
/// <summary>
/// Declaring a numeric column ("N") that does not exist in the training data
/// must cause experiment-argument validation to throw.
/// </summary>
public void ValidateExperimentExecuteNumericColNotInTrain()
{
    var columnInfo = new ColumnInformation();
    columnInfo.NumericColumnNames.Add("N");

    var ex = Assert.Throws<ArgumentException>(() => UserInputValidationUtil.ValidateExperimentExecuteArgs(Data, columnInfo, null, TaskKind.Regression));
    // NOTE(review): the asserted message is about the label column's *type*,
    // not about the missing numeric column 'N' — it looks copy-pasted from a
    // label-type test. Confirm this really is the first validation error
    // raised for this Data fixture (presumably its Label column is Boolean).
    Assert.Equal("Provided label column 'Label' was of type Boolean, but only type Single is allowed.", ex.Message);
}
/// <summary>
/// ColumnInformation must delegate typed Get calls to the wrapped Field,
/// returning the same value for each requested type.
/// </summary>
public void CheckGetValue(int value)
{
    const string columnName = "World";
    const int columnOrdinal = 13;

    var field = new Field("Hello", value);
    var column = new ColumnInformation(field, columnOrdinal, columnName, false);

    Assert.AreEqual(field.Get<int>(), column.Get<int>());
    Assert.AreEqual(field.Get<long>(), column.Get<long>());
    Assert.AreEqual(field.Get<string>(), column.Get<string>());
}
/// <summary>
/// The ImageClassification trainer extension takes no sweep parameters, but
/// must still create both a trainer instance and a pipeline node.
/// </summary>
public void TrainerExtensionTensorFlowInstanceTests()
{
    var mlContext = new MLContext(1);
    var columnInformation = new ColumnInformation();
    var extension = TrainerExtensionCatalog.GetTrainerExtension(TrainerName.ImageClassification);

    Assert.NotNull(extension.CreateInstance(mlContext, null, columnInformation));
    Assert.NotNull(extension.CreatePipelineNode(null, columnInformation));
}
/// <summary>
/// ColumnInformation must store the name and ordinal it is constructed with,
/// without disturbing the wrapped Field's own name and value.
/// </summary>
public void CheckValuesAreSaved()
{
    const string columnName = "World";
    const int columnOrdinal = 13;

    var field = new Field("Hello", 42);
    var column = new ColumnInformation(field, columnOrdinal, columnName, false);

    Assert.AreEqual(columnName, column.Name);
    Assert.AreEqual(columnOrdinal, column.Ordinal);
    Assert.AreEqual("Hello", field.Name);
    Assert.AreEqual(42, field.Get<int>());
}
/// <summary>
/// Validation must reject column information that names a column the data
/// view does not contain.
/// </summary>
public void ValidateColumnNotContainedInData()
{
    var schemaBuilder = new DataViewSchema.Builder();
    schemaBuilder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Single);
    schemaBuilder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
    var dataView = DataViewTestFixture.BuildDummyDataView(schemaBuilder.ToSchema());

    // "Categorical" exists only in the column information, not in the data.
    var columnInfo = new ColumnInformation();
    columnInfo.CategoricalColumnNames.Add("Categorical");

    Assert.Throws<ArgumentException>(
        () => ColumnInferenceValidationUtil.ValidateSpecifiedColumnsExist(columnInfo, dataView));
}
/// <summary>
/// Runs a cancellable AutoML regression experiment over the training data
/// with a pre-featurizer, reports per-model progress, prints the top models,
/// and returns the experiment result.
/// </summary>
private static ExperimentResult<RegressionMetrics> RunAutoMLExperiment(MLContext mlContext, ColumnInferenceResults columnInference)
{
    // STEP 1: Display first few rows of the training data.
    ConsoleHelper.ShowDataViewInConsole(mlContext, TrainDataView);

    // STEP 2: Build a pre-featurizer for use in the AutoML experiment.
    // AutoML fits it only on the training split of each internal
    // train/validation split, then applies the trained transform to both.
    IEstimator<ITransformer> preFeaturizer = mlContext.Transforms.Conversion.MapValue(
        "is_cash",
        new[] { new KeyValuePair<string, bool>("CSH", true) },
        "payment_type");

    // STEP 3: Customize column information returned by the InferColumns API —
    // the raw payment_type column is superseded by the pre-featurized is_cash.
    ColumnInformation columnInformation = columnInference.ColumnInformation;
    columnInformation.CategoricalColumnNames.Remove("payment_type");
    columnInformation.IgnoredColumnNames.Add("payment_type");

    // STEP 4: Cancellation token source so the user can stop the experiment.
    var cts = new CancellationTokenSource();

    // STEP 5: Progress handler AutoML invokes after each model it evaluates.
    var progressHandler = new RegressionExperimentProgressHandler();

    // STEP 6: Create experiment settings.
    var experimentSettings = CreateExperimentSettings(mlContext, cts);

    // STEP 7: Run the AutoML regression experiment.
    var experiment = mlContext.Auto().CreateRegressionExperiment(experimentSettings);
    ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
    Console.WriteLine($"Running AutoML regression experiment...");
    var stopwatch = Stopwatch.StartNew();

    // Cancel experiment after the user presses any key
    CancelExperimentAfterAnyKeyPress(cts);

    ExperimentResult<RegressionMetrics> experimentResult =
        experiment.Execute(TrainDataView, columnInformation, preFeaturizer, progressHandler);
    Console.WriteLine($"{experimentResult.RunDetails.Count()} models were returned after {stopwatch.Elapsed.TotalSeconds:0.00} seconds{Environment.NewLine}");

    // Print top models found by AutoML
    PrintTopModels(experimentResult);

    return experimentResult;
}
/// <summary>
/// InferColumns must honor a caller-supplied label column name and infer the
/// remaining columns (here: a single numeric Features column, no weight).
/// </summary>
public void InferColumnsColumnInfoParam()
{
    var inputColumnInfo = new ColumnInformation() { LabelColumnName = DatasetUtil.MlNetGeneratedRegressionLabel };

    var result = new MLContext().Auto().InferColumns(DatasetUtil.DownloadMlNetGeneratedRegressionDataset(), inputColumnInfo);

    var labelColumn = result.TextLoaderOptions.Columns.First(c => c.Name == DatasetUtil.MlNetGeneratedRegressionLabel);
    Assert.Equal(DataKind.Single, labelColumn.DataKind);
    Assert.Equal(DatasetUtil.MlNetGeneratedRegressionLabel, result.ColumnInformation.LabelColumnName);
    Assert.Single(result.ColumnInformation.NumericColumnNames);
    Assert.Equal(DefaultColumnNames.Features, result.ColumnInformation.NumericColumnNames.First());
    Assert.Null(result.ColumnInformation.ExampleWeightColumnName);
}
/// <summary>
/// Opens a connection with the given connection string, executes the
/// column-information query text, and materializes the resulting rows.
/// </summary>
/// <param name="connectionString">SQL Server connection string.</param>
/// <returns>The column information rows read from the data reader.</returns>
public async Task<IList<ColumnInformation>> GetColumnInformation(string connectionString)
{
    using var connection = new SqlConnection(connectionString);
    // SqlCommand is IDisposable; dispose it like the connection and reader
    // (the original leaked it).
    using var command = new SqlCommand(StoredProcedureExtensions.GetColumnInformation(), connection)
    {
        CommandType = CommandType.Text
    };
    // Open asynchronously — this is an async method, so don't block the
    // caller's thread with the synchronous Open() the original used.
    await connection.OpenAsync();
    using var dataReader = await command.ExecuteReaderAsync();
    var columnInformationModels = await ColumnInformation.FromDataReader(dataReader);
    return columnInformationModels.ToList();
}
/// <summary>
/// A column declared as text but actually typed Single must fail validation
/// with a descriptive error message.
/// </summary>
public void ValidateTextColumnNotText()
{
    const string TextPurposeColName = "TextColumn";

    var schemaBuilder = new DataViewSchema.Builder();
    schemaBuilder.AddColumn(DefaultColumnNames.Features, NumberDataViewType.Single);
    schemaBuilder.AddColumn(DefaultColumnNames.Label, NumberDataViewType.Single);
    schemaBuilder.AddColumn(TextPurposeColName, NumberDataViewType.Single);
    var dataView = DataViewTestFixture.BuildDummyDataView(schemaBuilder.ToSchema());

    var columnInfo = new ColumnInformation();
    columnInfo.TextColumnNames.Add(TextPurposeColName);

    var thrown = Assert.Throws<ArgumentException>(
        () => UserInputValidationUtil.ValidateExperimentExecuteArgs(dataView, columnInfo, null, TaskKind.Regression));
    Assert.Equal("Provided text column 'TextColumn' was of type Single, but only type String is allowed.", thrown.Message);
}
//Use parameters rather than accessing module level properties
/// <summary>
/// Returns column metadata (name, data type, max character length) for every
/// column of the given table, using a parameterized query for the table-name
/// filter to avoid SQL injection.
/// </summary>
private IList<ColumnInformation> GetColumnInformationForTable(string dbName, string tableName)
{
    var columnInformations = new List<ColumnInformation>();

    // The database name must be concatenated (it cannot be parameterized),
    // but the table-name filter is passed as a proper SqlParameter.
    string selectCmdString = "SELECT column_name,data_type,character_maximum_length FROM " + dbName + ".information_schema.columns WHERE table_name = @TableName";

    using (SqlCommand cmd = new SqlCommand(selectCmdString, conn))
    {
        cmd.Parameters.Add(new SqlParameter("@TableName", tableName));

        // The try/finally only exists to guarantee the (module-level)
        // connection is closed; the reader is disposed via `using`.
        try
        {
            conn.Open();
            using (SqlDataReader reader = cmd.ExecuteReader())
            {
                int lengthOrdinal = reader.GetOrdinal("character_maximum_length");
                while (reader.Read())
                {
                    // BUG FIX: the original wrote
                    //   (int)reader["character_maximum_length"].ToString()
                    // which casts a *string* to int and throws at runtime.
                    // character_maximum_length is also NULL for non-character
                    // columns, so check IsDBNull explicitly (conditional-based,
                    // not try-based) and fall back to 0.
                    int maxLength = reader.IsDBNull(lengthOrdinal) ? 0 : reader.GetInt32(lengthOrdinal);
                    ColumnInformation columnInformation = new ColumnInformation(
                        reader["column_name"].ToString(),
                        reader["data_type"].ToString(),
                        maxLength);
                    columnInformations.Add(columnInformation);
                }
            }
        }
        finally
        {
            conn.Close();
        }
    }

    return columnInformations;
}
/// <summary>
/// Column inference must cope with an input file containing escaped newlines
/// inside quoted fields.
/// </summary>
public void InferColumnsFromMultilineInputFile()
{
    var dataPath = GetDataPath("multiline.csv");
    var mlContext = new MLContext();
    var inputColumnInformation = new ColumnInformation();
    inputColumnInformation.LabelColumnName = @"id";

    var result = mlContext.Auto().InferColumns(dataPath, inputColumnInformation);

    // File has 3 columns: "id", "description" and "animal"
    Assert.NotNull(result.ColumnInformation.LabelColumnName);
    Assert.Equal("id", result.ColumnInformation.LabelColumnName);
    Assert.Equal(1, result.ColumnInformation.TextColumnNames.Count);
    Assert.Equal("description", result.ColumnInformation.TextColumnNames.First());
    Assert.Equal(1, result.ColumnInformation.CategoricalColumnNames.Count);
    Assert.Equal("animal", result.ColumnInformation.CategoricalColumnNames.First());
}
/// <summary>
/// Moves every inferred numeric column whose name contains "fstr" into the
/// categorical column set, then returns the (mutated) column information.
/// </summary>
private static ColumnInformation CorrectColumnTypes(ColumnInformation columnInformation)
{
    // Materialize first: NumericColumnNames cannot be modified while it is
    // being enumerated.
    var reclassify = new List<String>();
    foreach (var candidate in columnInformation.NumericColumnNames)
    {
        if (!candidate.Contains("fstr"))
        {
            continue;
        }
        reclassify.Add(candidate);
    }

    foreach (var columnName in reclassify)
    {
        columnInformation.NumericColumnNames.Remove(columnName);
        columnInformation.CategoricalColumnNames.Add(columnName);
    }

    return columnInformation;
}
/// <summary>
/// Builds column metadata for every property of the target type decorated
/// with UserDefinedTableTypeColumnAttribute, ordered by the declared Order.
/// </summary>
private List<ColumnInformation> GetColumnInformation()
{
    var columns = new List<ColumnInformation>();
    foreach (PropertyInfo propertyInfo in _type.GetProperties())
    {
        var attribute = Attributes.GetAttribute<UserDefinedTableTypeColumnAttribute>(propertyInfo);
        // Properties without the attribute are not part of the table type.
        if (attribute == null)
        {
            continue;
        }

        columns.Add(new ColumnInformation
        {
            // The attribute's Name overrides the property name when present.
            Name = attribute.Name ?? propertyInfo.Name,
            Property = propertyInfo,
            Order = attribute.Order
        });
    }

    return columns.OrderBy(info => info.Order).ToList();
}
/// <summary>
/// Runs a time-boxed AutoML multiclass experiment over the transactions,
/// predicting Category from the Description text, and returns the best model.
/// </summary>
public ITransformer AutoTrain(IEnumerable<Transaction> trainingData, uint maxTimeInSec)
{
    _trainingDataView = _mlContext.Data.LoadFromEnumerable(trainingData);

    var experimentSettings = new MulticlassExperimentSettings
    {
        MaxExperimentTimeInSeconds = maxTimeInSec,
        OptimizingMetric = MulticlassClassificationMetric.MacroAccuracy
    };
    var experiment = _mlContext.Auto().CreateMulticlassClassificationExperiment(experimentSettings);

    // nameof keeps the column bindings refactor-safe.
    var columnInformation = new ColumnInformation { LabelColumnName = nameof(Transaction.Category) };
    columnInformation.TextColumnNames.Add(nameof(Transaction.Description));

    var experimentResult = experiment.Execute(_trainingDataView, columnInformation);
    return experimentResult.BestRun.Model;
}
/// <summary>
/// Loads the housing dataset, runs a 20-second AutoML regression experiment
/// optimizing MAE, reports each candidate model, and prints the best run.
/// </summary>
static void Main(string[] args)
{
    var mlContext = new MLContext();
    var trainingData = mlContext.Data.LoadFromTextFile<HousingData>("./housing.csv", hasHeader: true, separatorChar: ',');

    var experimentSettings = new RegressionExperimentSettings
    {
        MaxExperimentTimeInSeconds = 20,
        OptimizingMetric = RegressionMetric.MeanAbsoluteError
    };
    var columnInformation = new ColumnInformation() { LabelColumnName = "Label" };

    // Report each candidate model's metrics as AutoML produces them.
    var progressReporter = new Progress<RunDetail<RegressionMetrics>>(run =>
    {
        if (run.ValidationMetrics != null)
        {
            Console.WriteLine($"Current Result - {run.TrainerName}, {run.ValidationMetrics.RSquared}, {run.ValidationMetrics.MeanAbsoluteError}");
        }
    });

    var experiment = mlContext.Auto().CreateRegressionExperiment(experimentSettings);
    var experimentResult = experiment.Execute(trainingData, columnInformation, progressHandler: progressReporter);

    Console.WriteLine(Environment.NewLine);
    Console.WriteLine("Best run:");
    Console.WriteLine($"Trainer name - {experimentResult.BestRun.TrainerName}");
    Console.WriteLine($"RSquared - {experimentResult.BestRun.ValidationMetrics.RSquared}");
    Console.WriteLine($"MAE - {experimentResult.BestRun.ValidationMetrics.MeanAbsoluteError}");
    Console.ReadLine();
}
/// <summary>
/// Use this method to check if the provider supports obtaining the specified information.
/// </summary>
/// <param name="ci">Flag(s) to test against the analyzer's capability mask.</param>
/// <returns>True when any requested capability bit is supported.</returns>
public bool HasCapability(ColumnInformation ci) => (ci & AnalyzerCapability) != 0;
/// <summary>
/// SuggestedPipeline hash codes must be equal exactly when learner, set
/// hyperparameters, and transforms all match — equal pipelines hash equal,
/// any difference in hyperparameters or learner hashes different.
/// </summary>
public void InferredPipelinesHashTest()
{
    var context = new MLContext();
    var columnInfo = new ColumnInformation();

    // test same learners with no hyperparams have the same hash code
    var trainer1 = new SuggestedTrainer(context, new LightGbmBinaryExtension(), columnInfo);
    var trainer2 = new SuggestedTrainer(context, new LightGbmBinaryExtension(), columnInfo);
    var transforms1 = new List<SuggestedTransform>();
    var transforms2 = new List<SuggestedTransform>();
    var inferredPipeline1 = new SuggestedPipeline(transforms1, new List<SuggestedTransform>(), trainer1, context, false);
    var inferredPipeline2 = new SuggestedPipeline(transforms2, new List<SuggestedTransform>(), trainer2, context, false);
    Assert.Equal(inferredPipeline1.GetHashCode(), inferredPipeline2.GetHashCode());

    // test same learners with hyperparams set vs empty hyperparams have different hash codes
    var hyperparams1 = new ParameterSet(new List<IParameterValue>() { new LongParameterValue("NumberOfLeaves", 2) });
    trainer1 = new SuggestedTrainer(context, new LightGbmBinaryExtension(), columnInfo, hyperparams1);
    trainer2 = new SuggestedTrainer(context, new LightGbmBinaryExtension(), columnInfo);
    inferredPipeline1 = new SuggestedPipeline(transforms1, new List<SuggestedTransform>(), trainer1, context, false);
    inferredPipeline2 = new SuggestedPipeline(transforms2, new List<SuggestedTransform>(), trainer2, context, false);
    Assert.NotEqual(inferredPipeline1.GetHashCode(), inferredPipeline2.GetHashCode());

    // same learners with different hyperparams
    hyperparams1 = new ParameterSet(new List<IParameterValue>() { new LongParameterValue("NumberOfLeaves", 2) });
    var hyperparams2 = new ParameterSet(new List<IParameterValue>() { new LongParameterValue("NumberOfLeaves", 6) });
    trainer1 = new SuggestedTrainer(context, new LightGbmBinaryExtension(), columnInfo, hyperparams1);
    trainer2 = new SuggestedTrainer(context, new LightGbmBinaryExtension(), columnInfo, hyperparams2);
    inferredPipeline1 = new SuggestedPipeline(transforms1, new List<SuggestedTransform>(), trainer1, context, false);
    inferredPipeline2 = new SuggestedPipeline(transforms2, new List<SuggestedTransform>(), trainer2, context, false);
    Assert.NotEqual(inferredPipeline1.GetHashCode(), inferredPipeline2.GetHashCode());

    // same learners with same transforms (equivalent concat transforms must not break equality)
    trainer1 = new SuggestedTrainer(context, new LightGbmBinaryExtension(), columnInfo);
    trainer2 = new SuggestedTrainer(context, new LightGbmBinaryExtension(), columnInfo);
    transforms1 = new List<SuggestedTransform>() { ColumnConcatenatingExtension.CreateSuggestedTransform(context, new[] { "In" }, "Out") };
    transforms2 = new List<SuggestedTransform>() { ColumnConcatenatingExtension.CreateSuggestedTransform(context, new[] { "In" }, "Out") };
    inferredPipeline1 = new SuggestedPipeline(transforms1, new List<SuggestedTransform>(), trainer1, context, false);
    inferredPipeline2 = new SuggestedPipeline(transforms2, new List<SuggestedTransform>(), trainer2, context, false);
    Assert.Equal(inferredPipeline1.GetHashCode(), inferredPipeline2.GetHashCode());

    // same transforms with different learners
    trainer1 = new SuggestedTrainer(context, new SdcaLogisticRegressionBinaryExtension(), columnInfo);
    trainer2 = new SuggestedTrainer(context, new LightGbmBinaryExtension(), columnInfo);
    transforms1 = new List<SuggestedTransform>() { ColumnConcatenatingExtension.CreateSuggestedTransform(context, new[] { "In" }, "Out") };
    transforms2 = new List<SuggestedTransform>() { ColumnConcatenatingExtension.CreateSuggestedTransform(context, new[] { "In" }, "Out") };
    inferredPipeline1 = new SuggestedPipeline(transforms1, new List<SuggestedTransform>(), trainer1, context, false);
    inferredPipeline2 = new SuggestedPipeline(transforms2, new List<SuggestedTransform>(), trainer2, context, false);
    Assert.NotEqual(inferredPipeline1.GetHashCode(), inferredPipeline2.GetHashCode());
}
/// <summary>
/// Use this method to check if the provider supports obtaining the specified information.
/// </summary>
/// <param name="ci">Flag(s) to test against the analyzer's capability mask.</param>
/// <returns>True when any requested capability bit is supported.</returns>
public bool HasCapability( ColumnInformation ci ) => (ci & AnalyzerCapability) != 0;
/// <summary>
/// Reflection-based shim over the internal ColumnInferenceApi.InferColumns
/// overload; unwraps the reflection wrapper exception so callers see the
/// real error.
/// </summary>
public static ColumnInferenceResults InferColumns(MLContext context, string path, ColumnInformation columnInfo, char?separatorChar, bool?allowQuotedStrings, bool?supportSparse, bool trimWhitespace, bool groupColumns, bool hasHeader = true)
{
    try
    {
        return (ColumnInferenceResults)InferColumnsMethodInfo.Invoke(null, new object[]
        {
            context, path, columnInfo, separatorChar, allowQuotedStrings,
            supportSparse, trimWhitespace, groupColumns, hasHeader
        });
    }
    catch (Exception ex) when (ex.InnerException != null)
    {
        // BUG FIX: the original `throw ex.InnerException;` reset the inner
        // exception's stack trace (and threw NullReferenceException when no
        // inner exception existed — that case now propagates unchanged via
        // the `when` filter). ExceptionDispatchInfo rethrows with the
        // original trace preserved.
        System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(ex.InnerException).Throw();
        throw; // unreachable; satisfies definite-return analysis
    }
}
/// <summary>
/// Runs a cancellable AutoML binary-classification experiment over the
/// training data, reports per-model progress, prints the top models, and
/// returns the experiment result. Pre-featurizer variants are kept below as
/// commented-out experimentation notes.
/// </summary>
private static ExperimentResult<BinaryClassificationMetrics> RunAutoMLExperiment(MLContext mlContext, ColumnInferenceResults columnInference)
{
    // STEP 1: Display first few rows of the training data.
    // ConsoleHelper.ShowDataViewInConsole(mlContext, TrainDataView);

    // STEP 2: Build a pre-featurizer for use in the AutoML experiment.
    // (Internally, AutoML uses one or more train/validation data splits to
    // evaluate the models it produces. The pre-featurizer is fit only on the
    // training data split to produce a trained transform. Then, the trained transform
    // is applied to both the train and validation data splits.)
    //IEstimator<ITransformer> preFeaturizer = mlContext.Transforms.Conversion.MapValue("is_cash",
    //    new[] { new KeyValuePair<string, bool>("CSH", true) }, "payment_type");
    //IEstimator<ITransformer> preFeaturizer = mlContext.Transforms.Conversion.MapValue("fstrClassCategory",
    //    new[] { new KeyValuePair<float, String>(1, "First"), new KeyValuePair<float, String>(2, "Second"), new KeyValuePair<float, String>(3, "Third") }, "fstrClass").Append(mlContext.Transforms.Categorical.OneHotEncoding("fstrClassCategory", "fstrClassCategory")).Append(mlContext.Transforms.DropColumns("fstrClass"));

    // STEP 3: Customize column information returned by InferColumns API —
    // CorrectColumnTypes reclassifies the "fstr*" numeric columns as categorical.
    ColumnInformation columnInformation = columnInference.ColumnInformation;
    columnInformation = CorrectColumnTypes(columnInformation);
    // columnInformation.NumericColumnNames.Remove("fstrClass");
    // columnInformation.CategoricalColumnNames.Add("fstrClass");
    // columnInformation.IgnoredColumnNames.Add("fstrClass");
    // columnInformation.IgnoredColumnNames.

    // STEP 4: Initialize a cancellation token source to stop the experiment.
    var cts = new CancellationTokenSource();

    // STEP 5: Initialize our user-defined progress handler that AutoML will
    // invoke after each model it produces and evaluates.
    var progressHandler = new BinaryExperimentProgressHandler(); // RegressionExperimentProgressHandler();

    // STEP 6: Create experiment settings
    var experimentSettings = CreateExperimentSettings(mlContext, cts);

    // STEP 7: Run AutoML Binary Classification experiment.
    var experiment = mlContext.Auto().CreateBinaryClassificationExperiment(experimentSettings);
    ConsoleHelper.ConsoleWriteHeader("=============== Running AutoML experiment ===============");
    Console.WriteLine($"Running AutoML regression experiment...");
    var stopwatch = Stopwatch.StartNew();

    // Cancel experiment after the user presses any key.
    CancelExperimentAfterAnyKeyPress(cts);

    ExperimentResult<BinaryClassificationMetrics> experimentResult = experiment.Execute(trainData: TrainDataView, columnInformation: columnInformation, progressHandler: progressHandler);
    Console.WriteLine($"{experimentResult.RunDetails.Count()} models were returned after {stopwatch.Elapsed.TotalSeconds:0.00} seconds{Environment.NewLine}");

    // Print top models found by AutoML.
    PrintTopModels(experimentResult);

    // Feature-importance exploration, not yet wired up:
    // var featureNames = columnInformation.CategoricalColumnNames.Concat(columnInformation.ImagePathColumnNames).Concat(columnInformation.NumericColumnNames).Concat(columnInformation.TextColumnNames).ToList();
    // var permutationMetrics = mlContext.BinaryClassification.PermutationFeatureImportance(predictionTransformer: )
    // PrintContributions(featureNames, TrainDataView, experimentResult.RunDetails);
    // DatasetDimensionsUtil.GetTextColumnCardinality();
    return (experimentResult);
}