/// <summary>
/// Runs every configured regression trainer against the taxi-fare data set, prints the
/// evaluation metrics of each one, passes the predictions to <c>VisualizeRegression</c>
/// and finally reports the algorithm with the lowest mean squared error.
/// </summary>
/// <param name="train">
/// When <c>true</c> (the default) every model is trained from the training table.
/// When <c>false</c>, a model is loaded from <c>Regression_{algorithm}.zip</c> if that file
/// exists; algorithms without a saved file are still trained.
/// </param>
/// <returns>A task that completes once all algorithms have been evaluated.</returns>
/// <exception cref="InvalidOperationException">
/// A trainer returned without handing its fitted model to the supplied callback.
/// </exception>
static async Task RegressionExample(bool train = true)
{
    var bestAlg = string.Empty;
    double mse = double.MaxValue;
    var mlContext = new MLContext();

    // NOTE(review): credentials are hard-coded in source; consider loading the connection
    // string from configuration or the environment instead.
    var sqlConnection = "Server = localhost;database = Local;user = sa;password = sa";

    var testdata = await Utilities.SQL.SQLServer.ExecuteReaderAsync<TaxiFare>(
        sqlConnection,
        "SELECT TOP(10) * FROM [taxi-fare-test]",
        parameters: null,
        objectBuilder: (row) => Utilities.Shared.Data.RowBuilderExplicit<TaxiFare>(row));

    // ORDER BY NEWID() makes SQL Server return the training rows in random order.
    var traindata = await Utilities.SQL.SQLServer.ExecuteReaderAsync<TaxiFare>(
        sqlConnection,
        "SELECT * FROM [taxi-fare-train] ORDER BY NEWID()",
        parameters: null,
        objectBuilder: (row) => Utilities.Shared.Data.RowBuilderExplicit<TaxiFare>(row));

    // Display name -> trainer. Each trainer returns a prediction engine and reports the
    // fitted model through the callback so that it can be saved and evaluated afterwards.
    var algorithms = new Dictionary<string, Func<IEnumerable<TaxiFare>, Action<ITransformer>, PredictionEngine<TaxiFare, TaxiFareRegression>>>()
    {
        { "SDCA", (data, action) => Regression.StochasticDoubleCoordinateAscent<TaxiFare, TaxiFareRegression>(data, additionModelAction: action) },
        { "LBFGS", (data, action) => Regression.LbfgsPoisson<TaxiFare, TaxiFareRegression>(data, additionModelAction: action) },
        { "FastTree", (data, action) => Regression.FastTree<TaxiFare, TaxiFareRegression>(data, additionModelAction: action) },
        { "FastTreeTweedie", (data, action) => Regression.FastTreeTweedie<TaxiFare, TaxiFareRegression>(data, additionModelAction: action) },
        { "FastForest", (data, action) => Regression.FastForest<TaxiFare, TaxiFareRegression>(data, additionModelAction: action) },
    };

    foreach (var algorithm in algorithms)
    {
        PredictionEngine<TaxiFare, TaxiFareRegression> engine = default;
        ITransformer model = default;
        var path = $"Regression_{algorithm.Key}.zip";

        if (!train && File.Exists(path))
        {
            // Reuse the model that is already on disk; there is nothing new to save.
            model = Global.LoadModel(path);
            engine = mlContext.Model.CreatePredictionEngine<TaxiFare, TaxiFareRegression>(model);
        }
        else
        {
            engine = algorithm.Value(traindata, (mdl) => { model = mdl; });

            if (model is null)
            {
                throw new InvalidOperationException(
                    $"Trainer '{algorithm.Key}' did not provide a trained model.");
            }

            // Only a freshly trained model needs to be written back to disk.
            MachineLearning.Global.SaveModel(model, path);
        }

        var metrics = Metrics.EvaluateRegressionModel(model, mlContext.Data.LoadFromEnumerable(testdata));

        MachineLearning.ConsoleHelper.ConsoleWriteHeader($@"Evaluate metrics for {algorithm.Key} algorithm.");

        // Dump every public property of the metrics object via reflection.
        foreach (var prop in metrics.GetType().GetProperties())
        {
            Console.WriteLine($@"{prop.Name} : {prop.GetValue(metrics)}");
        }

        // Track the algorithm with the smallest mean squared error seen so far.
        if (metrics.MeanSquaredError < mse)
        {
            mse = metrics.MeanSquaredError;
            bestAlg = algorithm.Key;
        }

        var predictedList = engine.Predict(testdata);
        VisualizeRegression(algorithm.Key, testdata, predictedList, metrics, $"{algorithm.Key}_reg.svg");
    }

    // Highlight the winner, then give the console back the colour it had before.
    var previousColor = Console.ForegroundColor;
    Console.ForegroundColor = ConsoleColor.Green;
    try
    {
        Console.WriteLine($@"Best algorithm based-on Mean Squared Error : {bestAlg}");
    }
    finally
    {
        Console.ForegroundColor = previousColor;
    }
}
/// <summary>
/// Loads the taxi-fare test and training rows from SQL Server and runs a 5-fold
/// cross-validation of the FastTreeTweedie trainer over the min-max normalised
/// feature columns, then returns.
/// </summary>
/// <remarks>
/// The method currently returns right after the cross-validation experiment, so the
/// algorithm comparison further down (train or load each model, evaluate, visualise,
/// report the lowest mean squared error) is unreachable until that <c>return</c> is removed.
/// </remarks>
/// <param name="train">
/// Only consulted by the currently unreachable comparison loop: when <c>false</c>, a model
/// is loaded from <c>Regression_{algorithm}.zip</c> if that file exists; otherwise it is trained.
/// </param>
/// <returns>A task that completes once the cross-validation run has finished.</returns>
static async Task RegressionExample(bool train = true)
{
    var bestAlg = string.Empty;
    double mse = double.MaxValue;
    var mlContext = new MLContext();

    // NOTE(review): credentials are hard-coded in source; consider loading the connection
    // string from configuration or the environment instead.
    var sqlConnection = "Server = localhost;database = Local;user = sa;password = sa";

    IEnumerable<TaxiFare> testdata = null;
    IEnumerable<TaxiFare> traindata = null;

    using (var connection = new SQLServer(sqlConnection))
    {
        testdata = await connection.QueryAsync<TaxiFareTest>(top: 10);
        traindata = await connection.QueryAsync<TaxiFareTrain>();

        // Separate context constructed with the argument 1 (presumably a fixed seed for
        // repeatable folds; confirm against the MLContext constructor).
        var context = new MLContext(1);
        var dataframe = context.Data.LoadFromEnumerable(traindata);

        // Concatenate the four input columns into "Features" and min-max normalise them.
        var crossValidatePreparer = context.Transforms
            .Concatenate("Features", new[] { "rate_code", "passenger_count", "trip_time_in_secs", "trip_distance" })
            .Append(context.Transforms.NormalizeMinMax("Features"));

        var cleanedData = crossValidatePreparer.Fit(dataframe);
        var transformedData = cleanedData.Transform(dataframe);

        var crossValidate = context.Regression.CrossValidate(
            transformedData,
            context.Regression.Trainers.FastTreeTweedie(),
            numberOfFolds: 5);

        // Per-fold model and R-squared. NOTE(review): nothing consumes this list yet.
        var rsqrs = crossValidate
            .Select(x => new { model = x.Model, rsquared = x.Metrics.RSquared })
            .ToList();
    }

    // NOTE(review): this unconditional return short-circuits the method. Everything below
    // is unreachable (compiler warning CS0162) until it is removed.
    return;

    // Display name -> trainer. Each trainer returns a prediction engine and reports the
    // fitted model through the callback so that it can be saved and evaluated afterwards.
    var algorithms = new Dictionary<string, Func<IEnumerable<TaxiFare>, Action<ITransformer>, PredictionEngine<TaxiFare, TaxiFareRegression>>>()
    {
        { "SDCA", (data, action) => Regression.StochasticDoubleCoordinateAscent<TaxiFare, TaxiFareRegression>(data, additionModelAction: action) },
        { "LBFGS", (data, action) => Regression.LbfgsPoisson<TaxiFare, TaxiFareRegression>(data, additionModelAction: action) },
        { "FastTree", (data, action) => Regression.FastTree<TaxiFare, TaxiFareRegression>(data, additionModelAction: action) },
        { "FastTreeTweedie", (data, action) => Regression.FastTreeTweedie<TaxiFare, TaxiFareRegression>(data, additionModelAction: action) },
        { "FastForest", (data, action) => Regression.FastForest<TaxiFare, TaxiFareRegression>(data, additionModelAction: action) },
        { "GeneralizedAdditiveModel", (data, action) => Regression.GeneralizedAdditiveModel<TaxiFare, TaxiFareRegression>(data, additionModelAction: action) },
    };

    foreach (var algorithm in algorithms)
    {
        PredictionEngine<TaxiFare, TaxiFareRegression> engine = default;
        ITransformer model = default;
        var path = $"Regression_{algorithm.Key}.zip";

        if (!train && File.Exists(path))
        {
            // Reuse the model that is already on disk; there is nothing new to save.
            model = Global.LoadModel(path);
            engine = mlContext.Model.CreatePredictionEngine<TaxiFare, TaxiFareRegression>(model);
        }
        else
        {
            engine = algorithm.Value(traindata, (mdl) => { model = mdl; });

            if (model is null)
            {
                throw new InvalidOperationException(
                    $"Trainer '{algorithm.Key}' did not provide a trained model.");
            }

            // Only a freshly trained model needs to be written back to disk.
            MachineLearning.Global.SaveModel(model, path);
        }

        var metrics = Metrics.EvaluateRegressionModel(model, mlContext.Data.LoadFromEnumerable(testdata));

        MachineLearning.ConsoleHelper.ConsoleWriteHeader($@"Evaluate metrics for {algorithm.Key} algorithm.");

        // Dump every public property of the metrics object via reflection.
        foreach (var prop in metrics.GetType().GetProperties())
        {
            Console.WriteLine($@"{prop.Name} : {prop.GetValue(metrics)}");
        }

        // Track the algorithm with the smallest mean squared error seen so far.
        if (metrics.MeanSquaredError < mse)
        {
            mse = metrics.MeanSquaredError;
            bestAlg = algorithm.Key;
        }

        var predictedList = engine.Predict(testdata);
        VisualizeRegression(algorithm.Key, testdata, predictedList, metrics, $"{algorithm.Key}_reg.svg");
    }

    // Highlight the winner, then give the console back the colour it had before.
    var previousColor = Console.ForegroundColor;
    Console.ForegroundColor = ConsoleColor.Green;
    try
    {
        Console.WriteLine($@"Best algorithm based-on Mean Squared Error : {bestAlg}");
    }
    finally
    {
        Console.ForegroundColor = previousColor;
    }
}