/// <summary>
/// Runs every configured clustering algorithm against the <c>[iris]</c> table: trains a model
/// (or loads a previously saved one), prints its evaluation metrics and the predicted cluster
/// for each held-out row, renders the predictions to <c>clustering.svg</c>, and finally reports
/// the algorithm with the lowest average distance.
/// </summary>
/// <param name="train">
/// When <c>false</c> and a saved model file exists for an algorithm, that model is loaded
/// instead of being retrained. In every other case the algorithm is trained and then saved.
/// </param>
/// <returns>A task that completes when all algorithms have been evaluated.</returns>
static async Task ClusteringExample(bool train = true)
{
    var bestAlg = string.Empty;
    double avgdist = double.MaxValue;
    var mlContext = new MLContext();

    // NOTE(review): credentials are hard-coded in source; consider moving them to configuration.
    var sqlConnection = @"Server = localhost;database = Local;user = sa;password = sa";

    List<Iris> allRows;
    using (var connection = new SQLServer(sqlConnection))
    {
        // Materialize while the connection is still open so the shuffled rows are read exactly
        // once; the split below would otherwise enumerate the result several times.
        IEnumerable<Iris> rows = await connection.ExecuteReaderAsync<Iris>("SELECT * FROM [iris] ORDER BY NEWID()");
        allRows = rows.ToList();
    }

    // 80 % of the (randomly ordered) rows are used for training, the rest for testing.
    var trainSize = (int)(allRows.Count * 0.8);
    var traindata = allRows.Take(trainSize).ToList();
    var testdata = allRows.Skip(trainSize).ToList();

    var algorithms = new Dictionary<string, Func<IEnumerable<Iris>, Action<ITransformer>, PredictionEngine<Iris, IrisClustering>>>()
    {
        { "KMeans", (data, action) => Clustering.KMeans<Iris, IrisClustering>(data, 3, additionModelAction: action) },
    };

    foreach (var algorithm in algorithms)
    {
        PredictionEngine<Iris, IrisClustering> engine;
        ITransformer model = default;
        var path = $@"Clustering_{algorithm.Key}.zip";

        if (File.Exists(path) && !train)
        {
            model = MachineLearning.Global.LoadModel(path);
            engine = mlContext.Model.CreatePredictionEngine<Iris, IrisClustering>(model);
        }
        else
        {
            // The training helper hands the fitted transformer back through the callback.
            engine = algorithm.Value(traindata, mdl => { model = mdl; });

            // Only a freshly trained model needs saving; a loaded one is already on disk.
            MachineLearning.Global.SaveModel(model, path);
        }

        MachineLearning.ConsoleHelper.ConsoleWriteHeader($@"Evaluate metrics for {algorithm.Key} algorithm.");
        var dataframe = mlContext.Data.LoadFromEnumerable(testdata);
        var metrics = Metrics.EvaluateClusteringMetrics(model, dataframe);
        foreach (var prop in metrics.GetType().GetProperties())
        {
            Console.WriteLine($@"{prop.Name} : {prop.GetValue(metrics)}");
        }

        // Lower average distance wins.
        if (metrics.AverageDistance < avgdist)
        {
            avgdist = metrics.AverageDistance;
            bestAlg = algorithm.Key;
        }

        var predictedData = new List<IrisClustering>(testdata.Count);
        foreach (var t in testdata)
        {
            var predict = engine.Predict(t);
            predictedData.Add(predict);
            Console.WriteLine($@"Cluster ID : {predict.PredictedLabel,5}");
        }

        VisualizeClustering(predictedData, "clustering.svg");
    }

    Console.ForegroundColor = ConsoleColor.Green;
    Console.WriteLine($@"Best algorithm based-on Average Distance : {bestAlg}");
    Console.ForegroundColor = ConsoleColor.White;
}
/// <summary>
/// Runs every configured binary-classification algorithm on the heart-disease data: trains a
/// model on <c>HeartTraining</c> (or loads a previously saved one), prints its evaluation
/// metrics against <c>HeartTest</c>, prints the prediction for each test row, and finally
/// reports the algorithm with the highest accuracy.
/// </summary>
/// <param name="train">
/// When <c>false</c> and a saved model file exists for an algorithm, that model is loaded
/// instead of being retrained. In every other case the algorithm is trained and then saved.
/// </param>
/// <returns>A task that completes when all algorithms have been evaluated.</returns>
static async Task BinaryClassifier(bool train = true)
{
    // NOTE(review): credentials are hard-coded in source; consider moving them to configuration.
    var sqlConnection = @"Server = localhost;database = Local;user = sa;password = sa";

    List<HeartData> traindata;
    List<HeartData> testdata;
    using (var connection = new SQLServer(sqlConnection))
    {
        // Materialize while the connection is still open: both sets are enumerated several
        // times below (once per algorithm for training/evaluation and again for predictions).
        IEnumerable<HeartData> trainRows = await connection.ExecuteReaderAsync<HeartData>(@"SELECT * FROM HeartTraining");
        traindata = trainRows.ToList();
        IEnumerable<HeartData> testRows = await connection.ExecuteReaderAsync<HeartData>(@"SELECT * FROM HeartTest");
        testdata = testRows.ToList();
    }

    double accuracy = double.MinValue;
    string bestAlg = string.Empty;
    var mlContext = new MLContext();

    var algorithms = new Dictionary<string, Func<IEnumerable<HeartData>, Action<ITransformer>, PredictionEngine<HeartData, HeartPredict>>>()
    {
        { "FastTree", (data, action) => BinaryClassification.FastTree<HeartData, HeartPredict>(data, additionModelAction: action) },
        { "FastForest", (data, action) => BinaryClassification.FastForest<HeartData, HeartPredict>(data, additionModelAction: action) },
        { "SdcaLogisticRegression", (data, action) => BinaryClassification.SdcaLogisticRegression<HeartData, HeartPredict>(data, additionModelAction: action) },
        { "AveragedPerceptron", (data, action) => BinaryClassification.AveragedPerceptron<HeartData, HeartPredict>(data, additionModelAction: action) },
        { "LinearSVM", (data, action) => BinaryClassification.LinearSVM<HeartData, HeartPredict>(data, additionModelAction: action) }
    };

    foreach (var algorithm in algorithms)
    {
        PredictionEngine<HeartData, HeartPredict> engine;
        ITransformer model = default;
        var path = $@"BClassification_{algorithm.Key}.zip";

        if (File.Exists(path) && !train)
        {
            model = MachineLearning.Global.LoadModel(path);
            engine = mlContext.Model.CreatePredictionEngine<HeartData, HeartPredict>(model);
        }
        else
        {
            // The training helper hands the fitted transformer back through the callback.
            engine = algorithm.Value(traindata, mdl => { model = mdl; });

            // Save to the same file the load branch looks for; previously the model was
            // written under a different name, so it could never be found again.
            Global.SaveModel(model, path);
        }

        ConsoleHelper.ConsoleWriteHeader($@"Evaluate metrics for {algorithm.Key} algorithm.");
        try
        {
            var metrics = Metrics.EvaluateBinaryClassificationMetrics(
                model,
                mlContext.Data.LoadFromEnumerable(testdata),
                labelColumnName: nameof(HeartData.Label));
            foreach (var prop in metrics.GetType().GetProperties())
            {
                Console.WriteLine($@"{prop.Name} : {prop.GetValue(metrics)}");
            }

            // Higher accuracy wins.
            if (metrics.Accuracy > accuracy)
            {
                accuracy = metrics.Accuracy;
                bestAlg = algorithm.Key;
            }
        }
        catch (Exception e)
        {
            // Best effort: a failed evaluation is reported and the per-row predictions
            // below are still printed.
            Console.WriteLine($"Unable to evaluate metrics : {e.Message}");
        }

        foreach (var t in testdata)
        {
            var predict = engine.Predict(t);
            Console.WriteLine($@"Actual {t.Label,5} / Predict {predict.PredictedLabel,5} with prob of {predict.Probability,5}");
        }
    }

    Console.ForegroundColor = ConsoleColor.Green;
    Console.WriteLine($@"Best algorithm based-on accuracy : {bestAlg}");
    Console.ForegroundColor = ConsoleColor.White;
}
/// <summary>
/// Runs every configured multiclass-classification algorithm against the <c>[Iris]</c> table:
/// trains a model (or loads a previously saved one), prints its evaluation metrics and the
/// prediction for each held-out row, and finally reports the algorithm with the lowest log loss.
/// </summary>
/// <param name="train">
/// When <c>false</c> and a saved model file exists for an algorithm, that model is loaded
/// instead of being retrained. In every other case the algorithm is trained and then saved.
/// </param>
/// <returns>A task that completes when all algorithms have been evaluated.</returns>
static async Task MulticlassClassificationExample(bool train = true)
{
    // Number of shuffled rows held out for evaluation.
    const int testSize = 20;

    var bestAlg = string.Empty;
    double logLoss = double.MaxValue;
    var mlContext = new MLContext();

    // NOTE(review): credentials are hard-coded in source; consider moving them to configuration.
    var sqlConnection = @"Server = localhost;database = Local;user = sa;password = sa";

    List<Iris> allRows;
    using (var connection = new SQLServer(sqlConnection))
    {
        // Materialize while the connection is still open so the shuffled rows are read exactly
        // once and the train/test split below stays stable across enumerations.
        IEnumerable<Iris> rows = await connection.ExecuteReaderAsync<Iris>("SELECT * FROM [Iris] ORDER BY NEWID()");
        allRows = rows.ToList();
    }

    // Hold the test rows out of the training set; previously the test rows were a subset of
    // the training rows, so the metrics were computed on data the model had been trained on.
    var testdata = allRows.Take(testSize).ToList();
    var traindata = allRows.Skip(testSize).ToList();

    var algorithms = new Dictionary<string, Func<IEnumerable<Iris>, Action<ITransformer>, PredictionEngine<Iris, IrisClassification>>>()
    {
        { "SdcaNonCalibrated", (data, action) => MulticlassClassfication.SdcaNonCalibrated<Iris, IrisClassification>(data, additionModelAction: action) },
        { "SdcaMaximumEntropy", (data, action) => MulticlassClassfication.SdcaMaximumEntropy<Iris, IrisClassification>(data, additionModelAction: action) },
        { "LbfgsMaximumEntropy", (data, action) => MulticlassClassfication.LbfgsMaximumEntropy<Iris, IrisClassification>(data, additionModelAction: action) },
        { "NaiveBayes", (data, action) => MulticlassClassfication.NaiveBayes<Iris, IrisClassification>(data, additionModelAction: action) },
    };

    foreach (var algorithm in algorithms)
    {
        PredictionEngine<Iris, IrisClassification> engine;
        ITransformer model = default;
        var path = $@"MClassification_{algorithm.Key}.zip";

        if (File.Exists(path) && !train)
        {
            model = MachineLearning.Global.LoadModel(path);
            engine = mlContext.Model.CreatePredictionEngine<Iris, IrisClassification>(model);
        }
        else
        {
            // The training helper hands the fitted transformer back through the callback.
            engine = algorithm.Value(traindata, mdl => { model = mdl; });

            // Save to the same file the load branch looks for; previously the model was
            // written under a different name, so it could never be found again.
            MachineLearning.Global.SaveModel(model, path);
        }

        MachineLearning.ConsoleHelper.ConsoleWriteHeader($@"Evaluate metrics for {algorithm.Key} algorithm.");
        var metrics = Metrics.EvaluateMulticlassClassificationMetrics(
            model,
            mlContext.Data.LoadFromEnumerable(testdata),
            labelColumnName: nameof(Iris.Label));
        foreach (var prop in metrics.GetType().GetProperties())
        {
            Console.WriteLine($@"{prop.Name} : {prop.GetValue(metrics)}");
        }

        // Lower log loss wins.
        if (metrics.LogLoss < logLoss)
        {
            logLoss = metrics.LogLoss;
            bestAlg = algorithm.Key;
        }

        // Collected for the (currently disabled) visualization call below.
        var irisClassifications = new List<IrisClassification>(testdata.Count);
        foreach (var t in testdata)
        {
            var predict = engine.Predict(t);
            irisClassifications.Add(predict);
            Console.WriteLine($@"Actual : {t.Label,5} / Predict {predict.PredictedLabel,5} {predict.IsCorrectPredict(t)}");
        }

        //VisualizeMulticlassClassification(algorithm.Key, testdata, irisClassifications, $"{algorithm.Key}_clsf.svg");
    }

    Console.ForegroundColor = ConsoleColor.Green;
    Console.WriteLine($@"Best algorithm based-on Log Loss : {bestAlg}");
    Console.ForegroundColor = ConsoleColor.White;
}