Ejemplo n.º 1
0
        /// <summary>
        /// Reads the iris rows from SQL Server in random order, keeps the first 80% for training
        /// and the remainder for testing, then for every registered clustering algorithm either
        /// loads a previously saved model or trains and saves a new one, prints its evaluation
        /// metrics and the predicted cluster of each test row, and finally reports the algorithm
        /// with the smallest average distance.
        /// </summary>
        /// <param name="train">
        /// When <c>false</c> and a saved model file exists for an algorithm, that model is loaded
        /// instead of being trained again. Otherwise the model is trained and written to disk.
        /// </param>
        static async Task ClusteringExample(bool train = true)
        {
            // NOTE(review): credentials are hard-coded in source; consider moving them to configuration.
            const string sqlConnection = @"Server = localhost;database = Local;user = sa;password = sa";

            var bestAlg   = string.Empty;
            var avgdist   = double.MaxValue;
            var mlContext = new MLContext();

            List <Iris> rows;
            using (var connection = new SQLServer(sqlConnection))
            {
                // Materialize once, while the connection is still open, so the split below
                // works on a single stable snapshot instead of re-enumerating the result.
                rows = (await connection.ExecuteReaderAsync <Iris>("SELECT * FROM [iris] ORDER BY NEWID()")).ToList();
            }

            var trainSize = (int)(rows.Count * 0.8);
            var traindata = rows.Take(trainSize).ToList();
            var testdata  = rows.Skip(trainSize).ToList();

            var algorithms = new Dictionary <string, Func <IEnumerable <Iris>, Action <ITransformer>, PredictionEngine <Iris, IrisClustering> > >()
            {
                { "KMeans", (data, action) => Clustering.KMeans <Iris, IrisClustering>(data, 3, additionModelAction: action) },
            };

            // The test view does not depend on the algorithm, so build it once.
            var dataframe = mlContext.Data.LoadFromEnumerable(testdata);

            foreach (var algorithm in algorithms)
            {
                PredictionEngine <Iris, IrisClustering> engine;
                ITransformer model = default;
                var          path  = $@"Clustering_{algorithm.Key}.zip";

                if (File.Exists(path) && !train)
                {
                    model  = MachineLearning.Global.LoadModel(path);
                    engine = mlContext.Model.CreatePredictionEngine <Iris, IrisClustering>(model);
                }
                else
                {
                    // The training helper hands the fitted model back through the callback.
                    engine = algorithm.Value(traindata, mdl => model = mdl);

                    // Only a freshly trained model needs to be persisted.
                    MachineLearning.Global.SaveModel(model, path);
                }

                MachineLearning.ConsoleHelper.ConsoleWriteHeader($@"Evaluate metrics for {algorithm.Key} algorithm.");
                var metrics = Metrics.EvaluateClusteringMetrics(model, dataframe);
                foreach (var prop in metrics.GetType().GetProperties())
                {
                    Console.WriteLine($@"{prop.Name} : {prop.GetValue(metrics)}");
                }

                // Lower average distance is treated as better.
                if (metrics.AverageDistance < avgdist)
                {
                    avgdist = metrics.AverageDistance;
                    bestAlg = algorithm.Key;
                }

                var predictedData = new List <IrisClustering>(testdata.Count);
                foreach (var t in testdata)
                {
                    var predict = engine.Predict(t);
                    predictedData.Add(predict);
                    Console.WriteLine($@"Cluster ID : {predict.PredictedLabel,5}");
                }
                VisualizeClustering(predictedData, "clustering.svg");
            }

            // Highlight the result, then restore whatever colour the console had before.
            var previousColor = Console.ForegroundColor;
            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine($@"Best algorithm based-on Average Distance : {bestAlg}");
            Console.ForegroundColor = previousColor;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Reads the HeartTraining and HeartTest tables from SQL Server, then for every registered
        /// binary-classification algorithm either loads a previously saved model or trains and
        /// saves a new one, prints its evaluation metrics and the prediction for each test row,
        /// and finally reports the algorithm with the highest accuracy.
        /// </summary>
        /// <param name="train">
        /// When <c>false</c> and a saved model file exists for an algorithm, that model is loaded
        /// instead of being trained again. Otherwise the model is trained and written to disk.
        /// </param>
        static async Task BinaryClassifier(bool train = true)
        {
            // NOTE(review): credentials are hard-coded in source; consider moving them to configuration.
            const string sqlConnection = @"Server = localhost;database = Local;user = sa;password = sa";

            List <HeartData> traindata;
            List <HeartData> testdata;

            using (var connection = new SQLServer(sqlConnection))
            {
                // Materialize while the connection is open; both sets are enumerated
                // several times below (once per algorithm).
                traindata = (await connection.ExecuteReaderAsync <HeartData>($@"SELECT * FROM HeartTraining")).ToList();
                testdata  = (await connection.ExecuteReaderAsync <HeartData>($@"SELECT * FROM HeartTest")).ToList();
            }

            double accuracy  = double.MinValue;
            string bestAlg   = string.Empty;
            var    mlContext = new MLContext();

            var algorithms = new Dictionary <string, Func <IEnumerable <HeartData>, Action <ITransformer>, PredictionEngine <HeartData, HeartPredict> > >()
            {
                { "FastTree", (data, action) => BinaryClassification.FastTree <HeartData, HeartPredict>(data, additionModelAction: action) },
                { "FastForest", (data, action) => BinaryClassification.FastForest <HeartData, HeartPredict>(data, additionModelAction: action) },
                { "SdcaLogisticRegression", (data, action) => BinaryClassification.SdcaLogisticRegression <HeartData, HeartPredict>(data, additionModelAction: action) },
                { "AveragedPerceptron", (data, action) => BinaryClassification.AveragedPerceptron <HeartData, HeartPredict>(data, additionModelAction: action) },
                { "LinearSVM", (data, action) => BinaryClassification.LinearSVM <HeartData, HeartPredict>(data, additionModelAction: action) }
            };

            foreach (var algorithm in algorithms)
            {
                PredictionEngine <HeartData, HeartPredict> engine;
                ITransformer model = default;
                var          path  = $@"BClassification_{algorithm.Key}.zip";

                if (File.Exists(path) && !train)
                {
                    model  = MachineLearning.Global.LoadModel(path);
                    engine = mlContext.Model.CreatePredictionEngine <HeartData, HeartPredict>(model);
                }
                else
                {
                    // The training helper hands the fitted model back through the callback.
                    engine = algorithm.Value(traindata, mdl => model = mdl);

                    // Save to the same path the load branch checks; writing to a different
                    // file name would mean a saved model is never found when train is false.
                    Global.SaveModel(model, path);
                }

                ConsoleHelper.ConsoleWriteHeader($@"Evaluate metrics for {algorithm.Key} algorithm.");
                try
                {
                    var metrics = Metrics.EvaluateBinaryClassificationMetrics(model, mlContext.Data.LoadFromEnumerable(testdata), labelColumnName: nameof(HeartData.Label));
                    foreach (var prop in metrics.GetType().GetProperties())
                    {
                        Console.WriteLine($@"{prop.Name} : {prop.GetValue(metrics)}");
                    }

                    // Higher accuracy is treated as better.
                    if (metrics.Accuracy > accuracy)
                    {
                        accuracy = metrics.Accuracy;
                        bestAlg  = algorithm.Key;
                    }
                }
                catch (Exception e)
                {
                    // Evaluation is best-effort: report the failure and still print predictions.
                    Console.WriteLine($"Unable to evaluate metrics : {e.Message}");
                }

                foreach (var t in testdata)
                {
                    var predict = engine.Predict(t);
                    Console.WriteLine($@"Actual {t.Label,5} / Predict {predict.PredictedLabel,5} with prob of {predict.Probability,5}");
                }
            }

            // Highlight the result, then restore whatever colour the console had before.
            var previousColor = Console.ForegroundColor;
            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine($@"Best algorithm based-on accuracy : {bestAlg}");
            Console.ForegroundColor = previousColor;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Reads the iris rows from SQL Server in random order, holds out the first 20 rows as a
        /// test set and trains on the rest, then for every registered multiclass algorithm either
        /// loads a previously saved model or trains and saves a new one, prints its evaluation
        /// metrics and the prediction for each test row, and finally reports the algorithm with
        /// the lowest log loss.
        /// </summary>
        /// <param name="train">
        /// When <c>false</c> and a saved model file exists for an algorithm, that model is loaded
        /// instead of being trained again. Otherwise the model is trained and written to disk.
        /// </param>
        static async Task MulticlassClassificationExample(bool train = true)
        {
            // NOTE(review): credentials are hard-coded in source; consider moving them to configuration.
            const string sqlConnection = @"Server = localhost;database = Local;user = sa;password = sa";
            const int    testSize      = 20;

            var bestAlg   = string.Empty;
            var logLoss   = double.MaxValue;
            var mlContext = new MLContext();

            List <Iris> rows;
            using (var connection = new SQLServer(sqlConnection))
            {
                // Materialize once, while the connection is still open, so the split below
                // works on a single stable snapshot of the shuffled rows.
                rows = (await connection.ExecuteReaderAsync <Iris>("SELECT * FROM [Iris] ORDER BY NEWID()")).ToList();
            }

            // Hold the test rows out of the training set; evaluating on rows the model
            // was trained on would overstate its quality.
            var testdata  = rows.Take(testSize).ToList();
            var traindata = rows.Skip(testSize).ToList();

            var algorithms = new Dictionary <string, Func <IEnumerable <Iris>, Action <ITransformer>, PredictionEngine <Iris, IrisClassification> > >()
            {
                { "SdcaNonCalibrated", (data, action) => MulticlassClassfication.SdcaNonCalibrated <Iris, IrisClassification>(data, additionModelAction: action) },
                { "SdcaMaximumEntropy", (data, action) => MulticlassClassfication.SdcaMaximumEntropy <Iris, IrisClassification>(data, additionModelAction: action) },
                { "LbfgsMaximumEntropy", (data, action) => MulticlassClassfication.LbfgsMaximumEntropy <Iris, IrisClassification>(data, additionModelAction: action) },
                { "NaiveBayes", (data, action) => MulticlassClassfication.NaiveBayes <Iris, IrisClassification>(data, additionModelAction: action) },
            };

            // The test view does not depend on the algorithm, so build it once.
            var testView = mlContext.Data.LoadFromEnumerable(testdata);

            foreach (var algorithm in algorithms)
            {
                PredictionEngine <Iris, IrisClassification> engine;
                ITransformer model = default;
                var          path  = $@"MClassification_{algorithm.Key}.zip";

                if (File.Exists(path) && !train)
                {
                    model  = MachineLearning.Global.LoadModel(path);
                    engine = mlContext.Model.CreatePredictionEngine <Iris, IrisClassification>(model);
                }
                else
                {
                    // The training helper hands the fitted model back through the callback.
                    engine = algorithm.Value(traindata, mdl => model = mdl);

                    // Save to the same path the load branch checks; writing to a different
                    // file name would mean a saved model is never found when train is false.
                    MachineLearning.Global.SaveModel(model, path);
                }

                MachineLearning.ConsoleHelper.ConsoleWriteHeader($@"Evaluate metrics for {algorithm.Key} algorithm.");
                var metrics = Metrics.EvaluateMulticlassClassificationMetrics(model, testView, labelColumnName: nameof(Iris.Label));
                foreach (var prop in metrics.GetType().GetProperties())
                {
                    Console.WriteLine($@"{prop.Name} : {prop.GetValue(metrics)}");
                }

                // Lower log loss is treated as better.
                if (metrics.LogLoss < logLoss)
                {
                    logLoss = metrics.LogLoss;
                    bestAlg = algorithm.Key;
                }

                // Collected for the visualisation call below, which is currently disabled.
                var irisClassifications = new List <IrisClassification>(testdata.Count);
                foreach (var t in testdata)
                {
                    var predict = engine.Predict(t);
                    irisClassifications.Add(predict);
                    Console.WriteLine($@"Actual : {t.Label,5} / Predict {predict.PredictedLabel,5} {predict.IsCorrectPredict(t)}");
                }
                //VisualizeMulticlassClassification(algorithm.Key, testdata, irisClassifications, $"{algorithm.Key}_clsf.svg");
            }

            // Highlight the result, then restore whatever colour the console had before.
            var previousColor = Console.ForegroundColor;
            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine($@"Best algorithm based-on Log Loss : {bestAlg}");
            Console.ForegroundColor = previousColor;
        }