예제 #1
0
        public override void Train(List <DataSet> data, List <float> labels = null)
        {
            if (TrainedModel != null)
            {
                throw new InvalidOperationException("May only train/load a model once");
            }

#if ML_LEGACY
            var pipeline = new LearningPipeline();

            // add data
            pipeline.Add(CollectionDataSource.Create(data));

            // choose what to predict
            pipeline.Add(new ColumnCopier(("Score", "Label")));

            // add columns as features
            // do not include the features which should be predicted
            pipeline.Add(new ColumnConcatenator("Features", DataSet.ColumnNames()));

            // add a regression prediction
            pipeline.Add(new FastTreeRegressor());

            // train the model
            TrainedModel = pipeline.Train <DataSet, DataSetPrediction>();
#else
            // add data
            var textLoader = GetTextLoader(Context);

            // spill to disk !?!?! since there is no way to load from a collection
            var pathToData = "";
            try
            {
                // write data to disk
                pathToData = WriteToDisk(data);

                // read in data
                IDataView dataView = textLoader.Load(pathToData);
                InputSchema = dataView.Schema;

                // configurations
                var dataPipeline = Context.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: nameof(DataSet.Score))
                                   .Append(Context.Transforms.Concatenate("Features", DataSet.ColumnNames()));

                // set the training algorithm
                var trainer          = Context.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features");
                var trainingPipeline = dataPipeline.Append(trainer);

                TrainedModel = trainingPipeline.Fit(dataView);
            }
            finally
            {
                // cleanup
                if (!string.IsNullOrWhiteSpace(pathToData) && File.Exists(pathToData))
                {
                    File.Delete(pathToData);
                }
            }
#endif
        }
예제 #2
0
        public static void Evaluate(PredictionModel <SentimentData, SentimentPrediction> model)
        {
            var testData = new List <SentimentData>()
            {
                new SentimentData {
                    Sentiment     = 6f,
                    SentimentText = "such good thing"
                },
                new SentimentData {
                    Sentiment     = -9.3f,
                    SentimentText = "f*****g article"
                }
            };

            var collection = CollectionDataSource.Create(testData);
            var evaluator  = new BinaryClassificationEvaluator();
            BinaryClassificationMetrics metrics = evaluator.Evaluate(model, collection);

            Console.WriteLine();
            Console.WriteLine("PredictionModel quality metrics evaluation");
            Console.WriteLine("------------------------------------------");
            Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
            Console.WriteLine($"Auc: {metrics.Auc:P2}");
            Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
        }
예제 #3
0
        internal static async Task <PredictionModel <IrisData, ClusterPrediction> > TrainAsync()
        {
            // LearningPipeline holds all steps of the learning process: data, transforms, learners.
            var pipeline = new LearningPipeline
            {
                // The TextLoader loads a dataset. The schema of the dataset is specified by passing a class containing
                // all the column names and their types.
                CollectionDataSource.Create <IrisData>(GetIrisDataSet()),
                //new TextLoader(DataPath).CreateFrom<IrisData>(useHeader: true),
                // ColumnConcatenator concatenates all columns into Features column
                new ColumnConcatenator("Features",
                                       "SepalLength",
                                       "SepalWidth",
                                       "PetalLength",
                                       "PetalWidth"),
                // KMeansPlusPlusClusterer is an algorithm that will be used to build clusters. We set the number of clusters to 3.
                new KMeansPlusPlusClusterer()
                {
                    K = 3
                }
            };

            Console.WriteLine("=============== Training model ===============");
            var model = pipeline.Train <IrisData, ClusterPrediction>();

            Console.WriteLine("=============== End training ===============");

            // Saving the model as a .zip file.
            await model.WriteAsync(ModelPath);

            Console.WriteLine("The model is saved to {0}", ModelPath);

            return(model);
        }
        public async Task <PredictionModel <UsageOfLightBulbModel, UsageOfLightBulbPredictionModel> > Train()
        {
            var dataFromDb = await _lightBulbRepository.GetAllLightBulbs();

            var dataToTrain = new List <UsageOfLightBulbModel>(dataFromDb);

            var collection = CollectionDataSource.Create(dataToTrain);

            var pipeline = new LearningPipeline
            {
                collection,
                new ColumnCopier(("IsOn", "Label")),
                new ColumnConcatenator(
                    "Features",
                    "LightBulbID",
                    "Month",
                    "Day",
                    "Time"),
                new FastTreeRegressor(),
            };

            model = pipeline.Train <UsageOfLightBulbModel, UsageOfLightBulbPredictionModel>();

            //  await model.WriteAsync(_modelpath);
            return(model);
        }
예제 #5
0
        public static string CreateNNetworkAndLearn(List <NnRow> rows)
        {
            // Prepare data
            double trainingSplitRatio = 0.7;
            int    trainCount         = (int)(rows.Count * trainingSplitRatio);
            var    trainData          = new MLNetData[trainCount];
            var    testData           = new MLNetData[rows.Count - trainCount];

            MLNetData[] allData = Convert(rows);
            // Split into Training and Testing sets
            Array.Copy(allData, 0, trainData, 0, trainCount);
            Array.Copy(allData, trainCount, testData, 0, rows.Count - trainCount);
            var allCollection   = CollectionDataSource.Create(allData);
            var trainCollection = CollectionDataSource.Create(trainData);
            var testCollection  = CollectionDataSource.Create(testData);


            double acc, auc, f1;
            PredictionModel <MLNetData, MLNetPredict> modelAll, modelTrain, modelBest;

            //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new AveragedPerceptronBinaryClassifier            ()); // acc 0.83, auc 0.86, f1 0.45
            //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new FastForestBinaryClassifier                    ()); // acc 0.85, auc 0.89, f1 0.46
            (acc, auc, f1, modelBest) = TrainAndGetMetrics(trainCollection, testCollection, new FastTreeBinaryClassifier());                       // acc 0.95, auc 0.97, f1 0.85
            //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new FieldAwareFactorizationMachineBinaryClassifier()); // acc 0.85, auc 0.88, f1 0.56
            //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new GeneralizedAdditiveModelBinaryClassifier      ()); // acc 0.81, auc 0.80, f1 NaN
            //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new LinearSvmBinaryClassifier                     ()); // acc 0.82, auc 0.86, f1 0.16
            //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new LogisticRegressionBinaryClassifier            ()); // acc 0.84, auc 0.86, f1 0.40
            //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new StochasticDualCoordinateAscentBinaryClassifier()); // acc 0.84, auc 0.86, f1 0.40
            //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new StochasticGradientDescentBinaryClassifier     ()); // acc 0.83, auc 0.86, f1 0.29

            ///(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new EnsembleBinaryClassifier                      ());


            //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new AveragedPerceptronBinaryClassifier            ()); // acc 0.82, auc 0.84, f1 0.45
            //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new FastForestBinaryClassifier                    ()); // acc 0.82, auc 0.83, f1 0.23
            //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new FastTreeBinaryClassifier                      ()); // acc 0.82, auc 0.84, f1 0.46
            //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new FieldAwareFactorizationMachineBinaryClassifier()); // acc 0.83, auc 0.85, f1 0.37
            //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new GeneralizedAdditiveModelBinaryClassifier      ()); // acc 0.81, auc 0.75, f1 NaN
            //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new LinearSvmBinaryClassifier                     ()); // acc 0.81, auc 0.83, f1 0.14
            //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new LogisticRegressionBinaryClassifier            ()); // acc 0.83, auc 0.84, f1 0.39
            //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new StochasticDualCoordinateAscentBinaryClassifier()); // acc 0.82, auc 0.84, f1 0.43
            //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new StochasticGradientDescentBinaryClassifier     ()); // acc 0.83, auc 0.83, f1 0.34


            // Evaluate a training model
            //Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
            //Console.WriteLine($"Auc: {metrics.Auc:P2}");
            //Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
            //var cv = new CrossValidator();
            //CrossValidationOutput<MLNetData, MLNetPredict> cvRes = cv.CrossValidate<MLNetData, MLNetPredict>(pipelineAll);
            //Console.WriteLine($"Rms = {metrics.Rms}");
            //Console.WriteLine($"RSquared = {metrics.RSquared}");


            // Train the overall model
            string NnModelPath = @"NnInputs\mlDotNet_Datacup.model";

            modelBest.WriteAsync(NnModelPath);
            return(NnModelPath);
        }
예제 #6
0
        public async Task TrainModelAsync(string csvPath, string modelPath)
        {
            var pipeline = new LearningPipeline();

            pipeline.Add(CollectionDataSource.Create(new CsvReader().GetData(csvPath)));

            pipeline.Add(new Dictionarizer(("Categories", "Label")));

            pipeline.Add(new TextFeaturizer("Name", "Name"));

            pipeline.Add(new TextFeaturizer("GenericName", "GenericName"));

            pipeline.Add(new ColumnConcatenator("Features", "Name", "GenericName"));

            pipeline.Add(new StochasticDualCoordinateAscentClassifier()
            {
                NumThreads = Math.Max(2, Environment.ProcessorCount - 1)
            });
            pipeline.Add(new PredictedLabelColumnOriginalValueConverter()
            {
                PredictedLabelColumn = "PredictedLabel"
            });

            Console.WriteLine("=============== Training model ===============");

            var model = pipeline.Train <Product, ProductCategoryPrediction>();

            await model.WriteAsync(modelPath).ConfigureAwait(false);

            Console.WriteLine("=============== End training ===============");
            Console.WriteLine("The model is saved to {0}", modelPath);
        }
예제 #7
0
        static async Task <PredictionModel <Open311Data, Open311DataPrediction> > TrainOpen311(string dataPath)
        {
            var pipeline   = new LearningPipeline();
            var dataSource = CollectionDataSource.Create(OpenFile(dataPath, 3, 0, 1, 2));

            pipeline.Add(dataSource);
            pipeline.Add(new Dictionarizer(@"Label"));
            pipeline.Add(new TextFeaturizer(@"Features", @"Request")
            {
                KeepDiacritics       = false,
                KeepPunctuations     = false,
                TextCase             = TextNormalizerTransformCaseNormalizationMode.Lower,
                OutputTokens         = true,
                Language             = TextTransformLanguage.German,
                StopWordsRemover     = new PredefinedStopWordsRemover(),
                VectorNormalizer     = TextTransformTextNormKind.L2,
                CharFeatureExtractor = new NGramNgramExtractor()
                {
                    NgramLength = 3, AllLengths = false
                },
                WordFeatureExtractor = new NGramNgramExtractor()
                {
                    NgramLength = 3, AllLengths = true
                }
            });
            pipeline.Add(new StochasticDualCoordinateAscentClassifier());
            pipeline.Add(new PredictedLabelColumnOriginalValueConverter {
                PredictedLabelColumn = @"PredictedLabel"
            });

            var model = pipeline.Train <Open311Data, Open311DataPrediction>();
            await model.WriteAsync(_modelPath);

            return(model);
        }
예제 #8
0
        public void PredictClusters()
        {
            int n        = 1000;
            int k        = 4;
            var rand     = new Random(1);
            var clusters = new ClusteringData[k];
            var data     = new ClusteringData[n];

            for (int i = 0; i < k; i++)
            {
                //pick clusters as points on circle with angle to axis X equal to 360*i/k
                clusters[i] = new ClusteringData {
                    Points = new float[2] {
                        (float)Math.Cos(Math.PI * i * 2 / k), (float)Math.Sin(Math.PI * i * 2 / k)
                    }
                };
            }
            // create data points by randomly picking cluster and shifting point slightly away from it.
            for (int i = 0; i < n; i++)
            {
                var index = rand.Next(0, k);
                var shift = (rand.NextDouble() - 0.5) / 10;
                data[i] = new ClusteringData
                {
                    Points = new float[2]
                    {
                        (float)(clusters[index].Points[0] + shift),
                        (float)(clusters[index].Points[1] + shift)
                    }
                };
            }
            var pipeline = new LearningPipeline(seed: 1, conc: 1);

            pipeline.Add(CollectionDataSource.Create(data));
            pipeline.Add(new KMeansPlusPlusClusterer()
            {
                K = k
            });
            var model = pipeline.Train <ClusteringData, ClusteringPrediction>();
            //validate that initial points we pick up as centers of cluster during data generation belong to different clusters.
            var labels = new HashSet <uint>();

            for (int i = 0; i < k; i++)
            {
                var scores = model.Predict(clusters[i]);
                Assert.True(!labels.Contains(scores.SelectedClusterId));
                labels.Add(scores.SelectedClusterId);
            }

            var            evaluator = new ClusterEvaluator();
            var            testData  = CollectionDataSource.Create(clusters);
            ClusterMetrics metrics   = evaluator.Evaluate(model, testData);

            //Label is not specified, so NMI would be equal to NaN
            Assert.Equal(metrics.Nmi, double.NaN);
            //Calculate dbi is false by default so Dbi would be 0
            Assert.Equal(metrics.Dbi, (double)0.0);
            Assert.Equal(metrics.AvgMinScore, (double)0.0, 5);
        }
예제 #9
0
        /// <summary>
        /// 训练并生成模型
        /// </summary>
        /// <returns></returns>
        public static async Task <PredictionModel <JiaMiTu, JiaMiTuPrediction> > Train(IEnumerable <JiaMiTu> trainData, string modelFileName, string labelColumn, string[] oneHotColumns, string[] features, string[] drops)
        {
            //创建学习管道
            var pipeline = new LearningPipeline();

            //加载和转换您的数据
            //var textLoader = new TextLoader<JiaMiTu>(DataPath, useHeader: true, separator: ",");

            //pipeline.Add(textLoader);
            pipeline.Add(CollectionDataSource.Create(trainData));
            //使用该ColumnCopier()功能将“票价_帐户”列复制到名为“标签”的新列中。此列是标签。
            pipeline.Add(new ColumnCopier((labelColumn, "Label")));
            //一个对象叫ColumnDropper,可以用来在训练开始前舍弃掉不需要的字段,比如id,对结果没有任何影响,因此可以去掉
            if (drops.Count() > 0)
            {
                pipeline.Add(new ColumnDropper()
                {
                    Column = drops
                });
            }
            //进行一些特征工程来转换数据,以便它可以有效地用于机器学习。该训练模型需要算法的数字功能,
            //您变换中的分类数据(vendor_id,rate_code,和payment_type)为数字。
            //该CategoricalOneHotVectorizer()
            //函数为每个列中的值分配一个数字键。通过添加以下代码来转换您的数据:
            if (oneHotColumns.Count() > 0)
            {
                pipeline.Add(new CategoricalOneHotVectorizer(oneHotColumns));
            }
            //数据准备的最后一步是使用该功能将所有功能组合到一个向量中ColumnConcatenator()。这一必要步骤
            //有助于算法轻松处理您的功能。按照您在最后一步中编写的内容添加以下代码:
            //请注意,“trip_time_in_secs”列不包括在内。你已经确定它不是一个有用的预测功能。
            pipeline.Add(new ColumnConcatenator("Features",
                                                features
                                                ));
            //在将数据添加到流水线并将其转换为正确的输入格式之后,您可以选择一种学习算法(学习者)。学习算
            //法训练模型。你为这个问题选择了一个回归任务,所以你增加了一个学习者调用FastTreeRegressor()到
            //使用梯度提升的管道。
            //渐变增强是回归问题的机器学习技术。它以逐步的方式构建每个回归树。它使用预定义的损失函数来测
            //量每个步骤中的错误,并在下一步中对其进行修正。结果是预测模型实际上是较弱预测模型的集合。
            pipeline.Add(new FastTreeRegressor());
            //泊松回归
            //pipeline.Add(new PoissonRegressor());
            //训练模型
            //最后一步是训练模型。在此之前,管道中没有任何东西被执行。该pipeline.Train<T_Input, T_Output>()
            //函数接受预定义的JiaMiTu类类型并输出一个JiaMiTuPrediction类型。将这最后一段代码添加到Train()
            //函数中:
            PredictionModel <JiaMiTu, JiaMiTuPrediction> model = pipeline.Train <JiaMiTu, JiaMiTuPrediction>();

            //改性Train()方法为异步方法public static async Task<PredictionModel<JiaMiTu, JiaMiTuPrediction>> Train()
            ///通过生么预测什么
            if (!string.IsNullOrEmpty(modelFileName))
            {
                await model.WriteAsync(modelFileName);
            }

            return(model);
        }
예제 #10
0
        public static async Task <PredictionModel <SentimentData, SentimentPrediction> > Train(IMongoDatabase db)
        {
            // LearningPipeline allows you to add steps in order to keep everything together
            // during the learning process.
            // <Snippet5>
            var pipeline = new LearningPipeline();
            // </Snippet5>

            // <Snippet6>
            var collection = db.GetCollection <SentimentData>("review_train");
            var documents  = collection.Find <SentimentData>(new BsonDocument()).ToEnumerable();

            pipeline.Add(CollectionDataSource.Create(documents));
            // </Snippet6>

            // TextFeaturizer is a transform that is used to featurize an input column.
            // This is used to format and clean the data.
            // <Snippet7>
            pipeline.Add(new TextFeaturizer("Features", "text")
            {
                KeepDiacritics   = false,
                KeepPunctuations = false,
                TextCase         = TextNormalizerTransformCaseNormalizationMode.Lower,
            });
            //</Snippet7>

            // Adds a FastTreeBinaryClassifier, the decision tree learner for this project, and
            // three hyperparameters to be used for tuning decision tree performance.
            // <Snippet8>
            pipeline.Add(new FastTreeBinaryClassifier()
            {
                NumLeaves           = 100,
                NumTrees            = 50,
                MinDocumentsInLeafs = 2,
                LearningRates       = 0.4f,
            });
            // </Snippet8>

            // Train the pipeline based on the dataset that has been loaded, transformed.
            // <Snippet9>
            PredictionModel <SentimentData, SentimentPrediction> model =
                pipeline.Train <SentimentData, SentimentPrediction>();
            // </Snippet9>

            // Saves the model we trained to a zip file.
            // <Snippet10>
            await model.WriteAsync(_modelpath);

            // </Snippet10>

            // Returns the model we trained to use for evaluation.
            // <Snippet11>
            return(model);
            // </Snippet11>
        }
예제 #11
0
        static void Main(string[] args)
        {
            var trainData = GeneratePData(2000);

            Print(trainData.Take(20));
            var testData = GeneratePData(50, test: true);

            // ML görevi için obje oluşturur
            var learningPipe    = new LearningPipeline();
            var trainCollection = CollectionDataSource.Create(trainData);

            learningPipe.Add(trainCollection);
            // Verilerin kolon isimleri olan labelları numeric indexe çevirir.
            learningPipe.Add(new Dictionarizer("Label"));

            learningPipe.Add(
                new ColumnConcatenator("Features", "UnitA", "UnitS", "Volume"));

            // Algoritma sınıflandırması
            learningPipe.Add(new StochasticDualCoordinateAscentClassifier());

            // Tahmin edilen kolon değerini çevir
            learningPipe.Add(new PredictedLabelColumnOriginalValueConverter()
            {
                PredictedLabelColumn = "PredictedLabel"
            });

            // Modeli eğitme
            var model = learningPipe.Train <ProcessData, ProcessPrediction>();

            // Model değerlendirilmesi ve kesin kontrol
            var evaluator = new ClassificationEvaluator();
            var metrics   = evaluator.Evaluate(model, trainCollection);

            Console.WriteLine("AccuracyMicro: " + metrics.AccuracyMicro);
            Console.WriteLine("LogLoss: " + metrics.LogLoss);

            // Test datayı tahmin et
            var predicted = model.Predict(testData);

            // Testdata ve tahmin edilen labelı string içinde topla
            var results = testData.Zip(predicted, (t, p) => new ProcessData
            {
                UnitA  = t.UnitA,
                UnitS  = t.UnitS,
                Volume = t.Volume,
                Label  = p.PredictedLabels
            }).ToList();

            // Sonucu yazdır
            Print(results);

            Console.ReadLine();
        }
        public void EvaluateModel(IEnumerable <PivotData> testData, PredictionModel <PivotData, ClusteringPrediction> model)
        {
            ConsoleWriteHeader("Metrics for Customer Segmentation");
            var            testDataSource = CollectionDataSource.Create(testData);
            var            evaluator      = new ClusterEvaluator();
            ClusterMetrics metrics        = evaluator.Evaluate(model, testDataSource);

            Console.WriteLine($"Average mean score: {metrics.AvgMinScore:0.##}");
            //Console.WriteLine($"*       Davies-Bouldin Index: {metrics.Dbi:#.##}");
            //Console.WriteLine($"*       Normalized mutual information: {metrics.Nmi:#.##}");
        }
예제 #13
0
        public static void Evaluate(PredictionModel <PokerHandData, PokerHandPrediction> model, IEnumerable <PokerHandData> data)
        {
            var evaluator  = new ClassificationEvaluator();
            var collection = CollectionDataSource.Create(data);
            var metrics    = evaluator.Evaluate(model, collection);

            Console.WriteLine();
            Console.WriteLine("PredictionModel quality metrics evaluation");
            Console.WriteLine("------------------------------------------");
            Console.WriteLine($"LogLossReduction: {metrics.LogLossReduction }");
            Console.WriteLine($"LogLoss: {metrics.LogLoss }");
        }
예제 #14
0
        public static PredictionModel <PokerHandData, PokerHandPrediction> Train(IEnumerable <PokerHandData> data)
        {
            var pipeline   = new LearningPipeline();
            var collection = CollectionDataSource.Create(data);

            pipeline.Add(collection);
            pipeline.Add(new ColumnConcatenator("Features", "IsSameSuit", "IsStraight", "FourOfKind", "ThreeOfKind", "PairsCount"));
            pipeline.Add(new LogisticRegressionClassifier());
            var model = pipeline.Train <PokerHandData, PokerHandPrediction>();

            return(model);
        }
        public void BooleanLabelPipeline()
        {
            var data = new BooleanLabelData[1];

            data[0]          = new BooleanLabelData();
            data[0].Features = new float[] { 0.0f, 1.0f };
            data[0].Label    = false;
            var pipeline = new LearningPipeline();

            pipeline.Add(CollectionDataSource.Create(data));
            pipeline.Add(new FastForestBinaryClassifier());
            var model = pipeline.Train <Data, Prediction>();
        }
예제 #16
0
        protected LearningPipeline BuildModel(IEnumerable <SalesRecommendationData> salesData)
        {
            ConsoleWriteHeader("Build model pipeline");

            var pipeline = new LearningPipeline();

            pipeline.Add(CollectionDataSource.Create(salesData));

            // One Hot Encoding using Hash Vector. The new columns are named as the original ones, but adding the suffix "_OH"
            pipeline.Add(new CategoricalHashOneHotVectorizer((nameof(SalesRecommendationData.ProductId), nameof(SalesRecommendationData.ProductId) + "_OH"))
            {
                HashBits = 18
            });
예제 #17
0
        public PowerballPrediction PredictPowerball()
        {
            var pipeline = new LearningPipeline();
            var allPicks = _ctx.Powerballs;
            var data     = new List <PowerballData>();

            foreach (var powerball in allPicks)
            {
                var ts = powerball.draw_date - DateTime.Now;

                var newPick = new PowerballData()
                {
                    Ball1     = Convert.ToInt32(powerball.ball1),
                    Ball2     = Convert.ToInt32(powerball.ball2),
                    Ball3     = Convert.ToInt32(powerball.ball3),
                    Ball4     = Convert.ToInt32(powerball.ball4),
                    Ball5     = Convert.ToInt32(powerball.ball5),
                    PowerBall = Convert.ToInt32(powerball.powerball),
                    daysAgo   = (float)ts.TotalDays
                };

                data.Add(newPick);
            }

            var collection = CollectionDataSource.Create(data);

            pipeline.Add(collection);
            pipeline.Add(new ColumnCopier(("daysAgo", "Label")));
            pipeline.Add(new CategoricalOneHotVectorizer("id"));
            pipeline.Add(new ColumnConcatenator("Features", "id", "daysAgo"));
            pipeline.Add(new FastTreeRegressor());

            var model = pipeline.Train <PowerballData, PowerballPrediction>();

            var eval = new RegressionEvaluator();
            RegressionMetrics metrics = eval.Evaluate(model, collection);

            Console.WriteLine($"Rms = {metrics.Rms}");
            Console.WriteLine($"RSquared = {metrics.RSquared}");

            var nextPowerball = _ctx.NextPowerball.FirstOrDefault();

            var predictedDays = (nextPowerball.next_jackpot_date.AddDays(1)) - DateTime.Now;

            var prediction = model.Predict(new PowerballData()
            {
                Ball1 = 0, Ball2 = 0, Ball3 = 0, Ball4 = 0, Ball5 = 0, PowerBall = 0, daysAgo = predictedDays.Days
            });

            return(prediction);
        }
예제 #18
0
        /// <summary>
        /// 评估模型
        /// </summary>
        /// <param name="model"></param>
        public static void Evaluate(IEnumerable <JiaMiTu> testData, PredictionModel <JiaMiTu, JiaMiTuPrediction> model)
        {
            //var testData = new TextLoader<JiaMiTu>(TestDataPath, useHeader: true, separator: ",");
            var test                  = CollectionDataSource.Create(testData);
            var evaluator             = new RegressionEvaluator();
            RegressionMetrics metrics = evaluator.Evaluate(model, test);

            // Rms should be around 2.795276
            //RMS是评估回归问题的一个指标。它越低,你的模型就越好。将以下代码添加到该Evaluate()函数中以打印模型的RMS。
            Console.WriteLine("Rms=" + metrics.Rms);
            Console.WriteLine("LossFn=" + metrics.LossFn);
            //Squared是评估回归问题的另一个指标。RSquared将是介于0和1之间的值。越接近1,模型越好。将下面的代码添加到该Evaluate()函数中以打印模型的RSquared值。
            Console.WriteLine("RSquared = " + metrics.RSquared);
        }
예제 #19
0
        public override ModelFitness Evaluate(List <ModelDataSet> data, ModelValue prediction)
        {
            if (TrainedModel == null)
            {
                throw new Exception("Must initialize the model before calling");
            }

            lock (TrainedModel)
            {
#if ML_LEGACY
                var testData  = CollectionDataSource.Create(data);
                var evaluator = new RegressionEvaluator();
                var metrics   = evaluator.Evaluate(TrainedModel, testData);

                return(new ModelFitness()
                {
                    RMS = metrics.Rms,
                    RSquared = metrics.RSquared
                });
#else
                var textLoader = GetTextLoader(Context, prediction);

                var pathToData = "";
                try
                {
                    // ugh have to spill data to disk for it to work!
                    pathToData = WriteToDisk(data, prediction);

                    IDataView dataView    = textLoader.Read(pathToData);
                    var       predictions = TrainedModel.Transform(dataView);
                    var       metrics     = Context.Regression.Evaluate(predictions, label: "Label", score: "Score");

                    return(new ModelFitness()
                    {
                        RMS = metrics.Rms,
                        RSquared = metrics.RSquared
                    });
                }
                finally
                {
                    // cleanup
                    if (!string.IsNullOrWhiteSpace(pathToData) && File.Exists(pathToData))
                    {
                        File.Delete(pathToData);
                    }
                }
#endif
            }
        }
예제 #20
0
        public void PredictClusters()
        {
            int n        = 1000;
            int k        = 5;
            var rand     = new Random();
            var clusters = new ClusteringData[k];
            var data     = new ClusteringData[n];

            for (int i = 0; i < k; i++)
            {
                //pick clusters as points on circle with angle to axis X equal to 360*i/k
                clusters[i] = new ClusteringData {
                    Points = new float[2] {
                        (float)Math.Cos(Math.PI * i * 2 / k), (float)Math.Sin(Math.PI * i * 2 / k)
                    }
                };
            }
            // create data points by randomly picking cluster and shifting point slightly away from it.
            for (int i = 0; i < n; i++)
            {
                var index = rand.Next(0, k);
                var shift = (rand.NextDouble() - 0.5) / k;
                data[i] = new ClusteringData
                {
                    Points = new float[2]
                    {
                        (float)(clusters[index].Points[0] + shift),
                        (float)(clusters[index].Points[1] + shift)
                    }
                };
            }
            var pipeline = new LearningPipeline();

            pipeline.Add(CollectionDataSource.Create(data));
            pipeline.Add(new KMeansPlusPlusClusterer()
            {
                K = k
            });
            var model = pipeline.Train <ClusteringData, ClusteringPrediction>();
            //validate that initial points we pick up as centers of cluster during data generation belong to different clusters.
            var labels = new HashSet <uint>();

            for (int i = 0; i < k; i++)
            {
                var scores = model.Predict(clusters[i]);
                Assert.True(!labels.Contains(scores.SelectedClusterId));
                labels.Add(scores.SelectedClusterId);
            }
        }
        public void NoTransformPipeline()
        {
            var data = new Data[1];

            data[0] = new Data
            {
                Features = new float[] { 0.0f, 1.0f },
                Label    = 0f
            };
            var pipeline = new Legacy.LearningPipeline();

            pipeline.Add(CollectionDataSource.Create(data));
            pipeline.Add(new FastForestBinaryClassifier());
            var model = pipeline.Train <Data, Prediction>();
        }
예제 #22
0
            private static PredictionModel <WorkshopData, ClusterPrediction> Train()
            {
                var pipeline = new LearningPipeline();

                //building dataset of WorkshopData
                List <WorkshopData> data = new List <WorkshopData>();
                string line;

                using (var reader = File.OpenText(_dataPath))
                {
                    while ((line = reader.ReadLine()) != null)
                    {
                        string        convertedData       = line;
                        List <string> WorkshopFeaturesSet = convertedData.Split(',').ToList();
                        WorkshopData  wd = new WorkshopData
                        {
                            price    = float.Parse(WorkshopFeaturesSet[0]),
                            duration = float.Parse(WorkshopFeaturesSet[1]),
                            day      = float.Parse(WorkshopFeaturesSet[2]),
                            time     = float.Parse(WorkshopFeaturesSet[3]),
                            teacher  = float.Parse(WorkshopFeaturesSet[4])
                        };
                        data.Add(wd);
                    }
                }

                var collection = CollectionDataSource.Create(data);

                pipeline.Add(collection);
                pipeline.Add(new ColumnConcatenator(
                                 "Features",
                                 "price",
                                 "duration",
                                 "day",
                                 "time",
                                 "teacher")
                             );

                pipeline.Add(new KMeansPlusPlusClusterer()
                {
                    K = 3
                });


                var model = pipeline.Train <WorkshopData, ClusterPrediction>();

                return(model);
            }
예제 #23
0
        static async Task <PredictionModel <IrisData, IrisPrediction> > TrainAsync()
        {
            LearningPipeline pipeline = new LearningPipeline();

            var collection = CollectionDataSource.Create(await getData(@"./myconsoleapp/traindata.json"));

            pipeline.Add(collection);

            pipeline.Add(new ColumnConcatenator("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));

            pipeline.Add(new StochasticDualCoordinateAscentClassifier());

            var model = pipeline.Train <IrisData, IrisPrediction>();

            return(model);
        }
        public void CanTrainProperties()
        {
            var pipeline = new Legacy.LearningPipeline();
            var data     = new List <IrisData>()
            {
                new IrisData {
                    SepalLength = 1f, SepalWidth = 1f, PetalLength = 0.3f, PetalWidth = 5.1f, Label = 1
                },
                new IrisData {
                    SepalLength = 1f, SepalWidth = 1f, PetalLength = 0.3f, PetalWidth = 5.1f, Label = 1
                },
                new IrisData {
                    SepalLength = 1.2f, SepalWidth = 0.5f, PetalLength = 0.3f, PetalWidth = 5.1f, Label = 0
                }
            };
            var collection = CollectionDataSource.Create(data);

            pipeline.Add(collection);
            pipeline.Add(new ColumnConcatenator(outputColumn: "Features",
                                                "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));
            pipeline.Add(new StochasticDualCoordinateAscentClassifier());
            var model = pipeline.Train <IrisData, IrisPredictionProperties>();

            IrisPredictionProperties prediction = model.Predict(new IrisData
            {
                SepalLength = 3.3f,
                SepalWidth  = 1.6f,
                PetalLength = 0.2f,
                PetalWidth  = 5.1f,
            });

            pipeline   = new Legacy.LearningPipeline();
            collection = CollectionDataSource.Create(data.AsEnumerable());
            pipeline.Add(collection);
            pipeline.Add(new ColumnConcatenator(outputColumn: "Features",
                                                "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));
            pipeline.Add(new StochasticDualCoordinateAscentClassifier());
            model = pipeline.Train <IrisData, IrisPredictionProperties>();

            prediction = model.Predict(new IrisData
            {
                SepalLength = 3.3f,
                SepalWidth  = 1.6f,
                PetalLength = 0.2f,
                PetalWidth  = 5.1f,
            });
        }
예제 #25
0
        public static Task TrainModel()
        {
            var pipeline = new LearningPipeline();
            var data     = new List <Artist> {
                new Artist {
                    name = "radiohead"
                },
                new Artist {
                    name = "radiohead"
                }
            };
            var collection = CollectionDataSource.Create(data);

            pipeline.Add(collection);

            return(null);
        }
예제 #26
0
        public void Evaluate(PredictionModel <NrlResult, ClusterPrediction> model,
                             IEnumerable <NrlResult> nrlResults)
        {
            var testData = CollectionDataSource.Create(nrlResults);

            var evaluator = new BinaryClassificationEvaluator();

            Console.WriteLine("=============== Evaluating model ===============");

            var metrics = evaluator.Evaluate(model, testData);

            Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
            Console.WriteLine($"Auc: {metrics.Auc:P2}");
            Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
            Console.WriteLine("=============== End evaluating ===============");
            Console.WriteLine();
        }
        public void CanSuccessfullyApplyATransform()
        {
            var collection = CollectionDataSource.Create(new List <Input>()
            {
                new Input {
                    Number1 = 1, String1 = "1"
                }
            });
            var        environment = new MLContext();
            Experiment experiment  = environment.CreateExperiment();

            Legacy.ILearningPipelineDataStep output = (Legacy.ILearningPipelineDataStep)collection.ApplyStep(null, experiment);

            Assert.NotNull(output.Data);
            Assert.NotNull(output.Data.VarName);
            Assert.Null(output.Model);
        }
예제 #28
0
        static void EvaluateOpen311(PredictionModel <Open311Data, Open311DataPrediction> model, string testDataPath)
        {
            var testData  = CollectionDataSource.Create(OpenFile(testDataPath, 3, 0, 1, 2));
            var evaluator = new ClassificationEvaluator();
            var metrics   = evaluator.Evaluate(model, testData);

            Console.WriteLine();
            Console.WriteLine("PredictionModel quality metrics evaluation");
            Console.WriteLine("------------------------------------------");
            Console.WriteLine($"Accuracy Macro: {metrics.AccuracyMacro:P2}");
            Console.WriteLine($"Accuracy Micro: {metrics.AccuracyMicro:P2}");
            Console.WriteLine($"Top KAccuracy: {metrics.TopKAccuracy:P2}");
            Console.WriteLine($"LogLoss: {metrics.LogLoss:P2}");
            for (var classIndex = 0; classIndex < metrics.PerClassLogLoss.Length; classIndex++)
            {
                Console.WriteLine($"Class: {classIndex} - {metrics.PerClassLogLoss[classIndex]:P2}");
            }
        }
예제 #29
0
        private static PredictionModel <BookData, ClusterPrediction> Train()
        {
            var pipeline = new LearningPipeline();
            // pipeline.Add(new TextLoader(_dataPath).CreateFrom<BookData>(separator: ','));

            //building dataset of BookData
            List <BookData> data = new List <BookData>();
            string          line;

            using (var reader = File.OpenText(_dataPath))
            {
                while ((line = reader.ReadLine()) != null)
                {
                    string        convertedData   = line;
                    List <string> BookFeaturesSet = convertedData.Split(',').ToList();
                    BookData      bd = new BookData
                    {
                        genre       = float.Parse(BookFeaturesSet[0]), //book.Genre,
                        releaseTime = float.Parse(BookFeaturesSet[1]),
                        price       = float.Parse(BookFeaturesSet[2])  //book.Price
                    };
                    data.Add(bd);
                }
            }

            var collection = CollectionDataSource.Create(data);

            pipeline.Add(collection);
            pipeline.Add(new ColumnConcatenator(
                             "Features",
                             "price",
                             "genre",
                             "releaseTime")
                         );

            pipeline.Add(new KMeansPlusPlusClusterer()
            {
                K = 5
            });

            var model = pipeline.Train <BookData, ClusterPrediction>();

            return(model);
        }
예제 #30
0
        public PredictionModel <NrlResult, ClusterPrediction> TrainData(IEnumerable <NrlResult> nrlResults)
        {
            var pipeline = new LearningPipeline
            {
                CollectionDataSource.Create(nrlResults),
                new Dictionarizer("Label"),
                new ColumnConcatenator("Features", "PreviousWeeksHomeFor", "PreviousWeeksHomeAgainst",
                                       "PreviousWeeksAwayFor", "PreviousWeeksAwayAgainst", "AwayTeamAwayForm", "AwayTeamHomeForm",
                                       "HomeTeamAwayForm", "HomeTeamHomeForm", "HomeTeamLastWeekScore", "AwayTeamLastWeekScore"),
                new StochasticDualCoordinateAscentClassifier(),
                //new KMeansPlusPlusClusterer { K = 3 },
                //new FastTreeBinaryClassifier {NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2},

                //new PredictedLabelColumnOriginalValueConverter {PredictedLabelColumn = "PredictedLabel"}
            };


            return(pipeline.Train <NrlResult, ClusterPrediction>());
        }