/// <summary>
/// Trains the regression model from the supplied rows. A model may be trained or loaded only once.
/// </summary>
/// <param name="data">Training rows.</param>
/// <param name="labels">Not read by this implementation.</param>
/// <exception cref="InvalidOperationException">A model has already been trained or loaded.</exception>
public override void Train(List<DataSet> data, List<float> labels = null)
{
    if (TrainedModel != null)
    {
        throw new InvalidOperationException("May only train/load a model once");
    }

#if ML_LEGACY
    // Legacy pipeline: in-memory source -> label copy -> feature vector -> learner.
    var legacyPipeline = new LearningPipeline();
    legacyPipeline.Add(CollectionDataSource.Create(data));

    // "Score" is the value to predict; expose it under the "Label" name.
    legacyPipeline.Add(new ColumnCopier(("Score", "Label")));

    // Feature columns come from DataSet.ColumnNames().
    legacyPipeline.Add(new ColumnConcatenator("Features", DataSet.ColumnNames()));
    legacyPipeline.Add(new FastTreeRegressor());

    TrainedModel = legacyPipeline.Train<DataSet, DataSetPrediction>();
#else
    var loader = GetTextLoader(Context);

    // The rows are spilled to a temporary file and read back through the text loader.
    var tempFile = "";
    try
    {
        tempFile = WriteToDisk(data);

        IDataView trainingView = loader.Load(tempFile);
        InputSchema = trainingView.Schema;

        // Transforms: copy Score into Label, then build the Features vector.
        var transforms = Context.Transforms
            .CopyColumns(outputColumnName: "Label", inputColumnName: nameof(DataSet.Score))
            .Append(Context.Transforms.Concatenate("Features", DataSet.ColumnNames()));

        // Learner: SDCA regression.
        var learner = Context.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features");

        TrainedModel = transforms.Append(learner).Fit(trainingView);
    }
    finally
    {
        // Always remove the temporary file, even when training fails.
        if (!string.IsNullOrWhiteSpace(tempFile) && File.Exists(tempFile))
        {
            File.Delete(tempFile);
        }
    }
#endif
}
/// <summary>
/// Evaluates the sentiment model against two hard-coded samples with the binary
/// classification evaluator and prints accuracy, AUC and F1 score to the console.
/// </summary>
/// <param name="model">The trained model to evaluate.</param>
public static void Evaluate(PredictionModel<SentimentData, SentimentPrediction> model)
{
    var samples = new List<SentimentData>
    {
        new SentimentData { Sentiment = 6f, SentimentText = "such good thing" },
        new SentimentData { Sentiment = -9.3f, SentimentText = "f*****g article" }
    };

    var sampleSource = CollectionDataSource.Create(samples);
    var binaryEvaluator = new BinaryClassificationEvaluator();
    BinaryClassificationMetrics result = binaryEvaluator.Evaluate(model, sampleSource);

    Console.WriteLine();
    Console.WriteLine("PredictionModel quality metrics evaluation");
    Console.WriteLine("------------------------------------------");
    Console.WriteLine($"Accuracy: {result.Accuracy:P2}");
    Console.WriteLine($"Auc: {result.Auc:P2}");
    Console.WriteLine($"F1Score: {result.F1Score:P2}");
}
/// <summary>
/// Trains a K-means++ clustering model (3 clusters) on the in-memory iris data set,
/// writes it to <c>ModelPath</c> and returns it.
/// </summary>
/// <returns>The trained clustering model.</returns>
internal static async Task<PredictionModel<IrisData, ClusterPrediction>> TrainAsync()
{
    // The pipeline collects every step of the learning process: data, transforms, learner.
    var pipeline = new LearningPipeline();

    // Data comes from an in-memory collection (a TextLoader over DataPath was used previously).
    pipeline.Add(CollectionDataSource.Create<IrisData>(GetIrisDataSet()));

    // Merge the four measurement columns into a single "Features" column.
    pipeline.Add(new ColumnConcatenator("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));

    // K-means++ clusterer with the number of clusters set to 3.
    pipeline.Add(new KMeansPlusPlusClusterer() { K = 3 });

    Console.WriteLine("=============== Training model ===============");
    var trained = pipeline.Train<IrisData, ClusterPrediction>();
    Console.WriteLine("=============== End training ===============");

    // Save the model to ModelPath.
    await trained.WriteAsync(ModelPath);
    Console.WriteLine("The model is saved to {0}", ModelPath);

    return trained;
}
/// <summary>
/// Loads all light-bulb records from the repository, trains a FastTree regressor that
/// predicts "IsOn", stores the result in the <c>model</c> member and returns it.
/// </summary>
/// <returns>The trained model.</returns>
public async Task<PredictionModel<UsageOfLightBulbModel, UsageOfLightBulbPredictionModel>> Train()
{
    var records = await _lightBulbRepository.GetAllLightBulbs();
    var trainingRows = new List<UsageOfLightBulbModel>(records);
    var source = CollectionDataSource.Create(trainingRows);

    var pipeline = new LearningPipeline();
    pipeline.Add(source);

    // "IsOn" is the value to predict.
    pipeline.Add(new ColumnCopier(("IsOn", "Label")));
    pipeline.Add(new ColumnConcatenator("Features", "LightBulbID", "Month", "Day", "Time"));
    pipeline.Add(new FastTreeRegressor());

    model = pipeline.Train<UsageOfLightBulbModel, UsageOfLightBulbPredictionModel>();

    // Persisting the model (model.WriteAsync(_modelpath)) is currently disabled.
    return model;
}
/// <summary>
/// Converts the rows, splits them 70/30 into training and test sets, trains a FastTree
/// binary classifier on the training part, saves the model and returns the path it was saved to.
/// </summary>
/// <param name="rows">Input rows; converted with <c>Convert</c> before training.</param>
/// <returns>The relative path of the saved model file.</returns>
public static string CreateNNetworkAndLearn(List<NnRow> rows)
{
    // Prepare data: the first 70% of the rows train the model, the remainder tests it.
    const double trainingSplitRatio = 0.7;
    int trainCount = (int)(rows.Count * trainingSplitRatio);
    int testCount = rows.Count - trainCount;

    MLNetData[] allData = Convert(rows);
    var trainData = new MLNetData[trainCount];
    var testData = new MLNetData[testCount];
    Array.Copy(allData, 0, trainData, 0, trainCount);
    Array.Copy(allData, trainCount, testData, 0, testCount);

    var trainCollection = CollectionDataSource.Create(trainData);
    var testCollection = CollectionDataSource.Create(testData);

    // Results recorded from earlier experiments (acc / auc / f1), kept for reference.
    //
    // Trained and evaluated on all data:
    //   AveragedPerceptron               0.83 / 0.86 / 0.45
    //   FastForest                       0.85 / 0.89 / 0.46
    //   FastTree                         0.95 / 0.97 / 0.85  (NOTE(review): presumably the all-data run - confirm)
    //   FieldAwareFactorizationMachine   0.85 / 0.88 / 0.56
    //   GeneralizedAdditiveModel         0.81 / 0.80 / NaN
    //   LinearSvm                        0.82 / 0.86 / 0.16
    //   LogisticRegression               0.84 / 0.86 / 0.40
    //   StochasticDualCoordinateAscent   0.84 / 0.86 / 0.40
    //   StochasticGradientDescent        0.83 / 0.86 / 0.29
    //   Ensemble                         (no result recorded)
    //
    // Trained on the training set, evaluated on the test set:
    //   AveragedPerceptron               0.82 / 0.84 / 0.45
    //   FastForest                       0.82 / 0.83 / 0.23
    //   FastTree                         0.82 / 0.84 / 0.46
    //   FieldAwareFactorizationMachine   0.83 / 0.85 / 0.37
    //   GeneralizedAdditiveModel         0.81 / 0.75 / NaN
    //   LinearSvm                        0.81 / 0.83 / 0.14
    //   LogisticRegression               0.83 / 0.84 / 0.39
    //   StochasticDualCoordinateAscent   0.82 / 0.84 / 0.43
    //   StochasticGradientDescent        0.83 / 0.83 / 0.34

    // Only the trained model is needed here; the returned metrics are discarded.
    var (_, _, _, modelBest) = TrainAndGetMetrics(trainCollection, testCollection, new FastTreeBinaryClassifier());

    string modelPath = @"NnInputs\mlDotNet_Datacup.model";

    // WriteAsync returns a Task. Wait for it so the file exists, and any save error
    // surfaces, before the path is handed back to the caller.
    modelBest.WriteAsync(modelPath).GetAwaiter().GetResult();

    return modelPath;
}
/// <summary>
/// Trains a multi-class SDCA classifier that predicts "Categories" from the "Name" and
/// "GenericName" text columns of a CSV file, then writes the model to <paramref name="modelPath"/>.
/// </summary>
/// <param name="csvPath">Path of the CSV file read through <c>CsvReader</c>.</param>
/// <param name="modelPath">Path the trained model is written to.</param>
public async Task TrainModelAsync(string csvPath, string modelPath)
{
    var pipeline = new LearningPipeline
    {
        CollectionDataSource.Create(new CsvReader().GetData(csvPath)),

        // Map the "Categories" values to keys in the "Label" column.
        new Dictionarizer(("Categories", "Label")),

        // Featurize both text columns in place, then merge them into "Features".
        new TextFeaturizer("Name", "Name"),
        new TextFeaturizer("GenericName", "GenericName"),
        new ColumnConcatenator("Features", "Name", "GenericName"),

        // Use all but one processor, with a minimum of two threads.
        new StochasticDualCoordinateAscentClassifier()
        {
            NumThreads = Math.Max(2, Environment.ProcessorCount - 1)
        },

        // Convert the predicted key back to the original label value.
        new PredictedLabelColumnOriginalValueConverter() { PredictedLabelColumn = "PredictedLabel" }
    };

    Console.WriteLine("=============== Training model ===============");
    var trained = pipeline.Train<Product, ProductCategoryPrediction>();
    await trained.WriteAsync(modelPath).ConfigureAwait(false);
    Console.WriteLine("=============== End training ===============");
    Console.WriteLine("The model is saved to {0}", modelPath);
}
/// <summary>
/// Trains a multi-class SDCA classifier on the "Request" text of the records read from
/// <paramref name="dataPath"/>, writes the model to <c>_modelPath</c> and returns it.
/// </summary>
/// <param name="dataPath">Path of the data file passed to <c>OpenFile</c>.</param>
/// <returns>The trained model.</returns>
static async Task<PredictionModel<Open311Data, Open311DataPrediction>> TrainOpen311(string dataPath)
{
    var pipeline = new LearningPipeline();
    pipeline.Add(CollectionDataSource.Create(OpenFile(dataPath, 3, 0, 1, 2)));
    pipeline.Add(new Dictionarizer(@"Label"));

    // Text featurization of the "Request" column into "Features":
    // lower-cased, no diacritics or punctuation, German stop words removed, L2-normalised,
    // character n-grams of length 3 and word n-grams up to length 3.
    var requestFeaturizer = new TextFeaturizer(@"Features", @"Request")
    {
        KeepDiacritics = false,
        KeepPunctuations = false,
        TextCase = TextNormalizerTransformCaseNormalizationMode.Lower,
        OutputTokens = true,
        Language = TextTransformLanguage.German,
        StopWordsRemover = new PredefinedStopWordsRemover(),
        VectorNormalizer = TextTransformTextNormKind.L2,
        CharFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = false },
        WordFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = true }
    };
    pipeline.Add(requestFeaturizer);

    pipeline.Add(new StochasticDualCoordinateAscentClassifier());
    pipeline.Add(new PredictedLabelColumnOriginalValueConverter { PredictedLabelColumn = @"PredictedLabel" });

    var trained = pipeline.Train<Open311Data, Open311DataPrediction>();
    await trained.WriteAsync(_modelPath);
    return trained;
}
/// <summary>
/// Generates points scattered around k centers on the unit circle, trains a K-means++
/// model with a fixed seed and verifies that every center is assigned to a distinct cluster
/// and that the cluster evaluator reports the expected default metrics.
/// </summary>
public void PredictClusters()
{
    int n = 1000;
    int k = 4;
    var rand = new Random(1);
    var clusters = new ClusteringData[k];
    var data = new ClusteringData[n];

    for (int i = 0; i < k; i++)
    {
        // Pick cluster centers on the unit circle at an angle of 360*i/k degrees to the X axis.
        clusters[i] = new ClusteringData
        {
            Points = new float[2]
            {
                (float)Math.Cos(Math.PI * i * 2 / k),
                (float)Math.Sin(Math.PI * i * 2 / k)
            }
        };
    }

    // Create data points by randomly picking a cluster and shifting the point slightly away from it.
    for (int i = 0; i < n; i++)
    {
        var index = rand.Next(0, k);
        var shift = (rand.NextDouble() - 0.5) / 10;
        data[i] = new ClusteringData
        {
            Points = new float[2]
            {
                (float)(clusters[index].Points[0] + shift),
                (float)(clusters[index].Points[1] + shift)
            }
        };
    }

    var pipeline = new LearningPipeline(seed: 1, conc: 1);
    pipeline.Add(CollectionDataSource.Create(data));
    pipeline.Add(new KMeansPlusPlusClusterer() { K = k });
    var model = pipeline.Train<ClusteringData, ClusteringPrediction>();

    // Validate that the centers chosen during data generation belong to different clusters.
    // HashSet.Add returns false when the id was already seen.
    var labels = new HashSet<uint>();
    for (int i = 0; i < k; i++)
    {
        var scores = model.Predict(clusters[i]);
        Assert.True(labels.Add(scores.SelectedClusterId));
    }

    var evaluator = new ClusterEvaluator();
    var testData = CollectionDataSource.Create(clusters);
    ClusterMetrics metrics = evaluator.Evaluate(model, testData);

    // Assertions take the expected value first so failure messages read correctly.
    // Label is not specified, so NMI is NaN.
    Assert.Equal(double.NaN, metrics.Nmi);
    // Calculating DBI is off by default, so Dbi is 0.
    Assert.Equal(0.0, metrics.Dbi);
    Assert.Equal(0.0, metrics.AvgMinScore, 5);
}
/// <summary>
/// Trains a FastTree regression model and optionally saves it to disk.
/// </summary>
/// <param name="trainData">Training rows.</param>
/// <param name="modelFileName">Path to write the model to; when null or empty the model is not saved.</param>
/// <param name="labelColumn">Name of the column that is copied into "Label".</param>
/// <param name="oneHotColumns">Categorical columns to one-hot encode; may be null or empty.</param>
/// <param name="features">Columns concatenated into the "Features" vector.</param>
/// <param name="drops">Columns dropped before training; may be null or empty.</param>
/// <returns>The trained model.</returns>
public static async Task<PredictionModel<JiaMiTu, JiaMiTuPrediction>> Train(
    IEnumerable<JiaMiTu> trainData,
    string modelFileName,
    string labelColumn,
    string[] oneHotColumns,
    string[] features,
    string[] drops)
{
    // Create the learning pipeline and load the data from the in-memory collection
    // (a TextLoader over a CSV file was used previously).
    var pipeline = new LearningPipeline();
    pipeline.Add(CollectionDataSource.Create(trainData));

    // Copy the column to predict into a new column named "Label".
    pipeline.Add(new ColumnCopier((labelColumn, "Label")));

    // Drop columns that should not take part in training (for example an id column).
    // A null array is treated the same as an empty one.
    if (drops != null && drops.Length > 0)
    {
        pipeline.Add(new ColumnDropper() { Column = drops });
    }

    // The learner needs numeric features, so categorical columns are one-hot encoded.
    if (oneHotColumns != null && oneHotColumns.Length > 0)
    {
        pipeline.Add(new CategoricalOneHotVectorizer(oneHotColumns));
    }

    // Last data-preparation step: combine all feature columns into one "Features" vector.
    pipeline.Add(new ColumnConcatenator("Features", features));

    // Learner: FastTree, a gradient-boosted regression-tree trainer.
    // (PoissonRegressor was tried as an alternative.)
    pipeline.Add(new FastTreeRegressor());

    // Nothing in the pipeline runs until Train is called.
    PredictionModel<JiaMiTu, JiaMiTuPrediction> model = pipeline.Train<JiaMiTu, JiaMiTuPrediction>();

    // Persist the model only when a file name was supplied.
    if (!string.IsNullOrEmpty(modelFileName))
    {
        await model.WriteAsync(modelFileName);
    }

    return model;
}
/// <summary>
/// Trains a FastTree binary sentiment classifier on the documents of the
/// "review_train" collection, writes the model to <c>_modelpath</c> and returns it.
/// </summary>
/// <param name="db">Database that holds the "review_train" collection.</param>
/// <returns>The trained model, for use in evaluation.</returns>
public static async Task<PredictionModel<SentimentData, SentimentPrediction>> Train(IMongoDatabase db)
{
    // The pipeline keeps all steps of the learning process together.
    // <Snippet5>
    var pipeline = new LearningPipeline();
    // </Snippet5>

    // <Snippet6>
    var reviews = db.GetCollection<SentimentData>("review_train");
    var trainingDocs = reviews.Find<SentimentData>(new BsonDocument()).ToEnumerable();
    pipeline.Add(CollectionDataSource.Create(trainingDocs));
    // </Snippet6>

    // Featurize the "text" column into "Features": lower-case, no diacritics, no punctuation.
    // <Snippet7>
    var textFeaturizer = new TextFeaturizer("Features", "text")
    {
        KeepDiacritics = false,
        KeepPunctuations = false,
        TextCase = TextNormalizerTransformCaseNormalizationMode.Lower,
    };
    pipeline.Add(textFeaturizer);
    //</Snippet7>

    // Decision-tree learner for this project, with its tuning hyperparameters.
    // <Snippet8>
    var learner = new FastTreeBinaryClassifier()
    {
        NumLeaves = 100,
        NumTrees = 50,
        MinDocumentsInLeafs = 2,
        LearningRates = 0.4f,
    };
    pipeline.Add(learner);
    // </Snippet8>

    // Train on the loaded and transformed data.
    // <Snippet9>
    var trained = pipeline.Train<SentimentData, SentimentPrediction>();
    // </Snippet9>

    // Save the trained model.
    // <Snippet10>
    await trained.WriteAsync(_modelpath);
    // </Snippet10>

    // <Snippet11>
    return trained;
    // </Snippet11>
}
/// <summary>
/// Generates training and test data, trains a multi-class SDCA classifier, prints its
/// metrics on the training data, predicts labels for the test data and prints the results.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var trainData = GeneratePData(2000);
    Print(trainData.Take(20));
    var testData = GeneratePData(50, test: true);

    // Create the pipeline object for the ML task.
    var trainSource = CollectionDataSource.Create(trainData);
    var pipeline = new LearningPipeline();
    pipeline.Add(trainSource);

    // Convert the label values into numeric keys.
    pipeline.Add(new Dictionarizer("Label"));
    pipeline.Add(new ColumnConcatenator("Features", "UnitA", "UnitS", "Volume"));

    // Classification algorithm.
    pipeline.Add(new StochasticDualCoordinateAscentClassifier());

    // Convert the predicted key back to the original label value.
    pipeline.Add(new PredictedLabelColumnOriginalValueConverter() { PredictedLabelColumn = "PredictedLabel" });

    // Train the model.
    var trained = pipeline.Train<ProcessData, ProcessPrediction>();

    // Evaluate the model (on the training collection).
    var classificationEvaluator = new ClassificationEvaluator();
    var quality = classificationEvaluator.Evaluate(trained, trainSource);
    Console.WriteLine($"AccuracyMicro: {quality.AccuracyMicro}");
    Console.WriteLine($"LogLoss: {quality.LogLoss}");

    // Predict the test data.
    var predictions = trained.Predict(testData);

    // Pair every test row with its predicted label.
    var labelled = testData
        .Zip(predictions, (row, guess) => new ProcessData
        {
            UnitA = row.UnitA,
            UnitS = row.UnitS,
            Volume = row.Volume,
            Label = guess.PredictedLabels
        })
        .ToList();

    // Print the result and wait for Enter.
    Print(labelled);
    Console.ReadLine();
}
/// <summary>
/// Evaluates the clustering model on the given test rows and prints the
/// <c>AvgMinScore</c> metric to the console.
/// </summary>
/// <param name="testData">Rows to evaluate against.</param>
/// <param name="model">The trained clustering model.</param>
public void EvaluateModel(IEnumerable<PivotData> testData, PredictionModel<PivotData, ClusteringPrediction> model)
{
    ConsoleWriteHeader("Metrics for Customer Segmentation");

    var source = CollectionDataSource.Create(testData);
    var clusterEvaluator = new ClusterEvaluator();
    ClusterMetrics result = clusterEvaluator.Evaluate(model, source);

    Console.WriteLine($"Average mean score: {result.AvgMinScore:0.##}");

    // The Davies-Bouldin index (Dbi) and normalized mutual information (Nmi) are not printed.
}
/// <summary>
/// Evaluates the poker-hand classifier on the given rows and prints the log-loss
/// reduction and log-loss to the console.
/// </summary>
/// <param name="model">The trained model.</param>
/// <param name="data">Rows to evaluate against.</param>
public static void Evaluate(PredictionModel<PokerHandData, PokerHandPrediction> model, IEnumerable<PokerHandData> data)
{
    var classificationEvaluator = new ClassificationEvaluator();
    var source = CollectionDataSource.Create(data);
    var result = classificationEvaluator.Evaluate(model, source);

    Console.WriteLine();
    Console.WriteLine("PredictionModel quality metrics evaluation");
    Console.WriteLine("------------------------------------------");
    Console.WriteLine($"LogLossReduction: {result.LogLossReduction}");
    Console.WriteLine($"LogLoss: {result.LogLoss}");
}
/// <summary>
/// Trains a logistic-regression classifier on the poker-hand feature columns.
/// </summary>
/// <param name="data">Training rows.</param>
/// <returns>The trained model.</returns>
public static PredictionModel<PokerHandData, PokerHandPrediction> Train(IEnumerable<PokerHandData> data)
{
    var pipeline = new LearningPipeline
    {
        CollectionDataSource.Create(data),

        // Combine the hand descriptors into the single "Features" column.
        new ColumnConcatenator(
            "Features",
            "IsSameSuit",
            "IsStraight",
            "FourOfKind",
            "ThreeOfKind",
            "PairsCount"),

        new LogisticRegressionClassifier()
    };

    return pipeline.Train<PokerHandData, PokerHandPrediction>();
}
/// <summary>
/// Trains a fast-forest binary classifier on a single row whose label is a boolean.
/// The test contains no assertions; it only requires that training completes.
/// </summary>
public void BooleanLabelPipeline()
{
    var rows = new[]
    {
        new BooleanLabelData
        {
            Features = new float[] { 0.0f, 1.0f },
            Label = false
        }
    };

    var pipeline = new LearningPipeline();
    pipeline.Add(CollectionDataSource.Create(rows));
    pipeline.Add(new FastForestBinaryClassifier());

    var trained = pipeline.Train<Data, Prediction>();
}
protected LearningPipeline BuildModel(IEnumerable <SalesRecommendationData> salesData) { ConsoleWriteHeader("Build model pipeline"); var pipeline = new LearningPipeline(); pipeline.Add(CollectionDataSource.Create(salesData)); // One Hot Encoding using Hash Vector. The new columns are named as the original ones, but adding the suffix "_OH" pipeline.Add(new CategoricalHashOneHotVectorizer((nameof(SalesRecommendationData.ProductId), nameof(SalesRecommendationData.ProductId) + "_OH")) { HashBits = 18 });
/// <summary>
/// Trains a FastTree regressor on all stored Powerball draws, prints its RMS and R-squared
/// (measured on the training data) and returns a prediction for the next jackpot date.
/// </summary>
/// <returns>The model's prediction for the upcoming draw.</returns>
/// <exception cref="InvalidOperationException">No upcoming draw is stored.</exception>
public PowerballPrediction PredictPowerball()
{
    // Read the clock once so every row is measured against the same reference instant.
    var now = DateTime.Now;

    var data = new List<PowerballData>();
    foreach (var powerball in _ctx.Powerballs)
    {
        data.Add(new PowerballData()
        {
            Ball1 = Convert.ToInt32(powerball.ball1),
            Ball2 = Convert.ToInt32(powerball.ball2),
            Ball3 = Convert.ToInt32(powerball.ball3),
            Ball4 = Convert.ToInt32(powerball.ball4),
            Ball5 = Convert.ToInt32(powerball.ball5),
            PowerBall = Convert.ToInt32(powerball.powerball),
            // draw_date minus now: past draws yield negative values.
            daysAgo = (float)(powerball.draw_date - now).TotalDays
        });
    }

    var collection = CollectionDataSource.Create(data);

    var pipeline = new LearningPipeline();
    pipeline.Add(collection);
    pipeline.Add(new ColumnCopier(("daysAgo", "Label")));
    pipeline.Add(new CategoricalOneHotVectorizer("id"));
    pipeline.Add(new ColumnConcatenator("Features", "id", "daysAgo"));
    pipeline.Add(new FastTreeRegressor());

    var model = pipeline.Train<PowerballData, PowerballPrediction>();

    var eval = new RegressionEvaluator();
    RegressionMetrics metrics = eval.Evaluate(model, collection);
    Console.WriteLine($"Rms = {metrics.Rms}");
    Console.WriteLine($"RSquared = {metrics.RSquared}");

    // FirstOrDefault yields null when no upcoming draw is stored; fail with a clear
    // message instead of a NullReferenceException on the next line.
    var nextPowerball = _ctx.NextPowerball.FirstOrDefault();
    if (nextPowerball == null)
    {
        throw new InvalidOperationException("No upcoming Powerball draw is available to predict.");
    }

    var predictedDays = nextPowerball.next_jackpot_date.AddDays(1) - now;

    // NOTE(review): whole days (Days) are used here while training uses fractional
    // TotalDays - kept as in the original; confirm this is intended.
    return model.Predict(new PowerballData()
    {
        Ball1 = 0,
        Ball2 = 0,
        Ball3 = 0,
        Ball4 = 0,
        Ball5 = 0,
        PowerBall = 0,
        daysAgo = predictedDays.Days
    });
}
/// <summary>
/// Evaluates the regression model on the given test rows and prints RMS, the loss
/// function value and R-squared to the console.
/// </summary>
/// <param name="testData">Rows to evaluate against.</param>
/// <param name="model">The trained model.</param>
public static void Evaluate(IEnumerable<JiaMiTu> testData, PredictionModel<JiaMiTu, JiaMiTuPrediction> model)
{
    // The test rows come from an in-memory collection (a TextLoader over a CSV file was used previously).
    var source = CollectionDataSource.Create(testData);
    var regressionEvaluator = new RegressionEvaluator();
    RegressionMetrics result = regressionEvaluator.Evaluate(model, source);

    // RMS is one metric for regression problems: the lower it is, the better the model.
    Console.WriteLine("Rms=" + result.Rms);
    Console.WriteLine("LossFn=" + result.LossFn);

    // R-squared is another regression metric: the closer to 1, the better the model.
    Console.WriteLine("RSquared = " + result.RSquared);
}
/// <summary>
/// Evaluates the trained regression model against the supplied rows and returns its
/// RMS and R-squared.
/// </summary>
/// <param name="data">Rows to evaluate against.</param>
/// <param name="prediction">The value being predicted; used only by the non-legacy code path.</param>
/// <returns>The fitness (RMS and R-squared) of the model on <paramref name="data"/>.</returns>
/// <exception cref="InvalidOperationException">No model has been trained or loaded yet.</exception>
public override ModelFitness Evaluate(List<ModelDataSet> data, ModelValue prediction)
{
    if (TrainedModel == null)
    {
        // InvalidOperationException derives from Exception, so existing catch blocks still apply.
        throw new InvalidOperationException("Must initialize the model before calling");
    }

    // NOTE(review): this locks on the model instance itself; a dedicated private lock
    // object would be safer if TrainedModel is reachable from outside this class.
    lock (TrainedModel)
    {
#if ML_LEGACY
        var testData = CollectionDataSource.Create(data);
        var evaluator = new RegressionEvaluator();
        var metrics = evaluator.Evaluate(TrainedModel, testData);

        return new ModelFitness()
        {
            RMS = metrics.Rms,
            RSquared = metrics.RSquared
        };
#else
        var textLoader = GetTextLoader(Context, prediction);
        var pathToData = "";
        try
        {
            // The rows are spilled to a temporary file and read back through the text loader.
            pathToData = WriteToDisk(data, prediction);
            IDataView dataView = textLoader.Read(pathToData);

            var predictions = TrainedModel.Transform(dataView);
            var metrics = Context.Regression.Evaluate(predictions, label: "Label", score: "Score");

            return new ModelFitness()
            {
                RMS = metrics.Rms,
                RSquared = metrics.RSquared
            };
        }
        finally
        {
            // Always remove the temporary file, even when evaluation fails.
            if (!string.IsNullOrWhiteSpace(pathToData) && File.Exists(pathToData))
            {
                File.Delete(pathToData);
            }
        }
#endif
    }
}
/// <summary>
/// Generates points scattered around k centers on the unit circle, trains a K-means++
/// model and verifies that every center is assigned to a distinct cluster.
/// </summary>
public void PredictClusters()
{
    int n = 1000;
    int k = 5;

    // NOTE(review): the generator is not seeded, so the generated data differs between runs.
    var rand = new Random();
    var centers = new ClusteringData[k];
    var samples = new ClusteringData[n];

    // Place the centers on the unit circle at an angle of 360*c/k degrees to the X axis.
    for (int c = 0; c < k; c++)
    {
        var x = (float)Math.Cos(Math.PI * c * 2 / k);
        var y = (float)Math.Sin(Math.PI * c * 2 / k);
        centers[c] = new ClusteringData { Points = new float[2] { x, y } };
    }

    // Each sample is a randomly chosen center moved slightly along the diagonal.
    for (int s = 0; s < n; s++)
    {
        var chosen = centers[rand.Next(0, k)];
        var offset = (rand.NextDouble() - 0.5) / k;
        samples[s] = new ClusteringData
        {
            Points = new float[2]
            {
                (float)(chosen.Points[0] + offset),
                (float)(chosen.Points[1] + offset)
            }
        };
    }

    var pipeline = new LearningPipeline();
    pipeline.Add(CollectionDataSource.Create(samples));
    pipeline.Add(new KMeansPlusPlusClusterer() { K = k });
    var trained = pipeline.Train<ClusteringData, ClusteringPrediction>();

    // The centers used to generate the data must all land in different clusters.
    var seenIds = new HashSet<uint>();
    foreach (var center in centers)
    {
        var predicted = trained.Predict(center);
        Assert.True(!seenIds.Contains(predicted.SelectedClusterId));
        seenIds.Add(predicted.SelectedClusterId);
    }
}
/// <summary>
/// Trains a fast-forest binary classifier directly on a single row, without any
/// transform steps. The test contains no assertions; it only requires that training completes.
/// </summary>
public void NoTransformPipeline()
{
    var rows = new[]
    {
        new Data
        {
            Features = new float[] { 0.0f, 1.0f },
            Label = 0f
        }
    };

    var pipeline = new Legacy.LearningPipeline();
    pipeline.Add(CollectionDataSource.Create(rows));
    pipeline.Add(new FastForestBinaryClassifier());

    var trained = pipeline.Train<Data, Prediction>();
}
/// <summary>
/// Reads the comma-separated workshop records from <c>_dataPath</c> and trains a
/// K-means++ clustering model with 3 clusters on them.
/// </summary>
/// <returns>The trained clustering model.</returns>
private static PredictionModel<WorkshopData, ClusterPrediction> Train()
{
    // The file is machine-written data, so numbers are parsed with the invariant culture.
    // A culture that uses ',' as decimal separator would otherwise misread the values.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Build the data set: one WorkshopData per line, columns in the order
    // price, duration, day, time, teacher.
    var data = new List<WorkshopData>();
    using (var reader = File.OpenText(_dataPath))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // Skip blank lines instead of failing on them.
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            string[] fields = line.Split(',');
            data.Add(new WorkshopData
            {
                price = float.Parse(fields[0], invariant),
                duration = float.Parse(fields[1], invariant),
                day = float.Parse(fields[2], invariant),
                time = float.Parse(fields[3], invariant),
                teacher = float.Parse(fields[4], invariant)
            });
        }
    }

    var pipeline = new LearningPipeline();
    pipeline.Add(CollectionDataSource.Create(data));
    pipeline.Add(new ColumnConcatenator("Features", "price", "duration", "day", "time", "teacher"));
    pipeline.Add(new KMeansPlusPlusClusterer() { K = 3 });

    return pipeline.Train<WorkshopData, ClusterPrediction>();
}
/// <summary>
/// Loads the iris training data from <c>./myconsoleapp/traindata.json</c> and trains a
/// multi-class SDCA classifier on the four measurement columns.
/// </summary>
/// <returns>The trained model.</returns>
static async Task<PredictionModel<IrisData, IrisPrediction>> TrainAsync()
{
    var pipeline = new LearningPipeline();

    var trainingRows = await getData(@"./myconsoleapp/traindata.json");
    pipeline.Add(CollectionDataSource.Create(trainingRows));

    // Merge the measurements into the single "Features" column.
    pipeline.Add(new ColumnConcatenator(
        "Features",
        "SepalLength",
        "SepalWidth",
        "PetalLength",
        "PetalWidth"));
    pipeline.Add(new StochasticDualCoordinateAscentClassifier());

    return pipeline.Train<IrisData, IrisPrediction>();
}
/// <summary>
/// Trains and predicts twice with a prediction type that uses properties: once with the
/// data passed as a list and once with the same data passed as an enumerable.
/// The test contains no assertions; it only requires that both runs complete.
/// </summary>
public void CanTrainProperties()
{
    var listPipeline = new Legacy.LearningPipeline();
    var samples = new List<IrisData>
    {
        new IrisData { SepalLength = 1f, SepalWidth = 1f, PetalLength = 0.3f, PetalWidth = 5.1f, Label = 1 },
        new IrisData { SepalLength = 1f, SepalWidth = 1f, PetalLength = 0.3f, PetalWidth = 5.1f, Label = 1 },
        new IrisData { SepalLength = 1.2f, SepalWidth = 0.5f, PetalLength = 0.3f, PetalWidth = 5.1f, Label = 0 }
    };

    // First run: the data source is created from the list.
    listPipeline.Add(CollectionDataSource.Create(samples));
    listPipeline.Add(new ColumnConcatenator(outputColumn: "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));
    listPipeline.Add(new StochasticDualCoordinateAscentClassifier());

    var listModel = listPipeline.Train<IrisData, IrisPredictionProperties>();
    IrisPredictionProperties listPrediction = listModel.Predict(new IrisData
    {
        SepalLength = 3.3f,
        SepalWidth = 1.6f,
        PetalLength = 0.2f,
        PetalWidth = 5.1f,
    });

    // Second run: the same data, passed as an enumerable.
    var enumerablePipeline = new Legacy.LearningPipeline();
    enumerablePipeline.Add(CollectionDataSource.Create(samples.AsEnumerable()));
    enumerablePipeline.Add(new ColumnConcatenator(outputColumn: "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));
    enumerablePipeline.Add(new StochasticDualCoordinateAscentClassifier());

    var enumerableModel = enumerablePipeline.Train<IrisData, IrisPredictionProperties>();
    IrisPredictionProperties enumerablePrediction = enumerableModel.Predict(new IrisData
    {
        SepalLength = 3.3f,
        SepalWidth = 1.6f,
        PetalLength = 0.2f,
        PetalWidth = 5.1f,
    });
}
/// <summary>
/// Builds a learning pipeline over two hard-coded artists. No learner is added and
/// nothing is trained yet.
/// </summary>
/// <returns>An already-completed task, so callers can safely await the result.</returns>
public static Task TrainModel()
{
    var pipeline = new LearningPipeline();
    var data = new List<Artist>
    {
        new Artist { name = "radiohead" },
        new Artist { name = "radiohead" }
    };

    var collection = CollectionDataSource.Create(data);
    pipeline.Add(collection);

    // A Task-returning method must never return null: awaiting null throws NullReferenceException.
    return Task.CompletedTask;
}
/// <summary>
/// Evaluates the model on the given results with the binary classification evaluator
/// and prints accuracy, AUC and F1 score to the console.
/// </summary>
/// <param name="model">The trained model.</param>
/// <param name="nrlResults">Rows to evaluate against.</param>
public void Evaluate(PredictionModel<NrlResult, ClusterPrediction> model, IEnumerable<NrlResult> nrlResults)
{
    var source = CollectionDataSource.Create(nrlResults);
    var binaryEvaluator = new BinaryClassificationEvaluator();

    Console.WriteLine("=============== Evaluating model ===============");

    var result = binaryEvaluator.Evaluate(model, source);
    Console.WriteLine($"Accuracy: {result.Accuracy:P2}");
    Console.WriteLine($"Auc: {result.Auc:P2}");
    Console.WriteLine($"F1Score: {result.F1Score:P2}");

    Console.WriteLine("=============== End evaluating ===============");
    Console.WriteLine();
}
/// <summary>
/// Applies a collection data source as a pipeline step and checks that the resulting
/// step exposes a data variable with a name and carries no model.
/// </summary>
public void CanSuccessfullyApplyATransform()
{
    var inputs = new List<Input>
    {
        new Input { Number1 = 1, String1 = "1" }
    };
    var source = CollectionDataSource.Create(inputs);

    var context = new MLContext();
    Experiment experiment = context.CreateExperiment();

    var step = (Legacy.ILearningPipelineDataStep)source.ApplyStep(null, experiment);

    Assert.NotNull(step.Data);
    Assert.NotNull(step.Data.VarName);
    Assert.Null(step.Model);
}
/// <summary>
/// Evaluates the Open311 classifier on the records read from <paramref name="testDataPath"/>
/// and prints the overall and per-class metrics to the console.
/// </summary>
/// <param name="model">The trained model.</param>
/// <param name="testDataPath">Path of the test data file passed to <c>OpenFile</c>.</param>
static void EvaluateOpen311(PredictionModel<Open311Data, Open311DataPrediction> model, string testDataPath)
{
    var source = CollectionDataSource.Create(OpenFile(testDataPath, 3, 0, 1, 2));
    var classificationEvaluator = new ClassificationEvaluator();
    var result = classificationEvaluator.Evaluate(model, source);

    Console.WriteLine();
    Console.WriteLine("PredictionModel quality metrics evaluation");
    Console.WriteLine("------------------------------------------");
    Console.WriteLine($"Accuracy Macro: {result.AccuracyMacro:P2}");
    Console.WriteLine($"Accuracy Micro: {result.AccuracyMicro:P2}");
    Console.WriteLine($"Top KAccuracy: {result.TopKAccuracy:P2}");
    Console.WriteLine($"LogLoss: {result.LogLoss:P2}");

    // One line per class, labelled with the class index.
    for (var c = 0; c < result.PerClassLogLoss.Length; c++)
    {
        Console.WriteLine($"Class: {c} - {result.PerClassLogLoss[c]:P2}");
    }
}
/// <summary>
/// Reads the comma-separated book records from <c>_dataPath</c> and trains a
/// K-means++ clustering model with 5 clusters on them.
/// </summary>
/// <returns>The trained clustering model.</returns>
private static PredictionModel<BookData, ClusterPrediction> Train()
{
    // The file is machine-written data, so numbers are parsed with the invariant culture.
    // A culture that uses ',' as decimal separator would otherwise misread the values.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Build the data set: one BookData per line, columns in the order
    // genre, releaseTime, price. (A TextLoader over the file was used previously.)
    var data = new List<BookData>();
    using (var reader = File.OpenText(_dataPath))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            // Skip blank lines instead of failing on them.
            if (string.IsNullOrWhiteSpace(line))
            {
                continue;
            }

            string[] fields = line.Split(',');
            data.Add(new BookData
            {
                genre = float.Parse(fields[0], invariant),
                releaseTime = float.Parse(fields[1], invariant),
                price = float.Parse(fields[2], invariant)
            });
        }
    }

    var pipeline = new LearningPipeline();
    pipeline.Add(CollectionDataSource.Create(data));
    pipeline.Add(new ColumnConcatenator("Features", "price", "genre", "releaseTime"));
    pipeline.Add(new KMeansPlusPlusClusterer() { K = 5 });

    return pipeline.Train<BookData, ClusterPrediction>();
}
/// <summary>
/// Trains a multi-class SDCA classifier on the form and score columns of the given results.
/// </summary>
/// <param name="nrlResults">Training rows.</param>
/// <returns>The trained model.</returns>
public PredictionModel<NrlResult, ClusterPrediction> TrainData(IEnumerable<NrlResult> nrlResults)
{
    var pipeline = new LearningPipeline();
    pipeline.Add(CollectionDataSource.Create(nrlResults));

    // Map the label values to keys.
    pipeline.Add(new Dictionarizer("Label"));

    pipeline.Add(new ColumnConcatenator(
        "Features",
        "PreviousWeeksHomeFor",
        "PreviousWeeksHomeAgainst",
        "PreviousWeeksAwayFor",
        "PreviousWeeksAwayAgainst",
        "AwayTeamAwayForm",
        "AwayTeamHomeForm",
        "HomeTeamAwayForm",
        "HomeTeamHomeForm",
        "HomeTeamLastWeekScore",
        "AwayTeamLastWeekScore"));

    pipeline.Add(new StochasticDualCoordinateAscentClassifier());

    // Alternatives that were tried and are currently disabled:
    //   new KMeansPlusPlusClusterer { K = 3 }
    //   new FastTreeBinaryClassifier { NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2 }
    //   new PredictedLabelColumnOriginalValueConverter { PredictedLabelColumn = "PredictedLabel" }

    var trained = pipeline.Train<NrlResult, ClusterPrediction>();
    return trained;
}