Esempio n. 1
0
        /// <summary>
        /// Trains the regression model on <paramref name="data"/>, using the Score column as the label.
        /// </summary>
        /// <param name="data">Rows to train on.</param>
        /// <param name="labels">Not read by this implementation.</param>
        /// <exception cref="InvalidOperationException">A model has already been trained or loaded.</exception>
        public override void Train(List <DataSet> data, List <float> labels = null)
        {
            if (TrainedModel != null)
            {
                throw new InvalidOperationException("May only train/load a model once");
            }

#if ML_LEGACY
            // Legacy API: data source, label copy, feature vector and learner are queued in this order.
            var legacyPipeline = new LearningPipeline();
            legacyPipeline.Add(CollectionDataSource.Create(data));
            legacyPipeline.Add(new ColumnCopier(("Score", "Label")));
            // NOTE(review): DataSet.ColumnNames() presumably excludes the predicted column - confirm.
            legacyPipeline.Add(new ColumnConcatenator("Features", DataSet.ColumnNames()));
            legacyPipeline.Add(new FastTreeRegressor());

            TrainedModel = legacyPipeline.Train <DataSet, DataSetPrediction>();
#else
            var loader = GetTextLoader(Context);

            // The rows are spilled to a file first because the loader is fed from a path.
            string spillPath = "";
            try
            {
                spillPath = WriteToDisk(data);

                var view = loader.Load(spillPath);
                InputSchema = view.Schema;

                // Score -> Label, feature columns -> Features, then the SDCA regression trainer.
                var estimator = Context.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: nameof(DataSet.Score))
                                .Append(Context.Transforms.Concatenate("Features", DataSet.ColumnNames()))
                                .Append(Context.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features"));

                TrainedModel = estimator.Fit(view);
            }
            finally
            {
                // Remove the spill file whether or not training succeeded.
                bool spillExists = !string.IsNullOrWhiteSpace(spillPath) && File.Exists(spillPath);
                if (spillExists)
                {
                    File.Delete(spillPath);
                }
            }
#endif
        }
Esempio n. 2
0
        /// <summary>
        /// Evaluates <paramref name="model"/> against two in-memory samples and prints
        /// accuracy, AUC and F1 score to the console.
        /// </summary>
        /// <param name="model">Trained sentiment model to evaluate.</param>
        public static void Evaluate(PredictionModel <SentimentData, SentimentPrediction> model)
        {
            // Hand-written evaluation samples.
            var samples = new List <SentimentData>();
            samples.Add(new SentimentData { Sentiment = 6f, SentimentText = "such good thing" });
            samples.Add(new SentimentData { Sentiment = -9.3f, SentimentText = "f*****g article" });

            var sampleSource = CollectionDataSource.Create(samples);
            var binaryEvaluator = new BinaryClassificationEvaluator();
            BinaryClassificationMetrics quality = binaryEvaluator.Evaluate(model, sampleSource);

            Console.WriteLine();
            Console.WriteLine("PredictionModel quality metrics evaluation");
            Console.WriteLine("------------------------------------------");
            Console.WriteLine($"Accuracy: {quality.Accuracy:P2}");
            Console.WriteLine($"Auc: {quality.Auc:P2}");
            Console.WriteLine($"F1Score: {quality.F1Score:P2}");
        }
        /// <summary>
        /// Groups the list on "GroupItem" with a sub-sort on "Randomint" and checks that group keys are
        /// strictly increasing and that RandomInt never decreases inside a group.
        /// </summary>
        public void GroupbyOneColumnSortSubList()
        {
            var settings = new ReportSettings();

            // Ascending grouping with an ascending sort of the rows inside each group.
            var grouping = new GroupColumn();
            grouping.ColumnName    = "GroupItem";
            grouping.SortDirection = ListSortDirection.Ascending;

            var subSort = new SortColumn();
            subSort.ColumnName       = "Randomint";
            subSort.SortDirection    = ListSortDirection.Ascending;
            grouping.GroupSortColumn = subSort;

            settings.GroupColumnsCollection.Add(grouping);

            var source = new CollectionDataSource(list, settings);
            source.Bind();

            var previousKey = String.Empty;
            foreach (var group in source.GroupedList)
            {
                Assert.That(group.Key, Is.GreaterThan(previousKey));
                previousKey = group.Key.ToString();

                // The sub-key comparison starts over for every group.
                var previousSubKey = -1;
                foreach (Contributor member in group)
                {
                    Assert.That(member.RandomInt, Is.GreaterThanOrEqualTo(previousSubKey));
                    previousSubKey = member.RandomInt;
                }
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Trains a product category classifier from the rows of a CSV file and writes the model to disk.
        /// </summary>
        /// <param name="csvPath">CSV file handed to <c>CsvReader.GetData</c>.</param>
        /// <param name="modelPath">Path the trained model is written to.</param>
        public async Task TrainModelAsync(string csvPath, string modelPath)
        {
            // Steps run in the order listed: data, label encoding, text features, feature vector,
            // learner, and conversion of the predicted label back to its original value.
            var pipeline = new LearningPipeline
            {
                CollectionDataSource.Create(new CsvReader().GetData(csvPath)),
                new Dictionarizer(("Categories", "Label")),
                new TextFeaturizer("Name", "Name"),
                new TextFeaturizer("GenericName", "GenericName"),
                new ColumnConcatenator("Features", "Name", "GenericName"),
                new StochasticDualCoordinateAscentClassifier()
                {
                    // at least two threads, otherwise all processors but one
                    NumThreads = Math.Max(2, Environment.ProcessorCount - 1)
                },
                new PredictedLabelColumnOriginalValueConverter()
                {
                    PredictedLabelColumn = "PredictedLabel"
                }
            };

            Console.WriteLine("=============== Training model ===============");

            var trained = pipeline.Train <Product, ProductCategoryPrediction>();
            await trained.WriteAsync(modelPath).ConfigureAwait(false);

            Console.WriteLine("=============== End training ===============");
            Console.WriteLine("The model is saved to {0}", modelPath);
        }
        /// <summary>
        /// Trains a FastTree regression model on every light bulb record returned by the repository,
        /// with "IsOn" as the label, and stores the result in <c>model</c>.
        /// </summary>
        /// <returns>The trained model.</returns>
        public async Task <PredictionModel <UsageOfLightBulbModel, UsageOfLightBulbPredictionModel> > Train()
        {
            var storedRecords = await _lightBulbRepository.GetAllLightBulbs();
            var trainingSet   = new List <UsageOfLightBulbModel>(storedRecords);
            var trainingData  = CollectionDataSource.Create(trainingSet);

            var pipeline = new LearningPipeline();
            pipeline.Add(trainingData);
            pipeline.Add(new ColumnCopier(("IsOn", "Label")));
            pipeline.Add(new ColumnConcatenator("Features", "LightBulbID", "Month", "Day", "Time"));
            pipeline.Add(new FastTreeRegressor());

            model = pipeline.Train <UsageOfLightBulbModel, UsageOfLightBulbPredictionModel>();

            // Saving is currently disabled:
            //  await model.WriteAsync(_modelpath);
            return model;
        }
        /// <summary>
        /// Fills a row holding a data item without a column name and checks that, for every record,
        /// the item's DBValue starts with "Missing".
        /// </summary>
        public void DataItemWithNoColumnNameHasErrorMessageInDbValue()
        {
            var rowItem     = new BaseRowItem();
            var unnamedItem = new BaseDataItem();

            rowItem.Items.Add(unnamedItem);

            var printables = new System.Collections.Generic.List <IPrintableObject> { rowItem };

            var settings = new ReportSettings();
            var source   = new CollectionDataSource(list, settings);

            source.Bind();
            foreach (var record in source.SortedList)
            {
                source.Fill(printables, record);

                var filledRow = (BaseRowItem)printables[0];
                foreach (var item in filledRow.Items)
                {
                    bool reportsMissing = ((BaseDataItem)item).DBValue.StartsWith("Missing");
                    Assert.That(reportsMissing, Is.EqualTo(true));
                }
            }
        }
        /// <summary>
        /// Fills a row for every record of the sorted list, checks that each data item received a
        /// non-empty DBValue, and checks that the number of records equals the source's Count.
        /// </summary>
        public void FillDataIncludedInRow()
        {
            var rowItem = new BaseRowItem();
            rowItem.Items.AddRange(CreateDataItems());

            var printables = new System.Collections.Generic.List <IPrintableObject> { rowItem };

            var settings = new ReportSettings();
            var source   = new CollectionDataSource(list, settings);

            source.Bind();

            int filledRecords = 0;
            foreach (var record in source.SortedList)
            {
                source.Fill(printables, record);

                var filledRow = (BaseRowItem)printables[0];
                foreach (var item in filledRow.Items)
                {
                    Assert.That(((BaseDataItem)item).DBValue, Is.Not.Empty);
                }

                filledRecords++;
            }

            Assert.That(filledRecords, Is.EqualTo(source.Count));
        }
Esempio n. 8
0
 /// <summary>
 /// Stores the pages, report settings and data source, and creates the expression visitor
 /// from the report settings.
 /// </summary>
 /// <param name="pages">Exported pages kept by the runner.</param>
 /// <param name="reportSettings">Settings kept by the runner and passed to the visitor.</param>
 /// <param name="dataSource">Data source kept by the runner.</param>
 public ExpressionRunner(Collection <ExportPage> pages, ReportSettings reportSettings, CollectionDataSource dataSource)
 {
     this.reportSettings = reportSettings;
     this.dataSource     = dataSource;
     this.pages          = pages;

     Visitor = new ExpressionVisitor(reportSettings);
 }
Esempio n. 9
0
        /// <summary>
        /// Splits <paramref name="rows"/> 70/30 into training and test sets, trains a
        /// <c>FastTreeBinaryClassifier</c> through <c>TrainAndGetMetrics</c>, saves the resulting model
        /// and returns the path it was saved to.
        /// </summary>
        /// <param name="rows">Input rows; converted to <c>MLNetData</c> by <c>Convert</c>.</param>
        /// <returns>The relative path of the saved model file.</returns>
        public static string CreateNNetworkAndLearn(List <NnRow> rows)
        {
            // Prepare data
            double trainingSplitRatio = 0.7;
            int    trainCount         = (int)(rows.Count * trainingSplitRatio);
            var    trainData          = new MLNetData[trainCount];
            var    testData           = new MLNetData[rows.Count - trainCount];

            MLNetData[] allData = Convert(rows);
            // Split into Training and Testing sets: the first trainCount rows train, the rest test
            Array.Copy(allData, 0, trainData, 0, trainCount);
            Array.Copy(allData, trainCount, testData, 0, rows.Count - trainCount);
            // allCollection, modelAll and modelTrain are only referenced by the disabled experiments below
            var allCollection   = CollectionDataSource.Create(allData);
            var trainCollection = CollectionDataSource.Create(trainData);
            var testCollection  = CollectionDataSource.Create(testData);


            double acc, auc, f1;
            PredictionModel <MLNetData, MLNetPredict> modelAll, modelTrain, modelBest;

            // Experiments trained and evaluated on the full data set (recorded metrics in the trailing comments)
            //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new AveragedPerceptronBinaryClassifier            ()); // acc 0.83, auc 0.86, f1 0.45
            //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new FastForestBinaryClassifier                    ()); // acc 0.85, auc 0.89, f1 0.46
            (acc, auc, f1, modelBest) = TrainAndGetMetrics(trainCollection, testCollection, new FastTreeBinaryClassifier());                       // acc 0.95, auc 0.97, f1 0.85
            //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new FieldAwareFactorizationMachineBinaryClassifier()); // acc 0.85, auc 0.88, f1 0.56
            //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new GeneralizedAdditiveModelBinaryClassifier      ()); // acc 0.81, auc 0.80, f1 NaN
            //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new LinearSvmBinaryClassifier                     ()); // acc 0.82, auc 0.86, f1 0.16
            //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new LogisticRegressionBinaryClassifier            ()); // acc 0.84, auc 0.86, f1 0.40
            //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new StochasticDualCoordinateAscentBinaryClassifier()); // acc 0.84, auc 0.86, f1 0.40
            //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new StochasticGradientDescentBinaryClassifier     ()); // acc 0.83, auc 0.86, f1 0.29

            //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new EnsembleBinaryClassifier                      ());


            // Experiments trained on the training split and evaluated on the test split
            //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new AveragedPerceptronBinaryClassifier            ()); // acc 0.82, auc 0.84, f1 0.45
            //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new FastForestBinaryClassifier                    ()); // acc 0.82, auc 0.83, f1 0.23
            //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new FastTreeBinaryClassifier                      ()); // acc 0.82, auc 0.84, f1 0.46
            //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new FieldAwareFactorizationMachineBinaryClassifier()); // acc 0.83, auc 0.85, f1 0.37
            //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new GeneralizedAdditiveModelBinaryClassifier      ()); // acc 0.81, auc 0.75, f1 NaN
            //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new LinearSvmBinaryClassifier                     ()); // acc 0.81, auc 0.83, f1 0.14
            //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new LogisticRegressionBinaryClassifier            ()); // acc 0.83, auc 0.84, f1 0.39
            //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new StochasticDualCoordinateAscentBinaryClassifier()); // acc 0.82, auc 0.84, f1 0.43
            //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new StochasticGradientDescentBinaryClassifier     ()); // acc 0.83, auc 0.83, f1 0.34


            // Evaluate a training model
            //Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
            //Console.WriteLine($"Auc: {metrics.Auc:P2}");
            //Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
            //var cv = new CrossValidator();
            //CrossValidationOutput<MLNetData, MLNetPredict> cvRes = cv.CrossValidate<MLNetData, MLNetPredict>(pipelineAll);
            //Console.WriteLine($"Rms = {metrics.Rms}");
            //Console.WriteLine($"RSquared = {metrics.RSquared}");


            // Save the selected model
            string nnModelPath = @"NnInputs\mlDotNet_Datacup.model";

            // This method is synchronous, so wait for the write to finish before handing the path back.
            // Without the wait the caller could receive the path before the file exists, and any failure
            // while saving would go unobserved.
            modelBest.WriteAsync(nnModelPath).GetAwaiter().GetResult();
            return nnModelPath;
        }
Esempio n. 10
0
        /// <summary>
        /// Trains a three-cluster k-means model on the in-memory iris data set, saves it to
        /// <c>ModelPath</c> and returns it.
        /// </summary>
        /// <returns>The trained clustering model.</returns>
        internal static async Task <PredictionModel <IrisData, ClusterPrediction> > TrainAsync()
        {
            // LearningPipeline holds all steps of the learning process: data, transforms, learners.
            var pipeline = new LearningPipeline();

            // The data comes from an in-memory collection. The file-based alternative is kept for reference:
            //new TextLoader(DataPath).CreateFrom<IrisData>(useHeader: true),
            pipeline.Add(CollectionDataSource.Create <IrisData>(GetIrisDataSet()));

            // Concatenate the four measurement columns into the Features column.
            pipeline.Add(new ColumnConcatenator("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));

            // The clusterer is configured for 3 clusters.
            var clusterer = new KMeansPlusPlusClusterer();
            clusterer.K = 3;
            pipeline.Add(clusterer);

            Console.WriteLine("=============== Training model ===============");
            var trained = pipeline.Train <IrisData, ClusterPrediction>();
            Console.WriteLine("=============== End training ===============");

            // Persist the model before reporting where it went.
            await trained.WriteAsync(ModelPath);
            Console.WriteLine("The model is saved to {0}", ModelPath);

            return trained;
        }
Esempio n. 11
0
        /// <summary>
        /// Trains a text classifier on the Open311 records read from <paramref name="dataPath"/>,
        /// saves it to <c>_modelPath</c> and returns it.
        /// </summary>
        /// <param name="dataPath">Path handed to <c>OpenFile</c>.</param>
        /// <returns>The trained model.</returns>
        static async Task <PredictionModel <Open311Data, Open311DataPrediction> > TrainOpen311(string dataPath)
        {
            var pipeline = new LearningPipeline
            {
                CollectionDataSource.Create(OpenFile(dataPath, 3, 0, 1, 2)),
                new Dictionarizer(@"Label"),
                // Featurize the "Request" text into "Features": lower-cased, diacritics and punctuation
                // removed, German language setting, character and word n-grams of length 3.
                new TextFeaturizer(@"Features", @"Request")
                {
                    KeepDiacritics       = false,
                    KeepPunctuations     = false,
                    TextCase             = TextNormalizerTransformCaseNormalizationMode.Lower,
                    OutputTokens         = true,
                    Language             = TextTransformLanguage.German,
                    StopWordsRemover     = new PredefinedStopWordsRemover(),
                    VectorNormalizer     = TextTransformTextNormKind.L2,
                    CharFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = false },
                    WordFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = true }
                },
                new StochasticDualCoordinateAscentClassifier(),
                new PredictedLabelColumnOriginalValueConverter { PredictedLabelColumn = @"PredictedLabel" }
            };

            var trained = pipeline.Train <Open311Data, Open311DataPrediction>();
            await trained.WriteAsync(_modelPath);

            return trained;
        }
Esempio n. 12
0
        /// <summary>
        /// Trains k-means on points scattered around k centers on the unit circle, checks that every
        /// center is predicted into a different cluster, and verifies the metrics the cluster evaluator
        /// reports for unlabeled data.
        /// </summary>
        public void PredictClusters()
        {
            int n        = 1000;
            int k        = 4;
            var rand     = new Random(1);
            var clusters = new ClusteringData[k];
            var data     = new ClusteringData[n];

            for (int i = 0; i < k; i++)
            {
                // pick cluster centers as points on the circle with angle to axis X equal to 360*i/k
                clusters[i] = new ClusteringData {
                    Points = new float[2] {
                        (float)Math.Cos(Math.PI * i * 2 / k), (float)Math.Sin(Math.PI * i * 2 / k)
                    }
                };
            }
            // create data points by randomly picking a cluster and shifting the point slightly away from it
            // (the same shift is applied to both coordinates)
            for (int i = 0; i < n; i++)
            {
                var index = rand.Next(0, k);
                var shift = (rand.NextDouble() - 0.5) / 10;
                data[i] = new ClusteringData
                {
                    Points = new float[2]
                    {
                        (float)(clusters[index].Points[0] + shift),
                        (float)(clusters[index].Points[1] + shift)
                    }
                };
            }
            var pipeline = new LearningPipeline(seed: 1, conc: 1);

            pipeline.Add(CollectionDataSource.Create(data));
            pipeline.Add(new KMeansPlusPlusClusterer()
            {
                K = k
            });
            var model = pipeline.Train <ClusteringData, ClusteringPrediction>();
            // validate that the initial points picked as cluster centers during data generation
            // belong to different clusters: HashSet.Add returns false for an id seen before
            var labels = new HashSet <uint>();

            for (int i = 0; i < k; i++)
            {
                var scores = model.Predict(clusters[i]);
                Assert.True(labels.Add(scores.SelectedClusterId));
            }

            var            evaluator = new ClusterEvaluator();
            var            testData  = CollectionDataSource.Create(clusters);
            ClusterMetrics metrics   = evaluator.Evaluate(model, testData);

            // Assert.Equal takes (expected, actual); keeping that order makes failure messages correct.
            // Label is not specified, so NMI would be equal to NaN
            Assert.Equal(double.NaN, metrics.Nmi);
            // Calculate dbi is false by default so Dbi would be 0
            Assert.Equal(0.0, metrics.Dbi);
            Assert.Equal(0.0, metrics.AvgMinScore, 5);
        }
Esempio n. 13
0
 /// <summary>
 /// Creates the collection data source from <c>List</c> and the report settings, and binds it
 /// only when <c>DataSourceContainsData()</c> returns true.
 /// </summary>
 void CreateDataSource()
 {
     DataSource = new CollectionDataSource(List, ReportModel.ReportSettings);

     if (!DataSourceContainsData())
     {
         return;
     }

     DataSource.Bind();
 }
Esempio n. 14
0
        /// <summary>
        /// Trains a FastTree regression model and optionally saves it to a file.
        /// </summary>
        /// <param name="trainData">Training rows.</param>
        /// <param name="modelFileName">File the model is written to; nothing is written when null or empty.</param>
        /// <param name="labelColumn">Column that is copied into the "Label" column.</param>
        /// <param name="oneHotColumns">Columns passed to the one-hot vectorizer; null or empty adds no vectorizer.</param>
        /// <param name="features">Columns concatenated into the "Features" column.</param>
        /// <param name="drops">Columns dropped before training; null or empty drops nothing.</param>
        /// <returns>The trained model.</returns>
        public static async Task <PredictionModel <JiaMiTu, JiaMiTuPrediction> > Train(IEnumerable <JiaMiTu> trainData, string modelFileName, string labelColumn, string[] oneHotColumns, string[] features, string[] drops)
        {
            // Create the learning pipeline.
            var pipeline = new LearningPipeline();

            // Load the data. The file-based loader is kept for reference:
            //var textLoader = new TextLoader<JiaMiTu>(DataPath, useHeader: true, separator: ",");
            //pipeline.Add(textLoader);
            pipeline.Add(CollectionDataSource.Create(trainData));

            // Copy the chosen column into a new column named "Label"; that column is the label.
            pipeline.Add(new ColumnCopier((labelColumn, "Label")));

            // ColumnDropper discards fields that are not wanted for training (for example an id).
            // A null array is treated like an empty one: nothing is dropped.
            if (drops != null && drops.Length > 0)
            {
                pipeline.Add(new ColumnDropper()
                {
                    Column = drops
                });
            }

            // Convert categorical columns to numeric form with a one-hot vectorizer.
            // A null array is treated like an empty one: no vectorizer is added.
            if (oneHotColumns != null && oneHotColumns.Length > 0)
            {
                pipeline.Add(new CategoricalOneHotVectorizer(oneHotColumns));
            }

            // Last preparation step: combine the feature columns into the single "Features" vector.
            pipeline.Add(new ColumnConcatenator("Features",
                                                features
                                                ));

            // The learner for this regression task.
            pipeline.Add(new FastTreeRegressor());
            // Alternative learner (Poisson regression):
            //pipeline.Add(new PoissonRegressor());

            // Train the model; Train takes JiaMiTu as input type and JiaMiTuPrediction as output type.
            PredictionModel <JiaMiTu, JiaMiTuPrediction> model = pipeline.Train <JiaMiTu, JiaMiTuPrediction>();

            // The method is async so that the model can be awaited while it is written to disk.
            if (!string.IsNullOrEmpty(modelFileName))
            {
                await model.WriteAsync(modelFileName);
            }

            return(model);
        }
Esempio n. 15
0
        /// <summary>
        /// Trains a FastTree binary classifier on the documents of the "review_train" collection,
        /// saves the model to <c>_modelpath</c> and returns it.
        /// </summary>
        /// <param name="db">Database holding the "review_train" collection.</param>
        /// <returns>The trained model.</returns>
        public static async Task <PredictionModel <SentimentData, SentimentPrediction> > Train(IMongoDatabase db)
        {
            // The pipeline keeps every step of the learning process together, in order.
            // <Snippet5>
            var pipeline = new LearningPipeline();
            // </Snippet5>

            // Feed the pipeline with every document of the training collection.
            // <Snippet6>
            var reviews   = db.GetCollection <SentimentData>("review_train");
            var allFilter = new BsonDocument();
            var trainRows = reviews.Find <SentimentData>(allFilter).ToEnumerable();

            pipeline.Add(CollectionDataSource.Create(trainRows));
            // </Snippet6>

            // Featurize the "text" column into "Features": lower-cased, without diacritics or punctuation.
            // <Snippet7>
            var featurizer = new TextFeaturizer("Features", "text")
            {
                KeepDiacritics   = false,
                KeepPunctuations = false,
                TextCase         = TextNormalizerTransformCaseNormalizationMode.Lower,
            };
            pipeline.Add(featurizer);
            //</Snippet7>

            // The decision tree learner for this project and its tuning hyperparameters.
            // <Snippet8>
            var learner = new FastTreeBinaryClassifier()
            {
                NumLeaves           = 100,
                NumTrees            = 50,
                MinDocumentsInLeafs = 2,
                LearningRates       = 0.4f,
            };
            pipeline.Add(learner);
            // </Snippet8>

            // Train on the loaded and transformed data.
            // <Snippet9>
            var trained = pipeline.Train <SentimentData, SentimentPrediction>();
            // </Snippet9>

            // Save the trained model.
            // <Snippet10>
            await trained.WriteAsync(_modelpath);

            // </Snippet10>

            // Hand the model back for evaluation.
            // <Snippet11>
            return trained;
            // </Snippet11>
        }
Esempio n. 16
0
        /// <summary>
        /// Generates training and test data, trains a multi-class classifier, prints its metrics on the
        /// training data, predicts the test data and prints the predictions.
        /// </summary>
        /// <param name="args">Command line arguments; not read.</param>
        static void Main(string[] args)
        {
            var trainingRows = GeneratePData(2000);

            Print(trainingRows.Take(20));
            var testRows = GeneratePData(50, test: true);

            // Create the pipeline object for the ML task.
            var pipeline       = new LearningPipeline();
            var trainingSource = CollectionDataSource.Create(trainingRows);

            pipeline.Add(trainingSource);
            // Convert the label values to a numeric index.
            pipeline.Add(new Dictionarizer("Label"));
            pipeline.Add(new ColumnConcatenator("Features", "UnitA", "UnitS", "Volume"));
            // Classification algorithm.
            pipeline.Add(new StochasticDualCoordinateAscentClassifier());
            // Convert the predicted label back to its original value.
            pipeline.Add(new PredictedLabelColumnOriginalValueConverter() { PredictedLabelColumn = "PredictedLabel" });

            // Train the model.
            var trained = pipeline.Train <ProcessData, ProcessPrediction>();

            // Evaluate the model; the training collection is used for the evaluation.
            var classificationEvaluator = new ClassificationEvaluator();
            var quality                 = classificationEvaluator.Evaluate(trained, trainingSource);

            Console.WriteLine("AccuracyMicro: " + quality.AccuracyMicro);
            Console.WriteLine("LogLoss: " + quality.LogLoss);

            // Predict the test data.
            var predictions = trained.Predict(testRows);

            // Pair each test row with its predicted label.
            var labelledRows = testRows.Zip(predictions, (row, prediction) => new ProcessData
            {
                UnitA  = row.UnitA,
                UnitS  = row.UnitS,
                Volume = row.Volume,
                Label  = prediction.PredictedLabels
            }).ToList();

            // Print the result.
            Print(labelledRows);

            Console.ReadLine();
        }
        /// <summary>
        /// Evaluates the clustering model on <paramref name="testData"/> and prints the AvgMinScore metric.
        /// </summary>
        /// <param name="testData">Rows to evaluate the model on.</param>
        /// <param name="model">Trained clustering model.</param>
        public void EvaluateModel(IEnumerable <PivotData> testData, PredictionModel <PivotData, ClusteringPrediction> model)
        {
            ConsoleWriteHeader("Metrics for Customer Segmentation");

            var evaluationSource = CollectionDataSource.Create(testData);
            var clusterEvaluator = new ClusterEvaluator();
            ClusterMetrics clusterMetrics = clusterEvaluator.Evaluate(model, evaluationSource);

            Console.WriteLine($"Average mean score: {clusterMetrics.AvgMinScore:0.##}");
            // Further metrics, currently not printed:
            //Console.WriteLine($"*       Davies-Bouldin Index: {metrics.Dbi:#.##}");
            //Console.WriteLine($"*       Normalized mutual information: {metrics.Nmi:#.##}");
        }
Esempio n. 18
0
        /// <summary>
        /// Trains a logistic regression classifier on poker hand data.
        /// </summary>
        /// <param name="data">Training rows.</param>
        /// <returns>The trained model.</returns>
        public static PredictionModel <PokerHandData, PokerHandPrediction> Train(IEnumerable <PokerHandData> data)
        {
            // Steps run in the order listed: data, feature vector, learner.
            var pipeline = new LearningPipeline
            {
                CollectionDataSource.Create(data),
                new ColumnConcatenator("Features", "IsSameSuit", "IsStraight", "FourOfKind", "ThreeOfKind", "PairsCount"),
                new LogisticRegressionClassifier()
            };

            return pipeline.Train <PokerHandData, PokerHandPrediction>();
        }
Esempio n. 19
0
        /// <summary>
        /// Evaluates the poker hand model on <paramref name="data"/> and prints the log-loss metrics.
        /// </summary>
        /// <param name="model">Trained model to evaluate.</param>
        /// <param name="data">Rows to evaluate the model on.</param>
        public static void Evaluate(PredictionModel <PokerHandData, PokerHandPrediction> model, IEnumerable <PokerHandData> data)
        {
            var classificationEvaluator = new ClassificationEvaluator();
            var evaluationSource        = CollectionDataSource.Create(data);
            var quality                 = classificationEvaluator.Evaluate(model, evaluationSource);

            Console.WriteLine();
            Console.WriteLine("PredictionModel quality metrics evaluation");
            Console.WriteLine("------------------------------------------");
            Console.WriteLine($"LogLossReduction: {quality.LogLossReduction }");
            Console.WriteLine($"LogLoss: {quality.LogLoss }");
        }
Esempio n. 20
0
        /// <summary>
        /// Creates the data source for <c>Name</c> / <c>KeyValues</c>, runs the matching OData query and
        /// returns the result as a JSON string.
        /// </summary>
        /// <returns>
        /// For a paging source an object with "@count" and "value"; for a collection source a JSON array;
        /// for a default getter source whatever <c>ODataQuerier.GetDefault</c> returns; for a count source
        /// an object with "Count".
        /// </returns>
        /// <exception cref="NotSupportedException">The created data source is of none of these exact types.</exception>
        public string Get()
        {
            DataSource dataSource = new DataSourceCreator(Name, KeyValues).Create();

            // The exact runtime type decides the query; derived types are deliberately not matched.
            var sourceType = dataSource.GetType();

            if (sourceType == typeof(PagingDataSource))
            {
                var paging = (PagingDataSource)dataSource;

                IEnumerable <string> pageItems;
                if (paging.Expands != null && paging.Expands.Length != 0)
                {
                    pageItems = ODataQuerier.GetPagingCollection(paging.Entity, paging.Select, paging.Filter, paging.Orderby, paging.Skip, paging.Top, paging.Expands, paging.Parameters);
                }
                else
                {
                    pageItems = ODataQuerier.GetPagingCollection(paging.Entity, paging.Select, paging.Filter, paging.Orderby, paging.Skip, paging.Top, paging.Parameters);
                }

                // The page is materialized before the total is queried.
                string pageArray = string.Format("[{0}]", string.Join(",", pageItems));
                int    total     = ODataQuerier.Count(paging.Entity, paging.Filter, paging.Parameters);

                return string.Format("{{\"@count\":{0},\"value\":{1}}}", total, pageArray);
            }

            if (sourceType == typeof(CollectionDataSource))
            {
                var collection = (CollectionDataSource)dataSource;

                IEnumerable <string> items;
                if (collection.Expands != null && collection.Expands.Length != 0)
                {
                    items = ODataQuerier.GetCollection(collection.Entity, collection.Select, collection.Filter, collection.Orderby, collection.Expands, collection.Parameters);
                }
                else
                {
                    items = ODataQuerier.GetCollection(collection.Entity, collection.Select, collection.Filter, collection.Orderby, collection.Parameters);
                }

                return string.Format("[{0}]", string.Join(",", items));
            }

            if (sourceType == typeof(DefaultGetterDataSource))
            {
                var getter = (DefaultGetterDataSource)dataSource;
                return ODataQuerier.GetDefault(dataSource.Entity, getter.Select);
            }

            if (sourceType == typeof(CountDataSource))
            {
                var counter = (CountDataSource)dataSource;
                int total   = ODataQuerier.Count(counter.Entity, counter.Filter, counter.Parameters);
                return string.Format("{{\"Count\": {0}}}", total);
            }

            throw new NotSupportedException(sourceType.ToString());
        }
Esempio n. 21
0
        /// <summary>
        /// Begins assembling the learning pipeline for the sales recommendation data.
        /// </summary>
        /// <param name="salesData">Records that are wrapped in a collection data source and added as the first pipeline stage.</param>
        protected LearningPipeline BuildModel(IEnumerable <SalesRecommendationData> salesData)
        {
            ConsoleWriteHeader("Build model pipeline");

            var pipeline = new LearningPipeline();

            // Stage 1: feed the in-memory records into the pipeline.
            pipeline.Add(CollectionDataSource.Create(salesData));

            // Stage 2: one-hot encoding using a hash vector. The output column keeps the name of
            // the input column with the suffix "_OH" appended (ProductId -> ProductId_OH).
            pipeline.Add(new CategoricalHashOneHotVectorizer((nameof(SalesRecommendationData.ProductId), nameof(SalesRecommendationData.ProductId) + "_OH"))
            {
                HashBits = 18
            });
            // NOTE(review): the method is cut off here in this file - any further stages, the
            // return statement and the closing brace are not visible.
        /// <summary>
        /// Trains a <c>FastForestBinaryClassifier</c> on a single row whose label is a boolean.
        /// The test has no assertions; it passes when training completes without throwing.
        /// </summary>
        public void BooleanLabelPipeline()
        {
            // One sample is all this test needs: two feature values and a negative label.
            var data = new[]
            {
                new BooleanLabelData
                {
                    Features = new[] { 0.0f, 1.0f },
                    Label = false
                }
            };

            var pipeline = new LearningPipeline();
            pipeline.Add(CollectionDataSource.Create(data));
            pipeline.Add(new FastForestBinaryClassifier());

            // The trained model itself is not inspected.
            pipeline.Train<Data, Prediction>();
        }
Esempio n. 23
0
        /// <summary>
        /// Trains a <c>FastTreeRegressor</c> on every stored Powerball draw, prints the regression
        /// metrics measured on that same training data, and scores the next expected draw.
        /// </summary>
        /// <returns>The model's prediction for the next draw.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when no upcoming draw is available, instead of failing with a
        /// <see cref="NullReferenceException"/>.
        /// </exception>
        public PowerballPrediction PredictPowerball()
        {
            // Read the clock once so that every row is measured against the same instant.
            var now = DateTime.Now;

            var data = new List<PowerballData>();

            foreach (var powerball in _ctx.Powerballs)
            {
                var sinceDraw = powerball.draw_date - now;

                data.Add(new PowerballData()
                {
                    Ball1     = Convert.ToInt32(powerball.ball1),
                    Ball2     = Convert.ToInt32(powerball.ball2),
                    Ball3     = Convert.ToInt32(powerball.ball3),
                    Ball4     = Convert.ToInt32(powerball.ball4),
                    Ball5     = Convert.ToInt32(powerball.ball5),
                    PowerBall = Convert.ToInt32(powerball.powerball),
                    daysAgo   = (float)sinceDraw.TotalDays
                });
            }

            var collection = CollectionDataSource.Create(data);

            var pipeline = new LearningPipeline();
            pipeline.Add(collection);

            // The regression target ("Label") is a copy of daysAgo.
            pipeline.Add(new ColumnCopier(("daysAgo", "Label")));

            // NOTE(review): the rows built above never assign an "id" member - confirm that
            // PowerballData exposes such a column.
            pipeline.Add(new CategoricalOneHotVectorizer("id"));
            pipeline.Add(new ColumnConcatenator("Features", "id", "daysAgo"));
            pipeline.Add(new FastTreeRegressor());

            var model = pipeline.Train<PowerballData, PowerballPrediction>();

            // The metrics are computed on the training collection itself, not on held-out data.
            var evaluator = new RegressionEvaluator();
            RegressionMetrics metrics = evaluator.Evaluate(model, collection);

            Console.WriteLine($"Rms = {metrics.Rms}");
            Console.WriteLine($"RSquared = {metrics.RSquared}");

            var nextPowerball = _ctx.NextPowerball.FirstOrDefault();
            if (nextPowerball == null)
            {
                throw new InvalidOperationException("No upcoming Powerball draw is available to predict.");
            }

            var untilNextDraw = nextPowerball.next_jackpot_date.AddDays(1) - now;

            // NOTE(review): training rows use fractional TotalDays while this input uses the
            // whole-day Days component - confirm the truncation is intended.
            return(model.Predict(new PowerballData()
            {
                Ball1 = 0, Ball2 = 0, Ball3 = 0, Ball4 = 0, Ball5 = 0, PowerBall = 0, daysAgo = untilNextDraw.Days
            }));
        }
Esempio n. 24
0
        /// <summary>
        /// Builds the data source described by the <c>Datasource</c> XML element.
        /// </summary>
        /// <remarks>
        /// A <c>count</c> attribute yields a <c>CountDataSource</c>; otherwise a <c>default</c>
        /// attribute yields a <c>DefaultGetterDataSource</c>. When neither is present the result is
        /// a <c>CollectionDataSource</c> if both the page index and the page size are null, and a
        /// <c>PagingDataSource</c> otherwise (a null value is parsed as "0").
        /// </remarks>
        /// <returns>The data source matching the element's attributes.</returns>
        public virtual DataSource Create()
        {
            string entityName   = Datasource.Attribute("entity").Value;
            string selectClause = GetSelect(Datasource);
            string filterClause = GetFilter(Datasource);

            // A "count" attribute takes precedence over everything else.
            if (Datasource.Attribute("count") != null)
            {
                var counter = new CountDataSource();
                counter.Entity     = entityName;
                counter.Filter     = filterClause;
                counter.Parameters = Parameters;
                return(counter);
            }

            if (Datasource.Attribute("default") != null)
            {
                var defaultGetter = new DefaultGetterDataSource();
                defaultGetter.Entity = entityName;
                defaultGetter.Select = selectClause;
                return(defaultGetter);
            }

            string sortClause = GetOrderby(Datasource);
            string pageIndex  = GetPageIndex();
            string pageSize   = GetPageSize();

            SomeDataSource result;

            if (pageIndex != null || pageSize != null)
            {
                // At least one paging value was supplied; the other one falls back to zero.
                long index = long.Parse(pageIndex ?? "0");
                long size  = long.Parse(pageSize ?? "0");

                result = new PagingDataSource()
                {
                    Entity    = entityName,
                    Select    = selectClause,
                    Filter    = filterClause,
                    Orderby   = sortClause,
                    PageIndex = index,
                    PageSize  = size
                };
            }
            else
            {
                result = new CollectionDataSource()
                {
                    Entity  = entityName,
                    Select  = selectClause,
                    Filter  = filterClause,
                    Orderby = sortClause
                };
            }

            // Expands and parameters are set the same way for both collection-style sources.
            result.Expands    = GetExpands(Datasource);
            result.Parameters = Parameters;
            return(result);
        }
Esempio n. 25
0
        /// <summary>
        /// Evaluates the model against the given test data and prints the regression metrics.
        /// </summary>
        /// <param name="testData">Samples the model is evaluated on.</param>
        /// <param name="model">Trained model to evaluate.</param>
        public static void Evaluate(IEnumerable <JiaMiTu> testData, PredictionModel <JiaMiTu, JiaMiTuPrediction> model)
        {
            // The samples come from memory here; an earlier variant loaded them with a
            // TextLoader from TestDataPath instead.
            RegressionMetrics metrics = new RegressionEvaluator().Evaluate(model, CollectionDataSource.Create(testData));

            // Rms should be around 2.795276.
            // RMS is one metric for evaluating regression problems: the lower it is, the better the model.
            Console.WriteLine($"Rms={metrics.Rms}");
            Console.WriteLine($"LossFn={metrics.LossFn}");

            // RSquared is another regression metric. It is a value between 0 and 1; the closer to 1,
            // the better the model.
            Console.WriteLine($"RSquared = {metrics.RSquared}");
        }
        /// <summary>
        /// Fills a row that holds both a plain data item and a nested row for every element of the
        /// bound collection, and checks that the "GroupItem" entry receives a non-empty value each time.
        /// </summary>
        public void RowContainsRowAndItem()
        {
            var groupItem = new BaseDataItem() { ColumnName = "GroupItem" };

            // Nested row carrying two further data items.
            var nestedRow = new BaseRowItem();
            nestedRow.Items.AddRange(new System.Collections.Generic.List <IPrintableObject>()
            {
                new BaseDataItem() { ColumnName = "Lastname" },
                new BaseDataItem() { ColumnName = "Firstname" }
            });

            // The row under test: the group item first, then the nested row.
            var row = new System.Collections.Generic.List <IPrintableObject>()
            {
                groupItem,
                nestedRow
            };

            var settings = new ReportSettings();
            var collectionSource = new CollectionDataSource(list, settings);
            collectionSource.Bind();

            int filledRows = 0;

            foreach (var element in collectionSource.SortedList)
            {
                collectionSource.Fill(row, element);

                var filledItem = (BaseDataItem)row.Find(c => ((BaseDataItem)c).ColumnName == "GroupItem");
                Assert.That(filledItem.DBValue, Is.Not.Empty);

                filledRows++;
            }

            // An earlier version of this test walked the source with Fill(row) / MoveNext()
            // instead of enumerating SortedList.

            // Every element of the source must have been visited exactly once.
            Assert.That(filledRows, Is.EqualTo(collectionSource.Count));
        }
Esempio n. 27
0
        /// <summary>
        /// Evaluates the trained regression model against <paramref name="data"/> and reports
        /// its RMS and R-squared.
        /// </summary>
        /// <param name="data">Data sets to evaluate the model on.</param>
        /// <param name="prediction">
        /// Passed to <c>GetTextLoader</c> and <c>WriteToDisk</c> in the non-legacy build; unused
        /// when <c>ML_LEGACY</c> is defined.
        /// </param>
        /// <returns>A <c>ModelFitness</c> carrying the RMS and R-squared of the evaluation.</returns>
        /// <exception cref="InvalidOperationException">Thrown when no model has been trained or loaded yet.</exception>
        public override ModelFitness Evaluate(List <ModelDataSet> data, ModelValue prediction)
        {
            if (TrainedModel == null)
            {
                // An invalid object state is reported with InvalidOperationException rather than
                // the bare Exception type; callers that catch Exception are unaffected.
                throw new InvalidOperationException("Must initialize the model before calling");
            }

            // The model instance doubles as the lock object; it is kept as-is because code
            // outside this method may synchronize on the same instance.
            lock (TrainedModel)
            {
#if ML_LEGACY
                var testData  = CollectionDataSource.Create(data);
                var evaluator = new RegressionEvaluator();
                var metrics   = evaluator.Evaluate(TrainedModel, testData);

                return(new ModelFitness()
                {
                    RMS = metrics.Rms,
                    RSquared = metrics.RSquared
                });
#else
                var textLoader = GetTextLoader(Context, prediction);

                var pathToData = "";
                try
                {
                    // The text loader reads from a file, so the in-memory data is spilled to disk first.
                    pathToData = WriteToDisk(data, prediction);

                    IDataView dataView    = textLoader.Read(pathToData);
                    var       predictions = TrainedModel.Transform(dataView);
                    var       metrics     = Context.Regression.Evaluate(predictions, label: "Label", score: "Score");

                    return(new ModelFitness()
                    {
                        RMS = metrics.Rms,
                        RSquared = metrics.RSquared
                    });
                }
                finally
                {
                    // Remove the temporary file whether or not the evaluation succeeded.
                    if (!string.IsNullOrWhiteSpace(pathToData) && File.Exists(pathToData))
                    {
                        File.Delete(pathToData);
                    }
                }
#endif
            }
        }
        /// <summary>
        /// Generates <c>n</c> points scattered around <c>k</c> centers placed on the unit circle,
        /// trains a <c>KMeansPlusPlusClusterer</c> with <c>K = k</c>, and verifies that the
        /// original centers are each assigned to a different cluster.
        /// </summary>
        public void PredictClusters()
        {
            int n = 1000;
            int k = 5;

            // A fixed seed makes the generated points identical on every run.
            var rand     = new Random(1);
            var clusters = new ClusteringData[k];
            var data     = new ClusteringData[n];

            for (int i = 0; i < k; i++)
            {
                // Centers sit on the unit circle at an angle of 360*i/k degrees to the X axis.
                double angle = Math.PI * i * 2 / k;
                clusters[i] = new ClusteringData
                {
                    Points = new float[2] { (float)Math.Cos(angle), (float)Math.Sin(angle) }
                };
            }

            // Each data point is a randomly chosen center moved by a small offset; the same
            // offset is applied to both coordinates.
            for (int i = 0; i < n; i++)
            {
                var index = rand.Next(0, k);
                var shift = (rand.NextDouble() - 0.5) / k;
                data[i] = new ClusteringData
                {
                    Points = new float[2]
                    {
                        (float)(clusters[index].Points[0] + shift),
                        (float)(clusters[index].Points[1] + shift)
                    }
                };
            }

            var pipeline = new LearningPipeline();

            pipeline.Add(CollectionDataSource.Create(data));
            pipeline.Add(new KMeansPlusPlusClusterer()
            {
                K = k
            });
            var model = pipeline.Train <ClusteringData, ClusteringPrediction>();

            // The centers used to generate the data must each land in a different cluster.
            var labels = new HashSet <uint>();

            for (int i = 0; i < k; i++)
            {
                var scores = model.Predict(clusters[i]);

                // HashSet.Add returns false when the id is already present, so a single call
                // both records the id and checks that it has not been seen before.
                Assert.True(labels.Add(scores.SelectedClusterId));
            }
        }
        /// <summary>
        /// Trains a <c>FastForestBinaryClassifier</c> directly on a one-row collection with no
        /// transform stage in between. The test has no assertions; it passes when training
        /// completes without throwing.
        /// </summary>
        public void NoTransformPipeline()
        {
            // Single training row: two feature values and a zero label.
            var data = new[]
            {
                new Data
                {
                    Features = new[] { 0.0f, 1.0f },
                    Label    = 0f
                }
            };

            var pipeline = new Legacy.LearningPipeline();
            pipeline.Add(CollectionDataSource.Create(data));
            pipeline.Add(new FastForestBinaryClassifier());

            // The trained model itself is not inspected.
            pipeline.Train<Data, Prediction>();
        }
        /// <summary>
        /// Prepares the shared fixture state: a collection holding one empty <c>ExportText</c>,
        /// a bound data source over the helper's aggregate collection, and an expression visitor
        /// that reads from that data source's sorted list.
        /// </summary>
        public void CreateExportlist()
        {
            collection = new Collection <ExportText>
            {
                new ExportText { Text = String.Empty }
            };

            helper = new AggregateFuctionHelper();
            aggregateCollection = helper.AggregateCollection;

            // The data source and the visitor each get their own ReportSettings instance.
            var sourceSettings = new ReportSettings();
            dataSource = new CollectionDataSource(aggregateCollection, sourceSettings);
            dataSource.Bind();

            var visitorSettings = new ReportSettings();
            visitor = new ExpressionVisitor(visitorSettings);
            visitor.SetCurrentDataSource(dataSource.SortedList);
        }
Esempio n. 31
0
        /// <summary>
        /// Creates the collection view, wires up its data source and click delegate, and adds it
        /// to the controller's view. The grid shows four columns in both orientations, or five
        /// on a pad-idiom device.
        /// </summary>
        public override void ViewDidLoad()
        {
            base.ViewDidLoad();

            _collectionDataSource = new CollectionDataSource(this);
            _cd = new ClickDelegate(this);

            View.BackgroundColor = UIColor.FromPatternImage(Images.Background);

            // Build the grid, publish it through the CollectionView property right away, then configure it.
            var grid = new CollectionViewBinding.PSCollectionView(View.Bounds);
            CollectionView = grid;

            grid.AutoresizingMask = UIViewAutoresizing.FlexibleWidth | UIViewAutoresizing.FlexibleHeight;
            grid.NumColsLandscape = 4;
            grid.NumColsPortrait = 4;
            grid.BackgroundColor = UIColor.Clear;
            grid.PSCollectionViewDataSourceDelegate = _collectionDataSource;
            grid.PSCollectionViewDelegate = _cd;

            // Pad-idiom devices get one extra column in each orientation.
            bool isPad = UIDevice.CurrentDevice.UserInterfaceIdiom == UIUserInterfaceIdiom.Pad;
            if (isPad)
            {
                grid.NumColsLandscape = 5;
                grid.NumColsPortrait = 5;
            }

            View.AddSubview(grid);
        }