public override void Train(List<DataSet> data, List<float> labels = null)
{
    if (TrainedModel != null)
    {
        throw new InvalidOperationException("May only train/load a model once");
    }

#if ML_LEGACY
    var pipeline = new LearningPipeline();

    // add data
    pipeline.Add(CollectionDataSource.Create(data));

    // choose what to predict
    pipeline.Add(new ColumnCopier(("Score", "Label")));

    // add columns as features
    // do not include the features which should be predicted
    pipeline.Add(new ColumnConcatenator("Features", DataSet.ColumnNames()));

    // add a regression prediction
    pipeline.Add(new FastTreeRegressor());

    // train the model
    TrainedModel = pipeline.Train<DataSet, DataSetPrediction>();
#else
    // add data
    var textLoader = GetTextLoader(Context);

    // spill to disk, since there is no way to load from a collection in this code path
    var pathToData = "";
    try
    {
        // write data to disk
        pathToData = WriteToDisk(data);

        // read in data
        IDataView dataView = textLoader.Load(pathToData);
        InputSchema = dataView.Schema;

        // configurations
        var dataPipeline = Context.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: nameof(DataSet.Score))
            .Append(Context.Transforms.Concatenate("Features", DataSet.ColumnNames()));

        // set the training algorithm
        var trainer = Context.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features");
        var trainingPipeline = dataPipeline.Append(trainer);

        TrainedModel = trainingPipeline.Fit(dataView);
    }
    finally
    {
        // cleanup
        if (!string.IsNullOrWhiteSpace(pathToData) && File.Exists(pathToData))
        {
            File.Delete(pathToData);
        }
    }
#endif
}
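A minimal usage sketch for the non-legacy branch above: once TrainedModel has been fitted, a single row could be scored through an ML.NET PredictionEngine. DataSetPrediction is assumed here to expose the regression "Score" output as a float property, and the empty initializer is a placeholder.

// Hypothetical usage sketch (not part of the original source); assumes DataSetPrediction
// exposes the regression output column "Score" as a float property.
var engine = Context.Model.CreatePredictionEngine<DataSet, DataSetPrediction>(TrainedModel);
var single = engine.Predict(new DataSet { /* populate the feature columns here */ });
Console.WriteLine($"Predicted score: {single.Score}");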
public static void Evaluate(PredictionModel<SentimentData, SentimentPrediction> model)
{
    var testData = new List<SentimentData>()
    {
        new SentimentData { Sentiment = 6f, SentimentText = "such good thing" },
        new SentimentData { Sentiment = -9.3f, SentimentText = "f*****g article" }
    };
    var collection = CollectionDataSource.Create(testData);

    var evaluator = new BinaryClassificationEvaluator();
    BinaryClassificationMetrics metrics = evaluator.Evaluate(model, collection);

    Console.WriteLine();
    Console.WriteLine("PredictionModel quality metrics evaluation");
    Console.WriteLine("------------------------------------------");
    Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
    Console.WriteLine($"Auc: {metrics.Auc:P2}");
    Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
}
public void GroupbyOneColumnSortSubList()
{
    var rs = new ReportSettings();
    var gc = new GroupColumn()
    {
        ColumnName = "GroupItem",
        SortDirection = ListSortDirection.Ascending,
        GroupSortColumn = new SortColumn()
        {
            ColumnName = "Randomint",
            SortDirection = ListSortDirection.Ascending
        }
    };
    rs.GroupColumnsCollection.Add(gc);

    var collectionSource = new CollectionDataSource(list, rs);
    collectionSource.Bind();

    var testKey = String.Empty;
    var testSubKey = -1;
    var groupedList = collectionSource.GroupedList;
    foreach (var element in groupedList)
    {
        Assert.That(element.Key, Is.GreaterThan(testKey));
        testKey = element.Key.ToString();
        foreach (Contributor sub in element)
        {
            Assert.That(sub.RandomInt, Is.GreaterThanOrEqualTo(testSubKey));
            testSubKey = sub.RandomInt;
        }
        testSubKey = -1;
    }
}
public async Task TrainModelAsync(string csvPath, string modelPath)
{
    var pipeline = new LearningPipeline();
    pipeline.Add(CollectionDataSource.Create(new CsvReader().GetData(csvPath)));
    pipeline.Add(new Dictionarizer(("Categories", "Label")));
    pipeline.Add(new TextFeaturizer("Name", "Name"));
    pipeline.Add(new TextFeaturizer("GenericName", "GenericName"));
    pipeline.Add(new ColumnConcatenator("Features", "Name", "GenericName"));
    pipeline.Add(new StochasticDualCoordinateAscentClassifier() { NumThreads = Math.Max(2, Environment.ProcessorCount - 1) });
    pipeline.Add(new PredictedLabelColumnOriginalValueConverter() { PredictedLabelColumn = "PredictedLabel" });

    Console.WriteLine("=============== Training model ===============");
    var model = pipeline.Train<Product, ProductCategoryPrediction>();
    await model.WriteAsync(modelPath).ConfigureAwait(false);

    Console.WriteLine("=============== End training ===============");
    Console.WriteLine("The model is saved to {0}", modelPath);
}
public async Task<PredictionModel<UsageOfLightBulbModel, UsageOfLightBulbPredictionModel>> Train()
{
    var dataFromDb = await _lightBulbRepository.GetAllLightBulbs();
    var dataToTrain = new List<UsageOfLightBulbModel>(dataFromDb);
    var collection = CollectionDataSource.Create(dataToTrain);

    var pipeline = new LearningPipeline
    {
        collection,
        new ColumnCopier(("IsOn", "Label")),
        new ColumnConcatenator("Features", "LightBulbID", "Month", "Day", "Time"),
        new FastTreeRegressor(),
    };

    model = pipeline.Train<UsageOfLightBulbModel, UsageOfLightBulbPredictionModel>();
    // await model.WriteAsync(_modelpath);
    return model;
}
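A hedged sketch of how the trained light-bulb model might be called. The feature names come from the pipeline above, but their numeric types and the sample values are assumptions.

// Hypothetical caller (assumption: feature properties are numeric and the prediction
// class exposes the regression score for "IsOn").
var trainedModel = await Train();
var prediction = trainedModel.Predict(new UsageOfLightBulbModel
{
    LightBulbID = 1, Month = 7, Day = 14, Time = 18
});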
public void DataItemWithNoColumnNameHasErrorMessageInDbValue()
{
    var baseRow = new BaseRowItem();
    var dataItem = new BaseDataItem() { };
    baseRow.Items.Add(dataItem);

    var row = new System.Collections.Generic.List<IPrintableObject>();
    row.Add(baseRow);

    var reportSettings = new ReportSettings();
    var collectionSource = new CollectionDataSource(list, reportSettings);
    collectionSource.Bind();

    foreach (var element in collectionSource.SortedList)
    {
        collectionSource.Fill(row, element);
        var r = (BaseRowItem)row[0];
        foreach (var result in r.Items)
        {
            Assert.That(((BaseDataItem)result).DBValue.StartsWith("Missing"), Is.EqualTo(true));
        }
    }
}
public void FillDataIncludedInRow()
{
    var baseRow = new BaseRowItem();
    var dataItemsCollection = CreateDataItems();
    baseRow.Items.AddRange(dataItemsCollection);

    var row = new System.Collections.Generic.List<IPrintableObject>();
    row.Add(baseRow);

    var reportSettings = new ReportSettings();
    var collectionSource = new CollectionDataSource(list, reportSettings);
    collectionSource.Bind();

    int i = 0;
    foreach (var element in collectionSource.SortedList)
    {
        collectionSource.Fill(row, element);
        var r = (BaseRowItem)row[0];
        foreach (var result in r.Items)
        {
            Assert.That(((BaseDataItem)result).DBValue, Is.Not.Empty);
        }
        i++;
    }
    Assert.That(i, Is.EqualTo(collectionSource.Count));
}
public ExpressionRunner(Collection<ExportPage> pages, ReportSettings reportSettings, CollectionDataSource dataSource)
{
    this.pages = pages;
    this.dataSource = dataSource;
    this.reportSettings = reportSettings;
    Visitor = new ExpressionVisitor(reportSettings);
}
public static string CreateNNetworkAndLearn(List<NnRow> rows)
{
    // Prepare data
    double trainingSplitRatio = 0.7;
    int trainCount = (int)(rows.Count * trainingSplitRatio);
    var trainData = new MLNetData[trainCount];
    var testData = new MLNetData[rows.Count - trainCount];
    MLNetData[] allData = Convert(rows);

    // Split into Training and Testing sets
    Array.Copy(allData, 0, trainData, 0, trainCount);
    Array.Copy(allData, trainCount, testData, 0, rows.Count - trainCount);

    var allCollection = CollectionDataSource.Create(allData);
    var trainCollection = CollectionDataSource.Create(trainData);
    var testCollection = CollectionDataSource.Create(testData);

    double acc, auc, f1;
    PredictionModel<MLNetData, MLNetPredict> modelAll, modelTrain, modelBest;

    //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new AveragedPerceptronBinaryClassifier());             // acc 0.83, auc 0.86, f1 0.45
    //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new FastForestBinaryClassifier());                     // acc 0.85, auc 0.89, f1 0.46
    (acc, auc, f1, modelBest) = TrainAndGetMetrics(trainCollection, testCollection, new FastTreeBinaryClassifier());                     // acc 0.95, auc 0.97, f1 0.85
    //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new FieldAwareFactorizationMachineBinaryClassifier()); // acc 0.85, auc 0.88, f1 0.56
    //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new GeneralizedAdditiveModelBinaryClassifier());       // acc 0.81, auc 0.80, f1 NaN
    //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new LinearSvmBinaryClassifier());                      // acc 0.82, auc 0.86, f1 0.16
    //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new LogisticRegressionBinaryClassifier());             // acc 0.84, auc 0.86, f1 0.40
    //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new StochasticDualCoordinateAscentBinaryClassifier()); // acc 0.84, auc 0.86, f1 0.40
    //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new StochasticGradientDescentBinaryClassifier());      // acc 0.83, auc 0.86, f1 0.29
    //(acc, auc, f1, modelAll) = TrainAndGetMetrics(allCollection, allCollection, new EnsembleBinaryClassifier());

    //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new AveragedPerceptronBinaryClassifier());             // acc 0.82, auc 0.84, f1 0.45
    //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new FastForestBinaryClassifier());                     // acc 0.82, auc 0.83, f1 0.23
    //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new FastTreeBinaryClassifier());                       // acc 0.82, auc 0.84, f1 0.46
    //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new FieldAwareFactorizationMachineBinaryClassifier()); // acc 0.83, auc 0.85, f1 0.37
    //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new GeneralizedAdditiveModelBinaryClassifier());       // acc 0.81, auc 0.75, f1 NaN
    //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new LinearSvmBinaryClassifier());                      // acc 0.81, auc 0.83, f1 0.14
    //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new LogisticRegressionBinaryClassifier());             // acc 0.83, auc 0.84, f1 0.39
    //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new StochasticDualCoordinateAscentBinaryClassifier()); // acc 0.82, auc 0.84, f1 0.43
    //(acc, auc, f1, modelTrain) = TrainAndGetMetrics(trainCollection, testCollection, new StochasticGradientDescentBinaryClassifier());      // acc 0.83, auc 0.83, f1 0.34

    // Evaluate a training model
    //Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
    //Console.WriteLine($"Auc: {metrics.Auc:P2}");
    //Console.WriteLine($"F1Score: {metrics.F1Score:P2}");

    //var cv = new CrossValidator();
    //CrossValidationOutput<MLNetData, MLNetPredict> cvRes = cv.CrossValidate<MLNetData, MLNetPredict>(pipelineAll);
    //Console.WriteLine($"Rms = {metrics.Rms}");
    //Console.WriteLine($"RSquared = {metrics.RSquared}");

    // Save the best model to disk and wait for the write to complete
    string NnModelPath = @"NnInputs\mlDotNet_Datacup.model";
    modelBest.WriteAsync(NnModelPath).Wait();

    return NnModelPath;
}
internal static async Task<PredictionModel<IrisData, ClusterPrediction>> TrainAsync()
{
    // LearningPipeline holds all steps of the learning process: data, transforms, learners.
    var pipeline = new LearningPipeline
    {
        // The TextLoader loads a dataset. The schema of the dataset is specified by passing a class containing
        // all the column names and their types.
        CollectionDataSource.Create<IrisData>(GetIrisDataSet()),
        //new TextLoader(DataPath).CreateFrom<IrisData>(useHeader: true),

        // ColumnConcatenator concatenates all columns into the Features column
        new ColumnConcatenator("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"),

        // KMeansPlusPlusClusterer is the algorithm that will be used to build the clusters. We set the number of clusters to 3.
        new KMeansPlusPlusClusterer() { K = 3 }
    };

    Console.WriteLine("=============== Training model ===============");
    var model = pipeline.Train<IrisData, ClusterPrediction>();
    Console.WriteLine("=============== End training ===============");

    // Saving the model as a .zip file.
    await model.WriteAsync(ModelPath);
    Console.WriteLine("The model is saved to {0}", ModelPath);

    return model;
}
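A hedged usage sketch for the clustering model returned above. The ClusterPrediction member names (PredictedClusterId, Distances) follow the usual ML.NET iris-clustering sample and are assumptions here, as are the sample measurements.

// Hypothetical usage (assumed ClusterPrediction members: PredictedClusterId, Distances).
var model = await TrainAsync();
var prediction = model.Predict(new IrisData
{
    SepalLength = 5.1f, SepalWidth = 3.5f, PetalLength = 1.4f, PetalWidth = 0.2f
});
Console.WriteLine($"Cluster: {prediction.PredictedClusterId}");
Console.WriteLine($"Distances: {string.Join(" ", prediction.Distances)}");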
static async Task<PredictionModel<Open311Data, Open311DataPrediction>> TrainOpen311(string dataPath)
{
    var pipeline = new LearningPipeline();
    var dataSource = CollectionDataSource.Create(OpenFile(dataPath, 3, 0, 1, 2));
    pipeline.Add(dataSource);
    pipeline.Add(new Dictionarizer(@"Label"));
    pipeline.Add(new TextFeaturizer(@"Features", @"Request")
    {
        KeepDiacritics = false,
        KeepPunctuations = false,
        TextCase = TextNormalizerTransformCaseNormalizationMode.Lower,
        OutputTokens = true,
        Language = TextTransformLanguage.German,
        StopWordsRemover = new PredefinedStopWordsRemover(),
        VectorNormalizer = TextTransformTextNormKind.L2,
        CharFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = false },
        WordFeatureExtractor = new NGramNgramExtractor() { NgramLength = 3, AllLengths = true }
    });
    pipeline.Add(new StochasticDualCoordinateAscentClassifier());
    pipeline.Add(new PredictedLabelColumnOriginalValueConverter { PredictedLabelColumn = @"PredictedLabel" });

    var model = pipeline.Train<Open311Data, Open311DataPrediction>();
    await model.WriteAsync(_modelPath);
    return model;
}
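A hedged prediction sketch for the Open311 classifier above. The Open311DataPrediction property name and the German sample request are assumptions; Request is the input column used by the TextFeaturizer.

// Hypothetical usage (assumed prediction property: PredictedLabel).
var model = await TrainOpen311(dataPath);
var result = model.Predict(new Open311Data { Request = "Straßenlaterne defekt" });
Console.WriteLine($"Predicted category: {result.PredictedLabel}");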
public void PredictClusters()
{
    int n = 1000;
    int k = 4;
    var rand = new Random(1);
    var clusters = new ClusteringData[k];
    var data = new ClusteringData[n];
    for (int i = 0; i < k; i++)
    {
        // pick clusters as points on a circle with angle to axis X equal to 360*i/k
        clusters[i] = new ClusteringData
        {
            Points = new float[2] { (float)Math.Cos(Math.PI * i * 2 / k), (float)Math.Sin(Math.PI * i * 2 / k) }
        };
    }

    // create data points by randomly picking a cluster and shifting the point slightly away from it.
    for (int i = 0; i < n; i++)
    {
        var index = rand.Next(0, k);
        var shift = (rand.NextDouble() - 0.5) / 10;
        data[i] = new ClusteringData
        {
            Points = new float[2]
            {
                (float)(clusters[index].Points[0] + shift),
                (float)(clusters[index].Points[1] + shift)
            }
        };
    }

    var pipeline = new LearningPipeline(seed: 1, conc: 1);
    pipeline.Add(CollectionDataSource.Create(data));
    pipeline.Add(new KMeansPlusPlusClusterer() { K = k });
    var model = pipeline.Train<ClusteringData, ClusteringPrediction>();

    // validate that the initial points we picked as cluster centers during data generation belong to different clusters.
    var labels = new HashSet<uint>();
    for (int i = 0; i < k; i++)
    {
        var scores = model.Predict(clusters[i]);
        Assert.True(!labels.Contains(scores.SelectedClusterId));
        labels.Add(scores.SelectedClusterId);
    }

    var evaluator = new ClusterEvaluator();
    var testData = CollectionDataSource.Create(clusters);
    ClusterMetrics metrics = evaluator.Evaluate(model, testData);

    // Label is not specified, so NMI would be equal to NaN
    Assert.Equal(metrics.Nmi, double.NaN);
    // CalculateDbi is false by default, so Dbi would be 0
    Assert.Equal(metrics.Dbi, (double)0.0);
    Assert.Equal(metrics.AvgMinScore, (double)0.0, 5);
}
void CreateDataSource()
{
    DataSource = new CollectionDataSource(List, ReportModel.ReportSettings);
    if (DataSourceContainsData())
    {
        DataSource.Bind();
    }
}
/// <summary>
/// Train and produce the model
/// </summary>
/// <returns></returns>
public static async Task<PredictionModel<JiaMiTu, JiaMiTuPrediction>> Train(IEnumerable<JiaMiTu> trainData, string modelFileName, string labelColumn, string[] oneHotColumns, string[] features, string[] drops)
{
    // create the learning pipeline
    var pipeline = new LearningPipeline();

    // load and transform the data
    //var textLoader = new TextLoader<JiaMiTu>(DataPath, useHeader: true, separator: ",");
    //pipeline.Add(textLoader);
    pipeline.Add(CollectionDataSource.Create(trainData));

    // use ColumnCopier() to copy the label column into a new column named "Label"; this column is the label.
    pipeline.Add(new ColumnCopier((labelColumn, "Label")));

    // ColumnDropper can discard columns that are not needed before training starts,
    // e.g. an id column that has no influence on the result.
    if (drops.Count() > 0)
    {
        pipeline.Add(new ColumnDropper() { Column = drops });
    }

    // Do some feature engineering to transform the data so it can be used effectively for machine learning.
    // The training algorithm needs numeric features, so categorical columns (such as vendor_id, rate_code and
    // payment_type) are converted to numbers. CategoricalOneHotVectorizer() assigns a numeric key to each value
    // in a column.
    if (oneHotColumns.Count() > 0)
    {
        pipeline.Add(new CategoricalOneHotVectorizer(oneHotColumns));
    }

    // The last data-preparation step is to combine all features into a single vector with ColumnConcatenator().
    // This necessary step helps the algorithm process the features easily.
    // Note that the "trip_time_in_secs" column is not included; it was determined not to be a useful predictive feature.
    pipeline.Add(new ColumnConcatenator("Features", features));

    // After the data has been added to the pipeline and transformed into the correct input format, a learning
    // algorithm (learner) is chosen; the learner trains the model. This is a regression task, so a
    // FastTreeRegressor() learner, which uses gradient boosting, is added to the pipeline.
    // Gradient boosting is a machine learning technique for regression problems. It builds each regression tree
    // in a stepwise fashion, using a predefined loss function to measure the error at each step and correct it
    // in the next. The resulting prediction model is actually an ensemble of weaker prediction models.
    pipeline.Add(new FastTreeRegressor());

    // Poisson regression
    //pipeline.Add(new PoissonRegressor());

    // Train the model. This is the last step; nothing in the pipeline is executed before it.
    // pipeline.Train<TInput, TOutput>() takes the predefined JiaMiTu type and outputs a JiaMiTuPrediction.
    PredictionModel<JiaMiTu, JiaMiTuPrediction> model = pipeline.Train<JiaMiTu, JiaMiTuPrediction>();

    // Train() is declared async so the trained model can be saved with WriteAsync when a file name is supplied.
    if (!string.IsNullOrEmpty(modelFileName))
    {
        await model.WriteAsync(modelFileName);
    }
    return model;
}
public static async Task<PredictionModel<SentimentData, SentimentPrediction>> Train(IMongoDatabase db)
{
    // LearningPipeline allows you to add steps in order to keep everything together
    // during the learning process.
    // <Snippet5>
    var pipeline = new LearningPipeline();
    // </Snippet5>

    // <Snippet6>
    var collection = db.GetCollection<SentimentData>("review_train");
    var documents = collection.Find<SentimentData>(new BsonDocument()).ToEnumerable();
    pipeline.Add(CollectionDataSource.Create(documents));
    // </Snippet6>

    // TextFeaturizer is a transform that is used to featurize an input column.
    // This is used to format and clean the data.
    // <Snippet7>
    pipeline.Add(new TextFeaturizer("Features", "text")
    {
        KeepDiacritics = false,
        KeepPunctuations = false,
        TextCase = TextNormalizerTransformCaseNormalizationMode.Lower,
    });
    // </Snippet7>

    // Adds a FastTreeBinaryClassifier, the decision tree learner for this project, and
    // hyperparameters to be used for tuning decision tree performance.
    // <Snippet8>
    pipeline.Add(new FastTreeBinaryClassifier()
    {
        NumLeaves = 100,
        NumTrees = 50,
        MinDocumentsInLeafs = 2,
        LearningRates = 0.4f,
    });
    // </Snippet8>

    // Train the pipeline based on the dataset that has been loaded and transformed.
    // <Snippet9>
    PredictionModel<SentimentData, SentimentPrediction> model = pipeline.Train<SentimentData, SentimentPrediction>();
    // </Snippet9>

    // Saves the model we trained to a zip file.
    // <Snippet10>
    await model.WriteAsync(_modelpath);
    // </Snippet10>

    // Returns the model we trained to use for evaluation.
    // <Snippet11>
    return model;
    // </Snippet11>
}
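A hedged sketch of scoring a single review with the model returned above. The SentimentData member that backs the "text" input column and the boolean Sentiment output on SentimentPrediction are assumptions, as is the sample review.

// Hypothetical usage (assumed members: SentimentData.text, SentimentPrediction.Sentiment).
var model = await Train(db);
var prediction = model.Predict(new SentimentData { text = "Great read, well researched." });
Console.WriteLine($"Predicted sentiment: {prediction.Sentiment}");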
static void Main(string[] args)
{
    var trainData = GeneratePData(2000);
    Print(trainData.Take(20));
    var testData = GeneratePData(50, test: true);

    // Create the pipeline for the ML task
    var learningPipe = new LearningPipeline();
    var trainCollection = CollectionDataSource.Create(trainData);
    learningPipe.Add(trainCollection);

    // Convert the label values of the data into numeric indexes.
    learningPipe.Add(new Dictionarizer("Label"));
    learningPipe.Add(new ColumnConcatenator("Features", "UnitA", "UnitS", "Volume"));

    // Classification algorithm
    learningPipe.Add(new StochasticDualCoordinateAscentClassifier());

    // Convert the predicted column value back to its original label
    learningPipe.Add(new PredictedLabelColumnOriginalValueConverter() { PredictedLabelColumn = "PredictedLabel" });

    // Train the model
    var model = learningPipe.Train<ProcessData, ProcessPrediction>();

    // Evaluate the model and do a sanity check
    var evaluator = new ClassificationEvaluator();
    var metrics = evaluator.Evaluate(model, trainCollection);
    Console.WriteLine("AccuracyMicro: " + metrics.AccuracyMicro);
    Console.WriteLine("LogLoss: " + metrics.LogLoss);

    // Predict the test data
    var predicted = model.Predict(testData);

    // Combine the test data and the predicted labels
    var results = testData.Zip(predicted, (t, p) => new ProcessData
    {
        UnitA = t.UnitA,
        UnitS = t.UnitS,
        Volume = t.Volume,
        Label = p.PredictedLabels
    }).ToList();

    // Print the results
    Print(results);
    Console.ReadLine();
}
public void EvaluateModel(IEnumerable<PivotData> testData, PredictionModel<PivotData, ClusteringPrediction> model)
{
    ConsoleWriteHeader("Metrics for Customer Segmentation");
    var testDataSource = CollectionDataSource.Create(testData);
    var evaluator = new ClusterEvaluator();
    ClusterMetrics metrics = evaluator.Evaluate(model, testDataSource);
    Console.WriteLine($"Average minimum score: {metrics.AvgMinScore:0.##}");
    //Console.WriteLine($"* Davies-Bouldin Index: {metrics.Dbi:#.##}");
    //Console.WriteLine($"* Normalized mutual information: {metrics.Nmi:#.##}");
}
public static PredictionModel<PokerHandData, PokerHandPrediction> Train(IEnumerable<PokerHandData> data)
{
    var pipeline = new LearningPipeline();
    var collection = CollectionDataSource.Create(data);

    pipeline.Add(collection);
    pipeline.Add(new ColumnConcatenator("Features", "IsSameSuit", "IsStraight", "FourOfKind", "ThreeOfKind", "PairsCount"));
    pipeline.Add(new LogisticRegressionClassifier());

    var model = pipeline.Train<PokerHandData, PokerHandPrediction>();
    return model;
}
public static void Evaluate(PredictionModel<PokerHandData, PokerHandPrediction> model, IEnumerable<PokerHandData> data)
{
    var evaluator = new ClassificationEvaluator();
    var collection = CollectionDataSource.Create(data);
    var metrics = evaluator.Evaluate(model, collection);

    Console.WriteLine();
    Console.WriteLine("PredictionModel quality metrics evaluation");
    Console.WriteLine("------------------------------------------");
    Console.WriteLine($"LogLossReduction: {metrics.LogLossReduction}");
    Console.WriteLine($"LogLoss: {metrics.LogLoss}");
}
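A brief end-to-end sketch tying the two poker-hand methods together; trainingHands and testHands are hypothetical IEnumerable<PokerHandData> splits not defined in the original snippets.

// Hypothetical end-to-end call: train on one split, evaluate on a held-out split.
var pokerModel = Train(trainingHands);
Evaluate(pokerModel, testHands);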
public string Get()
{
    DataSource dataSource = new DataSourceCreator(Name, KeyValues).Create();
    if (dataSource.GetType() == typeof(PagingDataSource))
    {
        PagingDataSource ds = dataSource as PagingDataSource;
        IEnumerable<string> jsonCollection;
        if (ds.Expands == null || ds.Expands.Length == 0)
        {
            jsonCollection = ODataQuerier.GetPagingCollection(ds.Entity, ds.Select, ds.Filter, ds.Orderby, ds.Skip, ds.Top, ds.Parameters);
        }
        else
        {
            jsonCollection = ODataQuerier.GetPagingCollection(ds.Entity, ds.Select, ds.Filter, ds.Orderby, ds.Skip, ds.Top, ds.Expands, ds.Parameters);
        }
        string json = string.Format("[{0}]", string.Join(",", jsonCollection));
        int count = ODataQuerier.Count(ds.Entity, ds.Filter, ds.Parameters);
        json = string.Format("{{\"@count\":{0},\"value\":{1}}}", count, json);
        return json;
    }
    else if (dataSource.GetType() == typeof(CollectionDataSource))
    {
        CollectionDataSource ds = dataSource as CollectionDataSource;
        IEnumerable<string> jsonCollection;
        if (ds.Expands == null || ds.Expands.Length == 0)
        {
            jsonCollection = ODataQuerier.GetCollection(ds.Entity, ds.Select, ds.Filter, ds.Orderby, ds.Parameters);
        }
        else
        {
            jsonCollection = ODataQuerier.GetCollection(ds.Entity, ds.Select, ds.Filter, ds.Orderby, ds.Expands, ds.Parameters);
        }
        return string.Format("[{0}]", string.Join(",", jsonCollection));
    }
    else if (dataSource.GetType() == typeof(DefaultGetterDataSource))
    {
        DefaultGetterDataSource ds = dataSource as DefaultGetterDataSource;
        return ODataQuerier.GetDefault(dataSource.Entity, ds.Select);
    }
    else if (dataSource.GetType() == typeof(CountDataSource))
    {
        CountDataSource ds = dataSource as CountDataSource;
        int count = ODataQuerier.Count(ds.Entity, ds.Filter, ds.Parameters);
        return string.Format("{{\"Count\": {0}}}", count);
    }

    throw new NotSupportedException(dataSource.GetType().ToString());
}
protected LearningPipeline BuildModel(IEnumerable<SalesRecommendationData> salesData)
{
    ConsoleWriteHeader("Build model pipeline");
    var pipeline = new LearningPipeline();
    pipeline.Add(CollectionDataSource.Create(salesData));

    // One Hot Encoding using a Hash Vector. The new columns are named as the original ones, but with the suffix "_OH" added
    pipeline.Add(new CategoricalHashOneHotVectorizer((nameof(SalesRecommendationData.ProductId), nameof(SalesRecommendationData.ProductId) + "_OH"))
    {
        HashBits = 18
    });
public void BooleanLabelPipeline()
{
    var data = new BooleanLabelData[1];
    data[0] = new BooleanLabelData();
    data[0].Features = new float[] { 0.0f, 1.0f };
    data[0].Label = false;

    var pipeline = new LearningPipeline();
    pipeline.Add(CollectionDataSource.Create(data));
    pipeline.Add(new FastForestBinaryClassifier());
    var model = pipeline.Train<Data, Prediction>();
}
public PowerballPrediction PredictPowerball()
{
    var pipeline = new LearningPipeline();
    var allPicks = _ctx.Powerballs;
    var data = new List<PowerballData>();

    foreach (var powerball in allPicks)
    {
        var ts = powerball.draw_date - DateTime.Now;
        var newPick = new PowerballData()
        {
            Ball1 = Convert.ToInt32(powerball.ball1),
            Ball2 = Convert.ToInt32(powerball.ball2),
            Ball3 = Convert.ToInt32(powerball.ball3),
            Ball4 = Convert.ToInt32(powerball.ball4),
            Ball5 = Convert.ToInt32(powerball.ball5),
            PowerBall = Convert.ToInt32(powerball.powerball),
            daysAgo = (float)ts.TotalDays
        };
        data.Add(newPick);
    }

    var collection = CollectionDataSource.Create(data);
    pipeline.Add(collection);
    pipeline.Add(new ColumnCopier(("daysAgo", "Label")));
    pipeline.Add(new CategoricalOneHotVectorizer("id"));
    pipeline.Add(new ColumnConcatenator("Features", "id", "daysAgo"));
    pipeline.Add(new FastTreeRegressor());

    var model = pipeline.Train<PowerballData, PowerballPrediction>();

    var eval = new RegressionEvaluator();
    RegressionMetrics metrics = eval.Evaluate(model, collection);
    Console.WriteLine($"Rms = {metrics.Rms}");
    Console.WriteLine($"RSquared = {metrics.RSquared}");

    var nextPowerball = _ctx.NextPowerball.FirstOrDefault();
    var predictedDays = (nextPowerball.next_jackpot_date.AddDays(1)) - DateTime.Now;

    var prediction = model.Predict(new PowerballData()
    {
        Ball1 = 0,
        Ball2 = 0,
        Ball3 = 0,
        Ball4 = 0,
        Ball5 = 0,
        PowerBall = 0,
        daysAgo = predictedDays.Days
    });

    return prediction;
}
public virtual DataSource Create()
{
    string entity = Datasource.Attribute("entity").Value;
    string select = GetSelect(Datasource);
    string filter = GetFilter(Datasource);

    XAttribute xAttribute = Datasource.Attribute("count");
    if (xAttribute != null)
    {
        return new CountDataSource() { Entity = entity, Filter = filter, Parameters = Parameters };
    }

    xAttribute = Datasource.Attribute("default");
    if (xAttribute != null)
    {
        return new DefaultGetterDataSource() { Entity = entity, Select = select };
    }

    SomeDataSource someDataSource;
    string orderby = GetOrderby(Datasource);
    string pageIndex = GetPageIndex();
    string pageSize = GetPageSize();
    if (pageSize == null && pageIndex == null)
    {
        someDataSource = new CollectionDataSource()
        {
            Entity = entity,
            Select = select,
            Filter = filter,
            Orderby = orderby
        };
    }
    else
    {
        long lPageIndex = long.Parse(pageIndex ?? "0");
        long lPageSize = long.Parse(pageSize ?? "0");
        someDataSource = new PagingDataSource()
        {
            Entity = entity,
            Select = select,
            Filter = filter,
            Orderby = orderby,
            PageIndex = lPageIndex,
            PageSize = lPageSize
        };
    }

    someDataSource.Expands = GetExpands(Datasource);
    someDataSource.Parameters = Parameters;
    return someDataSource;
}
/// <summary>
/// Evaluate the model
/// </summary>
/// <param name="model"></param>
public static void Evaluate(IEnumerable<JiaMiTu> testData, PredictionModel<JiaMiTu, JiaMiTuPrediction> model)
{
    //var testData = new TextLoader<JiaMiTu>(TestDataPath, useHeader: true, separator: ",");
    var test = CollectionDataSource.Create(testData);
    var evaluator = new RegressionEvaluator();
    RegressionMetrics metrics = evaluator.Evaluate(model, test);

    // Rms should be around 2.795276
    // RMS is one metric for evaluating regression problems; the lower it is, the better the model.
    Console.WriteLine("Rms=" + metrics.Rms);
    Console.WriteLine("LossFn=" + metrics.LossFn);

    // RSquared is another metric for evaluating regression problems. It is a value between 0 and 1;
    // the closer to 1, the better the model.
    Console.WriteLine("RSquared = " + metrics.RSquared);
}
public void RowContainsRowAndItem()
{
    var row = new System.Collections.Generic.List<IPrintableObject>();
    var gItem = new BaseDataItem()
    {
        ColumnName = "GroupItem"
    };
    row.Add(gItem);

    var baseRow = new BaseRowItem();
    var ric = new System.Collections.Generic.List<IPrintableObject>()
    {
        new BaseDataItem() { ColumnName = "Lastname" },
        new BaseDataItem() { ColumnName = "Firstname" }
    };
    baseRow.Items.AddRange(ric);
    row.Add(baseRow);

    var rs = new ReportSettings();
    var collectionSource = new CollectionDataSource(list, rs);
    collectionSource.Bind();

    int i = 0;
    foreach (var element in collectionSource.SortedList)
    {
        collectionSource.Fill(row, element);
        var res = (BaseDataItem)row.Find(c => ((BaseDataItem)c).ColumnName == "GroupItem");
        Assert.That(res.DBValue, Is.Not.Empty);
        i++;
    }

    /*
    do {
        collectionSource.Fill(row);
        var res = (BaseDataItem)row.Find(c => ((BaseDataItem)c).ColumnName == "GroupItem");
        Assert.That(res.DBValue, Is.Not.Empty);
        i++;
    } while (collectionSource.MoveNext());
    */

    Assert.That(i, Is.EqualTo(collectionSource.Count));
}
public override ModelFitness Evaluate(List<ModelDataSet> data, ModelValue prediction)
{
    if (TrainedModel == null)
    {
        throw new Exception("Must initialize the model before calling");
    }

    lock (TrainedModel)
    {
#if ML_LEGACY
        var testData = CollectionDataSource.Create(data);
        var evaluator = new RegressionEvaluator();
        var metrics = evaluator.Evaluate(TrainedModel, testData);
        return new ModelFitness()
        {
            RMS = metrics.Rms,
            RSquared = metrics.RSquared
        };
#else
        var textLoader = GetTextLoader(Context, prediction);
        var pathToData = "";
        try
        {
            // have to spill the data to disk for the loader to read it
            pathToData = WriteToDisk(data, prediction);

            IDataView dataView = textLoader.Read(pathToData);
            var predictions = TrainedModel.Transform(dataView);
            var metrics = Context.Regression.Evaluate(predictions, label: "Label", score: "Score");

            return new ModelFitness()
            {
                RMS = metrics.Rms,
                RSquared = metrics.RSquared
            };
        }
        finally
        {
            // cleanup
            if (!string.IsNullOrWhiteSpace(pathToData) && File.Exists(pathToData))
            {
                File.Delete(pathToData);
            }
        }
#endif
    }
}
public void PredictClusters()
{
    int n = 1000;
    int k = 5;
    var rand = new Random();
    var clusters = new ClusteringData[k];
    var data = new ClusteringData[n];
    for (int i = 0; i < k; i++)
    {
        // pick clusters as points on a circle with angle to axis X equal to 360*i/k
        clusters[i] = new ClusteringData
        {
            Points = new float[2] { (float)Math.Cos(Math.PI * i * 2 / k), (float)Math.Sin(Math.PI * i * 2 / k) }
        };
    }

    // create data points by randomly picking a cluster and shifting the point slightly away from it.
    for (int i = 0; i < n; i++)
    {
        var index = rand.Next(0, k);
        var shift = (rand.NextDouble() - 0.5) / k;
        data[i] = new ClusteringData
        {
            Points = new float[2]
            {
                (float)(clusters[index].Points[0] + shift),
                (float)(clusters[index].Points[1] + shift)
            }
        };
    }

    var pipeline = new LearningPipeline();
    pipeline.Add(CollectionDataSource.Create(data));
    pipeline.Add(new KMeansPlusPlusClusterer() { K = k });
    var model = pipeline.Train<ClusteringData, ClusteringPrediction>();

    // validate that the initial points we picked as cluster centers during data generation belong to different clusters.
    var labels = new HashSet<uint>();
    for (int i = 0; i < k; i++)
    {
        var scores = model.Predict(clusters[i]);
        Assert.True(!labels.Contains(scores.SelectedClusterId));
        labels.Add(scores.SelectedClusterId);
    }
}
public void NoTransformPipeline()
{
    var data = new Data[1];
    data[0] = new Data
    {
        Features = new float[] { 0.0f, 1.0f },
        Label = 0f
    };

    var pipeline = new Legacy.LearningPipeline();
    pipeline.Add(CollectionDataSource.Create(data));
    pipeline.Add(new FastForestBinaryClassifier());
    var model = pipeline.Train<Data, Prediction>();
}
public void CreateExportlist()
{
    collection = new Collection<ExportText>();
    collection.Add(new ExportText() { Text = String.Empty });

    helper = new AggregateFuctionHelper();
    aggregateCollection = helper.AggregateCollection;

    dataSource = new CollectionDataSource(aggregateCollection, new ReportSettings());
    dataSource.Bind();

    visitor = new ExpressionVisitor(new ReportSettings());
    visitor.SetCurrentDataSource(dataSource.SortedList);
}
public override void ViewDidLoad()
{
    base.ViewDidLoad();

    _collectionDataSource = new CollectionDataSource(this);
    _cd = new ClickDelegate(this);

    this.View.BackgroundColor = UIColor.FromPatternImage(Images.Background);

    CollectionView = new CollectionViewBinding.PSCollectionView(this.View.Bounds);
    CollectionView.AutoresizingMask = UIViewAutoresizing.FlexibleWidth | UIViewAutoresizing.FlexibleHeight;
    CollectionView.NumColsLandscape = 4;
    CollectionView.NumColsPortrait = 4;
    CollectionView.BackgroundColor = UIColor.Clear;
    CollectionView.PSCollectionViewDataSourceDelegate = _collectionDataSource;
    CollectionView.PSCollectionViewDelegate = _cd;

    if (UIDevice.CurrentDevice.UserInterfaceIdiom == UIUserInterfaceIdiom.Pad)
    {
        CollectionView.NumColsPortrait = CollectionView.NumColsLandscape = 5;
    }

    this.View.AddSubview(CollectionView);
}