// Trains an SSA spike detector on the signal column, scores the same data, and
// hands the predictions (plus the raw signal and timestamps) to Display().
public static void DetectAnomalies(MLContext mlContext, IDataView dataView)
{
    // SSA spike-detection hyper-parameters.
    const int PValueSize = 30;
    const int SeasonalitySize = 30;
    const int TrainingSize = 90;
    const int ConfidenceInterval = 98;

    // Build and fit the spike-detection estimator over the signal column.
    var trainingPipeline = mlContext.Transforms.DetectSpikeBySsa(
        nameof(SpikePrediction.Prediction),
        signalName,
        confidence: ConfidenceInterval,
        pvalueHistoryLength: PValueSize,
        trainingWindowSize: TrainingSize,
        seasonalityWindowSize: SeasonalitySize);
    ITransformer trainedModel = trainingPipeline.Fit(dataView);

    // Score the input data and materialize the prediction rows.
    IDataView transformedData = trainedModel.Transform(dataView);
    IEnumerable<SpikePrediction> predictions =
        mlContext.Data.CreateEnumerable<SpikePrediction>(transformedData, false);

    // Raw signal values and timestamps, shown alongside each prediction.
    float[] colCDN = dataView.GetColumn<float>(signalName).ToArray();
    DateTime[] colTime = dataView.GetColumn<DateTime>("Timestamp").ToArray();

    Display(predictions, colCDN, colTime);
}
// Loads a previously trained spike-detection model, scores the input data, and
// prints each reading with its alert/score/p-value, highlighting detected spikes.
public static void DetectAnomalies(MLContext mlContext, IDataView dataView)
{
    ITransformer trainedModel = mlContext.Model.Load(ModelPath, out var modelInputSchema);
    IDataView transformedData = trainedModel.Transform(dataView);

    // Materialize the prediction column plus the raw reading/time columns for display.
    IEnumerable<SpikePrediction> predictions =
        mlContext.Data.CreateEnumerable<SpikePrediction>(transformedData, false);
    float[] colCDN = dataView.GetColumn<float>("ConsumptionDiffNormalized").ToArray();
    DateTime[] colTime = dataView.GetColumn<DateTime>("time").ToArray();

    Console.WriteLine("======Displaying anomalies in the Power meter data=========");
    Console.WriteLine("Date \tReadingDiff\tAlert\tScore\tP-Value");

    int row = 0;
    foreach (SpikePrediction p in predictions)
    {
        // Prediction[0] == 1 flags a spike; highlight those rows.
        if (p.Prediction[0] == 1)
        {
            Console.BackgroundColor = ConsoleColor.DarkYellow;
            Console.ForegroundColor = ConsoleColor.Black;
        }

        Console.WriteLine("{0}\t{1:0.0000}\t{2:0.00}\t{3:0.00}\t{4:0.00}",
            colTime[row], colCDN[row], p.Prediction[0], p.Prediction[1], p.Prediction[2]);
        Console.ResetColor();
        row++;
    }
}
// Pulls the raw feature strings and their parallel labels out of the test data view
// and loads the persisted model for the given id.
// Returns (test-set size, raw feature strings, labels).
private (int, string[], bool[]) PullDataFromDataView(IDataView testData, Guid modelId, MLContext mlContext, string featuresColumn, string labelsColumn)
{
    // Materialize the feature column once. The previous version enumerated it
    // three times (Count() plus two Take(Count()) passes, the Take being a no-op).
    var testSetFeatures = testData.GetColumn<string>(mlContext, featuresColumn).ToArray();
    int sizeTestSet = testSetFeatures.Length;

    // Labels are pulled in parallel with the features, truncated to the same length.
    var testSetLabels = testData.GetColumn<bool>(mlContext, labelsColumn).Take(sizeTestSet).ToArray();

    this.loadedModel = mlContext.Model.Load(this.fileSystemRepository.GetModelFileStream(modelId));

    return (sizeTestSet, testSetFeatures, testSetLabels);
}
// Configures, fits, and applies an SSA spike-detection estimator over the
// ConsumptionDiffNormalized column, then prints every row with its alert flag,
// score, and p-value (spike rows highlighted).
// NOTE(review): uses the older ML.NET API surface (SsaSpikeEstimator,
// GetColumn(ml, name)) — confirm against the project's ML.NET version.
public static void BuildTrainEvaluateModel(MLContext ml, IDataView dataView)
{
    // Configure the Estimator: SSA spike-detection hyper-parameters.
    const int PValueSize = 30;
    const int SeasonalitySize = 30;
    const int TrainingSize = 90;
    const int ConfidenceInterval = 98;

    string outputColumnName = nameof(SpikePrediction.Prediction);
    string inputColumnName = nameof(MeterData.ConsumptionDiffNormalized);

    var estimator = ml.Transforms.SsaSpikeEstimator(
        outputColumnName, inputColumnName,
        confidence: ConfidenceInterval,
        pvalueHistoryLength: PValueSize,
        trainingWindowSize: TrainingSize,
        seasonalityWindowSize: SeasonalitySize);

    // Fit on the data and score the same data.
    var model = estimator.Fit(dataView);
    var transformedData = model.Transform(dataView);

    // Getting the data of the newly created column as an IEnumerable.
    IEnumerable<SpikePrediction> predictionColumn = ml.Data.CreateEnumerable<SpikePrediction>(transformedData, false);
    var colCDN = dataView.GetColumn<float>(ml, "ConsumptionDiffNormalized").ToArray();
    var colTime = dataView.GetColumn<DateTime>(ml, "time").ToArray();

    // Output the input data and predictions.
    Console.WriteLine($"{outputColumnName} column obtained post-transformation.");
    Console.WriteLine("Date \tReadingDiff\tAlert\tScore\tP-Value");
    int i = 0;
    foreach (var p in predictionColumn)
    {
        // Prediction[0] is the alert flag (1 = spike): highlight those rows.
        if (p.Prediction[0] == 1)
        {
            Console.BackgroundColor = ConsoleColor.DarkYellow;
            Console.ForegroundColor = ConsoleColor.Black;
        }
        Console.WriteLine("{0}\t{1:0.0000}\t{2:0.00}\t{3:0.00}\t{4:0.00}",
            colTime[i], colCDN[i], p.Prediction[0], p.Prediction[1], p.Prediction[2]);
        Console.ResetColor();
        i++;
    }
}
// Loads the persisted spike-detection model, scores the data view, prints every
// day with its alert/score/p-value, and returns the days flagged as anomalies.
private static IEnumerable<DayInfo> DetectAnomalies(MLContext context, IDataView dataView)
{
    ITransformer trainedModel = context.Model.Load(ModelPath, out _);
    var transformedData = trainedModel.Transform(dataView);

    // Getting the data of the newly created column as an IEnumerable.
    IEnumerable<SpikePrediction> predictions = context.Data.CreateEnumerable<SpikePrediction>(transformedData, false);
    var columnCount = dataView.GetColumn<float>(nameof(DayInfo.Count)).ToArray();
    var columnDate = dataView.GetColumn<DateTime>(nameof(DayInfo.Date)).ToArray();

    // Output the input data and predictions.
    Console.WriteLine("======Displaying anomalies in the PressCenters.com data=========");
    Console.WriteLine("Date \tCount\tAlert\tScore\tP-Value");

    var anomalies = new List<DayInfo>();
    int i = 0;
    foreach (var p in predictions)
    {
        // Prediction[0] > 0 flags a spike: collect it and highlight the row.
        if (p.Prediction[0] > 0)
        {
            anomalies.Add(new DayInfo { Date = columnDate[i], Count = columnCount[i] });
            Console.BackgroundColor = ConsoleColor.DarkYellow;
            Console.ForegroundColor = ConsoleColor.Black;
        }
        // The guard below was intentionally commented out so that EVERY row is
        // printed, not just anomalies; the bare block remains.
        // if (p.Prediction[0] > 0)
        {
            Console.WriteLine(
                "{0}\t{1:0}\t{2:0.00}\t{3:0.00}\t{4:0.00}",
                columnDate[i].ToLongDateString().PadRight(25),
                columnCount[i],
                p.Prediction[0],
                p.Prediction[1],
                p.Prediction[2]);
            Console.ResetColor();
        }
        i++;
    }
    return (anomalies);
}
// WPF click handler: builds an ONNX image-classification pipeline from a
// hard-coded model path, scores a hard-coded test image, and collects the raw
// output probabilities. Any failure is surfaced via a message box.
// NOTE(review): the model and image paths (D:\model.onnx, E:\test.jpg) are
// hard-coded debug values — presumably placeholders; confirm before shipping.
private void Button_Click(object sender, RoutedEventArgs e)
{
    try
    {
        string modelLocation = @"D:\model.onnx";
        /*MLContext mlContext = new MLContext();
         * string modelLocation = "/ADNMenuSample;Component/model.zip";
         *
         * Uri uri = new Uri(modelLocation, UriKind.Relative);
         * System.Windows.Resources.StreamResourceInfo info = Application.GetResourceStream(uri);
         * ITransformer trainedModel = mlContext.Model.Load(info.Stream, out DataViewSchema modelSchema);*/
        MLContext mlContext = new MLContext();

        // Empty enumerable: only the schema is needed to fit the image pipeline.
        var data = mlContext.Data.LoadFromEnumerable(new List<ImageData>());

        // Load -> resize -> pixels -> custom normalization -> ONNX scoring.
        var pipeline = mlContext.Transforms.LoadImages(outputColumnName: "image", imageFolder: "", inputColumnName: nameof(ImageData.ImagePath))
            .Append(mlContext.Transforms.ResizeImages(outputColumnName: ModelSettings.ModelInput, imageWidth: ImageResNetSettings.imageWidth, imageHeight: ImageResNetSettings.imageHeight, inputColumnName: "image"))
            .Append(mlContext.Transforms.ExtractPixels(outputColumnName: ModelSettings.ModelInput))
            .Append(mlContext.Transforms.CustomMapping(new NormalizeMapping().GetMapping(), contractName: nameof(NormalizeMapping)))
            .Append(mlContext.Transforms.ApplyOnnxModel(modelFile: modelLocation, outputColumnNames: new[] { ModelSettings.ModelOutput }, inputColumnNames: new[] { ModelSettings.ModelInput }));
        var model = pipeline.Fit(data);

        // Register the assembly that declares the custom mapping so the contract
        // can be resolved (required if the model is later saved/loaded).
        mlContext.ComponentCatalog.RegisterAssembly(typeof(NormalizeMapping).Assembly);

        // Score a single test image and pull out the model's output vectors.
        IEnumerable<ImageData> image = ImageData.ReadImageFromPaths(new string[] { @"E:\test.jpg" });
        IDataView imageDataView = mlContext.Data.LoadFromEnumerable(image);
        IDataView scoredData = model.Transform(imageDataView);
        List<float[]> probabilities = scoredData.GetColumn<float[]>(ModelSettings.ModelOutput).ToList();
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);
    }
}
// Get Data
// row# : recipeId, Ingredient (id, name), score (1=present, 0=NOT_present in recipe)
public Data[] GetData(IDataView dataView, IDataView features)
{
    // Materialize the parallel columns once each.
    int[] ingredientIds = dataView.GetColumn<int>(dataView.Schema["ingrId"]).ToArray();
    int[] recipeIds = dataView.GetColumn<int>(dataView.Schema["recipeId"]).ToArray();
    float[] labels = dataView.GetColumn<float>(dataView.Schema["Label"]).ToArray();
    string[] ingredientNames = features.GetColumn<string>(features.Schema["ingrName"]).ToArray();

    // One Data row per (recipe, ingredient, score) triple; the ingredient id
    // doubles as an index into the name table.
    var rows = new Data[recipeIds.Length];
    for (int idx = 0; idx < rows.Length; idx++)
    {
        rows[idx] = new Data(
            recipeIds[idx],
            new Ingredient(ingredientIds[idx], ingredientNames[ingredientIds[idx]]),
            (int)labels[idx]);
    }
    return rows;
}
// This method using 'DebuggerExtensions.Preview()' should only be used when debugging/developing, not for release/production trainings.
// Fits the pipeline, transforms the data, and prints the first numberOfRows values
// of the given vector column so the transformed features can be eyeballed.
public static void PeekVectorColumnDataInConsole(MLContext mlContext, string columnName, IDataView dataView, IEstimator<ITransformer> pipeline, int numberOfRows = 4)
{
    string msg = string.Format("Peek data in DataView: : Show {0} rows with just the '{1}' column", numberOfRows, columnName);
    ConsoleWriteHeader(msg);

    ITransformer transformer = pipeline.Fit(dataView);
    IDataView transformedData = transformer.Transform(dataView);

    // Extract the requested vector column, limited to the rows we want to show.
    List<float[]> someColumnData = transformedData.GetColumn<float[]>(columnName)
        .Take(numberOfRows).ToList();

    // Print the peeked rows. string.Concat replaces the previous O(n^2)
    // `concatColumn += f.ToString()` loop while producing identical output.
    int currentRow = 0;
    someColumnData.ForEach(row =>
    {
        currentRow++;
        string concatColumn = string.Concat(row);

        Console.WriteLine();
        string rowMsg = string.Format("**** Row {0} with '{1}' field value ****", currentRow, columnName);
        Console.WriteLine(rowMsg);
        Console.WriteLine(concatColumn);
        Console.WriteLine();
    });
}
// Classifies a single image with the configured ONNX model and returns the first
// predicted class label (or null when the model produces none).
public string Classify(string imageFilePath)
{
    var context = new MLContext();

    // Single-row input: folder and file name are split so LoadImages can resolve it.
    IEnumerable<ImageData> images = new[]
    {
        new ImageData() { ImagePath = Path.GetFileName(imageFilePath) }
    };
    IDataView imageData = context.Data.LoadFromEnumerable(images);

    // Load -> resize -> pixels -> ONNX scoring.
    var estimator = context.Transforms.LoadImages(outputColumnName: "image", imageFolder: Path.GetDirectoryName(imageFilePath), inputColumnName: "ImagePath")
        .Append(context.Transforms.ResizeImages(outputColumnName: "image", imageWidth: ImageWidth, imageHeight: ImageHeight, inputColumnName: "image"))
        .Append(context.Transforms.ExtractPixels(outputColumnName: "data", inputColumnName: "image", orderOfExtraction: ColorsOrder))
        .Append(context.Transforms.ApplyOnnxModel(modelFile: _modelFilePath.Value, outputColumnNames: new[] { ModelOutput }, inputColumnNames: new[] { ModelInput }));

    var fitted = estimator.Fit(imageData);
    IDataView scored = fitted.Transform(imageData);

    // The model emits a vector of class labels per row; take the first of the first.
    var labelVectors = scored.GetColumn<string[]>("classLabel");
    return labelVectors.FirstOrDefault()?.FirstOrDefault();
}
// predict using the loaded model: scores the data view and returns the model's
// raw output vectors, one per input row.
protected IEnumerable<float[]> PredictDataUsingModel(IDataView data, ITransformer model)
{
    var scored = model.Transform(data);
    return scored.GetColumn<float[]>(modelSettings.modelOutput);
}
/// <summary>
/// Auto-detect purpose for the data view columns.
/// </summary>
public static PurposeInference.Column[] InferPurposes(MLContext context, IDataView data, string label, PurposeInference.Column[] columnOverrides = null)
{
    var labelColumn = data.GetColumn(label);

    // Work out which column indices participate in inference, then infer them.
    var candidateIndices = CalcIncludedIndices(data.Schema.Count, labelColumn.Index, columnOverrides);
    var inferredColumns = InferPurposes(context, data, candidateIndices);

    // Assemble the result: label first, then inferred columns, then any
    // caller-supplied overrides (overrides win over inference).
    var purposes = new PurposeInference.Column[data.Schema.Count];
    purposes[labelColumn.Index] = new IntermediateColumn(data, labelColumn.Index, ColumnPurpose.Label).GetColumn();

    foreach (var inferred in inferredColumns)
    {
        purposes[inferred.ColumnIndex] = inferred.GetColumn();
    }

    if (columnOverrides != null)
    {
        foreach (var overrideColumn in columnOverrides)
        {
            purposes[overrideColumn.ColumnIndex] = overrideColumn;
        }
    }

    return purposes;
}
// Loads the training data set and echoes its row count (counted via the
// IceCream column) to the console.
private static void InitDataSet(MLContext mlContext)
{
    IDataView data = mlContext.Data.LoadFromTextFile<Appointment>(TrainDataPath);
    int rowCount = data.GetColumn<float>(nameof(Appointment.IceCream)).Count();
    Console.WriteLine(rowCount);
}
// Prints the one-hot-encoded vector column as tab-separated values,
// capped at the first 11 rows (matching the original counter behavior).
private static void PrintDataColumn(IDataView encodedData, string columnName)
{
    var encodedRows = encodedData.GetColumn<float[]>(encodedData.Schema[columnName]);
    ConsoleHelper.ConsoleWriteHeader("==========One Hot Encoding Results=========");

    int shown = 0;
    foreach (float[] vector in encodedRows)
    {
        foreach (float value in vector)
        {
            Console.Write($"{value}\t");
        }
        Console.WriteLine();

        if (++shown > 10)
        {
            break;
        }
    }
}
// Predicts CO2 emissions per year up to futureYear for the given country/area.
// Known population data is loaded first; if it does not reach futureYear it is
// extrapolated via CNTK, then the trained model scores every population row and
// the years beyond the known emission series are appended.
public async Task<List<YearWithValue>> PredictCo2OverYearsAsync(Model modelContainer, int futureYear, int coa_id, List<YearWithValue> emissions, CNTK cNTK)
{
    // Get the known population series.
    List<YearWithValue> population = await dB.GetPopulationByCoaIdAsync(coa_id);

    // If the known population does not cover futureYear, extrapolate it first.
    if (CompareBiggestValueToFutureYear(population, futureYear))
    {
        population = await cNTK.PredictPopulationAsync(coa_id, futureYear, population);
    }

    // Build the model input rows (year + population value).
    EmissionModel[] populationData = population
        .Select(p => new EmissionModel { Year = p.Year, Population = p.Value.value })
        .ToArray();

    // Batch-transform the whole series. (The previous version also created a
    // PredictionEngine here that was never used; it has been removed.)
    IDataView inputData = modelContainer.mLContext.Data.LoadFromEnumerable(populationData);
    IDataView predictions = modelContainer.trainedModel.Transform(inputData);
    float[] scoreColumn = predictions.GetColumn<float>("Score").ToArray();

    // Append predicted emissions only for years beyond the known emission series.
    for (int i = emissions.Count; i < scoreColumn.Length; i++)
    {
        emissions.Add(new YearWithValue(population[i].Year, new Wert(scoreColumn[i], true)));
    }

    return emissions;
}
// Evaluates the model for a given timestamp. Returns the cached score when one
// exists, null when no features are available for the timestamp, otherwise the
// model's "Score" column for the single-row feature set.
public float[] Evaluate(ITransformer transformer, DateTime dateTime)
{
    // Cache hit: TryGetValue avoids the previous ContainsKey + indexer double lookup.
    if (_evaluation.TryGetValue(dateTime, out float cachedScore))
    {
        return new[] { cachedScore };
    }

    var featuresDictionary = GetFeatures(dateTime);
    if (featuresDictionary == null)
    {
        return null;
    }

    // Copy the features into a single-row feature set for the data view.
    var feature = new Dictionary<string, float>();
    foreach (var item in featuresDictionary)
    {
        feature.Add(item.Key, item.Value);
    }
    var features = new List<Dictionary<string, float>> { feature };

    // Transform and return the score column. (An unused MLContext instance was
    // previously created here; the transformer does not need it, so it was removed.)
    IDataView data = new FloatsDataView(features);
    IDataView result = transformer.Transform(data);
    return result.GetColumn<float>("Score").ToArray();
}
// Scores the test data with the trained model and returns the YOLO output
// vectors, one per input row.
private IEnumerable<float[]> PredictDataUsingModel(IDataView testData, ITransformer model)
{
    var scored = model.Transform(testData);
    IEnumerable<float[]> outputs = scored.GetColumn<float[]>(TinyYoloModelSettings.ModelOutput);
    return outputs;
}
// Get Features (ingredient names): loads the FEATURES data view with a fresh
// ML context and projects out the ingredient-name column.
public string[] GetFeatures()
{
    var context = new MLContext();
    IDataView featuresView = GetDataView(ModelChoice.NB, context, DataPurpose.FEATURES);
    return featuresView.GetColumn<string>(featuresView.Schema["ingrName"]).ToArray();
}
// Loads the taxi-trip data, filters outlier fares, builds the feature pipeline,
// trains an SDCA regressor, wraps it with a feature-contribution calculator for
// explainability, evaluates on the test set, and saves the final model to disk.
private static ITransformer TrainModel(MLContext mlContext)
{
    // STEP 1: Common data loading configuration.
    IDataView baseTrainingDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TrainDataPath, hasHeader: true, separatorChar: ',');
    IDataView testDataView = mlContext.Data.LoadFromTextFile<TaxiTrip>(TestDataPath, hasHeader: true, separatorChar: ',');

    // Sample code of removing extreme data like "outliers" for FareAmounts higher
    // than $150 and lower than $1 which can be error-data. cnt/cnt2 hold the
    // before/after row counts for debugging inspection.
    var cnt = baseTrainingDataView.GetColumn<float>(nameof(TaxiTrip.FareAmount)).Count();
    IDataView trainingDataView = mlContext.Data.FilterRowsByColumn(baseTrainingDataView, nameof(TaxiTrip.FareAmount), lowerBound: 1, upperBound: 150);
    var cnt2 = trainingDataView.GetColumn<float>(nameof(TaxiTrip.FareAmount)).Count();

    // STEP 2: Common data process configuration with pipeline data transformations:
    // copy the label, one-hot the categoricals, normalize the numerics, concatenate.
    var dataProcessPipeline = mlContext.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: nameof(TaxiTrip.FareAmount))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "VendorIdEncoded", inputColumnName: nameof(TaxiTrip.VendorId)))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "RateCodeEncoded", inputColumnName: nameof(TaxiTrip.RateCode)))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "PaymentTypeEncoded", inputColumnName: nameof(TaxiTrip.PaymentType)))
        .Append(mlContext.Transforms.NormalizeMeanVariance(outputColumnName: nameof(TaxiTrip.PassengerCount)))
        .Append(mlContext.Transforms.NormalizeMeanVariance(outputColumnName: nameof(TaxiTrip.TripTime)))
        .Append(mlContext.Transforms.NormalizeMeanVariance(outputColumnName: nameof(TaxiTrip.TripDistance)))
        .Append(mlContext.Transforms.Concatenate("Features", "VendorIdEncoded", "RateCodeEncoded", "PaymentTypeEncoded",
            nameof(TaxiTrip.PassengerCount), nameof(TaxiTrip.TripTime), nameof(TaxiTrip.TripDistance)));

    // (OPTIONAL) Peek data (such as 5 records) in training DataView after applying
    // the ProcessPipeline's transformations into "Features".
    ConsoleHelper.PeekDataViewInConsole(mlContext, trainingDataView, dataProcessPipeline, 5);
    ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", trainingDataView, dataProcessPipeline, 5);

    // STEP 3: Set the training algorithm - SDCA Regression.
    var trainer = mlContext.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features");
    var trainingPipeline = dataProcessPipeline.Append(trainer);

    // STEP 4: Train the model fitting to the DataSet.
    Console.WriteLine("=============== Training the model ===============");
    var trainedModel = trainingPipeline.Fit(trainingDataView);
    ConsoleHelper.ConsoleWriteHeader("=============== End of training process ===============");

    // Append feature contribution calculator in the pipeline. This will be used
    // at prediction time for explainability.
    var fccPipeline = trainedModel.Append(mlContext.Transforms
        .CalculateFeatureContribution(trainedModel.LastTransformer)
        .Fit(dataProcessPipeline.Fit(trainingDataView).Transform(trainingDataView)));

    // STEP 5: Evaluate the model and show accuracy stats.
    Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
    IDataView predictions = fccPipeline.Transform(testDataView);
    var metrics = mlContext.Regression.Evaluate(predictions, labelColumnName: "Label", scoreColumnName: "Score");
    Common.ConsoleHelper.PrintRegressionMetrics(trainer.ToString(), metrics);

    // STEP 6: Save/persist the trained model to a .ZIP file.
    mlContext.Model.Save(fccPipeline, trainingDataView.Schema, ModelPath);
    Console.WriteLine("The model is saved to {0}", ModelPath);

    return (fccPipeline);
}
// Transforms the test data with the trained model and returns the raw output
// vectors for further processing.
private IEnumerable<float[]> PredictDataUsingModel(IDataView testData, ITransformer model)
{
    IDataView scored = model.Transform(testData);
    return scored.GetColumn<float[]>(modelSettings.modelOutput);
}
/// <summary>
/// Removes extreme "outlier" rows: air fares lower than $30 or higher than $1,400,
/// which are likely error data. (The previous summary referred to taxi FareAmounts
/// in the $1–$150 range — stale text copied from a different sample.)
/// </summary>
/// <param name="mlContext">ML.NET context used for the row filter.</param>
/// <param name="baseTrainingDataView">Unfiltered training data.</param>
/// <returns>The filtered training data view.</returns>
private static IDataView GetTrainingDataView(MLContext mlContext, IDataView baseTrainingDataView)
{
    // Row counts before/after the filter, handy when debugging data preparation.
    var cnt = baseTrainingDataView.GetColumn<float>(nameof(AirTravel.AirFare)).Count();
    IDataView trainingDataView = mlContext.Data.FilterRowsByColumn(baseTrainingDataView, nameof(AirTravel.AirFare), lowerBound: 30, upperBound: 1400);
    var cnt2 = trainingDataView.GetColumn<float>(nameof(AirTravel.AirFare)).Count();
    return (trainingDataView);
}
// Announces the image source on the console, scores the test data with the
// YOLO model, and returns the raw output vectors.
private IEnumerable<float[]> PredictDataUsingModel(IDataView testData, ITransformer model)
{
    Console.WriteLine($"Images location: {imagesFolder}");
    Console.WriteLine("");
    Console.WriteLine("=====Identify the objects in the images=====");
    Console.WriteLine("");

    IDataView scored = model.Transform(testData);
    return scored.GetColumn<float[]>(TinyYoloModelSettings.ModelOutput);
}
// Get recommendations for a recipe
// the ingredient score is equivalent to the posterior
//
// model[i][f] appears to hold co-occurrence counts of ingredients i and f, with
// model[f][f] the count of f itself — a naive-Bayes-style scoring; TODO confirm
// against the code that builds `model`.
public Recommendation[] RecipeRecommendations(double[][] model, int[] recipe, bool laplace, bool normalize, bool prior)
{
    MLContext ml = new MLContext();
    DataManager dm = new DataManager();

    // get features (unique ingredients)
    string[] ingr_names = dm.GetFeatures();

    // get number of training recipes (used as the prior denominator)
    IDataView allrecipes = dm.GetDataView(ModelChoice.NB, ml, DataPurpose.TRAIN);
    int num_recipes = allrecipes.GetColumn<int>(allrecipes.Schema["recipeId"]).ToArray().Length;

    Recommendation[] recommendations = new Recommendation[ingr_names.Length];

    // iterate through all features (unique ingredients): score each candidate f
    for (int f = 0; f < ingr_names.Length; f++)
    {
        double likelihood = 1.0;

        // multiply in the conditional contribution of every ingredient in the recipe
        foreach (int i in recipe)
        {
            // ignore matching ingredients (a candidate never conditions on itself)
            if (i != f)
            {
                // laplace smoothing avoids zeroing the product on unseen pairs
                if (laplace == true)
                {
                    likelihood *= (model[i][f] + 1.0) / (model[f][f] + ingr_names.Length);
                }
                else
                {
                    likelihood *= model[i][f] / model[f][f];
                }
                // normalize by the ingredient's own diagonal entry
                if (normalize == true)
                {
                    likelihood /= model[i][i];
                }
            }
        }

        // optionally weight by the candidate's prior frequency across recipes
        if (prior == true)
        {
            likelihood *= model[f][f] / num_recipes;
        }

        recommendations[f] = new Recommendation(new Ingredient(f, ingr_names[f]), likelihood);
    }

    // sort best-scoring candidates first
    recommendations = recommendations.OrderByDescending(t => t.score).ToArray();
    return (recommendations);
}
// Loads the taxi-trip data, filters outlier fares, builds the feature pipeline,
// trains an SDCA regressor, evaluates it on the test set, and saves the model.
// NOTE(review): written against the pre-1.0 ML.NET API (ReadFromTextFile,
// FilterByColumn, NormalizerMode, SaveTo) — confirm the project's package version.
private static ITransformer BuildTrainEvaluateAndSaveModel(MLContext mlContext)
{
    // STEP 1: Common data loading configuration.
    IDataView baseTrainingDataView = mlContext.Data.ReadFromTextFile<TaxiTrip>(TrainDataPath, hasHeader: true, separatorChar: ',');
    IDataView testDataView = mlContext.Data.ReadFromTextFile<TaxiTrip>(TestDataPath, hasHeader: true, separatorChar: ',');

    // Sample code of removing extreme data like "outliers" for FareAmounts higher
    // than $150 and lower than $1 which can be error-data. cnt/cnt2 hold the
    // before/after row counts for debugging inspection.
    var cnt = baseTrainingDataView.GetColumn<float>(mlContext, nameof(TaxiTrip.FareAmount)).Count();
    IDataView trainingDataView = mlContext.Data.FilterByColumn(baseTrainingDataView, nameof(TaxiTrip.FareAmount), lowerBound: 1, upperBound: 150);
    var cnt2 = trainingDataView.GetColumn<float>(mlContext, nameof(TaxiTrip.FareAmount)).Count();

    // STEP 2: Common data process configuration with pipeline data transformations:
    // copy the label, one-hot the categoricals, normalize the numerics, concatenate.
    var dataProcessPipeline = mlContext.Transforms.CopyColumns(outputColumnName: DefaultColumnNames.Label, inputColumnName: nameof(TaxiTrip.FareAmount))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: VendorIdEncoded, inputColumnName: nameof(TaxiTrip.VendorId)))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: RateCodeEncoded, inputColumnName: nameof(TaxiTrip.RateCode)))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: PaymentTypeEncoded, inputColumnName: nameof(TaxiTrip.PaymentType)))
        .Append(mlContext.Transforms.Normalize(outputColumnName: nameof(TaxiTrip.PassengerCount), mode: NormalizerMode.MeanVariance))
        .Append(mlContext.Transforms.Normalize(outputColumnName: nameof(TaxiTrip.TripTime), mode: NormalizerMode.MeanVariance))
        .Append(mlContext.Transforms.Normalize(outputColumnName: nameof(TaxiTrip.TripDistance), mode: NormalizerMode.MeanVariance))
        .Append(mlContext.Transforms.Concatenate(DefaultColumnNames.Features, VendorIdEncoded, RateCodeEncoded, PaymentTypeEncoded,
            nameof(TaxiTrip.PassengerCount), nameof(TaxiTrip.TripTime), nameof(TaxiTrip.TripDistance)));

    // (OPTIONAL) Peek data (such as 5 records) in training DataView after applying
    // the ProcessPipeline's transformations into "Features".
    ConsoleHelper.PeekDataViewInConsole(mlContext, trainingDataView, dataProcessPipeline, 5);
    ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, DefaultColumnNames.Features, trainingDataView, dataProcessPipeline, 5);

    // STEP 3: Set the training algorithm - SDCA Regression.
    var trainer = mlContext.Regression.Trainers.StochasticDualCoordinateAscent(labelColumn: DefaultColumnNames.Label, featureColumn: DefaultColumnNames.Features);
    var trainingPipeline = dataProcessPipeline.Append(trainer);

    // STEP 4: Train the model fitting to the DataSet.
    Console.WriteLine("=============== Training the model ===============");
    var trainedModel = trainingPipeline.Fit(trainingDataView);

    // STEP 5: Evaluate the model and show accuracy stats.
    Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
    IDataView predictions = trainedModel.Transform(testDataView);
    var metrics = mlContext.Regression.Evaluate(predictions, label: DefaultColumnNames.Label, score: DefaultColumnNames.Score);
    Common.ConsoleHelper.PrintRegressionMetrics(trainer.ToString(), metrics);

    // STEP 6: Save/persist the trained model to a .ZIP file.
    using (var fs = File.Create(ModelPath))
        trainedModel.SaveTo(mlContext, fs);

    Console.WriteLine("The model is saved to {0}", ModelPath);

    return (trainedModel);
}
// Evaluate Non-negative Matrix Factorization: for every distinct test recipe,
// scores every ingredient with the NMF model, ranks the recommendations, and
// accumulates accuracy results against the recipe's actual ingredients.
public void EvaluateNMF(ITransformer model)
{
    Console.WriteLine("\nEvaluating NMF...");
    MLContext mlContext = new MLContext();
    // get test data
    DataManager dm = new DataManager();
    // test data
    IDataView testData = dm.GetDataView(ModelChoice.NMF, mlContext, DataPurpose.TEST);
    Data[] test_data = dm.GetRecipes(ModelChoice.NMF, DataPurpose.TEST);
    // train data
    // NOTE(review): the trainData view itself is never read below — only the
    // train_data rows are; confirm whether the extra load is intentional.
    IDataView trainData = dm.GetDataView(ModelChoice.NMF, mlContext, DataPurpose.TRAIN);
    Data[] train_data = dm.GetRecipes(ModelChoice.NMF, DataPurpose.TRAIN);
    // features
    string[] features = dm.GetFeatures();

    int[] recipeArray = testData.GetColumn<int>(testData.Schema["recipeId"]).ToArray();
    Results results = new Results(0);
    Recommender recommender = new Recommender();

    // distinct test recipes
    int[] distinct_recipes = recipeArray.Distinct().ToArray();

    // for each test recipe
    foreach (int r in distinct_recipes)
    {
        Recommendation[] recommendations = new Recommendation[features.Length];

        // get recipe r: present-ingredient rows from both test and train splits
        Data[] recipe = test_data.Where(d => d.recipeId == r && d.score == 1).ToArray();
        Data[] trecipe = train_data.Where(d => d.recipeId == r && d.score == 1).ToArray();
        Data[] combined = recipe.Concat(trecipe).ToArray();
        int[] current_recipe = dm.GetRecipe(combined.ToArray());

        // iterate through all features
        for (int i = 0; i < dm.GetFeatures().Length; i++)
        {
            // make prediction (get score)
            double prediction = recommender.SinglePrediction(mlContext, model, i, r);
            // save score of ingredient
            recommendations[i] = new Recommendation(new Ingredient(i, features[i]), prediction);
        }

        // sort best-scoring first and fold into the running results
        recommendations = recommendations.OrderByDescending(d => d.score).ToArray();
        results = GetResults(results, recommendations, current_recipe);
    }

    // Display accuracy results
    results.ShowResults();
    Console.WriteLine();
}
// Writes every vector row of the requested column to the console as
// tab-separated values, one line per row.
private static void PrintDataColumn(IDataView transformedData, string columnName)
{
    foreach (float[] vector in transformedData.GetColumn<float[]>(transformedData.Schema[columnName]))
    {
        foreach (float cell in vector)
        {
            Console.Write($"{cell}\t");
        }
        Console.WriteLine();
    }
}
// Loads the temperature/floor data, filters areaEvaluated outliers, normalizes
// every input column, trains an SDCA regressor, evaluates it, and saves the
// model — reporting progress via WPF message boxes.
// NOTE(review): written against the pre-1.0 ML.NET API (NormalizerMode, SaveTo,
// Evaluate(label:, score:)) — confirm the project's package version.
private static ITransformer BuildTrainEvaluteAndSaveModel(MLContext mLContext)
{
    // Column schema for both the training and test CSV files.
    TextLoader textLoader1 = mLContext.Data.CreateTextLoader(new[]
    {
        new TextLoader.Column("temperature", DataKind.Single, 0),
        new TextLoader.Column("floor1", DataKind.Single, 1),
        new TextLoader.Column("floor2", DataKind.Single, 2),
        new TextLoader.Column("floor3", DataKind.Single, 3),
        new TextLoader.Column("floor4", DataKind.Single, 4),
        new TextLoader.Column("areaEvaluated", DataKind.Single, 5)
    }, hasHeader: true, separatorChar: ',');

    IDataView baseTrainingDataView = textLoader1.Load(pathDataTrain);
    IDataView testDataView = textLoader1.Load(pathDataTest);

    // Keep only rows with areaEvaluated in [150, 350]; cnt/cnt2 hold the
    // before/after row counts for debugging inspection.
    var cnt = baseTrainingDataView.GetColumn<float>(mLContext, "areaEvaluated").Count();
    IDataView trainingDataView = mLContext.Data.FilterRowsByColumn(baseTrainingDataView, "areaEvaluated", lowerBound: 150, upperBound: 350);
    var cnt2 = trainingDataView.GetColumn<float>(mLContext, "areaEvaluated").Count();

    // Copy the label, mean-variance normalize every input, then concatenate.
    var dataProcessPipeline = mLContext.Transforms.CopyColumns("Label", "areaEvaluated")
        .Append(mLContext.Transforms.Normalize(outputColumnName: "temperature", inputColumnName: "temperature", mode: NormalizerMode.MeanVariance))
        .Append(mLContext.Transforms.Normalize(outputColumnName: "floor1", inputColumnName: "floor1", mode: NormalizerMode.MeanVariance))
        .Append(mLContext.Transforms.Normalize(outputColumnName: "floor2", inputColumnName: "floor2", mode: NormalizerMode.MeanVariance))
        .Append(mLContext.Transforms.Normalize(outputColumnName: "floor3", inputColumnName: "floor3", mode: NormalizerMode.MeanVariance))
        .Append(mLContext.Transforms.Normalize(outputColumnName: "floor4", inputColumnName: "floor4", mode: NormalizerMode.MeanVariance))
        .Append(mLContext.Transforms.Concatenate("Features", "temperature", "floor1", "floor2", "floor3", "floor4"));

    // SDCA regression trainer appended to the processing pipeline.
    var trainer = mLContext.Regression.Trainers.StochasticDualCoordinateAscent(labelColumnName: "Label", featureColumnName: "Features");
    var trainingPipeline = dataProcessPipeline.Append(trainer);

    MessageBox.Show("Well phase1");
    //System.Console.WriteLine("training the model");
    var trainedModel = trainingPipeline.Fit(trainingDataView);
    //System.Console.WriteLine("Testing model");
    MessageBox.Show("Well phase2");

    // Evaluate on the held-out test data.
    IDataView predictions = trainedModel.Transform(testDataView);
    var metrics = mLContext.Regression.Evaluate(predictions, label: "Label", score: "Score");

    // Persist the trained model.
    using (var fs = File.Create(ModelPath))
        trainedModel.SaveTo(mLContext, fs);
    //System.Console.WriteLine("the model is saved to {0}", ModelPath);
    MessageBox.Show("fin saved");

    return (trainedModel);
}
// Asserts that an IDataView column and the same-named DataFrame column hold
// equal values, element by element. maxRows == -1 compares everything;
// otherwise only the first maxRows entries are checked.
private void VerifyDataFrameColumnAndDataViewColumnValues<T>(string columnName, IDataView data, DataFrame df, int maxRows = -1)
{
    int rowIndex = 0;
    foreach (T dataViewValue in data.GetColumn<T>(columnName))
    {
        if (maxRows != -1 && rowIndex >= maxRows)
        {
            return;
        }
        Assert.Equal(dataViewValue, df.Columns[columnName][rowIndex]);
        rowIndex++;
    }
}
/// <summary>
/// Generates predictions for the supplied test data.
/// </summary>
/// <param name="testData">Data view containing the images to score.</param>
/// <param name="model">Trained transformer used for scoring.</param>
/// <returns>The raw output vectors, one per input image.</returns>
private IEnumerable<float[]> PredictDataUsingModel(IDataView testData, ITransformer model)
{
    Console.WriteLine($"Images location: {ImagesFolder}");
    Console.WriteLine("");
    Console.WriteLine("=====Identify the objects in the images=====");
    Console.WriteLine("");

    // Score the data.
    IDataView scoredData = model.Transform(testData);

    // Extract the predicted probabilities and return them for further processing.
    IEnumerable<float[]> probabilities = scoredData.GetColumn<float[]>(TinyYoloModelSettings.ModelOutput);
    return (probabilities);
}
/// <summary>
/// Trains an SDCA regression model predicting employment duration in months,
/// evaluates it, and saves it to the output file given in <paramref name="args"/>.
/// </summary>
/// <param name="args">Command line arguments; input file and output model path
/// are read via the <c>CommandLineArguments</c> indices.</param>
protected override void Train(string[] args)
{
    // BUG FIX: removed the unused Activator.CreateInstance<EmploymentHistory>()
    // local and the dead cnt/cnt2 row counts — none were ever read.

    // NOTE(review): both views load the same input file — confirm whether a
    // separate test file or a TrainTestSplit was intended here.
    IDataView baseTrainingDataView = MlContext.Data.LoadFromTextFile <EmploymentHistory>(args[(int)CommandLineArguments.INPUT_FILE], hasHeader: true, separatorChar: ',');
    var testDataView = MlContext.Data.LoadFromTextFile <EmploymentHistory>(args[(int)CommandLineArguments.INPUT_FILE], hasHeader: true, separatorChar: ',');

    // Keep only rows whose duration lies in [1, 150] months; values outside are
    // treated as outliers and excluded from training.
    IDataView trainingDataView = MlContext.Data.FilterRowsByColumn(baseTrainingDataView, nameof(EmploymentHistory.DurationInMonths), lowerBound: 1, upperBound: 150);

    // Pipeline: duration becomes the label, the categorical position name is
    // one-hot encoded, numeric inputs are mean-variance normalized, and all are
    // concatenated into the "Features" vector.
    // (BUG FIX: flattened the Append chain — the original had a misplaced
    // parenthesis nesting the later Appends inside the YearsExperience call.)
    var dataProcessPipeline = MlContext.Transforms.CopyColumns("Label", nameof(EmploymentHistory.DurationInMonths))
        .Append(MlContext.Transforms.Categorical.OneHotEncoding("PositionNameEncoded", "PositionName"))
        .Append(MlContext.Transforms.NormalizeMeanVariance("IsMarried"))
        .Append(MlContext.Transforms.NormalizeMeanVariance("BSDegree"))
        .Append(MlContext.Transforms.NormalizeMeanVariance("MSDegree"))
        .Append(MlContext.Transforms.NormalizeMeanVariance("YearsExperience"))
        .Append(MlContext.Transforms.NormalizeMeanVariance("AgeAtHire"))
        .Append(MlContext.Transforms.NormalizeMeanVariance("HasKids"))
        .Append(MlContext.Transforms.NormalizeMeanVariance("WithinMonthOfVesting"))
        .Append(MlContext.Transforms.NormalizeMeanVariance("DeskDecorations"))
        .Append(MlContext.Transforms.NormalizeMeanVariance("LongCommute"))
        .Append(MlContext.Transforms.Concatenate("Features", "PositionNameEncoded", "IsMarried", "BSDegree", "MSDegree", "YearsExperience", "AgeAtHire", "HasKids", "WithinMonthOfVesting", "DeskDecorations", "LongCommute"));

    var trainer = MlContext.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features");
    var trainingPipeline = dataProcessPipeline.Append(trainer);

    var trainedModel = trainingPipeline.Fit(trainingDataView);
    var dataWithPredictions = trainedModel.Transform(testDataView);

    // BUG FIX: this is a regression task (Sdca regression trainer), but the
    // original called BinaryClassification.Evaluate and printed classification
    // metrics (Accuracy/AUC/F1), which do not apply to a regression transform.
    // Evaluate with regression metrics instead.
    var metrics = MlContext.Regression.Evaluate(dataWithPredictions);

    Console.WriteLine($"R-Squared: {metrics.RSquared}");
    Console.WriteLine($"Loss Function: {metrics.LossFunction}");
    Console.WriteLine($"Mean Absolute Error: {metrics.MeanAbsoluteError}");
    Console.WriteLine($"Mean Squared Error: {metrics.MeanSquaredError}");
    Console.WriteLine($"Root Mean Squared Error: {metrics.RootMeanSquaredError}");

    // Persist the trained model together with the training schema.
    using (var fs = File.Create(args[(int)CommandLineArguments.OUTPUT_FILE]))
    {
        MlContext.Model.Save(trainedModel, trainingDataView.Schema, fs);
    }
    Console.WriteLine($"Saved model to {args[(int)CommandLineArguments.OUTPUT_FILE]}");
}
/// <summary>
/// Loads the employment-history training file, filters outliers, trains an
/// SDCA regression model on a 60/40 train/test split, saves the model to
/// <c>ModelPath</c>, and prints evaluation metrics for the test set.
/// </summary>
/// <param name="trainingFileName">Path to the comma-separated training data.</param>
public void Train(string trainingFileName)
{
    if (!File.Exists(trainingFileName))
    {
        // BUG FIX: the original message was missing its closing parenthesis.
        Console.WriteLine($"Failed to find training data file ({trainingFileName})");
        return;
    }

    // BUG FIX: the original declared 'trainingDataView' twice (a compile error)
    // and filtered on TaxiTrip.FareAmount, a column that does not exist in the
    // EmploymentHistory schema being loaded. Filter the duration column instead,
    // matching the [1, 150]-month range used elsewhere in this file.
    // (Also removed the unused cnt2 row count — dead code.)
    var baseTrainingDataView = MlContext.Data.LoadFromTextFile <EmploymentHistory>(trainingFileName, ',');
    IDataView trainingDataView = MlContext.Data.FilterRowsByColumn(baseTrainingDataView, nameof(EmploymentHistory.DurationInMonths), lowerBound: 1, upperBound: 150);

    // Hold out 40% of the filtered rows for evaluation.
    var dataSplit = MlContext.Data.TrainTestSplit(trainingDataView, testFraction: 0.4);

    // Pipeline: duration becomes the label, numeric inputs are mean-variance
    // normalized, and every property except the label is concatenated into
    // the "Features" vector via the ToPropertyList helper.
    // (BUG FIX: flattened the Append chain — the original had a misplaced
    // parenthesis nesting the later Appends inside the YearsExperience call.)
    var dataProcessPipeline = MlContext.Transforms.CopyColumns("Label", nameof(EmploymentHistory.DurationInMonths))
        .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.IsMarried)))
        .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.BSDegree)))
        .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.MSDegree)))
        .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.YearsExperience)))
        .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.AgeAtHire)))
        .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.HasKids)))
        .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.WithinMonthOfVesting)))
        .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.DeskDecorations)))
        .Append(MlContext.Transforms.NormalizeMeanVariance(nameof(EmploymentHistory.LongCommute)))
        .Append(MlContext.Transforms.Concatenate("Features", typeof(EmploymentHistory).ToPropertyList <EmploymentHistory>(nameof(EmploymentHistory.DurationInMonths))));

    var trainer = MlContext.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features");
    var trainingPipeline = dataProcessPipeline.Append(trainer);

    ITransformer trainedModel = trainingPipeline.Fit(dataSplit.TrainSet);
    MlContext.Model.Save(trainedModel, dataSplit.TrainSet.Schema, ModelPath);

    // Score the held-out test set and report regression metrics.
    var testSetTransform = trainedModel.Transform(dataSplit.TestSet);
    var modelMetrics = MlContext.Regression.Evaluate(testSetTransform);

    Console.WriteLine($"Loss Function: {modelMetrics.LossFunction:0.##}{Environment.NewLine}" +
                      $"Mean Absolute Error: {modelMetrics.MeanAbsoluteError:#.##}{Environment.NewLine}" +
                      $"Mean Squared Error: {modelMetrics.MeanSquaredError:#.##}{Environment.NewLine}" +
                      $"RSquared: {modelMetrics.RSquared:0.##}{Environment.NewLine}" +
                      $"Root Mean Squared Error: {modelMetrics.RootMeanSquaredError:#.##}");
}