/// <summary>
/// Reads the flight-code training CSV and inserts one <see cref="FlightCodes"/> row per record
/// into the database reached through <paramref name="options"/>.
/// </summary>
/// <remarks>
/// This method does not create or configure the database itself; which store is used
/// (in-memory or otherwise) is decided entirely by <paramref name="options"/>.
/// Ids are synthetic: they are assigned sequentially starting at 1 in CSV record order.
/// </remarks>
/// <param name="options">Options used to construct the <see cref="AirlinesContext"/> that receives the rows.</param>
private static void LoadAirlinesData(DbContextOptions<AirlinesContext> options)
{
    // Build the path from its segments so the directory separator is correct on every OS
    // (a literal backslash is only a separator on Windows).
    string csvPath = Path.Combine("TrainingData", "ManyFlightCodes.csv");

    using (var airlinesModel = new AirlinesContext(options))
    using (TextReader reader = new StreamReader(csvPath))
    using (var csvReader = new CsvReader(reader))
    {
        // Counter for the fake Ids; pre-incremented so the first row gets Id 1.
        int key = 0;

        // GetRecords is consumed lazily by the Select below, so the readers must stay open
        // until AddRange has enumerated every record.
        var flightCodes = csvReader.GetRecords<FlightCodeFeatures>();

        airlinesModel.FlightCodes.AddRange(flightCodes.Select(f => new FlightCodes
        {
            Id = ++key,
            FlightCode = f.FlightCode,
            Iatacode = f.IATACode
        }));

        airlinesModel.SaveChanges();
    }
}
/// <summary>
/// Trains a multiclass classifier that predicts <c>IATACode</c> from <c>FlightCode</c>,
/// using rows read from the <c>FlightCodes</c> set of an <see cref="AirlinesContext"/>.
/// </summary>
/// <remarks>
/// The pipeline maps <c>IATACode</c> to a key column named "Label", featurizes the
/// <c>FlightCode</c> text, trains with SDCA maximum entropy, and maps the predicted key
/// back to its original value. Progress and elapsed time are written to the console.
/// </remarks>
/// <param name="dbOptions">Options used to construct the <see cref="AirlinesContext"/> that supplies training rows.</param>
/// <param name="cacheData">When <c>true</c>, a cache checkpoint is appended after the data-processing steps and before the trainer.</param>
/// <param name="concurrency">Not read by this method; kept so existing callers continue to compile.</param>
/// <param name="nth">Sampling stride: only rows whose <c>Id</c> is evenly divisible by this value are used. Must not be zero.</param>
/// <returns>The fitted transformer chain.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="nth"/> is zero.</exception>
public static ITransformer TrainModel(DbContextOptions<AirlinesContext> dbOptions, bool cacheData = false, int concurrency = 0, int nth = 1)
{
    // Fail fast: a zero stride would otherwise only surface later, when the lazily
    // evaluated "Id % nth" filter is finally enumerated.
    if (nth == 0)
    {
        throw new ArgumentOutOfRangeException(nameof(nth), "nth must be non-zero.");
    }

    // Create an ML.NET environment with a fixed seed
    var mlContext = new MLContext(seed: 0);

    // Train from EF DBContext. The context must outlive Fit(), because the training
    // enumerable below is lazy and reads from it while training runs.
    using (var airlinesModel = new AirlinesContext(dbOptions))
    {
        // Create an enumerable view of the DB training data
        var flightCodeTrainingData = airlinesModel.FlightCodes
            .Where(fc => fc.Id % nth == 0)
            .AsEnumerable()
            .Select(f => new FlightCodeFeatures
            {
                FlightCode = f.FlightCode,
                IATACode = f.Iatacode
            });

        var trainingDataView = mlContext.Data.LoadFromEnumerable(flightCodeTrainingData);

        // Set the key column (IATACode), featurize the text FlightCode column and add it to the features collection
        var dataProcessPipeline = mlContext.Transforms.Conversion
            .MapValueToKey(outputColumnName: "Label", inputColumnName: nameof(FlightCodeFeatures.IATACode))
            .Append(mlContext.Transforms.Text.FeaturizeText(outputColumnName: "FlightCodeFeaturized", inputColumnName: nameof(FlightCodeFeatures.FlightCode)))
            .Append(mlContext.Transforms.Concatenate(outputColumnName: "Features", "FlightCodeFeaturized"));

        if (cacheData)
        {
            // Optionally cache the input (used if multiple passes required).
            // AppendCacheCheckpoint returns a NEW chain; the result must be kept,
            // otherwise the checkpoint is silently dropped.
            dataProcessPipeline = dataProcessPipeline.AppendCacheCheckpoint(mlContext);
        }

        // Define the trainer to be used
        IEstimator<ITransformer> trainer = mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy();

        // Add the trainer to the data pipeline and map the prediction back to its original value
        var trainingPipeline = dataProcessPipeline
            .Append(trainer)
            .Append(mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

        // Do the actual training: reads the features and builds the model
        Console.WriteLine("Starting training");
        var watch = System.Diagnostics.Stopwatch.StartNew();
        ITransformer trainedModel = trainingPipeline.Fit(trainingDataView);
        watch.Stop();

        long elapsedMs = watch.ElapsedMilliseconds;
        Console.WriteLine($"Training took {elapsedMs / 1000f} secs");
        Console.WriteLine();

        return trainedModel;
    }
}