// Loads the sentiment data set from disk and splits it into train/test partitions.
public static TrainCatalogBase.TrainTestData LoadData(MLContext mlContext)
{
    // Read the raw sentiment examples from the tab-separated file (no header row).
    IDataView allRows = mlContext.Data.LoadFromTextFile<SentimentData>(_dataPath, hasHeader: false);

    // Hold out 20% of the rows for evaluation; the remaining 80% train the model.
    return mlContext.BinaryClassification.TrainTestSplit(allRows, testFraction: 0.2);
}
// </SnippetDeclareGlobalVariables>
// Entry point: trains a sentiment model, evaluates it, then demonstrates
// single-item and batch prediction. The <Snippet…> markers are consumed by
// the docs build — keep them in place.
static void Main(string[] args)
{
    // Create ML.NET context/local environment - allows you to add steps in order to keep everything together
    // during the learning process.
    // <SnippetCreateMLContext>
    MLContext mlContext = new MLContext();
    // </SnippetCreateMLContext>

    // Load the data file and split it into train/test partitions.
    // <SnippetCallLoadData>
    TrainCatalogBase.TrainTestData splitDataView = LoadData(mlContext);
    // </SnippetCallLoadData>

    // Fit the training pipeline on the 80% training partition.
    // <SnippetCallBuildAndTrainModel>
    ITransformer model = BuildAndTrainModel(mlContext, splitDataView.TrainSet);
    // </SnippetCallBuildAndTrainModel>

    // Report quality metrics on the held-out 20%.
    // <SnippetCallEvaluate>
    Evaluate(mlContext, model, splitDataView.TestSet);
    // </SnippetCallEvaluate>

    // Score one hard-coded example with the in-memory model.
    // <SnippetCallUseModelWithSingleItem>
    UseModelWithSingleItem(mlContext, model);
    // </SnippetCallUseModelWithSingleItem>

    // Reload the persisted model and score a batch of examples.
    // <SnippetCallUseLoadedModelWithBatchItems>
    UseLoadedModelWithBatchItems(mlContext);
    // </SnippetCallUseLoadedModelWithBatchItems>

    Console.WriteLine();
    Console.WriteLine("=============== End of process ===============");
}
// Entry point: end-to-end sentiment workflow — train, evaluate, and exercise
// the model three different ways before exiting.
static void Main(string[] args)
{
    // A single MLContext is shared by every stage of the ML workflow.
    MLContext context = new MLContext();

    // Load the raw data and carve off a test partition.
    TrainCatalogBase.TrainTestData split = LoadData(context);

    // Fit the pipeline on the training partition.
    ITransformer trainedModel = BuildAndTrainModel(context, split.TrainSet);

    // Score the held-out partition (the model is also persisted here for reuse).
    Evaluate(context, trainedModel, split.TestSet);

    // Demonstrate prediction with the in-memory model, a reloaded model over a
    // batch, and a reloaded model driven by console input.
    UseModelWithSingleItem(context, trainedModel);
    UseLoadedModelWithBatchItems(context);
    UseLoadedModelWithUserInput(context);

    Console.WriteLine();
    Console.WriteLine("=== End of process ===");
    Console.WriteLine();
    Console.WriteLine("Press any key to continue...");
    Console.ReadKey();
}
// Entry point: trains a sentiment model and reports metrics on the held-out split.
static void Main(string[] args)
{
    MLContext context = new MLContext();

    // Load, split, train, then evaluate on the test partition.
    TrainCatalogBase.TrainTestData split = LoadData(context);
    ITransformer trainedModel = BuildAndTrainModel(context, split.TrainSet);
    Evaluate(context, trainedModel, split.TestSet);
}
// Prepare a pipeline for training, train it, and create a prediction object.
public void BuildModel()
{
    // Reserve 20% of the rows for evaluating the trained model.
    TrainCatalogBase.TrainTestData split =
        _mlContext.BinaryClassification.TrainTestSplit(_dataView, testFraction: 0.2);

    // Turn the free-form sentiment text into a numeric feature vector.
    var featurizer = _mlContext.Transforms.Text.FeaturizeText(
        outputColumnName: DefaultColumnNames.Features,
        inputColumnName: nameof(BinaryClassificationData.SentimentText));

    // FastTree binary classifier over the featurized text.
    var classifier = _mlContext.BinaryClassification.Trainers.FastTree(
        labelColumnName: "Sentiment",
        featureColumnName: DefaultColumnNames.Features);

    // Train on the 80% partition and wrap the result in an engine for
    // single-row scoring.
    ITransformer trainedModel = featurizer.Append(classifier).Fit(split.TrainSet);
    _predEngine = trainedModel.CreatePredictionEngine<BinaryClassificationData, BinaryClassificationPrediction>(_mlContext);
}
// Reads the sentiment training file from disk and returns an 80/20
// train/test split. Data could equally come from a database or an
// in-memory collection.
public static TrainCatalogBase.TrainTestData LoadData(MLContext mlContext)
{
    // Tab-separated file without a header row.
    IDataView data = mlContext.Data.LoadFromTextFile<SentimentData>(_dataPath, hasHeader: false);

    // testFraction 0.2 => 80% of rows train the model, 20% evaluate it.
    TrainCatalogBase.TrainTestData split =
        mlContext.BinaryClassification.TrainTestSplit(data, testFraction: 0.2);
    return split;
}
// Entry point: train, evaluate, and demo the sentiment model end to end.
static void Main(string[] args)
{
    MLContext context = new MLContext();

    // Load and split the data, then fit the pipeline on the training partition.
    TrainCatalogBase.TrainTestData split = LoadData(context);
    ITransformer trainedModel = BuildAndTrainModel(context, split.TrainSet);

    // Report quality metrics on the held-out partition.
    Evaluate(context, trainedModel, split.TestSet);

    // Demonstrate single-item prediction, then batch prediction from the
    // persisted model.
    UseModelWithSingleItem(context, trainedModel);
    UseLoadedModelWithBatchItems(context);

    // Keep the console window open until a key is pressed.
    Console.ReadKey();
}
// Entry point: trains and evaluates the sentiment model, then scores one
// sample item before waiting for the user to press Enter.
static void Main(string[] args)
{
    Console.WriteLine("Hello World!");

    MLContext context = new MLContext();

    // Load/split, train, evaluate, and run a single-item prediction.
    TrainCatalogBase.TrainTestData split = LoadData(context);
    ITransformer trainedModel = BuildAndTrainModel(context, split.TrainSet);
    Evaluate(context, trainedModel, split.TestSet);
    UseModelWithSingleItem(context, trainedModel);

    Console.WriteLine("Goodbye World!");

    // Block until the user presses Enter; the input itself is unused.
    _ = Console.ReadLine();
}
// Trains and uploads one binary genre-membership model per Genre value.
// Songs are pulled from DocumentDB once; genres with 20 or fewer positive
// examples are skipped.
public async static Task MainAsync(string[] args)
{
    Console.WriteLine($"Loading Full Dataset from db");
    DocumentDBRepository <SongRecord> .Initialize(collectionId : "Songs");
    var songs = await DocumentDBRepository <SongRecord> .GetItemsAsync(x => true, -1);

    foreach (var genre in (Genre[])Enum.GetValues(typeof(Genre)))
    {
        Console.WriteLine();
        Console.WriteLine($"=============== Genre: {genre.ToString()} ===============");

        // FIX: materialize the projection once. The original kept a deferred
        // IEnumerable that was enumerated twice — by Count() below and again by
        // LoadFromEnumerable — re-running Select/Contains over every song each time.
        var lyricData = songs
                        .Select(x => new Lyric
        {
            Genre = x.Genre.Contains(genre.ToString()),
            Text = x.Lyrics
        })
                        .ToList();

        // Only bother with data with more than 20 examples
        if (lyricData.Count(x => x.Genre) > 20)
        {
            Console.WriteLine($"Loading data");
            // Fixed seed for repeatable training results.
            var mlContext = new MLContext(seed: 0);
            IDataView trainingDataView = mlContext.Data.LoadFromEnumerable(lyricData);
            TrainCatalogBase.TrainTestData splitDataView = mlContext.BinaryClassification.TrainTestSplit(trainingDataView, testFraction: 0.2);

            var model = BuildAndTrainModel(splitDataView.TrainSet, mlContext);
            Evaluate(mlContext, model, splitDataView.TestSet);

            // Serialize the trained model and push it to blob storage under a
            // per-genre name.
            using (var stream = new MemoryStream())
            {
                mlContext.Model.Save(model, stream);
                await BlobRepository <object> .UploadFromStream(stream, $"{genre.ToString()}GenrePrediction");
            }
        }
        else
        {
            Console.WriteLine($"Not enough data");
        }
    }
}
// Prepare a pipeline for training, train it, and create a prediction object.
// Template code taken from: https://github.com/dotnet/samples/blob/master/machine-learning/tutorials/GitHubIssueClassification/Program.cs
public void BuildModel()
{
    // Reserve 20% of the rows for evaluating the trained model.
    TrainCatalogBase.TrainTestData split =
        _mlContext.MulticlassClassification.TrainTestSplit(_dataView, testFraction: 0.2);

    // Featurization: map the Area label to a key, featurize both text columns,
    // concatenate them into one Features vector, and cache before training.
    var featurization = _mlContext.Transforms.Conversion.MapValueToKey(inputColumnName: "Area", outputColumnName: "Label")
                        .Append(_mlContext.Transforms.Text.FeaturizeText(inputColumnName: "Title", outputColumnName: "TitleFeaturized"))
                        .Append(_mlContext.Transforms.Text.FeaturizeText(inputColumnName: "Description", outputColumnName: "DescriptionFeaturized"))
                        .Append(_mlContext.Transforms.Concatenate("Features", "TitleFeaturized", "DescriptionFeaturized"))
                        .AppendCacheCheckpoint(_mlContext);

    // SDCA multiclass trainer, then map the predicted key back to its label text.
    var trainingPipeline = featurization
                           .Append(_mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(DefaultColumnNames.Label, DefaultColumnNames.Features))
                           .Append(_mlContext.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

    // Fit on the training partition and build the single-row prediction engine.
    var trainedModel = trainingPipeline.Fit(split.TrainSet);
    _predEngine = trainedModel.CreatePredictionEngine<MulticlassClassificationData, MulticlassClassificationPrediction>(_mlContext);
}
// Loads the sentiment data set and splits it into train/test partitions.
// Note that this case, loading your training data from a file,
// is the easiest way to get started, but ML.NET also allows you
// to load data from databases or in-memory collections.
// The <Snippet…> markers are consumed by the docs build — keep them in place.
public static TrainCatalogBase.TrainTestData LoadData(MLContext mlContext)
{
    // Tab-separated file with no header row.
    // <SnippetLoadData>
    IDataView dataView = mlContext.Data.LoadFromTextFile <SentimentData>(_dataPath, hasHeader: false);
    // </SnippetLoadData>

    // 20% of the rows are held out for evaluating the trained model.
    // <SnippetSplitData>
    TrainCatalogBase.TrainTestData splitDataView = mlContext.BinaryClassification.TrainTestSplit(dataView, testFraction: 0.2);
    // </SnippetSplitData>

    // <SnippetReturnSplitData>
    return(splitDataView);
    // </SnippetReturnSplitData>
}
// Entry point: trains the sentiment model, then scores several hand-written
// restaurant reviews one at a time followed by a batch prediction.
static void Main(string[] args)
{
    MLContext context = new MLContext();
    TrainCatalogBase.TrainTestData split = LoadData(context);
    ITransformer trainedModel = BuildAndTrainModel(context, split.TrainSet);
    Evaluate(context, trainedModel, split.TestSet);

    // Score each sample review individually, in order.
    string[] sampleReviews =
    {
        "This was a horrible meal.",
        "This was an amazing meal.",
        "I didn't like this meal.",
        "I love this spaghetti.",
        "I don't like this spaghetti.",
    };
    foreach (var review in sampleReviews)
    {
        UseModelWithSingleItem(context, trainedModel, review);
    }

    UseLoadedModelWithBatchItems(context);
    Console.ReadLine();
}
// Entry point: loads hourly internet-traffic measurements, plots them, trains
// a Poisson regression over Time + AverageMeasure, and predicts traffic at 12:30.
static void Main(string[] args)
{
    // Single MLContext shared across the model creation workflow objects.
    MLContext mlContext = new MLContext();

    // Read the tab-separated training data (no header row).
    var data = mlContext.Data.LoadFromTextFile<TrafficData>(TrainDataPath, separatorChar: '\t', hasHeader: false);

    // Split dataset in two parts: TrainingDataset (80%) and TestDataset (20%).
    TrainCatalogBase.TrainTestData dataSplit = mlContext.Regression.TrainTestSplit(data, testFraction: 0.2);
    var trainingData = dataSplit.TrainSet;
    var testData = dataSplit.TestSet;

    // Echo a preview of each partition to the console.
    var preview = trainingData.Preview();
    Console.WriteLine($"******************************************");
    Console.WriteLine($"Loaded training data: {preview}");
    Console.WriteLine($"******************************************");
    preview = testData.Preview();
    Console.WriteLine($"******************************************");
    Console.WriteLine($"Loaded test data: {preview}");
    Console.WriteLine($"******************************************");

    // Get an array of the average data points.
    var avgPoints = GetAvgChartPointsFromData(mlContext.Data.CreateEnumerable<TrafficData>(trainingData, reuseRowObject: true));

    // One colored series per measurement column (1..6) plus the average series.
    var chartSeries = new List<PlotChartPointsList>
    {
        new PlotChartPointsList { Points = ChartGeneratorUtil.GetChartPointsFromFile(TrainDataPath, 0, 1, hasHeader: false).ToList(), Color = CommonConstants.PPLplotColorBlue, PaintDots = false },
        new PlotChartPointsList { Points = ChartGeneratorUtil.GetChartPointsFromFile(TrainDataPath, 0, 2, hasHeader: false).ToList(), Color = CommonConstants.PPLplotColorGreen, PaintDots = false },
        new PlotChartPointsList { Points = ChartGeneratorUtil.GetChartPointsFromFile(TrainDataPath, 0, 3, hasHeader: false).ToList(), Color = CommonConstants.PPLplotColorRed, PaintDots = false },
        new PlotChartPointsList { Points = ChartGeneratorUtil.GetChartPointsFromFile(TrainDataPath, 0, 4, hasHeader: false).ToList(), Color = CommonConstants.PPLplotColorBlack, PaintDots = false },
        new PlotChartPointsList { Points = ChartGeneratorUtil.GetChartPointsFromFile(TrainDataPath, 0, 5, hasHeader: false).ToList(), Color = CommonConstants.PPLplotColorRed2, PaintDots = false },
        new PlotChartPointsList { Points = ChartGeneratorUtil.GetChartPointsFromFile(TrainDataPath, 0, 6, hasHeader: false).ToList(), Color = CommonConstants.PPLplotColorRed3, PaintDots = false },
        new PlotChartPointsList { Points = avgPoints.ToList(), Color = CommonConstants.PPLplotColorBlue }
    };

    // Generate graph with training data.
    ChartGeneratorUtil.PlotRegressionChart(new PlotChartGeneratorModel
    {
        Title = "Internet traffic over the day",
        LabelX = "Time of day",
        LabelY = "Internet traffic (Gbps)",
        ImageName = "InternetTrafficOverTheDay.png",
        PointsList = chartSeries,
        MaxLimitX = 24,
        MaxLimitY = 70,
        DrawRegressionLine = false
    });

    // Poisson regression trainer over the concatenated Time + AverageMeasure features.
    var pipeline = mlContext.Transforms.Concatenate("Features", "Time", "AverageMeasure")
                   .Append(mlContext.Regression.Trainers.PoissonRegression());

    // Train the model.
    var model = pipeline.Fit(trainingData);

    // Use the trained model to predict the internet traffic.
    var predictionEngine = model.CreatePredictionEngine<TrafficData, TrafficPrediction>(mlContext);

    // This represents the time 12:30.
    var time = 12.5f;

    // Obtain the prediction.
    var prediction = predictionEngine.Predict(new TrafficData
    {
        Time = time,
        HistoricalMeasures = new float[] { 43.5f, 45.3f, 41.9f, 40.3f, 31.5f, 44.6f }
    });

    Console.WriteLine($"At t={time}, predicted internet traffic is {prediction.InternetTraffic} Gbps.");
    Console.ReadKey();
}
// Entry point: sets up the shared ML.NET context and loads the
// train/test split (no further processing happens yet).
static void Main(string[] args)
{
    MLContext context = new MLContext();
    TrainCatalogBase.TrainTestData split = LoadData(context);
}
// Entry point: clusters the Iris data set with k-means (k=3), evaluates and
// persists the model, then reloads it to assign a cluster for one sample flower.
private static void Main(string[] args)
{
    // Create the MLContext to share across components for deterministic results.
    MLContext mlContext = new MLContext(seed: 1); // Seed set to any number so you have a deterministic environment

    // STEP 1: Common data loading configuration.
    IDataView fullData = mlContext.Data.LoadFromTextFile(path: DataPath,
        columns: new[]
        {
            new TextLoader.Column(DefaultColumnNames.Label, DataKind.Single, 0),
            new TextLoader.Column(nameof(IrisData.SepalLength), DataKind.Single, 1),
            new TextLoader.Column(nameof(IrisData.SepalWidth), DataKind.Single, 2),
            new TextLoader.Column(nameof(IrisData.PetalLength), DataKind.Single, 3),
            new TextLoader.Column(nameof(IrisData.PetalWidth), DataKind.Single, 4),
        },
        hasHeader: true,
        separatorChar: '\t');

    // Split dataset in two parts: TrainingDataset (80%) and TestDataset (20%).
    TrainCatalogBase.TrainTestData trainTestData = mlContext.Clustering.TrainTestSplit(fullData, testFraction: 0.2);
    trainingDataView = trainTestData.TrainSet;
    testingDataView = trainTestData.TestSet;

    // STEP 2: Process data transformations in pipeline.
    var dataProcessPipeline = mlContext.Transforms.Concatenate(DefaultColumnNames.Features,
        nameof(IrisData.SepalLength), nameof(IrisData.SepalWidth),
        nameof(IrisData.PetalLength), nameof(IrisData.PetalWidth));

    // (Optional) Peek data in training DataView after applying the ProcessPipeline's transformations.
    Common.ConsoleHelper.PeekDataViewInConsole(mlContext, trainingDataView, dataProcessPipeline, 10);
    Common.ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, DefaultColumnNames.Features, trainingDataView, dataProcessPipeline, 10);

    // STEP 3: Create and train the model.
    var trainer = mlContext.Clustering.Trainers.KMeans(featureColumnName: DefaultColumnNames.Features, clustersCount: 3);
    var trainingPipeline = dataProcessPipeline.Append(trainer);
    var trainedModel = trainingPipeline.Fit(trainingDataView);

    // STEP 4: Evaluate accuracy of the model.
    IDataView predictions = trainedModel.Transform(testingDataView);
    var metrics = mlContext.Clustering.Evaluate(predictions, score: DefaultColumnNames.Score, features: DefaultColumnNames.Features);
    ConsoleHelper.PrintClusteringMetrics(trainer.ToString(), metrics);

    // STEP 5: Save/persist the model as a .ZIP file.
    using (var fs = new FileStream(ModelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
        mlContext.Model.Save(trainedModel, fs);

    Console.WriteLine("=============== End of training process ===============");
    Console.WriteLine("=============== Predict a cluster for a single case (Single Iris data sample) ===============");

    // Test with one sample.
    // FIX: the measurements were rotated by one field (PetalWidth = 5.1f is not a
    // plausible iris petal width; 5.1 is the setosa SEPAL length). Restored the
    // canonical setosa sample so the prediction matches the message printed below.
    var sampleIrisData = new IrisData()
    {
        SepalLength = 5.1f,
        SepalWidth = 3.3f,
        PetalLength = 1.6f,
        PetalWidth = 0.2f,
    };

    using (var stream = new FileStream(ModelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
    {
        ITransformer model = mlContext.Model.Load(stream);

        // Create prediction engine related to the loaded trained model.
        var predEngine = model.CreatePredictionEngine <IrisData, IrisPrediction>(mlContext);

        // Score.
        var resultprediction = predEngine.Predict(sampleIrisData);

        Console.WriteLine($"Cluster assigned for setosa flowers:" + resultprediction.SelectedClusterId);
    }

    Console.WriteLine("=============== End of process, hit any key to finish ===============");
    Console.ReadKey();
}
// Builds the training set from MongoDB: one row per propuesta, with hourly
// price/cost/production/retribution measures accumulated over the propuesta's
// time window, then returns a 75/25 train/test split.
public static TrainCatalogBase.TrainTestData LoadData(MLContext mlContext)
{
    var database = client.GetDatabase("Energy");
    var collection = database.GetCollection <TrabajoPlanificadoMongo>("TrabajoPlanificado");
    var documents = collection.Find <TrabajoPlanificadoMongo>(new BsonDocument()).ToList();

    var TPdata = new List <TrabajoPlanificadoPropuestaData>();
    int vcount = 0; // total propuestas flagged Valorable, for the summary printout

    // TrabajoPlanificado: one pass per planned job.
    foreach (var document in documents)
    {
        var rowSet = new List <TrabajoPlanificadoPropuestaData>();
        var row = new TrabajoPlanificadoPropuestaData();
        // NOTE(review): the same `row` instance is reused for every propuesta of
        // this document, so rowSet ends up holding N references to one object that
        // keeps accumulating. If each propuesta was meant to produce its own row,
        // `row` should be created inside the loop below — confirm intent first.
        bool hasValorable = false;

        // Resultado.Propuestas
        foreach (var propuesta in document.Resultado.TrabajosPlanificadosPropuestas)
        {
            // Accumulate hourly measures across the propuesta's time range.
            for (DateTime i = propuesta.FechaHoraInicio; i < propuesta.FechaHoraFin; i = i.AddHours(1))
            {
                decimal value = 0;

                // FIX: the original if/else around this lookup executed the
                // identical statement in both branches. TryGetValue leaves
                // `value` at 0 on a miss, so a plain lookup-and-add is
                // equivalent and matches the three measures below.
                document.Resultado.PrevisionPreciosPorFechaHora.TryGetValue(i, out value);
                row.PrevisionPreciosPorFechaHora += (float)value;

                document.Resultado.CostesOperacionPorFechaHora.TryGetValue(i, out value);
                row.CostesOperacionPorFechaHora += (float)value;

                document.Resultado.PrevisionProduccionPorFechaHora.TryGetValue(i, out value);
                row.PrevisionProduccionPorFechaHora += (float)value;

                document.Resultado.RetribucionesPorFechaHora.TryGetValue(i, out value);
                row.RetribucionesPorFechaHora += (float)value;
            }

            row.Valorable = propuesta.Valorable;
            if (propuesta.Valorable)
            {
                hasValorable = true;
                vcount++;
            }
            rowSet.Add(row);
        }

        //Idea: Dont add propuestas that have time ranges out of the range of the measurements
        //Idea: Only add propuestas that have a valorable member in their result set.
        //Idea: because each instalation has different predictors available, train a different model per instalation
        if (hasValorable)
        {
            TPdata = TPdata.Concat(rowSet).ToList();
        }
    }

    IDataView dataView = mlContext.Data.LoadFromEnumerable(TPdata);
    Console.WriteLine("PropuestaSets with valorable menbers: {0}", TPdata.Count());
    Console.WriteLine("Propuestas of valorable label: {0}", vcount);

    // Hold out 25% of the rows for testing.
    TrainCatalogBase.TrainTestData splitDataView = mlContext.BinaryClassification.TrainTestSplit(dataView, testFraction: 0.25);
    return(splitDataView);
}