// This method using 'DebuggerExtensions.Preview()' should only be used when debugging/developing, not for release/production trainings
/// <summary>
/// Fits <paramref name="pipeline"/> on <paramref name="dataView"/>, applies the fitted transformer to the
/// same data and writes the first rows of the result to the console, one line per row.
/// </summary>
/// <param name="mlContext">Not used by this method.</param>
/// <param name="dataView">Data used both to fit the pipeline and to produce the previewed output.</param>
/// <param name="pipeline">Estimator chain to fit and apply.</param>
/// <param name="numberOfRows">Maximum number of rows requested from the preview.</param>
public static void PeekDataViewInConsole(MLContext mlContext, IDataView dataView, IEstimator<ITransformer> pipeline, int numberOfRows = 4)
{
    ConsoleWriteHeader($"Peek data in DataView: Showing {numberOfRows} rows with the columns");

    // https://github.com/dotnet/machinelearning/blob/master/docs/code/MlNetCookBook.md#how-do-i-look-at-the-intermediate-data
    ITransformer fitted = pipeline.Fit(dataView);
    IDataView transformed = fitted.Transform(dataView);

    // The transformed view is only a lazy "promise" of data; Preview materializes the requested rows.
    DataDebuggerPreview preview = transformed.Preview(maxRows: numberOfRows);

    foreach (DataDebuggerPreview.RowInfo row in preview.RowView)
    {
        KeyValuePair<string, object>[] cells = row.Values;
        string line = "Row--> ";
        for (int i = 0; i < cells.Length; i++)
        {
            line += $"| {cells[i].Key}:{cells[i].Value}";
        }

        Console.WriteLine(line + "\n");
    }
}
/// <summary>
/// Prints a two-column table (column name / value) for the previewed rows of <paramref name="dataView"/>,
/// restricted to the columns whose names appear in <paramref name="features"/>.
/// </summary>
/// <param name="mlContext">Not used by this method.</param>
/// <param name="dataView">Data to preview.</param>
/// <param name="features">Names of the columns to include in the table.</param>
/// <param name="numberOfRows">Maximum number of rows requested from the preview.</param>
public static void ShowDataViewInConsole(MLContext mlContext, IDataView dataView, string[] features, int numberOfRows = 1)
{
    ConsoleHelper.Write(ConsoleColor.White, $"Preview data fra træningssæt - viser antal rækker: {numberOfRows}");

    var table = new ConsoleTable("FELT", "VÆRDI");
    table.Options.EnableCount = false;

    var preview = dataView.Preview(maxRows: numberOfRows);
    foreach (var row in preview.RowView)
    {
        foreach (KeyValuePair<string, object> cell in row.Values)
        {
            // Skip every column that was not explicitly requested.
            if (!features.Contains(cell.Key))
            {
                continue;
            }

            table.AddRow(cell.Key, cell.Value);
        }
    }

    Console.WriteLine(" ");
    table.Write();
}
/// <summary>
/// Loads the persisted model and the dummy test CSV, re-maps column "c" to keys using the key values
/// stored in the model's input schema, scores the data and prints every "Score" value on its own line.
/// </summary>
/// <exception cref="InvalidOperationException">The model's input schema has no column named "c".</exception>
public static void Load()
{
    var ctx = new MLContext();

    // Dispose the file stream as soon as the model has been read; the original left the handle open.
    DataViewSchema schema;
    ITransformer model;
    using (var modelStream = File.OpenRead("../train-model/lgbm_nimbus.zip"))
    {
        model = ctx.Model.Load(modelStream, out schema);
    }

    IDataView testData = ctx.Data.LoadFromTextFile<Dummy>(
        "../train-model/dummy_test.csv",
        hasHeader: true,
        separatorChar: ',');

    // Fail with a descriptive message instead of the generic "Nullable object must have a value".
    var keyColumn = schema.GetColumnOrNull("c");
    if (!keyColumn.HasValue)
    {
        throw new InvalidOperationException("The loaded model's input schema does not contain a column named 'c'.");
    }

    var keys = new VBuffer<ReadOnlyMemory<char>>();
    keyColumn.Value.GetKeyValues(ref keys);

    // Reuse the key values stored with the model so the test data is keyed the same way.
    var keyData = ctx.Data.LoadFromEnumerable(keys.GetValues().ToArray().Select(x => new { Key = x }));
    var test = ctx.Transforms.Conversion
        .MapValueToKey("c", "c", addKeyValueAnnotationsAsText: true, keyData: keyData)
        .Fit(testData)
        .Transform(testData);

    var preds = model.Transform(test).GetColumn<float>("Score").ToArray();
    Console.WriteLine(string.Join('\n', preds));
}
/// <summary>
/// Trains an SSA forecaster on <paramref name="data"/> and writes the forecast values to the debug output.
/// </summary>
/// <param name="data">Observations used as the training series; the <c>Qty</c> member is the forecast input.</param>
public void TimeSeriesForcasting(IEnumerable<Req> data)
{
    // Wrap the in-memory sequence as an IDataView so it can be fed to the pipeline.
    IDataView trainingData = MlContext.Data.LoadFromEnumerable<Req>(data);

    // Materialized only so the data can be inspected in a debugger.
    var preview = trainingData.Preview();

    // Singular Spectrum Analysis forecaster configuration.
    var ssaEstimator = MlContext.Forecasting.ForecastBySsa(
        outputColumnName: "forcastedQty",
        inputColumnName: nameof(Req.Qty),
        windowSize: 7,
        seriesLength: 30,
        trainSize: 20,
        horizon: 2);

    var trainedForecaster = ssaEstimator.Fit(trainingData);
    var engine = trainedForecaster.CreateTimeSeriesEngine<Req, ForcastQty>(MlContext);
    var prediction = engine.Predict();

    foreach (var f in prediction.forcastedQty)
    {
        System.Diagnostics.Debug.WriteLine(f);
    }
}
/// <summary>
/// Loads the chocolate data file, previews up to 10 rows and dumps the column names followed by the
/// row values to the console as comma-terminated fields, then waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var mlContext = new MLContext();

    // Read the training data from a file.
    IDataView trainingDataView = mlContext.Data.LoadFromTextFile<ChocolateData>(
        path: "data/chocolate-data.txt",
        hasHeader: true);

    var preview = trainingDataView.Preview(10);

    const string banner = "******************************************";
    Console.WriteLine(banner);
    Console.WriteLine($"Loaded training data: {preview}");
    Console.WriteLine(banner);

    // Header line: every column name followed by a comma.
    foreach (var columnInfo in preview.ColumnView)
    {
        Console.Write(columnInfo.Column.Name + ",");
    }

    Console.WriteLine();

    // One output line per previewed row.
    foreach (var rowInfo in preview.RowView)
    {
        foreach (var cell in rowInfo.Values)
        {
            Console.Write($"{cell.Value},");
        }

        Console.WriteLine();
    }

    Console.ReadKey();
}
/// <summary>
/// Runs the predictions for <paramref name="testDataView"/> on a thread-pool thread, stores the scores and
/// a preview of the test data in the instance fields, and then refreshes the plot.
/// </summary>
/// <param name="regressor">Object that must implement <c>IPredictMany</c>.</param>
/// <param name="testDataView">Data to score and preview.</param>
/// <returns>A task that completes only after the plot update has finished.</returns>
/// <remarks>
/// The previous implementation chained <c>ContinueWith(async ...)</c>, which produces a nested task:
/// the returned task completed before <c>UpdatePlot</c> had finished, and an exception thrown by the
/// background work was never observed. Awaiting both steps fixes both problems; the continuation still
/// resumes on the caller's synchronization context when one is present.
/// </remarks>
private async Task AddPoints(object regressor, IDataView testDataView)
{
    await Task.Run(() =>
    {
        var prediction = ((IPredictMany)regressor).PredictMany(testDataView);
        result = prediction.Cast<IScore>().ToArray();
        testDataPreview = testDataView.Preview(100000);
    });

    await UpdatePlot(CancellationToken.None);
}
/// <summary>
/// Prints a preview summary for the training and test data sets, or a notice when either one is missing.
/// </summary>
/// <param name="trainingData">Training data; may be null.</param>
/// <param name="testData">Test data; may be null.</param>
public static void DisplayDatasets(IDataView trainingData, IDataView testData)
{
    bool bothLoaded = trainingData != null && testData != null;
    if (!bothLoaded)
    {
        Console.WriteLine("Task 1 \"Load data\" not completed.\n");
        return;
    }

    // The preview object itself is concatenated, so its ToString() output is what gets printed.
    var trainingPreview = trainingData.Preview(1000000);
    Console.WriteLine("Training data set size: " + trainingPreview);

    var testPreview = testData.Preview(1000000);
    Console.WriteLine("Test data set size: " + testPreview + "\n");
}
/// <summary>
/// Smoke test: loading and previewing the well-formed review file must not throw.
/// </summary>
public void GivenValidData_ShouldReturnTrue()
{
    // Arrange
    var context = new MLContext(seed: 1);

    // Act
    IDataView reviews = context.Data.LoadFromTextFile<ProductReview>(
        "product_reviews.csv",
        hasHeader: true,
        separatorChar: ',');
    reviews.Preview();

    // Assert: reaching this line means no exception was thrown above.
    Assert.True(true);
}
/// <summary>
/// Builds a prediction engine for <paramref name="model"/>, predicts the sample derived from
/// <paramref name="input"/> and prints the predicted <c>Broken</c> value.
/// </summary>
/// <param name="mlContext">ML.NET context used to create the engine and load the test file.</param>
/// <param name="model">Trained model to predict with.</param>
/// <param name="input">Raw input passed to <c>GetMachineLog</c> to build the sample.</param>
private static void TestSinglePrediction(MLContext mlContext, ITransformer model, string input)
{
    var engine = mlContext.Model.CreatePredictionEngine<MachineLog, BrokenPrediction>(model);

    // The test file is loaded and previewed only for inspection in a debugger; the result is not used below.
    IDataView testData = mlContext.Data.LoadFromTextFile<MachineLog>(
        _testDataPath,
        hasHeader: false,
        separatorChar: ',');
    var bla = testData.Preview(10);

    var sample = GetMachineLog(input);
    var outcome = engine.Predict(sample);
    Console.WriteLine(outcome.Broken);
}
/// <summary>
/// Previewing the malformed review file is expected to raise a <see cref="FormatException"/>.
/// </summary>
public void GivenInValidData_ShouldThrowException()
{
    // Arrange
    var context = new MLContext(seed: 1);

    // Act: loading is lazy, so the failure is expected when Preview actually reads the rows.
    IDataView reviews = context.Data.LoadFromTextFile<ProductReview>(
        "product_reviews - bad data.csv",
        hasHeader: true,
        separatorChar: ',');
    Action previewCall = () => reviews.Preview();

    // Assert
    Assert.That(previewCall, Throws.TypeOf<FormatException>());
}
/// <summary>
/// End-to-end iris sample: load the CSV, split it, train, evaluate, predict one hand-written sample,
/// save the model and wait for a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Location of the input data.
    var dataPath = "Data/iris.csv";

    var mlContext = new MLContext();

    // Load the whole file, then split it into train and test sets.
    var data = mlContext.Data.ReadFromTextFile<IrisData>(dataPath, hasHeader: true, separatorChar: ',');
    var (trainData, testData) = Operations.SplitData(mlContext, data);

    // Previews exist only so the data can be inspected in a debugger.
    var trainDataPreview = trainData.Preview();

    // Train, then apply the trained model to the test set.
    ITransformer trainedModel = Operations.Train(mlContext, trainData);
    IDataView transformedData = trainedModel.Transform(testData);
    var transformedDataPreview = transformedData.Preview();

    // Evaluate on the test set.
    double rSquared = Operations.Evaluate(mlContext, trainedModel, testData);
    Console.WriteLine("RSquared Metric:\t{0:P4}", rSquared);

    // Predict a single, previously unseen sample.
    var testInput = new IrisData
    {
        SepalLength = 3.3f,
        SepalWidth = 1.6f,
        PetalLength = 0.2f,
        PetalWidth = 5.1f
    };
    PredictionEngine<IrisData, IrisPrediction> predictionEngine =
        mlContext.Model.CreatePredictionEngine<IrisData, IrisPrediction>(trainedModel);
    string prediction = Operations.Predict(predictionEngine, testInput);
    Console.WriteLine("The prediction is {0}", prediction);

    // Persist the trained model.
    Operations.SaveModel(mlContext, trainedModel, "iris_model.zip");

    Console.ReadKey();
}
/// <summary>
/// Writes every name/value pair of the first <paramref name="numberofitems"/> rows to the console,
/// one pair per line, with two empty lines after each row.
/// </summary>
/// <param name="data">Data to preview.</param>
/// <param name="numberofitems">Maximum number of rows requested from the preview.</param>
public static void PreviewData(IDataView data, int numberofitems)
{
    var rows = data.Preview(numberofitems).RowView;

    for (int r = 0; r < rows.Length; r++)
    {
        var cells = rows[r].Values;
        for (int c = 0; c < cells.Length; c++)
        {
            // The pair itself is printed, so the output uses the pair's own ToString() format.
            Console.WriteLine(cells[c]);
        }

        Console.WriteLine();
        Console.WriteLine();
    }
}
/// <summary>
/// Loads the training and validation files, replaces missing "OriginalFixedAcidity" values with the mean
/// (writing the result to "FixedAcidity" and dropping the original column) and stores the transformed
/// views in <c>_trainingDataView</c> and <c>_validationDataView</c>.
/// </summary>
/// <param name="trainingDataPath">Path of the semicolon-separated training file (with header).</param>
/// <param name="validationDataPath">Path of the semicolon-separated validation file (with header).</param>
public void CreateDataViews(string trainingDataPath, string validationDataPath)
{
    // Pre-processing pipeline. No explicit Concatenate into "Features" is appended; the original
    // author's note states that this is done automatically.
    var replaceMissing = MLContext.Transforms.ReplaceMissingValues(
        outputColumnName: "FixedAcidity",
        inputColumnName: "OriginalFixedAcidity",
        replacementMode: MissingValueReplacingEstimator.ReplacementMode.Mean);
    IEstimator<ITransformer> pipeline =
        replaceMissing.Append(MLContext.Transforms.DropColumns("OriginalFixedAcidity"));

    // Training data: fit, transform, then cache the transformed view.
    var rawTraining = MLContext.Data.LoadFromTextFile<AutomationData>(
        path: trainingDataPath,
        separatorChar: ';',
        hasHeader: true);
    ITransformer fitted = pipeline.Fit(rawTraining);
    _trainingDataView = fitted.Transform(rawTraining);
    _trainingDataView = MLContext.Data.Cache(_trainingDataView);

    // Check the content on a breakpoint.
    var sneakPeek = _trainingDataView.Preview();

    // Validation data. NOTE(review): the pipeline is fitted again on the validation set here rather than
    // reusing the transformer fitted on the training data - confirm this is intended.
    var rawValidation = MLContext.Data.LoadFromTextFile<AutomationData>(
        path: validationDataPath,
        separatorChar: ';',
        hasHeader: true);
    fitted = pipeline.Fit(rawValidation);
    _validationDataView = fitted.Transform(rawValidation);
    // The validation view is intentionally left uncached.
}
/// <summary>
/// Writes the first rows of <paramref name="dataView"/> to the console, one "Row--> | name:value..." line per row.
/// </summary>
/// <param name="dataView">Data to preview.</param>
/// <param name="numberOfRows">Maximum number of rows requested from the preview.</param>
public static void PeekDataViewInConsole(IDataView dataView, int numberOfRows = 5)
{
    var rows = dataView.Preview(maxRows: numberOfRows).RowView;

    foreach (var row in rows)
    {
        string line = "Row--> ";
        foreach (KeyValuePair<string, object> cell in row.Values)
        {
            line = line + $"| {cell.Key}:{cell.Value}";
        }

        Console.WriteLine(line + "\n");
    }
}
/// <summary>
/// Prints a header followed by the first rows of <paramref name="dataView"/>, one line per row.
/// </summary>
/// <param name="mlContext">Not used by this method.</param>
/// <param name="dataView">Data to preview.</param>
/// <param name="numberOfRows">Maximum number of rows requested from the preview.</param>
public static void ShowDataViewInConsole(MLContext mlContext, IDataView dataView, int numberOfRows = 4)
{
    ConsoleWriteHeader($"Show data in DataView: Showing {numberOfRows} rows with the columns");

    var preview = dataView.Preview(maxRows: numberOfRows);
    foreach (var row in preview.RowView)
    {
        // Build "Row--> | name:value| name:value..." by appending each cell in order.
        var line = "Row--> ";
        foreach (var cell in row.Values)
        {
            line += $"| {cell.Key}:{cell.Value}";
        }

        Console.WriteLine(line + "\n");
    }
}
/// <summary>
/// Computes, on a thread-pool thread, one slider item per previewed column (name, minimum, maximum and
/// the midpoint as initial value) and then assigns the items to <c>sliderItemsControl.Data</c> through the dispatcher.
/// </summary>
/// <param name="dataView">Data whose preview supplies the column values.</param>
/// <returns>A task that completes after the slider data has been assigned.</returns>
/// <remarks>
/// The previous implementation nested <c>ContinueWith</c>, <c>Dispatcher.InvokeAsync</c> and async lambdas,
/// which left the innermost task unawaited: an exception from the background computation was silently
/// dropped. Awaiting the two steps in sequence propagates such failures to the caller.
/// </remarks>
private async Task Init(IDataView dataView)
{
    SliderItem[] items = await Task.Run(() =>
        dataView.Preview().ColumnView
            .Select(column =>
            {
                double min = System.Convert.ToDouble(column.Values.Min());
                double max = System.Convert.ToDouble(column.Values.Max());
                return new SliderItem
                {
                    Key = column.Column.Name,
                    Min = min,
                    Max = max,
                    Value = (min + max) / 2
                };
            })
            .ToArray());

    // The control is assigned through the dispatcher, as before.
    await this.Dispatcher.InvokeAsync(() => { sliderItemsControl.Data = items; });
}
/// <summary>
/// Loads the movie ratings from the database configured under "DefaultConnection" and splits them
/// 80/20 into training and test sets.
/// </summary>
/// <returns>The training and test views of the split.</returns>
/// <remarks>The method contains no awaits, so the body runs synchronously.</remarks>
public async Task<(IDataView training, IDataView testing)> LoadData()
{
    string sqlCommand = "SELECT [r].[MediaId] AS movieId, [r].[CreatorId] AS userId, CAST([r].[Score] AS REAL) AS Label FROM Rating AS r";
    string connectionString = this.configuration.GetConnectionString("DefaultConnection") + ";MultipleActiveResultSets=true";

    DatabaseLoader loader = mlContext.Data.CreateDatabaseLoader<MovieRating>();
    var source = new DatabaseSource(SqlClientFactory.Instance, connectionString, sqlCommand);
    IDataView ratings = loader.Load(source);

    // Materialized only so the data can be inspected in a debugger.
    var preview = ratings.Preview();

    DataOperationsCatalog.TrainTestData split = mlContext.Data.TrainTestSplit(ratings, testFraction: 0.2);
    return (split.TrainSet, split.TestSet);
}
/// <summary>
/// Prints a "Sample of data:" heading followed by one "Row = name - value ..." line per previewed row.
/// </summary>
/// <param name="dataView">Data to preview.</param>
/// <param name="numberOfRowsToDisplay">Maximum number of rows requested from the preview.</param>
public static void PeekData(IDataView dataView, int numberOfRowsToDisplay = 5)
{
    var rows = dataView.Preview(maxRows: numberOfRowsToDisplay).RowView;

    Console.WriteLine("Sample of data:");

    foreach (var row in rows)
    {
        string line = "Row = ";
        foreach (var cell in row.Values)
        {
            line = line + $"{cell.Key} - {cell.Value} ";
        }

        Console.Write(line + Environment.NewLine);
    }
}
/// <summary>
/// Prints the first row of <paramref name="data"/> as "name : value" pairs beneath a separator line.
/// </summary>
/// <param name="data">Data to preview; at most one row is requested.</param>
static void Display(IDataView data)
{
    var preview = data.Preview(maxRows: 1);

    foreach (var row in preview.RowView)
    {
        // A fresh buffer per row replaces the shared string that the original reset after each print.
        string line = "";
        foreach (var item in row.Values)
        {
            line += $"{item.Key} : {item.Value} ";
        }

        Console.WriteLine("-------------");
        Console.WriteLine(line);
    }
}
/// <summary>
/// Prints up to five rows of <paramref name="data"/>, each as "name: value" pairs beneath a separator line.
/// </summary>
/// <param name="data">Data to preview.</param>
private static void DisplayColumns(IDataView data)
{
    var rows = data.Preview(maxRows: 5).RowView;

    foreach (var row in rows)
    {
        // Each row gets its own text buffer, so no reset is needed after printing.
        string rowText = "";
        foreach (var item in row.Values)
        {
            rowText += $"{item.Key}: {item.Value} ";
        }

        Console.WriteLine("----------------------------------");
        Console.WriteLine(rowText);
    }
}
/// <summary>
/// Prints a header followed by the first rows of <paramref name="dataView"/>, one line per row.
/// </summary>
/// <param name="mlContext">Not used by this method.</param>
/// <param name="dataView">Data to preview.</param>
/// <param name="numberOfRows">Maximum number of rows requested from the preview.</param>
public static void ShowDataViewInConsole(MLContext mlContext, IDataView dataView, int numberOfRows = 4)
{
    ConsoleWriteHeader($"Show data in DataView: Showing {numberOfRows} rows with the columns");

    var rows = dataView.Preview(maxRows: numberOfRows).RowView;
    foreach (var row in rows)
    {
        string line = "Row--> ";
        foreach (KeyValuePair<string, object> cell in row.Values)
        {
            line = line + $"| {cell.Key}:{cell.Value}";
        }

        Console.WriteLine(line + "\n");
    }
}
/// <summary>
/// Loads a persisted model, applies it to the validation data and returns the regression metrics.
/// </summary>
/// <param name="appPath">Passed to <c>Utilities.GetModelPath</c> to locate the persisted model.</param>
/// <param name="mlContext">ML.NET context used to load the model and evaluate it.</param>
/// <param name="labelColumn">Name of the label column; also passed to the model path lookup.</param>
/// <param name="algorithmTypeName">Algorithm name passed to the model path lookup.</param>
/// <param name="validationData">Data the loaded model is applied to and evaluated against.</param>
/// <returns>The regression metrics computed on the transformed validation data.</returns>
public static RegressionMetrics GetRegressionModelMetrics(string appPath, MLContext mlContext, string labelColumn, string algorithmTypeName, IDataView validationData)
{
    // Resolve the model file and load it.
    var modelPath = Utilities.GetModelPath(appPath, algorithmTypeName, false, labelColumn, true);
    var model = LoadModel(mlContext, modelPath);

    // Run the validation data through the loaded model.
    var scored = model.Transform(validationData);

#if DEBUG
    // Debug builds only: previews for inspecting input and output in a debugger.
    var validationDataPreview = validationData.Preview(100);
    var transformedDataPreview = scored.Preview(100);
#endif

    return mlContext.Regression.Evaluate(scored, labelColumnName: labelColumn);
}
/// <summary>
/// Copies the first 100 rows of <paramref name="dataView"/> into a new <see cref="DataTable"/>.
/// </summary>
/// <param name="dataView">Data to copy; may be null.</param>
/// <returns>The populated table, or null when <paramref name="dataView"/> is null.</returns>
/// <remarks>
/// The 100-row limit is the default of <c>Preview()</c>, which the original implementation relied on
/// implicitly. Use the overload that takes <c>maxRows</c> to copy more rows.
/// </remarks>
public static DataTable ToDataTable(IDataView dataView)
{
    return ToDataTable(dataView, 100);
}

/// <summary>
/// Copies up to <paramref name="maxRows"/> rows of <paramref name="dataView"/> into a new <see cref="DataTable"/>
/// that has one column per visible schema column.
/// </summary>
/// <param name="dataView">Data to copy; may be null.</param>
/// <param name="maxRows">Maximum number of rows to materialize and copy.</param>
/// <returns>The populated table, or null when <paramref name="dataView"/> is null.</returns>
public static DataTable ToDataTable(IDataView dataView, int maxRows)
{
    if (dataView == null)
    {
        return null;
    }

    var preview = dataView.Preview(maxRows);
    var table = new DataTable();

    // A hidden column shares its name with a later column that shadows it; adding both would make
    // AddRange throw on the duplicate name, so only visible columns become table columns.
    table.Columns.AddRange(
        preview.Schema
            .Where(column => !column.IsHidden)
            .Select(column => new DataColumn(column.Name))
            .ToArray());

    foreach (var row in preview.RowView)
    {
        var tableRow = table.NewRow();

        // Values arrive in schema order, so the visible column's value is written last and wins
        // over a hidden column of the same name.
        foreach (var cell in row.Values)
        {
            tableRow[cell.Key] = cell.Value;
        }

        table.Rows.Add(tableRow);
    }

    return table;
}
/// <summary>
/// Prints a header and the first rows of <paramref name="dataView"/>, then writes the same row lines
/// to the hard-coded Titanic file path.
/// </summary>
/// <param name="mlContext">Not used by this method.</param>
/// <param name="dataView">Data to preview.</param>
/// <param name="numberOfRows">Maximum number of rows requested from the preview.</param>
public static void ShowDataViewInConsole(MLContext mlContext, IDataView dataView, int numberOfRows = 4)
{
    ConsoleWriteHeader($"Show data in DataView: Showing {numberOfRows} rows with the columns");

    var rows = dataView.Preview(maxRows: numberOfRows).RowView;
    var lines = new List<string>();

    foreach (var row in rows)
    {
        string line = "Row--> ";
        foreach (KeyValuePair<string, object> cell in row.Values)
        {
            line = line + $"| {cell.Key}:{cell.Value}";
        }

        // The stored line has no trailing newline; only the console copy gets one.
        lines.Add(line);
        Console.WriteLine(line + "\n");
    }

    System.IO.File.WriteAllLines("../../../../../../dataset/titanic/train_balanced.csv", lines);
}
/// <summary>
/// Prints a banner with the preview summary, the column names and the values of the first rows of
/// <paramref name="traningData"/>, each field followed by a space.
/// </summary>
/// <param name="traningData">Data to preview.</param>
/// <param name="maxRows">Maximum number of rows requested from the preview.</param>
public static void Show(IDataView traningData, int maxRows = 10)
{
    const string banner = "**************************************";
    var preview = traningData.Preview(maxRows);

    Console.WriteLine(banner);
    Console.WriteLine($"Loaded training data: {preview}");
    Console.WriteLine(banner);

    // Header line with the column names.
    foreach (var columnInfo in preview.ColumnView)
    {
        Console.Write(columnInfo.Column.Name + " ");
    }

    Console.WriteLine();

    // One output line per previewed row.
    foreach (var rowInfo in preview.RowView)
    {
        foreach (var cell in rowInfo.Values)
        {
            Console.Write($"{cell.Value} ");
        }

        Console.WriteLine();
    }

    Console.WriteLine(banner);
}
/// <summary>
/// Loads user/keyword ratings from the local CinemaHub database, splits them 80/20, trains and evaluates
/// a model, and then runs two hard-coded samples through it.
/// </summary>
public void LoadDataFromDatabase()
{
    string connectionString = "Server=.;Database=CinemaHub;Trusted_Connection=True;MultipleActiveResultSets=true";
    string queryForData = "SELECT r.CreatorId AS UserId, IIF(r.Score > 6, CAST(1 AS BIT), CAST(0 AS BIT)) AS IsLiked, string_agg(k.Name, ', ') AS Keywords FROM Rating AS r INNER JOIN MediaKeywords AS mk ON mk.MediaId = r.MediaId INNER JOIN Keywords AS k ON k.Id = mk.KeywordId GROUP BY r.MediaId, r.CreatorId, r.Score";

    DatabaseLoader loader = this.mlContext.Data.CreateDatabaseLoader<MediaKeywordModel>();
    var source = new DatabaseSource(SqlClientFactory.Instance, connectionString, queryForData);
    IDataView data = loader.Load(source);

    DataOperationsCatalog.TrainTestData split = mlContext.Data.TrainTestSplit(data, testFraction: 0.2);
    IDataView trainingData = split.TrainSet;
    IDataView testData = split.TestSet;

    // Materialized only so both sets can be inspected in a debugger.
    var preview1 = trainingData.Preview();
    var preview = testData.Preview();

    var model = this.BuildAndTrainModel(this.mlContext, trainingData);
    this.Evaluate(this.mlContext, model, testData);

    // Two hand-written samples are run through the trained model.
    var sampleData = new MediaKeywordModel
    {
        Keywords = "wegfwegweg",
        UserId = "p[",
    };
    var sampleData1 = new MediaKeywordModel
    {
        Keywords = "sequel, aftercreditsstinger, superhero",
        UserId = "eb28dc2e-182c-4040-9797-295d21c00681",
    };

    this.UseModelWithSingleItem(this.mlContext, model, sampleData);
    this.UseModelWithSingleItem(this.mlContext, model, sampleData1);
}
/// <summary>
/// Loads the course ratings from the database named by the CONNECTION_STRING environment variable and
/// splits them 90/10 into training and test sets.
/// </summary>
/// <param name="mlContext">ML.NET context used to create the loader and perform the split.</param>
/// <returns>The training and test views produced by the split.</returns>
/// <exception cref="InvalidOperationException">The CONNECTION_STRING environment variable is not set or is blank.</exception>
/// <remarks>
/// The previous implementation returned the complete, unsplit data view as the training set, so every
/// test row was also part of the training data. It also assigned a SQLite-style connection string that
/// was immediately overwritten by the environment variable and therefore never used.
/// </remarks>
public static (IDataView training, IDataView test) LoadData(MLContext mlContext)
{
    // Fail early with a clear message instead of handing a null connection string to the loader.
    string connectionString = Environment.GetEnvironmentVariable("CONNECTION_STRING");
    if (string.IsNullOrWhiteSpace(connectionString))
    {
        throw new InvalidOperationException("The CONNECTION_STRING environment variable is not set.");
    }

    string sqlComand = "SELECT user_id AS UserId, course_id as CourseId, rating as Rating, rating_id FROM course_ratings";

    DatabaseLoader loader = mlContext.Data.CreateDatabaseLoader<CourseRatingMl>();
    DatabaseSource dbSource = new DatabaseSource(
        Npgsql.NpgsqlFactory.Instance,
        connectionString,
        sqlComand);
    IDataView dataView = loader.Load(dbSource);

    DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.1);

    // Return the two halves of the split, not the full data view.
    return (dataSplit.TrainSet, dataSplit.TestSet);
}
/// <summary>
/// Verifies that a 10-row data frame with every supported column type previews as 10 rows and 15 columns,
/// and checks each column's name and its values in rows 0 and 1.
/// </summary>
public void TestIDataView()
{
    IDataView dataView = MakeDataFrameWithAllColumnTypes(10, withNulls: false);

    DataDebuggerPreview preview = dataView.Preview();
    Assert.Equal(10, preview.RowView.Length);
    Assert.Equal(15, preview.ColumnView.Length);

    string[] expectedNames =
    {
        "Byte", "Decimal", "Double", "Float", "Int", "Long", "Sbyte", "Short",
        "Uint", "Ulong", "Ushort", "String", "Char", "Bool", "ArrowString"
    };

    // Expected values for rows 0 and 1 of columns 0..13, compared as boxed values.
    object[][] expectedValues =
    {
        new object[] { (byte)0, (byte)1 },
        new object[] { 0d, 1d },                       // Decimal is surfaced as double
        new object[] { 0d, 1d },
        new object[] { 0f, 1f },
        new object[] { 0, 1 },
        new object[] { 0L, 1L },
        new object[] { (sbyte)0, (sbyte)1 },
        new object[] { (short)0, (short)1 },
        new object[] { 0u, 1u },
        new object[] { 0ul, 1ul },
        new object[] { (ushort)0, (ushort)1 },
        new object[] { "0".AsMemory(), "1".AsMemory() },
        new object[] { (ushort)65, (ushort)66 },       // Char is surfaced as ushort
        new object[] { true, false },
    };

    for (int c = 0; c < expectedValues.Length; c++)
    {
        Assert.Equal(expectedNames[c], preview.ColumnView[c].Column.Name);
        for (int r = 0; r < expectedValues[c].Length; r++)
        {
            Assert.Equal(expectedValues[c][r], preview.ColumnView[c].Values[r]);
        }
    }

    // The Arrow string column is compared through its text representation.
    Assert.Equal(expectedNames[14], preview.ColumnView[14].Column.Name);
    Assert.Equal("foo", preview.ColumnView[14].Values[0].ToString());
    Assert.Equal("foo", preview.ColumnView[14].Values[1].ToString());
}
/// <summary>
/// Verifies the preview of a 10-row data frame whose row 5 is null in every column: each column's name
/// and its values in rows 0, 1, 4, 5 and 6 are checked, with row 5 holding the type's null surrogate.
/// </summary>
public void TestIDataViewWithNulls()
{
    int length = 10;
    IDataView dataView = MakeDataFrameWithAllColumnTypes(length, withNulls: true);

    DataDebuggerPreview preview = dataView.Preview();
    Assert.Equal(length, preview.RowView.Length);
    Assert.Equal(15, preview.ColumnView.Length);

    // Rows inspected in every column; index 3 of each expectation array is the null row (row 5).
    int[] sampledRows = { 0, 1, 4, 5, 6 };

    string[] expectedNames =
    {
        "Byte", "Decimal", "Double", "Float", "Int", "Long", "Sbyte", "Short",
        "Uint", "Ulong", "Ushort", "String", "Char", "Bool", "ArrowString"
    };

    // string[] entries are compared against Values[row].ToString(); all others as boxed values.
    object[][] expected =
    {
        new object[] { (byte)0, (byte)1, (byte)4, (byte)0, (byte)6 },
        new object[] { 0d, 1d, 4d, double.NaN, 6d },
        new object[] { 0d, 1d, 4d, double.NaN, 6d },
        new object[] { 0f, 1f, 4f, float.NaN, 6f },
        new object[] { 0, 1, 4, 0, 6 },
        new object[] { 0L, 1L, 4L, 0L, 6L },
        new object[] { (sbyte)0, (sbyte)1, (sbyte)4, (sbyte)0, (sbyte)6 },
        new object[] { (short)0, (short)1, (short)4, (short)0, (short)6 },
        new object[] { 0u, 1u, 4u, 0u, 6u },
        new object[] { 0ul, 1ul, 4ul, 0ul, 6ul },
        new object[] { (ushort)0, (ushort)1, (ushort)4, (ushort)0, (ushort)6 },
        new string[] { "0", "1", "4", "", "6" },
        new object[] { (ushort)65, (ushort)66, (ushort)69, (ushort)0, (ushort)71 },
        new object[] { true, false, true, false, true },
        new string[] { "foo", "foo", "foo", "", "foo" },
    };

    for (int c = 0; c < expected.Length; c++)
    {
        Assert.Equal(expectedNames[c], preview.ColumnView[c].Column.Name);

        string[] expectedText = expected[c] as string[];
        for (int i = 0; i < sampledRows.Length; i++)
        {
            object actual = preview.ColumnView[c].Values[sampledRows[i]];
            if (expectedText != null)
            {
                Assert.Equal(expectedText[i], actual.ToString());
            }
            else
            {
                Assert.Equal(expected[c][i], actual);
            }
        }
    }
}
/// <summary>
/// Trains and evaluates a multiclass iris classifier on data loaded from a SQL localdb database,
/// prints the timings, the metrics and two sample predictions, then detaches the database and waits
/// for a key press.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    var mlContext = new MLContext();

    // localdb SQL database connection string using a filepath to attach the database file into localdb
    string dbFilePath = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "Database", "Iris.mdf");
    string connectionString = $"Data Source = (LocalDB)\\MSSQLLocalDB;AttachDbFilename={dbFilePath};Database=Iris;Integrated Security = True";

    // ConnString Example: localdb SQL database connection string for 'localdb default location' (usually files located at /Users/YourUser/)
    //string connectionString = @"Data Source=(localdb)\MSSQLLocalDb;Initial Catalog=YOUR_DATABASE;Integrated Security=True;Pooling=False";
    //
    // ConnString Example: on-premises SQL Server Database (Integrated security)
    //string connectionString = @"Data Source=YOUR_SERVER;Initial Catalog=YOUR_DATABASE;Integrated Security=True;Pooling=False";
    //
    // ConnString Example: Azure SQL Database connection string
    //string connectionString = @"Server=tcp:yourserver.database.windows.net,1433; Initial Catalog = YOUR_DATABASE; Persist Security Info = False; User ID = YOUR_USER; Password = YOUR_PASSWORD; MultipleActiveResultSets = False; Encrypt = True; TrustServerCertificate = False; Connection Timeout = 60; ConnectRetryCount = 5; ConnectRetryInterval = 10;";

    string commandText = "SELECT * from IrisData";

    DatabaseLoader loader = mlContext.Data.CreateDatabaseLoader<IrisData>();
    var dbSource = new DatabaseSource(SqlClientFactory.Instance, connectionString, commandText);
    IDataView dataView = loader.Load(dbSource);

    // Materialized only so the data can be inspected in a debugger.
    var pre = dataView.Preview();

    var trainTestData = mlContext.Data.TrainTestSplit(dataView);

    // Feature engineering: map the label to a key and concatenate the four measurements.
    var featurePipeline = mlContext.Transforms.Conversion
        .MapValueToKey(inputColumnName: "class", outputColumnName: "KeyColumn")
        .Append(mlContext.Transforms.Concatenate(
            "Features",
            nameof(IrisData.petal_length),
            nameof(IrisData.petal_width),
            nameof(IrisData.sepal_length),
            nameof(IrisData.sepal_width)));

    // Append the trainer and map the predicted key back to its original value.
    var trainingPipeLine = featurePipeline
        .Append(mlContext.MulticlassClassification.Trainers.LbfgsMaximumEntropy(labelColumnName: "KeyColumn", featureColumnName: "Features"))
        .Append(mlContext.Transforms.Conversion.MapKeyToValue(outputColumnName: "class", inputColumnName: "KeyColumn"));

    Console.WriteLine("Training the ML model while streaming data from a SQL database...");
    Stopwatch trainingTimer = Stopwatch.StartNew();
    var model = trainingPipeLine.Fit(trainTestData.TrainSet);
    trainingTimer.Stop();
    Console.WriteLine("Elapsed time for training the model = {0} seconds", trainingTimer.ElapsedMilliseconds / 1000);

    Console.WriteLine("Evaluating the model...");
    Stopwatch evaluationTimer = Stopwatch.StartNew();
    var predictions = model.Transform(trainTestData.TestSet);

    // With the test predictions available, compute their metrics and print the results.
    var metrics = mlContext.MulticlassClassification.Evaluate(predictions, "KeyColumn", "Score");
    evaluationTimer.Stop();
    Console.WriteLine("Elapsed time for evaluating the model = {0} seconds", evaluationTimer.ElapsedMilliseconds / 1000);

    ConsoleHelper.PrintMultiClassClassificationMetrics("==== Evaluation Metrics training from a Database ====", metrics);

    Console.WriteLine("Trying a single prediction:");
    var predictionEngine = mlContext.Model.CreatePredictionEngine<IrisData, DataPrediction>(model);

    var sampleData1 = new IrisData
    {
        sepal_length = 6.1f,
        sepal_width = 3f,
        petal_length = 4.9f,
        petal_width = 1.8f,
        class1 = string.Empty
    };
    var sampleData2 = new IrisData
    {
        sepal_length = 5.1f,
        sepal_width = 3.5f,
        petal_length = 1.4f,
        petal_width = 0.2f,
        class1 = string.Empty
    };

    var irisPred1 = predictionEngine.Predict(sampleData1);
    var irisPred2 = predictionEngine.Predict(sampleData2);

    // MapValueToKey is applied with default parameters, so key values depend on the order of occurrence
    // in the data: "Iris-setosa", "Iris-versicolor", "Iris-virginica". A Score of [0.2, 0.3, 0.5]
    // therefore means 0.2 for Iris-setosa, 0.3 for Iris-versicolor and 0.5 for Iris-virginica.
    // This dictionary maps the index of the best score to the flower name.
    var IrisFlowers = new Dictionary<float, string>
    {
        { 0, "Setosa" },
        { 1, "versicolor" },
        { 2, "virginica" }
    };

    var bestScore1 = irisPred1.Score.Max();
    var bestScore2 = irisPred2.Score.Max();
    Console.WriteLine($"Predicted Label 1: {IrisFlowers[Array.IndexOf(irisPred1.Score, bestScore1)]} - Score:{bestScore1}", Color.YellowGreen);
    Console.WriteLine($"Predicted Label 2: {IrisFlowers[Array.IndexOf(irisPred2.Score, bestScore2)]} - Score:{bestScore2}", Color.YellowGreen);
    Console.WriteLine();

    //*** Detach database from localdb only if you used a conn-string with a filepath to attach the database file into localdb ***
    Console.WriteLine("... Detaching database from SQL localdb ...");
    DetachDatabase(connectionString);

    Console.WriteLine("=============== Press any key ===============");
    Console.ReadKey();
}