public void Train() {
    // Fetch the raw training data once; reused below for fitting and schema capture.
    var trainingData = GetTrainData();

    // Preprocessing chain: one-hot encode the area into the label column, featurize
    // both text columns, merge them into the single "Features" vector the trainer
    // reads, min-max normalize, and cache so multi-pass estimators do not re-read
    // the source on every sweep.
    var preprocessing =
        _context.Transforms.Categorical.OneHotEncoding("PredictedLabel", "Area")
            .Append(_context.Transforms.Text.FeaturizeText("TitleFeaturized", "Title"))
            .Append(_context.Transforms.Text.FeaturizeText("DescriptionFeaturized", "Description"))
            .Append(_context.Transforms.Concatenate("Features", "TitleFeaturized", "DescriptionFeaturized"))
            .Append(_context.Transforms.NormalizeMinMax("Features", "Features"))
            .AppendCacheCheckpoint(_context);

    // Poisson regression trainer; column names match the preprocessing output above.
    var trainer = _context.Regression.Trainers.LbfgsPoissonRegression("PredictedLabel", "Features");

    // Fit the preprocessing first, then fit the trainer on the preprocessed view.
    ITransformer preprocessor = preprocessing.Fit(trainingData);
    var preprocessedData = preprocessor.Transform(trainingData);
    ITransformer regressionModel = trainer.Fit(preprocessedData);

    // Full scoring chain = preprocessing followed by the regression model.
    ITransformer fullPipeline = preprocessor.Append(regressionModel);

    // Evaluate the combined pipeline and run the test pass against it.
    Evaluate(fullPipeline);
    Test(fullPipeline, preprocessedData.Schema);

    // Persist both stages separately so each can be reloaded on its own; each is
    // saved with the schema of the data it was fitted on.
    _context.Model.Save(preprocessor, trainingData.Schema, _prepSavePath);
    _context.Model.Save(regressionModel, preprocessedData.Schema, _modelSavePath);
}
public void CreatePredictionModel() {
    // The contribution calculator needs a scored example to fit against, so run a
    // single row through the regression model.
    var singleScoredRow = _regressionModel.Transform(MLContext.Data.TakeRows(_transformedData, 1));

    // Feature contribution calculator over all features, with raw (unnormalized)
    // contributions. Per the docs this estimator does not learn from the data
    // ("Does this estimator need to look at the data to train its parameters? No"),
    // so fitting on one row is sufficient.
    // https://docs.microsoft.com/en-us/dotnet/api/microsoft.ml.transforms.featurecontributioncalculatingestimator?view=ml-dotnet
    var contributionEstimator = MLContext.Transforms.CalculateFeatureContribution(_regressionModel, normalize: false);
    var contributionTransformer = contributionEstimator.Fit(singleScoredRow);

    // Full transformer chain: data transforms -> regression -> contribution calculation.
    var fullChain = _transformationModel
        .Append(_regressionModel)
        .Append(contributionTransformer);

    // Prediction engine over the complete chain.
    _predictionEngine = MLContext.Model.CreatePredictionEngine <FeatureContributionData, FeatureContributionPrediction>(fullChain);
}
/// <summary>
/// Trains an image-classification model end to end: loads and splits the image data,
/// builds and fits the preprocessing (image bytes + label key mapping), trains a
/// transfer-learning classifier, evaluates, and saves two variants of the model
/// (file-path based and in-memory-image based). Optionally runs a single prediction.
/// </summary>
/// <param name="testImagePath">Optional path to an image for a single smoke-test prediction.</param>
public void TrainModel(string testImagePath = null) {
    #region Notes: Fundamental components
    /* Main components:
     * IDataView,
     * ITransformer,
     * IEstimator
     */
    //IDataView demoDataView;
    //ITransformer demoITransformer;
    //IEstimator<ITransformer> demoIEstimator;
    #endregion Notes: Fundamental components

    #region Notes: Conventional column names
    /* Conventional column names:
     * Input:
     *   Label
     *   Features
     * Output:
     *   PredictedLabel
     *   Score
     */
    #endregion Notes: Conventional column names

    #region Notes: Usual training process
    /* Usual training process:
     * 1. Load training/test datasets (IDataView)
     * 2. Build training pipeline (IEstimator)
     *    2.1 Construct preProcessing pipeline (IEstimator) (optional)
     *    2.2 Configure trainer (IEstimator)
     *    2.3 Construct postProcessing pipeline (optional)
     *    2.4 Construct training pipeline (preProcessing pipeline + trainer + postProcessing pipeline)
     * 3. Train model using training dataset (ITransformer)
     * 4. Evaluate model performance
     *    4.1 Make predictions on test data using trained model (IDataView)
     *    4.2 Compute evaluation metrics (metrics statistics)
     *    (optional) Retrain on full dataset (ITransformer)
     * 5. Save model to filesystem
     * 6. Make single prediction
     */
    #endregion Notes: Usual training process

    // Load data, shuffle, and hold out 20% for validation/evaluation.
    IDataView imagesInfo = LoadData(_dataFolder);
    imagesInfo = mlContext.Data.ShuffleRows(imagesInfo);
    DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(imagesInfo, testFraction: 0.2);

    // Pre processing: read raw image bytes from disk paths...
    IEstimator <ITransformer> e_preProcessing_readImageBytes = mlContext.Transforms.LoadRawImageBytes(
        inputColumnName: nameof(ImageFileInputModel.ImagePath),
        outputColumnName: nameof(ImageInputModel.Image),
        imageFolder: _dataFolder);
    // ...and map string labels to a key type (ByValue keeps the ordering deterministic
    // so key indices are stable across runs).
    IEstimator <ITransformer> e_preProcessing_labelKeyMapping = mlContext.Transforms.Conversion.MapValueToKey(
        inputColumnName: nameof(BaseInputModel.Label),
        outputColumnName: "LabelAsKey",
        keyOrdinality: Microsoft.ML.Transforms.ValueToKeyMappingEstimator.KeyOrdinality.ByValue);
    // The two preprocessing steps are fitted separately so they can later be
    // recombined in different orders for the two saved model variants below.
    ITransformer t_preProcessing_labelKeyMapping = e_preProcessing_labelKeyMapping.Fit(imagesInfo);
    ITransformer t_preProcessing_readImageBytes = e_preProcessing_readImageBytes.Fit(imagesInfo);
    ITransformer t_preProcessingPipeline = t_preProcessing_labelKeyMapping.Append(t_preProcessing_readImageBytes);

    // Core Model training pipeline. The test set is pre-transformed because the
    // trainer consumes it directly as its validation set.
    IDataView testSetTransformed = t_preProcessingPipeline.Transform(dataSplit.TestSet);
    ImageClassificationTrainer.Options trainerSettings = new ImageClassificationTrainer.Options {
        FeatureColumnName = nameof(ImageInputModel.Image),
        LabelColumnName = "LabelAsKey",
        Arch = ImageClassificationTrainer.Architecture.ResnetV2101, // transfer learning base
        Epoch = 100,
        BatchSize = 200,
        LearningRate = 0.05f,
        MetricsCallback = (m) => Console.WriteLine(m), // log per-epoch training metrics
        ValidationSet = testSetTransformed,
        WorkspacePath = _workspaceFolder
    };
    IEstimator <ITransformer> e_trainer = mlContext.MulticlassClassification.Trainers.ImageClassification(trainerSettings);
    // Post processing: map the predicted key back to the original label value.
    IEstimator <ITransformer> e_postProcessing_labelKeyMapping = mlContext.Transforms.Conversion.MapKeyToValue(
        inputColumnName: "PredictedLabel",
        outputColumnName: nameof(PredictionModel.PredictedLabel));
    IEstimator <ITransformer> trainingPipeline = e_trainer.Append(e_postProcessing_labelKeyMapping);

    // Train
    #region Notes: On metadata
    /*
     * Metadata source: https://aka.ms/mlnet-resources/resnet_v2_101_299.meta
     * System.IO.Path.GetTempPath() - C:\Users\User\AppData\Local\Temp\
     */
    #endregion
    // Train on the preprocessed training split.
    ITransformer trainedModel = Train(trainingPipeline, t_preProcessingPipeline.Transform(dataSplit.TrainSet));

    #region Notes: Model composition
    //var extractPixelsEst = mlContext.Transforms.ExtractPixels();
    //var resizeEst = mlContext.Transforms.ResizeImages();
    //IEstimator<ITransformer> est = mlContext.Model.LoadTensorFlowModel("MODEL_PATH")
    //.ScoreTensorFlowModel(
    //outputColumnNames: new[] { "some-name" },
    //inputColumnNames: new[] { "Features" }, addBatchDimensionInput: true);
    #endregion Model composition

    // Evaluate/Save FileSystemModel: full chain starting from image file paths.
    ITransformer fileSystemModel = t_preProcessingPipeline.Append(trainedModel);
    Evaluate(fileSystemModel, dataSplit.TestSet);
    SaveModel(fileSystemModel, new DataViewSchema.Column[] {
        imagesInfo.Schema.First(x => x.Name == nameof(ImageFileInputModel.ImagePath)),
        imagesInfo.Schema.First(x => x.Name == nameof(BaseInputModel.Label))
    }, ResolveModelFileName("fromFile"));

    // Evaluate/Save InMemoryModel: chain that expects image bytes already in memory,
    // so only the label-key mapping is prepended to the trained model.
    // NOTE(review): the variable is named "testSet..." but this transforms
    // dataSplit.TrainSet — confirm whether TestSet was intended here.
    IDataView testSetImageExtracted = t_preProcessing_readImageBytes.Transform(dataSplit.TrainSet);
    ITransformer inMemoryModel = t_preProcessing_labelKeyMapping.Append(trainedModel);
    Evaluate(inMemoryModel, testSetImageExtracted);
    SaveModel(inMemoryModel, new DataViewSchema.Column[] {
        testSetImageExtracted.Schema.First(x => x.Name == nameof(ImageFileInputModel.ImagePath)),
        testSetImageExtracted.Schema.First(x => x.Name == nameof(BaseInputModel.Label))
    }, ResolveModelFileName("inMemory"));

    // Try a single prediction
    if (!string.IsNullOrWhiteSpace(testImagePath)) {
        MakeSinglePrediction(testImagePath);
    }
}
static void Main(string[] args) { var DataDir = new DirectoryInfo(GetAbsolutePath(@"..\..\..\Data")); //Create MLContext MLContext mlContext = new MLContext(); //Load Data File var Lines = File.ReadAllLines(Path.Combine(DataDir.FullName, TrainingFile)); var ListData = new List <BreastCancerData>(); int counter = 0; Lines.ToList().ForEach(x => { counter++; //skip header if (counter > 1) { var Cols = x.Split(','); ListData.Add(new BreastCancerData() { SampleNo = float.Parse(Cols[0]), ClumpThickness = float.Parse(Cols[1]), UniformityOfCellSize = float.Parse(Cols[2]), UniformityOfCellShape = float.Parse(Cols[3]), MarginalAdhesion = float.Parse(Cols[4]), SingleEpithelialCellSize = float.Parse(Cols[5]), BareNuclei = float.Parse(Cols[6] == "?" ? "0" : Cols[6]), BlandChromatin = float.Parse(Cols[7]), NormalNucleoli = float.Parse(Cols[8]), Mitoses = float.Parse(Cols[9]), ClassCategory = int.Parse(Cols[10]), IsBenign = Cols[10] == "4" ? false:true }); } }); IDataView allData = mlContext.Data.LoadFromEnumerable <BreastCancerData>(ListData); DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(allData, testFraction: 0.2); IDataView trainData = dataSplit.TrainSet; IDataView testData = dataSplit.TestSet; // Data process configuration with pipeline data transformations var dataPrepTransform = mlContext.Transforms.CopyColumns("Label", "IsBenign") .Append(mlContext.Transforms.IndicateMissingValues(new[] { new InputOutputColumnPair("BareNuclei_MissingIndicator", "BareNuclei") })) .Append(mlContext.Transforms.Conversion.ConvertType(new[] { new InputOutputColumnPair("BareNuclei_MissingIndicator", "BareNuclei_MissingIndicator") })) .Append(mlContext.Transforms.ReplaceMissingValues(new[] { new InputOutputColumnPair("BareNuclei", "BareNuclei") })) .Append(mlContext.Transforms.Concatenate("Features", new[] { "BareNuclei_MissingIndicator", "BareNuclei", "ClumpThickness", "UniformityOfCellSize", "UniformityOfCellShape", "MarginalAdhesion", 
"SingleEpithelialCellSize", "BlandChromatin", "NormalNucleoli", "Mitoses" })) .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features")) .AppendCacheCheckpoint(mlContext); // Create data prep transformer ITransformer dataPrepTransformer = dataPrepTransform.Fit(trainData); IDataView transformedTrainingData = dataPrepTransformer.Transform(trainData); var SdcaEstimator = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(labelColumnName: "Label", featureColumnName: "Features"); // Build machine learning model var trainedModel = dataPrepTransformer.Append(SdcaEstimator.Fit(transformedTrainingData)); // Apply data prep transformer to test data IDataView testDataPredictions = trainedModel.Transform(testData); // Measure trained model performance // Extract model metrics and get eval params var metrics = mlContext.BinaryClassification.Evaluate(testDataPredictions); Console.WriteLine(); Console.WriteLine("Model quality metrics evaluation"); Console.WriteLine("--------------------------------"); Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}"); Console.WriteLine($"Auc: {metrics.AreaUnderRocCurve:P2}"); Console.WriteLine($"F1Score: {metrics.F1Score:P2}"); Console.WriteLine("=============== End of model evaluation ==============="); var modelRelativePath = GetAbsolutePath("MLModel.zip"); mlContext.Model.Save(trainedModel, trainData.Schema, GetAbsolutePath(modelRelativePath)); Console.WriteLine("The model is saved to {0}", GetAbsolutePath(modelRelativePath)); ITransformer mlModel = mlContext.Model.Load(GetAbsolutePath(modelRelativePath), out DataViewSchema inputSchema); var predEngine = mlContext.Model.CreatePredictionEngine <BreastCancerData, PredictionBreastCancerData>(mlModel); // Create sample data to do a single prediction with it /* * //jinak * BreastCancerData sampleData = new BreastCancerData() * { * SampleNo = 0, * ClumpThickness = 5, * UniformityOfCellSize = 1, * UniformityOfCellShape = 1, * MarginalAdhesion = 1, * SingleEpithelialCellSize 
= 2, * BareNuclei = 1, * BlandChromatin = 3, * NormalNucleoli = 1, * Mitoses = 1 * * };*/ //ganas BreastCancerData sampleData = new BreastCancerData() { SampleNo = 0, ClumpThickness = 8, UniformityOfCellSize = 10, UniformityOfCellShape = 10, MarginalAdhesion = 8, SingleEpithelialCellSize = 7, BareNuclei = 10, BlandChromatin = 9, NormalNucleoli = 7, Mitoses = 1 }; // Try a single prediction PredictionBreastCancerData predictionResult = predEngine.Predict(sampleData); Console.WriteLine($"Single Prediction --> Predicted: { (predictionResult.Prediction ? "Jinak":"Ganas") }"); Console.ReadKey(); }
/// <summary>
/// Builds a per-period feature table from the featurable sources, transforms it
/// (concatenate + normalize), cross-validates the supplied regression estimator on
/// the transformed data, and returns the data-prep transformer chained with the
/// best (highest R²) fold's model.
/// </summary>
/// <param name="estimator">Regression estimator to cross-validate and fit.</param>
/// <param name="to">Exclusive end of the date range to build features for.</param>
/// <returns>Transformer chain: data prep followed by the top cross-validated model.</returns>
public ITransformer Fit(IEstimator <ITransformer> estimator, DateTime to) {
    Console.WriteLine($"Running {estimator.GetType().Name}");
    MLContext mlContext = new MLContext();
    // Range starts at the earliest Begin across all featurable sources.
    DateTime from = _featurables.Min(c => c.Begin);
    //to = _featurables.Max(c => c.End);
    Period period = Period.S;
    int periodCount = 5;
    List <Dictionary <string, float> > features = new List <Dictionary <string, float> >();
    // Walk the date range in fixed steps; only keep dates where every source has
    // features AND a future-pip label exists.
    for (DateTime date = from; date < to; date = date.AddPeriod(period, periodCount)) {
        if (_featurables.All(c => c.HasFeatures(date)) && _pipExpectation._futurPip.ContainsKey(date)) {
            Dictionary <string, float> feature = new Dictionary <string, float>();
            var featuresDictionnary = GetFeatures(date);
            // GetFeatures may fail for a date even when HasFeatures was true; skip it.
            if (featuresDictionnary == null) { continue; }
            foreach (var item in featuresDictionnary) { feature.Add(item.Key, item.Value); }
            // Regression target: the future pip's Long value at this date.
            feature["Label"] = decimal.ToSingle(_pipExpectation._futurPip[date].Long);
            features.Add(feature);
        }
    }
    IDataView data = new FloatsDataView(features);
    // Debug dump of the assembled dataset to the working directory.
    using (FileStream stream = new FileStream("data.tsv", FileMode.Create)) mlContext.Data.SaveAsText(data, stream);
    // NOTE(review): features.First() throws if no date qualified — confirm callers
    // guarantee a non-empty range.
    string[] featureNames = features.First().Keys.Where(c => c != "Label").ToArray();
    // NOTE(review): both mean-variance and min-max normalization are applied in
    // sequence — confirm the double normalization is intentional.
    var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
        .Append(mlContext.Transforms.NormalizeMeanVariance("Features"))
        .Append(mlContext.Transforms.NormalizeMinMax("Features"))
        .AppendCacheCheckpoint(mlContext);
    //.Append(mlContext.Regression.Trainers.OnlineGradientDescent(numberOfIterations:10));
    // Create data prep transformer and apply it before cross-validation.
    ITransformer transformData = pipeline.Fit(data);
    IDataView transformedData = transformData.Transform(data);
    //IEstimator<ITransformer> estimator = mlContext.Regression.Trainers.OnlineGradientDescent(numberOfIterations:10);
    var cvResults = mlContext.Regression.CrossValidate(transformedData, estimator, numberOfFolds: 5);
    // Rank the folds by R² and keep the best model.
    var models = cvResults.OrderByDescending(fold => fold.Metrics.RSquared).ToArray();
    var topModel = models[0];
    Console.WriteLine($"\tR2: { topModel.Metrics.RSquared}");
    Console.WriteLine($"\tLossfunction: { topModel.Metrics.LossFunction}");
    Console.WriteLine($"\tMeanAbsoluteError: { topModel.Metrics.MeanAbsoluteError}");
    //RegressionMetrics trainedModelMetrics = mlContext.Regression.Evaluate(testDataPredictions);
    //double rSquared = trainedModelMetrics.RSquared;
    return(transformData.Append(topModel.Model));
}
/// <summary>
/// Trains a multiclass classifier (SDCA maximum entropy) on the wheat seeds dataset,
/// reports metrics, saves/reloads the model, and runs a single sample prediction,
/// mapping the best score index back to the seed variety name.
/// </summary>
static void Main(string[] args) {
    //Create MLContext
    MLContext mlContext = new MLContext();

    //Load Data File
    IDataView trainData = mlContext.Data.LoadFromTextFile <SeedData>(GetAbsolutePath("../../../Data/seeds_dataset.txt"), separatorChar: '\t', hasHeader: false);

    // Data process configuration: map the category to a key-typed Label, assemble
    // and normalize the feature vector, and cache for the multi-pass trainer.
    var dataPrepTransform = mlContext.Transforms.Conversion.MapValueToKey("Label", "Category")
        .Append(mlContext.Transforms.Concatenate("Features", new[] { "Area", "Perimeter", "Compactness", "Length", "Width", "AsymmetryCoefficient", "LengthOfKernel" }))
        .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"))
        .AppendCacheCheckpoint(mlContext);

    // Create data prep transformer
    ITransformer dataPrepTransformer = dataPrepTransform.Fit(trainData);
    IDataView transformedTrainingData = dataPrepTransformer.Transform(trainData);

    // Choose learner
    var sdcaEstimator = mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(labelColumnName: "Label", featureColumnName: "Features");

    // Build machine learning model (data prep + trained SDCA model).
    var trainedModel = dataPrepTransformer.Append(sdcaEstimator.Fit(transformedTrainingData));

    // Measure trained model performance.
    // FIX: score the RAW data through the full pipeline instead of re-applying the
    // prep transforms to already-transformed data. Note there is no held-out split
    // here, so these are training-set metrics (header wording fixed accordingly).
    var scoredData = trainedModel.Transform(trainData);
    var testMetrics = mlContext.MulticlassClassification.Evaluate(scoredData);
    Console.WriteLine($"*************************************************************************************************************");
    Console.WriteLine($"* Metrics for Multi-class Classification model - Training Data ");
    Console.WriteLine($"*------------------------------------------------------------------------------------------------------------");
    Console.WriteLine($"* MicroAccuracy: {testMetrics.MicroAccuracy:0.###}");
    Console.WriteLine($"* MacroAccuracy: {testMetrics.MacroAccuracy:0.###}");
    Console.WriteLine($"* LogLoss: {testMetrics.LogLoss:#.###}");
    Console.WriteLine($"* LogLossReduction: {testMetrics.LogLossReduction:#.###}");
    Console.WriteLine($"*************************************************************************************************************");

    var modelRelativePath = GetAbsolutePath("MLModel.zip");
    mlContext.Model.Save(trainedModel, trainData.Schema, GetAbsolutePath(modelRelativePath));
    Console.WriteLine("The model is saved to {0}", GetAbsolutePath(modelRelativePath));

    // Reload the saved model and build a single-row prediction engine from it.
    ITransformer mlModel = mlContext.Model.Load(GetAbsolutePath(modelRelativePath), out DataViewSchema inputSchema);
    var predEngine = mlContext.Model.CreatePredictionEngine <SeedData, SeedPrediction>(mlModel);

    // Recover the key -> original category mapping so score indices can be named.
    VBuffer <int> keys = default;
    predEngine.OutputSchema["PredictedLabel"].GetKeyValues(ref keys);
    var labelsArray = keys.DenseValues().ToArray();

    // Human-readable names of the three seed varieties (local was previously
    // misnamed "CancerTypes" — copy-paste from another sample).
    Dictionary <int, string> seedVarieties = new Dictionary <int, string> {
        { 1, "Kama" },
        { 2, "Rosa" },
        { 3, "Canadian" }
    };

    // Create sample data to do a single prediction with it
    var sampleData = mlContext.Data.CreateEnumerable <SeedData>(trainData, false).First();

    // Try a single prediction: the predicted class is the index of the highest score.
    SeedPrediction predictionResult = predEngine.Predict(sampleData);
    var maxIndex = Array.IndexOf(predictionResult.Score, predictionResult.Score.Max());
    Console.WriteLine($"Single Prediction --> Predicted label and score: {seedVarieties[labelsArray[maxIndex]]}: {predictionResult.Score[maxIndex]:0.####}");
    Console.ReadKey();
}
/// <summary>
/// Trains a field-aware factorization machine binary classifier over Airbnb-style
/// listing data: derives a "recommended" label from review/availability heuristics,
/// one-hot encodes categorical columns, trains, evaluates, saves/reloads the model,
/// and prints predictions for the first ten listings.
/// </summary>
static void Main(string[] args) {
    var filePath = GetAbsolutePath("../../../Data/listings_tsv.txt");

    // Parse the TSV, skipping the header row.
    List <ListingData> dataFromCsv = new List <ListingData>();
    foreach (var line in File.ReadAllLines(filePath).Skip(1)) {
        var cols = line.Split('\t');
        dataFromCsv.Add(new ListingData() {
            name = cols[1],
            neighbourhood = cols[5],
            room_type = cols[8],
            price = float.Parse(cols[9]),
            minimum_nights = int.Parse(cols[10]),
            availability_365 = int.Parse(cols[15]),
            // Derived recommendation label: min nights < 5, num reviews > 1,
            // reviews per month > 0.1, host listing count >= 1, availability per year > 10.
            Label = (int.Parse(cols[10]) < 5 && int.Parse(cols[11]) > 1 && float.Parse(cols[13]) > 0.1 && int.Parse(cols[14]) >= 1 && int.Parse(cols[15]) > 10)
        });
    }

    //Create MLContext
    MLContext mlContext = new MLContext();

    //Load Data File
    IDataView trainData = mlContext.Data.LoadFromEnumerable <ListingData>(dataFromCsv);

    // Data process configuration with pipeline data transformations.
    // NOTE(review): "latitude"/"longitude" are concatenated into Features but are
    // never assigned while parsing the TSV above, so they stay at their defaults —
    // confirm whether they should be read from their TSV columns.
    var dataPrepTransform = mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "neighbourhood_encoded", inputColumnName: "neighbourhood")
        .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "room_type_encoded", inputColumnName: "room_type"))
        .Append(mlContext.Transforms.Concatenate("Features", new[] { "neighbourhood_encoded", "room_type_encoded", "price", "minimum_nights", "availability_365", "latitude", "longitude" }))
        .AppendCacheCheckpoint(mlContext);

    // Create data transformer
    ITransformer dataPrepTransformer = dataPrepTransform.Fit(trainData);
    IDataView transformedTrainingData = dataPrepTransformer.Transform(trainData);

    // Choose learner: a field-aware factorization machine BINARY CLASSIFIER
    // (local was previously misnamed "CluteringEstimator").
    var ffmEstimator = mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(new string[] { "Features" });

    // Build machine learning model
    var trainedModel = dataPrepTransformer.Append(ffmEstimator.Fit(transformedTrainingData));

    // Measure trained model performance.
    // FIX: score the RAW data through the full pipeline instead of re-applying the
    // prep transforms to already-transformed data. No held-out split exists, so
    // these are training-set metrics.
    var scoredData = trainedModel.Transform(trainData);
    var metrics = mlContext.BinaryClassification.Evaluate(scoredData);
    Console.WriteLine();
    Console.WriteLine("Model quality metrics evaluation");
    Console.WriteLine("--------------------------------");
    Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
    Console.WriteLine($"Auc: {metrics.AreaUnderRocCurve:P2}");
    Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
    Console.WriteLine("=============== End of model evaluation ===============");

    var modelRelativePath = GetAbsolutePath("MLModel.zip");
    mlContext.Model.Save(trainedModel, trainData.Schema, GetAbsolutePath(modelRelativePath));
    Console.WriteLine("The model is saved to {0}", GetAbsolutePath(modelRelativePath));

    // Reload the saved model and build a single-row prediction engine from it.
    ITransformer mlModel = mlContext.Model.Load(GetAbsolutePath(modelRelativePath), out DataViewSchema inputSchema);
    var predEngine = mlContext.Model.CreatePredictionEngine <ListingData, ListingPrediction>(mlModel);

    // Try single predictions on the first 10 listings.
    var sampleDatas = mlContext.Data.CreateEnumerable <ListingData>(trainData, false).Take(10);
    foreach (var sampleData in sampleDatas) {
        ListingPrediction predictionResult = predEngine.Predict(sampleData);
        Console.WriteLine($"Single Prediction {sampleData.name} --> Predicted: { (predictionResult.PredictedLabel ? "Recommended" : "Not Recommended") }");
    }
    Console.ReadKey();
}
/// <summary>
/// Clusters the Adult Income dataset into two groups with k-means: one-hot encodes
/// categorical columns, assembles and normalizes features, trains, evaluates,
/// saves/reloads the model, and predicts the cluster of two sample rows.
/// </summary>
static void Main(string[] args) {
    //Create MLContext
    MLContext mlContext = new MLContext();

    //Load Data File
    IDataView trainData = mlContext.Data.LoadFromTextFile <IncomeData>(GetAbsolutePath("../../../Data/AdultIncome.csv"), separatorChar: ',', hasHeader: true);

    // Data process configuration with pipeline data transformations.
    // NOTE(review): workclass_encoded, education_encoded, occupation_encoded and
    // native_country_encoded are produced but never concatenated into "Features" —
    // confirm whether they were meant to be included.
    var dataPrepTransform = mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "workclass_encoded", inputColumnName: "workclass")
        .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "education_encoded", inputColumnName: "education"))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "marital_status_encoded", inputColumnName: "marital_status"))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "occupation_encoded", inputColumnName: "occupation"))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "relationship_encoded", inputColumnName: "relationship"))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "race_encoded", inputColumnName: "race"))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "sex_encoded", inputColumnName: "sex"))
        .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "native_country_encoded", inputColumnName: "native_country"))
        .Append(mlContext.Transforms.Concatenate("Features", new[] { "age", "fnlwgt", "education_num", "marital_status_encoded", "relationship_encoded", "race_encoded", "sex_encoded", "hours_per_week" }))
        .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"))
        .AppendCacheCheckpoint(mlContext);

    // Create data transformer
    ITransformer dataPrepTransformer = dataPrepTransform.Fit(trainData);
    IDataView transformedTrainingData = dataPrepTransformer.Transform(trainData);

    // Choose learner: k-means with 2 clusters (local was previously misspelled
    // "CluteringEstimator").
    var kMeansEstimator = mlContext.Clustering.Trainers.KMeans(featureColumnName: "Features", numberOfClusters: 2);

    // Build machine learning model
    var trainedModel = dataPrepTransformer.Append(kMeansEstimator.Fit(transformedTrainingData));

    // Measure trained model performance.
    // FIX: score the RAW data through the full pipeline instead of re-applying the
    // prep transforms to already-transformed data. No held-out split exists, so
    // these are training-set metrics (header wording fixed accordingly).
    var scoredData = trainedModel.Transform(trainData);
    var testMetrics = mlContext.Clustering.Evaluate(scoredData);
    Console.WriteLine($"*************************************************************************************************************");
    Console.WriteLine($"* Metrics for Clustering model - Training Data ");
    Console.WriteLine($"*------------------------------------------------------------------------------------------------------------");
    Console.WriteLine($"* AverageDistance: {testMetrics.AverageDistance:0.###}");
    Console.WriteLine($"* DaviesBouldinIndex: {testMetrics.DaviesBouldinIndex:0.###}");
    Console.WriteLine($"* NormalizedMutualInformation:{testMetrics.NormalizedMutualInformation:#.###}");
    Console.WriteLine($"*************************************************************************************************************");

    var modelRelativePath = GetAbsolutePath("MLModel.zip");
    mlContext.Model.Save(trainedModel, trainData.Schema, GetAbsolutePath(modelRelativePath));
    Console.WriteLine("The model is saved to {0}", GetAbsolutePath(modelRelativePath));

    // Reload the saved model and build a single-row prediction engine from it.
    ITransformer mlModel = mlContext.Model.Load(GetAbsolutePath(modelRelativePath), out DataViewSchema inputSchema);
    var predEngine = mlContext.Model.CreatePredictionEngine <IncomeData, ClusterPrediction>(mlModel);

    // Predict the cluster for the first two rows of the dataset.
    var sampleData1 = mlContext.Data.CreateEnumerable <IncomeData>(trainData, false).First();
    var sampleData2 = mlContext.Data.CreateEnumerable <IncomeData>(trainData, false).Skip(1).First();

    ClusterPrediction predictionResult = predEngine.Predict(sampleData1);
    Console.WriteLine($"Sample 1");
    Console.WriteLine($"Cluster: {predictionResult.PredictedClusterId}");
    Console.WriteLine($"Distances: {string.Join(" ", predictionResult.Distances)}");
    Console.WriteLine($"Sample 2");
    ClusterPrediction predictionResult2 = predEngine.Predict(sampleData2);
    Console.WriteLine($"Cluster: {predictionResult2.PredictedClusterId}");
    Console.WriteLine($"Distances: {string.Join(" ", predictionResult2.Distances)}");
    Console.WriteLine("both sample must be on the same cluster..");
    Console.ReadKey();
}
/// <summary>
/// Trains a matrix-factorization recommender over restaurant/reviewer scores:
/// maps names to key columns, trains, evaluates as regression, saves/reloads the
/// model, and prints recommend/not-recommend decisions for the first ten rows.
/// </summary>
static void Main(string[] args) {
    //Create MLContext
    MLContext mlContext = new MLContext();
    var filePath = GetAbsolutePath("../../../Data/IENS_USER_ITEM.csv");

    //Load Data File
    IDataView trainData = mlContext.Data.LoadFromTextFile <RestaurantData>(filePath, separatorChar: ',', hasHeader: true);

    // Data process configuration: matrix factorization needs key-typed matrix
    // column/row indices, so map restaurant and reviewer names to keys and copy
    // the score into the conventional "Label" column.
    var dataPrepTransform = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: "RestaurantNameEncoded", inputColumnName: nameof(RestaurantData.RestaurantName))
        .Append(mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: "ReviewerEncoded", inputColumnName: nameof(RestaurantData.Reviewer)))
        .Append(mlContext.Transforms.CopyColumns("Label", nameof(RestaurantData.Score)))
        .AppendCacheCheckpoint(mlContext);

    // Create data transformer
    ITransformer dataPrepTransformer = dataPrepTransform.Fit(trainData);
    IDataView transformedTrainingData = dataPrepTransformer.Transform(trainData);

    // Choose learner
    var estimator = mlContext.Recommendation().Trainers.MatrixFactorization(
        labelColumnName: "Label",
        matrixColumnIndexColumnName: "RestaurantNameEncoded",
        matrixRowIndexColumnName: "ReviewerEncoded");

    // Build machine learning model
    var trainedModel = dataPrepTransformer.Append(estimator.Fit(transformedTrainingData));

    // Measure trained model performance.
    // FIX: score the RAW data through the full pipeline instead of re-applying the
    // prep transforms to already-transformed data. There is no held-out split, so
    // these are training-set metrics.
    var scoredData = trainedModel.Transform(trainData);
    var metrics = mlContext.Regression.Evaluate(scoredData, labelColumnName: "Label", scoreColumnName: "Score");
    Console.WriteLine();
    Console.WriteLine("Model quality metrics evaluation");
    Console.WriteLine("Root Mean Squared Error : " + metrics.RootMeanSquaredError.ToString());
    Console.WriteLine("RSquared: " + metrics.RSquared.ToString());
    Console.WriteLine("=============== End of model evaluation ===============");

    var modelRelativePath = GetAbsolutePath("MLModel.zip");
    mlContext.Model.Save(trainedModel, trainData.Schema, GetAbsolutePath(modelRelativePath));
    Console.WriteLine("The model is saved to {0}", GetAbsolutePath(modelRelativePath));

    // Reload the saved model and build a single-row prediction engine from it.
    ITransformer mlModel = mlContext.Model.Load(GetAbsolutePath(modelRelativePath), out DataViewSchema inputSchema);
    var predEngine = mlContext.Model.CreatePredictionEngine <RestaurantData, RestaurantPrediction>(mlModel);

    // Predict for the first 10 rows; recommend when the predicted score clears 7.5.
    var sampleDatas = mlContext.Data.CreateEnumerable <RestaurantData>(trainData, false).Take(10);
    foreach (var sampleData in sampleDatas) {
        RestaurantPrediction predictionResult = predEngine.Predict(sampleData);
        if (Math.Round(predictionResult.Score, 1) > 7.5) {
            Console.WriteLine("Restaurant " + sampleData.RestaurantName + " is recommended for reviewer " + sampleData.Reviewer);
        } else {
            Console.WriteLine("Restaurant " + sampleData.RestaurantName + " is not recommended for reviewer " + sampleData.Reviewer);
        }
    }
    Console.ReadKey();
}