Example #1
        public void Train()
        {
            var transforms = _context.Transforms;

            var data = GetTrainData();

            // Preparation
            var dataProcessPipeline = transforms.Categorical.OneHotEncoding("PredictedLabel", "Area")
                                      .Append(transforms.Text.FeaturizeText("TitleFeaturized", "Title"))
                                      .Append(transforms.Text.FeaturizeText("DescriptionFeaturized", "Description"))
                                      // The learning algorithm only reads from the Features column
                                      .Append(transforms.Concatenate("Features", "TitleFeaturized", "DescriptionFeaturized"))
                                      .Append(_context.Transforms.NormalizeMinMax("Features", "Features"))
                                      // Cache the DataView so that estimators which iterate over the data multiple times
                                      // can read from the cache instead of re-reading the source file, which may improve performance.
                                      .AppendCacheCheckpoint(_context);

            // Estimator
            var estimator = _context.Regression.Trainers.LbfgsPoissonRegression("PredictedLabel", "Features");

            ITransformer prepModel    = dataProcessPipeline.Fit(data);
            var          prepData     = prepModel.Transform(data);
            ITransformer trainedModel = estimator.Fit(prepData);

            ITransformer trainedPipe = prepModel.Append(trainedModel);

            // Evaluate the trained pipeline
            Evaluate(trainedPipe);

            Test(trainedPipe, prepData.Schema);

            // Save model to disk
            _context.Model.Save(prepModel, data.Schema, _prepSavePath);
            _context.Model.Save(trainedModel, prepData.Schema, _modelSavePath);
        }
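The method above persists the data-preparation pipeline and the trained regressor as two separate files. A minimal sketch (not part of the original sample) of how they could be reloaded and chained for scoring is shown below; the IssueInput and IssuePrediction classes are hypothetical placeholders for the sample's actual input/output types, while _context, _prepSavePath and _modelSavePath are the same members used above.

        public void LoadAndPredict()
        {
            // Reload the two saved parts: the preparation pipeline and the trained model.
            ITransformer prepModel    = _context.Model.Load(_prepSavePath, out DataViewSchema prepSchema);
            ITransformer trainedModel = _context.Model.Load(_modelSavePath, out DataViewSchema trainedSchema);

            // Chain them back into a single scoring pipeline, mirroring prepModel.Append(trainedModel) above.
            ITransformer scoringPipeline = prepModel.Append(trainedModel);

            // IssueInput/IssuePrediction are hypothetical stand-ins for the sample's own types.
            var engine = _context.Model.CreatePredictionEngine<IssueInput, IssuePrediction>(scoringPipeline);

            var prediction = engine.Predict(new IssueInput
            {
                Area        = "area-infrastructure",
                Title       = "App crashes on startup",
                Description = "NullReferenceException when opening the main window"
            });

            Console.WriteLine($"Predicted score: {prediction.Score}");
        }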
Example #2
        public void CreatePredictionModel()
        {
            // Define a feature contribution calculator for all the features.
            // Don't normalize the contributions.
            // https://docs.microsoft.com/en-us/dotnet/api/microsoft.ml.transforms.featurecontributioncalculatingestimator?view=ml-dotnet
            // "Does this estimator need to look at the data to train its parameters? No"
            var regressionData = _regressionModel.Transform(MLContext.Data.TakeRows(_transformedData, 1));

            var featureContributionCalculator = MLContext.Transforms
                                                .CalculateFeatureContribution(_regressionModel, normalize: false) // Estimator
                                                .Fit(regressionData);                                             // Transformer

            // Create the full transformer chain.
            var scoringPipeline = _transformationModel
                                  .Append(_regressionModel)
                                  .Append(featureContributionCalculator);

            // Create the prediction engine.
            _predictionEngine = MLContext.Model.CreatePredictionEngine <FeatureContributionData, FeatureContributionPrediction>(scoringPipeline);
        }
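A minimal usage sketch for the engine created above. It assumes FeatureContributionPrediction maps the calculator's "FeatureContributions" output column to a float[] alongside the regression "Score"; the exact shape of the sample's types is not shown in the original.

        public void ExplainSinglePrediction(FeatureContributionData sample)
        {
            FeatureContributionPrediction prediction = _predictionEngine.Predict(sample);

            Console.WriteLine($"Score: {prediction.Score}");

            // Each entry is the contribution of one input feature to this prediction
            // (unnormalized, per the normalize: false setting above).
            for (int i = 0; i < prediction.FeatureContributions.Length; i++)
            {
                Console.WriteLine($"Feature {i}: {prediction.FeatureContributions[i]}");
            }
        }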
Example #3
        public void TrainModel(string testImagePath = null)
        {
            #region Notes: Fundamental components

            /*  Main components:
             *      IDataView,
             *      ITransformer,
             *      IEstimator
             */

            //IDataView demoDataView;
            //ITransformer demoITransformer;
            //IEstimator<ITransformer> demoIEstimator;
            #endregion Notes: Fundamental components
            #region Notes: Conventional column names

            /*  Conventional column names:
             *      Input:
             *          Label
             *          Features
             *      Output:
             *          PredictedLabel
             *          Score
             */
            #endregion Notes: Conventional column names
            #region Notes: Usual training process

            /*  Usual training process:
             *      1. Load training/test datasets (IDataView)
             *      2. Build training pipeline (IEstimator)
             *          2.1   Construct preProcessing pipeline (IEstimator) (optional)
             *          2.2   Configure trainer (IEstimator)
             *          2.3   Construct postProcessing pipeline (optional)
             *          2.4   Construct training pipeline (preProcessing pipeline + trainer + postProcessing pipeline)
             *      3. Train model using training dataset (ITransformer)
             *      4. Evaluate model performance
             *          4.1 Make predictions on test data using trained model (IDataView)
             *          4.2 Compute evaluation metrics (metrics statistics)
             *      (optional) Retrain on full dataset (ITransformer)
             *      5. Save model to filesystem
             *      6. Make single prediction
             */
            #endregion Notes: Usual training process

            // Load data
            IDataView imagesInfo = LoadData(_dataFolder);
            imagesInfo = mlContext.Data.ShuffleRows(imagesInfo);
            DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(imagesInfo, testFraction: 0.2);

            // Pre processing
            IEstimator <ITransformer> e_preProcessing_readImageBytes = mlContext.Transforms.LoadRawImageBytes(
                inputColumnName: nameof(ImageFileInputModel.ImagePath),
                outputColumnName: nameof(ImageInputModel.Image),
                imageFolder: _dataFolder);

            IEstimator <ITransformer> e_preProcessing_labelKeyMapping = mlContext.Transforms.Conversion.MapValueToKey(
                inputColumnName: nameof(BaseInputModel.Label),
                outputColumnName: "LabelAsKey",
                keyOrdinality: Microsoft.ML.Transforms.ValueToKeyMappingEstimator.KeyOrdinality.ByValue);


            ITransformer t_preProcessing_labelKeyMapping = e_preProcessing_labelKeyMapping.Fit(imagesInfo);
            ITransformer t_preProcessing_readImageBytes  = e_preProcessing_readImageBytes.Fit(imagesInfo);
            ITransformer t_preProcessingPipeline         = t_preProcessing_labelKeyMapping.Append(t_preProcessing_readImageBytes);


            // Core Model training pipeline
            IDataView testSetTransformed = t_preProcessingPipeline.Transform(dataSplit.TestSet);
            ImageClassificationTrainer.Options trainerSettings = new ImageClassificationTrainer.Options
            {
                FeatureColumnName = nameof(ImageInputModel.Image),
                LabelColumnName   = "LabelAsKey",
                Arch            = ImageClassificationTrainer.Architecture.ResnetV2101,
                Epoch           = 100,
                BatchSize       = 200,
                LearningRate    = 0.05f,
                MetricsCallback = (m) => Console.WriteLine(m),
                ValidationSet   = testSetTransformed,
                WorkspacePath   = _workspaceFolder
            };

            IEstimator <ITransformer> e_trainer = mlContext.MulticlassClassification.Trainers.ImageClassification(trainerSettings);
            IEstimator <ITransformer> e_postProcessing_labelKeyMapping = mlContext.Transforms.Conversion.MapKeyToValue(
                inputColumnName: "PredictedLabel",
                outputColumnName: nameof(PredictionModel.PredictedLabel));

            IEstimator <ITransformer> trainingPipeline = e_trainer.Append(e_postProcessing_labelKeyMapping);

            // Train
            #region Notes: On metadata

            /*
             * Metadata source: https://aka.ms/mlnet-resources/resnet_v2_101_299.meta
             * System.IO.Path.GetTempPath() -  C:\Users\User\AppData\Local\Temp\
             */
            #endregion
            ITransformer trainedModel = Train(trainingPipeline, t_preProcessingPipeline.Transform(dataSplit.TrainSet));

            #region Notes: Model composition
            //var extractPixelsEst = mlContext.Transforms.ExtractPixels();
            //var resizeEst = mlContext.Transforms.ResizeImages();
            //IEstimator<ITransformer> est = mlContext.Model.LoadTensorFlowModel("MODEL_PATH")
            //.ScoreTensorFlowModel(
            //outputColumnNames: new[] { "some-name" },
            //inputColumnNames: new[] { "Features" }, addBatchDimensionInput: true);
            #endregion Model composition

            // Evaluate/Save FileSystemModel
            ITransformer fileSystemModel = t_preProcessingPipeline.Append(trainedModel);
            Evaluate(fileSystemModel, dataSplit.TestSet);
            SaveModel(fileSystemModel,
                      new DataViewSchema.Column[] {
                imagesInfo.Schema.First(x => x.Name == nameof(ImageFileInputModel.ImagePath)),
                imagesInfo.Schema.First(x => x.Name == nameof(BaseInputModel.Label))
            },
                      ResolveModelFileName("fromFile"));

            // Evaluate/Save InMemoryModel
            IDataView testSetImageExtracted = t_preProcessing_readImageBytes.Transform(dataSplit.TestSet);

            ITransformer inMemoryModel = t_preProcessing_labelKeyMapping.Append(trainedModel);
            Evaluate(inMemoryModel, testSetImageExtracted);
            SaveModel(inMemoryModel,
                      new DataViewSchema.Column[] {
                testSetImageExtracted.Schema.First(x => x.Name == nameof(ImageFileInputModel.ImagePath)),
                testSetImageExtracted.Schema.First(x => x.Name == nameof(BaseInputModel.Label))
            },
                      ResolveModelFileName("inMemory"));

            //Try a single prediction
            if (!string.IsNullOrWhiteSpace(testImagePath))
            {
                MakeSinglePrediction(testImagePath);
            }
        }
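MakeSinglePrediction is called above but not shown. A sketch of what it could look like, assuming the file-based model saved via ResolveModelFileName("fromFile"), the sample's ImageFileInputModel / PredictionModel types, and that testImagePath resolves against the same image folder used for training:

        private void MakeSinglePrediction(string testImagePath)
        {
            // Load the model that takes an image path as input (saved above as "fromFile").
            ITransformer model = mlContext.Model.Load(ResolveModelFileName("fromFile"), out DataViewSchema inputSchema);

            var predictionEngine = mlContext.Model.CreatePredictionEngine<ImageFileInputModel, PredictionModel>(model);

            PredictionModel prediction = predictionEngine.Predict(new ImageFileInputModel { ImagePath = testImagePath });

            Console.WriteLine($"Image: {testImagePath} --> Predicted label: {prediction.PredictedLabel}");
        }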
Example #4
        static void Main(string[] args)
        {
            var DataDir = new DirectoryInfo(GetAbsolutePath(@"..\..\..\Data"));
            //Create MLContext
            MLContext mlContext = new MLContext();

            //Load Data File
            var Lines    = File.ReadAllLines(Path.Combine(DataDir.FullName, TrainingFile));
            var ListData = new List <BreastCancerData>();
            int counter  = 0;

            Lines.ToList().ForEach(x => {
                counter++;
                //skip header
                if (counter > 1)
                {
                    var Cols = x.Split(',');
                    ListData.Add(new BreastCancerData()
                    {
                        SampleNo                 = float.Parse(Cols[0]),
                        ClumpThickness           = float.Parse(Cols[1]),
                        UniformityOfCellSize     = float.Parse(Cols[2]),
                        UniformityOfCellShape    = float.Parse(Cols[3]),
                        MarginalAdhesion         = float.Parse(Cols[4]),
                        SingleEpithelialCellSize = float.Parse(Cols[5]),
                        BareNuclei               = float.Parse(Cols[6] == "?" ? "0" : Cols[6]),
                        BlandChromatin           = float.Parse(Cols[7]),
                        NormalNucleoli           = float.Parse(Cols[8]),
                        Mitoses                  = float.Parse(Cols[9]),
                        ClassCategory            = int.Parse(Cols[10]),
                        IsBenign                 = Cols[10] != "4"
                    });
                }
            });

            IDataView allData = mlContext.Data.LoadFromEnumerable <BreastCancerData>(ListData);

            DataOperationsCatalog.TrainTestData dataSplit = mlContext.Data.TrainTestSplit(allData, testFraction: 0.2);
            IDataView trainData = dataSplit.TrainSet;
            IDataView testData  = dataSplit.TestSet;

            // Data process configuration with pipeline data transformations
            var dataPrepTransform = mlContext.Transforms.CopyColumns("Label", "IsBenign")
                                    .Append(mlContext.Transforms.IndicateMissingValues(new[] { new InputOutputColumnPair("BareNuclei_MissingIndicator", "BareNuclei") }))
                                    .Append(mlContext.Transforms.Conversion.ConvertType(new[] { new InputOutputColumnPair("BareNuclei_MissingIndicator", "BareNuclei_MissingIndicator") }))
                                    .Append(mlContext.Transforms.ReplaceMissingValues(new[] { new InputOutputColumnPair("BareNuclei", "BareNuclei") }))
                                    .Append(mlContext.Transforms.Concatenate("Features", new[] { "BareNuclei_MissingIndicator", "BareNuclei", "ClumpThickness", "UniformityOfCellSize", "UniformityOfCellShape", "MarginalAdhesion", "SingleEpithelialCellSize", "BlandChromatin", "NormalNucleoli", "Mitoses" }))
                                    .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"))
                                    .AppendCacheCheckpoint(mlContext);

            // Create data prep transformer
            ITransformer dataPrepTransformer     = dataPrepTransform.Fit(trainData);
            IDataView    transformedTrainingData = dataPrepTransformer.Transform(trainData);

            var SdcaEstimator = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(labelColumnName: "Label", featureColumnName: "Features");

            // Build machine learning model
            var trainedModel = dataPrepTransformer.Append(SdcaEstimator.Fit(transformedTrainingData));

            // Apply data prep transformer to test data
            IDataView testDataPredictions = trainedModel.Transform(testData);

            // Measure trained model performance
            // Extract model metrics and get eval params
            var metrics = mlContext.BinaryClassification.Evaluate(testDataPredictions);

            Console.WriteLine();
            Console.WriteLine("Model quality metrics evaluation");
            Console.WriteLine("--------------------------------");
            Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
            Console.WriteLine($"Auc: {metrics.AreaUnderRocCurve:P2}");
            Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
            Console.WriteLine("=============== End of model evaluation ===============");

            var modelRelativePath = GetAbsolutePath("MLModel.zip");

            mlContext.Model.Save(trainedModel, trainData.Schema, GetAbsolutePath(modelRelativePath));
            Console.WriteLine("The model is saved to {0}", GetAbsolutePath(modelRelativePath));

            ITransformer mlModel    = mlContext.Model.Load(GetAbsolutePath(modelRelativePath), out DataViewSchema inputSchema);
            var          predEngine = mlContext.Model.CreatePredictionEngine <BreastCancerData, PredictionBreastCancerData>(mlModel);

            // Create sample data to do a single prediction with it

            /*
             * // benign sample
             * BreastCancerData sampleData = new BreastCancerData()
             * {
             *  SampleNo = 0,
             *  ClumpThickness = 5,
             *  UniformityOfCellSize = 1,
             *  UniformityOfCellShape = 1,
             *  MarginalAdhesion = 1,
             *  SingleEpithelialCellSize = 2,
             *  BareNuclei = 1,
             *  BlandChromatin = 3,
             *  NormalNucleoli = 1,
             *  Mitoses = 1
             *
             * };*/
            // malignant sample
            BreastCancerData sampleData = new BreastCancerData()
            {
                SampleNo                 = 0,
                ClumpThickness           = 8,
                UniformityOfCellSize     = 10,
                UniformityOfCellShape    = 10,
                MarginalAdhesion         = 8,
                SingleEpithelialCellSize = 7,
                BareNuclei               = 10,
                BlandChromatin           = 9,
                NormalNucleoli           = 7,
                Mitoses = 1
            };
            // Try a single prediction
            PredictionBreastCancerData predictionResult = predEngine.Predict(sampleData);

            Console.WriteLine($"Single Prediction --> Predicted:  { (predictionResult.Prediction ? "Benign" : "Malignant") }");
            Console.ReadKey();
        }
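The BreastCancerData and PredictionBreastCancerData classes are not included in the example. A sketch consistent with how they are used above, with property names and types inferred from the parsing code and from reading predictionResult.Prediction (requires using Microsoft.ML.Data; for ColumnName):

        public class BreastCancerData
        {
            public float SampleNo { get; set; }
            public float ClumpThickness { get; set; }
            public float UniformityOfCellSize { get; set; }
            public float UniformityOfCellShape { get; set; }
            public float MarginalAdhesion { get; set; }
            public float SingleEpithelialCellSize { get; set; }
            public float BareNuclei { get; set; }
            public float BlandChromatin { get; set; }
            public float NormalNucleoli { get; set; }
            public float Mitoses { get; set; }
            public int ClassCategory { get; set; }
            public bool IsBenign { get; set; }
        }

        public class PredictionBreastCancerData
        {
            // Binary classifiers write the predicted class to the PredictedLabel column.
            [ColumnName("PredictedLabel")]
            public bool Prediction { get; set; }

            public float Probability { get; set; }
            public float Score { get; set; }
        }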
Example #5
        public ITransformer Fit(IEstimator <ITransformer> estimator, DateTime to)
        {
            Console.WriteLine($"Running {estimator.GetType().Name}");
            MLContext mlContext = new MLContext();

            DateTime from = _featurables.Min(c => c.Begin);
            //to = _featurables.Max(c => c.End);
            Period period      = Period.S;
            int    periodCount = 5;

            List <Dictionary <string, float> > features = new List <Dictionary <string, float> >();

            for (DateTime date = from; date < to; date = date.AddPeriod(period, periodCount))
            {
                if (_featurables.All(c => c.HasFeatures(date)) && _pipExpectation._futurPip.ContainsKey(date))
                {
                    Dictionary <string, float> feature = new Dictionary <string, float>();
                    var featuresDictionary             = GetFeatures(date);

                    if (featuresDictionary == null)
                    {
                        continue;
                    }

                    foreach (var item in featuresDictionary)
                    {
                        feature.Add(item.Key, item.Value);
                    }
                    feature["Label"] = decimal.ToSingle(_pipExpectation._futurPip[date].Long);
                    features.Add(feature);
                }
            }

            IDataView data = new FloatsDataView(features);

            using (FileStream stream = new FileStream("data.tsv", FileMode.Create))
                mlContext.Data.SaveAsText(data, stream);

            string[] featureNames = features.First().Keys.Where(c => c != "Label").ToArray();

            var pipeline = mlContext.Transforms.Concatenate("Features", featureNames)
                           .Append(mlContext.Transforms.NormalizeMeanVariance("Features"))
                           .Append(mlContext.Transforms.NormalizeMinMax("Features"))
                           .AppendCacheCheckpoint(mlContext);
            //.Append(mlContext.Regression.Trainers.OnlineGradientDescent(numberOfIterations:10));

            // Create data prep transformer
            ITransformer transformData   = pipeline.Fit(data);
            IDataView    transformedData = transformData.Transform(data);

            //IEstimator<ITransformer> estimator = mlContext.Regression.Trainers.OnlineGradientDescent(numberOfIterations:10);

            var cvResults = mlContext.Regression.CrossValidate(transformedData, estimator, numberOfFolds: 5);


            // Apply transforms to training data
            var models = cvResults.OrderByDescending(fold => fold.Metrics.RSquared).ToArray();

            // Get Top Model
            var topModel = models[0];

            Console.WriteLine($"\tR2:                { topModel.Metrics.RSquared}");
            Console.WriteLine($"\tLossfunction:      { topModel.Metrics.LossFunction}");
            Console.WriteLine($"\tMeanAbsoluteError: { topModel.Metrics.MeanAbsoluteError}");

            //RegressionMetrics trainedModelMetrics = mlContext.Regression.Evaluate(testDataPredictions);
            //double rSquared = trainedModelMetrics.RSquared;

            return(transformData.Append(topModel.Model));
        }
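A minimal usage sketch (not in the original), assuming it lives in the same class so it can call Fit and reuse its fields. The OnlineGradientDescent call mirrors the trainer hinted at in the commented-out lines above; any regression trainer that reads the default "Label"/"Features" columns would fit.

        public ITransformer TrainWithOnlineGradientDescent(DateTime to)
        {
            MLContext mlContext = new MLContext();

            // Any regression trainer using the default "Label"/"Features" column names will do;
            // this one matches the commented-out estimator above.
            IEstimator<ITransformer> trainer = mlContext.Regression.Trainers.OnlineGradientDescent(numberOfIterations: 10);

            // Cross-validates the trainer over the featurized data and returns the
            // data-preparation chain with the best fold's model appended.
            return Fit(trainer, to);
        }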
Example #6
        static void Main(string[] args)
        {
            //Create MLContext
            MLContext mlContext = new MLContext();

            //Load Data File
            IDataView trainData = mlContext.Data.LoadFromTextFile <SeedData>(GetAbsolutePath("../../../Data/seeds_dataset.txt"), separatorChar: '\t', hasHeader: false);

            //Data process configuration with pipeline data transformations
            var dataPrepTransform = mlContext.Transforms.Conversion.MapValueToKey("Label", "Category")
                                    .Append(mlContext.Transforms.Concatenate("Features", new[] { "Area", "Perimeter", "Compactness", "Length", "Width", "AsymmetryCoefficient", "LengthOfKernel" }))
                                    .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"))
                                    .AppendCacheCheckpoint(mlContext);

            // Create data prep transformer
            ITransformer dataPrepTransformer     = dataPrepTransform.Fit(trainData);
            IDataView    transformedTrainingData = dataPrepTransformer.Transform(trainData);
            // Choose learner
            var SdcaEstimator = mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(labelColumnName: "Label", featureColumnName: "Features");

            // Build machine learning model
            var trainedModel = dataPrepTransformer.Append(SdcaEstimator.Fit(transformedTrainingData));

            // Measure trained model performance (evaluated on the training data; this example has no separate test split)
            var testData    = trainedModel.Transform(transformedTrainingData);
            var testMetrics = mlContext.MulticlassClassification.Evaluate(testData);

            Console.WriteLine($"*************************************************************************************************************");
            Console.WriteLine($"*       Metrics for Multi-class Classification model - Training Data     ");
            Console.WriteLine($"*------------------------------------------------------------------------------------------------------------");
            Console.WriteLine($"*       MicroAccuracy:    {testMetrics.MicroAccuracy:0.###}");
            Console.WriteLine($"*       MacroAccuracy:    {testMetrics.MacroAccuracy:0.###}");
            Console.WriteLine($"*       LogLoss:          {testMetrics.LogLoss:#.###}");
            Console.WriteLine($"*       LogLossReduction: {testMetrics.LogLossReduction:#.###}");
            Console.WriteLine($"*************************************************************************************************************");

            var modelRelativePath = GetAbsolutePath("MLModel.zip");

            mlContext.Model.Save(trainedModel, trainData.Schema, GetAbsolutePath(modelRelativePath));
            Console.WriteLine("The model is saved to {0}", GetAbsolutePath(modelRelativePath));

            ITransformer  mlModel    = mlContext.Model.Load(GetAbsolutePath(modelRelativePath), out DataViewSchema inputSchema);
            var           predEngine = mlContext.Model.CreatePredictionEngine <SeedData, SeedPrediction>(mlModel);
            VBuffer <int> keys       = default;

            predEngine.OutputSchema["PredictedLabel"].GetKeyValues(ref keys);

            var labelsArray = keys.DenseValues().ToArray();

            Dictionary <int, string> SeedTypes = new Dictionary <int, string>();

            SeedTypes.Add(1, "Kama");
            SeedTypes.Add(2, "Rosa");
            SeedTypes.Add(3, "Canadian");

            // Create sample data to do a single prediction with it
            var sampleData = mlContext.Data.CreateEnumerable <SeedData>(trainData, false).First();

            // Try a single prediction
            SeedPrediction predictionResult = predEngine.Predict(sampleData);
            var            maxScoreIndex    = Array.IndexOf(predictionResult.Score, predictionResult.Score.Max());

            Console.WriteLine($"Single Prediction --> Predicted label and score:  {SeedTypes[labelsArray[maxScoreIndex]]}: {predictionResult.Score[maxScoreIndex]:0.####}");

            Console.ReadKey();
        }
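The SeedData and SeedPrediction classes are not shown. A sketch consistent with the tab-separated, headerless file loaded above; the column order follows the UCI seeds dataset and is an assumption, and Category is typed int so the key values read into VBuffer<int> above match. Requires using Microsoft.ML.Data;.

        public class SeedData
        {
            [LoadColumn(0)] public float Area;
            [LoadColumn(1)] public float Perimeter;
            [LoadColumn(2)] public float Compactness;
            [LoadColumn(3)] public float Length;
            [LoadColumn(4)] public float Width;
            [LoadColumn(5)] public float AsymmetryCoefficient;
            [LoadColumn(6)] public float LengthOfKernel;
            [LoadColumn(7)] public int   Category;
        }

        public class SeedPrediction
        {
            // Per-class scores produced by the multiclass trainer; the largest entry
            // corresponds to the predicted label.
            public float[] Score { get; set; }
        }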
Example #7
        static void Main(string[] args)
        {
            var filePath = GetAbsolutePath("../../../Data/listings_tsv.txt");
            List <ListingData> DataFromCSV = new List <ListingData>();
            int RowCount = 0;

            foreach (var line in File.ReadAllLines(filePath))
            {
                RowCount++;
                //skip header
                if (RowCount > 1)
                {
                    var cols = line.Split('\t');
                    DataFromCSV.Add(new ListingData()
                    {
                        name             = cols[1],
                        neighbourhood    = cols[5],
                        room_type        = cols[8],
                        price            = float.Parse(cols[9]),
                        minimum_nights   = int.Parse(cols[10]),
                        availability_365 = int.Parse(cols[15]),
                        //Derive the label: recommended if minimum nights < 5, number of reviews > 1, reviews per month > 0.1, host listings count >= 1, and availability per year > 10
                        Label = (int.Parse(cols[10]) < 5 && int.Parse(cols[11]) > 1 && float.Parse(cols[13]) > 0.1 && int.Parse(cols[14]) >= 1 && int.Parse(cols[15]) > 10)
                    });
                }
            }

            //Create MLContext
            MLContext mlContext = new MLContext();

            //Load Data File
            IDataView trainData = mlContext.Data.LoadFromEnumerable <ListingData>(DataFromCSV);
            var       xx        = trainData.Preview();
            //Data process configuration with pipeline data transformations
            var dataPrepTransform = mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "neighbourhood_encoded", inputColumnName: "neighbourhood")
                                    .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "room_type_encoded", inputColumnName: "room_type"))
                                    .Append(mlContext.Transforms.Concatenate("Features", new[] { "neighbourhood_encoded", "room_type_encoded", "price", "minimum_nights", "availability_365", "latitude", "longitude" }))
                                    .AppendCacheCheckpoint(mlContext);

            // Create data transformer
            ITransformer dataPrepTransformer = dataPrepTransform.Fit(trainData);

            IDataView transformedTrainingData = dataPrepTransformer.Transform(trainData);
            // Choose learner
            var FfmEstimator = mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(new string[] { "Features" });

            // Build machine learning model
            var trainedModel = dataPrepTransformer.Append(FfmEstimator.Fit(transformedTrainingData));

            // Measure trained model performance (evaluated on the training data; this example has no separate test split)
            var testData = trainedModel.Transform(transformedTrainingData);
            var metrics  = mlContext.BinaryClassification.Evaluate(testData);

            Console.WriteLine();
            Console.WriteLine("Model quality metrics evaluation");
            Console.WriteLine("--------------------------------");
            Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
            Console.WriteLine($"Auc: {metrics.AreaUnderRocCurve:P2}");
            Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
            Console.WriteLine("=============== End of model evaluation ===============");

            var modelRelativePath = GetAbsolutePath("MLModel.zip");

            mlContext.Model.Save(trainedModel, trainData.Schema, GetAbsolutePath(modelRelativePath));
            Console.WriteLine("The model is saved to {0}", GetAbsolutePath(modelRelativePath));

            ITransformer mlModel    = mlContext.Model.Load(GetAbsolutePath(modelRelativePath), out DataViewSchema inputSchema);
            var          predEngine = mlContext.Model.CreatePredictionEngine <ListingData, ListingPrediction>(mlModel);

            // Create sample data to do a single prediction with it
            var sampleDatas = mlContext.Data.CreateEnumerable <ListingData>(trainData, false).Take(10);

            foreach (var sampleData in sampleDatas)
            {
                // Try a single prediction
                ListingPrediction predictionResult = predEngine.Predict(sampleData);
                Console.WriteLine($"Single Prediction {sampleData.name} --> Predicted:  { (predictionResult.PredictedLabel ? "Recommended" : "Not Recommended") }");
            }
            Console.ReadKey();
        }
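The example above measures the model on the same rows it was trained on. A minimal sketch of evaluating on a held-out split instead, reusing the data and preparation pipeline built above (names are illustrative):

        static void EvaluateOnHeldOutSplit(MLContext mlContext, IDataView allData,
                                           IEstimator<ITransformer> dataPrepTransform)
        {
            // Hold back 20% of the rows for evaluation.
            DataOperationsCatalog.TrainTestData split = mlContext.Data.TrainTestSplit(allData, testFraction: 0.2);

            ITransformer prep = dataPrepTransform.Fit(split.TrainSet);
            var ffm = mlContext.BinaryClassification.Trainers.FieldAwareFactorizationMachine(new[] { "Features" });
            ITransformer model = prep.Append(ffm.Fit(prep.Transform(split.TrainSet)));

            // Score and evaluate on rows the model has never seen.
            var testPredictions = model.Transform(split.TestSet);
            var metrics = mlContext.BinaryClassification.Evaluate(testPredictions);

            Console.WriteLine($"Held-out Accuracy: {metrics.Accuracy:P2}");
            Console.WriteLine($"Held-out AUC:      {metrics.AreaUnderRocCurve:P2}");
        }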
Example #8
        static void Main(string[] args)
        {
            //Create MLContext
            MLContext mlContext = new MLContext();

            //Load Data File
            IDataView trainData = mlContext.Data.LoadFromTextFile <IncomeData>(GetAbsolutePath("../../../Data/AdultIncome.csv"), separatorChar: ',', hasHeader: true);

            //Data process configuration with pipeline data transformations
            var dataPrepTransform = mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "workclass_encoded", inputColumnName: "workclass")
                                    .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "education_encoded", inputColumnName: "education"))
                                    .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "marital_status_encoded", inputColumnName: "marital_status"))
                                    .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "occupation_encoded", inputColumnName: "occupation"))
                                    .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "relationship_encoded", inputColumnName: "relationship"))
                                    .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "race_encoded", inputColumnName: "race"))
                                    .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "sex_encoded", inputColumnName: "sex"))
                                    .Append(mlContext.Transforms.Categorical.OneHotEncoding(outputColumnName: "native_country_encoded", inputColumnName: "native_country"))
                                    .Append(mlContext.Transforms.Concatenate("Features", new[] { "age", "fnlwgt", "education_num", "marital_status_encoded", "relationship_encoded", "race_encoded", "sex_encoded", "hours_per_week" }))
                                    .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"))
                                    .AppendCacheCheckpoint(mlContext);

            // Create data transformer
            ITransformer dataPrepTransformer = dataPrepTransform.Fit(trainData);

            IDataView transformedTrainingData = dataPrepTransformer.Transform(trainData);
            // Choose learner
            var ClusteringEstimator = mlContext.Clustering.Trainers.KMeans(featureColumnName: "Features", numberOfClusters: 2);

            // Build machine learning model
            var trainedModel = dataPrepTransformer.Append(ClusteringEstimator.Fit(transformedTrainingData));

            // Measure trained model performance (evaluated on the training data; this example has no separate test split)
            var testData    = trainedModel.Transform(transformedTrainingData);
            var testMetrics = mlContext.Clustering.Evaluate(testData);

            Console.WriteLine($"*************************************************************************************************************");
            Console.WriteLine($"*       Metrics for Clustering model - Training Data     ");
            Console.WriteLine($"*------------------------------------------------------------------------------------------------------------");
            Console.WriteLine($"*       AverageDistance:            {testMetrics.AverageDistance:0.###}");
            Console.WriteLine($"*       DaviesBouldinIndex:         {testMetrics.DaviesBouldinIndex:0.###}");
            Console.WriteLine($"*       NormalizedMutualInformation:{testMetrics.NormalizedMutualInformation:#.###}");
            Console.WriteLine($"*************************************************************************************************************");

            var modelRelativePath = GetAbsolutePath("MLModel.zip");

            mlContext.Model.Save(trainedModel, trainData.Schema, GetAbsolutePath(modelRelativePath));
            Console.WriteLine("The model is saved to {0}", GetAbsolutePath(modelRelativePath));

            ITransformer mlModel    = mlContext.Model.Load(GetAbsolutePath(modelRelativePath), out DataViewSchema inputSchema);
            var          predEngine = mlContext.Model.CreatePredictionEngine <IncomeData, ClusterPrediction>(mlModel);

            // Create sample data to do a single prediction with it
            var sampleData1 = mlContext.Data.CreateEnumerable <IncomeData>(trainData, false).First();
            var sampleData2 = mlContext.Data.CreateEnumerable <IncomeData>(trainData, false).Skip(1).First();
            // Try a single prediction
            ClusterPrediction predictionResult = predEngine.Predict(sampleData1);

            Console.WriteLine($"Sample 1");
            Console.WriteLine($"Cluster: {predictionResult.PredictedClusterId}");
            Console.WriteLine($"Distances: {string.Join(" ", predictionResult.Distances)}");
            Console.WriteLine($"Sample 2");
            ClusterPrediction predictionResult2 = predEngine.Predict(sampleData2);

            Console.WriteLine($"Cluster: {predictionResult2.PredictedClusterId}");
            Console.WriteLine($"Distances: {string.Join(" ", predictionResult2.Distances)}");
            Console.WriteLine("The two samples are expected to fall in the same cluster.");
            Console.ReadKey();
        }
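The ClusterPrediction class is not shown; the usage above (PredictedClusterId and Distances) suggests the standard k-means output mapping, roughly as follows (requires using Microsoft.ML.Data;):

        public class ClusterPrediction
        {
            // KMeans writes the assigned cluster id to the PredictedLabel column.
            [ColumnName("PredictedLabel")]
            public uint PredictedClusterId { get; set; }

            // KMeans writes the distances to each centroid to the Score column.
            [ColumnName("Score")]
            public float[] Distances { get; set; }
        }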
Example #9
        static void Main(string[] args)
        {
            //Create MLContext
            MLContext mlContext = new MLContext();

            var filePath = GetAbsolutePath("../../../Data/IENS_USER_ITEM.csv");
            //Load Data File
            IDataView trainData = mlContext.Data.LoadFromTextFile <RestaurantData>(filePath, separatorChar: ',', hasHeader: true);
            //var xx = trainData.Preview();
            //Data process configuration with pipeline data transformations
            var dataPrepTransform = mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: "RestaurantNameEncoded", inputColumnName: nameof(RestaurantData.RestaurantName))
                                    .Append(mlContext.Transforms.Conversion.MapValueToKey(outputColumnName: "ReviewerEncoded", inputColumnName: nameof(RestaurantData.Reviewer)))
                                    .Append(mlContext.Transforms.CopyColumns("Label", nameof(RestaurantData.Score)))
                                    .AppendCacheCheckpoint(mlContext);

            // Create data transformer
            ITransformer dataPrepTransformer = dataPrepTransform.Fit(trainData);

            IDataView transformedTrainingData = dataPrepTransformer.Transform(trainData);
            // Choose learner
            var Estimator = mlContext.Recommendation().Trainers.MatrixFactorization(
                labelColumnName: "Label",
                matrixColumnIndexColumnName: "RestaurantNameEncoded",
                matrixRowIndexColumnName: "ReviewerEncoded");

            // Build machine learning model
            var trainedModel = dataPrepTransformer.Append(Estimator.Fit(transformedTrainingData));

            // Measure trained model performance (evaluated on the training data)
            var testData = trainedModel.Transform(transformedTrainingData);
            var metrics  = mlContext.Regression.Evaluate(testData, labelColumnName: "Label", scoreColumnName: "Score");

            Console.WriteLine();
            Console.WriteLine("Model quality metrics evaluation");
            Console.WriteLine("Root Mean Squared Error : " + metrics.RootMeanSquaredError.ToString());
            Console.WriteLine("RSquared: " + metrics.RSquared.ToString());
            Console.WriteLine("=============== End of model evaluation ===============");

            var modelRelativePath = GetAbsolutePath("MLModel.zip");

            mlContext.Model.Save(trainedModel, trainData.Schema, GetAbsolutePath(modelRelativePath));
            Console.WriteLine("The model is saved to {0}", GetAbsolutePath(modelRelativePath));

            ITransformer mlModel    = mlContext.Model.Load(GetAbsolutePath(modelRelativePath), out DataViewSchema inputSchema);
            var          predEngine = mlContext.Model.CreatePredictionEngine <RestaurantData, RestaurantPrediction>(mlModel);

            // Create sample data to do a single prediction with it
            var sampleDatas = mlContext.Data.CreateEnumerable <RestaurantData>(trainData, false).Take(10);

            foreach (var sampleData in sampleDatas)
            {
                // Try a single prediction
                RestaurantPrediction predictionResult = predEngine.Predict(sampleData);
                if (Math.Round(predictionResult.Score, 1) > 7.5)
                {
                    Console.WriteLine("Restaurant " + sampleData.RestaurantName + " is recommended for reviewer " + sampleData.Reviewer);
                }
                else
                {
                    Console.WriteLine("Restaurant " + sampleData.RestaurantName + " is not recommended for reviewer " + sampleData.Reviewer);
                }
            }
            Console.ReadKey();
        }
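A minimal sketch (not part of the original) of turning the engine above into simple recommendations for one reviewer: score every distinct restaurant and keep those above the same 7.5 threshold. RestaurantData and RestaurantPrediction are the sample's own types, assumed here to have settable RestaurantName/Reviewer properties and a Score output; requires System.Linq.

        static void RecommendForReviewer(MLContext mlContext, IDataView trainData,
                                         PredictionEngine<RestaurantData, RestaurantPrediction> predEngine,
                                         string reviewer)
        {
            // Collect the distinct restaurants seen in the training data.
            var restaurants = mlContext.Data.CreateEnumerable<RestaurantData>(trainData, reuseRowObject: false)
                              .Select(r => r.RestaurantName)
                              .Distinct();

            foreach (var restaurant in restaurants)
            {
                // Score this restaurant/reviewer pairing with the matrix factorization model.
                var prediction = predEngine.Predict(new RestaurantData { RestaurantName = restaurant, Reviewer = reviewer });

                if (prediction.Score > 7.5)
                {
                    Console.WriteLine($"Recommend {restaurant} to {reviewer} (predicted score {prediction.Score:0.0})");
                }
            }
        }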