Ejemplo n.º 1
0
        static void Main(string[] args)
        {
            // Resolve the assets folder and derive every input/output file location.
            string assetsDir = ModelHelpers.GetAssetsPath(@"..\..\..\assets");

            string transactionsFile = Path.Combine(assetsDir, "inputs", "transactions.csv");
            string offersFile = Path.Combine(assetsDir, "inputs", "offers.csv");
            string pivotFile = Path.Combine(assetsDir, "inputs", "pivot.csv");
            string modelPath = Path.Combine(assetsDir, "outputs", "retailClustering.zip");

            try
            {
                // STEP 0: Pre-process offers + transactions into the pivot-table CSV used for training.
                DataHelpers.PreProcessAndSave(offersFile, transactionsFile, pivotFile);

                // A fixed seed gives a deterministic environment across runs.
                var mlContext = new MLContext(seed: 1);

                // STEP 1: Load the pivot data with the shared loader configuration.
                var loader = CustomerSegmentationTextLoaderFactory.CreateTextLoader(mlContext);
                var pivotData = loader.Read(pivotFile);

                // STEP 2: Transformations - PCA down to 2 components, then one-hot encode LastName.
                var encodingColumns = new[]
                {
                    new OneHotEncodingEstimator.ColumnInfo("LastName",
                                                           "LastNameKey",
                                                           CategoricalTransform.OutputKind.Ind)
                };
                var dataPipeline = new PrincipalComponentAnalysisEstimator(mlContext, "Features", "PCAFeatures", rank: 2)
                                   .Append(new OneHotEncodingEstimator(mlContext, encodingColumns));

                // (Optional) Inspect a few transformed rows/columns in the console.
                Common.ConsoleHelper.PeekDataViewInConsole<PivotObservation>(mlContext, pivotData, dataPipeline, 10);
                Common.ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", pivotData, dataPipeline, 10);

                // STEP 3: Train a 3-cluster KMeans model through the shared ModelBuilder helper.
                var kMeansTrainer = mlContext.Clustering.Trainers.KMeans("Features", clustersCount: 3);
                var builder = new Common.ModelBuilder<PivotObservation, ClusteringPrediction>(mlContext, dataPipeline);
                builder.AddTrainer(kMeansTrainer);
                var model = builder.Train(pivotData);

                // STEP 4: Evaluate clustering quality (on the training data itself).
                var clusteringMetrics = builder.EvaluateClusteringModel(pivotData);
                Common.ConsoleHelper.PrintClusteringMetrics(kMeansTrainer.ToString(), clusteringMetrics);

                // STEP 5: Persist the trained model as a .zip file.
                builder.SaveModelAsFile(modelPath);
            }
            catch (Exception ex)
            {
                Common.ConsoleHelper.ConsoleWriteException(ex.Message);
            }

            Common.ConsoleHelper.ConsolePressAnyKey();
        }
Ejemplo n.º 2
0
        public void TestPcaEstimator()
        {
            // Load the dataset: column 11 is the label, columns 0-10 are the features.
            var reader = TextLoader.CreateReader(_env,
                                                 c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
                                                 separator: ';', hasHeader: true);
            var data = reader.Read(_dataSource);

            var pcaEstimator = new PrincipalComponentAnalysisEstimator(_env, "features", "pca", rank: 5, seed: 1);
            var outputPath = GetOutputPath("PCA", "pca.tsv");

            using (var ch = _env.Start("save"))
            {
                // Fit + transform, then keep only the first 4 rows and the "pca" column before saving.
                var transformed = pcaEstimator.Fit(data.AsDynamic).Transform(data.AsDynamic);
                IDataView savedData = TakeFilter.Create(_env, transformed, 4);
                savedData = ColumnSelectingTransformer.CreateKeep(_env, savedData, new[] { "pca" });

                using (var fs = File.Create(outputPath))
                {
                    DataSaverUtils.SaveDataView(ch, _saver, savedData, fs, keepHidden: true);
                }
            }

            // Compare the saved output against the checked-in baseline to 4 digits of precision.
            CheckEquality("PCA", "pca.tsv", digitsOfPrecision: 4);
            Done();
        }
Ejemplo n.º 3
0
        public void PcaWorkout()
        {
            // Valid input: label in column 11, weight in column 0, numeric features in columns 1-10.
            var validData = TextLoader.CreateReader(_env,
                                                    c => (label: c.LoadFloat(11), weight: c.LoadFloat(0), features: c.LoadFloat(1, 10)),
                                                    separator: ';', hasHeader: true)
                            .Read(_dataSource);

            // Invalid input: the feature columns are loaded as text, which the estimator must reject.
            var badData = TextLoader.CreateReader(_env,
                                                  c => (label: c.LoadFloat(11), weight: c.LoadFloat(0), features: c.LoadText(1, 10)),
                                                  separator: ';', hasHeader: true)
                          .Read(_dataSource);

            // Exercise the estimator with mostly-default arguments.
            var defaultEstimator = new PrincipalComponentAnalysisEstimator(_env, "features", "pca", rank: 4, seed: 10);
            TestEstimatorCore(defaultEstimator, validData.AsDynamic, invalidInput: badData.AsDynamic);

            // Exercise non-default arguments: weight column, oversampling, and no centering.
            var customEstimator = new PrincipalComponentAnalysisEstimator(_env, "features", "pca", rank: 3, weightColumn: "weight", overSampling: 2, center: false);
            TestEstimatorCore(customEstimator, validData.AsDynamic, invalidInput: badData.AsDynamic);

            Done();
        }
Ejemplo n.º 4
0
        static void Main(string[] args)
        {
            // Resolve the assets folder and derive every input/output file location.
            string assetsDir = GetDataSetAbsolutePath(@"../../../assets");

            string transactionsFile = Path.Combine(assetsDir, "inputs", "transactions.csv");
            string offersFile = Path.Combine(assetsDir, "inputs", "offers.csv");
            string pivotFile = Path.Combine(assetsDir, "inputs", "pivot.csv");
            string modelPath = Path.Combine(assetsDir, "outputs", "retailClustering.zip");

            try
            {
                // STEP 0: Pre-process offers + transactions into the pivot-table CSV used for training.
                DataHelpers.PreProcessAndSave(offersFile, transactionsFile, pivotFile);

                // A fixed seed gives a deterministic environment across runs.
                var mlContext = new MLContext(seed: 1);

                // STEP 1: Load the pivot CSV - 32 numeric feature columns followed by the LastName text column.
                var loaderColumns = new[]
                {
                    new TextLoader.Column(DefaultColumnNames.Features, DataKind.R4, new[] { new TextLoader.Range(0, 31) }),
                    new TextLoader.Column(nameof(PivotData.LastName), DataKind.Text, 32)
                };
                var pivotData = mlContext.Data.ReadFromTextFile(path: pivotFile,
                                                                columns: loaderColumns,
                                                                hasHeader: true,
                                                                separatorChar: ',');

                // STEP 2: Transformations - PCA down to 2 components, then one-hot encode LastName.
                var encodingColumns = new[]
                {
                    new OneHotEncodingEstimator.ColumnInfo(name: "LastNameKey", inputColumnName: nameof(PivotData.LastName),
                                                           OneHotEncodingTransformer.OutputKind.Ind)
                };
                var dataPipeline = new PrincipalComponentAnalysisEstimator(env: mlContext, outputColumnName: "PCAFeatures", inputColumnName: DefaultColumnNames.Features, rank: 2)
                                   .Append(new OneHotEncodingEstimator(mlContext, encodingColumns));

                // (Optional) Inspect a few transformed rows/columns in the console.
                Common.ConsoleHelper.PeekDataViewInConsole(mlContext, pivotData, dataPipeline, 10);
                Common.ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, DefaultColumnNames.Features, pivotData, dataPipeline, 10);

                // STEP 3: Append a 3-cluster KMeans trainer to the data-processing pipeline.
                var kMeansTrainer = mlContext.Clustering.Trainers.KMeans(featureColumn: DefaultColumnNames.Features, clustersCount: 3);
                var trainingPipeline = dataPipeline.Append(kMeansTrainer);

                // STEP 4: Fit the full pipeline to the pivot data.
                Console.WriteLine("=============== Training the model ===============");
                ITransformer model = trainingPipeline.Fit(pivotData);

                // STEP 5: Score the data and report clustering metrics.
                Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
                var scored = model.Transform(pivotData);
                var clusteringMetrics = mlContext.Clustering.Evaluate(scored, score: DefaultColumnNames.Score, features: DefaultColumnNames.Features);

                ConsoleHelper.PrintClusteringMetrics(kMeansTrainer.ToString(), clusteringMetrics);

                // STEP 6: Persist the trained model as a .zip file.
                using (var fs = new FileStream(modelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
                {
                    mlContext.Model.Save(model, fs);
                }

                Console.WriteLine("The model is saved to {0}", modelPath);
            }
            catch (Exception ex)
            {
                Common.ConsoleHelper.ConsoleWriteException(ex.Message);
            }

            Common.ConsoleHelper.ConsolePressAnyKey();
        }
Ejemplo n.º 5
0
        static void Main(string[] args)
        {
            // Resolve the assets folder and derive every input/output file location.
            string assetsDir = PathHelper.GetAssetsPath(@"..\..\..\assets");

            string transactionsFile = Path.Combine(assetsDir, "inputs", "transactions.csv");
            string offersFile = Path.Combine(assetsDir, "inputs", "offers.csv");
            string pivotFile = Path.Combine(assetsDir, "inputs", "pivot.csv");
            string modelPath = Path.Combine(assetsDir, "outputs", "retailClustering.zip");

            try
            {
                // STEP 0: Pre-process offers + transactions into the pivot-table CSV used for training.
                DataHelpers.PreProcessAndSave(offersFile, transactionsFile, pivotFile);

                // A fixed seed gives a deterministic environment across runs.
                var mlContext = new MLContext(seed: 1);

                // STEP 1: Configure the loader - 32 float feature columns followed by the LastName text column.
                var loaderArgs = new TextLoader.Arguments()
                {
                    Separator = ",",
                    HasHeader = true,
                    Column = new[]
                    {
                        new TextLoader.Column("Features", DataKind.R4, new[] { new TextLoader.Range(0, 31) }),
                        new TextLoader.Column("LastName", DataKind.Text, 32)
                    }
                };
                TextLoader loader = mlContext.Data.TextReader(loaderArgs);
                var pivotData = loader.Read(pivotFile);

                // STEP 2: Transformations - PCA down to 2 components, then one-hot encode LastName.
                var encodingColumns = new[]
                {
                    new OneHotEncodingEstimator.ColumnInfo("LastName",
                                                           "LastNameKey",
                                                           CategoricalTransform.OutputKind.Ind)
                };
                var dataPipeline = new PrincipalComponentAnalysisEstimator(mlContext, "Features", "PCAFeatures", rank: 2)
                                   .Append(new OneHotEncodingEstimator(mlContext, encodingColumns));

                // (Optional) Inspect a few transformed rows/columns in the console.
                Common.ConsoleHelper.PeekDataViewInConsole<PivotObservation>(mlContext, pivotData, dataPipeline, 10);
                Common.ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", pivotData, dataPipeline, 10);

                // STEP 3: Append a 3-cluster KMeans trainer to the data-processing pipeline.
                var kMeansTrainer = mlContext.Clustering.Trainers.KMeans("Features", clustersCount: 3);
                var trainingPipeline = dataPipeline.Append(kMeansTrainer);

                // STEP 4: Fit the full pipeline to the pivot data.
                Console.WriteLine("=============== Training the model ===============");
                ITransformer model = trainingPipeline.Fit(pivotData);

                // STEP 5: Score the data and report clustering metrics.
                Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
                var scored = model.Transform(pivotData);
                var clusteringMetrics = mlContext.Clustering.Evaluate(scored, score: "Score", features: "Features");

                ConsoleHelper.PrintClusteringMetrics(kMeansTrainer.ToString(), clusteringMetrics);

                // STEP 6: Persist the trained model as a .zip file.
                using (var fs = new FileStream(modelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
                {
                    mlContext.Model.Save(model, fs);
                }

                Console.WriteLine("The model is saved to {0}", modelPath);
            }
            catch (Exception ex)
            {
                Common.ConsoleHelper.ConsoleWriteException(ex.Message);
            }

            Common.ConsoleHelper.ConsolePressAnyKey();
        }