static void Main(string[] args)
{
    var assetsPath = ModelHelpers.GetAssetsPath(@"..\..\..\assets");

    var transactionsCsv = Path.Combine(assetsPath, "inputs", "transactions.csv");
    var offersCsv = Path.Combine(assetsPath, "inputs", "offers.csv");
    var pivotCsv = Path.Combine(assetsPath, "inputs", "pivot.csv");
    var modelZip = Path.Combine(assetsPath, "outputs", "retailClustering.zip");

    try
    {
        // STEP 0: Special data pre-processing for this sample: create the pivot table CSV file
        DataHelpers.PreProcessAndSave(offersCsv, transactionsCsv, pivotCsv);

        // Create the MLContext to share across components for deterministic results
        MLContext mlContext = new MLContext(seed: 1);  // Seed set so you get a deterministic environment

        // STEP 1: Common data loading configuration
        var textLoader = CustomerSegmentationTextLoaderFactory.CreateTextLoader(mlContext);
        var pivotDataView = textLoader.Read(pivotCsv);

        // STEP 2: Configure data transformations in the pipeline
        var dataProcessPipeline = new PrincipalComponentAnalysisEstimator(mlContext, "Features", "PCAFeatures", rank: 2)
            .Append(new OneHotEncodingEstimator(mlContext, new[]
            {
                new OneHotEncodingEstimator.ColumnInfo("LastName", "LastNameKey", CategoricalTransform.OutputKind.Ind)
            }));

        // (Optional) Peek data in the training DataView after applying the dataProcessPipeline's transformations
        Common.ConsoleHelper.PeekDataViewInConsole<PivotObservation>(mlContext, pivotDataView, dataProcessPipeline, 10);
        Common.ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", pivotDataView, dataProcessPipeline, 10);

        // STEP 3: Create and train the model
        var trainer = mlContext.Clustering.Trainers.KMeans("Features", clustersCount: 3);
        var modelBuilder = new Common.ModelBuilder<PivotObservation, ClusteringPrediction>(mlContext, dataProcessPipeline);
        modelBuilder.AddTrainer(trainer);
        var trainedModel = modelBuilder.Train(pivotDataView);

        // STEP 4: Evaluate the accuracy of the model
        var metrics = modelBuilder.EvaluateClusteringModel(pivotDataView);
        Common.ConsoleHelper.PrintClusteringMetrics(trainer.ToString(), metrics);

        // STEP 5: Save/persist the model as a .ZIP file
        modelBuilder.SaveModelAsFile(modelZip);
    }
    catch (Exception ex)
    {
        Common.ConsoleHelper.ConsoleWriteException(ex.Message);
    }

    Common.ConsoleHelper.ConsolePressAnyKey();
}
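// A minimal sketch (not part of the sample code) of the observation and prediction classes the
// pipeline above assumes. The member names here are assumptions for illustration only; the column
// layout (a 32-float "Features" vector plus a "LastName" text column) follows the text-loader
// configuration shown in the later listings, and "PredictedLabel"/"Score" are the standard output
// columns produced by the ML.NET KMeans trainer.
public class PivotObservation
{
    [ColumnName("Features")]
    public float[] Features { get; set; }   // the 32 numeric pivot columns (offers per customer)

    [ColumnName("LastName")]
    public string LastName { get; set; }
}

public class ClusteringPrediction
{
    [ColumnName("PredictedLabel")]
    public uint SelectedClusterId { get; set; }   // cluster id assigned by KMeans

    [ColumnName("Score")]
    public float[] Distance { get; set; }         // distance to each cluster centroid
}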
public void TestPcaEstimator()
{
    var data = TextLoader.CreateReader(_env,
            c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
            separator: ';', hasHeader: true)
        .Read(_dataSource);

    var est = new PrincipalComponentAnalysisEstimator(_env, "features", "pca", rank: 5, seed: 1);
    var outputPath = GetOutputPath("PCA", "pca.tsv");

    using (var ch = _env.Start("save"))
    {
        IDataView savedData = TakeFilter.Create(_env, est.Fit(data.AsDynamic).Transform(data.AsDynamic), 4);
        savedData = ColumnSelectingTransformer.CreateKeep(_env, savedData, new[] { "pca" });

        using (var fs = File.Create(outputPath))
            DataSaverUtils.SaveDataView(ch, _saver, savedData, fs, keepHidden: true);
    }

    CheckEquality("PCA", "pca.tsv", digitsOfPrecision: 4);
    Done();
}
public void PcaWorkout()
{
    var data = TextLoader.CreateReader(_env,
            c => (label: c.LoadFloat(11), weight: c.LoadFloat(0), features: c.LoadFloat(1, 10)),
            separator: ';', hasHeader: true)
        .Read(_dataSource);

    // invalidData loads the feature columns as text, so the estimator's schema validation should reject it
    var invalidData = TextLoader.CreateReader(_env,
            c => (label: c.LoadFloat(11), weight: c.LoadFloat(0), features: c.LoadText(1, 10)),
            separator: ';', hasHeader: true)
        .Read(_dataSource);

    var est = new PrincipalComponentAnalysisEstimator(_env, "features", "pca", rank: 4, seed: 10);
    TestEstimatorCore(est, data.AsDynamic, invalidInput: invalidData.AsDynamic);

    // Exercise the non-default arguments: explicit example weights, extra over-sampling, no centering
    var estNonDefaultArgs = new PrincipalComponentAnalysisEstimator(_env, "features", "pca", rank: 3,
        weightColumn: "weight", overSampling: 2, center: false);
    TestEstimatorCore(estNonDefaultArgs, data.AsDynamic, invalidInput: invalidData.AsDynamic);

    Done();
}
static void Main(string[] args)
{
    string assetsRelativePath = @"../../../assets";
    string assetsPath = GetDataSetAbsolutePath(assetsRelativePath);

    string transactionsCsv = Path.Combine(assetsPath, "inputs", "transactions.csv");
    string offersCsv = Path.Combine(assetsPath, "inputs", "offers.csv");
    string pivotCsv = Path.Combine(assetsPath, "inputs", "pivot.csv");
    string modelZip = Path.Combine(assetsPath, "outputs", "retailClustering.zip");

    try
    {
        // STEP 0: Special data pre-processing for this sample: create the pivot table CSV file
        DataHelpers.PreProcessAndSave(offersCsv, transactionsCsv, pivotCsv);

        // Create the MLContext to share across components for deterministic results
        MLContext mlContext = new MLContext(seed: 1);  // Seed set so you get a deterministic environment

        // STEP 1: Common data loading configuration
        var pivotDataView = mlContext.Data.ReadFromTextFile(path: pivotCsv,
            columns: new[]
            {
                new TextLoader.Column(DefaultColumnNames.Features, DataKind.R4, new[] { new TextLoader.Range(0, 31) }),
                new TextLoader.Column(nameof(PivotData.LastName), DataKind.Text, 32)
            },
            hasHeader: true,
            separatorChar: ',');

        // STEP 2: Configure data transformations in the pipeline
        var dataProcessPipeline = new PrincipalComponentAnalysisEstimator(env: mlContext,
                outputColumnName: "PCAFeatures", inputColumnName: DefaultColumnNames.Features, rank: 2)
            .Append(new OneHotEncodingEstimator(mlContext, new[]
            {
                new OneHotEncodingEstimator.ColumnInfo(name: "LastNameKey",
                    inputColumnName: nameof(PivotData.LastName),
                    OneHotEncodingTransformer.OutputKind.Ind)
            }));

        // (Optional) Peek data in the training DataView after applying the dataProcessPipeline's transformations
        Common.ConsoleHelper.PeekDataViewInConsole(mlContext, pivotDataView, dataProcessPipeline, 10);
        Common.ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, DefaultColumnNames.Features, pivotDataView, dataProcessPipeline, 10);

        // STEP 3: Create the training pipeline
        var trainer = mlContext.Clustering.Trainers.KMeans(featureColumn: DefaultColumnNames.Features, clustersCount: 3);
        var trainingPipeline = dataProcessPipeline.Append(trainer);

        // STEP 4: Train the model, fitting it to the pivotDataView
        Console.WriteLine("=============== Training the model ===============");
        ITransformer trainedModel = trainingPipeline.Fit(pivotDataView);

        // STEP 5: Evaluate the model and show accuracy stats
        Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
        var predictions = trainedModel.Transform(pivotDataView);
        var metrics = mlContext.Clustering.Evaluate(predictions, score: DefaultColumnNames.Score, features: DefaultColumnNames.Features);
        ConsoleHelper.PrintClusteringMetrics(trainer.ToString(), metrics);

        // STEP 6: Save/persist the trained model to a .ZIP file
        using (var fs = new FileStream(modelZip, FileMode.Create, FileAccess.Write, FileShare.Write))
            mlContext.Model.Save(trainedModel, fs);

        Console.WriteLine("The model is saved to {0}", modelZip);
    }
    catch (Exception ex)
    {
        Common.ConsoleHelper.ConsoleWriteException(ex.Message);
    }

    Common.ConsoleHelper.ConsolePressAnyKey();
}
static void Main(string[] args)
{
    var assetsPath = PathHelper.GetAssetsPath(@"..\..\..\assets");

    var transactionsCsv = Path.Combine(assetsPath, "inputs", "transactions.csv");
    var offersCsv = Path.Combine(assetsPath, "inputs", "offers.csv");
    var pivotCsv = Path.Combine(assetsPath, "inputs", "pivot.csv");
    var modelZip = Path.Combine(assetsPath, "outputs", "retailClustering.zip");

    try
    {
        // STEP 0: Special data pre-processing for this sample: create the pivot table CSV file
        DataHelpers.PreProcessAndSave(offersCsv, transactionsCsv, pivotCsv);

        // Create the MLContext to share across components for deterministic results
        MLContext mlContext = new MLContext(seed: 1);  // Seed set so you get a deterministic environment

        // STEP 1: Common data loading configuration
        TextLoader textLoader = mlContext.Data.TextReader(new TextLoader.Arguments()
        {
            Separator = ",",
            HasHeader = true,
            Column = new[]
            {
                new TextLoader.Column("Features", DataKind.R4, new[] { new TextLoader.Range(0, 31) }),
                new TextLoader.Column("LastName", DataKind.Text, 32)
            }
        });

        var pivotDataView = textLoader.Read(pivotCsv);

        // STEP 2: Configure data transformations in the pipeline
        var dataProcessPipeline = new PrincipalComponentAnalysisEstimator(mlContext, "Features", "PCAFeatures", rank: 2)
            .Append(new OneHotEncodingEstimator(mlContext, new[]
            {
                new OneHotEncodingEstimator.ColumnInfo("LastName", "LastNameKey", CategoricalTransform.OutputKind.Ind)
            }));

        // (Optional) Peek data in the training DataView after applying the dataProcessPipeline's transformations
        Common.ConsoleHelper.PeekDataViewInConsole<PivotObservation>(mlContext, pivotDataView, dataProcessPipeline, 10);
        Common.ConsoleHelper.PeekVectorColumnDataInConsole(mlContext, "Features", pivotDataView, dataProcessPipeline, 10);

        // STEP 3: Create the training pipeline
        var trainer = mlContext.Clustering.Trainers.KMeans("Features", clustersCount: 3);
        var trainingPipeline = dataProcessPipeline.Append(trainer);

        // STEP 4: Train the model, fitting it to the pivotDataView
        Console.WriteLine("=============== Training the model ===============");
        ITransformer trainedModel = trainingPipeline.Fit(pivotDataView);

        // STEP 5: Evaluate the model and show accuracy stats
        Console.WriteLine("===== Evaluating Model's accuracy with Test data =====");
        var predictions = trainedModel.Transform(pivotDataView);
        var metrics = mlContext.Clustering.Evaluate(predictions, score: "Score", features: "Features");
        ConsoleHelper.PrintClusteringMetrics(trainer.ToString(), metrics);

        // STEP 6: Save/persist the trained model to a .ZIP file
        using (var fs = new FileStream(modelZip, FileMode.Create, FileAccess.Write, FileShare.Write))
            mlContext.Model.Save(trainedModel, fs);

        Console.WriteLine("The model is saved to {0}", modelZip);
    }
    catch (Exception ex)
    {
        Common.ConsoleHelper.ConsoleWriteException(ex.Message);
    }

    Common.ConsoleHelper.ConsolePressAnyKey();
}
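// A minimal consumption sketch (not part of the listings above), assuming the same pre-v1.0 ML.NET API
// surface used by the training code: the saved retailClustering.zip is loaded back and a single row is
// scored with a PredictionEngine. The `modelZip` and `mlContext` variables are reused from the listing
// above, and the PivotObservation/ClusteringPrediction shapes plus the sample values are assumptions
// for illustration only.
ITransformer loadedModel;
using (var stream = File.OpenRead(modelZip))
    loadedModel = mlContext.Model.Load(stream);

var predictionEngine = loadedModel.CreatePredictionEngine<PivotObservation, ClusteringPrediction>(mlContext);

var sampleObservation = new PivotObservation
{
    Features = new float[32],   // one row of the pivoted offer/transaction matrix (hypothetical values)
    LastName = "Smith"          // hypothetical value
};

ClusteringPrediction prediction = predictionEngine.Predict(sampleObservation);
Console.WriteLine($"Assigned cluster: {prediction.SelectedClusterId}");
Console.WriteLine($"Distances to centroids: {string.Join(", ", prediction.Distance)}");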