/// <summary>
/// Reads the file at <c>pivotLocation</c>, fits a PCA + categorical + k-means pipeline on it,
/// prints the clustering metrics and writes the fitted model to <c>modelLocation</c>.
/// </summary>
/// <param name="kClusters">Value forwarded to the k-means trainer as its cluster count.</param>
public void BuildAndTrain(int kClusters = 3)
{
    ConsoleWriteHeader("Build and Train using Static API");
    Console.Out.WriteLine($"Input file: {pivotLocation}");

    ConsoleWriteHeader("Reading file ...");
    // Columns 0..31 are loaded as one float vector named "Features"; column 32 is loaded as text.
    var loaderColumns = new[]
    {
        new TextLoader.Column("Features", DataKind.R4, new[] { new TextLoader.Range(0, 31) }),
        new TextLoader.Column("LastName", DataKind.Text, 32)
    };
    var loaderArguments = new TextLoader.Arguments
    {
        Column = loaderColumns,
        HasHeader = true,
        Separator = ","
    };
    var loader = new TextLoader(env, loaderArguments);

    // Assemble the pipeline step by step: PCA -> categorical encoding -> k-means.
    var pcaStep = new PcaEstimator(env, "Features", "PCAFeatures", rank: 2, advancedSettings: (p) => p.Seed = 42);
    var lastNameColumns = new[]
    {
        new CategoricalEstimator.ColumnInfo("LastName", "LastNameKey", CategoricalTransform.OutputKind.Ind)
    };
    var featurization = pcaStep.Append(new CategoricalEstimator(env, lastNameColumns));
    // The trainer is given the raw "Features" column as its input.
    var pipeline = featurization.Append(new KMeansPlusPlusTrainer(env, "Features", clustersCount: kClusters));

    ConsoleWriteHeader("Training model for customer clustering");
    var trainingData = loader.Read(new MultiFileSource(pivotLocation));
    var trainedModel = pipeline.Fit(trainingData);
    var transformedData = trainedModel.Transform(trainingData);

    // Materialized only so they can be inspected (e.g. in a debugger); not used afterwards.
    var columnNames = transformedData.Schema.GetColumnNames().ToArray();
    var firstRows = transformedData.AsEnumerable<PivotPipelineData>(env, false).Take(10).ToArray();

    ConsoleWriteHeader("Evaluate model");
    var clusteringContext = new ClusteringContext(env);
    var clusteringMetrics = clusteringContext.Evaluate(transformedData, score: "Score", features: "Features");
    Console.WriteLine($"AvgMinScore is: {clusteringMetrics.AvgMinScore}");
    Console.WriteLine($"Dbi is: {clusteringMetrics.Dbi}");

    ConsoleWriteHeader("Save model to local file");
    ModelHelpers.DeleteAssets(modelLocation);
    using (var modelStream = new FileStream(modelLocation, FileMode.Create))
    {
        trainedModel.SaveTo(env, modelStream);
    }

    Console.WriteLine($"Model saved: {modelLocation}");
}
/// <summary>
/// Fits a rank-5 <c>PcaEstimator</c> (seed 1) on the "features" column, writes the "pca" column of
/// the transformed data (passed through <c>TakeFilter</c> with a count of 4) to the PCA/pca.tsv
/// output file, and then runs <c>CheckEquality</c> on that file with 4 digits of precision.
/// </summary>
public void TestPcaEstimator()
{
    // Column 11 is the label; columns 0..10 form the feature vector.
    var reader = TextLoader.CreateReader(
        _env,
        ctx => (label: ctx.LoadFloat(11), features: ctx.LoadFloat(0, 10)),
        separator: ';',
        hasHeader: true);
    var data = reader.Read(_dataSource);

    var pcaEstimator = new PcaEstimator(_env, "features", "pca", rank: 5, seed: 1);
    var outputPath = GetOutputPath("PCA", "pca.tsv");

    using (var channel = _env.Start("save"))
    {
        var fittedPca = pcaEstimator.Fit(data.AsDynamic);
        var transformed = fittedPca.Transform(data.AsDynamic);

        IDataView savedData = TakeFilter.Create(_env, transformed, 4);
        // Keep only the PCA output column in the saved file.
        savedData = new ChooseColumnsTransform(_env, savedData, "pca");

        using (var outputStream = File.Create(outputPath))
        {
            DataSaverUtils.SaveDataView(channel, _saver, savedData, outputStream, keepHidden: true);
        }
    }

    CheckEquality("PCA", "pca.tsv", digitsOfPrecision: 4);
    Done();
}
/// <summary>
/// Runs <c>TestEstimatorCore</c> on two <c>PcaEstimator</c> configurations: one with rank 4 and
/// seed 10, and one with rank 3, a weight column, an over-sampling value of 2 and centering
/// turned off. Each run is also given an input whose "features" column is loaded as text.
/// </summary>
public void PcaWorkout()
{
    // Valid input: label = column 11, weight = column 0, features = columns 1..10 as floats.
    var validReader = TextLoader.CreateReader(
        _env,
        ctx => (label: ctx.LoadFloat(11), weight: ctx.LoadFloat(0), features: ctx.LoadFloat(1, 10)),
        separator: ';',
        hasHeader: true);
    var data = validReader.Read(_dataSource);

    // Invalid input: same layout, but the features are loaded as text instead of floats.
    var invalidReader = TextLoader.CreateReader(
        _env,
        ctx => (label: ctx.LoadFloat(11), weight: ctx.LoadFloat(0), features: ctx.LoadText(1, 10)),
        separator: ';',
        hasHeader: true);
    var invalidData = invalidReader.Read(_dataSource);

    var seededEstimator = new PcaEstimator(_env, "features", "pca", rank: 4, seed: 10);
    TestEstimatorCore(seededEstimator, data.AsDynamic, invalidInput: invalidData.AsDynamic);

    var weightedEstimator = new PcaEstimator(
        _env,
        "features",
        "pca",
        rank: 3,
        weightColumn: "weight",
        overSampling: 2,
        center: false);
    TestEstimatorCore(weightedEstimator, data.AsDynamic, invalidInput: invalidData.AsDynamic);

    Done();
}
/// <summary>
/// Fits a rank-5 <c>PcaEstimator</c> (seed 1) on generated_regression_dataset.csv, writes the
/// "pca" column of the transformed data (passed through <c>TakeFilter</c> with a count of 4) to
/// the PCA/pca.tsv output file, and then runs <c>CheckEquality</c> on that file.
/// </summary>
/// <remarks>
/// The environment created here is owned by this method, so it is scoped in a <c>using</c> block
/// and disposed once the output file has been written.
/// </remarks>
public void PcaWorkout()
{
    string dataSource = GetDataPath("generated_regression_dataset.csv");
    var outputPath = GetOutputPath("PCA", "pca.tsv");

    using (var env = new ConsoleEnvironment(seed: 1, conc: 1))
    {
        // Valid input: label = column 11, features = columns 0..10 as floats.
        var data = TextLoader.CreateReader(env,
            c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
            separator: ';', hasHeader: true)
            .Read(new MultiFileSource(dataSource));

        // Same layout with the features loaded as text; only needed by the disabled call below.
        var invalidData = TextLoader.CreateReader(env,
            c => (label: c.LoadFloat(11), features: c.LoadText(0, 10)),
            separator: ';', hasHeader: true)
            .Read(new MultiFileSource(dataSource));

        var est = new PcaEstimator(env, "features", "pca", rank: 5, advancedSettings: s => { s.Seed = 1; });

        // The following call fails because of the following issue
        // https://github.com/dotnet/machinelearning/issues/969
        // TestEstimatorCore(est, data.AsDynamic, invalidInput: invalidData.AsDynamic);

        using (var ch = env.Start("save"))
        {
            var saver = new TextSaver(env, new TextSaver.Arguments { Silent = true, OutputHeader = false });
            IDataView savedData = TakeFilter.Create(env, est.Fit(data.AsDynamic).Transform(data.AsDynamic), 4);
            // Keep only the PCA output column in the saved file.
            savedData = new ChooseColumnsTransform(env, savedData, "pca");

            using (var fs = File.Create(outputPath))
            {
                DataSaverUtils.SaveDataView(ch, saver, savedData, fs, keepHidden: true);
            }
        }
    }

    CheckEquality("PCA", "pca.tsv");
    Done();
}