/// <summary>
/// Loads the pivot file at <c>pivotLocation</c>, fits a PCA + categorical-encoding + k-means
/// pipeline on it, prints two clustering metrics, and writes the fitted model to
/// <c>modelLocation</c>.
/// </summary>
/// <param name="kClusters">Cluster count handed to the k-means trainer.</param>
public void BuildAndTrain(int kClusters = 3)
        {
            ConsoleWriteHeader("Build and Train using Static API");
            Console.Out.WriteLine($"Input file: {pivotLocation}");

            ConsoleWriteHeader("Reading file ...");

            // Columns 0..31 are loaded together as the R4 "Features" column; column 32 is the text "LastName".
            var loaderColumns = new[]
            {
                new TextLoader.Column("Features", DataKind.R4, new[] { new TextLoader.Range(0, 31) }),
                new TextLoader.Column("LastName", DataKind.Text, 32)
            };
            var loaderArgs = new TextLoader.Arguments
            {
                Column    = loaderColumns,
                HasHeader = true,
                Separator = ","
            };
            var loader = new TextLoader(env, loaderArgs);

            // Build each pipeline stage separately, then chain them in the same order as before.
            var pcaStage      = new PcaEstimator(env, "Features", "PCAFeatures", rank: 2, advancedSettings: p => p.Seed = 42);
            var lastNameStage = new CategoricalEstimator(env, new[]
            {
                new CategoricalEstimator.ColumnInfo("LastName", "LastNameKey", CategoricalTransform.OutputKind.Ind)
            });
            // NOTE(review): the trainer consumes "Features", not the "PCAFeatures" column produced above — confirm this is intended.
            var kMeansStage = new KMeansPlusPlusTrainer(env, "Features", clustersCount: kClusters);
            var pipeline    = pcaStage.Append(lastNameStage).Append(kMeansStage);

            ConsoleWriteHeader("Training model for customer clustering");

            var pivotData       = loader.Read(new MultiFileSource(pivotLocation));
            var trainedModel    = pipeline.Fit(pivotData);
            var transformedData = trainedModel.Transform(pivotData);

            // Materialized only so they can be inspected in a debugger; not used afterwards.
            var schemaColumnNames = transformedData.Schema.GetColumnNames().ToArray();
            var previewRows       = transformedData.AsEnumerable<PivotPipelineData>(env, false).Take(10).ToArray();

            ConsoleWriteHeader("Evaluate model");

            var clusteringContext = new ClusteringContext(env);
            var clusteringMetrics = clusteringContext.Evaluate(transformedData, score: "Score", features: "Features");

            Console.WriteLine($"AvgMinScore is: {clusteringMetrics.AvgMinScore}");
            Console.WriteLine($"Dbi is: {clusteringMetrics.Dbi}");

            ConsoleWriteHeader("Save model to local file");
            ModelHelpers.DeleteAssets(modelLocation);
            using (var modelStream = new FileStream(modelLocation, FileMode.Create))
            {
                trainedModel.SaveTo(env, modelStream);
            }
            Console.WriteLine($"Model saved: {modelLocation}");
        }
Exemple #2
0
        /// <summary>
        /// Fits a rank-5 <c>PcaEstimator</c> (seed 1) on the "features" column, saves the
        /// "pca" output (limited through <c>TakeFilter</c> with a count of 4) to
        /// <c>PCA/pca.tsv</c>, and then runs <c>CheckEquality</c> on that file.
        /// </summary>
        public void TestPcaEstimator()
        {
            // Column 11 is the label; columns 0..10 form the feature vector.
            var reader = TextLoader.CreateReader(_env,
                                                 ctx => (label: ctx.LoadFloat(11), features: ctx.LoadFloat(0, 10)),
                                                 separator: ';', hasHeader: true);
            var data = reader.Read(_dataSource);

            var pcaEstimator = new PcaEstimator(_env, "features", "pca", rank: 5, seed: 1);
            var resultFile   = GetOutputPath("PCA", "pca.tsv");

            using (var channel = _env.Start("save"))
            {
                var fitted      = pcaEstimator.Fit(data.AsDynamic);
                var transformed = fitted.Transform(data.AsDynamic);

                IDataView limitedRows = TakeFilter.Create(_env, transformed, 4);
                IDataView pcaOnly     = new ChooseColumnsTransform(_env, limitedRows, "pca");

                using (var output = File.Create(resultFile))
                {
                    DataSaverUtils.SaveDataView(channel, _saver, pcaOnly, output, keepHidden: true);
                }
            }

            CheckEquality("PCA", "pca.tsv", digitsOfPrecision: 4);
            Done();
        }
Exemple #3
0
        /// <summary>
        /// Runs <c>TestEstimatorCore</c> against two <c>PcaEstimator</c> configurations:
        /// one with rank 4 and seed 10, and one with rank 3 plus a weight column,
        /// oversampling of 2 and centering disabled. The invalid input for both runs
        /// loads the feature columns as text instead of floats.
        /// </summary>
        public void PcaWorkout()
        {
            // Column 11 = label, column 0 = weight, columns 1..10 = float features.
            var floatReader = TextLoader.CreateReader(_env,
                                                      ctx => (label: ctx.LoadFloat(11), weight: ctx.LoadFloat(0), features: ctx.LoadFloat(1, 10)),
                                                      separator: ';', hasHeader: true);
            var validData = floatReader.Read(_dataSource);

            // Same layout, but the feature columns are loaded as text.
            var textReader = TextLoader.CreateReader(_env,
                                                     ctx => (label: ctx.LoadFloat(11), weight: ctx.LoadFloat(0), features: ctx.LoadText(1, 10)),
                                                     separator: ';', hasHeader: true);
            var textFeatureData = textReader.Read(_dataSource);

            var seededPca = new PcaEstimator(_env, "features", "pca", rank: 4, seed: 10);
            TestEstimatorCore(seededPca, validData.AsDynamic, invalidInput: textFeatureData.AsDynamic);

            var weightedPca = new PcaEstimator(_env, "features", "pca",
                                               rank: 3,
                                               weightColumn: "weight",
                                               overSampling: 2,
                                               center: false);
            TestEstimatorCore(weightedPca, validData.AsDynamic, invalidInput: textFeatureData.AsDynamic);

            Done();
        }
        /// <summary>
        /// Fits a rank-5 <c>PcaEstimator</c> (seed 1) on <c>generated_regression_dataset.csv</c>,
        /// saves the "pca" output (limited through <c>TakeFilter</c> with a count of 4) to
        /// <c>PCA/pca.tsv</c> without a header, and then runs <c>CheckEquality</c> on that file.
        /// </summary>
        public void PcaWorkout()
        {
            var    env      = new ConsoleEnvironment(seed: 1, conc: 1);
            string dataPath = GetDataPath("generated_regression_dataset.csv");

            // Column 11 = label, columns 0..10 = float features.
            var floatReader = TextLoader.CreateReader(env,
                                                      ctx => (label: ctx.LoadFloat(11), features: ctx.LoadFloat(0, 10)),
                                                      separator: ';', hasHeader: true);
            var data = floatReader.Read(new MultiFileSource(dataPath));

            // Feature columns loaded as text; only needed by the disabled TestEstimatorCore call below.
            var textReader = TextLoader.CreateReader(env,
                                                     ctx => (label: ctx.LoadFloat(11), features: ctx.LoadText(0, 10)),
                                                     separator: ';', hasHeader: true);
            var invalidData = textReader.Read(new MultiFileSource(dataPath));

            var est = new PcaEstimator(env, "features", "pca", rank: 5, advancedSettings: settings => settings.Seed = 1);

            // The following call fails because of the following issue
            // https://github.com/dotnet/machinelearning/issues/969
            // TestEstimatorCore(est, data.AsDynamic, invalidInput: invalidData.AsDynamic);

            var resultFile = GetOutputPath("PCA", "pca.tsv");

            using (var channel = env.Start("save"))
            {
                var saverArgs = new TextSaver.Arguments
                {
                    Silent       = true,
                    OutputHeader = false
                };
                var textSaver = new TextSaver(env, saverArgs);

                var fitted      = est.Fit(data.AsDynamic);
                var transformed = fitted.Transform(data.AsDynamic);

                IDataView limitedRows = TakeFilter.Create(env, transformed, 4);
                IDataView pcaOnly     = new ChooseColumnsTransform(env, limitedRows, "pca");

                using (var output = File.Create(resultFile))
                {
                    DataSaverUtils.SaveDataView(channel, textSaver, pcaOnly, output, keepHidden: true);
                }
            }

            CheckEquality("PCA", "pca.tsv");
            Done();
        }