コード例 #1
0
        /// <summary>
        /// Builds the iris fixtures used by this class: a sample <c>IrisData</c> row stored in
        /// <c>_irisExample</c> and a prediction engine stored in <c>_irisModel</c>.
        /// </summary>
        /// <remarks>
        /// The model is a feature-concatenation step followed by an SDCA multiclass trainer,
        /// fitted on "iris.txt" with <c>seed: 1</c>, <c>conc: 1</c> and a single trainer thread.
        /// </remarks>
        public void SetupIrisPipeline()
        {
            _irisExample = new IrisData()
            {
                SepalLength = 3.3f,
                SepalWidth  = 1.6f,
                PetalLength = 0.2f,
                PetalWidth  = 5.1f,
            };

            string dataFile  = Program.GetInvariantCultureDataPath("iris.txt");
            var    mlContext = new MLContext(seed: 1, conc: 1);

            // Column 0 holds the label; columns 1-4 hold the four measurements.
            var schema = new[]
            {
                new TextLoader.Column("Label", DataKind.R4, 0),
                new TextLoader.Column("SepalLength", DataKind.R4, 1),
                new TextLoader.Column("SepalWidth", DataKind.R4, 2),
                new TextLoader.Column("PetalLength", DataKind.R4, 3),
                new TextLoader.Column("PetalWidth", DataKind.R4, 4),
            };
            var       textLoader   = new TextLoader(mlContext, columns: schema, hasHeader: true);
            IDataView trainingData = textLoader.Read(dataFile);

            var featureNames = new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" };
            var concatenate  = new ColumnConcatenatingEstimator(mlContext, "Features", featureNames);
            var trainer      = new SdcaMultiClassTrainer(
                mlContext, "Label", "Features",
                advancedSettings: (settings) =>
                {
                    settings.NumThreads           = 1;
                    settings.ConvergenceTolerance = 1e-2f;
                });

            var trainedModel = concatenate.Append(trainer).Fit(trainingData);

            _irisModel = trainedModel.CreatePredictionEngine<IrisData, IrisPrediction>(mlContext);
        }
コード例 #2
0
                /// <summary>
                /// Builds a <see cref="SuggestedTransform"/> that concatenates the given columns into one
                /// output column, together with the routing structure naming its sources and destination.
                /// </summary>
                /// <param name="columnNames">Names of the source columns to concatenate.</param>
                /// <param name="concatColumnName">Name of the concatenated output column.</param>
                /// <param name="transformType">Not read by this method; kept for signature compatibility.</param>
                /// <param name="isNumeric">Flag copied onto every source and destination annotation.</param>
                /// <returns>The concatenation estimator wrapped with its routing structure.</returns>
                public static SuggestedTransform ConcatColumnsIntoOne(List <string> columnNames, string concatColumnName,
                                                                      Type transformType, bool isNumeric)
                {
                    var env   = new MLContext();
                    var input = new ColumnConcatenatingEstimator(env, concatColumnName, columnNames.ToArray());

                    // Concatenation can apply to numeric or text columns alike, so the caller-supplied
                    // isNumeric flag is what annotates both the sources and the destination.
                    ColumnRoutingStructure.AnnotatedName[] columnsSource =
                        columnNames
                            .Select(c => new ColumnRoutingStructure.AnnotatedName { IsNumeric = isNumeric, Name = c })
                            .ToArray();
                    ColumnRoutingStructure.AnnotatedName[] columnsDest =
                    {
                        new ColumnRoutingStructure.AnnotatedName { IsNumeric = isNumeric, Name = concatColumnName },
                    };
                    var routingStructure = new ColumnRoutingStructure(columnsSource, columnsDest);

                    return new SuggestedTransform(input, routingStructure);
                }
コード例 #3
0
        /// <summary>
        /// Trains an iris multiclass pipeline that contains a custom lambda transform, then asserts
        /// that the first 20 rows of the same file are predicted with their own label.
        /// </summary>
        void New_Extensibility()
        {
            var dataPath = GetDataPath(TestDatasets.irisData.trainFilename);
            var ml       = new MLContext();

            var trainingData = ml.Data.TextReader(MakeIrisTextLoaderArgs()).Read(dataPath);

            // Copies every field from source to target, except that PetalLength is replaced by
            // SepalLength whenever SepalLength is not greater than 3.
            Action<IrisData, IrisData> rewriteRow = (source, target) =>
            {
                target.Label = source.Label;
                if (source.SepalLength > 3)
                {
                    target.PetalLength = source.PetalLength;
                }
                else
                {
                    target.PetalLength = source.SepalLength;
                }
                target.PetalWidth  = source.PetalWidth;
                target.SepalLength = source.SepalLength;
                target.SepalWidth  = source.SepalWidth;
            };

            var concat   = new ColumnConcatenatingEstimator(ml, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
            var pipeline = concat
                .Append(new MyLambdaTransform<IrisData, IrisData>(ml, rewriteRow), TransformerScope.TrainTest)
                .Append(new ValueToKeyMappingEstimator(ml, "Label"), TransformerScope.TrainTest)
                .Append(ml.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
                    advancedSettings: (s) => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; }))
                .Append(new KeyToValueEstimator(ml, "PredictedLabel"));

            // Only the scoring-scope part of the fitted chain is used for prediction.
            var fitted       = pipeline.Fit(trainingData);
            var scoringModel = fitted.GetModelFor(TransformerScope.Scoring);
            var engine       = scoringModel.MakePredictionFunction<IrisDataNoLabel, IrisPrediction>(ml);

            var testView = TextLoader.ReadFile(ml, MakeIrisTextLoaderArgs(), new MultiFileSource(dataPath));
            var testRows = testView.AsEnumerable<IrisData>(ml, false);

            foreach (var row in testRows.Take(20))
            {
                var predicted = engine.Predict(row);
                Assert.True(predicted.PredictedLabel == row.Label);
            }
        }
        /// <summary>
        /// Loads the file at <paramref name="dataPath"/> and fits a pipeline that concatenates the
        /// four measurement columns into "Features", maps "Label" to a key and runs the
        /// <c>SdcaCalibrated</c> multiclass trainer.
        /// </summary>
        /// <param name="dataPath">Path handed to the text loader; the loader is configured with a header row.</param>
        /// <returns>The fitted transformer chain.</returns>
        private TransformerChain <MulticlassPredictionTransformer <MaximumEntropyModelParameters> > Train(string dataPath)
        {
            // Column 0 holds the label; columns 1-4 hold the four measurements.
            var irisColumns = new[]
            {
                new TextLoader.Column("Label", DataKind.Single, 0),
                new TextLoader.Column("SepalLength", DataKind.Single, 1),
                new TextLoader.Column("SepalWidth", DataKind.Single, 2),
                new TextLoader.Column("PetalLength", DataKind.Single, 3),
                new TextLoader.Column("PetalWidth", DataKind.Single, 4),
            };
            var loaderOptions = new TextLoader.Options()
            {
                Columns   = irisColumns,
                HasHeader = true,
            };
            var       textLoader   = new TextLoader(mlContext, options: loaderOptions);
            IDataView trainingData = textLoader.Load(dataPath);

            var featureColumns = new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" };
            var pipeline       = new ColumnConcatenatingEstimator(mlContext, "Features", featureColumns)
                .Append(mlContext.Transforms.Conversion.MapValueToKey("Label"))
                .Append(mlContext.MulticlassClassification.Trainers.SdcaCalibrated());

            var fitted = pipeline.Fit(trainingData);
            return fitted;
        }
コード例 #5
0
        /// <summary>
        /// Fits a one-versus-all pipeline whose feature column is named "Vars" instead of the
        /// usual "Features", then runs <c>TestEstimatorCore</c> on the pipeline and the data.
        /// </summary>
        public void MetacomponentsFeaturesRenamed()
        {
            // Comma-separated iris training file, loaded with the dataset's own column definitions.
            var loaderOptions = new TextLoader.Options()
            {
                Columns    = TestDatasets.irisData.GetLoaderColumns(),
                Separators = new[] { ',' },
            };
            var textLoader = new TextLoader(Env, options: loaderOptions);
            var irisView   = textLoader.Load(GetDataPath(TestDatasets.irisData.trainFilename));

            // The binary trainer reads its features from the renamed "Vars" column.
            var binaryOptions = new SdcaNonCalibratedBinaryTrainer.Options
            {
                LabelColumnName           = "Label",
                FeatureColumnName         = "Vars",
                MaximumNumberOfIterations = 100,
                Shuffle                   = true,
                NumberOfThreads           = 1,
            };
            var binaryTrainer = ML.BinaryClassification.Trainers.SdcaNonCalibrated(binaryOptions);

            var concat   = new ColumnConcatenatingEstimator(Env, "Vars", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
            var pipeline = concat
                .Append(new ValueToKeyMappingEstimator(Env, "Label"), TransformerScope.TrainTest)
                .Append(ML.MulticlassClassification.Trainers.OneVersusAll(binaryTrainer))
                .Append(new KeyToValueMappingEstimator(Env, "PredictedLabel"));

            // The fitted model itself is not inspected; the call only has to complete.
            var fitted = pipeline.Fit(irisView);

            TestEstimatorCore(pipeline, irisView);
            Done();
        }
コード例 #6
0
        /// <summary>
        /// Trains an iris multiclass pipeline, takes its scoring-scope model, and asserts that the
        /// first 20 rows of the same file are predicted with their own label.
        /// </summary>
        void New_DecomposableTrainAndPredict()
        {
            var dataPath = GetDataPath(TestDatasets.irisData.trainFilename);
            var ml       = new MLContext();

            var reader       = ml.Data.CreateTextReader(MakeIrisColumns(), separatorChar: ',');
            var trainingData = reader.Read(dataPath);

            var concat   = new ColumnConcatenatingEstimator(ml, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
            var pipeline = concat
                .Append(new ValueToKeyMappingEstimator(ml, "Label"), TransformerScope.TrainTest)
                .Append(ml.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
                    "Label", "Features",
                    advancedSettings: s => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; }))
                .Append(new KeyToValueMappingEstimator(ml, "PredictedLabel"));

            // Only the scoring-scope part of the fitted chain is used for prediction.
            var fitted       = pipeline.Fit(trainingData);
            var scoringModel = fitted.GetModelFor(TransformerScope.Scoring);
            var engine       = scoringModel.MakePredictionFunction<IrisDataNoLabel, IrisPrediction>(ml);

            var testView = ml.Data.ReadFromTextFile(dataPath, MakeIrisColumns(), separatorChar: ',');
            var testRows = testView.AsEnumerable<IrisData>(ml, false);

            foreach (var row in testRows.Take(20))
            {
                var predicted = engine.Predict(row);
                Assert.True(predicted.PredictedLabel == row.Label);
            }
        }
コード例 #7
0
        /// <summary>
        /// Builds a prediction function that concatenates the four input columns into "Features",
        /// scores them with the ONNX model at <c>_onnxFilePath</c>, and copies the first element of
        /// the "Estimate" output into a <c>FlatPrediction</c>.
        /// </summary>
        /// <remarks>
        /// The pipeline is fitted against an empty temporary file that only supplies the input
        /// schema. The file is deleted in a <c>finally</c> block so it does not leak when reading
        /// or fitting throws.
        /// </remarks>
        /// <returns>A prediction function from <c>SearchData</c> to <c>FlatPrediction</c>.</returns>
        public PredictionFunction <SearchData, FlatPrediction> GetPredictor()
        {
            var reader = TextLoader.CreateReader(_env,
                                                 ctx => (
                                                     RateCode: ctx.LoadFloat(1),
                                                     PassengerCount: ctx.LoadFloat(2),
                                                     TripTime: ctx.LoadFloat(3),
                                                     TripDistance: ctx.LoadFloat(4)),
                                                 separator: ',',
                                                 hasHeader: true);

            var dummyTempFile = Path.GetTempFileName();
            try
            {
                var data = reader.Read(new MultiFileSource(dummyTempFile));

                var pipeline = new ColumnConcatenatingEstimator(_env, "Features", "RateCode", "PassengerCount", "TripTime", "TripDistance")
                               .Append(new ColumnSelectingEstimator(_env, "Features"))
                               .Append(new OnnxScoringEstimator(_env, _onnxFilePath, "Features", "Estimate"))
                               .Append(new ColumnSelectingEstimator(_env, "Estimate"))
                               .Append(new CustomMappingEstimator<RawPrediction, FlatPrediction>(
                                           _env,
                                           contractName: "OnnxPredictionExtractor",
                                           mapAction: (input, output) =>
                                           {
                                               output.Estimate = input.Estimate[0];
                                           }));

                var transformer = pipeline.Fit(data.AsDynamic);

                return transformer.MakePredictionFunction<SearchData, FlatPrediction>(_env);
            }
            finally
            {
                // Runs on success and on failure, so the placeholder file is always removed.
                File.Delete(dummyTempFile);
            }
        }
コード例 #8
0
        /// <summary>
        /// Trains an iris multiclass pipeline with <c>SdcaCalibrated</c>, takes its scoring-scope
        /// model, and asserts that the first 20 rows of the same file are predicted with their own label.
        /// </summary>
        void DecomposableTrainAndPredict()
        {
            var dataPath = GetDataPath(TestDatasets.irisData.trainFilename);
            var ml       = new MLContext();

            var trainingData = ml.Data.LoadFromTextFile<IrisData>(dataPath, separatorChar: ',');

            var trainerOptions = new SdcaCalibratedMulticlassTrainer.Options
            {
                MaximumNumberOfIterations = 100,
                Shuffle                   = true,
                NumberOfThreads           = 1,
            };

            var concat   = new ColumnConcatenatingEstimator(ml, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
            var pipeline = concat
                .Append(new ValueToKeyMappingEstimator(ml, "Label"), TransformerScope.TrainTest)
                .Append(ml.MulticlassClassification.Trainers.SdcaCalibrated(trainerOptions))
                .Append(new KeyToValueMappingEstimator(ml, "PredictedLabel"));

            // Only the scoring-scope part of the fitted chain is used for prediction.
            var fitted       = pipeline.Fit(trainingData);
            var scoringModel = fitted.GetModelFor(TransformerScope.Scoring);
            var engine       = ml.Model.CreatePredictionEngine<IrisDataNoLabel, IrisPrediction>(scoringModel);

            var testView = ml.Data.LoadFromTextFile(dataPath, TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',', hasHeader: true);
            var testRows = ml.Data.CreateEnumerable<IrisData>(testView, false);

            foreach (var row in testRows.Take(20))
            {
                var predicted = engine.Predict(row);
                Assert.True(predicted.PredictedLabel == row.Label);
            }
        }
コード例 #9
0
        /// <summary>
        /// Runs four <c>ColumnConcatenatingEstimator</c> steps over the first 10 rows of
        /// "adult.test", asserts the type and <c>VectorSize</c> of each output column, saves the
        /// four outputs to "Concat1.tsv" and passes that file to <c>CheckEquality</c>.
        /// </summary>
        void TestConcat()
        {
            string dataPath = GetDataPath("adult.test");
            var    source   = new MultiFileSource(dataPath);

            var loaderArgs = new TextLoader.Arguments
            {
                Column = new[]
                {
                    new TextLoader.Column("float1", DataKind.R4, 0),
                    new TextLoader.Column("float4", DataKind.R4, new[] { new TextLoader.Range(0), new TextLoader.Range(2), new TextLoader.Range(4), new TextLoader.Range(10) }),
                    new TextLoader.Column("float6", DataKind.R4, new[] { new TextLoader.Range(0), new TextLoader.Range(2), new TextLoader.Range(4), new TextLoader.Range(10, 12) }),
                    new TextLoader.Column("vfloat", DataKind.R4, new[] { new TextLoader.Range(14, null) { AutoEnd = false, VariableEnd = true } }),
                },
                Separator = ",",
                HasHeader = true,
            };
            var loader = new TextLoader(Env, loaderArgs, new MultiFileSource(dataPath));
            var data   = loader.Read(source);

            // Looks up a column by name, failing the test when it is missing.
            ColumnType LookupType(Schema schema, string name)
            {
                Assert.True(schema.TryGetColumnIndex(name, out int columnIndex), $"Could not find '{name}'");
                return schema.GetColumnType(columnIndex);
            }

            // Asserts that the named column is an R4 vector with the given VectorSize.
            void AssertFloatVector(Schema schema, string name, int expectedSize)
            {
                ColumnType type = LookupType(schema, name);
                Assert.True(type.IsVector && type.ItemType == NumberType.R4 && type.VectorSize == expectedSize);
            }

            var single   = new ColumnConcatenatingEstimator(Env, "f1", "float1");
            var pipeline = single
                .Append(new ColumnConcatenatingEstimator(Env, "f2", "float1", "float1"))
                .Append(new ColumnConcatenatingEstimator(Env, "f3", "float4", "float1"))
                .Append(new ColumnConcatenatingEstimator(Env, "f4", "float6", "vfloat", "float1"));

            data = TakeFilter.Create(Env, data, 10);
            var fitted = pipeline.Fit(data);
            data = fitted.Transform(data);

            AssertFloatVector(data.Schema, "f1", 1);
            AssertFloatVector(data.Schema, "f2", 2);
            AssertFloatVector(data.Schema, "f3", 5);
            // f4 is the only output built from the "vfloat" column; its expected VectorSize is 0.
            AssertFloatVector(data.Schema, "f4", 0);

            data = new ChooseColumnsTransform(Env, data, "f1", "f2", "f3", "f4");

            var subdir     = Path.Combine("Transform", "Concat");
            var outputPath = GetOutputPath(subdir, "Concat1.tsv");
            using (var channel = Env.Start("save"))
            {
                var saver = new TextSaver(Env, new TextSaver.Arguments { Silent = true, Dense = true });
                using (var output = File.Create(outputPath))
                {
                    DataSaverUtils.SaveDataView(channel, saver, data, output, keepHidden: false);
                }
            }

            CheckEquality(subdir, "Concat1.tsv");
            Done();
        }
コード例 #10
0
        /// <summary>
        /// Fits an iris pipeline in which a binary SDCA trainer is wrapped by <c>Ova</c> to handle
        /// the multiclass label. Nothing is asserted; the fit only has to complete.
        /// </summary>
        public void Metacomponents()
        {
            var ml       = new MLContext();
            var irisPath = GetDataPath(TestDatasets.irisData.trainFilename);
            var irisView = ml.Data.ReadFromTextFile<IrisData>(irisPath, separatorChar: ',');

            var binaryTrainer = ml.BinaryClassification.Trainers.StochasticDualCoordinateAscent(
                "Label", "Features",
                advancedSettings: (s) => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; });

            var concat   = new ColumnConcatenatingEstimator(ml, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
            var pipeline = concat
                .Append(new ValueToKeyMappingEstimator(ml, "Label"), TransformerScope.TrainTest)
                .Append(new Ova(ml, binaryTrainer))
                .Append(new KeyToValueMappingEstimator(ml, "PredictedLabel"));

            var fitted = pipeline.Fit(irisView);
        }
コード例 #11
0
        /// <summary>
        /// Fits an <c>Ova</c> pipeline whose feature column is named "Vars" instead of the usual
        /// "Features", then runs <c>TestEstimatorCore</c> on the pipeline and the data.
        /// </summary>
        public void MetacomponentsFeaturesRenamed()
        {
            var textLoader = new TextLoader(Env, TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',');
            var irisView   = textLoader.Read(GetDataPath(TestDatasets.irisData.trainFilename));

            // The binary trainer reads its features from the renamed "Vars" column.
            var binaryTrainer = new SdcaBinaryTrainer(
                Env, "Label", "Vars",
                advancedSettings: (s) => { s.MaxIterations = 100; s.Shuffle = true; s.NumThreads = 1; });

            var concat   = new ColumnConcatenatingEstimator(Env, "Vars", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
            var pipeline = concat
                .Append(new ValueToKeyMappingEstimator(Env, "Label"), TransformerScope.TrainTest)
                .Append(new Ova(Env, binaryTrainer))
                .Append(new KeyToValueMappingEstimator(Env, "PredictedLabel"));

            // The fitted model itself is not inspected; the call only has to complete.
            var fitted = pipeline.Fit(irisView);

            TestEstimatorCore(pipeline, irisView);
            Done();
        }
コード例 #12
0
        /// <summary>
        /// Sample: concatenates the "Age", "Parity" and "Induced" columns of the infert dataset
        /// into a "Features" column and prints the resulting values, one row per line.
        /// </summary>
        public static void ConcatTransform()
        {
            // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
            // as well as the source of randomness.
            var ml = new MLContext();

            // Get a small dataset as an IEnumerable and then read it as ML.NET's data type.
            IEnumerable<SamplesUtils.DatasetUtils.SampleInfertData> rawRows = SamplesUtils.DatasetUtils.GetInfertData();
            var trainData = ml.Data.ReadFromEnumerable(rawRows);

            // Preview of the data.
            //
            // Age    Case  Education  induced     parity  pooled.stratum  row_num  ...
            // 26.0   1.0   0-5yrs      1.0         6.0       3.0      1.0  ...
            // 42.0   1.0   0-5yrs      1.0         1.0       1.0      2.0  ...
            // 39.0   1.0   0-5yrs      2.0         6.0       4.0      3.0  ...
            // 34.0   1.0   0-5yrs      2.0         4.0       2.0      4.0  ...
            // 35.0   1.0   6-11yrs     1.0         3.0       32.0     5.0  ...

            // An estimator that concatenates the age, parity and induced columns into the Features column.
            string outputColumnName = "Features";
            var    sourceColumns    = new[] { "Age", "Parity", "Induced" };
            var    pipeline         = new ColumnConcatenatingEstimator(ml, outputColumnName, sourceColumns);

            // Fit on the data, then apply the fitted transformer to the same data.
            var transformer     = pipeline.Fit(trainData);
            var transformedData = transformer.Transform(trainData);

            // Read the new column back as an IEnumerable of SampleInfertDataWithFeatures.
            var featureRows = ml.CreateEnumerable<SampleInfertDataWithFeatures>(transformedData, reuseRowObject: false);

            Console.WriteLine($"{outputColumnName} column obtained post-transformation.");
            foreach (var row in featureRows)
            {
                // Each value is followed by a single space; the row ends with a newline.
                foreach (var component in row.Features.GetValues())
                {
                    Console.Write($"{component} ");
                }
                Console.WriteLine();
            }

            // Features column obtained post-transformation.
            //
            // 26 6 1
            // 42 1 1
            // 39 6 2
            // 34 4 2
            // 35 3 1
        }
コード例 #13
0
        /// <summary>
        /// Fits an iris pipeline in which a non-calibrated binary SDCA trainer is wrapped by
        /// <c>OneVersusAll</c> to handle the multiclass label. Nothing is asserted; the fit only
        /// has to complete.
        /// </summary>
        public void Metacomponents()
        {
            var ml       = new MLContext();
            var irisPath = GetDataPath(TestDatasets.irisData.trainFilename);
            var irisView = ml.Data.LoadFromTextFile<IrisData>(irisPath, separatorChar: ',');

            var binaryOptions = new SdcaNonCalibratedBinaryTrainer.Options
            {
                NumberOfIterations = 100,
                Shuffle            = true,
                NumberOfThreads    = 1,
            };
            var binaryTrainer = ml.BinaryClassification.Trainers.StochasticDualCoordinateAscentNonCalibrated(binaryOptions);

            var concat   = new ColumnConcatenatingEstimator(ml, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
            var pipeline = concat
                .Append(ml.Transforms.Conversion.MapValueToKey("Label"), TransformerScope.TrainTest)
                .Append(ml.MulticlassClassification.Trainers.OneVersusAll(binaryTrainer))
                .Append(ml.Transforms.Conversion.MapKeyToValue("PredictedLabel"));

            var fitted = pipeline.Fit(irisView);
        }
コード例 #14
0
        /// <summary>
        /// Fits a concatenate-then-KMeans++ pipeline (3 clusters) on "iris.txt", asserts that the
        /// prediction for one observation has a non-zero <c>PredictedLabel</c>, and asserts that
        /// the data-frame prediction function returns a frame whose <c>Shape[0]</c> matches the input.
        /// </summary>
        public void TestEP_Q_KMeansEntryPointAPI_06()
        {
            var irisFile = FileHelper.GetTestFile("iris.txt");

            using (var env = new ConsoleEnvironment())
            {
                // Tab-separated file with a header row; column 0 is the label.
                var loaderArgs = new TextLoader.Arguments()
                {
                    Separator = "\t",
                    HasHeader = true,
                    Column    = new[]
                    {
                        new TextLoader.Column("Label", DataKind.R4, 0),
                        new TextLoader.Column("Sepal_length", DataKind.R4, 1),
                        new TextLoader.Column("Sepal_width", DataKind.R4, 2),
                        new TextLoader.Column("Petal_length", DataKind.R4, 3),
                        new TextLoader.Column("Petal_width", DataKind.R4, 4),
                    },
                };
                var textLoader = new TextLoader(env, loaderArgs);

                var concat   = new ColumnConcatenatingEstimator(env, "Features", "Sepal_length", "Sepal_width", "Petal_length", "Petal_width");
                var pipeline = concat.Append(new KMeansPlusPlusTrainer(env, "Features", clustersCount: 3));

                IDataView trainingView = textLoader.Read(new MultiFileSource(irisFile));
                var       fitted       = pipeline.Fit(trainingView);

                // Single-row prediction through the typed prediction function.
                var sample = new IrisObservation()
                {
                    Sepal_length = 3.3f,
                    Sepal_width  = 1.6f,
                    Petal_length = 0.2f,
                    Petal_width  = 5.1f,
                };
                var typedPredictor = fitted.MakePredictionFunction<IrisObservation, IrisPrediction>(env);
                var single         = typedPredictor.Predict(sample);
                Assert.IsTrue(single.PredictedLabel != 0);

                // Whole-file prediction through the data-frame prediction function. The file is
                // read twice: once for the schema and expected row count, once as the input.
                var schemaFrame    = Scikit.ML.DataManipulation.DataFrameIO.ReadCsv(irisFile, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
                var framePredictor = fitted.MakePredictionFunctionDataFrame(env, schemaFrame.Schema);
                var inputFrame     = Scikit.ML.DataManipulation.DataFrameIO.ReadCsv(irisFile, sep: '\t', dtypes: new ColumnType[] { NumberType.R4 });
                var outputFrame    = framePredictor.Predict(inputFrame);
                Assert.AreEqual(schemaFrame.Shape[0], outputFrame.Shape[0]);
            }
        }
コード例 #15
0
                /// <summary>
                /// Suggests a single transform that concatenates every non-ignored column into the
                /// default features column.
                /// </summary>
                /// <remarks>
                /// Yields nothing when no column is selected, when the only selected column is already
                /// the numeric default features column, or when the selected columns mix numeric and
                /// non-numeric purposes.
                /// </remarks>
                /// <param name="columns">Candidate columns; those for which <c>IgnoreColumn</c> returns true are skipped.</param>
                /// <returns>Zero or one suggested concatenation transform.</returns>
                public override IEnumerable <SuggestedTransform> Apply(IntermediateColumn[] columns)
                {
                    var selectedColumns = columns.Where(c => !IgnoreColumn(c.Purpose)).ToArray();
                    if (selectedColumns.Length == 0)
                    {
                        yield break;
                    }

                    var  colList              = selectedColumns.Select(c => c.ColumnName).ToArray();
                    bool allColumnsNumeric    = selectedColumns.All(c => c.Purpose == ColumnPurpose.NumericFeature && c.Type.ItemType() != BoolType.Instance);
                    bool allColumnsNonNumeric = selectedColumns.All(c => c.Purpose != ColumnPurpose.NumericFeature);

                    // A lone column already named as the features column and already numeric needs no concat.
                    if (colList.Length == 1 && colList[0] == DefaultColumnNames.Features && allColumnsNumeric)
                    {
                        yield break;
                    }

                    // A mix of numeric and non-numeric columns is not concatenated by this rule.
                    if (!allColumnsNumeric && !allColumnsNonNumeric)
                    {
                        yield break;
                    }

                    // The column names are already plain strings, so they are passed on directly.
                    var input = new ColumnConcatenatingEstimator(Env, DefaultColumnNames.Features, colList);

                    ColumnRoutingStructure.AnnotatedName[] columnsSource =
                        colList
                            .Select(c => new ColumnRoutingStructure.AnnotatedName { IsNumeric = allColumnsNumeric, Name = c })
                            .ToArray();
                    ColumnRoutingStructure.AnnotatedName[] columnsDest =
                    {
                        new ColumnRoutingStructure.AnnotatedName { IsNumeric = allColumnsNumeric, Name = DefaultColumnNames.Features },
                    };
                    var routingStructure = new ColumnRoutingStructure(columnsSource, columnsDest);

                    yield return new SuggestedTransform(input, routingStructure);
                }
コード例 #16
0
        /// <summary>
        /// Builds the iris fixtures used by this class: a sample <c>IrisData</c> row stored in
        /// <c>_irisExample</c> and a prediction engine stored in <c>_irisModel</c>.
        /// </summary>
        /// <remarks>
        /// The model concatenates the four measurements into "Features", maps "Label" to a key and
        /// trains <c>SdcaMaximumEntropy</c> on "iris.txt" with <c>seed: 1</c> and one thread.
        /// </remarks>
        public void SetupIrisPipeline()
        {
            _irisExample = new IrisData()
            {
                SepalLength = 3.3f,
                SepalWidth  = 1.6f,
                PetalLength = 0.2f,
                PetalWidth  = 5.1f,
            };

            string dataFile  = GetBenchmarkDataPath("iris.txt");
            var    mlContext = new MLContext(seed: 1);

            // Column 0 holds the label; columns 1-4 hold the four measurements.
            var irisColumns = new[]
            {
                new TextLoader.Column("Label", DataKind.Single, 0),
                new TextLoader.Column("SepalLength", DataKind.Single, 1),
                new TextLoader.Column("SepalWidth", DataKind.Single, 2),
                new TextLoader.Column("PetalLength", DataKind.Single, 3),
                new TextLoader.Column("PetalWidth", DataKind.Single, 4),
            };
            var loaderOptions = new TextLoader.Options()
            {
                Columns   = irisColumns,
                HasHeader = true,
            };
            var       textLoader   = new TextLoader(mlContext, options: loaderOptions);
            IDataView trainingData = textLoader.Load(dataFile);

            var featureNames = new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" };
            var pipeline     = new ColumnConcatenatingEstimator(mlContext, "Features", featureNames)
                .Append(mlContext.Transforms.Conversion.MapValueToKey("Label"))
                .Append(mlContext.MulticlassClassification.Trainers.SdcaMaximumEntropy(
                    new SdcaMaximumEntropyMulticlassTrainer.Options
                    {
                        NumberOfThreads      = 1,
                        ConvergenceTolerance = 1e-2f,
                    }));

            var trainedModel = pipeline.Fit(trainingData);

            _irisModel = mlContext.Model.CreatePredictionEngine<IrisData, IrisPrediction>(trainedModel);
        }
コード例 #17
0
        /// <summary>
        /// Builds the iris fixtures used by this class: a sample <c>IrisData</c> row stored in
        /// <c>_irisExample</c> and a prediction engine stored in <c>_irisModel</c>.
        /// </summary>
        /// <remarks>
        /// The model concatenates the four measurements into "Features" and trains the SDCA
        /// multiclass trainer on "iris.txt" with <c>seed: 1</c>, <c>conc: 1</c> and one thread.
        /// </remarks>
        public void SetupIrisPipeline()
        {
            _irisExample = new IrisData()
            {
                SepalLength = 3.3f,
                SepalWidth  = 1.6f,
                PetalLength = 0.2f,
                PetalWidth  = 5.1f,
            };

            string dataFile  = BaseTestClass.GetDataPath("iris.txt");
            var    mlContext = new MLContext(seed: 1, conc: 1);

            // Column 0 holds the label; columns 1-4 hold the four measurements.
            var irisColumns = new[]
            {
                new TextLoader.Column("Label", DataKind.Single, 0),
                new TextLoader.Column("SepalLength", DataKind.Single, 1),
                new TextLoader.Column("SepalWidth", DataKind.Single, 2),
                new TextLoader.Column("PetalLength", DataKind.Single, 3),
                new TextLoader.Column("PetalWidth", DataKind.Single, 4),
            };
            var loaderOptions = new TextLoader.Options()
            {
                Columns   = irisColumns,
                HasHeader = true,
            };
            var       textLoader   = new TextLoader(mlContext, options: loaderOptions);
            IDataView trainingData = textLoader.Load(dataFile);

            var featureNames = new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" };
            var pipeline     = new ColumnConcatenatingEstimator(mlContext, "Features", featureNames)
                .Append(mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(
                    new SdcaMultiClassTrainer.Options
                    {
                        NumberOfThreads      = 1,
                        ConvergenceTolerance = 1e-2f,
                    }));

            var trainedModel = pipeline.Fit(trainingData);

            _irisModel = trainedModel.CreatePredictionEngine<IrisData, IrisPrediction>(mlContext);
        }
        /// <summary>
        /// Loads the file at <paramref name="dataPath"/> and fits a pipeline that concatenates the
        /// four measurement columns into "Features" and runs the SDCA multiclass trainer with its
        /// default arguments.
        /// </summary>
        /// <param name="dataPath">Path handed to the text loader; the loader is configured with a header row.</param>
        /// <returns>The fitted transformer chain.</returns>
        private TransformerChain <MulticlassPredictionTransformer <MulticlassLogisticRegressionModelParameters> > Train(string dataPath)
        {
            // Column 0 holds the label; columns 1-4 hold the four measurements.
            var irisColumns = new[]
            {
                new TextLoader.Column("Label", DataKind.R4, 0),
                new TextLoader.Column("SepalLength", DataKind.R4, 1),
                new TextLoader.Column("SepalWidth", DataKind.R4, 2),
                new TextLoader.Column("PetalLength", DataKind.R4, 3),
                new TextLoader.Column("PetalWidth", DataKind.R4, 4),
            };
            var       textLoader   = new TextLoader(_env, columns: irisColumns, hasHeader: true);
            IDataView trainingData = textLoader.Read(dataPath);

            var featureNames = new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" };
            var pipeline     = new ColumnConcatenatingEstimator(_env, "Features", featureNames)
                .Append(_env.MulticlassClassification.Trainers.StochasticDualCoordinateAscent());

            var fitted = pipeline.Fit(trainingData);
            return fitted;
        }
コード例 #19
0
ファイル: debug.cs プロジェクト: sdpython/csharpyml
        /// <summary>
        /// Fits a concatenate-then-<c>MulticlassLogisticRegression</c> pipeline on the file named
        /// by <c>_dataset</c>, stores a prediction function in <c>_fct</c>, and saves the fitted
        /// model to <paramref name="dest"/>.
        /// </summary>
        /// <param name="dest">
        /// Path of the model file to write. An existing file is replaced: it is opened with
        /// <c>File.Create</c>, which truncates, so no bytes of a previous longer file are left
        /// behind after the new model.
        /// </param>
        public void Train(string dest)
        {
            using (var env = new ConsoleEnvironment(verbose: false))
            {
                // Comma-separated file with a header row; column 0 is the label.
                var args = new TextLoader.Arguments()
                {
                    Separator = ",",
                    HasHeader = true,
                    Column    = new TextLoader.Column[] {
                        new TextLoader.Column("Label", DataKind.R4, 0),
                        new TextLoader.Column("Sepal_length", DataKind.R4, 1),
                        new TextLoader.Column("Sepal_width", DataKind.R4, 2),
                        new TextLoader.Column("Petal_length", DataKind.R4, 3),
                        new TextLoader.Column("Petal_width", DataKind.R4, 4),
                    }
                };

                var reader = new TextLoader(env, args);
                var concat = new ColumnConcatenatingEstimator(env,
                                                              "Features", "Sepal_length",
                                                              "Sepal_width", "Petal_length", "Petal_width");
                var trainer  = new MulticlassLogisticRegression(env, "Label", "Features");
                var pipeline = concat.Append(trainer);

                IDataView trainingDataView = reader.Read(new MultiFileSource(_dataset));
                var       model            = pipeline.Fit(trainingDataView);

                // NOTE(review): _fct is created with env, which is disposed when this using block
                // exits - confirm the prediction function does not need the environment afterwards.
                _fct = model.MakePredictionFunction <IrisObservation, IrisPrediction>(env);

                // File.Create truncates an existing file; File.OpenWrite would overwrite in place
                // and leave the tail of a longer previous file after the new content.
                using (var stdest = File.Create(dest))
                {
                    model.SaveTo(env, stdest);
                }
            }
        }
コード例 #20
0
        /// <summary>
        /// Builds the iris fixtures used by this class: a sample <c>IrisData</c> row stored in
        /// <c>_irisExample</c> and a prediction function stored in <c>_irisModel</c>.
        /// </summary>
        /// <remarks>
        /// The model is a feature-concatenation step followed by an SDCA multiclass trainer,
        /// fitted on the tab-separated "iris.txt" inside a quiet <c>ConsoleEnvironment</c>.
        /// NOTE(review): <c>_irisModel</c> is created with an environment that is disposed when
        /// this method returns - confirm the prediction function does not need it afterwards.
        /// </remarks>
        public void SetupIrisPipeline()
        {
            _irisExample = new IrisData()
            {
                SepalLength = 3.3f,
                SepalWidth  = 1.6f,
                PetalLength = 0.2f,
                PetalWidth  = 5.1f,
            };

            string dataFile = Program.GetInvariantCultureDataPath("iris.txt");

            using (var env = new ConsoleEnvironment(seed: 1, conc: 1, verbose: false, sensitivity: MessageSensitivity.None, outWriter: EmptyWriter.Instance))
            {
                // Tab-separated file with a header row; column 0 is the label.
                var loaderArgs = new TextLoader.Arguments()
                {
                    Separator = "\t",
                    HasHeader = true,
                    Column    = new[]
                    {
                        new TextLoader.Column("Label", DataKind.R4, 0),
                        new TextLoader.Column("SepalLength", DataKind.R4, 1),
                        new TextLoader.Column("SepalWidth", DataKind.R4, 2),
                        new TextLoader.Column("PetalLength", DataKind.R4, 3),
                        new TextLoader.Column("PetalWidth", DataKind.R4, 4),
                    },
                };
                var       textLoader   = new TextLoader(env, loaderArgs);
                IDataView trainingData = textLoader.Read(dataFile);

                var featureNames = new[] { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" };
                var concatenate  = new ColumnConcatenatingEstimator(env, "Features", featureNames);
                var trainer      = new SdcaMultiClassTrainer(
                    env, "Label", "Features",
                    advancedSettings: (settings) =>
                    {
                        settings.NumThreads           = 1;
                        settings.ConvergenceTolerance = 1e-2f;
                    });

                var trainedModel = concatenate.Append(trainer).Fit(trainingData);

                _irisModel = trainedModel.MakePredictionFunction<IrisData, IrisPrediction>(env);
            }
        }
コード例 #21
0
        /// <summary>
        /// Trains a multiclass SDCA pipeline on the iris training file with a custom row
        /// mapping in the chain, then asserts that the predicted label matches the actual
        /// label for the first 20 rows of that same file.
        /// </summary>
        void Extensibility()
        {
            var irisPath = GetDataPath(TestDatasets.irisData.trainFilename);

            var ml = new MLContext();
            var trainLoader = ml.Data.CreateTextLoader(TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',');
            var trainingData = trainLoader.Load(irisPath);

            // Row mapping: copies every field across, except that PetalLength takes the value of
            // SepalLength whenever SepalLength is not greater than 3.
            Action<IrisData, IrisData> copyRow = (src, dst) =>
            {
                dst.Label = src.Label;
                if (src.SepalLength > 3)
                {
                    dst.PetalLength = src.PetalLength;
                }
                else
                {
                    dst.PetalLength = src.SepalLength;
                }
                dst.PetalWidth = src.PetalWidth;
                dst.SepalLength = src.SepalLength;
                dst.SepalWidth = src.SepalWidth;
            };

            var sdcaOptions = new SdcaMultiClassTrainer.Options
            {
                NumberOfIterations = 100,
                Shuffle = true,
                NumberOfThreads = 1,
            };

            var concat = new ColumnConcatenatingEstimator(ml, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
            var pipeline = concat
                .Append(new CustomMappingEstimator<IrisData, IrisData>(ml, copyRow, null), TransformerScope.TrainTest)
                .Append(new ValueToKeyMappingEstimator(ml, "Label"), TransformerScope.TrainTest)
                .Append(ml.MulticlassClassification.Trainers.StochasticDualCoordinateAscent(sdcaOptions))
                .Append(new KeyToValueMappingEstimator(ml, "PredictedLabel"));

            // Only the scoring-scope part of the fitted chain is used for prediction.
            var scoringModel = pipeline.Fit(trainingData).GetModelFor(TransformerScope.Scoring);
            var engine = scoringModel.CreatePredictionEngine<IrisDataNoLabel, IrisPrediction>(ml);

            var testView = ml.Data.LoadFromTextFile(irisPath, TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',');
            var testRows = ml.Data.CreateEnumerable<IrisData>(testView, false);

            foreach (var row in testRows.Take(20))
            {
                var predicted = engine.Predict(row);
                Assert.True(predicted.PredictedLabel == row.Label);
            }
        }
コード例 #22
0
        /// <summary>
        /// Builds an <c>Ova</c> pipeline around a binary SDCA trainer whose feature column is
        /// named "Vars", fits it on the iris training file, and runs it through
        /// <c>TestEstimatorCore</c>.
        /// </summary>
        public void MetacomponentsFeaturesRenamed()
        {
            var loader = new TextLoader(Env, TestDatasets.irisData.GetLoaderColumns(), separatorChar: ',');
            var data = loader.Read(GetDataPath(TestDatasets.irisData.trainFilename));

            // The binary trainer reads its features from "Vars", the column produced by the
            // concatenation step below.
            var sdcaOptions = new SdcaBinaryTrainer.Options
            {
                LabelColumn = "Label",
                FeatureColumn = "Vars",
                MaxIterations = 100,
                Shuffle = true,
                NumThreads = 1,
            };
            var sdcaTrainer = ML.BinaryClassification.Trainers.StochasticDualCoordinateAscent(sdcaOptions);

            var concat = new ColumnConcatenatingEstimator(Env, "Vars", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
            var pipeline = concat
                .Append(new ValueToKeyMappingEstimator(Env, "Label"), TransformerScope.TrainTest)
                .Append(new Ova(Env, sdcaTrainer))
                .Append(new KeyToValueMappingEstimator(Env, "PredictedLabel"));

            // The fitted model itself is not inspected here; the call is kept so fitting still runs.
            pipeline.Fit(data);

            TestEstimatorCore(pipeline, data);
            Done();
        }
        /// <summary>
        /// Loads the file at <paramref name="dataPath"/> and fits a pipeline that concatenates
        /// the four iris measurements into "Features" and trains the multiclass SDCA trainer
        /// with its default settings.
        /// </summary>
        /// <param name="dataPath">Path of the text file to train on; it is read with a header row.</param>
        /// <returns>The fitted transformer chain.</returns>
        private TransformerChain <MulticlassPredictionTransformer <MulticlassLogisticRegressionModelParameters> > Train(string dataPath)
        {
            string[] featureNames = { "SepalLength", "SepalWidth", "PetalLength", "PetalWidth" };

            // Column 0 holds the label; the feature columns follow at indices 1..4 in the order above.
            var schema = new TextLoader.Column[featureNames.Length + 1];
            schema[0] = new TextLoader.Column("Label", DataKind.Single, 0);
            for (int i = 0; i < featureNames.Length; i++)
            {
                schema[i + 1] = new TextLoader.Column(featureNames[i], DataKind.Single, i + 1);
            }

            var loaderOptions = new TextLoader.Options
            {
                Columns = schema,
                HasHeader = true,
            };
            var textLoader = new TextLoader(mlContext, options: loaderOptions);
            IDataView trainingView = textLoader.Load(dataPath);

            var concat = new ColumnConcatenatingEstimator(mlContext, "Features", featureNames);
            var trainer = mlContext.MulticlassClassification.Trainers.StochasticDualCoordinateAscent();

            return concat.Append(trainer).Fit(trainingView);
        }
コード例 #24
0
                /// <summary>
                /// Suggests how to route the columns whose purpose is <c>ColumnPurpose.Name</c>
                /// into the single <c>DefaultColumnNames.Name</c> column.
                /// </summary>
                /// <param name="columns">Candidate columns; those with any other purpose are ignored.</param>
                /// <returns>
                /// At most one suggestion: a column copy when exactly one name column exists and it
                /// is not already called <c>DefaultColumnNames.Name</c>, or a concatenation when
                /// several name columns exist and at least one of them has a text item type.
                /// Nothing is yielded otherwise.
                /// </returns>
                public override IEnumerable <SuggestedTransform> Apply(IntermediateColumn[] columns)
                {
                    var  nameColumns   = new List<string>();
                    bool anyTextColumn = false;

                    foreach (var column in columns)
                    {
                        if (column.Purpose != ColumnPurpose.Name)
                        {
                            continue;
                        }

                        nameColumns.Add(column.ColumnName);
                        if (column.Type.ItemType().IsText())
                        {
                            anyTextColumn = true;
                        }
                    }

                    // Both suggestions write to the same single, non-numeric destination column.
                    ColumnRoutingStructure.AnnotatedName[] columnsDest =
                    {
                        new ColumnRoutingStructure.AnnotatedName
                        {
                            IsNumeric = false, Name = DefaultColumnNames.Name
                        }
                    };

                    if (nameColumns.Count == 1)
                    {
                        string sourceName = nameColumns[0];

                        // A lone name column that already carries the default name needs no transform.
                        if (sourceName != DefaultColumnNames.Name)
                        {
                            var input = new ColumnCopyingEstimator(Env, sourceName, DefaultColumnNames.Name);
                            ColumnRoutingStructure.AnnotatedName[] columnsSource =
                            {
                                new ColumnRoutingStructure.AnnotatedName
                                {
                                    IsNumeric = false, Name = sourceName
                                }
                            };
                            var routingStructure = new ColumnRoutingStructure(columnsSource, columnsDest);
                            yield return(new SuggestedTransform(input, routingStructure));
                        }
                    }
                    else if (nameColumns.Count > 1)
                    {
                        // The previous guard tested List<string>.ToString(), which returns the type
                        // name and is never blank, so this early exit could never trigger.
                        if (!anyTextColumn)
                        {
                            yield break;
                        }

                        // Suggest grouping the name columns into one vector.
                        var input = new ColumnConcatenatingEstimator(Env, DefaultColumnNames.Name, nameColumns.ToArray());

                        ColumnRoutingStructure.AnnotatedName[] columnsSource =
                            nameColumns.Select(c => new ColumnRoutingStructure.AnnotatedName
                            {
                                IsNumeric = false, Name = c
                            }).ToArray();
                        var routingStructure = new ColumnRoutingStructure(columnsSource, columnsDest);
                        yield return(new SuggestedTransform(input, routingStructure));
                    }
                }