예제 #1
0
        /// <summary>
        /// Fits a <c>TermEstimator</c> that maps column "Term" to "T" over three in-memory rows and
        /// asserts that the output column exposes non-empty <c>KeyValues</c> metadata.
        /// </summary>
        void TestMetadataCopy()
        {
            var data = new[]
            {
                new TestMetaClass { Term = "A", NotUsed = 1 },
                new TestMetaClass { Term = "B" },
                new TestMetaClass { Term = "C" }
            };
            var dataView = ComponentCreation.CreateDataView(Env, data);
            var termEst = new TermEstimator(Env, new[] {
                new TermTransform.ColumnInfo("Term", "T")
            });

            var termTransformer = termEst.Fit(dataView);
            var result = termTransformer.Transform(dataView);

            // Fail with a clear assertion when the output column is missing, rather than
            // silently reading metadata from the default index 0.
            Assert.True(result.Schema.TryGetColumnIndex("T", out int termIndex));

            var names1 = default(VBuffer<ReadOnlyMemory<char>>);
            result.Schema.GetMetadata(MetadataUtils.Kinds.KeyValues, termIndex, ref names1);
            Assert.True(names1.Count > 0);
        }
        /// <summary>
        /// Runs the estimator test harness on the statically-typed <c>ToBinaryVector</c> extension,
        /// applied to key columns "A" and "B" produced by a <c>TermEstimator</c> over breast-cancer.txt.
        /// </summary>
        public void KeyToBinaryVectorStatic()
        {
            var path = GetDataPath("breast-cancer.txt");
            var textReader = TextLoader.CreateReader(Env, ctx => (
                ScalarString: ctx.LoadText(1),
                VectorString: ctx.LoadText(1, 4)));

            var rawData = textReader.Read(path);

            // Term is applied through the dynamic view of the data (see AsDynamic below).
            var termEstimator = new TermEstimator(Env, new[] {
                new TermTransform.ColumnInfo("ScalarString", "A"),
                new TermTransform.ColumnInfo("VectorString", "B")
            });
            var keyedDynamic = termEstimator.Fit(rawData.AsDynamic).Transform(rawData.AsDynamic);

            // Re-enter the static API by asserting the expected schema of the key columns.
            var keyedStatic = keyedDynamic.AssertStatic(Env, ctx => (
                A: ctx.KeyU4.TextValues.Scalar,
                B: ctx.KeyU4.TextValues.Vector));

            var binaryVectorEstimator = keyedStatic.MakeNewEstimator()
                .Append(row => (
                    ScalarString: row.A.ToBinaryVector(),
                    VectorString: row.B.ToBinaryVector()));

            // The un-keyed input is passed as invalid input for the estimator.
            TestEstimatorCore(binaryVectorEstimator.AsDynamic, keyedStatic.AsDynamic, invalidInput: rawData.AsDynamic);

            Done();
        }
예제 #3
0
        /// <summary>
        /// Reads the comma-separated iris file, builds a Term, Concat, SDCA multi-class and KeyToValue
        /// pipeline, fits it on the loaded data and stores the fitted model in <c>_model</c>.
        /// </summary>
        public void Train()
        {
            // If working in Visual Studio, make sure the 'Copy to Output Directory'
            // property of iris.data.txt is set to 'Copy always'.
            const string dataPath = "IrisClassification/iris.data.txt";

            // Four numeric feature columns followed by a text label; the first line is a header.
            var loaderArgs = new TextLoader.Arguments()
            {
                Separator = ",",
                HasHeader = true,
                Column = new[]
                {
                    new TextLoader.Column("SepalLength", DataKind.R4, 0),
                    new TextLoader.Column("SepalWidth", DataKind.R4, 1),
                    new TextLoader.Column("PetalLength", DataKind.R4, 2),
                    new TextLoader.Column("PetalWidth", DataKind.R4, 3),
                    new TextLoader.Column("Label", DataKind.Text, 4)
                }
            };
            var reader = new TextLoader(_env, loaderArgs);

            IDataView trainingDataView = reader.Read(new MultiFileSource(dataPath));

            // Pipeline stages, in order:
            //   1. Term: map the text "Label" column to keys (in place).
            //   2. Concat: gather the four measurements into "Features".
            //   3. SDCA multi-class trainer with default arguments.
            //   4. KeyToValue: map "PredictedLabel" keys back to the original values.
            var pipeline = new TermEstimator(_env, "Label", "Label")
                .Append(new ConcatEstimator(_env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"))
                .Append(new SdcaMultiClassTrainer(_env, new SdcaMultiClassTrainer.Arguments()))
                .Append(new KeyToValueEstimator(_env, "PredictedLabel"));

            // Train the model on the loaded data set.
            _model = pipeline.Fit(trainingDataView);
        }
예제 #4
0
        /// <summary>
        /// Runs the estimator test harness on a <c>TermEstimator</c> over columns A, B and C, passing
        /// one data view with X/Y columns and one built from <c>TestClassDifferentTypes</c> as the
        /// additional inputs.
        /// </summary>
        void TestSimpleCase()
        {
            var rows = new[]
            {
                new TestClass { A = 1, B = 2, C = 3 },
                new TestClass { A = 4, B = 5, C = 6 }
            };
            var xyRows = new[]
            {
                new TestClassXY { X = 10, Y = 100 },
                new TestClassXY { X = -1, Y = -100 }
            };
            var stringRows = new[]
            {
                new TestClassDifferentTypes { A = "1", B = "c", C = "b" }
            };

            var dataView = ComponentCreation.CreateDataView(Env, rows);
            var termColumns = new[] {
                new TermTransform.ColumnInfo("A", "TermA"),
                new TermTransform.ColumnInfo("B", "TermB"),
                new TermTransform.ColumnInfo("C", "TermC")
            };
            var pipe = new TermEstimator(Env, termColumns);

            // Data view with X/Y columns only (no A, B or C).
            var invalidData = ComponentCreation.CreateDataView(Env, xyRows);
            // Data view with columns A, B and C populated from string literals.
            var validFitNotValidTransformData = ComponentCreation.CreateDataView(Env, stringRows);

            TestEstimatorCore(pipe, dataView, null, invalidData, validFitNotValidTransformData);
        }
예제 #5
0
        /// <summary>
        /// Runs the estimator test harness on a pipeline that converts "Workclass" and "Label" into
        /// key columns and feeds them to a <c>FastTreeRankingTrainer</c> configured with 10 trees.
        /// </summary>
        public void FastTreeRankerEstimator()
        {
            using (var env = new LocalEnvironment(seed: 1, conc: 1))
            {
                // Tab-separated file with a header row.
                var loaderArgs = new TextLoader.Arguments
                {
                    HasHeader = true,
                    Separator = "\t",
                    Column = new[]
                    {
                        new TextLoader.Column("Label", DataKind.R4, 0),
                        new TextLoader.Column("Workclass", DataKind.Text, 1),
                        new TextLoader.Column("NumericFeatures", DataKind.R4, new [] { new TextLoader.Range(9, 14) })
                    }
                };
                var reader = new TextLoader(env, loaderArgs);
                var trainFile = new MultiFileSource(GetDataPath(TestDatasets.adultRanking.trainFilename));
                var data = reader.Read(trainFile);

                // "Workclass" becomes the group column, "Label" is re-keyed into "Label0".
                var termEstimator = new TermEstimator(env, new[] {
                    new TermTransform.ColumnInfo("Workclass", "Group"),
                    new TermTransform.ColumnInfo("Label", "Label0")
                });
                var ranker = new FastTreeRankingTrainer(env, "Label0", "NumericFeatures", "Group",
                    advancedSettings: s => { s.NumTrees = 10; });
                var pipeline = termEstimator.Append(ranker);

                TestEstimatorCore(pipeline, data);
            }
        }
        /// <summary>
        /// Creates a new <c>LocalEnvironment</c>, reads the comma-separated passenger data file,
        /// fits a Term, Concat, SDCA multi-class and KeyToValue pipeline and stores the result in
        /// <c>Model</c>.
        /// </summary>
        public void Train()
        {
            LocalEnvironment = new LocalEnvironment();
            const string dataPath = "Data//PassengerData.txt";

            // Three numeric feature columns followed by a text label; the first line is a header.
            var loaderArgs = new TextLoader.Arguments()
            {
                Separator = ",",
                HasHeader = true,
                Column = new[]
                {
                    new TextLoader.Column("Gender", DataKind.R4, 0),
                    new TextLoader.Column("Nationality", DataKind.R4, 1),
                    new TextLoader.Column("year", DataKind.R4, 2),
                    new TextLoader.Column("Label", DataKind.Text, 3)
                }
            };
            var reader = new TextLoader(LocalEnvironment, loaderArgs);

            IDataView trainingDataView = reader.Read(new MultiFileSource(dataPath));

            // Key the label, concatenate the features, train, then map predictions back to values.
            var pipeline = new TermEstimator(LocalEnvironment, "Label", "Label")
                .Append(new ConcatEstimator(LocalEnvironment, "Features", "Gender", "Nationality", "year"))
                .Append(new SdcaMultiClassTrainer(LocalEnvironment, new SdcaMultiClassTrainer.Arguments()))
                .Append(new KeyToValueEstimator(LocalEnvironment, "PredictedLabel"));

            Model = pipeline.Fit(trainingDataView);
        }
        /// <summary>
        /// Converts columns A, B and C of two in-memory rows into key columns, then runs the estimator
        /// test harness on a <c>KeyToVectorEstimator</c> over those keys.
        /// </summary>
        public void KeyToVectorWorkout()
        {
            var rows = new[]
            {
                new TestClass { A = 1, B = 2, C = 3 },
                new TestClass { A = 4, B = 5, C = 6 }
            };

            var dataView = ComponentCreation.CreateDataView(Env, rows);

            // Produce the key columns that the key-to-vector estimator consumes.
            var termEstimator = new TermEstimator(Env, new[] {
                new TermTransform.ColumnInfo("A", "TermA"),
                new TermTransform.ColumnInfo("B", "TermB"),
                new TermTransform.ColumnInfo("C", "TermC", textKeyValues: true)
            });
            dataView = termEstimator.Fit(dataView).Transform(dataView);

            // "TermC" is mapped twice, once with the third argument true and once with it false.
            var pipe = new KeyToVectorEstimator(
                Env,
                new KeyToVectorTransform.ColumnInfo("TermA", "CatA", false),
                new KeyToVectorTransform.ColumnInfo("TermB", "CatB", true),
                new KeyToVectorTransform.ColumnInfo("TermC", "CatC", true),
                new KeyToVectorTransform.ColumnInfo("TermC", "CatCNonBag", false));

            TestEstimatorCore(pipe, dataView);
            Done();
        }
        /// <summary>
        /// Applies a term transform followed by a key-to-binary-vector transform to two in-memory
        /// rows, saves the resulting data's model to a memory stream, and loads the transforms back
        /// from that stream.
        /// </summary>
        public void TestOldSavingAndLoading()
        {
            var data = new[]
            {
                new TestClass { A = 1, B = 2, C = 3 },
                new TestClass { A = 4, B = 5, C = 6 }
            };
            var dataView = ComponentCreation.CreateDataView(Env, data);
            var est = new TermEstimator(Env, new[] {
                new TermTransform.ColumnInfo("A", "TermA"),
                new TermTransform.ColumnInfo("B", "TermB", textKeyValues: true),
                new TermTransform.ColumnInfo("C", "TermC")
            });
            var transformer = est.Fit(dataView);

            dataView = transformer.Transform(dataView);
            var pipe = new KeyToBinaryVectorEstimator(Env,
                new KeyToBinaryVectorTransform.ColumnInfo("TermA", "CatA"),
                new KeyToBinaryVectorTransform.ColumnInfo("TermB", "CatB"),
                new KeyToBinaryVectorTransform.ColumnInfo("TermC", "CatC"));
            var result = pipe.Fit(dataView).Transform(dataView);
            var resultRoles = new RoleMappedData(result);

            using (var ms = new MemoryStream())
            {
                // Scope the channel so it is disposed as soon as the model has been written.
                using (var ch = Env.Start("saving"))
                {
                    TrainUtils.SaveModel(Env, ch, ms, null, resultRoles);
                }

                // Rewind before reading back what was just written.
                ms.Position = 0;
                ModelFileUtils.LoadTransforms(Env, dataView, ms);
            }
        }
예제 #9
0
        /// <summary>
        /// Runs the estimator test harness on the statically-typed <c>ToValue</c> extension and
        /// checks that applying Term followed by <c>ToValue</c> reproduces the original
        /// "ScalarString" and "VectorString" columns.
        /// </summary>
        public void KeyToValuePigsty()
        {
            var path = GetDataPath("breast-cancer.txt");
            var textReader = TextLoader.CreateReader(Env, ctx => (
                ScalarString: ctx.LoadText(1),
                VectorString: ctx.LoadText(1, 4)));

            var rawData = textReader.Read(new MultiFileSource(path));

            // Term is applied through the dynamic view of the data (see AsDynamic below).
            var termEstimator = new TermEstimator(Env,
                new TermTransform.ColumnInfo("ScalarString", "A"),
                new TermTransform.ColumnInfo("VectorString", "B"));
            var keyedDynamic = termEstimator.Fit(rawData.AsDynamic).Transform(rawData.AsDynamic);

            var keyedStatic = keyedDynamic.AssertStatic(Env, ctx => (
                A: ctx.KeyU4.TextValues.Scalar,
                B: ctx.KeyU4.TextValues.Vector));

            var toValueEstimator = keyedStatic.MakeNewEstimator()
                .Append(row => (
                    ScalarString: row.A.ToValue(),
                    VectorString: row.B.ToValue()));

            TestEstimatorCore(toValueEstimator.AsDynamic, keyedStatic.AsDynamic, invalidInput: rawData.AsDynamic);

            // Check that term and ToValue are round-trippable.
            var expected = new ChooseColumnsTransform(Env, rawData.AsDynamic, "ScalarString", "VectorString");
            var roundTripped = toValueEstimator.Fit(keyedStatic).Transform(keyedStatic).AsDynamic;
            var actual = new ChooseColumnsTransform(Env, roundTripped, "ScalarString", "VectorString");

            CheckSameSchemas(expected.Schema, actual.Schema);
            CheckSameValues(expected, actual);
            Done();
        }
예제 #10
0
        /// <summary>
        /// Fits a term transform over columns A, B and C of two in-memory rows, saves the transformed
        /// data's model to a memory stream, loads the transforms back and passes the loaded view to
        /// <c>ValidateTermTransformer</c>.
        /// </summary>
        void TestOldSavingAndLoading()
        {
            var data = new[]
            {
                new TestClass { A = 1, B = 2, C = 3 },
                new TestClass { A = 4, B = 5, C = 6 }
            };

            using (var env = new TlcEnvironment())
            {
                var dataView = ComponentCreation.CreateDataView(env, data);
                var est = new TermEstimator(env, new[] {
                    new TermTransform.ColumnInfo("A", "TermA"),
                    new TermTransform.ColumnInfo("B", "TermB"),
                    new TermTransform.ColumnInfo("C", "TermC")
                });
                var transformer = est.Fit(dataView);
                var result = transformer.Transform(dataView);
                var resultRoles = new RoleMappedData(result);

                using (var ms = new MemoryStream())
                {
                    // Scope the channel so it is disposed as soon as the model has been written.
                    using (var ch = env.Start("saving"))
                    {
                        TrainUtils.SaveModel(env, ch, ms, null, resultRoles);
                    }

                    // Rewind before reading back what was just written.
                    ms.Position = 0;
                    var loadedView = ModelFileUtils.LoadTransforms(env, dataView, ms);
                    ValidateTermTransformer(loadedView);
                }
            }
        }
        /// <summary>
        /// Converts columns A–D of three in-memory rows into key columns, maps those keys to binary
        /// vectors and passes the result to <c>ValidateMetadata</c>.
        /// </summary>
        public void TestMetadataPropagation()
        {
            var rows = new[]
            {
                new TestMeta { A = new[] { "A", "B" }, B = "C", C = new[] { 3, 5 }, D = 6 },
                new TestMeta { A = new[] { "A", "B" }, B = "C", C = new[] { 5, 3 }, D = 1 },
                new TestMeta { A = new[] { "A", "B" }, B = "C", C = new[] { 3, 5 }, D = 6 }
            };

            var dataView = ComponentCreation.CreateDataView(Env, rows);

            // Only the first two term columns are created with textKeyValues enabled.
            var termColumns = new[] {
                new TermTransform.ColumnInfo("A", "TA", textKeyValues: true),
                new TermTransform.ColumnInfo("B", "TB", textKeyValues: true),
                new TermTransform.ColumnInfo("C", "TC"),
                new TermTransform.ColumnInfo("D", "TD")
            };
            var termEst = new TermEstimator(Env, termColumns);
            var termTransformer = termEst.Fit(dataView);

            dataView = termTransformer.Transform(dataView);

            var pipe = new KeyToBinaryVectorEstimator(
                Env,
                new KeyToBinaryVectorTransform.ColumnInfo("TA", "CatA"),
                new KeyToBinaryVectorTransform.ColumnInfo("TB", "CatB"),
                new KeyToBinaryVectorTransform.ColumnInfo("TC", "CatC"),
                new KeyToBinaryVectorTransform.ColumnInfo("TD", "CatD"));

            var binaryVectorTransformer = pipe.Fit(dataView);
            var result = binaryVectorTransformer.Transform(dataView);

            ValidateMetadata(result);
            Done();
        }
예제 #12
0
        /// <summary>
        /// Runs the estimator test harness on a <c>KeyToValueEstimator</c> over key columns "A" and
        /// "B" built from iris.txt, then saves the transformed data to "featurized.tsv" and compares
        /// it through <c>CheckEquality</c>.
        /// </summary>
        public void KeyToValueWorkout()
        {
            var irisPath = GetDataPath("iris.txt");

            var loaderArgs = new TextLoader.Arguments
            {
                Column = new[]
                {
                    new TextLoader.Column("ScalarString", DataKind.TX, 1),
                    new TextLoader.Column("VectorString", DataKind.TX, new[] { new TextLoader.Range(1, 4) }),
                    // Column 0 loaded as a U4 key with KeyRange(0, 5).
                    new TextLoader.Column
                    {
                        Name = "BareKey",
                        Source = new[] { new TextLoader.Range(0) },
                        Type = DataKind.U4,
                        KeyRange = new KeyRange(0, 5),
                    }
                }
            };
            var reader = new TextLoader(Env, loaderArgs);

            var data = reader.Read(irisPath);

            var termEstimator = new TermEstimator(Env, new[] {
                new TermTransform.ColumnInfo("ScalarString", "A"),
                new TermTransform.ColumnInfo("VectorString", "B")
            });
            data = termEstimator.Fit(data).Transform(data);

            // Two invalid inputs: "A" overwritten by BareKey, and "B" overwritten by the raw text vector.
            var badData1 = new CopyColumnsTransform(Env, ("BareKey", "A")).Transform(data);
            var badData2 = new CopyColumnsTransform(Env, ("VectorString", "B")).Transform(data);

            var est = new KeyToValueEstimator(Env, ("A", "A_back"), ("B", "B_back"));

            TestEstimatorCore(est, data, invalidInput: badData1);
            TestEstimatorCore(est, data, invalidInput: badData2);

            var outputPath = GetOutputPath("KeyToValue", "featurized.tsv");

            using (var ch = Env.Start("save"))
            {
                var saverArgs = new TextSaver.Arguments { Silent = true };
                var saver = new TextSaver(Env, saverArgs);
                IDataView savedData = est.Fit(data).Transform(data);
                using (var fs = File.Create(outputPath))
                {
                    DataSaverUtils.SaveDataView(ch, saver, savedData, fs, keepHidden: true);
                }
            }

            CheckEquality("KeyToValue", "featurized.tsv");
            Done();
        }
예제 #13
0
        /// <summary>
        /// Trains a multi-class SDCA model on iris.data.txt, predicts the label for one hard-coded
        /// flower and writes the prediction to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // STEP 2: Create an environment and load your data
            var env = new LocalEnvironment();

            // If working in Visual Studio, make sure the 'Copy to Output Directory'
            // property of iris.data.txt is set to 'Copy always'
            var dataPath = "iris.data.txt";
            var loaderArgs = new TextLoader.Arguments
            {
                Separator = ",",
                HasHeader = true,
                Column = new[]
                {
                    new TextLoader.Column("SepalLength", DataKind.R4, 0),
                    new TextLoader.Column("SepalWidth", DataKind.R4, 1),
                    new TextLoader.Column("PetalLength", DataKind.R4, 2),
                    new TextLoader.Column("PetalWidth", DataKind.R4, 3),
                    new TextLoader.Column("Label", DataKind.Text, 4)
                }
            };
            var reader = new TextLoader(env, loaderArgs);

            var trainingDataView = reader.Read(new MultiFileSource(dataPath));

            // STEP 3: Transform your data and add a learner
            //   - Term maps the text in the "Label" column to keys.
            //   - Concat gathers the four measurements into "Features".
            //   - The SDCA multi-class trainer is the learning algorithm.
            //   - KeyToValue converts "PredictedLabel" back to the original values.
            var pipeline = new TermEstimator(env, "Label", "Label")
                .Append(new ConcatEstimator(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"))
                .Append(new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments()))
                .Append(new KeyToValueEstimator(env, "PredictedLabel"));

            // STEP 4: Train your model based on the data set
            var model = pipeline.Fit(trainingDataView);

            // STEP 5: Use your model to make a prediction
            // You can change these numbers to test different predictions
            var sample = new IrisData
            {
                SepalLength = 3.3f,
                SepalWidth = 1.6f,
                PetalLength = 0.2f,
                PetalWidth = 5.1f
            };
            var predictionFunction = model.MakePredictionFunction<IrisData, IrisPrediction>(env);
            var prediction = predictionFunction.Predict(sample);

            Console.WriteLine($"Predicted flower type is: {prediction.PredictedLabels}");
        }
예제 #14
0
        /// <summary>
        /// Applies a <c>TermEstimator</c> to float, double, int and text columns (scalar and vector)
        /// of the first 10 rows of adult.test, saves the output to "Term.tsv" and compares it through
        /// <c>CheckEquality</c>.
        /// </summary>
        void TestDifferentTypes()
        {
            var adultPath = GetDataPath("adult.test");

            // The vector columns pick source columns 0, 2, 4 and 10 (numeric) or 1 and 3 (text).
            var loaderArgs = new TextLoader.Arguments
            {
                Column = new[] {
                    new TextLoader.Column("float1", DataKind.R4, 0),
                    new TextLoader.Column("float4", DataKind.R4, new[] { new TextLoader.Range(0), new TextLoader.Range(2), new TextLoader.Range(4), new TextLoader.Range(10) }),
                    new TextLoader.Column("double1", DataKind.R8, 0),
                    new TextLoader.Column("double4", DataKind.R8, new[] { new TextLoader.Range(0), new TextLoader.Range(2), new TextLoader.Range(4), new TextLoader.Range(10) }),
                    new TextLoader.Column("int1", DataKind.I4, 0),
                    new TextLoader.Column("text1", DataKind.TX, 1),
                    new TextLoader.Column("text2", DataKind.TX, new[] { new TextLoader.Range(1), new TextLoader.Range(3) }),
                },
                Separator = ",",
                HasHeader = true
            };
            var loader = new TextLoader(Env, loaderArgs, new MultiFileSource(adultPath));

            var termColumns = new[] {
                new TermTransform.ColumnInfo("float1", "TermFloat1"),
                new TermTransform.ColumnInfo("float4", "TermFloat4"),
                new TermTransform.ColumnInfo("double1", "TermDouble1"),
                new TermTransform.ColumnInfo("double4", "TermDouble4"),
                new TermTransform.ColumnInfo("int1", "TermInt1"),
                new TermTransform.ColumnInfo("text1", "TermText1"),
                new TermTransform.ColumnInfo("text2", "TermText2")
            };
            var pipe = new TermEstimator(Env, termColumns);
            var data = loader.Read(new MultiFileSource(adultPath));

            // Keep only the first 10 rows.
            data = TakeFilter.Create(Env, data, 10);
            var outputPath = GetOutputPath("Term", "Term.tsv");

            using (var ch = Env.Start("save"))
            {
                var saverArgs = new TextSaver.Arguments { Silent = true };
                var saver = new TextSaver(Env, saverArgs);
                using (var fs = File.Create(outputPath))
                {
                    // Fit and transform only after the output file has been created.
                    var transformed = pipe.Fit(data).Transform(data);
                    DataSaverUtils.SaveDataView(ch, saver, transformed, fs, keepHidden: true);
                }
            }

            CheckEquality("Term", "Term.tsv");
            Done();
        }
        /// <summary>
        /// Loads the iris data set (a "Features" vector from columns 0–3 and a text "Label" from
        /// column 4) and pairs it with a <c>TermEstimator</c> over "Label".
        /// </summary>
        /// <returns>A tuple of the estimator and the loaded data view.</returns>
        private (IEstimator <ITransformer>, IDataView) GetMultiClassPipeline()
        {
            var loaderArgs = new TextLoader.Arguments()
            {
                Separator = "comma",
                HasHeader = true,
                Column = new[]
                {
                    new TextLoader.Column("Features", DataKind.R4, new [] { new TextLoader.Range(0, 3) }),
                    new TextLoader.Column("Label", DataKind.Text, 4)
                }
            };
            var loader = new TextLoader(Env, loaderArgs);
            var irisData = loader.Read(new MultiFileSource(GetDataPath(IrisDataPath)));

            var labelToKey = new TermEstimator(Env, "Label");

            return (labelToKey, irisData);
        }
예제 #16
0
        /// <summary>
        /// Runs the estimator test harness on a one-versus-all pipeline that wraps a
        /// <c>LinearClassificationTrainer</c> created with <c>Calibrator = null</c> and has
        /// <c>useProbabilities</c> set to false.
        /// </summary>
        public void OVAUncalibrated()
        {
            var irisPath = GetDataPath(IrisDataPath);

            using (var env = new TlcEnvironment())
            {
                var loader = new TextLoader(env, GetIrisLoaderArgs());
                var data = loader.Read(new MultiFileSource(irisPath));

                var trainerArgs = new LinearClassificationTrainer.Arguments
                {
                    MaxIterations = 100,
                    Shuffle = true,
                    NumThreads = 1,
                    Calibrator = null
                };
                var sdcaTrainer = new LinearClassificationTrainer(env, trainerArgs, "Features", "Label");

                // Key the label, train one-versus-all, then map predicted keys back to values.
                var pipeline = new TermEstimator(env, "Label")
                    .Append(new Ova(env, sdcaTrainer, useProbabilities: false))
                    .Append(new KeyToValueEstimator(env, "PredictedLabel"));

                TestEstimatorCore(pipeline, data);
            }
        }
예제 #17
0
        /// <summary>
        /// Trains a multi-class SDCA model on the semicolon-separated, header-less file "data.csv"
        /// and writes the fitted model to "model.pb", replacing any existing file of that name.
        /// </summary>
        /// <exception cref="FileNotFoundException">Thrown when "data.csv" does not exist.</exception>
        public static void criar_e_treinar()
        {
            string dataPath = "data.csv";
            string outputModel = "model.pb";

            // FileNotFoundException derives from Exception, so existing catch blocks keep working.
            if (!File.Exists(dataPath))
            {
                throw new FileNotFoundException("Arquivo <data.csv> contendo dados para treinamento não existe", dataPath);
            }

            var env = new LocalEnvironment();
            var reader = new TextLoader(env,
                new TextLoader.Arguments()
                {
                    Separator = ";",
                    HasHeader = false,
                    Column = new[]
                    {
                        new TextLoader.Column("prioridade", DataKind.R4, 0),
                        new TextLoader.Column("punicao", DataKind.R4, 1),
                        new TextLoader.Column("dispensa", DataKind.R4, 2),
                        new TextLoader.Column("ultimoDiaSemana", DataKind.R4, 3),
                        new TextLoader.Column("diaAtualSemanaServico", DataKind.R4, 4),
                        new TextLoader.Column("Label", DataKind.R4, 5)
                    }
                });

            IDataView trainingDataView = reader.Read(new MultiFileSource(dataPath));

            // Key the label, concatenate the five features, train, then map predictions back to values.
            var pipeline = new TermEstimator(env, "Label", "Label")
                       .Append(new ConcatEstimator(env, "Features", "prioridade", "punicao", "dispensa", "ultimoDiaSemana", "diaAtualSemanaServico"))
                       .Append(new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments()))
                       .Append(new KeyToValueEstimator(env, "PredictedLabel"));

            var model = pipeline.Fit(trainingDataView);

            if (File.Exists(outputModel))
            {
                File.Delete(outputModel);
            }

            // 'using' closes the stream even when SaveTo throws.
            using (FileStream fs = File.Create(outputModel))
            {
                model.SaveTo(env, fs);
            }
        }
예제 #18
0
        /// <summary>
        /// Trains a multi-class SDCA model on the comma-separated file at <paramref name="dataPath"/>
        /// and returns the article predicted for <paramref name="ArticleId"/>.
        /// </summary>
        /// <param name="dataPath">Path of the training file; column 0 is read as "CurrentArticleId" and column 1 as "Label".</param>
        /// <param name="ArticleId">Article id for which a related article is predicted.</param>
        /// <returns>The predicted related article, cast to <c>int</c>.</returns>
        public static int GetRelatedArticle(string dataPath, int ArticleId)
        {
            // STEP 2: Create an environment and load your data
            var env = new LocalEnvironment();

            var loaderArgs = new TextLoader.Arguments()
            {
                Separator = ",",
                HasHeader = true,
                Column = new[]
                {
                    new TextLoader.Column("CurrentArticleId", DataKind.R4, 0),
                    new TextLoader.Column("Label", DataKind.R4, 1)
                }
            };
            var reader = new TextLoader(env, loaderArgs);

            IDataView trainingDataView = reader.Read(new MultiFileSource(dataPath));

            // STEP 3: Transform your data and add a learner
            //   - Term converts the "Label" column to keys.
            //   - Concat places "CurrentArticleId" into "Features".
            //   - The SDCA multi-class trainer is the learning algorithm.
            //   - KeyToValue converts "PredictedLabel" back to the original label values.
            var pipeline = new TermEstimator(env, "Label", "Label")
                .Append(new ConcatEstimator(env, "Features", "CurrentArticleId"))
                .Append(new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments()))
                .Append(new KeyToValueEstimator(env, "PredictedLabel"));

            // STEP 4: Train your model based on the data set
            var model = pipeline.Fit(trainingDataView);

            // STEP 5: Use your model to make a prediction for the requested article
            var query = new RelatedArticleData()
            {
                CurrentArticleId = (float)ArticleId
            };
            var predictionFunction = model.MakePredictionFunction<RelatedArticleData, RelatedArticlesPrediction>(env);
            var prediction = predictionFunction.Predict(query);

            return (int)prediction.PredictedRelatedArticle;
        }
예제 #19
0
        /// <summary>
        /// Runs the estimator test harness on a one-versus-all pipeline that wraps a
        /// <c>LinearClassificationTrainer</c> and is given an explicit <c>PavCalibratorTrainer</c>.
        /// </summary>
        public void OVAWithExplicitCalibrator()
        {
            var irisPath = GetDataPath(IrisDataPath);

            using (var env = new TlcEnvironment())
            {
                var calibrator = new PavCalibratorTrainer(env);

                var loader = new TextLoader(env, GetIrisLoaderArgs());
                var data = loader.Read(new MultiFileSource(irisPath));

                var trainerArgs = new LinearClassificationTrainer.Arguments
                {
                    MaxIterations = 100,
                    Shuffle = true,
                    NumThreads = 1
                };
                var sdcaTrainer = new LinearClassificationTrainer(env, trainerArgs, "Features", "Label");

                // Key the label, train one-versus-all with the calibrator, then map keys back to values.
                var pipeline = new TermEstimator(env, "Label")
                    .Append(new Ova(env, sdcaTrainer, "Label", calibrator: calibrator, maxCalibrationExamples: 990000))
                    .Append(new KeyToValueEstimator(env, "PredictedLabel"));

                TestEstimatorCore(pipeline, data);
            }
        }
예제 #20
0
        /// <summary>
        /// Trains a multi-class SDCA model on the comma-separated file at <paramref name="dataPath"/>
        /// and predicts the article most related to <paramref name="ArticleId"/>.
        /// </summary>
        /// <param name="dataPath">Path of the training file; column 0 is read as "CurrentArticleId" and column 1 as "Label".</param>
        /// <param name="ArticleId">Article id for which a related article is predicted.</param>
        /// <returns>The predicted related article, cast to <c>int</c>.</returns>
        public static int GetRelatedArticle(string dataPath, int ArticleId)
        {
            // Environment for the learning process.
            var env = new LocalEnvironment();

            // Reader that parses the training file: comma-separated, with a header row.
            var loaderArgs = new TextLoader.Arguments()
            {
                Separator = ",",
                HasHeader = true,
                Column = new[]
                {
                    new TextLoader.Column("CurrentArticleId", DataKind.R4, 0),
                    new TextLoader.Column("Label", DataKind.R4, 1)
                }
            };
            var reader = new TextLoader(env, loaderArgs);

            // Read the training data.
            IDataView trainingData = reader.Read(new MultiFileSource(dataPath));

            // Key the label, build the feature column, train with SDCA multi-class,
            // then map predicted keys back to label values.
            var pipeline = new TermEstimator(env, "Label", "Label")
                .Append(new ConcatEstimator(env, "Features", "CurrentArticleId"))
                .Append(new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments()))
                .Append(new KeyToValueEstimator(env, "PredictedLabel"));

            // Train the model on the training data.
            var model = pipeline.Fit(trainingData);

            // Predict for the requested article.
            var query = new RelatedArticleData()
            {
                CurrentArticleId = (float)ArticleId
            };
            var predictionFunction = model.MakePredictionFunction<RelatedArticleData, RelatedArticlesPrediction>(env);
            var prediction = predictionFunction.Predict(query);

            // Return the predicted article id.
            return (int)prediction.PredictedRelatedArticle;
        }
예제 #21
0
        // Builds a Term -> Ova (wrapping an averaged perceptron) -> KeyToValue pipeline over the
        // Iris data set, supplying the Ova constructor arguments explicitly, and passes the
        // pipeline and data to TestEstimatorCore.
        public void OVAWithAllConstructorArgs()
        {
            var irisPath = GetDataPath(IrisDataPath);
            const string featureColumn = "Features";
            const string labelColumn   = "Label";

            using (var env = new TlcEnvironment())
            {
                // Calibrator handed to Ova below; the inner perceptron itself is created without one.
                var plattCalibrator = new FixedPlattCalibratorTrainer(env, new FixedPlattCalibratorTrainer.Arguments());

                var loader = new TextLoader(env, GetIrisLoaderArgs());
                var data   = loader.Read(new MultiFileSource(irisPath));

                var perceptronArgs = new AveragedPerceptronTrainer.Arguments
                {
                    FeatureColumn = featureColumn,
                    LabelColumn   = labelColumn,
                    Shuffle       = true,
                    Calibrator    = null
                };
                var binaryTrainer = new AveragedPerceptronTrainer(env, perceptronArgs);

                // The label is converted to a key before Ova, and KeyToValue is applied to "PredictedLabel".
                var pipeline = new TermEstimator(env, labelColumn)
                               .Append(new Ova(env, binaryTrainer, labelColumn, true, calibrator: plattCalibrator, 10000, true))
                               .Append(new KeyToValueEstimator(env, "PredictedLabel"));

                TestEstimatorCore(pipeline, data);
            }
        }
        // Loads the adult-ranking training file and returns it together with an unfitted
        // TermEstimator that produces "Group" from "Workclass" and "Label0" from "Label".
        private (IEstimator <ITransformer>, IDataView) GetRankingPipeline()
        {
            // Tab-separated file with a header row: label in column 0, work class text in column 1,
            // and columns 9..14 loaded together as the numeric feature vector.
            var loaderArgs = new TextLoader.Arguments
            {
                HasHeader = true,
                Separator = "\t",
                Column    = new[]
                {
                    new TextLoader.Column("Label", DataKind.R4, 0),
                    new TextLoader.Column("Workclass", DataKind.Text, 1),
                    new TextLoader.Column("NumericFeatures", DataKind.R4, new [] { new TextLoader.Range(9, 14) })
                }
            };
            var loader = new TextLoader(Env, loaderArgs);
            var source = new MultiFileSource(GetDataPath(TestDatasets.adultRanking.trainFilename));
            var data   = loader.Read(source);

            // Pipeline.
            var termColumns = new[]
            {
                new TermTransform.ColumnInfo("Workclass", "Group"),
                new TermTransform.ColumnInfo("Label", "Label0")
            };
            var pipeline = new TermEstimator(Env, termColumns);

            return (pipeline, data);
        }
예제 #23
0
        // Trains a multi-class SDCA model that uses the "Area" column as the label and the
        // "Title" and "Description" text columns as features, then saves the fitted model to
        // ModelPath. Progress messages are written to the console.
        public static void Train()
        {
            using (var env = new LocalEnvironment())
            {
                // Input layout: four text columns, with a header row.
                var loaderArgs = new TextLoader.Arguments()
                {
                    Separator = "tab",
                    HasHeader = true,
                    Column    = new[]
                    {
                        new TextLoader.Column("ID", DataKind.Text, 0),
                        new TextLoader.Column("Area", DataKind.Text, 1),
                        new TextLoader.Column("Title", DataKind.Text, 2),
                        new TextLoader.Column("Description", DataKind.Text, 3)
                    }
                };
                var reader = new TextLoader(env, loaderArgs);

                // "Area" becomes the key-typed "Label"; both text columns are transformed in place
                // and concatenated into "Features" for the trainer.
                var pipeline = new TermEstimator(env, "Area", "Label")
                               .Append(new TextTransform(env, "Title", "Title"))
                               .Append(new TextTransform(env, "Description", "Description"))
                               .Append(new ConcatEstimator(env, "Features", "Title", "Description"))
                               .Append(new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments()))
                               .Append(new KeyToValueEstimator(env, "PredictedLabel"));

                Console.WriteLine("=============== Training model ===============");

                var trainingData = reader.Read(new MultiFileSource(DataPath));
                var model        = pipeline.Fit(trainingData);

                // FileMode.Create overwrites any existing file at ModelPath.
                using (var modelStream = new FileStream(ModelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
                {
                    model.SaveTo(env, modelStream);
                }

                Console.WriteLine("=============== End training ===============");
                Console.WriteLine("The model is saved to {0}", ModelPath);
            }
        }
        // Runs three TestMeta rows through a TermEstimator (columns A..H -> TA..TH) followed by a
        // KeyToVectorEstimator (TA..TH -> CatA..CatH), then hands the transformed data to
        // ValidateMetadata and calls Done().
        public void TestMetadataPropagation()
        {
            var rows = new[]
            {
                new TestMeta
                {
                    A = new[] { "A", "B" },
                    B = "C",
                    C = new[] { 3, 5 },
                    D = 6,
                    E = new[] { 1.0f, 2.0f },
                    F = 1.0f,
                    G = new[] { "A", "D" },
                    H = "D"
                },
                new TestMeta
                {
                    A = new[] { "A", "B" },
                    B = "C",
                    C = new[] { 5, 3 },
                    D = 1,
                    E = new[] { 3.0f, 4.0f },
                    F = -1.0f,
                    G = new[] { "E", "A" },
                    H = "E"
                },
                new TestMeta
                {
                    A = new[] { "A", "B" },
                    B = "C",
                    C = new[] { 3, 5 },
                    D = 6,
                    E = new[] { 5.0f, 6.0f },
                    F = 1.0f,
                    G = new[] { "D", "E" },
                    H = "D"
                }
            };

            var rawView = ComponentCreation.CreateDataView(Env, rows);

            // Step 1: convert every source column to a key column; A, C, D and H request text key values.
            var termColumns = new[]
            {
                new TermTransform.ColumnInfo("A", "TA", textKeyValues: true),
                new TermTransform.ColumnInfo("B", "TB"),
                new TermTransform.ColumnInfo("C", "TC", textKeyValues: true),
                new TermTransform.ColumnInfo("D", "TD", textKeyValues: true),
                new TermTransform.ColumnInfo("E", "TE"),
                new TermTransform.ColumnInfo("F", "TF"),
                new TermTransform.ColumnInfo("G", "TG"),
                new TermTransform.ColumnInfo("H", "TH", textKeyValues: true)
            };
            var termTransformer = new TermEstimator(Env, termColumns).Fit(rawView);
            var keyedView       = termTransformer.Transform(rawView);

            // Step 2: expand each key column into a vector column.
            // NOTE(review): the third ColumnInfo argument is presumably the "bag" flag - confirm.
            var vectorColumns = new[]
            {
                new KeyToVectorTransform.ColumnInfo("TA", "CatA", true),
                new KeyToVectorTransform.ColumnInfo("TB", "CatB", false),
                new KeyToVectorTransform.ColumnInfo("TC", "CatC", false),
                new KeyToVectorTransform.ColumnInfo("TD", "CatD", true),
                new KeyToVectorTransform.ColumnInfo("TE", "CatE", false),
                new KeyToVectorTransform.ColumnInfo("TF", "CatF", true),
                new KeyToVectorTransform.ColumnInfo("TG", "CatG", true),
                new KeyToVectorTransform.ColumnInfo("TH", "CatH", false)
            };
            var pipe   = new KeyToVectorEstimator(Env, vectorColumns);
            var result = pipe.Fit(keyedView).Transform(keyedView);

            ValidateMetadata(result);
            Done();
        }