예제 #1
0
        /// <summary>
        /// Reads the iris data file, builds a multiclass SDCA training pipeline over it
        /// and stores the fitted model in <c>_model</c>.
        /// </summary>
        public void Train()
        {
            // When running from Visual Studio, the data file's 'Copy to Output Directory'
            // property must be 'Copy always' so this relative path resolves.
            string dataPath = "IrisClassification/iris.data.txt";

            // Four R4 measurement columns followed by a text label column.
            var columns = new[]
            {
                new TextLoader.Column("SepalLength", DataKind.R4, 0),
                new TextLoader.Column("SepalWidth", DataKind.R4, 1),
                new TextLoader.Column("PetalLength", DataKind.R4, 2),
                new TextLoader.Column("PetalWidth", DataKind.R4, 3),
                new TextLoader.Column("Label", DataKind.Text, 4)
            };
            var loaderArgs = new TextLoader.Arguments
            {
                Separator = ",",
                HasHeader = true,
                Column = columns
            };
            var reader = new TextLoader(_env, loaderArgs);

            var source = new MultiFileSource(dataPath);
            IDataView trainingDataView = reader.Read(source);

            // Pipeline, built one stage at a time:
            //   1. map the text "Label" column to keys (training needs numbers),
            //   2. concatenate the four measurements into "Features",
            //   3. add the SDCA multiclass learner,
            //   4. map "PredictedLabel" keys back to their original values.
            var labelToKey = new TermEstimator(_env, "Label", "Label");
            var withFeatures = labelToKey.Append(
                new ConcatEstimator(_env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));
            var withTrainer = withFeatures.Append(
                new SdcaMultiClassTrainer(_env, new SdcaMultiClassTrainer.Arguments()));
            var pipeline = withTrainer.Append(
                new KeyToValueEstimator(_env, "PredictedLabel"));

            // Fit the whole chain on the loaded data and keep the result.
            _model = pipeline.Fit(trainingDataView);
        }
        /// <summary>
        /// Applies a term transform and then a key-to-binary-vector transform to two
        /// in-memory rows, saves the result with <c>TrainUtils.SaveModel</c> into a
        /// memory stream and loads the transforms back with
        /// <c>ModelFileUtils.LoadTransforms</c>.
        /// </summary>
        public void TestOldSavingAndLoading()
        {
            var firstRow = new TestClass { A = 1, B = 2, C = 3 };
            var secondRow = new TestClass { A = 4, B = 5, C = 6 };
            var data = new[] { firstRow, secondRow };

            var rawView = ComponentCreation.CreateDataView(Env, data);

            // Map A, B and C to key columns; only B requests text key values.
            var termColumns = new[]
            {
                new TermTransform.ColumnInfo("A", "TermA"),
                new TermTransform.ColumnInfo("B", "TermB", textKeyValues: true),
                new TermTransform.ColumnInfo("C", "TermC")
            };
            var est = new TermEstimator(Env, termColumns);
            var transformer = est.Fit(rawView);
            var termView = transformer.Transform(rawView);

            // Turn each key column into its binary-vector counterpart.
            var pipe = new KeyToBinaryVectorEstimator(
                Env,
                new KeyToBinaryVectorTransform.ColumnInfo("TermA", "CatA"),
                new KeyToBinaryVectorTransform.ColumnInfo("TermB", "CatB"),
                new KeyToBinaryVectorTransform.ColumnInfo("TermC", "CatC"));
            var fitted = pipe.Fit(termView);
            var result = fitted.Transform(termView);
            var resultRoles = new RoleMappedData(result);

            using (var ms = new MemoryStream())
            {
                var channel = Env.Start("saving");
                TrainUtils.SaveModel(Env, channel, ms, null, resultRoles);

                // Rewind so the load starts at the beginning of what was just written.
                ms.Position = 0;
                var loadedView = ModelFileUtils.LoadTransforms(Env, termView, ms);
            }
        }
예제 #3
0
        /// <summary>
        /// Maps the "Term" column to a new column "T" with a term transform and asserts
        /// that the <c>KeyValues</c> metadata read back for "T" is non-empty.
        /// </summary>
        void TestMetadataCopy()
        {
            var data = new[] { new TestMetaClass()
                               {
                                   Term = "A", NotUsed = 1
                               }, new TestMetaClass()
                               {
                                   Term = "B"
                               }, new TestMetaClass()
                               {
                                   Term = "C"
                               } };
            var dataView = ComponentCreation.CreateDataView(Env, data);
            var termEst  = new TermEstimator(Env, new[] {
                new TermTransform.ColumnInfo("Term", "T")
            });

            var termTransformer = termEst.Fit(dataView);
            var result          = termTransformer.Transform(dataView);

            // Fail with a clear assertion if the output column is missing, rather than
            // silently continuing with whatever index the failed lookup left behind.
            Assert.True(result.Schema.TryGetColumnIndex("T", out int termIndex));

            var names1 = default(VBuffer <ReadOnlyMemory <char> >);

            result.Schema.GetMetadata(MetadataUtils.Kinds.KeyValues, termIndex, ref names1);
            Assert.True(names1.Count > 0);
        }
        /// <summary>
        /// Creates a fresh <c>LocalEnvironment</c>, reads the passenger data file and
        /// fits a multiclass SDCA pipeline on it; the fitted chain is stored in <c>Model</c>.
        /// </summary>
        public void Train()
        {
            LocalEnvironment = new LocalEnvironment();
            string dataPath = "Data//PassengerData.txt";

            // Three R4 feature columns followed by a text label column.
            var columns = new[]
            {
                new TextLoader.Column("Gender", DataKind.R4, 0),
                new TextLoader.Column("Nationality", DataKind.R4, 1),
                new TextLoader.Column("year", DataKind.R4, 2),
                new TextLoader.Column("Label", DataKind.Text, 3)
            };
            var loaderArgs = new TextLoader.Arguments
            {
                Separator = ",",
                HasHeader = true,
                Column = columns
            };
            var reader = new TextLoader(LocalEnvironment, loaderArgs);

            var source = new MultiFileSource(dataPath);
            IDataView trainingDataView = reader.Read(source);

            // Label -> key, features concatenated, SDCA learner, predicted key -> value.
            var labelToKey = new TermEstimator(LocalEnvironment, "Label", "Label");
            var withFeatures = labelToKey.Append(
                new ConcatEstimator(LocalEnvironment, "Features", "Gender", "Nationality", "year"));
            var withTrainer = withFeatures.Append(
                new SdcaMultiClassTrainer(LocalEnvironment, new SdcaMultiClassTrainer.Arguments()));
            var pipeline = withTrainer.Append(
                new KeyToValueEstimator(LocalEnvironment, "PredictedLabel"));

            Model = pipeline.Fit(trainingDataView);
        }
예제 #5
0
        /// <summary>
        /// Applies a term transform to two in-memory rows, saves the result with
        /// <c>TrainUtils.SaveModel</c> into a memory stream, loads the transforms back
        /// and hands the loaded view to <c>ValidateTermTransformer</c>.
        /// </summary>
        void TestOldSavingAndLoading()
        {
            var firstRow = new TestClass { A = 1, B = 2, C = 3 };
            var secondRow = new TestClass { A = 4, B = 5, C = 6 };
            var data = new[] { firstRow, secondRow };

            using (var env = new TlcEnvironment())
            {
                var dataView = ComponentCreation.CreateDataView(env, data);

                // One key column per source column.
                var termColumns = new[]
                {
                    new TermTransform.ColumnInfo("A", "TermA"),
                    new TermTransform.ColumnInfo("B", "TermB"),
                    new TermTransform.ColumnInfo("C", "TermC")
                };
                var est = new TermEstimator(env, termColumns);
                var transformer = est.Fit(dataView);
                var result = transformer.Transform(dataView);
                var resultRoles = new RoleMappedData(result);

                using (var ms = new MemoryStream())
                {
                    var channel = env.Start("saving");
                    TrainUtils.SaveModel(env, channel, ms, null, resultRoles);

                    // Rewind so loading starts at the beginning of the saved bytes.
                    ms.Position = 0;
                    var loadedView = ModelFileUtils.LoadTransforms(env, dataView, ms);
                    ValidateTermTransformer(loadedView);
                }
            }
        }
        /// <summary>
        /// Applies a term transform and then a key-to-binary-vector transform to three
        /// in-memory <c>TestMeta</c> rows, passes the result to <c>ValidateMetadata</c>
        /// and then calls <c>Done</c>.
        /// </summary>
        public void TestMetadataPropagation()
        {
            var firstRow = new TestMeta
            {
                A = new[] { "A", "B" },
                B = "C",
                C = new[] { 3, 5 },
                D = 6
            };
            var secondRow = new TestMeta
            {
                A = new[] { "A", "B" },
                B = "C",
                C = new[] { 5, 3 },
                D = 1
            };
            var thirdRow = new TestMeta
            {
                A = new[] { "A", "B" },
                B = "C",
                C = new[] { 3, 5 },
                D = 6
            };
            var data = new[] { firstRow, secondRow, thirdRow };

            var rawView = ComponentCreation.CreateDataView(Env, data);

            // A and B request text key values; C and D use the default.
            var termColumns = new[]
            {
                new TermTransform.ColumnInfo("A", "TA", textKeyValues: true),
                new TermTransform.ColumnInfo("B", "TB", textKeyValues: true),
                new TermTransform.ColumnInfo("C", "TC"),
                new TermTransform.ColumnInfo("D", "TD")
            };
            var termEst = new TermEstimator(Env, termColumns);
            var termTransformer = termEst.Fit(rawView);
            var termView = termTransformer.Transform(rawView);

            var pipe = new KeyToBinaryVectorEstimator(
                Env,
                new KeyToBinaryVectorTransform.ColumnInfo("TA", "CatA"),
                new KeyToBinaryVectorTransform.ColumnInfo("TB", "CatB"),
                new KeyToBinaryVectorTransform.ColumnInfo("TC", "CatC"),
                new KeyToBinaryVectorTransform.ColumnInfo("TD", "CatD"));

            var fitted = pipe.Fit(termView);
            var result = fitted.Transform(termView);

            ValidateMetadata(result);
            Done();
        }
예제 #7
0
        /// <summary>
        /// Trains a multiclass SDCA model on the iris data file, predicts the label of
        /// one hard-coded sample and writes the prediction to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Environment shared by the loader, the estimators and the prediction function.
            var env = new LocalEnvironment();

            // When running from Visual Studio, the data file's 'Copy to Output Directory'
            // property must be 'Copy always' so this relative path resolves.
            var dataPath = "iris.data.txt";

            // Four R4 measurement columns followed by a text label column.
            var columns = new[]
            {
                new TextLoader.Column("SepalLength", DataKind.R4, 0),
                new TextLoader.Column("SepalWidth", DataKind.R4, 1),
                new TextLoader.Column("PetalLength", DataKind.R4, 2),
                new TextLoader.Column("PetalWidth", DataKind.R4, 3),
                new TextLoader.Column("Label", DataKind.Text, 4)
            };
            var loaderArgs = new TextLoader.Arguments()
            {
                Separator = ",",
                HasHeader = true,
                Column = columns
            };
            var reader = new TextLoader(env, loaderArgs);

            var source = new MultiFileSource(dataPath);
            var trainingDataView = reader.Read(source);

            // Pipeline, built one stage at a time:
            //   1. map the text "Label" column to keys (training needs numbers),
            //   2. concatenate the four measurements into "Features",
            //   3. add the SDCA multiclass learner,
            //   4. map "PredictedLabel" keys back to their original values.
            var labelToKey = new TermEstimator(env, "Label", "Label");
            var withFeatures = labelToKey.Append(
                new ConcatEstimator(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));
            var withTrainer = withFeatures.Append(
                new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments()));
            var pipeline = withTrainer.Append(
                new KeyToValueEstimator(env, "PredictedLabel"));

            // Train on the loaded data.
            var model = pipeline.Fit(trainingDataView);

            // Predict for a single sample; edit the values below to try other inputs.
            var predictor = model.MakePredictionFunction <IrisData, IrisPrediction>(env);
            var sample = new IrisData
            {
                SepalLength = 3.3f,
                SepalWidth = 1.6f,
                PetalLength = 0.2f,
                PetalWidth = 5.1f
            };
            var prediction = predictor.Predict(sample);

            Console.WriteLine($"Predicted flower type is: {prediction.PredictedLabels}");
        }
예제 #8
0
        /// <summary>
        /// Loads columns of several data kinds from the "adult.test" data file, applies a
        /// term transform to each, saves the first 10 transformed rows to "Term.tsv" and
        /// compares that output through <c>CheckEquality</c>.
        /// </summary>
        void TestDifferentTypes()
        {
            string dataPath = GetDataPath("adult.test");

            // Scalar and multi-range columns for R4, R8, I4 and TX kinds.
            var columns = new[]
            {
                new TextLoader.Column("float1", DataKind.R4, 0),
                new TextLoader.Column("float4", DataKind.R4, new[] { new TextLoader.Range(0), new TextLoader.Range(2), new TextLoader.Range(4), new TextLoader.Range(10) }),
                new TextLoader.Column("double1", DataKind.R8, 0),
                new TextLoader.Column("double4", DataKind.R8, new[] { new TextLoader.Range(0), new TextLoader.Range(2), new TextLoader.Range(4), new TextLoader.Range(10) }),
                new TextLoader.Column("int1", DataKind.I4, 0),
                new TextLoader.Column("text1", DataKind.TX, 1),
                new TextLoader.Column("text2", DataKind.TX, new[] { new TextLoader.Range(1), new TextLoader.Range(3) }),
            };
            var loaderArgs = new TextLoader.Arguments
            {
                Column = columns,
                Separator = ",",
                HasHeader = true
            };
            var loader = new TextLoader(Env, loaderArgs, new MultiFileSource(dataPath));

            // One term mapping per loaded column.
            var termColumns = new[]
            {
                new TermTransform.ColumnInfo("float1", "TermFloat1"),
                new TermTransform.ColumnInfo("float4", "TermFloat4"),
                new TermTransform.ColumnInfo("double1", "TermDouble1"),
                new TermTransform.ColumnInfo("double4", "TermDouble4"),
                new TermTransform.ColumnInfo("int1", "TermInt1"),
                new TermTransform.ColumnInfo("text1", "TermText1"),
                new TermTransform.ColumnInfo("text2", "TermText2")
            };
            var pipe = new TermEstimator(Env, termColumns);

            var source = new MultiFileSource(dataPath);
            var data = loader.Read(source);

            // Keep only the first 10 rows for fitting and saving.
            data = TakeFilter.Create(Env, data, 10);
            var outputPath = GetOutputPath("Term", "Term.tsv");

            using (var ch = Env.Start("save"))
            {
                var saverArgs = new TextSaver.Arguments { Silent = true };
                var saver = new TextSaver(Env, saverArgs);

                using (var fs = File.Create(outputPath))
                {
                    // Fit and transform only after the output file has been created,
                    // matching the order in which the save call evaluates its arguments.
                    var transformed = pipe.Fit(data).Transform(data);
                    DataSaverUtils.SaveDataView(ch, saver, transformed, fs, keepHidden: true);
                }
            }

            CheckEquality("Term", "Term.tsv");
            Done();
        }
예제 #9
0
        /// <summary>
        /// Trains a multiclass SDCA model on the semicolon-separated file "data.csv"
        /// (no header row; five R4 feature columns and an R4 label column) and writes
        /// the fitted model to "model.pb", replacing any existing file of that name.
        /// </summary>
        /// <exception cref="FileNotFoundException">
        /// Thrown when "data.csv" does not exist.
        /// </exception>
        public static void criar_e_treinar()
        {        
            string dataPath = "data.csv";
            string outputModel = "model.pb";

            // A missing training file is reported with the specific exception type;
            // callers that catch Exception still catch it.
            if (!File.Exists(dataPath))
                throw new FileNotFoundException("Arquivo <data.csv> contendo dados para treinamento não existe", dataPath);

            var env = new LocalEnvironment();
            var reader = new TextLoader(env, 
                new TextLoader.Arguments()
                {
                    Separator = ";",
                    HasHeader = false,
                    Column = new[]
                    {
                        new TextLoader.Column("prioridade", DataKind.R4, 0),
                        new TextLoader.Column("punicao", DataKind.R4, 1),
                        new TextLoader.Column("dispensa", DataKind.R4, 2),
                        new TextLoader.Column("ultimoDiaSemana", DataKind.R4, 3),
                        new TextLoader.Column("diaAtualSemanaServico", DataKind.R4, 4),
                        new TextLoader.Column("Label", DataKind.R4, 5)
                    }
                });

            IDataView trainingDataView = reader.Read(new MultiFileSource(dataPath));

            // Label -> key, features concatenated, SDCA learner, predicted key -> value.
            var pipeline = new TermEstimator(env, "Label", "Label")
                       .Append(new ConcatEstimator(env, "Features", "prioridade", "punicao", "dispensa", "ultimoDiaSemana", "diaAtualSemanaServico"))
                       .Append(new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments()))
                       .Append(new KeyToValueEstimator(env, "PredictedLabel"));

            var model = pipeline.Fit(trainingDataView);            

            if (File.Exists(outputModel))
                File.Delete(outputModel);

            // The using block closes the stream even if SaveTo throws; the original
            // explicit Close() call was skipped on that path and leaked the file handle.
            using (FileStream fs = File.Create(outputModel))
            {
                model.SaveTo(env, fs);
            }
        }
예제 #10
0
        /// <summary>
        /// Trains a multiclass SDCA model on the comma-separated file at
        /// <paramref name="dataPath"/> and returns the prediction for one article id.
        /// </summary>
        /// <param name="dataPath">Path of the training file (header row; article id in column 0, label in column 1).</param>
        /// <param name="ArticleId">Article id to predict for; passed to the model as a float.</param>
        /// <returns>The model's <c>PredictedRelatedArticle</c> value cast to <c>int</c>.</returns>
        public static int GetRelatedArticle(string dataPath, int ArticleId)
        {
            // Environment shared by the loader, the estimators and the prediction function.
            LocalEnvironment env = new LocalEnvironment();

            // Two R4 columns: the current article id and the label.
            var columns = new[]
            {
                new TextLoader.Column("CurrentArticleId", DataKind.R4, 0),
                new TextLoader.Column("Label", DataKind.R4, 1)
            };
            var loaderArgs = new TextLoader.Arguments
            {
                Separator = ",",
                HasHeader = true,
                Column = columns
            };
            TextLoader reader = new TextLoader(env, loaderArgs);

            var source = new MultiFileSource(dataPath);
            IDataView trainingDataView = reader.Read(source);

            // Pipeline, built one stage at a time:
            //   1. map "Label" to keys,
            //   2. expose "CurrentArticleId" as the "Features" column,
            //   3. add the SDCA multiclass learner,
            //   4. map "PredictedLabel" keys back to their original values.
            var labelToKey = new TermEstimator(env, "Label", "Label");
            var withFeatures = labelToKey.Append(
                new ConcatEstimator(env, "Features", "CurrentArticleId"));
            var withTrainer = withFeatures.Append(
                new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments()));
            var pipeline = withTrainer.Append(
                new KeyToValueEstimator(env, "PredictedLabel"));

            // Train on the loaded data.
            var model = pipeline.Fit(trainingDataView);

            // Predict for the requested article.
            var predictor = model.MakePredictionFunction <RelatedArticleData, RelatedArticlesPrediction>(env);
            var query = new RelatedArticleData
            {
                CurrentArticleId = (float)ArticleId
            };
            var prediction = predictor.Predict(query);

            return (int)prediction.PredictedRelatedArticle;
        }
예제 #11
0
        /// <summary>
        /// Takes an article id and predicts the most related article by training a
        /// multiclass SDCA model on the file at <paramref name="dataPath"/>.
        /// </summary>
        /// <param name="dataPath">Path of the comma-separated training file (header row; article id in column 0, label in column 1).</param>
        /// <param name="ArticleId">Article id to predict for; passed to the model as a float.</param>
        /// <returns>The model's <c>PredictedRelatedArticle</c> value cast to <c>int</c>.</returns>
        public static int GetRelatedArticle(string dataPath, int ArticleId)
        {
            // Environment for the learning process.
            var env = new LocalEnvironment();

            // Describe how the training file is parsed: two R4 columns.
            var columns = new[]
            {
                new TextLoader.Column("CurrentArticleId", DataKind.R4, 0),
                new TextLoader.Column("Label", DataKind.R4, 1)
            };
            var loaderArgs = new TextLoader.Arguments
            {
                Separator = ",",
                HasHeader = true,
                Column = columns
            };
            var reader = new TextLoader(env, loaderArgs);

            // Read the training data.
            var source = new MultiFileSource(dataPath);
            var trainingData = reader.Read(source);

            // Build the chain stage by stage: label -> key, features, SDCA multiclass
            // learner, predicted key -> original value.
            var labelToKey = new TermEstimator(env, "Label", "Label");
            var withFeatures = labelToKey.Append(
                new ConcatEstimator(env, "Features", "CurrentArticleId"));
            var withTrainer = withFeatures.Append(
                new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments()));
            var pipeline = withTrainer.Append(
                new KeyToValueEstimator(env, "PredictedLabel"));

            // Train the chain on the training data.
            var model = pipeline.Fit(trainingData);

            // Ask the trained model for a prediction for the requested article.
            var predictor = model.MakePredictionFunction <RelatedArticleData, RelatedArticlesPrediction>(env);
            var query = new RelatedArticleData
            {
                CurrentArticleId = (float)ArticleId
            };
            var prediction = predictor.Predict(query);

            // Return the predicted article id.
            return (int)prediction.PredictedRelatedArticle;
        }
예제 #12
0
        /// <summary>
        /// Trains a multiclass SDCA model on the tab-separated file at <c>DataPath</c>,
        /// using "Area" as the label and the "Title" and "Description" text columns as
        /// features, then writes the fitted model to <c>ModelPath</c>.
        /// </summary>
        public static void Train()
        {
            using (var env = new LocalEnvironment())
            {
                // Four text columns; the file has a header row.
                var columns = new[]
                {
                    new TextLoader.Column("ID", DataKind.Text, 0),
                    new TextLoader.Column("Area", DataKind.Text, 1),
                    new TextLoader.Column("Title", DataKind.Text, 2),
                    new TextLoader.Column("Description", DataKind.Text, 3),
                };
                var loaderArgs = new TextLoader.Arguments
                {
                    Separator = "tab",
                    HasHeader = true,
                    Column = columns
                };
                var reader = new TextLoader(env, loaderArgs);

                // Build the chain stage by stage: "Area" -> "Label" keys, text transforms
                // on both text columns, concatenation into "Features", the SDCA learner,
                // and finally predicted key -> original value.
                var labelToKey = new TermEstimator(env, "Area", "Label");
                var withTitle = labelToKey.Append(
                    new TextTransform(env, "Title", "Title"));
                var withDescription = withTitle.Append(
                    new TextTransform(env, "Description", "Description"));
                var withFeatures = withDescription.Append(
                    new ConcatEstimator(env, "Features", "Title", "Description"));
                var withTrainer = withFeatures.Append(
                    new SdcaMultiClassTrainer(env, new SdcaMultiClassTrainer.Arguments()));
                var pipeline = withTrainer.Append(
                    new KeyToValueEstimator(env, "PredictedLabel"));

                Console.WriteLine("=============== Training model ===============");

                var trainingData = reader.Read(new MultiFileSource(DataPath));
                var model = pipeline.Fit(trainingData);

                using (var fs = new FileStream(ModelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
                {
                    model.SaveTo(env, fs);
                }

                Console.WriteLine("=============== End training ===============");
                Console.WriteLine("The model is saved to {0}", ModelPath);
            }
        }
        /// <summary>
        /// Applies a term transform and then a key-to-vector transform to three
        /// in-memory <c>TestMeta</c> rows (columns A through H), passes the result to
        /// <c>ValidateMetadata</c> and then calls <c>Done</c>.
        /// </summary>
        public void TestMetadataPropagation()
        {
            var firstRow = new TestMeta
            {
                A = new[] { "A", "B" },
                B = "C",
                C = new[] { 3, 5 },
                D = 6,
                E = new[] { 1.0f, 2.0f },
                F = 1.0f,
                G = new[] { "A", "D" },
                H = "D"
            };
            var secondRow = new TestMeta
            {
                A = new[] { "A", "B" },
                B = "C",
                C = new[] { 5, 3 },
                D = 1,
                E = new[] { 3.0f, 4.0f },
                F = -1.0f,
                G = new[] { "E", "A" },
                H = "E"
            };
            var thirdRow = new TestMeta
            {
                A = new[] { "A", "B" },
                B = "C",
                C = new[] { 3, 5 },
                D = 6,
                E = new[] { 5.0f, 6.0f },
                F = 1.0f,
                G = new[] { "D", "E" },
                H = "D"
            };
            var data = new[] { firstRow, secondRow, thirdRow };

            var rawView = ComponentCreation.CreateDataView(Env, data);

            // A, C, D and H request text key values; the others use the default.
            var termEst = new TermEstimator(
                Env,
                new TermTransform.ColumnInfo("A", "TA", textKeyValues: true),
                new TermTransform.ColumnInfo("B", "TB"),
                new TermTransform.ColumnInfo("C", "TC", textKeyValues: true),
                new TermTransform.ColumnInfo("D", "TD", textKeyValues: true),
                new TermTransform.ColumnInfo("E", "TE"),
                new TermTransform.ColumnInfo("F", "TF"),
                new TermTransform.ColumnInfo("G", "TG"),
                new TermTransform.ColumnInfo("H", "TH", textKeyValues: true));
            var termTransformer = termEst.Fit(rawView);
            var termView = termTransformer.Transform(rawView);

            // The third ColumnInfo argument alternates between true and false so both
            // settings are exercised across the key columns.
            var pipe = new KeyToVectorEstimator(
                Env,
                new KeyToVectorTransform.ColumnInfo("TA", "CatA", true),
                new KeyToVectorTransform.ColumnInfo("TB", "CatB", false),
                new KeyToVectorTransform.ColumnInfo("TC", "CatC", false),
                new KeyToVectorTransform.ColumnInfo("TD", "CatD", true),
                new KeyToVectorTransform.ColumnInfo("TE", "CatE", false),
                new KeyToVectorTransform.ColumnInfo("TF", "CatF", true),
                new KeyToVectorTransform.ColumnInfo("TG", "CatG", true),
                new KeyToVectorTransform.ColumnInfo("TH", "CatH", false));

            var fitted = pipe.Fit(termView);
            var result = fitted.Transform(termView);

            ValidateMetadata(result);
            Done();
        }