示例#1
0
        /// <summary>
        /// Trains the window-classification model from the data file at <c>Constants.DataPath</c>,
        /// stores it in <see cref="_model"/> and writes it to <c>Constants.ModelPath</c>.
        /// </summary>
        /// <remarks>
        /// The assignment to <see cref="_model"/> happens while <c>_semaphore</c> is held.
        /// Any exception raised along the way is written to the console and not rethrown.
        /// </remarks>
        /// <returns>A task that completes once training and saving have finished or failed.</returns>
        private async Task Train()
        {
            try
            {
                Logger.Instance.Info("ML : Training model");
                CheckDataFile();

                var pipeline = new LearningPipeline {
                    new TextLoader(Constants.DataPath).CreateFrom<WindowData>(separator: ','),
                    new Dictionarizer("Label"),
                    new TextFeaturizer("Program", "Program"),
                    new TextFeaturizer("WindowTitle", "WindowTitle"),
                    new ColumnConcatenator("Features", "Program", "WindowTitle", "WindowTop", "WindowLeft", "WindowHeight", "WindowWidth"),
                    new StochasticDualCoordinateAscentClassifier(),
                    new PredictedLabelColumnOriginalValueConverter {
                        PredictedLabelColumn = "PredictedLabel"
                    }
                };

                await _semaphore.WaitAsync();
                try
                {
                    _model = pipeline.Train<WindowData, RegionPrediction>();
                }
                finally
                {
                    // Always hand the permit back: if training throws and the semaphore is
                    // not released, the permit is lost and later waiters can block indefinitely.
                    _semaphore.Release();
                }

                await _model.WriteAsync(Constants.ModelPath);

                Logger.Instance.Info("ML : Model trained");
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
        }
示例#2
0
        /// <summary>
        /// Returns the model stored at <c>ModelName</c> when that file exists; otherwise trains a
        /// FastTree regression model from <paramref name="trainingFileName"/>, writes it to
        /// <c>ModelName</c> and returns it.
        /// </summary>
        /// <typeparam name="T">Row type used to read the training file and as model input.</typeparam>
        /// <typeparam name="TK">Prediction type produced by the model.</typeparam>
        /// <param name="trainingFileName">Path of the comma-separated training file.</param>
        /// <returns>
        /// A result object wrapping the model, or wrapping the exception thrown while
        /// training or saving a new model.
        /// </returns>
        internal override async Task<ReturnObj<PredictionModel<T, TK>>> LoadOrGenerateModelAsync<T, TK>(string trainingFileName)
        {
            if (File.Exists(ModelName))
            {
                // Reading an existing model is not guarded: a failure here propagates to the caller.
                var storedModel = await PredictionModel.ReadAsync<T, TK>(ModelName);

                return new ReturnObj<PredictionModel<T, TK>>(storedModel);
            }

            PredictionModel<T, TK> trainedModel;

            try
            {
                var pipeline = new LearningPipeline();
                pipeline.Add(new TextLoader(trainingFileName).CreateFrom<T>(separator: ','));
                pipeline.Add(new ColumnConcatenator("Features", "Features"));
                pipeline.Add(new FastTreeRegressor());

                trainedModel = pipeline.Train<T, TK>();

                await trainedModel.WriteAsync(ModelName);
            }
            catch (Exception ex)
            {
                return new ReturnObj<PredictionModel<T, TK>>(ex);
            }

            return new ReturnObj<PredictionModel<T, TK>>(trainedModel);
        }
示例#3
0
        /// <summary>
        /// Trains an iris classifier from a comma-separated data file and prints the label
        /// predicted for one hard-coded sample.
        /// </summary>
        public void TutorialOne()
        {
            const string irisFile = @"C:\Test\MLNetTutorials\MLNetTutorials\Data\iris.data.txt";

            // Steps are added in order: load, map labels to keys, build the feature vector,
            // learn, then map the predicted key back to the original label value.
            var pipeline = new LearningPipeline
            {
                new TextLoader(irisFile).CreateFrom<IrisData>(separator: ','),
                new Dictionarizer("Label"),
                new ColumnConcatenator("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"),
                new StochasticDualCoordinateAscentClassifier(),
                new PredictedLabelColumnOriginalValueConverter { PredictedLabelColumn = "PredictedLabel" }
            };

            var trained = pipeline.Train<IrisData, IrisPrediction>();

            var sample = new IrisData
            {
                SepalLength = 3.3f,
                SepalWidth  = 1.6f,
                PetalLength = 0.2f,
                PetalWidth  = 5.1f
            };
            var result = trained.Predict(sample);

            Console.WriteLine($"Predicted flower type is: {result.PredictedLabels}");
        }
示例#4
0
        /// <summary>
        /// Trains a sentiment model from the file at <c>_dataPath</c> and saves it to <c>_modelpath</c>.
        /// </summary>
        /// <returns>The trained prediction model.</returns>
        public static async Task<PredictionModel<SentimentData, SentimentPrediction>> Train()
        {
            var pipeline = new LearningPipeline
            {
                // Training data; the first line of the file is treated as a header.
                new TextLoader(_dataPath).CreateFrom<SentimentData>(useHeader: true),
                new Dictionarizer("Label"),
                // Turns the SentimentText column into the numeric "Features" vector used by the learner.
                new TextFeaturizer("Features", "SentimentText"),
                // Learner. Alternatives tried previously: LogisticRegressionClassifier, NaiveBayesClassifier,
                // FastTreeBinaryClassifier { NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2 }.
                new StochasticDualCoordinateAscentClassifier(),
                new PredictedLabelColumnOriginalValueConverter { PredictedLabelColumn = "PredictedLabel" }
            };

            var trained = pipeline.Train<SentimentData, SentimentPrediction>();

            // Persist the model before handing it back.
            await trained.WriteAsync(_modelpath);

            return trained;
        }
        /// <summary>
        /// Builds, without training, a sentiment pipeline: a tab-separated loader with a header row,
        /// text featurization of "SentimentText" into "Features", a LightGBM binary classifier and a
        /// predicted-label converter.
        /// </summary>
        /// <returns>The configured pipeline.</returns>
        private LearningPipeline PreparePipelineLightGBM()
        {
            var dataPath = GetDataPath(SentimentDataPath);
            var pipeline = new LearningPipeline();

            // Column 0 is read as the numeric label, column 1 as the raw text.
            var loader = new Data.TextLoader(dataPath)
            {
                Arguments = new TextLoaderArguments
                {
                    Separator = new[] { '\t' },
                    HasHeader = true,
                    Column    = new[]
                    {
                        new TextLoaderColumn
                        {
                            Name   = "Label",
                            Source = new[] { new TextLoaderRange(0) },
                            Type   = Data.DataKind.Num
                        },
                        new TextLoaderColumn
                        {
                            Name   = "SentimentText",
                            Source = new[] { new TextLoaderRange(1) },
                            Type   = Data.DataKind.Text
                        }
                    }
                }
            };
            pipeline.Add(loader);

            var featurizer = new TextFeaturizer("Features", "SentimentText")
            {
                KeepDiacritics       = false,
                KeepPunctuations     = false,
                TextCase             = TextNormalizerTransformCaseNormalizationMode.Lower,
                OutputTokens         = true,
                StopWordsRemover     = new PredefinedStopWordsRemover(),
                VectorNormalizer     = TextTransformTextNormKind.L2,
                CharFeatureExtractor = new NGramNgramExtractor { NgramLength = 3, AllLengths = false },
                WordFeatureExtractor = new NGramNgramExtractor { NgramLength = 2, AllLengths = true }
            };
            pipeline.Add(featurizer);

            var learner = new LightGbmBinaryClassifier
            {
                NumLeaves      = 5,
                NumBoostRound  = 5,
                MinDataPerLeaf = 2
            };
            pipeline.Add(learner);

            pipeline.Add(new PredictedLabelColumnOriginalValueConverter { PredictedLabelColumn = "PredictedLabel" });

            return pipeline;
        }
示例#6
0
        /// <summary>
        /// Trains an iris classifier from "data.txt" and prints the label predicted for one sample.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            const string dataPath = "data.txt";

            var pipeline = new LearningPipeline
            {
                new TextLoader<IrisData>(dataPath, separator: ","),
                new Dictionarizer("Label"),
                new ColumnConcatenator("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"),
                new StochasticDualCoordinateAscentClassifier(),
                // Maps the predicted key back to the original label value.
                new PredictedLabelColumnOriginalValueConverter { PredictedLabelColumn = "PredictedLabel" }
            };

            var trained = pipeline.Train<IrisData, IrisPrediction>();

            var sample = new IrisData
            {
                SepalLength = 5.03f,
                SepalWidth  = 2.6f,
                PetalLength = 0.2f,
                PetalWidth  = 5.1f
            };
            var result = trained.Predict(sample);

            Console.WriteLine($"Predicted flower type is: {result.PredictedLabels}");
        }
示例#7
0
        /// <summary>
        /// Trains an age-range classifier from "AgeRangeData.csv", prints the predicted label for
        /// two samples and then waits for a line of console input.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var pipeline = new LearningPipeline();

            pipeline.Add(new TextLoader<AgeRangeData>("AgeRangeData.csv", separator: ","));
            pipeline.Add(new Dictionarizer("Label"));
            pipeline.Add(new ColumnConcatenator("Features", "AgeStart", "AgeEnd"));
            pipeline.Add(new StochasticDualCoordinateAscentClassifier());
            pipeline.Add(new PredictedLabelColumnOriginalValueConverter
            {
                PredictedLabelColumn = "PredictedLabel"
            });

            var trained = pipeline.Train<AgeRangeData, AgeRangePrediction>();

            // First sample: ages 1 to 2.
            var firstResult = trained.Predict(new AgeRangeData { AgeStart = 1, AgeEnd = 2 });
            Console.WriteLine($"Predicted age range is: {firstResult.PredictedLabels}");

            // Second sample: ages 7 to 7.
            var secondResult = trained.Predict(new AgeRangeData { AgeStart = 7, AgeEnd = 7 });
            Console.WriteLine($"Predicted age range is: {secondResult.PredictedLabels}");

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }
示例#8
0
文件: Program.cs 项目: sjison/ML
        TrainAsync(InputData input)
        {
            // Purpose: trains a FastTree binary classifier on the text data referenced by
            // input.TrainingData, saves the model to _modelpath and returns it.
            // NOTE(review): the method's modifiers and return type are not visible in this excerpt;
            // the body awaits and returns a PredictionModel<ClassificationData, ClassPrediction>.

            // LearningPipeline collects the steps (loader, transforms, learner) in the order
            // they are added.
            var pipeline = new LearningPipeline();

            // The TextLoader reads the training set. The schema (column names and types) is taken
            // from the class passed to CreateFrom, here ClassificationData.
            // Presumably the file holds text with a positive/negative label - confirm against the data.

            //pipeline.Add(new TextLoader(_dataPath).CreateFrom<SentimentData>());
            pipeline.Add(new TextLoader(input.TrainingData).CreateFrom <ClassificationData>());

            // TextFeaturizer turns the "Text" input column into the "Features" column
            // consumed by the learner.
            pipeline.Add(new TextFeaturizer("Features", "Text"));

            // FastTreeBinaryClassifier is the learner for this pipeline, configured with
            // three hyperparameters: leaves per tree, number of trees, minimum documents per leaf.
            pipeline.Add(new FastTreeBinaryClassifier()
            {
                NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2
            });

            // Train the pipeline on the loaded and transformed dataset.
            PredictionModel <ClassificationData, ClassPrediction> model =
                pipeline.Train <ClassificationData, ClassPrediction>();

            // Save the trained model to the path held in _modelpath.
            await model.WriteAsync(_modelpath);

            // Return the trained model to the caller.
            return(model);
        }
示例#9
0
        /// <summary>
        /// Trains an iris classifier from a comma-separated file and saves the model.
        /// </summary>
        /// <param name="dataPath">Path of the training data file.</param>
        /// <param name="modelPath">Path the trained model is written to.</param>
        /// <returns>The trained prediction model.</returns>
        public static async Task<PredictionModel<IrisData, IrisPrediction>> TrainModel(string dataPath, string modelPath)
        {
            var pipeline = new LearningPipeline
            {
                // Load the data.
                new TextLoader(dataPath).CreateFrom<IrisData>(separator: ','),
                // Map the text in the "Label" column to keys for training.
                new Dictionarizer("Label"),
                // Combine the four measurements into the "Features" vector.
                new ColumnConcatenator("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"),
                // Learner.
                new StochasticDualCoordinateAscentClassifier(),
                // Map the predicted key back to the original label text.
                new PredictedLabelColumnOriginalValueConverter { PredictedLabelColumn = "PredictedLabel" }
            };

            PredictionModel<IrisData, IrisPrediction> trained = pipeline.Train<IrisData, IrisPrediction>();

            // Persist the model before returning it.
            await trained.WriteAsync(modelPath);

            return trained;
        }
示例#10
0
        /// <summary>
        /// Trains a news classifier from "news-train.txt": the "Text" column is featurized into
        /// "Features", "Label" is mapped to keys, and an SDCA classifier is fitted.
        /// </summary>
        /// <returns>The trained prediction model.</returns>
        static PredictionModel<NewsData, NewsPrediction> TrainNews()
        {
            var pipeline = new LearningPipeline();

            pipeline.Add(new TextLoader(@"news-train.txt").CreateFrom<NewsData>());

            // Lower-cased English text without diacritics or punctuation, stop words removed,
            // character 3-grams plus word n-grams of every length up to 3, L2-normalized.
            var featurizer = new TextFeaturizer("Features", "Text")
            {
                KeepDiacritics       = false,
                KeepPunctuations     = false,
                TextCase             = TextNormalizerTransformCaseNormalizationMode.Lower,
                OutputTokens         = true,
                Language             = TextTransformLanguage.English,
                StopWordsRemover     = new PredefinedStopWordsRemover(),
                VectorNormalizer     = TextTransformTextNormKind.L2,
                CharFeatureExtractor = new NGramNgramExtractor { NgramLength = 3, AllLengths = false },
                WordFeatureExtractor = new NGramNgramExtractor { NgramLength = 3, AllLengths = true }
            };
            pipeline.Add(featurizer);

            pipeline.Add(new Dictionarizer("Label"));
            pipeline.Add(new StochasticDualCoordinateAscentClassifier());

            var trained = pipeline.Train<NewsData, NewsPrediction>();
            return trained;
        }
示例#11
0
        /// <summary>
        /// Builds the wine-quality pipeline from the training file at <c>_trainingDataLocation</c>
        /// and trains it with the learner held in <c>_algorythm</c>.
        /// </summary>
        /// <returns>The trained machine learning model.</returns>
        public PredictionModel<WineQualitySample, WineQualityPrediction> BuildAndTrain()
        {
            var pipeline = new LearningPipeline
            {
                // Semicolon-separated file whose first line is a header.
                new TextLoader(_trainingDataLocation).CreateFrom<WineQualitySample>(useHeader: true, separator: ';'),
                // Missing FixedAcidity values are replaced with the column mean.
                new MissingValueSubstitutor("FixedAcidity")
                {
                    ReplacementKind = NAReplaceTransformReplacementKind.Mean
                },
                MakeNormalizer(),
                new ColumnConcatenator(
                    "Features",
                    "FixedAcidity", "VolatileAcidity", "CitricAcid", "ResidualSugar",
                    "Chlorides", "FreeSulfurDioxide", "TotalSulfurDioxide", "Density",
                    "Ph", "Sulphates", "Alcohol"),
                _algorythm
            };

            var trained = pipeline.Train<WineQualitySample, WineQualityPrediction>();
            return trained;
        }
示例#12
0
        /// <summary>
        /// Trains a classifier on the rows read from <paramref name="dataPath"/> via <c>OpenFile</c>
        /// and saves the model to <c>_modelPath</c>.
        /// </summary>
        /// <param name="dataPath">Path handed to <c>OpenFile</c> to read the training rows.</param>
        /// <returns>The trained prediction model.</returns>
        static async Task<PredictionModel<Open311Data, Open311DataPrediction>> TrainOpen311(string dataPath)
        {
            var pipeline = new LearningPipeline
            {
                // In-memory data source built from the rows OpenFile returns.
                CollectionDataSource.Create(OpenFile(dataPath, 3, 0, 1, 2)),
                new Dictionarizer(@"Label"),
                // Lower-cased German text without diacritics or punctuation, stop words removed.
                new TextFeaturizer(@"Features", @"Request")
                {
                    KeepDiacritics       = false,
                    KeepPunctuations     = false,
                    TextCase             = TextNormalizerTransformCaseNormalizationMode.Lower,
                    OutputTokens         = true,
                    Language             = TextTransformLanguage.German,
                    StopWordsRemover     = new PredefinedStopWordsRemover(),
                    VectorNormalizer     = TextTransformTextNormKind.L2,
                    CharFeatureExtractor = new NGramNgramExtractor { NgramLength = 3, AllLengths = false },
                    WordFeatureExtractor = new NGramNgramExtractor { NgramLength = 3, AllLengths = true }
                },
                new StochasticDualCoordinateAscentClassifier(),
                new PredictedLabelColumnOriginalValueConverter { PredictedLabelColumn = @"PredictedLabel" }
            };

            var trained = pipeline.Train<Open311Data, Open311DataPrediction>();

            await trained.WriteAsync(_modelPath);

            return trained;
        }
示例#13
0
        /// <summary>
        /// Builds a FastTree binary-classification pipeline over the reopened-issue data set and
        /// runs cross-validation on it. The result is not used further.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var pipeline = new LearningPipeline();

            pipeline.Add(new TextLoader("Data/test_generated.data").CreateFrom<ReopenedIssueData>());

            // Each text column is featurized in place (same input and output column name).
            pipeline.Add(new TextFeaturizer(Columns.Environment, Columns.Environment));
            pipeline.Add(new TextFeaturizer(Columns.Type, Columns.Type));
            pipeline.Add(new TextFeaturizer(Columns.ProjectName, Columns.ProjectName));
            pipeline.Add(new TextFeaturizer(Columns.AsigneeEmail, Columns.AsigneeEmail));
            pipeline.Add(new TextFeaturizer(Columns.ReporterEmail, Columns.ReporterEmail));

            pipeline.Add(new ColumnConcatenator(
                Columns.Features,
                Columns.Environment,
                Columns.Type,
                Columns.CommentsCount,
                Columns.CommentsLenght,
                Columns.ReporterCommentsCount,
                Columns.ProjectName,
                Columns.AsigneeEmail,
                Columns.ReporterEmail));

            pipeline.Add(new FastTreeBinaryClassifier());

            // The pipeline is cross-validated rather than trained directly; NumFolds is left at its default.
            var validator = new CrossValidator
            {
                Kind = MacroUtilsTrainerKinds.SignatureBinaryClassifierTrainer
            };

            var validationOutput = validator.CrossValidate<ReopenedIssueData, ReopenedIssuePrediction>(pipeline);
        }
示例#14
0
        /// <summary>
        /// Trains a GitHub-issue classifier from the file at <c>DataPath</c> and saves it to
        /// <c>ModelPath</c>, reporting progress on the console.
        /// </summary>
        /// <returns>A task that completes once the model has been written.</returns>
        public static async Task TrainAsync()
        {
            var pipeline = new LearningPipeline
            {
                new TextLoader<GitHubIssue>(DataPath, useHeader: true),
                // The "Area" column is mapped to keys and written to "Label".
                new Dictionarizer(("Area", "Label")),
                new TextFeaturizer("Title", "Title"),
                new TextFeaturizer("Description", "Description"),
                new ColumnConcatenator("Features", "Title", "Description"),
                new StochasticDualCoordinateAscentClassifier(),
                new PredictedLabelColumnOriginalValueConverter { PredictedLabelColumn = "PredictedLabel" }
            };

            Console.WriteLine("=============== Training model ===============");

            var trained = pipeline.Train<GitHubIssue, GitHubIssuePrediction>();
            await trained.WriteAsync(ModelPath);

            Console.WriteLine("=============== End training ===============");
            Console.WriteLine("The model is saved to {0}", ModelPath);
        }
示例#15
0
        /// <summary>
        /// Builds the beer-or-wine binary classification pipeline and cross-validates it.
        /// The cross-validation output is not reported anywhere yet.
        /// </summary>
        public static void CrossValidate()
        {
            var pipeline = new LearningPipeline
            {
                new TextLoader("1_BinaryClassification/problem1.csv").CreateFrom<BeerOrWineData>(useHeader: true, separator: ','),
                new TextFeaturizer("Features", "FullName"),
                // The "Type" column is mapped to keys and written to "Label".
                new Dictionarizer(("Type", "Label")),
                new StochasticDualCoordinateAscentBinaryClassifier(),
                new PredictedLabelColumnOriginalValueConverter { PredictedLabelColumn = "PredictedLabel" }
            };

            var validator = new CrossValidator();
            var cv = validator.CrossValidate<BeerOrWineData, BeerOrWinePrediction>(pipeline);

            // TODO: show the resulting matrix.
        }
示例#16
0
文件: Program.cs 项目: vadimas/events
        /// <summary>
        /// Trains an age-range classifier, exports it through <c>OnnxConverter</c>, and then replaces the
        /// producer version in the JSON output with a fixed placeholder.
        /// </summary>
        static void Main()
        {
            var pipeline = new LearningPipeline();

            pipeline.Add(new TextLoader(FileName).CreateFrom<AgeRange>(separator: ',', useHeader: true));
            pipeline.Add(new Dictionarizer("Label"));
            pipeline.Add(new TextFeaturizer("Gender", "Gender"));
            pipeline.Add(new ColumnConcatenator("Features", "Age", "Gender"));
            pipeline.Add(new StochasticDualCoordinateAscentClassifier());
            pipeline.Add(new PredictedLabelColumnOriginalValueConverter
            {
                PredictedLabelColumn = "PredictedLabel"
            });

            var trained = pipeline.Train<AgeRange, AgeRangePrediction>();

            var exporter = new OnnxConverter
            {
                Onnx   = OnnxPath,
                Json   = OnnxAsJsonPath,
                Domain = "com.elbruno"
            };
            exporter.Convert(trained);

            // Strip the version: whatever producerVersion was written is replaced by a placeholder.
            var json     = File.ReadAllText(OnnxAsJsonPath);
            var stripped = Regex.Replace(json, "\"producerVersion\": \"([^\"]+)\"", "\"producerVersion\": \"##VERSION##\"");

            File.WriteAllText(OnnxAsJsonPath, stripped);
        }
        /// <summary>
        /// Trains a one-versus-all classifier over an SDCA binary learner on the iris data, then checks
        /// the metrics from both a direct evaluation and a train/test evaluation on the same file.
        /// </summary>
        public void TrainOneVersusAll()
        {
            string irisFile = GetDataPath("iris.txt");

            // Fixed seed and concurrency of 1 are passed to the pipeline.
            var pipeline = new LearningPipeline(seed: 1, conc: 1)
            {
                new TextLoader(irisFile).CreateFrom<IrisData>(useHeader: false),
                new ColumnConcatenator("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"),
                OneVersusAll.With(new StochasticDualCoordinateAscentBinaryClassifier())
            };

            var trained = pipeline.Train<IrisData, IrisPrediction>();

            // The training file doubles as the test set.
            var testData = new TextLoader(irisFile).CreateFrom<IrisData>(useHeader: false);

            ClassificationMetrics directMetrics = new ClassificationEvaluator().Evaluate(trained, testData);
            CheckMetrics(directMetrics);

            var trainTestEvaluator = new TrainTestEvaluator
            {
                Kind = MacroUtilsTrainerKinds.SignatureMultiClassClassifierTrainer
            };
            var trainTestOutput = trainTestEvaluator.TrainTestEvaluate<IrisData, IrisPrediction>(pipeline, testData);

            CheckMetrics(trainTestOutput.ClassificationMetrics);
        }
示例#18
0
        /// <summary>
        /// Creates the predictor and trains its model from "Profit-Train.txt" located in the
        /// application's base directory. The trained model is stored in <c>model</c>.
        /// </summary>
        public PredictStock()
        {
            // Creating a pipeline and loading the data
            var pipeline = new LearningPipeline();

            // Path.Combine inserts the platform's directory separator where one is needed, instead of a
            // hard-coded backslash that only works as a separator on Windows.
            string dataPath = System.IO.Path.Combine(System.AppDomain.CurrentDomain.BaseDirectory, "Profit-Train.txt");

            pipeline.Add(new TextLoader(dataPath).CreateFrom<StockData>(separator: ','));

            // Map the "Label" column to keys for training
            pipeline.Add(new Dictionarizer("Label"));

            // Put the features into a single vector
            pipeline.Add(new ColumnConcatenator("Features", "CurrentPrice", "DayHigh", "DayLow"));

            // Learning algorithm
            pipeline.Add(new StochasticDualCoordinateAscentClassifier());

            // Convert the predicted key back into the original label value
            pipeline.Add(new PredictedLabelColumnOriginalValueConverter()
            {
                PredictedLabelColumn = "PredictedLabel"
            });

            // Train the model
            this.model = pipeline.Train<StockData, StockPrediction>();
        }
示例#19
0
        /// <summary>
        /// Cross-validates a GAM regressor on the salary data set with five folds and prints the RMS of
        /// each fold followed by the average R^2 and RMS, then waits for a line of console input.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var trainingFile = MLNetUtilities.GetDataPathByDatasetName("SalaryData.csv");
            // The test path is resolved here but not used by the rest of the method.
            var testFile = MLNetUtilities.GetDataPathByDatasetName("SalaryData-test.csv");

            var pipeline = new LearningPipeline();
            pipeline.Add(new TextLoader(trainingFile).CreateFrom<SalaryData>(useHeader: true, separator: ','));
            pipeline.Add(new ColumnConcatenator("Features", "YearsExperience"));
            pipeline.Add(new GeneralizedAdditiveModelRegressor());

            var validator = new CrossValidator
            {
                Kind     = MacroUtilsTrainerKinds.SignatureRegressorTrainer,
                NumFolds = 5
            };
            var validationOutput = validator.CrossValidate<SalaryData, SalaryPrediction>(pipeline);

            Console.Write(Environment.NewLine);
            Console.WriteLine("Root Mean Squared for each fold:");
            foreach (var foldMetrics in validationOutput.RegressionMetrics)
            {
                Console.WriteLine(foldMetrics.Rms);
            }

            var r2Sum  = validationOutput.RegressionMetrics.Sum(metric => metric.RSquared);
            var rmsSum = validationOutput.RegressionMetrics.Sum(metric => metric.Rms);

            Console.Write(Environment.NewLine);
            Console.WriteLine($"Average R^2: {r2Sum / validationOutput.RegressionMetrics.Count}");
            Console.WriteLine($"Average RMS: {rmsSum / validationOutput.RegressionMetrics.Count}");

            Console.ReadLine();
        }
示例#20
0
文件: Program.cs 项目: sjison/ML
        /// <summary>
        /// Trains a classifier on the segment data file and prints the digit predicted for one
        /// hard-coded segment pattern.
        /// Sources:
        /// https://stackoverflow.com/questions/50497593/how-to-predict-integer-values-using-ml-net
        /// https://github.com/Rowandish/MachineLearningTest
        /// </summary>
        internal static void DigitsDataPrediction()
        {
            Console.WriteLine();
            Console.WriteLine();
            Console.WriteLine("2> Training and predicting Digits data:");

            var pipeline = new LearningPipeline();
            pipeline.Add(new TextLoader(@"Models\PredictDigits\Data\segments.txt").CreateFrom<Digit>(separator: ','));
            pipeline.Add(new ColumnConcatenator("Features", nameof(Digit.Features)));
            pipeline.Add(new StochasticDualCoordinateAscentClassifier());

            var trained = pipeline.Train<Digit, DigitPrediction>();

            var segments = new Digit
            {
                Up          = 1,
                Middle      = 1,
                Bottom      = 0,
                UpLeft      = 1,
                BottomLeft  = 1,
                TopRight    = 1,
                BottomRight = 1
            };
            var result = trained.Predict(segments);

            // The printed value is the model output minus one.
            Console.WriteLine($"Predicted digit is: {result.ExpectedDigit - 1}");
        }
        /// <summary>
        /// Trains a K-means++ clustering model with three clusters on the iris data at <c>DataPath</c>
        /// and saves it to <c>ModelPath</c>.
        /// </summary>
        /// <returns>The trained clustering model.</returns>
        internal static async Task<PredictionModel<IrisData, ClusterPrediction>> TrainAsync()
        {
            var pipeline = new LearningPipeline();

            // The loader takes its schema (column names and types) from IrisData; the first
            // line of the file is treated as a header.
            pipeline.Add(new TextLoader(DataPath).CreateFrom<IrisData>(useHeader: true));

            // The four measurements are concatenated into the "Features" column.
            pipeline.Add(new ColumnConcatenator("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));

            // Clustering algorithm, configured for 3 clusters.
            pipeline.Add(new KMeansPlusPlusClusterer { K = 3 });

            Console.WriteLine("=============== Training model ===============");
            PredictionModel<IrisData, ClusterPrediction> trained = pipeline.Train<IrisData, ClusterPrediction>();
            Console.WriteLine("=============== End training ===============");

            // The model is saved as a file at ModelPath.
            await trained.WriteAsync(ModelPath);
            Console.WriteLine("The model is saved to {0}", ModelPath);

            return trained;
        }
示例#22
0
        /// <summary>
        /// Trains an SDCA regressor on "PeopleData.txt" from the application base directory, saves the model
        /// as "IncomeModel.zip", prints the RMS error measured on the training file, and prints the
        /// prediction for one hard-coded sample. Waits for a line of console input before returning.
        /// </summary>
        public static void GetMyPrediction()
        {
            Console.WriteLine("Begin ML.NET demo run");
            Console.WriteLine("Income from age, sex, politics");

            var    pipeline = new LearningPipeline();
            string dataPath = AppDomain.CurrentDomain.BaseDirectory + "/PeopleData.txt";

            pipeline.Add(new TextLoader(dataPath).CreateFrom<myLottery>(separator: ' '));

            // The "Income" column is copied to "Label" so the regressor learns to predict it.
            pipeline.Add(new ColumnCopier(("Income", "Label")));
            pipeline.Add(new ColumnConcatenator(
                "Features",
                "pre10", "pre9", "pre8", "pre7", "pre6",
                "pre5", "pre4", "pre3", "pre2", "pre1"));

            var regressor = new StochasticDualCoordinateAscentRegressor
            {
                MaxIterations     = 1000,
                NormalizeFeatures = NormalizeOption.Auto
            };
            pipeline.Add(regressor);

            Console.WriteLine("\nStarting training \n");
            var trained = pipeline.Train<myLottery, myPrediction>();
            Console.WriteLine("\nTraining complete \n");

            string modelPath = AppDomain.CurrentDomain.BaseDirectory + "/IncomeModel.zip";

            // The asynchronous write is run on the thread pool and waited for synchronously.
            Task.Run(() => trained.WriteAsync(modelPath)).GetAwaiter().GetResult();

            // Evaluation reuses the training file as test data.
            var    evaluationData = new TextLoader(dataPath).CreateFrom<myLottery>(separator: ' ');
            var    metrics        = new RegressionEvaluator().Evaluate(trained, evaluationData);
            double rms            = metrics.Rms;

            Console.WriteLine("Root mean squared error = " + rms.ToString("F4"));
            Console.WriteLine("Income age 40 conservative male: ");

            var sample = new myLottery
            {
                pre10 = 6824298f,
                pre9  = 2589916f,
                pre8  = 2602089f,
                pre7  = 2915497f,
                pre6  = 8507838f,
                pre5  = 7679324f,
                pre4  = 607461f,
                pre3  = 5806877,
                pre2  = 6776442f,
                pre1  = 9975203
            };
            myPrediction result     = trained.Predict(sample);
            float        predIncome = result.Income;

            Console.WriteLine("Predicted income = $" + predIncome.ToString("F2"));
            Console.WriteLine("\nEnd ML.NET demo");
            Console.ReadLine();
        }
示例#23
0
        /// <summary>
        /// Trains an iris classifier from "iris-data.txt" and prints the labels predicted for two
        /// hard-coded samples.
        /// </summary>
        public static void PredictIris()
        {
            const string dataPath = "iris-data.txt";

            var pipeline = new LearningPipeline
            {
                new TextLoader(dataPath).CreateFrom<IrisData>(separator: ','),
                new Dictionarizer("Label"),
                new ColumnConcatenator("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"),
                new StochasticDualCoordinateAscentClassifier(),
                // Maps the predicted key back to the original label value.
                new PredictedLabelColumnOriginalValueConverter { PredictedLabelColumn = "PredictedLabel" }
            };

            var trained = pipeline.Train<IrisData, IrisPrediction>();

            var firstSample = new IrisData
            {
                SepalLength = 3.3f,
                SepalWidth  = 1.6f,
                PetalLength = 0.2f,
                PetalWidth  = 0.2f
            };
            var firstResult = trained.Predict(firstSample);

            var secondSample = new IrisData
            {
                SepalLength = 5.8f,
                SepalWidth  = 2.7f,
                PetalLength = 5.1f,
                PetalWidth  = 1.9f
            };
            var secondResult = trained.Predict(secondSample);

            Console.WriteLine($"Predicred flower type is: {firstResult.PredictedLabels}");

            Console.WriteLine($"Predicred 2 flower type is: {secondResult.PredictedLabels}");
        }
示例#24
0
        /// <summary>
        /// Builds the sentiment pipeline (text loader, text featurizer and a FastTree
        /// binary classifier), trains it and writes the trained model to <c>ModelPath</c>.
        /// </summary>
        /// <returns>The trained prediction model.</returns>
        public static async Task<PredictionModel<SentimentData, SentimentPrediction>> TrainAsync()
        {
            // The pipeline lists every step of the learning process in execution order.
            var pipeline = new LearningPipeline
            {
                // Loads the dataset; SentimentData supplies the column names and types.
                new TextLoader(TrainDataPath).CreateFrom<SentimentData>(),

                // Featurizes the "SentimentText" input column into "Features".
                new TextFeaturizer("Features", "SentimentText"),

                // The learner, configured with three decision-tree hyperparameters.
                new FastTreeBinaryClassifier
                {
                    NumLeaves           = 5,
                    NumTrees            = 5,
                    MinDocumentsInLeafs = 2
                }
            };

            Console.WriteLine("=============== Training model ===============");
            var trainedModel = pipeline.Train<SentimentData, SentimentPrediction>();

            // Persist the trained model before reporting completion.
            await trainedModel.WriteAsync(ModelPath);

            Console.WriteLine("=============== End training ===============");
            Console.WriteLine("The model is saved to {0}", ModelPath);

            return trainedModel;
        }
        /// <summary>
        /// Checks that a pipeline made of a loader and a single hashing one-hot transform
        /// (no learner) can be trained and used for prediction, and that the input
        /// <c>"5"</c> produces a 1024-element vector whose only non-zero slot is index 265.
        /// </summary>
        public void TransformOnlyPipeline()
        {
            const string _dataPath = @"..\..\Data\breast-cancer.txt";

            var pipeline = new LearningPipeline
            {
                new ML.Data.TextLoader(_dataPath).CreateFrom<InputData>(useHeader: false),
                new CategoricalHashOneHotVectorizer("F1")
                {
                    HashBits   = 10,
                    Seed       = 314489979,
                    OutputKind = CategoricalTransformOutputKind.Bag
                }
            };

            var model       = pipeline.Train<InputData, TransformedData>();
            var transformed = model.Predict(new InputData { F1 = "5" });

            Assert.NotNull(transformed);
            Assert.NotNull(transformed.TransformedF1);
            Assert.Equal(1024, transformed.TransformedF1.Length);

            // Every slot must be zero except slot 265, which must be one.
            for (int slot = 0; slot < 1024; slot++)
            {
                if (slot != 265)
                {
                    Assert.Equal(0, transformed.TransformedF1[slot]);
                }
                else
                {
                    Assert.Equal(1, transformed.TransformedF1[slot]);
                }
            }
        }
示例#26
0
文件: Program.cs 项目: nangs/Tryouts
        /// <summary>
        /// Entry point: trains an iris classifier from <c>iris-data.csv</c> and prints the
        /// predicted flower type for one hard-coded sample.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var fileName = "iris-data.csv";

            // The steps below execute in the order they are listed.
            var pipeline = new LearningPipeline
            {
                // Read the comma-separated training file.
                new TextLoader<IrisData>(fileName, separator: ","),

                // Assign numeric values to the texts in the Label column.
                new Dictionarizer("Label"),

                // Put all four measurements into a single feature vector.
                new ColumnConcatenator("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"),

                // The classifier.
                new StochasticDualCoordinateAscentClassifier(),

                // Convert the predicted label back to its original value.
                new PredictedLabelColumnOriginalValueConverter
                {
                    PredictedLabelColumn = "PredictedLabel"
                }
            };

            var model = pipeline.Train<IrisData, IrisPrediction>();

            var sample = new IrisData
            {
                SepalLength = 3.3f,
                SepalWidth  = 1.6f,
                PetalLength = 0.2f,
                PetalWidth  = 5.1f
            };
            var prediction = model.Predict(sample);

            System.Console.WriteLine($"Predicted flower type is : {prediction.PredictedLabels}");
        }
示例#27
0
        /// <summary>
        /// Trains a regression model on <paramref name="data"/> and stores the result in
        /// <c>TrainedModel</c>. The "Score" column is used as the label and the columns
        /// returned by <c>DataSet.ColumnNames()</c> are used as features.
        /// </summary>
        /// <param name="data">The training rows.</param>
        /// <param name="labels">Not read by this implementation.</param>
        /// <exception cref="InvalidOperationException">
        /// Thrown when <c>TrainedModel</c> has already been set.
        /// </exception>
        public override void Train(List<DataSet> data, List<float> labels = null)
        {
            // A model instance may be trained or loaded only once.
            if (TrainedModel != null)
            {
                throw new InvalidOperationException("May only train/load a model once");
            }

#if ML_LEGACY
            // Legacy API: source data, copy "Score" to "Label", concatenate the feature
            // columns, then a FastTree regressor.
            var pipeline = new LearningPipeline
            {
                CollectionDataSource.Create(data),
                new ColumnCopier(("Score", "Label")),
                new ColumnConcatenator("Features", DataSet.ColumnNames()),
                new FastTreeRegressor()
            };

            TrainedModel = pipeline.Train<DataSet, DataSetPrediction>();
#else
            var loader = GetTextLoader(Context);

            // The rows are spilled to a file because this path loads through the text
            // loader rather than from the in-memory collection.
            string spillPath = "";
            try
            {
                spillPath = WriteToDisk(data);

                IDataView trainingView = loader.Load(spillPath);
                InputSchema = trainingView.Schema;

                // Copy Score -> Label, concatenate the feature columns, then fit SDCA.
                var copyLabel = Context.Transforms.CopyColumns(outputColumnName: "Label", inputColumnName: nameof(DataSet.Score));
                var features  = Context.Transforms.Concatenate("Features", DataSet.ColumnNames());
                var sdca      = Context.Regression.Trainers.Sdca(labelColumnName: "Label", featureColumnName: "Features");

                TrainedModel = copyLabel.Append(features).Append(sdca).Fit(trainingView);
            }
            finally
            {
                // Remove the spill file whether or not training succeeded.
                bool hasPath = !string.IsNullOrWhiteSpace(spillPath);
                if (hasPath && File.Exists(spillPath))
                {
                    File.Delete(spillPath);
                }
            }
#endif
        }
示例#28
0
        /// <summary>
        /// Trains a sentiment model whose features combine the text featurizer output
        /// ("FeaturesA") with word embeddings computed from the featurizer's token
        /// output ("FeaturesB").
        /// </summary>
        /// <param name="modelKind">Assigned to the word-embedding transform's <c>ModelKind</c>.</param>
        /// <returns>The trained prediction model.</returns>
        public static PredictionModel<SentimentData, SentimentPrediction> TrainModelWordEmbeddings(WordEmbeddingsTransformPretrainedModelKind? modelKind)
        {
            var pipeline = new LearningPipeline
            {
                new TextLoader(TrainDataPath).CreateFrom<SentimentData>(),

                // OutputTokens is switched on so the embedding step below can read
                // "FeaturesA_TransformedText".
                new TextFeaturizer("FeaturesA", "SentimentText")
                {
                    OutputTokens = true
                },

                new WordEmbeddings(("FeaturesA_TransformedText", "FeaturesB"))
                {
                    ModelKind = modelKind
                },

                // Merge both feature sets into the single "Features" column.
                new ColumnConcatenator("Features", "FeaturesA", "FeaturesB"),

                new FastTreeBinaryClassifier
                {
                    NumLeaves           = 5,
                    NumTrees            = 5,
                    MinDocumentsInLeafs = 2
                }
            };

            Console.WriteLine("=============== Training model with Word Embeddings ===============");
            var trained = pipeline.Train<SentimentData, SentimentPrediction>();
            Console.WriteLine("=============== End training ===============");

            return trained;
        }
示例#29
0
        /// <summary>
        /// Retrains the timeliness model from the comma-separated training database at
        /// <c>config.DatabasePath</c> (read with a header row) and writes the trained
        /// model to <c>config.ModelPath</c>.
        /// </summary>
        /// <param name="config">Supplies the training database path and the model output path.</param>
        /// <returns>A task that completes once the model has been written.</returns>
        private static async Task RebuildModelAsync(Config config)
        {
            Console.WriteLine("RebuildModel:");

            var pipeline = new LearningPipeline();

            pipeline.Add(new TextLoader(config.DatabasePath).CreateFrom<TrainingDatabaseEntry>(useHeader: true, separator: ','));

            // Column names are taken straight from the entry type with nameof, so no
            // throwaway TrainingDatabaseEntry instance has to be constructed.
            pipeline.Add(new CategoricalOneHotVectorizer(nameof(TrainingDatabaseEntry.globalTolerance)));

            pipeline.Add(new ColumnConcatenator(
                "Features",
                nameof(TrainingDatabaseEntry.toleranceValue),
                nameof(TrainingDatabaseEntry.numNotTolerance),
                nameof(TrainingDatabaseEntry.numTolerance),
                nameof(TrainingDatabaseEntry.percentTolerance),
                nameof(TrainingDatabaseEntry.globalTolerance),
                nameof(TrainingDatabaseEntry.dayOfWeek)));

            pipeline.Add(new FastTreeBinaryClassifier()
            {
                NumLeaves = 5, NumTrees = 5, MinDocumentsInLeafs = 2
            });

            var model = pipeline.Train<TrainingDatabaseEntry, TimelinessPrediction>();

            Console.WriteLine($"  Saving model to '{config.ModelPath}'...");

            await model.WriteAsync(config.ModelPath);

            Console.WriteLine("  Model rebuilt.");
        }
示例#30
0
        TrainAsync(string trainingDataFile, string modelPath)
        {
            // Trains an SDCA classifier on the comma-separated file `trainingDataFile`
            // and writes the trained model to `modelPath`.
            // NOTE(review): the modifiers and return type of this declaration are not
            // visible in this file; the body awaits, so it is presumably async - confirm.

            // The 39 input columns: 13 MFCC coefficients followed by their first
            // ("Delta") and second ("DeltaDelta") order counterparts.
            string[] featureColumns =
            {
                "MFCC1", "MFCC2", "MFCC3", "MFCC4", "MFCC5", "MFCC6", "MFCC7",
                "MFCC8", "MFCC9", "MFCC10", "MFCC11", "MFCC12", "MFCC13",
                "MFCCDelta1", "MFCCDelta2", "MFCCDelta3", "MFCCDelta4", "MFCCDelta5",
                "MFCCDelta6", "MFCCDelta7", "MFCCDelta8", "MFCCDelta9", "MFCCDelta10",
                "MFCCDelta11", "MFCCDelta12", "MFCCDelta13",
                "MFCCDeltaDelta1", "MFCCDeltaDelta2", "MFCCDeltaDelta3", "MFCCDeltaDelta4",
                "MFCCDeltaDelta5", "MFCCDeltaDelta6", "MFCCDeltaDelta7", "MFCCDeltaDelta8",
                "MFCCDeltaDelta9", "MFCCDeltaDelta10", "MFCCDeltaDelta11",
                "MFCCDeltaDelta12", "MFCCDeltaDelta13"
            };

            // Steps execute in the order they are listed.
            var pipeline = new LearningPipeline
            {
                new TextLoader(trainingDataFile).CreateFrom<InputData>(separator: ','),
                new Dictionarizer("Label"),
                new ColumnConcatenator("Features", featureColumns),
                new StochasticDualCoordinateAscentClassifier(),
                new PredictedLabelColumnOriginalValueConverter
                {
                    PredictedLabelColumn = "PredictedLabel"
                }
            };

            var model = pipeline.Train<InputData, OutputData>();

            await model.WriteAsync(modelPath);

            Console.WriteLine("Model created");
        }