/// <summary>
/// Builds the item-stock regression pipeline, trains it, saves the trained
/// model to <c>_modelpath</c> and returns it.
/// </summary>
/// <returns>The trained model mapping <c>ItemStock</c> to <c>itemStockQtyPrediction</c>.</returns>
public static async Task<PredictionModel<ItemStock, itemStockQtyPrediction>> TrainourModel()
        {
            var pipeline = new LearningPipeline();

            // Comma-separated training file whose first row is a header.
            pipeline.Add(new TextLoader(_Traindatapath).CreateFrom<ItemStock>(useHeader: true, separator: ','));

            // The value to predict is copied into the "Label" column.
            pipeline.Add(new ColumnCopier(("TotalStockQty", "Label")));

            // Only these three columns go through the one-hot vectorizer;
            // "InQty" and "OutQty" are passed to the concatenator as they are.
            pipeline.Add(new CategoricalOneHotVectorizer("ItemID", "Loccode", "ItemType"));

            // Gather every input column into the single "Features" column.
            pipeline.Add(new ColumnConcatenator("Features", "ItemID", "Loccode", "InQty", "OutQty", "ItemType"));

            pipeline.Add(new FastTreeRegressor());

            var model = pipeline.Train<ItemStock, itemStockQtyPrediction>();

            // Persist the trained model before handing it back.
            await model.WriteAsync(_modelpath);

            return model;
        }
示例#2
0
        /// <summary>
        /// Trains a regressor that predicts "Rating" from the ingredient amounts and
        /// cooking times, saves the model to <c>_modelpath</c> and returns it.
        /// </summary>
        /// <returns>The trained model mapping <c>ShakVector</c> to <c>ShakPrediction</c>.</returns>
        public static async Task<PredictionModel<ShakVector, ShakPrediction>> Train()
        {
            // Input columns that are merged into the "Features" vector, in order.
            string[] featureColumns =
            {
                "TomatoAmount",
                "OnionAmount",
                "GarlicAmount",
                "BellPepperAmount",
                "EggsAmount",
                "PepperAmount",
                "SaltAmount",
                "BulgerianCheeseAmount",
                "PaprikaAmount",
                "WaterAmount",
                "TomatoResekAmount",
                "CuminAmount",
                "EggplantAmount",
                "TofuAmount",
                "FryingTimeBeforeTomatosMinutes",
                "CookingAfterTomatosMinutes",
                "CookingAfterEggsMinutes"
            };

            var pipeline = new LearningPipeline();

            // Comma-separated data file with a header row.
            pipeline.Add(new TextLoader(_datapath).CreateFrom<ShakVector>(useHeader: true, separator: ','));

            // "Rating" is the value being predicted.
            pipeline.Add(new ColumnCopier(("Rating", "Label")));
            pipeline.Add(new ColumnConcatenator("Features", featureColumns));
            pipeline.Add(new FastTreeRegressor());

            var model = pipeline.Train<ShakVector, ShakPrediction>();
            await model.WriteAsync(_modelpath);

            return model;
        }
示例#3
0
文件: Program.cs 项目: sjison/ML
        TrainAsync(InputData input)
        {
            // The whole learning process is declared in one place: the loader,
            // the featurizer and the learner are added in the order they run.
            var pipeline = new LearningPipeline
            {
                // The schema (column names and types) comes from ClassificationData;
                // the file to read is taken from the caller-supplied input.
                new TextLoader(input.TrainingData).CreateFrom<ClassificationData>(),

                // Featurize the "Text" column into the "Features" column.
                new TextFeaturizer("Features", "Text"),

                // Decision-tree binary classifier with three explicit hyperparameters.
                new FastTreeBinaryClassifier
                {
                    NumLeaves = 5,
                    NumTrees = 5,
                    MinDocumentsInLeafs = 2
                }
            };

            // Training runs the pipeline over the loaded, transformed data.
            var model = pipeline.Train<ClassificationData, ClassPrediction>();

            // Save the trained model, then return it for evaluation.
            await model.WriteAsync(_modelpath);

            return model;
        }
示例#4
0
        /// <summary>
        /// Trains the window-classification pipeline, stores the result in
        /// <see cref="_model"/> and writes it to <c>Constants.ModelPath</c>.
        /// </summary>
        /// <remarks>
        /// Any exception raised while training or saving is caught and printed to the
        /// console; it is not propagated to the caller.
        /// </remarks>
        /// <returns>A task that completes once the model has been trained and saved (or the attempt has failed).</returns>
        private async Task Train()
        {
            try
            {
                Logger.Instance.Info("ML : Training model");
                CheckDataFile();

                var pipeline = new LearningPipeline {
                    new TextLoader(Constants.DataPath).CreateFrom <WindowData>(separator: ','),
                    new Dictionarizer("Label"),
                    new TextFeaturizer("Program", "Program"),
                    new TextFeaturizer("WindowTitle", "WindowTitle"),
                    new ColumnConcatenator("Features", "Program", "WindowTitle", "WindowTop", "WindowLeft", "WindowHeight", "WindowWidth"),
                    new StochasticDualCoordinateAscentClassifier(),
                    new PredictedLabelColumnOriginalValueConverter {
                        PredictedLabelColumn = "PredictedLabel"
                    }
                };

                var trainedModel = default(PredictionModel <WindowData, RegionPrediction>);

                await _semaphore.WaitAsync();
                try
                {
                    trainedModel = pipeline.Train <WindowData, RegionPrediction>();
                    _model = trainedModel;
                }
                finally
                {
                    // Always release: a training failure must not leave the semaphore
                    // held, otherwise every later call would wait forever.
                    _semaphore.Release();
                }

                // Write the model trained by this call; _model may already have been
                // replaced by another caller once the semaphore was released.
                await trainedModel.WriteAsync(Constants.ModelPath);

                Logger.Instance.Info("ML : Model trained");
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
        }
示例#5
0
        /// <summary>
        /// Trains a taxi-fare regressor from the CSV file at <c>_datapath</c>,
        /// saves it to <c>_modelpath</c> and returns it.
        /// </summary>
        /// <returns>The trained model mapping <c>TaxiTrip</c> to <c>TaxiTripFarePrediction</c>.</returns>
        public static async Task<PredictionModel<TaxiTrip, TaxiTripFarePrediction>> Train()
        {
            var pipeline = new LearningPipeline
            {
                // Comma-separated file with a header row.
                new TextLoader(_datapath).CreateFrom<TaxiTrip>(useHeader: true, separator: ','),

                // "FareAmount" is the value being predicted.
                new ColumnCopier(("FareAmount", "Label")),

                // Categorical columns are one-hot vectorized.
                new CategoricalOneHotVectorizer("VendorId", "RateCode", "PaymentType"),

                // All inputs are merged into the "Features" column.
                new ColumnConcatenator(
                    "Features",
                    "VendorId",
                    "RateCode",
                    "PassengerCount",
                    "TripDistance",
                    "PaymentType"),

                new FastTreeRegressor()
            };

            var model = pipeline.Train<TaxiTrip, TaxiTripFarePrediction>();
            await model.WriteAsync(_modelpath);

            return model;
        }
示例#6
0
        TrainAsync(string trainingDataFile, string modelPath)
        {
            // The 39 input columns merged into "Features": three groups of 13
            // (MFCC, MFCCDelta, MFCCDeltaDelta), in this exact order.
            string[] featureColumns =
            {
                "MFCC1", "MFCC2", "MFCC3", "MFCC4", "MFCC5", "MFCC6", "MFCC7",
                "MFCC8", "MFCC9", "MFCC10", "MFCC11", "MFCC12", "MFCC13",
                "MFCCDelta1", "MFCCDelta2", "MFCCDelta3", "MFCCDelta4", "MFCCDelta5", "MFCCDelta6", "MFCCDelta7",
                "MFCCDelta8", "MFCCDelta9", "MFCCDelta10", "MFCCDelta11", "MFCCDelta12", "MFCCDelta13",
                "MFCCDeltaDelta1", "MFCCDeltaDelta2", "MFCCDeltaDelta3", "MFCCDeltaDelta4", "MFCCDeltaDelta5",
                "MFCCDeltaDelta6", "MFCCDeltaDelta7", "MFCCDeltaDelta8", "MFCCDeltaDelta9", "MFCCDeltaDelta10",
                "MFCCDeltaDelta11", "MFCCDeltaDelta12", "MFCCDeltaDelta13"
            };

            var pipeline = new LearningPipeline
            {
                // Comma-separated training file; no header flag is passed.
                new TextLoader(trainingDataFile).CreateFrom<InputData>(separator: ','),

                // Dictionarize the "Label" column before training.
                new Dictionarizer("Label"),

                new ColumnConcatenator("Features", featureColumns),
                new StochasticDualCoordinateAscentClassifier(),

                // Convert "PredictedLabel" back to its original value.
                new PredictedLabelColumnOriginalValueConverter
                {
                    PredictedLabelColumn = "PredictedLabel"
                }
            };

            var model = pipeline.Train<InputData, OutputData>();

            await model.WriteAsync(modelPath);

            Console.WriteLine("Model created");
        }
示例#7
0
        /// <summary>
        /// Trains a binary sentiment classifier from the file at <c>_dataPath</c>,
        /// saves it to <c>_modelPath</c> and returns it.
        /// </summary>
        /// <returns>The trained model mapping <c>SentimentData</c> to <c>SentimentPrediction</c>.</returns>
        public static async Task<PredictionModel<SentimentData, SentimentPrediction>> Train()
        {
            // The pipeline bundles data loading, featurization and the learner.
            var pipeline = new LearningPipeline
            {
                // First stage: load the training file with default loader options.
                new TextLoader(_dataPath).CreateFrom<SentimentData>(),

                // Featurization: turn the "SentimentText" column into the
                // "Features" column consumed by the learner.
                new TextFeaturizer("Features", "SentimentText"),

                new FastTreeBinaryClassifier
                {
                    NumLeaves = 5,
                    NumTrees = 5,
                    MinDocumentsInLeafs = 2
                }
            };

            var model = pipeline.Train<SentimentData, SentimentPrediction>();
            await model.WriteAsync(_modelPath);

            return model;
        }
示例#8
0
        /// <summary>
        /// Trains a regressor that predicts "NodeIdCounter" for <c>NodeObject</c> rows,
        /// saves it to <c>_modelpath</c> and returns it.
        /// </summary>
        /// <returns>The trained model mapping <c>NodeObject</c> to <c>NodePrediction</c>.</returns>
        public static async Task<PredictionModel<NodeObject, NodePrediction>> Train()
        {
            var pipeline = new LearningPipeline();

            // Comma-separated data file with a header row.
            pipeline.Add(new TextLoader(_datapath).CreateFrom<NodeObject>(useHeader: true, separator: ','));

            // "NodeIdCounter" is the value being predicted.
            pipeline.Add(new ColumnCopier(("NodeIdCounter", "Label")));

            // "NodeBName" is vectorized here but is not part of the feature list below.
            pipeline.Add(new CategoricalOneHotVectorizer("NodeAName", "NodeBName", "NodeType"));

            pipeline.Add(new ColumnConcatenator(
                "Features",
                "NodeAName",
                "CountAllConnections",
                "CountUniqueConnections",
                "NodeType"));

            // Regression learner (a NaiveBayesClassifier was previously tried here).
            pipeline.Add(new FastTreeRegressor());

            var model = pipeline.Train<NodeObject, NodePrediction>();

            await model.WriteAsync(_modelpath);

            return model;
        }
示例#9
0
        /// <summary>
        /// Obtains a model from <c>Train()</c>, writes it to <c>_modelPath</c>
        /// and returns it.
        /// </summary>
        /// <returns>The model returned by <c>Train()</c>, after it has been saved.</returns>
        public static async Task<PredictionModel<ProductData, ProductPredict>> PreProcessMLEngine()
        {
            // Train() is called synchronously; only the save is awaited.
            PredictionModel<ProductData, ProductPredict> trained = Train();

            await trained.WriteAsync(_modelPath);

            return trained;
        }
示例#10
0
        /// <summary>
        /// Trains a taxi-fare regressor from the CSV file at <c>_datapath</c>,
        /// saves it to <c>_modelpath</c> and returns it.
        /// </summary>
        /// <returns>The trained model mapping <c>TaxiTrip</c> to <c>TaxiTripFaresPrediction</c>.</returns>
        public static async Task<PredictionModel<TaxiTrip, TaxiTripFaresPrediction>> Train()
        {
            // Stages are added one call at a time; a collection initializer on
            // LearningPipeline is an equivalent, more compact way to write this.
            var pipeline = new LearningPipeline();

            // Comma-separated file with a header row.
            pipeline.Add(new TextLoader(_datapath).CreateFrom<TaxiTrip>(useHeader: true, separator: ','));

            // "FareAmount" is the value being predicted.
            pipeline.Add(new ColumnCopier(("FareAmount", "Label")));

            pipeline.Add(new CategoricalOneHotVectorizer("VendorId", "RateCode", "PaymentType"));

            pipeline.Add(new ColumnConcatenator(
                "Features",
                "VendorId",
                "RateCode",
                "PassengerCount",
                "TripDistance",
                "PaymentType"));

            pipeline.Add(new FastTreeRegressor());

            var model = pipeline.Train<TaxiTrip, TaxiTripFaresPrediction>();

            await model.WriteAsync(_modelpath);

            return model;
        }
示例#11
0
        // Trains a taxi-fare regressor from the CSV file at _datapath, saves the
        // trained model to _modelpath and returns it.
        // <Snippet6>
        public static async Task<PredictionModel<TaxiTrip, TaxiTripFarePrediction>> Train()
        // </Snippet6>
        {
            // <Snippet3>
            var pipeline = new LearningPipeline();

            // Comma-separated file with a header row.
            pipeline.Add(new TextLoader(_datapath).CreateFrom<TaxiTrip>(useHeader: true, separator: ','));

            // "FareAmount" is the value being predicted.
            pipeline.Add(new ColumnCopier(("FareAmount", "Label")));

            pipeline.Add(new CategoricalOneHotVectorizer("VendorId", "RateCode", "PaymentType"));

            pipeline.Add(new ColumnConcatenator(
                "Features",
                "VendorId",
                "RateCode",
                "PassengerCount",
                "TripDistance",
                "PaymentType"));

            pipeline.Add(new FastTreeRegressor());
            // </Snippet3>

            // <Snippet4>
            var model = pipeline.Train<TaxiTrip, TaxiTripFarePrediction>();
            // </Snippet4>
            // <Snippet5>
            await model.WriteAsync(_modelpath);

            return model;
            // </Snippet5>
        }
示例#12
0
        /// <summary>
        /// Trains a sentiment classifier from the file at <c>_dataPath</c>,
        /// saves it to <c>_modelpath</c> and returns it.
        /// </summary>
        /// <returns>The trained model mapping <c>SentimentData</c> to <c>SentimentPrediction</c>.</returns>
        public static async Task<PredictionModel<SentimentData, SentimentPrediction>> Train()
        {
            // The pipeline loads, processes and featurizes the data, then learns from it.
            var pipeline = new LearningPipeline
            {
                // Training data; the first row of the file is a header.
                new TextLoader(_dataPath).CreateFrom<SentimentData>(useHeader: true),

                new Dictionarizer("Label"),

                // Turn the "SentimentText" column into the "Features" column.
                new TextFeaturizer("Features", "SentimentText"),

                // Chosen learner. LogisticRegressionClassifier, NaiveBayesClassifier and
                // FastTreeBinaryClassifier were the alternatives considered here.
                new StochasticDualCoordinateAscentClassifier(),

                // Convert "PredictedLabel" back to its original value.
                new PredictedLabelColumnOriginalValueConverter
                {
                    PredictedLabelColumn = "PredictedLabel"
                }
            };

            // Train the model.
            var model = pipeline.Train<SentimentData, SentimentPrediction>();

            // Save the model.
            await model.WriteAsync(_modelpath);

            return model;
        }
示例#13
0
        /// <summary>
        /// Trains a taxi-fare regressor from the CSV file at <c>_datapath</c>,
        /// saves it to <c>_modelpath</c> and returns it.
        /// </summary>
        /// <returns>The trained model mapping <c>TaxiTrip</c> to <c>TaxiTripFarePredition</c>.</returns>
        public static async Task<PredictionModel<TaxiTrip, TaxiTripFarePredition>> Train()
        {
            var pipeline = new LearningPipeline();

            // Comma-separated file with a header row.
            pipeline.Add(new TextLoader(_datapath).CreateFrom<TaxiTrip>(useHeader: true, separator: ','));

            // The doubled parentheses are required: without them this fails with
            // 'Source column "Label" not found'.
            pipeline.Add(new ColumnCopier(("FareAmount", "Label")));

            pipeline.Add(new CategoricalOneHotVectorizer("VendorId", "RateCode", "PaymentType"));

            pipeline.Add(new ColumnConcatenator(
                "Features",
                "VendorId",
                "RateCode",
                "PassengerCount",
                "TripDistance",
                "PaymentType"));

            pipeline.Add(new FastTreeRegressor());

            var model = pipeline.Train<TaxiTrip, TaxiTripFarePredition>();

            await model.WriteAsync(_modelpath);

            return model;
        }
        /// <summary>
        /// Builds a pipeline from the public fields of <c>TInput</c> and <c>TOutput</c>,
        /// trains a logistic-regression binary classifier, writes the model to a
        /// date-named zip file under <c>_workDirectory</c> and returns it.
        /// </summary>
        /// <returns>The trained model.</returns>
        public async Task<PredictionModel<TInput, TOutput>> TrainAsync()
        {
            // 1. Reflect over the input and output types.
            Type inputType = typeof(TInput);
            Type outputType = typeof(TOutput);

            // 2. Collect the name of every public field of the input type.
            List<string> inputColumns = new List<string>();
            foreach (System.Reflection.FieldInfo field in inputType.GetFields())
            {
                inputColumns.Add(field.Name);
            }

            // 3. The first public field of the output type supplies the output
            //    column name; null when the output type has no public fields.
            System.Reflection.FieldInfo[] outputFields = outputType.GetFields();
            string outputColumn = outputFields.Length > 0 ? outputFields[0].Name : null;

            // 4. Concatenate the input columns into the output column.
            ColumnConcatenator concatenator = new ColumnConcatenator(outputColumn, inputColumns.ToArray());

            // 5. Build the learner. No data-loading stage is added here.
            // NOTE(review): presumably a data source must be supplied for Train to succeed — confirm.
            LearningPipeline pipeline = new LearningPipeline();
            pipeline.Add(concatenator);
            pipeline.Add(new LogisticRegressionBinaryClassifier());

            PredictionModel<TInput, TOutput> model = pipeline.Train<TInput, TOutput>();

            // Write the model to a zip file named after today's long date string.
            await model.WriteAsync(_workDirectory + System.DateTime.Now.ToLongDateString() + ".zip");

            // Return the model object.
            return model;
        }
        TrainAsync(string trainingDataFile, string modelPath)
        {
            // The whole learning process is declared in one place, in the order
            // the stages run.
            var pipeline = new LearningPipeline
            {
                // The schema (column names and types) comes from PimaDiabeteModel;
                // the file is comma-separated and starts with a header row.
                new TextLoader(trainingDataFile).CreateFrom<PimaDiabeteModel>(useHeader: true, separator: ','),

                new Dictionarizer("Label"),

                // Merge the eight input columns into the single "Features" column.
                new ColumnConcatenator(
                    "Features",
                    "Pregnancies",
                    "Glucose",
                    "BloodPressure",
                    "SkinThickness",
                    "Insulin",
                    "BMI",
                    "DiabetesPedigreeFunction",
                    "Age"),

                new StochasticDualCoordinateAscentClassifier(),

                // Convert "PredictedLabel" back to its original value.
                new PredictedLabelColumnOriginalValueConverter
                {
                    PredictedLabelColumn = "PredictedLabel"
                }
            };

            // Train the pipeline on the loaded, transformed dataset.
            var model = pipeline.Train<PimaDiabeteModel, ClassPrediction>();

            await model.WriteAsync(modelPath);

            return model;
        }
示例#16
0
文件: Program.cs 项目: uahgit/samples
        // Trains a taxi-fare regressor from the CSV file at _datapath, saves the
        // trained model to _modelpath and returns it.
        // <Snippet6>
        public static async Task<PredictionModel<TaxiTrip, TaxiTripFarePrediction>> Train()
        // </Snippet6>
        {
            // <Snippet3>
            var pipeline = new LearningPipeline();

            // Comma-separated file with a header row.
            pipeline.Add(new TextLoader<TaxiTrip>(_datapath, useHeader: true, separator: ","));

            // "fare_amount" is the value being predicted.
            pipeline.Add(new ColumnCopier(("fare_amount", "Label")));

            pipeline.Add(new CategoricalOneHotVectorizer("vendor_id", "rate_code", "payment_type"));

            pipeline.Add(new ColumnConcatenator(
                "Features",
                "vendor_id",
                "rate_code",
                "passenger_count",
                "trip_distance",
                "payment_type"));

            pipeline.Add(new FastTreeRegressor());
            // </Snippet3>

            // <Snippet4>
            var model = pipeline.Train<TaxiTrip, TaxiTripFarePrediction>();
            // </Snippet4>
            // <Snippet5>
            await model.WriteAsync(_modelpath);

            return model;
            // </Snippet5>
        }
示例#17
0
        /// <summary>
        /// Trains a binary sentiment classifier from the file at <c>_dataPath</c>,
        /// saves it to <c>_modelpath</c> and returns it.
        /// </summary>
        /// <returns>The trained model mapping <c>SentimentData</c> to <c>SentimentPrediction</c>.</returns>
        public static async Task<PredictionModel<SentimentData, SentimentPrediction>> Train()
        {
            var pipeline = new LearningPipeline
            {
                // Load (ingest) the data.
                new TextLoader(_dataPath).CreateFrom<SentimentData>(),

                // Preprocess and featurize the data.
                new TextFeaturizer("Features", "SentimentText"),

                new FastTreeBinaryClassifier
                {
                    NumLeaves = 5,
                    NumTrees = 5,
                    MinDocumentsInLeafs = 2
                }
            };

            // Train the model.
            var model = pipeline.Train<SentimentData, SentimentPrediction>();

            // Save the trained model to disk.
            await model.WriteAsync(_modelpath);

            return model;
        }
        /// <summary>
        /// Trains a diabetes classifier, evaluates it against the test data, prints the
        /// metrics and, when the combined score beats the score recorded in the stats
        /// file, saves the model and rewrites the stats file.
        /// </summary>
        /// <remarks>
        /// The score is Accuracy + Auc + F1Score. The first line of the stats file holds
        /// the best score seen so far; a missing, empty or unparsable stats file is
        /// treated as a previous score of 0. The method blocks on console input before
        /// returning.
        /// </remarks>
        public static void Execute()
        {
            Console.WriteLine("Executing Diabetes Experiment");
            Console.WriteLine("Creating new model");
            var pipeline = new LearningPipeline();

            pipeline.Add(new TextLoader <DiabetesData>(dataPath, separator: ","));

            var features = new string[] { "BMI", "Age", "Pregnancies", "PlasmaGlucoseConcentration", "TricepsSkinFoldThickness" };

            pipeline.Add(new ColumnConcatenator("Features", features));

            var algorithm = new BinaryLogisticRegressor();

            pipeline.Add(algorithm);

            model = pipeline.Train <DiabetesData, DiabetesPrediction>();

            var testData  = new TextLoader <DiabetesData>(testDataPath, separator: ",");
            var evaluator = new BinaryClassificationEvaluator();
            BinaryClassificationMetrics metrics = evaluator.Evaluate(model, testData);

            Console.WriteLine();
            Console.WriteLine("PredictionModel quality metrics evaluation");
            Console.WriteLine("------------------------------------------");
            Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
            Console.WriteLine($"Auc: {metrics.Auc:P2}");
            Console.WriteLine($"F1Score: {metrics.F1Score:P2}");

            var    score             = metrics.Accuracy + metrics.Auc + metrics.F1Score;
            double previousHighScore = 0;

            if (File.Exists(modelStatsPath))
            {
                var previousModelData = File.ReadAllLines(modelStatsPath);
                double parsedScore;

                // An empty or corrupt stats file must not abort the run; fall back to 0.
                if (previousModelData.Length > 0 && double.TryParse(previousModelData[0], out parsedScore))
                {
                    previousHighScore = parsedScore;
                }
            }

            if (score > previousHighScore)
            {
                // Save the model first and wait for the write to finish, so the stats
                // file never describes a model that failed to save and any write error
                // surfaces here instead of being lost in an unobserved task.
                model.WriteAsync(modelPath).GetAwaiter().GetResult();

                File.WriteAllText(modelStatsPath, score.ToString() + Environment.NewLine);
                File.AppendAllLines(modelStatsPath, new List <string>
                {
                    $"Accuracy: {metrics.Accuracy:P2}",
                    $"Auc: {metrics.Auc:P2}",
                    $"F1Score: {metrics.F1Score:P2}"
                });
                File.AppendAllText(modelStatsPath, "Features:" + Environment.NewLine);
                File.AppendAllLines(modelStatsPath, features);
                File.AppendAllText(modelStatsPath, "Algorithm: " + algorithm.GetType().Name);
                Console.WriteLine("New model is better");
            }
            else
            {
                Console.WriteLine("Old model is better");
            }
            Console.ReadLine();
        }
        /// <summary>
        /// Removes whatever is currently at <c>modelLocation</c> via <c>DeleteAssets</c>,
        /// writes <paramref name="model"/> there and reports the location on the console.
        /// </summary>
        /// <param name="model">The trained clustering model to persist.</param>
        /// <returns>A task that completes once the model has been written.</returns>
        private async Task SaveModel(PredictionModel<PivotData, ClusteringPrediction> model)
        {
            ConsoleWriteHeader("Save model to local file");

            // Clear the target before writing the new model.
            DeleteAssets(modelLocation);

            await model.WriteAsync(modelLocation);

            Console.WriteLine($"Model saved: {modelLocation}");
        }
示例#20
0
        /// <summary>
        /// Removes whatever is currently at <c>modelLocation</c> via
        /// <c>ModelHelpers.DeleteAssets</c>, writes <paramref name="model"/> there and
        /// reports the location on the console.
        /// </summary>
        /// <param name="model">The trained sales model to persist.</param>
        /// <returns>A task that completes once the model has been written.</returns>
        private async Task SaveModel(PredictionModel<SalesData, SalesPrediction> model)
        {
            ConsoleWriteHeader("Save model to local file");

            // Clear the target before writing the new model.
            ModelHelpers.DeleteAssets(modelLocation);

            await model.WriteAsync(modelLocation);

            Console.WriteLine($"Model saved: {modelLocation}");
        }
示例#21
0
        /// <summary>
        /// Trains a binary classifier on the bitcoin training data, saves it to
        /// <c>ModelPath</c> and reports progress in the supplied list box.
        /// </summary>
        /// <param name="scroll">List box that receives the progress messages; updated through <c>Invoke</c>.</param>
        /// <returns>
        /// The trained model, or <c>null</c> when training failed. Exceptions are written
        /// to the debug output and are not propagated. If training succeeds but saving
        /// fails, the trained model is still returned.
        /// </returns>
        public static async Task <PredictionModel <BitCoinData, BitCoinPrediction> > TrainAsyncBitcoin(ListBox scroll)
        {
            PredictionModel <BitCoinData, BitCoinPrediction> model = null;

            try
            {// LearningPipeline holds all steps of the learning process: data, transforms, learners.
                var pipeline = new LearningPipeline();

                // The TextLoader loads a dataset. The schema of the dataset is specified by passing a class containing
                // all the column names and their types.
                pipeline.Add(new TextLoader(TrainBitcoinDataPath).CreateFrom <BitCoinData>(useHeader: true, separator: ','));

                // Put all features into a vector
                pipeline.Add(new ColumnConcatenator(
                                 "Features",
                                 "Date",
                                 "Price",
                                 "Amount",
                                 "Transaction",
                                 "Type"));

                // FastTreeBinaryClassifier is the algorithm used to train the model.
                // Three of its hyperparameters are set explicitly here.
                pipeline.Add(new FastTreeBinaryClassifier()
                {
                    NumTrees      = 100,
                    LearningRates = 0.4f,
                    DropoutRate   = 0.05f
                });

                scroll.Invoke(new Action(() => scroll.Items.Add("=============== Training model ===============" + "\r\n")));

                // The pipeline is trained on the dataset that has been loaded and transformed.
                model = pipeline.Train <BitCoinData, BitCoinPrediction>();

                // Saving the model as a .zip file.
                await model.WriteAsync(ModelPath);

                scroll.Invoke(new Action(() => scroll.Items.Add(("=============== End training ===============") + "\r\n")));

                // Format the message explicitly: wrapping the format string and its argument
                // in parentheses creates a tuple, which would be displayed as
                // "(The model is saved to {0}, <path>)" instead of the intended text.
                scroll.Invoke(new Action(() => scroll.Items.Add(string.Format("The model is saved to {0}", ModelPath) + "\r\n")));
            }
            catch (Exception ex)
            {
                Debug.WriteLine("Erro na Task(TrainAsync)" + ex.Message);
            }
            return(model);
        }
示例#22
0
        /// <summary>
        /// Obtains a clustering model from <c>Train()</c>, saves it to <c>_modelPath</c>,
        /// then predicts the category of <c>KMeansTest.meal</c> and writes it to the
        /// debug output.
        /// </summary>
        /// <returns>A task that completes once the prediction has been logged.</returns>
        public static async Task KM()
        {
            // Train() is called synchronously; only the save is awaited.
            PredictionModel<MealClass, ClusterPrediction> trained = Train();

            await trained.WriteAsync(_modelPath);

            var result = trained.Predict(KMeansTest.meal);

            System.Diagnostics.Debug.WriteLine($"Category: {result.PredictedCategory}");
        }
 /// <summary>
 /// Writes <paramref name="model"/> to a file named <paramref name="modelName"/> in the
 /// application's local storage folder, creating or overwriting that file.
 /// </summary>
 /// <param name="model">The model to persist.</param>
 /// <param name="modelName">File name (relative to the local folder) to write to.</param>
 /// <returns>A task that completes only after the model has been fully written.</returns>
 public Task Save(PredictionModel model, string modelName)
 {
     // The lambda is async so the write is awaited while the stream is still open.
     // Previously the stream could be disposed before WriteAsync had finished, and
     // the returned task completed without waiting for the write.
     return(Task.Run(async () =>
     {
         var storageFolder = ApplicationData.Current.LocalFolder;
         using (var fs = new FileStream(Path.Combine(storageFolder.Path, modelName), FileMode.Create, FileAccess.Write, FileShare.Write))
         {
             await model.WriteAsync(fs);
         }
     }));
 }
示例#24
0
        /// <summary>
        /// Trains a binary sentiment classifier from the file at <c>_dataPath</c>,
        /// saves it to <c>_modelPath</c> and returns it.
        /// </summary>
        /// <returns>The trained model mapping <c>SentimentData</c> to <c>SentimentPrediction</c>.</returns>
        public static async Task<PredictionModel<SentimentData, SentimentPrediction>> Train()
        {
            // The pipeline bundles data loading, data processing/featurization
            // and the learner.
            var pipeline = new LearningPipeline
            {
                // 1 - Ingest the data: the loader is the first stage and reads
                //     the training file.
                new TextLoader(_dataPath).CreateFrom<SentimentData>(),

                // 2 - Preprocess / featurize: convert the "SentimentText" column
                //     into the "Features" column used by the learner.
                new TextFeaturizer("Features", "SentimentText"),

                // 3 - Choose a learning algorithm: a decision-tree binary classifier.
                //     NumLeaves, NumTrees and MinDocumentsInLeafs are hyperparameters
                //     that can be tuned; different learners or values give different results.
                new FastTreeBinaryClassifier
                {
                    NumLeaves = 5,
                    NumTrees = 5,
                    MinDocumentsInLeafs = 2
                }
            };

            // 4 - Train the model. Nothing in the pipeline is executed until this
            //     call, which loads the data and trains the featurizer and learner.
            var model = pipeline.Train<SentimentData, SentimentPrediction>();

            // 5 - Save the model to a .zip file, then return it for evaluation.
            await model.WriteAsync(_modelPath);

            return model;
        }
示例#25
0
        /// <summary>
        /// Trains a regression model from an in-memory collection and optionally saves it.
        /// </summary>
        /// <param name="trainData">Rows to train on.</param>
        /// <param name="modelFileName">File to write the model to; nothing is written when null or empty.</param>
        /// <param name="labelColumn">Name of the column copied into "Label".</param>
        /// <param name="oneHotColumns">Columns to one-hot vectorize; the stage is skipped when empty.</param>
        /// <param name="features">Columns concatenated into "Features".</param>
        /// <param name="drops">Columns to drop before training; the stage is skipped when empty.</param>
        /// <returns>The trained model.</returns>
        public static async Task<PredictionModel<JiaMiTu, JiaMiTuPrediction>> Train(IEnumerable<JiaMiTu> trainData, string modelFileName, string labelColumn, string[] oneHotColumns, string[] features, string[] drops)
        {
            // Create the learning pipeline.
            var pipeline = new LearningPipeline();

            // Load the data from the supplied collection rather than a text file.
            pipeline.Add(CollectionDataSource.Create(trainData));

            // Copy the chosen column into a new column named "Label"; that column is the label.
            pipeline.Add(new ColumnCopier((labelColumn, "Label")));

            // ColumnDropper discards fields that are not wanted before training starts,
            // for example an id that has no influence on the result.
            if (drops.Any())
            {
                pipeline.Add(new ColumnDropper { Column = drops });
            }

            // Feature engineering: the learner needs numeric features, so the categorical
            // columns are converted with CategoricalOneHotVectorizer.
            if (oneHotColumns.Any())
            {
                pipeline.Add(new CategoricalOneHotVectorizer(oneHotColumns));
            }

            // Last data-preparation step: combine all feature columns into one vector.
            pipeline.Add(new ColumnConcatenator("Features", features));

            // This is treated as a regression task, so FastTreeRegressor is the learner.
            // PoissonRegressor was the alternative considered here.
            pipeline.Add(new FastTreeRegressor());

            // Train the model. Nothing in the pipeline is executed before this call.
            var model = pipeline.Train<JiaMiTu, JiaMiTuPrediction>();

            // Saving is optional: skip it when no file name was supplied.
            if (string.IsNullOrEmpty(modelFileName))
            {
                return model;
            }

            await model.WriteAsync(modelFileName);

            return model;
        }
示例#26
0
        /// <summary>
        /// Trains a sentiment classifier on the documents of the "review_train" MongoDB collection,
        /// writes the trained model to <c>_modelpath</c> and returns it.
        /// </summary>
        /// <param name="db">Database from which the "review_train" collection is read.</param>
        /// <returns>The trained prediction model.</returns>
        public static async Task <PredictionModel <SentimentData, SentimentPrediction> > Train(IMongoDatabase db)
        {
            // The pipeline gathers the data source, the transform and the learner as ordered steps.
            // <Snippet5>
            var learningPipeline = new LearningPipeline();
            // </Snippet5>

            // Query "review_train" with an empty filter document and hand the resulting
            // enumerable to the pipeline as its data source.
            // <Snippet6>
            var reviews = db.GetCollection <SentimentData>("review_train")
                          .Find <SentimentData>(new BsonDocument())
                          .ToEnumerable();

            learningPipeline.Add(CollectionDataSource.Create(reviews));
            // </Snippet6>

            // Featurize the "text" column into "Features": diacritics and punctuation are
            // not kept, and the text is lower-cased.
            // <Snippet7>
            var featurizer = new TextFeaturizer("Features", "text")
            {
                KeepDiacritics   = false,
                KeepPunctuations = false,
                TextCase         = TextNormalizerTransformCaseNormalizationMode.Lower,
            };
            learningPipeline.Add(featurizer);
            //</Snippet7>

            // The learner is a FastTreeBinaryClassifier configured with four settings.
            // <Snippet8>
            var learner = new FastTreeBinaryClassifier()
            {
                NumLeaves           = 100,
                NumTrees            = 50,
                MinDocumentsInLeafs = 2,
                LearningRates       = 0.4f,
            };
            learningPipeline.Add(learner);
            // </Snippet8>

            // Run the pipeline to produce the trained model.
            // <Snippet9>
            var trainedModel = learningPipeline.Train <SentimentData, SentimentPrediction>();
            // </Snippet9>

            // Persist the trained model at _modelpath.
            // <Snippet10>
            await trainedModel.WriteAsync(_modelpath);

            // </Snippet10>

            // Hand the trained model back to the caller.
            // <Snippet11>
            return trainedModel;
            // </Snippet11>
        }
示例#27
0
        /// <summary>
        /// Runs the Iris experiment: loads the model from <c>modelPath</c> when that file exists,
        /// otherwise trains a new one and saves it there; then predicts the flower type for a
        /// hard-coded sample, prints it and waits for a line of console input.
        /// </summary>
        public static void Execute()
        {
            Console.WriteLine("Executing Iris Experiment");
            if (File.Exists(modelPath))
            {
                Console.WriteLine("Using existing model");
                // This method is synchronous, so the asynchronous read is blocked on.
                model = PredictionModel.ReadAsync <IrisData, IrisPrediction>(modelPath).Result;
            }
            else
            {
                Console.WriteLine("Creating new model");
                // STEP 2: Create a pipeline and load your data
                var pipeline = new LearningPipeline();

                // If working in Visual Studio, make sure the 'Copy to Output Directory'
                // property of iris-data.txt is set to 'Copy always'
                pipeline.Add(new TextLoader <IrisData>(dataPath, separator: ","));

                // STEP 3: Transform your data
                // Assign numeric values to text in the "Label" column, because only
                // numbers can be processed during model training
                pipeline.Add(new Dictionarizer("Label"));

                // Puts all features into a vector
                pipeline.Add(new ColumnConcatenator("Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth"));

                // STEP 4: Add learner
                // Add a learning algorithm to the pipeline.
                // This is a classification scenario (What type of iris is this?)
                pipeline.Add(new StochasticDualCoordinateAscentClassifier());

                // Convert the Label back into original text (after converting to number in step 3)
                pipeline.Add(new PredictedLabelColumnOriginalValueConverter()
                {
                    PredictedLabelColumn = "PredictedLabel"
                });

                // STEP 5: Train your model based on the data set
                model = pipeline.Train <IrisData, IrisPrediction>();

                // Wait for the save to finish. Discarding the returned task would leave a
                // failed write unobserved. Wait() matches the blocking style of the
                // .Result call used for loading above.
                model.WriteAsync(modelPath).Wait();
            }
            // STEP 6: Use your model to make a prediction
            // You can change these numbers to test different predictions
            var prediction = model.Predict(new IrisData()
            {
                SepalLength = 3.3f,
                SepalWidth  = 1.6f,
                PetalLength = 1.7f,
                PetalWidth  = 1.7f,
            });

            Console.WriteLine($"Predicted flower type is: {prediction.PredictedLabels}");
            Console.ReadLine();
        }
示例#28
0
        /// <summary>
        /// Entry point: trains the clustering model, saves it to <c>_modelPath</c>, then predicts
        /// the cluster for <c>TestIrisData.Setosa</c> and prints the cluster id and distances.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        private static void Main(string[] args)
        {
            PredictionModel <IrisData, ClusterPrediction> model = Train();

            // Main is synchronous, so block until the save has finished. Discarding the
            // returned task would leave a failed write unobserved. GetAwaiter().GetResult()
            // rethrows the original exception rather than an AggregateException.
            model.WriteAsync(_modelPath).GetAwaiter().GetResult();

            var prediction = model.Predict(TestIrisData.Setosa);

            Console.WriteLine($"Cluster: {prediction.PredictedClusterId}");
            Console.WriteLine($"Distances: {string.Join(" ", prediction.Distances)}");
        }
        /// <summary>
        /// Trains the model via <c>Train()</c>, saves it to <c>_modelPath</c>, then predicts on
        /// <c>binaryTest.meal</c> and writes the decision, probability and score to the debug output.
        /// </summary>
        public static async Task BC()
        {
            // Train first, then persist the result before it is used.
            var trainedModel = Train();
            await trainedModel.WriteAsync(_modelPath);

            // Score the sample meal.
            var outcome = trainedModel.Predict(binaryTest.meal);

            // Report the three fields of the prediction, one per line.
            System.Diagnostics.Debug.WriteLine($"Decision: {outcome.Prediction}");
            System.Diagnostics.Debug.WriteLine($"Probability: {outcome.Probability}");
            System.Diagnostics.Debug.WriteLine($"Score: {outcome.Score}");
        }
        /// <summary>
        /// Writes <paramref name="model"/> to <paramref name="modelLocation"/>, after calling
        /// <c>ModelHelpers.DeleteAssets</c> on that location. Does nothing when the location
        /// is null or empty.
        /// </summary>
        /// <param name="model">Trained model to save.</param>
        /// <param name="modelLocation">Target path; null or empty means "do not save".</param>
        private async Task SaveModel(PredictionModel <ImageNetData, ImageNetPrediction> model, string modelLocation)
        {
            // No target location given: nothing to save.
            if (string.IsNullOrEmpty(modelLocation))
            {
                return;
            }

            ConsoleWriteHeader("Save model to local file");

            // DeleteAssets runs on the target location before the new model is written.
            ModelHelpers.DeleteAssets(modelLocation);
            await model.WriteAsync(modelLocation);

            Console.WriteLine($"Model saved: {modelLocation}");
        }