示例#1
0
        /// <summary>
        /// Builds a learning pipeline over the data at <c>dataPath</c>, trains a
        /// fast-forest binary classifier, writes the trained model to "model.train",
        /// and then calls <c>TestModel</c>.
        /// </summary>
        /// <remarks>
        /// The trained model is stored in <c>model</c>, which is declared outside this method.
        /// </remarks>
        /// <returns>A task that completes once the model has been written and tested.</returns>
        public async Task Training()
        {
            var learningPipeline = new LearningPipeline();

            // Stage 1: load the ';'-separated training file (the first row is a header).
            var loader = new TextLoader<ManifestDataTraining>(dataPath, useHeader: true, separator: ";");
            learningPipeline.Add(loader);

            // Stage 2: merge the input columns into the single "Features" column.
            var featureColumns = new ColumnConcatenator(
                "Features",
                "Sex", "Pclass", "Age", "Fare", "SibSp", "TEmbarked");
            learningPipeline.Add(featureColumns);

            // Stage 3: drop rows whose "Age" value is missing.
            var ageDropper = new MissingValuesRowDropper();
            ageDropper.Column = new string[] { "Age" };
            learningPipeline.Add(ageDropper);

            // Stage 4: the learner itself.
            var forest = new FastForestBinaryClassifier();
            forest.NumLeaves = 10;
            forest.NumTrees = 13;
            forest.MinDocumentsInLeafs = 5;
            learningPipeline.Add(forest);

            // Stage 5: convert the "PredictedLabel" column back to its original values.
            var labelConverter = new PredictedLabelColumnOriginalValueConverter();
            labelConverter.PredictedLabelColumn = "PredictedLabel";
            learningPipeline.Add(labelConverter);

            model = learningPipeline.Train<ManifestDataTraining, ManifestPrediction>();

            await model.WriteAsync("model.train");

            // NOTE(review): model was already used by WriteAsync above, so this null
            // check presumably never fails here - confirm whether it should come first.
            if (model == null)
            {
                return;
            }

            TestModel();
        }
示例#2
0
        /// <summary>
        /// Grid-searches the tree count and leaf count of a fast-forest binary classifier,
        /// picks the configuration with the highest F1 score on the cross-validation set,
        /// retrains it, prints train / cross-validation / test metrics, and saves the
        /// final model to "model.zip".
        /// </summary>
        /// <remarks>
        /// A configuration is only selected when its cross-validation F1 score is a number
        /// strictly greater than zero. The selected classifier is trained again after the
        /// search; the models trained during the search are not reused.
        /// </remarks>
        /// <returns>A task that completes once the model has been saved and the results printed.</returns>
        /// <exception cref="InvalidOperationException">
        /// Thrown when no configuration in the grid yields a positive, non-NaN F1 score,
        /// so there is no classifier to select.
        /// </exception>
        static async Task Train()
        {
            // Create the pipeline.
            var pipeline = new LearningPipeline();

            // Load the training data.
            var trainingSets = new TextLoader<TitanicData>(trainSetPath, useHeader: true, separator: ",");

            pipeline.Add(trainingSets);

            // Drop rows whose "Age" value is missing.
            pipeline.Add(new MissingValuesRowDropper()
            {
                Column = new string[] { "Age" }
            });

            // Turn the non-numeric variables into one-hot vectors.
            pipeline.Add(new CategoricalOneHotVectorizer("Sex", "Embarked"));

            // Concatenate the variables used by the model into "Features".
            pipeline.Add(new ColumnConcatenator("Features",
                                                "Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"));

            // Cross-validation data.
            var cvSets = new TextLoader<TitanicData>(crossValidationSetPath, useHeader: true, separator: ",");
            // Grid-search candidates.
            var treeCounts = new int[] { 2, 4, 8, 16, 32, 64, 128 };
            var leafCounts = new int[] { 2, 4, 8, 16, 32, 64, 128 };
            // Best F1 score seen so far.
            var bestf1 = 0.0;
            // Classifier that produced the best F1 score.
            FastForestBinaryClassifier bestClassifier = null;
            // Evaluator for binary classification.
            var evaluator = new BinaryClassificationEvaluator();

            foreach (var nt in treeCounts)
            {
                foreach (var nl in leafCounts)
                {
                    // Binary classification with a random forest.
                    var classifier = new FastForestBinaryClassifier()
                    {
                        NumTrees  = nt,
                        NumLeaves = nl
                    };
                    pipeline.Add(classifier);
                    // Train.
                    var model = pipeline.Train<TitanicData, TitanicPrediction>();

                    // F1 score on the cross-validation set.
                    var metrics = evaluator.Evaluate(model, cvSets);
                    Console.WriteLine($"#tree = {nt}, #leaf = {nl}, cv_f1={metrics.F1Score}");
                    if (!double.IsNaN(metrics.F1Score) && metrics.F1Score > bestf1)
                    {
                        Console.WriteLine($"[!]Classifier Updated {bestf1} -> {metrics.F1Score} / nt : {nt}, nl : {nl}");
                        bestf1         = metrics.F1Score;
                        bestClassifier = classifier;
                    }

                    // Remove the classifier again so the next candidate can take its place.
                    pipeline.Remove(classifier);
                }
            }

            // If every candidate scored 0 or NaN, nothing was selected. Fail here with a
            // clear message instead of adding null to the pipeline and crashing later.
            if (bestClassifier == null)
            {
                throw new InvalidOperationException(
                    "Grid search did not find any classifier with a positive F1 score on the cross-validation set.");
            }

            // Add the best classifier found by the grid search to the pipeline.
            pipeline.Add(bestClassifier);

            // Train the final model.
            var bestModel = pipeline.Train<TitanicData, TitanicPrediction>();
            // Metrics on the training set.
            var trainMetrics = evaluator.Evaluate(bestModel, trainingSets);
            // Metrics on the cross-validation set.
            var cvMetrics = evaluator.Evaluate(bestModel, cvSets);
            // Test data.
            var testSets = new TextLoader<TitanicData>(testSetPath, useHeader: true, separator: ",");
            // Metrics on the test set.
            var testMetrics = evaluator.Evaluate(bestModel, testSets);

            // Save the model.
            await bestModel.WriteAsync("model.zip");

            // Print the results.
            Console.WriteLine("### Result ###");
            Console.WriteLine("- Selected Classifier");
            Console.WriteLine($"NumTree={bestClassifier.NumTrees}, NumLeaves={bestClassifier.NumLeaves}");
            Console.WriteLine("- Trian Sets");
            Console.WriteLine($"Accuracy={trainMetrics.Accuracy:P2}, " +
                              $"Precision={trainMetrics.PositivePrecision:P2}, " +
                              $"Recall={trainMetrics.PositiveRecall:P2}, " +
                              $"F1score={trainMetrics.F1Score:P2}");
            Console.WriteLine("- Cross Validation Sets");
            Console.WriteLine($"Accuracy={cvMetrics.Accuracy:P2}, " +
                              $"Precision={cvMetrics.PositivePrecision:P2}, " +
                              $"Recall={cvMetrics.PositiveRecall:P2}, " +
                              $"F1score={cvMetrics.F1Score:P2}");
            Console.WriteLine("- Test Sets");
            Console.WriteLine($"Accuracy={testMetrics.Accuracy:P2}, " +
                              $"Precision={testMetrics.PositivePrecision:P2}, " +
                              $"Recall={testMetrics.PositiveRecall:P2}, " +
                              $"F1score={testMetrics.F1Score:P2}");

            /*
             Sample output recorded in the original source:
             ### Result ###
             ###- Selected Classifier
             ###NumTree=4, NumLeaves=32
             ###- Trian Sets
             ###Accuracy=83.45%, Precision=83.78%, Recall=72.94%, F1score=77.99%
             ###- Cross Validation Sets
             ###Accuracy=80.28%, Precision=86.67%, Recall=63.93%, F1score=73.58%
             ###- Test Sets
             ###Accuracy=86.58%, Precision=86.79%, Recall=77.97%, F1score=82.14%
             */
        }