예제 #1
0
        /// <summary>
        /// Trains a binary model on random dense data and reports how long it takes to score
        /// the training rows through three paths: the native predictor in a single batch call,
        /// the trainer one row at a time, and the managed predictor one row at a time for a
        /// series of <c>MaxThreads</c> settings.
        /// </summary>
        public void BenchmarkEval()
        {
            const int featureCount = 100;
            var rng = new Random(Seed);

            var parameters = new Parameters();
            parameters.Objective.Objective    = ObjectiveType.Binary;
            parameters.Dataset.MaxBin         = 63;
            parameters.Learning.LearningRate  = 1e-3;
            parameters.Learning.NumIterations = 1000;
            parameters.Common.DeviceType      = DeviceType.CPU;

            // Handed to the generator as an empty map (original author's note: no categorical columns).
            var categoricalColumns = new Dictionary<int, int>();
            var training = CreateRandomDenseClassifyData(rng, 2, ref categoricalColumns, parameters.Dataset.UseMissing, featureCount);

            // No validation set is supplied; kept as a typed local.
            DataDense validation = null;

            parameters.Dataset.CategoricalFeature = categoricalColumns.Keys.ToArray();

            using (var datasets = new Datasets(parameters.Common, parameters.Dataset, training, validation))
            using (var trainer = new BinaryTrainer(parameters.Learning, parameters.Objective))
            {
                var model = trainer.Train(datasets);
                output.WriteLine($"MaxNumTrees={model.Managed.MaxNumTrees}");

                // 1) Native predictor: every row in one call.
                var stopwatch = System.Diagnostics.Stopwatch.StartNew();
                model.Native.GetOutputs(training.Features);
                var batchTime = stopwatch.Elapsed;
                output.WriteLine($"EvalNativeMulti={batchTime.TotalMilliseconds}");

                // 2) trainer.Evaluate: one call per row (reported as EvalNativeSingle).
                stopwatch.Restart();
                foreach (var features in training.Features)
                {
                    trainer.Evaluate(Booster.PredictType.Normal, features);
                }
                var perRowTime = stopwatch.Elapsed;
                output.WriteLine($"EvalNativeSingle={perRowTime.TotalMilliseconds}");

                // 3) Managed predictor: one call per row, repeated for each thread setting.
                var threadSettings = new[] { 1, 2, 4, 8, 16, 32, Environment.ProcessorCount };
                foreach (var threads in threadSettings)
                {
                    model.Managed.MaxThreads = threads;
                    stopwatch.Restart();
                    foreach (var features in training.Features)
                    {
                        // Named "prediction" so it does not hide the "output" writer used for reporting.
                        double prediction = 0;
                        var    buffer     = new VBuffer<float>(features.Length, features);
                        model.Managed.GetOutput(ref buffer, ref prediction);
                    }
                    var managedTime = stopwatch.Elapsed;
                    output.WriteLine($"MaxThreads={threads} EvalManaged={managedTime.TotalMilliseconds}");
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Times binary-classifier training on random dense data for three data widths
        /// (50, 100 and 150 columns), first with the CPU device type and then with the GPU
        /// device type, writing one result line per run.
        /// </summary>
        /// <remarks>
        /// The <c>[Fact]</c> attribute below is commented out in the source. Any failure is
        /// rethrown with the seed, the width index and the parameters in the message.
        /// </remarks>
        //[Fact]
        public void BenchmarkBinary()
        {
            var rng     = new Random(Seed);
            var devices = new[] { DeviceType.CPU, DeviceType.GPU };

            for (int widthIndex = 0; widthIndex < 3; ++widthIndex)
            {
                // Width depends only on the outer index: 50, 100, 150.
                int columnCount = 50 * (widthIndex + 1);

                foreach (var device in devices)
                {
                    var settings = new Parameters();
                    settings.Objective.Objective      = ObjectiveType.Binary;
                    settings.Dataset.MaxBin           = 63;
                    settings.Learning.BaggingFraction = 1;
                    settings.Learning.BaggingFreq     = 1;
                    settings.Learning.LearningRate    = 1e-3;
                    settings.Learning.NumIterations   = 10;
                    settings.Common.DeviceType        = device;

                    // Handed to the generator as an empty map (original author's note: no categorical columns).
                    var categoricalColumns = new Dictionary<int, int>();
                    var training = CreateRandomDenseClassifyData(rng, 2, ref categoricalColumns, settings.Dataset.UseMissing, columnCount);

                    // No validation set is supplied; kept as a typed local.
                    DataDense validation = null;
                    settings.Dataset.CategoricalFeature = categoricalColumns.Keys.ToArray();

                    try
                    {
                        using (var datasets = new Datasets(settings.Common, settings.Dataset, training, validation))
                        using (var trainer = new BinaryTrainer(settings.Learning, settings.Objective))
                        {
                            // Only the Train call is timed; dataset and trainer construction are excluded.
                            var stopwatch = System.Diagnostics.Stopwatch.StartNew();
                            var model     = trainer.Train(datasets);
                            var trainTime = stopwatch.Elapsed;
                            output.WriteLine($"{settings.Common.DeviceType}: NumRows={training.NumRows} NumCols={columnCount} MaxNumTrees={model.Managed.MaxNumTrees} TrainTimeSecs={trainTime.TotalSeconds}");
                        }
                    }
                    catch (Exception e)
                    {
                        throw new Exception($"Failed: {Seed} #{widthIndex} {settings}", e);
                    }
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Randomised end-to-end test for binary classification. Each of the five iterations
        /// trains a model on random data and then checks that the managed predictor, the
        /// native predictor, their save/load round-trips and the trainer's own evaluation
        /// produce matching outputs, and that managed and native feature weights match.
        /// </summary>
        /// <remarks>
        /// Every random choice is drawn from a single <see cref="Random"/> created from
        /// <c>Seed</c>, so the order of the <c>rand</c> calls below determines what each
        /// iteration exercises; do not reorder them. Any failure is rethrown with the seed,
        /// the iteration number and the parameters in the message.
        /// </remarks>
        public void TrainBinary()
        {
            var rand = new Random(Seed);

            for (int test = 0; test < 5; ++test)
            {
                int numColumns = rand.Next(1, 10);
                var pms        = GenerateParameters(rand, ObjectiveType.Binary, numColumns);

                // Passed by ref as null; its Keys are read below, so the generator is expected to assign it.
                Dictionary <int, int> categorical = null;
                var trainData = CreateRandomDenseClassifyData(rand, 2, ref categorical, pms.Dataset.UseMissing, numColumns);

                // Validation data is always created when early stopping is enabled, otherwise on a coin flip.
                var validData = (pms.Learning.EarlyStoppingRound > 0 || rand.Next(2) == 0) ? CreateRandomDenseClassifyData(rand, 2, ref categorical, pms.Dataset.UseMissing, numColumns) : null;
                pms.Dataset.CategoricalFeature = categorical.Keys.ToArray();

                // Coin flip: either no schedule, or the base learning rate decayed by 0.99 per iteration.
                var learningRateSchedule = (rand.Next(2) == 0) ? (Func <int, double>)null : (iter => pms.Learning.LearningRate * Math.Pow(0.99, iter));

                try
                {
                    // Coin flip between the dense and the sparse dataset representation.
                    using (var datasets = (rand.Next(2) == 0) ? new Datasets(pms.Common, pms.Dataset, trainData, validData) :
                                          new Datasets(pms.Common, pms.Dataset, Dense2Sparse(trainData), Dense2Sparse(validData)))
                    using (var trainer = new BinaryTrainer(pms.Learning, pms.Objective))
                    {
                        //trainer.ToCommandLineFiles(datasets);

                        var model = trainer.Train(datasets, learningRateSchedule);
                        model.Managed.MaxThreads = rand.Next(1, Environment.ProcessorCount);

                        // possibly use subset of trees
                        var numIterations = -1;
                        if (rand.Next(2) == 0)
                        {
                            numIterations             = rand.Next(1, model.Managed.MaxNumTrees);
                            model.Managed.MaxNumTrees = numIterations;
                            model.Native.MaxNumTrees  = numIterations;
                        }

                        // Round-trip the managed predictor through memory; the reader must
                        // consume exactly the bytes the writer produced.
                        CalibratedPredictor model2 = null;
                        using (var ms = new System.IO.MemoryStream())
                        using (var writer = new System.IO.BinaryWriter(ms))
                        using (var reader = new System.IO.BinaryReader(ms))
                        {
                            PredictorPersist.Save(model.Managed, writer);
                            ms.Position = 0;
                            model2      = PredictorPersist.Load <double>(reader) as CalibratedPredictor;
                            // Fail here, not with a NullReferenceException later, if the loaded type is wrong.
                            Assert.NotNull(model2);
                            Assert.Equal(ms.Length, ms.Position);
                        }

                        // Same round-trip for the native predictor.
                        BinaryNativePredictor model2native = null;
                        using (var ms = new System.IO.MemoryStream())
                        using (var writer = new System.IO.BinaryWriter(ms))
                        using (var reader = new System.IO.BinaryReader(ms))
                        {
                            NativePredictorPersist.Save(model.Native, writer);
                            ms.Position  = 0;
                            model2native = NativePredictorPersist.Load <double>(reader) as BinaryNativePredictor;
                            Assert.NotNull(model2native);
                            Assert.Equal(ms.Length, ms.Position);
                        }

                        // Batch evaluations: one row of results per input row, one column each.
                        var rawscore2s = trainer.Evaluate(Booster.PredictType.RawScore, trainData.Features, numIterations);
                        Assert.Equal(trainData.Features.Length, rawscore2s.GetLength(0));
                        Assert.Equal(1, rawscore2s.GetLength(1));

                        var output3s = trainer.Evaluate(Booster.PredictType.Normal, trainData.Features, numIterations);
                        Assert.Equal(trainData.Features.Length, output3s.GetLength(0));
                        Assert.Equal(1, output3s.GetLength(1));

                        // The native batch result must also have one entry per input row.
                        var output3natives = model.Native.GetOutputs(trainData.Features);
                        Assert.Equal(trainData.Features.Length, output3natives.Length);

                        for (int i = 0; i < trainData.Features.Length; i++)
                        {
                            var row = trainData.Features[i];

                            // Managed prediction must lie in [0, 1].
                            double outputManaged = 0;
                            var    input         = new VBuffer <float>(row.Length, row);
                            model.Managed.GetOutput(ref input, ref outputManaged);
                            Assert.True(outputManaged >= 0);
                            Assert.True(outputManaged <= 1);

                            // The reloaded managed predictor must agree with the original.
                            double output2 = 0;
                            model2.GetOutput(ref input, ref output2);
                            Compare(outputManaged, output2);

                            // check raw score against native booster object
                            var rawscore = 0.0;
                            ((CalibratedPredictor)model.Managed).SubPredictor.GetOutput(ref input, ref rawscore);
                            var rawscore2 = trainer.Evaluate(Booster.PredictType.RawScore, row, numIterations);
                            Assert.Single(rawscore2);
                            Assert.Equal(rawscore2[0], rawscore2s[i, 0]);
                            // For random forest the managed raw score is scaled by the tree count before comparing.
                            var isRf = (pms.Learning.Boosting == BoostingType.RandomForest);
                            Compare(isRf ? rawscore * model.Managed.MaxNumTrees : rawscore, rawscore2[0]);

                            // Single-row evaluation must match both batch results and the managed output.
                            var output3 = trainer.Evaluate(Booster.PredictType.Normal, row, numIterations);
                            Assert.Single(output3);
                            Assert.Equal(output3[0], output3s[i, 0]);
                            Assert.Equal(output3[0], output3natives[i]);
                            Compare(outputManaged, output3[0]);

                            double outputNative = 0;
                            model.Native.GetOutput(ref input, ref outputNative);
                            Assert.Equal(outputNative, output3[0]);

                            // The reloaded native predictor must reproduce the same value.
                            model2native.GetOutput(ref input, ref outputNative);
                            Assert.Equal(outputNative, output3[0]);

                            //Console.WriteLine(trainer.GetModelString());
                            //throw new Exception($"Output mismatch {output} vs {output3[0]} (error: {Math.Abs(output - output3[0])}) input: {String.Join(", ", row)}");
                        }

                        // Feature weights: managed and native must report the same keys and values.
                        var normalise   = rand.Next(2) == 0;
                        var getSplits   = rand.Next(2) == 0;
                        var gains       = model.Managed.GetFeatureWeights(normalise, getSplits);
                        var gainsNative = model.Native.GetFeatureWeights(normalise, getSplits);
                        Assert.Equal(gains.Count, gainsNative.Count);
                        foreach (var kv in gains)
                        {
                            Assert.True(0 <= kv.Key && kv.Key < trainData.NumColumns);
                            Assert.True(0.0 <= kv.Value);
                            Compare(kv.Value, gainsNative[kv.Key]);
                        }

                        // With raw (un-normalised) gains, the total must equal the sum of per-feature gains.
                        if (!getSplits && !normalise)
                        {
                            var totGain1 = gains.Values.Sum();
                            var totGain2 = Enumerable.Range(0, trainData.NumColumns).SelectMany(i => model.Managed.GetFeatureGains(i)).Sum();
                            Compare(totGain1, totGain2);
                        }
                    }
                }
                catch (Exception e)
                {
                    throw new Exception($"Failed: {Seed} #{test} {pms}", e);
                }
            }
        }