Example #1
0
 /// <summary>
 /// Builds a <see cref="DataSparse"/> from a <see cref="DataDense"/> instance.
 /// </summary>
 /// <param name="data">The dense data to convert; may be <c>null</c>.</param>
 /// <returns>
 /// <c>null</c> when <paramref name="data"/> is <c>null</c>; otherwise a new
 /// <see cref="DataSparse"/> whose features come from
 /// <c>DatasetTest.Dense2Sparse(data.Features)</c> and whose labels, weights and
 /// groups are taken from <paramref name="data"/> unchanged.
 /// </returns>
 public static DataSparse Dense2Sparse(DataDense data)
 {
     // Nothing to convert: propagate the null to the caller.
     if (data == null)
     {
         return null;
     }

     var sparseFeatures = DatasetTest.Dense2Sparse(data.Features);

     return new DataSparse()
     {
         Features = sparseFeatures,
         Labels = data.Labels,
         Weights = data.Weights,
         Groups = data.Groups
     };
 }
Example #2
0
        /// <summary>
        /// Trains a binary model on random dense data with 100 columns and logs how long
        /// it takes to evaluate every training row through three paths:
        /// one batched call to <c>model.Native.GetOutputs</c>, one
        /// <c>trainer.Evaluate</c> call per row, and one <c>model.Managed.GetOutput</c>
        /// call per row at several <c>MaxThreads</c> settings.
        /// </summary>
        /// <remarks>
        /// The computed predictions are discarded; only the elapsed times are written
        /// through <c>output.WriteLine</c>.
        /// </remarks>
        public void BenchmarkEval()
        {
            var rand       = new Random(Seed);
            int numColumns = 100;
            var pms        = new Parameters();

            pms.Objective.Objective    = ObjectiveType.Binary;
            pms.Dataset.MaxBin         = 63;
            pms.Learning.LearningRate  = 1e-3;
            pms.Learning.NumIterations = 1000;
            pms.Common.DeviceType      = DeviceType.CPU;

            // Passing a non-null (empty) map is how the caller requests "no categorical columns".
            var       categorical = new Dictionary <int, int>();
            var       trainData   = CreateRandomDenseClassifyData(rand, 2, ref categorical, pms.Dataset.UseMissing, numColumns);
            DataDense validData   = null;

            pms.Dataset.CategoricalFeature = categorical.Keys.ToArray();

            using (var datasets = new Datasets(pms.Common, pms.Dataset, trainData, validData))
                using (var trainer = new BinaryTrainer(pms.Learning, pms.Objective))
                {
                    var model = trainer.Train(datasets);
                    output.WriteLine($"MaxNumTrees={model.Managed.MaxNumTrees}");

                    // Path 1: all rows in a single call on the native model.
                    var timer = System.Diagnostics.Stopwatch.StartNew();
                    model.Native.GetOutputs(trainData.Features);
                    var elapsed1 = timer.Elapsed;
                    output.WriteLine($"EvalNativeMulti={elapsed1.TotalMilliseconds}");

                    // Path 2: one trainer.Evaluate call per row.
                    timer.Restart();
                    foreach (var row in trainData.Features)
                    {
                        trainer.Evaluate(Booster.PredictType.Normal, row);
                    }
                    var elapsed2 = timer.Elapsed;
                    output.WriteLine($"EvalNativeSingle={elapsed2.TotalMilliseconds}");

                    // Path 3: managed model, one row at a time, repeated for each thread setting.
                    foreach (var maxThreads in new int[] { 1, 2, 4, 8, 16, 32, Environment.ProcessorCount })
                    {
                        model.Managed.MaxThreads = maxThreads;
                        timer.Restart();
                        foreach (var row in trainData.Features)
                        {
                            // Named 'prediction' rather than 'output' so it does not hide the
                            // 'output' member this method logs through.
                            double prediction = 0;
                            var    input      = new VBuffer <float>(row.Length, row);
                            model.Managed.GetOutput(ref input, ref prediction);
                        }
                        var elapsed3 = timer.Elapsed;
                        output.WriteLine($"MaxThreads={maxThreads} EvalManaged={elapsed3.TotalMilliseconds}");
                    }
                }
        }
Example #3
0
        /// <summary>
        /// Times binary-classifier training on random dense data for three data widths
        /// (50, 100 and 150 columns), each on the CPU and then on the GPU device type,
        /// and logs one line per run with the row/column counts, tree count and
        /// training time in seconds.
        /// </summary>
        /// <remarks>
        /// The <c>[Fact]</c> attribute below is commented out, so nothing in this block
        /// registers the method as a test. Any failure is rethrown wrapped in an
        /// exception whose message carries the seed, the test index and the parameters.
        /// </remarks>
        //[Fact]
        public void BenchmarkBinary()
        {
            var rng = new Random(Seed);

            for (int test = 0; test < 3; ++test)
            {
                // CPU first, then GPU, for every data width.
                foreach (var device in new[] { DeviceType.CPU, DeviceType.GPU })
                {
                    int numColumns = 50 * (test + 1);

                    var pms = new Parameters();
                    pms.Objective.Objective      = ObjectiveType.Binary;
                    pms.Dataset.MaxBin           = 63;
                    pms.Learning.BaggingFraction = 1;
                    pms.Learning.BaggingFreq     = 1;
                    pms.Learning.LearningRate    = 1e-3;
                    pms.Learning.NumIterations   = 10;
                    pms.Common.DeviceType        = device;

                    // An empty, non-null map means no categorical columns are requested.
                    var noCategoricals = new Dictionary<int, int>();
                    var trainData      = CreateRandomDenseClassifyData(rng, 2, ref noCategoricals, pms.Dataset.UseMissing, numColumns);
                    DataDense noValidation = null;
                    pms.Dataset.CategoricalFeature = noCategoricals.Keys.ToArray();

                    try
                    {
                        using (var datasets = new Datasets(pms.Common, pms.Dataset, trainData, noValidation))
                        using (var trainer = new BinaryTrainer(pms.Learning, pms.Objective))
                        {
                            // Only the Train call itself is inside the timed region.
                            var stopwatch = System.Diagnostics.Stopwatch.StartNew();
                            var model     = trainer.Train(datasets);
                            var elapsed   = stopwatch.Elapsed;

                            output.WriteLine($"{pms.Common.DeviceType}: NumRows={trainData.NumRows} NumCols={numColumns} MaxNumTrees={model.Managed.MaxNumTrees} TrainTimeSecs={elapsed.TotalSeconds}");
                        }
                    }
                    catch (Exception ex)
                    {
                        throw new Exception($"Failed: {Seed} #{test} {pms}", ex);
                    }
                }
            }
        }
Example #4
0
        /// <summary>
        /// Generates a random dense feature matrix with between 100 and 499 rows and
        /// <paramref name="numColumns"/> columns, optionally with per-row weights.
        /// </summary>
        /// <param name="rand">Source of all randomness used by this method.</param>
        /// <param name="categorical">
        /// Map from column index to number of classes. When <c>null</c> on entry it is
        /// replaced by a new map: with probability 1/2 the map stays empty, otherwise
        /// each column is added with probability 1/10 and a class count in [3, 100).
        /// A non-null map is used as supplied.
        /// </param>
        /// <param name="useMissing">
        /// When <c>true</c>, each cell becomes <see cref="float.NaN"/> with probability 1/50.
        /// </param>
        /// <param name="numColumns">Number of feature columns in every row.</param>
        /// <returns>
        /// A <see cref="DataDense"/> with <c>Features</c> set to the generated rows,
        /// <c>Weights</c> set to either a per-row array or <c>null</c> (probability 1/2 each),
        /// and <c>Groups</c> set to <c>null</c>. No other members are assigned here.
        /// </returns>
        public static DataDense CreateRandomDenseData(
            Random rand,
            ref Dictionary <int, int> categorical,
            bool useMissing,
            int numColumns
            )
        {
            int rowCount = rand.Next(100, 500);

            // Only invent a categorical layout when the caller did not provide one.
            if (categorical is null)
            {
                categorical = new Dictionary<int, int>();

                bool generateCategoricals = rand.Next(2) == 0;
                for (int col = 0; generateCategoricals && col < numColumns; col++)
                {
                    if (rand.Next(10) != 0)
                    {
                        continue;
                    }
                    categorical.Add(col, rand.Next(3, 100));
                }
            }

            // One magnitude per column: a power of ten with exponent in [-10, 10).
            double[] columnScales = Enumerable.Range(0, numColumns)
                                              .Select(_ => Math.Pow(10.0, rand.Next(-10, 10)))
                                              .ToArray();

            var matrix = new float[rowCount][];

            float[] rowWeights = null;
            if (rand.Next(2) == 0)
            {
                rowWeights = new float[rowCount];
            }

            for (int r = 0; r < rowCount; ++r)
            {
                var cells = new float[numColumns];

                for (int c = 0; c < cells.Length; ++c)
                {
                    // Missing value (the random draw only happens when useMissing is set).
                    if (useMissing && rand.Next(50) == 0)
                    {
                        cells[c] = float.NaN;
                        continue;
                    }

                    // Categorical column: a class index below the column's class count.
                    if (categorical.TryGetValue(c, out int classCount))
                    {
                        cells[c] = rand.Next(classCount);
                        continue;
                    }

                    // Numeric column: an exact zero 1 time in 100, otherwise a scaled value
                    // centred on zero.
                    if (rand.Next(100) == 0)
                    {
                        cells[c] = 0.0f;
                    }
                    else
                    {
                        cells[c] = (float)(columnScales[c] * (rand.NextDouble() - 0.5));
                    }
                }

                matrix[r] = cells;

                if (rowWeights != null)
                {
                    rowWeights[r] = (float)rand.NextDouble();
                }
            }

            return new DataDense
            {
                Features = matrix,
                Weights  = rowWeights,
                Groups   = null
            };
        }