/// <summary>
/// Builds a <see cref="DataSparse"/> from a dense data set: the features are passed through
/// <c>DatasetTest.Dense2Sparse</c>, while labels, weights and groups are carried over by reference.
/// </summary>
/// <param name="data">Dense data to convert; may be null.</param>
/// <returns>The sparse counterpart, or null when <paramref name="data"/> is null.</returns>
public static DataSparse Dense2Sparse(DataDense data)
{
    // Null in, null out: callers may hand over an absent (e.g. validation) set.
    if (data == null)
    {
        return null;
    }

    return new DataSparse
    {
        Features = DatasetTest.Dense2Sparse(data.Features),
        Labels = data.Labels,
        Weights = data.Weights,
        Groups = data.Groups
    };
}
/// <summary>
/// Trains a binary model on random dense data (100 columns, 1000 iterations, CPU) and logs how long
/// it takes to score the training rows in three ways: one batched native call, one native call per
/// row through the trainer, and one managed call per row for a range of <c>MaxThreads</c> settings.
/// Timings are written to the test output in milliseconds; nothing is asserted.
/// </summary>
public void BenchmarkEval()
{
    const int numColumns = 100;
    var rand = new Random(Seed);

    var pms = new Parameters();
    pms.Objective.Objective = ObjectiveType.Binary;
    pms.Dataset.MaxBin = 63;
    pms.Learning.LearningRate = 1e-3;
    pms.Learning.NumIterations = 1000;
    pms.Common.DeviceType = DeviceType.CPU;

    // Empty (non-null) map, i.e. no categorical columns.
    var categorical = new Dictionary<int, int>();
    var trainData = CreateRandomDenseClassifyData(rand, 2, ref categorical, pms.Dataset.UseMissing, numColumns);
    DataDense validData = null;
    pms.Dataset.CategoricalFeature = categorical.Keys.ToArray();

    using (var datasets = new Datasets(pms.Common, pms.Dataset, trainData, validData))
    using (var trainer = new BinaryTrainer(pms.Learning, pms.Objective))
    {
        var model = trainer.Train(datasets);
        output.WriteLine($"MaxNumTrees={model.Managed.MaxNumTrees}");

        // 1) Native model, all rows in a single call.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        model.Native.GetOutputs(trainData.Features);
        var batchTime = stopwatch.Elapsed;
        output.WriteLine($"EvalNativeMulti={batchTime.TotalMilliseconds}");

        // 2) Trainer evaluation, one call per row.
        stopwatch.Restart();
        foreach (var features in trainData.Features)
        {
            trainer.Evaluate(Booster.PredictType.Normal, features);
        }
        var perRowTime = stopwatch.Elapsed;
        output.WriteLine($"EvalNativeSingle={perRowTime.TotalMilliseconds}");

        // 3) Managed model, one call per row, repeated for each thread limit.
        var threadLimits = new[] { 1, 2, 4, 8, 16, 32, Environment.ProcessorCount };
        foreach (var maxThreads in threadLimits)
        {
            model.Managed.MaxThreads = maxThreads;
            stopwatch.Restart();
            foreach (var features in trainData.Features)
            {
                // Named "score" so it cannot be confused with the "output" logger member.
                double score = 0;
                var input = new VBuffer<float>(features.Length, features);
                model.Managed.GetOutput(ref input, ref score);
            }
            var managedTime = stopwatch.Elapsed;
            output.WriteLine($"MaxThreads={maxThreads} EvalManaged={managedTime.TotalMilliseconds}");
        }
    }
}
/// <summary>
/// Times binary-classifier training on random dense data for three column counts (50, 100, 150),
/// once on CPU and once on GPU for each, and logs row/column counts, tree count and elapsed seconds.
/// Any failure is rethrown wrapped with the seed, the test index and the parameters.
/// </summary>
// The [Fact] attribute is commented out; presumably this keeps the benchmark out of regular test runs.
//[Fact]
public void BenchmarkBinary()
{
    // Single generator shared by every configuration, so the data sequence depends only on Seed.
    var rand = new Random(Seed);
    var devices = new[] { DeviceType.CPU, DeviceType.GPU };

    for (int test = 0; test < 3; ++test)
    {
        foreach (var device in devices)
        {
            int numColumns = 50 * (test + 1);

            var pms = new Parameters();
            pms.Objective.Objective = ObjectiveType.Binary;
            pms.Dataset.MaxBin = 63;
            pms.Learning.BaggingFraction = 1;
            pms.Learning.BaggingFreq = 1;
            pms.Learning.LearningRate = 1e-3;
            pms.Learning.NumIterations = 10;
            pms.Common.DeviceType = device;

            // Empty (non-null) map, i.e. no categorical columns.
            var categorical = new Dictionary<int, int>();
            var trainData = CreateRandomDenseClassifyData(rand, 2, ref categorical, pms.Dataset.UseMissing, numColumns);
            DataDense validData = null;
            pms.Dataset.CategoricalFeature = categorical.Keys.ToArray();

            try
            {
                using (var datasets = new Datasets(pms.Common, pms.Dataset, trainData, validData))
                using (var trainer = new BinaryTrainer(pms.Learning, pms.Objective))
                {
                    // Only the Train call is timed; data set construction is excluded.
                    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
                    var model = trainer.Train(datasets);
                    var trainTime = stopwatch.Elapsed;
                    output.WriteLine($"{pms.Common.DeviceType}: NumRows={trainData.NumRows} NumCols={numColumns} MaxNumTrees={model.Managed.MaxNumTrees} TrainTimeSecs={trainTime.TotalSeconds}");
                }
            }
            catch (Exception e)
            {
                // Attach enough context to reproduce the failing configuration.
                throw new Exception($"Failed: {Seed} #{test} {pms}", e);
            }
        }
    }
}
/// <summary>
/// Generates a random dense feature matrix, optionally with per-row weights. Labels are not set here.
/// </summary>
/// <param name="rand">Random source; every value produced comes from it.</param>
/// <param name="categorical">
/// Map from column index to number of classes. When null on entry it is replaced by a new map which,
/// with probability 1/2, marks each column as categorical with probability 1/10 and a class count
/// in [3, 100). A non-null map is used as supplied.
/// </param>
/// <param name="useMissing">When true, each cell has a 1-in-50 chance of being <c>float.NaN</c>.</param>
/// <param name="numColumns">Number of feature columns per row.</param>
/// <returns>
/// A <see cref="DataDense"/> with between 100 and 499 rows, weights that are either null or one value
/// in [0, 1) per row, and null groups.
/// </returns>
public static DataDense CreateRandomDenseData(
    Random rand,
    ref Dictionary<int, int> categorical,
    bool useMissing,
    int numColumns)
{
    // NOTE: the order of the rand calls below is significant for reproducibility with a fixed seed.
    int numRows = rand.Next(100, 500);

    // categorical: column index -> number of classes
    if (categorical == null)
    {
        categorical = new Dictionary<int, int>();
        if (rand.Next(2) == 0)
        {
            for (int col = 0; col < numColumns; col++)
            {
                if (rand.Next(10) == 0)
                {
                    categorical.Add(col, rand.Next(3, 100));
                }
            }
        }
    }

    // One magnitude per column: a power of ten with exponent in [-10, 10).
    double[] scales = Enumerable.Range(0, numColumns)
                                .Select(_ => Math.Pow(10.0, rand.Next(-10, 10)))
                                .ToArray();

    var rows = new float[numRows][];

    // Weights are present for roughly half of the generated data sets.
    float[] weights = null;
    if (rand.Next(2) == 0)
    {
        weights = new float[numRows];
    }

    for (int r = 0; r < numRows; ++r)
    {
        var values = new float[numColumns];
        for (int c = 0; c < values.Length; ++c)
        {
            // Short-circuit matters: no random draw is consumed when useMissing is false.
            if (useMissing && rand.Next(50) == 0)
            {
                values[c] = float.NaN;
                continue;
            }

            if (categorical.TryGetValue(c, out int numClass))
            {
                // Categorical cell: class index in [0, numClass).
                values[c] = rand.Next(numClass);
            }
            else if (rand.Next(100) == 0)
            {
                // Occasional exact zero in numeric columns.
                values[c] = 0.0f;
            }
            else
            {
                // Numeric cell: centred around zero and scaled by the column magnitude.
                values[c] = (float)(scales[c] * (rand.NextDouble() - 0.5));
            }
        }

        rows[r] = values;
        if (weights != null)
        {
            weights[r] = (float)rand.NextDouble();
        }
    }

    return new DataDense
    {
        Features = rows,
        Weights = weights,
        Groups = null
    };
}