[ConditionalFact(typeof(Environment), nameof(Environment.Is64BitProcess))] // LightGBM is 64-bit only
/// <summary>
/// Trains a LightGBM binary classifier on the breast-cancer training file through the
/// statically-typed pipeline API, then checks that the fitted predictor is handed to the
/// <c>onFit</c> callback, exposes one weight per input feature, and yields metrics in [0, 1].
/// </summary>
public void LightGbmBinaryClassification()
{
    var mlContext = new MLContext(seed: 0);
    var trainingFile = new MultiFileSource(GetDataPath(TestDatasets.breastCancer.trainFilename));
    var binaryContext = new BinaryClassificationContext(mlContext);

    // Column 0 is the boolean label; columns 1 through 9 are the float features.
    var loader = TextLoader.CreateReader(
        mlContext,
        c => (label: c.LoadBool(0), features: c.LoadFloat(1, 9)));

    // Filled in by the onFit callback once training has run.
    IPredictorWithFeatureWeights<float> fittedPredictor = null;

    var estimator = loader.MakeNewEstimator()
        .Append(r => (
            r.label,
            preds: binaryContext.Trainers.LightGbm(
                r.label,
                r.features,
                numBoostRound: 10,
                numLeaves: 5,
                learningRate: 0.01,
                onFit: (p) => { fittedPredictor = p; })));

    var pipeline = loader.Append(estimator);

    // The callback must not fire before Fit is called.
    Assert.Null(fittedPredictor);
    var model = pipeline.Fit(trainingFile);
    Assert.NotNull(fittedPredictor);

    // Nine feature columns were loaded, so nine feature weights are expected.
    var featureWeights = new VBuffer<float>();
    fittedPredictor.GetFeatureWeights(ref featureWeights);
    Assert.Equal(9, featureWeights.Length);

    var scored = model.Read(trainingFile);
    var metrics = binaryContext.Evaluate(scored, r => r.label, r => r.preds);

    // Sanity check only: each metric must fall inside its valid range.
    Assert.InRange(metrics.Accuracy, 0, 1);
    Assert.InRange(metrics.Auc, 0, 1);
    Assert.InRange(metrics.Auprc, 0, 1);
}
/// <summary>
/// Builds the managed multi-class predictor by creating one binary predictor per class and
/// combining them with <c>OvaPredictor.Create</c>.
/// </summary>
/// <remarks>
/// When the objective is <c>ObjectiveType.MultiClass</c> the per-class predictors are used as-is.
/// For any other objective each one is wrapped in a <c>CalibratedPredictor</c> that shares a single
/// <c>PlattCalibrator</c> constructed from <c>-Objective.Sigmoid</c>.
/// </remarks>
/// <returns>The combined predictor returned by <c>OvaPredictor.Create</c>.</returns>
/// <exception cref="InvalidOperationException">
/// The number of trained trees is not a multiple of the number of classes, or a per-class
/// predictor does not implement <c>IPredictorWithFeatureWeights&lt;double&gt;</c>.
/// </exception>
private protected override IPredictorWithFeatureWeights<double[]> CreateManagedPredictor()
{
    var numClass = Objective.NumClass;
    if (TrainedEnsemble.NumTrees % numClass != 0)
    {
        // A specific exception type instead of the reserved System.Exception; callers that
        // catch Exception still observe it.
        throw new InvalidOperationException("Number of trees should be a multiple of number of classes.");
    }

    var isSoftMax = (Objective.Objective == ObjectiveType.MultiClass);
    var predictors = new IPredictorWithFeatureWeights<double>[numClass];

    // One calibrator instance is shared by every per-class predictor; none is needed for soft-max.
    var cali = isSoftMax ? null : new PlattCalibrator(-Objective.Sigmoid);

    for (int i = 0; i < numClass; ++i)
    {
        // Fail fast here rather than letting a null from a failed cast surface later
        // as a NullReferenceException far from its cause.
        if (!(CreateBinaryPredictor(i) is IPredictorWithFeatureWeights<double> pred))
        {
            throw new InvalidOperationException(
                $"Binary predictor for class {i} does not implement IPredictorWithFeatureWeights<double>.");
        }

        predictors[i] = isSoftMax ? pred : new CalibratedPredictor(pred, cali);
    }

    return OvaPredictor.Create(isSoftMax, predictors);
}
/// <summary>
/// Trains a single-threaded stochastic-gradient-descent binary classifier on the breast-cancer
/// training file through the statically-typed pipeline API, then checks that the fitted predictor
/// is handed to the <c>onFit</c> callback, exposes one weight per input feature, and yields
/// metrics in [0, 1].
/// </summary>
public void HogwildSGDBinaryClassification()
{
    var mlContext = new MLContext(seed: 0);
    var trainingFile = new MultiFileSource(GetDataPath(TestDatasets.breastCancer.trainFilename));
    var binaryContext = new BinaryClassificationContext(mlContext);

    // Column 0 is the boolean label; columns 1 through 9 are the float features.
    var loader = TextLoaderStatic.CreateReader(
        mlContext,
        c => (label: c.LoadBool(0), features: c.LoadFloat(1, 9)));

    // Filled in by the onFit callback once training has run.
    IPredictorWithFeatureWeights<float> fittedPredictor = null;

    var estimator = loader.MakeNewEstimator()
        .Append(r => (
            r.label,
            preds: binaryContext.Trainers.StochasticGradientDescentClassificationTrainer(
                r.label,
                r.features,
                l2Weight: 0,
                onFit: (p) => { fittedPredictor = p; },
                advancedSettings: s => s.NumThreads = 1)));

    var pipeline = loader.Append(estimator);

    // The callback must not fire before Fit is called.
    Assert.Null(fittedPredictor);
    var model = pipeline.Fit(trainingFile);
    Assert.NotNull(fittedPredictor);

    // Nine feature columns were loaded, so nine feature weights are expected.
    var featureWeights = new VBuffer<float>();
    fittedPredictor.GetFeatureWeights(ref featureWeights);
    Assert.Equal(9, featureWeights.Length);

    var scored = model.Read(trainingFile);
    var metrics = binaryContext.Evaluate(scored, r => r.label, r => r.preds);

    // Sanity check only: each metric must fall inside its valid range.
    Assert.InRange(metrics.Accuracy, 0, 1);
    Assert.InRange(metrics.Auc, 0, 1);
    Assert.InRange(metrics.Auprc, 0, 1);
}
/// <summary>
/// Randomised end-to-end regression test. Runs five trials driven by a <see cref="Random"/>
/// seeded with <c>Seed</c>; each trial picks an objective, generates parameters and random dense
/// data, trains a <c>RegressionTrainer</c>, round-trips both the managed and the native predictor
/// through their persistence helpers, and checks that all prediction paths and feature weights agree.
/// </summary>
/// <remarks>
/// Every random decision is drawn from the same generator, so the order of the
/// <c>rand</c> calls below determines what each trial exercises.
/// Any failure is re-thrown wrapped with the seed, trial number and parameters.
/// </remarks>
public void TrainRegression()
{
    ObjectiveType[] objectiveTypes =
    {
        ObjectiveType.Regression,
        ObjectiveType.RegressionL1,
        ObjectiveType.Huber,
        ObjectiveType.Fair,
        ObjectiveType.Poisson,
        ObjectiveType.Quantile,
        ObjectiveType.Mape,
        ObjectiveType.Gamma,
        ObjectiveType.Tweedie
    };

    var rand = new Random(Seed);
    for (int test = 0; test < 5; ++test)
    {
        int numColumns = rand.Next(1, 10);
        var objective = objectiveTypes[rand.Next(objectiveTypes.Length)];
        var pms = GenerateParameters(rand, objective, numColumns);
        if (rand.Next(2) == 0)
        {
            pms.Objective.RegSqrt = true;
        }

        // Either no schedule at all, or the configured rate decayed by 1% per iteration.
        Func<int, double> learningRateSchedule = null;
        if (rand.Next(2) != 0)
        {
            learningRateSchedule = iter => pms.Learning.LearningRate * Math.Pow(0.99, iter);
        }

        try
        {
            Dictionary<int, int> categorical = null;
            var trainData = CreateRandomDenseRegressionData(rand, ref categorical, pms.Dataset.UseMissing, numColumns);

            // Validation data is always generated when early stopping is on, otherwise on a coin flip.
            var validData = (pms.Learning.EarlyStoppingRound > 0 || rand.Next(2) == 0)
                ? CreateRandomDenseRegressionData(rand, ref categorical, pms.Dataset.UseMissing, numColumns)
                : null;
            pms.Dataset.CategoricalFeature = categorical.Keys.ToArray();

            // make labels positive for certain objective types
            bool needsPositiveLabels =
                objective == ObjectiveType.Poisson ||
                objective == ObjectiveType.Gamma ||
                objective == ObjectiveType.Tweedie;
            if (needsPositiveLabels)
            {
                for (var i = 0; i < trainData.Labels.Length; i++)
                {
                    trainData.Labels[i] = Math.Abs(trainData.Labels[i]);
                }

                if (validData != null)
                {
                    for (var i = 0; i < validData.Labels.Length; i++)
                    {
                        validData.Labels[i] = Math.Abs(validData.Labels[i]);
                    }
                }
            }

            // uncomment to select particular iteration
            //if (test != 3)
            //    continue;

            // Coin flip between feeding the dense data directly and feeding its sparse conversion.
            using (var datasets = (rand.Next(2) == 0)
                ? new Datasets(pms.Common, pms.Dataset, trainData, validData)
                : new Datasets(pms.Common, pms.Dataset, Dense2Sparse(trainData), Dense2Sparse(validData)))
            using (var trainer = new RegressionTrainer(pms.Learning, pms.Objective))
            {
                //if (true)
                //    trainer.ToCommandLineFiles(datasets);

                var model = trainer.Train(datasets, learningRateSchedule);
                model.Managed.MaxThreads = rand.Next(1, Environment.ProcessorCount);

                // possibly use subset of trees
                var numIterations = -1;
                if (rand.Next(2) == 0)
                {
                    numIterations = rand.Next(1, model.Managed.MaxNumTrees);
                    model.Managed.MaxNumTrees = numIterations;
                    model.Native.MaxNumTrees = numIterations;
                }

                // Round-trip the managed predictor through its persistence helper.
                IPredictorWithFeatureWeights<double> reloadedManaged = null;
                using (var ms = new System.IO.MemoryStream())
                using (var writer = new System.IO.BinaryWriter(ms))
                using (var reader = new System.IO.BinaryReader(ms))
                {
                    PredictorPersist.Save(model.Managed, writer);
                    ms.Position = 0;
                    reloadedManaged = PredictorPersist.Load<double>(reader);

                    // Loading must consume exactly what saving wrote.
                    Assert.Equal(ms.Position, ms.Length);
                }

                // Same round-trip for the native predictor.
                IPredictorWithFeatureWeights<double> reloadedNative = null;
                using (var ms = new System.IO.MemoryStream())
                using (var writer = new System.IO.BinaryWriter(ms))
                using (var reader = new System.IO.BinaryReader(ms))
                {
                    NativePredictorPersist.Save(model.Native, writer);
                    ms.Position = 0;
                    reloadedNative = NativePredictorPersist.Load<double>(reader);
                    Assert.Equal(ms.Position, ms.Length);
                }

                // Batch predictions: one row per training example, one column of output.
                var batchOutputs = trainer.Evaluate(Booster.PredictType.Normal, trainData.Features, numIterations);
                Assert.Equal(trainData.Features.Length, batchOutputs.GetLength(0));
                Assert.Equal(1, batchOutputs.GetLength(1));

                var nativeBatchOutputs = model.Native.GetOutputs(trainData.Features);
                Assert.Equal(trainData.Features.Length, batchOutputs.Length);

                for (int i = 0; i < trainData.Features.Length; i++)
                {
                    var row = trainData.Features[i];

                    double managedOutput = 0;
                    var input = new VBuffer<float>(row.Length, row);
                    model.Managed.GetOutput(ref input, ref managedOutput);
                    Assert.False(double.IsNaN(managedOutput));

                    // The reloaded managed predictor must agree with the one it was saved from.
                    double reloadedOutput = 0;
                    reloadedManaged.GetOutput(ref input, ref reloadedOutput);
                    Compare(managedOutput, reloadedOutput);

                    // The trainer's single-row output must equal both batch outputs,
                    // and be comparable with the managed output.
                    var rowOutput = trainer.Evaluate(Booster.PredictType.Normal, row, numIterations);
                    Assert.Single(rowOutput);
                    Assert.Equal(rowOutput[0], batchOutputs[i, 0]);
                    Assert.Equal(rowOutput[0], nativeBatchOutputs[i]);
                    Compare(managedOutput, rowOutput[0]);
                    //Console.WriteLine(trainer.GetModelString());
                    //throw new Exception($"Output mismatch {output} vs {output3[0]} (error: {Math.Abs(output - output3[0])}) input: {String.Join(", ", row)}");

                    // The native predictor, original and reloaded, must equal the trainer's output.
                    double outputNative = 0;
                    model.Native.GetOutput(ref input, ref outputNative);
                    Assert.Equal(outputNative, rowOutput[0]);

                    reloadedNative.GetOutput(ref input, ref outputNative);
                    Assert.Equal(outputNative, rowOutput[0]);
                }

                // Feature weights from the managed and native predictors must match key by key.
                var normalise = rand.Next(2) == 0;
                var getSplits = rand.Next(2) == 0;
                var managedWeights = model.Managed.GetFeatureWeights(normalise, getSplits);
                var nativeWeights = model.Native.GetFeatureWeights(normalise, getSplits);
                Assert.Equal(managedWeights.Count, nativeWeights.Count);
                foreach (var kv in managedWeights)
                {
                    Assert.True(0 <= kv.Key && kv.Key < trainData.NumColumns);
                    Assert.True(0.0 <= kv.Value);
                    Compare(kv.Value, nativeWeights[kv.Key]);
                }

                // For raw, un-normalised gains the total must equal the sum of the per-feature gains.
                if (!getSplits && !normalise)
                {
                    var totGain1 = managedWeights.Values.Sum();
                    var totGain2 = Enumerable.Range(0, trainData.NumColumns)
                        .SelectMany(i => model.Managed.GetFeatureGains(i))
                        .Sum();
                    Compare(totGain1, totGain2);
                }
            }
        }
        catch (Exception e)
        {
            // Attach everything needed to reproduce the failing trial.
            throw new Exception($"Failed: {Seed} #{test} {pms}", e);
        }
    }
}
/// <summary>
/// Wraps a trained model in a <c>BinaryPredictionTransformer</c>.
/// </summary>
/// <param name="model">The trained predictor to wrap.</param>
/// <param name="trainSchema">The schema passed through to the transformer.</param>
/// <returns>
/// A new transformer constructed from <c>Host</c>, <paramref name="model"/>,
/// <paramref name="trainSchema"/> and the name of <c>FeatureColumn</c>.
/// </returns>
protected override BinaryPredictionTransformer<IPredictorWithFeatureWeights<float>> MakeTransformer(
    IPredictorWithFeatureWeights<float> model,
    Schema trainSchema)
{
    return new BinaryPredictionTransformer<IPredictorWithFeatureWeights<float>>(
        Host,
        model,
        trainSchema,
        FeatureColumn.Name);
}
/// <summary>
/// Creates a pair holding the managed predictor together with its native counterpart.
/// </summary>
/// <param name="managed">Predictor stored in <c>Managed</c>.</param>
/// <param name="native">Predictor stored in <c>Native</c>.</param>
public Predictors(
    IPredictorWithFeatureWeights<TOutput> managed,
    IVectorisedPredictorWithFeatureWeights<TOutput> native)
{
    this.Managed = managed;
    this.Native = native;
}
/// <summary>
/// Creates a predictor that pairs an underlying predictor with a calibrator.
/// </summary>
/// <param name="predictor">Predictor stored in <c>SubPredictor</c>.</param>
/// <param name="calibrator">Calibrator stored in <c>Calibrator</c>.</param>
public CalibratedPredictor(
    IPredictorWithFeatureWeights<double> predictor,
    ICalibrator calibrator)
{
    this.SubPredictor = predictor;
    this.Calibrator = calibrator;
}