Example #1
0
        public void SdcaBinaryClassification()
        {
            var env        = new MLContext(seed: 0);
            var dataPath   = GetDataPath(TestDatasets.breastCancer.trainFilename);
            var dataSource = new MultiFileSource(dataPath);
            var ctx        = new BinaryClassificationContext(env);

            var reader = TextLoader.CreateReader(env,
                                                 c => (label: c.LoadBool(0), features: c.LoadFloat(1, 9)));

            LinearBinaryModelParameters        pred = null;
            ParameterMixingCalibratedPredictor cali = null;

            var est = reader.MakeNewEstimator()
                      .Append(r => (r.label, preds: ctx.Trainers.Sdca(r.label, r.features,
                                                                      maxIterations: 2,
                                                                      onFit: (p, c) => { pred = p; cali = c; },
                                                                      advancedSettings: s => s.NumThreads = 1)));

            var pipe = reader.Append(est);

            Assert.Null(pred);
            Assert.Null(cali);
            var model = pipe.Fit(dataSource);

            Assert.NotNull(pred);
            Assert.NotNull(cali);
            // 9 input features, so we ought to have 9 weights.
            Assert.Equal(9, pred.Weights.Count);

            var data = model.Read(dataSource);

            var metrics = ctx.Evaluate(data, r => r.label, r => r.preds);

            // Run a sanity check against a few of the metrics.
            Assert.InRange(metrics.Accuracy, 0, 1);
            Assert.InRange(metrics.Auc, 0, 1);
            Assert.InRange(metrics.Auprc, 0, 1);
            Assert.InRange(metrics.LogLoss, 0, double.PositiveInfinity);
            Assert.InRange(metrics.Entropy, 0, double.PositiveInfinity);

            // Just output some data on the schema for fun.
            var schema = data.AsDynamic.Schema;

            for (int c = 0; c < schema.Count; ++c)
            {
                Console.WriteLine($"{schema[c].Name}, {schema[c].Type}");
            }
        }
Example #2
0
        public void SdcaBinaryClassificationNoCalibration()
        {
            var env        = new MLContext(seed: 0);
            var dataPath   = GetDataPath(TestDatasets.breastCancer.trainFilename);
            var dataSource = new MultiFileSource(dataPath);
            var ctx        = new BinaryClassificationContext(env);

            var reader = TextLoader.CreateReader(env,
                                                 c => (label: c.LoadBool(0), features: c.LoadFloat(1, 9)));

            LinearBinaryModelParameters pred = null;

            var loss = new HingeLoss(1);

            // With a custom loss function we no longer get calibrated predictions.
            var est = reader.MakeNewEstimator()
                      .Append(r => (r.label, preds: ctx.Trainers.Sdca(r.label, r.features,
                                                                      maxIterations: 2,
                                                                      loss: loss, onFit: p => pred = p,
                                                                      advancedSettings: s => s.NumThreads = 1)));

            var pipe = reader.Append(est);

            Assert.Null(pred);
            var model = pipe.Fit(dataSource);

            Assert.NotNull(pred);
            // 9 input features, so we ought to have 9 weights.
            Assert.Equal(9, pred.Weights.Count);

            var data = model.Read(dataSource);

            var metrics = ctx.Evaluate(data, r => r.label, r => r.preds);

            // Run a sanity check against a few of the metrics.
            Assert.InRange(metrics.Accuracy, 0, 1);
            Assert.InRange(metrics.Auc, 0, 1);
            Assert.InRange(metrics.Auprc, 0, 1);

            // Just output some data on the schema for fun.
            var schema = data.AsDynamic.Schema;

            for (int c = 0; c < schema.ColumnCount; ++c)
            {
                Console.WriteLine($"{schema.GetColumnName(c)}, {schema.GetColumnType(c)}");
            }
        }
Example #3
0
        public void AveragePerceptronCalibration()
        {
            var env        = new MLContext(seed: 0);
            var dataPath   = GetDataPath(TestDatasets.breastCancer.trainFilename);
            var dataSource = new MultiFileSource(dataPath);
            var ctx        = new BinaryClassificationContext(env);

            var reader = TextLoader.CreateReader(env,
                                                 c => (label: c.LoadBool(0), features: c.LoadFloat(1, 9)));

            LinearBinaryModelParameters pred = null;

            var loss = new HingeLoss(1);

            var est = reader.MakeNewEstimator()
                      .Append(r => (r.label, preds: ctx.Trainers.AveragedPerceptron(r.label, r.features, lossFunction: loss,
                                                                                    numIterations: 2, onFit: p => pred = p)));

            var pipe = reader.Append(est);

            Assert.Null(pred);
            var model = pipe.Fit(dataSource);

            Assert.NotNull(pred);
            // 9 input features, so we ought to have 9 weights.
            Assert.Equal(9, pred.Weights.Count);

            var data = model.Read(dataSource);

            var metrics = ctx.Evaluate(data, r => r.label, r => r.preds);

            // Run a sanity check against a few of the metrics.
            Assert.InRange(metrics.Accuracy, 0, 1);
            Assert.InRange(metrics.Auc, 0, 1);
            Assert.InRange(metrics.Auprc, 0, 1);
        }
 public static float[] GetLinearModelWeights(LinearBinaryModelParameters linearModel)
 {
     return(linearModel.Weights.ToArray());
 }