Example #1
0
        /// <summary>
        /// Fits the binary estimator once per class and builds an <see cref="OvaPredictor"/>
        /// from the inner models of the fitted transformers.
        /// </summary>
        /// <param name="trainContext">Context whose <c>TrainingSet</c> provides the training data.</param>
        /// <returns>The predictor created from all per-class inner models.</returns>
        protected override OvaPredictor TrainCore(TrainContext trainContext)
        {
            var trainingSet = trainContext.TrainingSet;
            trainingSet.CheckMultiClassLabel(out var classCount);

            // One fitted binary transformer per class index.
            var perClassTransformers = new IPredictorTransformer<TScalarPredictor>[classCount];
            for (int classIndex = 0; classIndex < classCount; classIndex++)
            {
                // Wrap the training data in a label-indicator transform for this class index ("Label" column).
                var indicatorData = new LabelIndicatorTransform(_env, trainingSet.Data, classIndex, "Label");
                perClassTransformers[classIndex] = _binaryEstimator.Fit(indicatorData);
            }

            // Register the host first, then collect the inner models, as the original call did.
            var host        = _env.Register("ova");
            var innerModels = perClassTransformers.Select(transformer => transformer.InnerModel).ToArray();

            return OvaPredictor.Create(host, innerModels);
        }
Example #2
0
        /// <summary>
        /// Fits <paramref name="trainer"/> on <paramref name="data"/>, chains four feature contribution
        /// estimators built from the fitted model, and runs the estimator and output checks on the chain.
        /// </summary>
        /// <param name="trainer">Trainer whose fitted model is handed to each contribution estimator.</param>
        /// <param name="data">Data used both for fitting and for the checks.</param>
        /// <param name="testFile">Passed through to <c>CheckOutput</c>.</param>
        /// <param name="precision">Passed through to <c>CheckOutput</c>.</param>
        private void TestFeatureContribution(
            ITrainerEstimator<ISingleFeaturePredictionTransformer<ICalculateFeatureContribution>, ICalculateFeatureContribution> trainer,
            IDataView data,
            string testFile,
            int precision = 6)
        {
            var model = trainer.Fit(data);

            // Build the chain one stage at a time: 3 positive / 0 negative first...
            var positiveOnly = ML.Transforms.CalculateFeatureContribution(
                model, numberOfPositiveContributions: 3, numberOfNegativeContributions: 0);

            // ...then 0 positive / 3 negative...
            var plusNegativeOnly = positiveOnly.Append(
                ML.Transforms.CalculateFeatureContribution(
                    model, numberOfPositiveContributions: 0, numberOfNegativeContributions: 3));

            // ...then 1 positive / 1 negative...
            var plusOneOfEach = plusNegativeOnly.Append(
                ML.Transforms.CalculateFeatureContribution(
                    model, numberOfPositiveContributions: 1, numberOfNegativeContributions: 1));

            // ...and finally 1 positive / 1 negative with normalize set to false.
            var est = plusOneOfEach.Append(
                ML.Transforms.CalculateFeatureContribution(
                    model, numberOfPositiveContributions: 1, numberOfNegativeContributions: 1, normalize: false));

            TestEstimatorCore(est, data);

            CheckOutput(est, data, testFile, precision);
            Done();
        }
        /// <summary>
        /// Fits <paramref name="trainer"/>, requires its model to implement
        /// <c>ICalculateFeatureContribution</c>, chains four feature contribution estimators on the
        /// "Features" column, writes the first 4 transformed rows to a .tsv file and compares it via
        /// <c>CheckEquality</c>.
        /// </summary>
        /// <param name="trainer">Trainer to fit on <paramref name="data"/>.</param>
        /// <param name="data">Data used for fitting, estimator checks and the saved output.</param>
        /// <param name="testFile">File name without extension; ".tsv" is appended.</param>
        /// <param name="precision">Forwarded to <c>CheckEquality</c> as <c>digitsOfPrecision</c>.</param>
        private void TestFeatureContribution(
            ITrainerEstimator<ISingleFeaturePredictionTransformer<IPredictor>, IPredictor> trainer,
            IDataView data,
            string testFile,
            int precision = 6)
        {
            var model = trainer.Fit(data);

            // The fitted model must expose feature contribution calculation; fail the test otherwise.
            var predictor = model.Model as ICalculateFeatureContribution;
            Assert.NotNull(predictor);

            // Build the chain stage by stage, in the same order as a single fluent expression would.
            var positiveOnly = new FeatureContributionCalculatingEstimator(
                ML, predictor, "Features", numPositiveContributions: 3, numNegativeContributions: 0);
            var plusNegativeOnly = positiveOnly.Append(
                new FeatureContributionCalculatingEstimator(
                    ML, predictor, "Features", numPositiveContributions: 0, numNegativeContributions: 3));
            var plusOneOfEach = plusNegativeOnly.Append(
                new FeatureContributionCalculatingEstimator(
                    ML, predictor, "Features", numPositiveContributions: 1, numNegativeContributions: 1));
            var est = plusOneOfEach.Append(
                new FeatureContributionCalculatingEstimator(
                    ML, predictor, "Features", numPositiveContributions: 1, numNegativeContributions: 1, normalize: false));

            TestEstimatorCore(est, data);

            // Save the transformed output and compare it using the same directory/file pair.
            const string outputDir = "FeatureContribution";
            var fileName   = testFile + ".tsv";
            var outputPath = GetOutputPath(outputDir, fileName);

            using (var ch = Env.Start("save"))
            {
                var saverArgs = new TextSaver.Arguments
                {
                    Silent       = true,
                    OutputHeader = false
                };
                var saver = new TextSaver(ML, saverArgs);

                // Only the first 4 rows of the transformed data are written.
                IDataView savedData = TakeFilter.Create(ML, est.Fit(data).Transform(data), 4);
                using (var fs = File.Create(outputPath))
                {
                    DataSaverUtils.SaveDataView(ch, saver, savedData, fs, keepHidden: true);
                }
            }

            CheckEquality(outputDir, fileName, digitsOfPrecision: precision);
            Done();
        }
Example #4
0
        /// <summary>
        /// Features: x1, x2, x3, xRand; y = 10*x1 + 20*x2 + 5.5*x3 + e. xRand is random and has weight 0,
        /// so the label y does not depend on it.
        /// Test verifies that the feature contributions produced for the leading output rows match
        /// <paramref name="expectedValues"/>.
        /// </summary>
        /// <param name="trainer">Trainer fitted on the synthetic data; its model and feature column are passed to the contribution transform.</param>
        /// <param name="expectedValues">Expected contributions, one array per leading output row, indexed by feature.</param>
        /// <param name="precision">Precision argument forwarded to <c>Assert.Equal</c> for every comparison.</param>
        private void TestFeatureContribution(
            ITrainerEstimator<ISingleFeaturePredictionTransformer<IPredictor>, IPredictor> trainer,
            List<float[]> expectedValues,
            int precision = 6)
        {
            // Setup synthetic dataset.
            const int numInstances = 1000;
            const int numFeatures  = 4;

            // Fixed seed so the generated data is reproducible.
            var rand = new Random(10);

            float[]   yArray       = new float[numInstances];
            float[][] xArray       = new float[numFeatures][];
            int[]     xRangeArray  = new[] { 1000, 10000, 5000, 1000 };
            float[]   xWeightArray = new[] {
                10,
                20, // Most important feature with high weight. Should have the highest contribution.
                5.5f,
                0,  // Least important feature. Should have the least contribution.
            };

            // Allocate every feature column up front instead of null-checking inside the fill loop.
            for (int featureIndex = 0; featureIndex < numFeatures; featureIndex++)
            {
                xArray[featureIndex] = new float[numInstances];
            }

            // The order of rand.Next calls (features first, then noise, per instance) is kept exactly,
            // so the generated values are the same as before.
            for (var instanceIndex = 0; instanceIndex < numInstances; instanceIndex++)
            {
                for (int featureIndex = 0; featureIndex < numFeatures; featureIndex++)
                {
                    xArray[featureIndex][instanceIndex] = rand.Next(xRangeArray[featureIndex]);
                    yArray[instanceIndex] += xArray[featureIndex][instanceIndex] * xWeightArray[featureIndex];
                }

                var noise = rand.Next(50);
                yArray[instanceIndex] += noise;
            }

            // Create data view.
            var bldr = new ArrayDataViewBuilder(Env);

            bldr.AddColumn("X1", NumberType.Float, xArray[0]);
            bldr.AddColumn("X2Important", NumberType.Float, xArray[1]);
            bldr.AddColumn("X3", NumberType.Float, xArray[2]);
            bldr.AddColumn("X4Rand", NumberType.Float, xArray[3]);
            bldr.AddColumn("Label", NumberType.Float, yArray);
            var srcDV = bldr.GetDataView();

            var pipeline = ML.Transforms.Concatenate("Features", "X1", "X2Important", "X3", "X4Rand")
                           .AppendCacheCheckpoint(ML)
                           .Append(ML.Transforms.Normalize("Features"));
            var data  = pipeline.Fit(srcDV).Transform(srcDV);
            var model = trainer.Fit(data);
            var args  = new FeatureContributionCalculationTransform.Arguments()
            {
                Bottom = 10,
                Top    = 10
            };
            var output = FeatureContributionCalculationTransform.Create(Env, args, data, model.Model, model.FeatureColumn);

            var transformedOutput = output.AsEnumerable<ScoreAndContribution>(Env, true);
            int rowIndex          = 0;

            foreach (var row in transformedOutput.Take(expectedValues.Count))
            {
                var expectedValue = expectedValues[rowIndex++];
                for (int i = 0; i < numFeatures; i++)
                {
                    Assert.Equal(expectedValue[i], row.FeatureContributions[i], precision);
                }
            }

            // Take() silently yields fewer rows if the output is short; make sure every expected row
            // was actually compared instead of passing vacuously.
            Assert.Equal(expectedValues.Count, rowIndex);

            Done();
        }