/// <summary>
/// Trains a one-versus-all (OVA) model: fits one binary classifier per class
/// (class i vs. the rest, via a label-indicator column derived from "Label")
/// and combines the resulting scalar predictors into a single OvaPredictor.
/// </summary>
protected override OvaPredictor TrainCore(TrainContext trainContext)
{
    var trainRoles = trainContext.TrainingSet;
    trainRoles.CheckMultiClassLabel(out var numClasses);

    // One binary problem per class.
    var perClassModels = new IPredictorTransformer<TScalarPredictor>[numClasses];
    for (int classIndex = 0; classIndex < numClasses; classIndex++)
    {
        var indicatorData = new LabelIndicatorTransform(_env, trainRoles.Data, classIndex, "Label");
        perClassModels[classIndex] = _binaryEstimator.Fit(indicatorData);
    }

    // Strip the transformer wrappers and assemble the combined predictor.
    var innerPredictors = perClassModels.Select(m => m.InnerModel).ToArray();
    return OvaPredictor.Create(_env.Register("ova"), innerPredictors);
}
/// <summary>
/// Fits the given trainer, appends a chain of feature-contribution calculators
/// with varying positive/negative contribution counts (including one
/// unnormalized configuration), validates the estimator chain, and checks the
/// pipeline output against the expected baseline file.
/// </summary>
private void TestFeatureContribution(
    ITrainerEstimator<ISingleFeaturePredictionTransformer<ICalculateFeatureContribution>, ICalculateFeatureContribution> trainer,
    IDataView data,
    string testFile,
    int precision = 6)
{
    // Train the model.
    var model = trainer.Fit(data);

    // Calculate feature contributions under several configurations:
    // positives only, negatives only, one of each, and one of each unnormalized.
    var pipeline = ML.Transforms.CalculateFeatureContribution(model, numberOfPositiveContributions: 3, numberOfNegativeContributions: 0)
        .Append(ML.Transforms.CalculateFeatureContribution(model, numberOfPositiveContributions: 0, numberOfNegativeContributions: 3))
        .Append(ML.Transforms.CalculateFeatureContribution(model, numberOfPositiveContributions: 1, numberOfNegativeContributions: 1))
        .Append(ML.Transforms.CalculateFeatureContribution(model, numberOfPositiveContributions: 1, numberOfNegativeContributions: 1, normalize: false));

    TestEstimatorCore(pipeline, data);

    // Verify output against the stored baseline.
    CheckOutput(pipeline, data, testFile, precision);
    Done();
}
/// <summary>
/// Fits the given trainer, verifies the trained predictor supports feature
/// contribution calculation, runs a chain of contribution estimators over the
/// data, saves the first rows of the transformed output to a tsv file, and
/// compares that file with the checked-in baseline.
/// </summary>
private void TestFeatureContribution(
    ITrainerEstimator<ISingleFeaturePredictionTransformer<IPredictor>, IPredictor> trainer,
    IDataView data,
    string testFile,
    int precision = 6)
{
    // Train the model.
    var model = trainer.Fit(data);

    // Extract the predictor, check that it supports feature contribution.
    var predictor = model.Model as ICalculateFeatureContribution;
    Assert.NotNull(predictor);

    // Build a chain of contribution calculators with different settings:
    // positives only, negatives only, one of each, and one of each unnormalized.
    var chain = new FeatureContributionCalculatingEstimator(ML, predictor, "Features", numPositiveContributions: 3, numNegativeContributions: 0)
        .Append(new FeatureContributionCalculatingEstimator(ML, predictor, "Features", numPositiveContributions: 0, numNegativeContributions: 3))
        .Append(new FeatureContributionCalculatingEstimator(ML, predictor, "Features", numPositiveContributions: 1, numNegativeContributions: 1))
        .Append(new FeatureContributionCalculatingEstimator(ML, predictor, "Features", numPositiveContributions: 1, numNegativeContributions: 1, normalize: false));

    TestEstimatorCore(chain, data);

    // Save the first four transformed rows (hidden columns included) and
    // diff the file against the baseline.
    var outputPath = GetOutputPath("FeatureContribution", testFile + ".tsv");
    using (var ch = Env.Start("save"))
    {
        var saver = new TextSaver(ML, new TextSaver.Arguments { Silent = true, OutputHeader = false });
        IDataView savedData = TakeFilter.Create(ML, chain.Fit(data).Transform(data), 4);
        using (var fs = File.Create(outputPath))
            DataSaverUtils.SaveDataView(ch, saver, savedData, fs, keepHidden: true);
    }
    CheckEquality("FeatureContribution", testFile + ".tsv", digitsOfPrecision: precision);
    Done();
}
/// <summary>
/// Features: x1, x2, x3, xRand; y = 10*x1 + 20*x2 + 5.5*x3 + e. xRand is drawn
/// at random and carries zero weight, so the label does not depend on it and it
/// should receive the smallest contribution score.
/// Test verifies that feature contribution scores are output along with a score
/// for predicted data.
/// </summary>
private void TestFeatureContribution(
    ITrainerEstimator<ISingleFeaturePredictionTransformer<IPredictor>, IPredictor> trainer,
    List<float[]> expectedValues,
    int precision = 6)
{
    // Setup synthetic dataset.
    const int numInstances = 1000;
    const int numFeatures = 4;

    var rand = new Random(10);
    float[] labels = new float[numInstances];
    float[][] features = new float[numFeatures][];
    for (int f = 0; f < numFeatures; f++)
        features[f] = new float[numInstances];

    // Value range and linear weight for each feature.
    int[] featureRanges = { 1000, 10000, 5000, 1000 };
    float[] featureWeights =
    {
        10,
        20,   // Most important feature with high weight. Should have the highest contribution.
        5.5f,
        0,    // Least important feature. Should have the least contribution.
    };

    for (int instance = 0; instance < numInstances; instance++)
    {
        // NOTE: the per-feature draws and the trailing noise draw must stay in
        // this exact order so the seeded Random reproduces the same dataset.
        for (int f = 0; f < numFeatures; f++)
        {
            features[f][instance] = rand.Next(featureRanges[f]);
            labels[instance] += features[f][instance] * featureWeights[f];
        }
        labels[instance] += rand.Next(50);
    }

    // Create data view.
    var bldr = new ArrayDataViewBuilder(Env);
    bldr.AddColumn("X1", NumberType.Float, features[0]);
    bldr.AddColumn("X2Important", NumberType.Float, features[1]);
    bldr.AddColumn("X3", NumberType.Float, features[2]);
    bldr.AddColumn("X4Rand", NumberType.Float, features[3]);
    bldr.AddColumn("Label", NumberType.Float, labels);
    var srcDV = bldr.GetDataView();

    // Concatenate and normalize the features, then train.
    var pipeline = ML.Transforms.Concatenate("Features", "X1", "X2Important", "X3", "X4Rand")
        .AppendCacheCheckpoint(ML)
        .Append(ML.Transforms.Normalize("Features"));
    var data = pipeline.Fit(srcDV).Transform(srcDV);
    var model = trainer.Fit(data);

    // Compute contributions (up to 10 positive and 10 negative per row) and
    // compare the leading rows against the expected values.
    var args = new FeatureContributionCalculationTransform.Arguments() { Bottom = 10, Top = 10 };
    var output = FeatureContributionCalculationTransform.Create(Env, args, data, model.Model, model.FeatureColumn);
    var transformedOutput = output.AsEnumerable<ScoreAndContribution>(Env, true);

    int rowIndex = 0;
    foreach (var row in transformedOutput.Take(expectedValues.Count))
    {
        var expectedValue = expectedValues[rowIndex++];
        for (int i = 0; i < numFeatures; i++)
            Assert.Equal(expectedValue[i], row.FeatureContributions[i], precision);
    }
    Done();
}