/// <summary>
/// Computes permutation feature importance (PFI) for a regression predictor and
/// returns the per-feature metrics as an <see cref="IDataView"/>, one row per
/// named feature slot (slots with blank/whitespace names are skipped).
/// </summary>
/// <param name="env">Host environment used to construct catalogs and the prediction transformer.</param>
/// <param name="predictor">The trained predictor; must produce <c>float</c> scores.</param>
/// <param name="roleMappedData">Data with Feature/Label role mappings used to locate the relevant columns.</param>
/// <param name="input">PFI knobs: feature-weight filtering, example count, and permutation count.</param>
/// <returns>An <see cref="IDataView"/> of <see cref="RegressionMetrics"/> rows.</returns>
private static IDataView GetRegressionMetrics(
            IHostEnvironment env,
            IPredictor predictor,
            RoleMappedData roleMappedData,
            PermutationFeatureImportanceArguments input)
        {
            var roles = roleMappedData.Schema.GetColumnRoleNames();
            var featureColumnName = roles.First(x => x.Key.Value == RoleMappedSchema.ColumnRole.Feature.Value).Value;
            var labelColumnName = roles.First(x => x.Key.Value == RoleMappedSchema.ColumnRole.Label.Value).Value;

            // The entry point hands us an untyped IPredictor; PFI for regression needs a
            // float-producing predictor. Fail fast with a clear message instead of letting
            // a silent null from the `as` cast surface later as a NullReferenceException.
            var floatPredictor = predictor as IPredictorProducing<float>;
            Contracts.Check(floatPredictor != null, "Regression PFI requires a predictor producing float scores.");

            var pred = new RegressionPredictionTransformer<IPredictorProducing<float>>(
                env, floatPredictor, roleMappedData.Data.Schema, featureColumnName);
            var regressionCatalog = new RegressionCatalog(env);
            var permutationMetrics = regressionCatalog
                                     .PermutationFeatureImportance(pred,
                                                                   roleMappedData.Data,
                                                                   labelColumnName: labelColumnName,
                                                                   useFeatureWeightFilter: input.UseFeatureWeightFilter,
                                                                   numberOfExamplesToUse: input.NumberOfExamplesToUse,
                                                                   permutationCount: input.PermutationCount);

            var slotNames = GetSlotNames(roleMappedData.Schema);

            Contracts.Assert(slotNames.Length == permutationMetrics.Length,
                             "Mismatch between number of feature slots and number of features permuted.");

            var metrics = new List<RegressionMetrics>();

            for (int i = 0; i < permutationMetrics.Length; i++)
            {
                // Unnamed slots carry no useful feature identity; skip them.
                if (string.IsNullOrWhiteSpace(slotNames[i]))
                    continue;

                var pMetric = permutationMetrics[i];
                metrics.Add(new RegressionMetrics
                {
                    FeatureName                = slotNames[i],
                    MeanAbsoluteError          = pMetric.MeanAbsoluteError.Mean,
                    MeanAbsoluteErrorStdErr    = pMetric.MeanAbsoluteError.StandardError,
                    MeanSquaredError           = pMetric.MeanSquaredError.Mean,
                    MeanSquaredErrorStdErr     = pMetric.MeanSquaredError.StandardError,
                    RootMeanSquaredError       = pMetric.RootMeanSquaredError.Mean,
                    RootMeanSquaredErrorStdErr = pMetric.RootMeanSquaredError.StandardError,
                    LossFunction               = pMetric.LossFunction.Mean,
                    LossFunctionStdErr         = pMetric.LossFunction.StandardError,
                    RSquared                   = pMetric.RSquared.Mean,
                    RSquaredStdErr             = pMetric.RSquared.StandardError
                });
            }

            var dataOps = new DataOperationsCatalog(env);
            return dataOps.LoadFromEnumerable(metrics);
        }
        public void FastTreeRegressionRepresentation()
        {
            // Train a small FastTree regression model on the generated regression
            // dataset and verify the structure of the first trained tree against
            // known-good values (seeded, single-threaded, so fully deterministic).
            var mlContext = new MLContext(seed: 0);
            var trainFile = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
            var source = new MultiFileSource(trainFile);

            var regression = new RegressionCatalog(mlContext);

            var loader = TextLoaderStatic.CreateLoader(mlContext,
                c => (label: c.LoadFloat(11), features: c.LoadFloat(0, 10)),
                separator: ';', hasHeader: true);

            var options = new FastTreeRegressionTrainer.Options()
            {
                NumTrees = 10,
                NumLeaves = 5,
                NumThreads = 1
            };

            FastTreeRegressionModelParameters fitted = null;

            var estimator = loader.MakeNewEstimator()
                .Append(r => (r.label, score: regression.Trainers.FastTree(r.label, r.features, null, options,
                    onFit: p => { fitted = p; })));

            var pipeline = loader.Append(estimator);

            // Model parameters are only captured via onFit once Fit actually runs.
            Assert.Null(fitted);
            var model = pipeline.Fit(source);
            Assert.NotNull(fitted);

            var ensemble = fitted.TrainedTreeEnsemble;
            Assert.Equal(0, ensemble.Bias);
            Assert.Equal(10, ensemble.Trees.Count);
            Assert.Equal(10, ensemble.TreeWeights.Count);

            var firstTree = ensemble.Trees[0];
            Assert.Equal(4, firstTree.NumNodes);

            // All splits are numerical, so the categorical metadata at node 0 is empty.
            Assert.Equal(0, firstTree.GetCategoricalSplitFeaturesAt(0).Count);
            Assert.Equal(0, firstTree.GetCategoricalCategoricalSplitFeatureRangeAt(0).Count);

            Assert.Equal(4, firstTree.GtChild.Count);
            Assert.Equal(new int[] { 3, 2, -4, -5 }, firstTree.GtChild);

            Assert.Equal(4, firstTree.LteChild.Count);
            Assert.Equal(new int[] { 1, -1, -3, -2 }, firstTree.LteChild);

            Assert.Equal(4, firstTree.CategoricalSplitFlags.Count);
            Assert.Equal(new bool[] { false, false, false, false }, firstTree.CategoricalSplitFlags);

            Assert.Equal(4, firstTree.NumericalSplitFeatureIndexes.Count);
            Assert.Equal(new int[] { 0, 10, 2, 10 }, firstTree.NumericalSplitFeatureIndexes);

            // Float thresholds are compared element-wise with 6-digit precision.
            var expectedThresholds = new float[] { 0.14f, -0.645f, -0.095f, 0.31f };
            Assert.Equal(4, firstTree.NumericalSplitThresholds.Count);
            for (int node = 0; node < firstTree.NumericalSplitThresholds.Count; ++node)
            {
                Assert.Equal(expectedThresholds[node], firstTree.NumericalSplitThresholds[node], 6);
            }

            Assert.Equal(5, firstTree.NumLeaves);

            var expectedLeafValues = new double[] { 40.159015006449692, 80.434805844435061, 57.072130551545513, 82.898710076162757, 104.17547955322266 };
            Assert.Equal(5, firstTree.LeafValues.Count);
            for (int leaf = 0; leaf < firstTree.LeafValues.Count; ++leaf)
            {
                Assert.Equal(expectedLeafValues[leaf], firstTree.LeafValues[leaf], 6);
            }
        }
        /// <summary>
        /// Trains a small FastTree regression model with one-hot-encoded (categorical)
        /// features and verifies the categorical-split structure of the first trained
        /// tree against known-good values. Fixes the reversed
        /// <c>Assert.Equal(actual, expected)</c> argument order on the categorical-range
        /// checks so failure messages label expected/actual correctly.
        /// </summary>
        public void FastTreeRegressionRepresentationWithCategoricalSplit()
        {
            var env        = new MLContext(seed: 0);
            var dataPath   = GetDataPath(TestDatasets.generatedRegressionDataset.trainFilename);
            var dataSource = new MultiFileSource(dataPath);

            var catalog = new RegressionCatalog(env);

            // Load the features as text so one-hot encoding produces categorical slots.
            var reader = TextLoaderStatic.CreateLoader(env,
                                                       c => (label: c.LoadFloat(11), features: c.LoadText(0, 10)),
                                                       separator: ';', hasHeader: true);

            FastTreeRegressionModelParameters pred = null;

            var opts = new FastTreeRegressionTrainer.Options()
            {
                CategoricalSplit = true,
                NumTrees         = 3,
                NumLeaves        = 5,
                NumThreads       = 1,
                // This is the minimal samples to form a split (i.e., generating two extra nodes/leaves). For a small data set,
                // we should set a small value. Otherwise, the trained trees could be empty.
                MinDocumentsInLeafs = 2
            };

            var est = reader.MakeNewEstimator()
                      .Append(r => (r.label, features: r.features.OneHotEncoding()))
                      .Append(r => (r.label, score: catalog.Trainers.FastTree(r.label, r.features, null, opts,
                                                                              onFit: (p) => { pred = p; })));

            var pipe = reader.Append(est);

            // Model parameters are only captured via onFit once Fit actually runs.
            Assert.Null(pred);
            var model = pipe.Fit(dataSource);

            Assert.NotNull(pred);

            var treeCollection = pred.TrainedTreeEnsemble;

            Assert.Equal(0, treeCollection.Bias);
            Assert.Equal(3, treeCollection.Trees.Count);
            Assert.Equal(3, treeCollection.TreeWeights.Count);

            var trees = treeCollection.Trees;

            Assert.Equal(4, trees[0].NumNodes);

            var expectedGtChild = new int[] { 3, -3, -4, -5 };

            Assert.Equal(4, trees[0].GtChild.Count);
            Assert.Equal(expectedGtChild, trees[0].GtChild);

            var expectedLteChild = new int[] { 1, 2, -1, -2 };

            Assert.Equal(4, trees[0].LteChild.Count);
            Assert.Equal(expectedLteChild, trees[0].LteChild);

            // With CategoricalSplit enabled, every node of the first tree splits categorically.
            var expectedCategoricalSplitFlags = new bool[] { true, true, true, true };

            Assert.Equal(4, trees[0].CategoricalSplitFlags.Count);
            Assert.Equal(expectedCategoricalSplitFlags, trees[0].CategoricalSplitFlags);

            var expectedNumericalSplitFeatureIndexes = new int[] { 5312, 2, 2126, 533 };

            Assert.Equal(4, trees[0].NumericalSplitFeatureIndexes.Count);
            Assert.Equal(expectedNumericalSplitFeatureIndexes, trees[0].NumericalSplitFeatureIndexes);

            var expectedNumericalSplitThresholds = new float[] { 0.5f, 0.5f, 0.5f, 0.5f };

            Assert.Equal(4, trees[0].NumericalSplitThresholds.Count);
            for (int i = 0; i < trees[0].NumericalSplitThresholds.Count; ++i)
            {
                Assert.Equal(expectedNumericalSplitThresholds[i], trees[0].NumericalSplitThresholds[i], 6);
            }

            // xUnit convention is Assert.Equal(expected, actual); the original had these
            // four calls reversed, which swaps the labels in failure diagnostics.
            var actualCategoricalRanges0 = trees[0].GetCategoricalCategoricalSplitFeatureRangeAt(0);

            Assert.Equal(new int[] { 5312, 5782 }, actualCategoricalRanges0);

            var actualCategoricalRanges1 = trees[0].GetCategoricalCategoricalSplitFeatureRangeAt(1);

            Assert.Equal(new int[] { 2, 417 }, actualCategoricalRanges1);

            var actualCategoricalRanges2 = trees[0].GetCategoricalCategoricalSplitFeatureRangeAt(2);

            Assert.Equal(new int[] { 2126, 2593 }, actualCategoricalRanges2);

            var actualCategoricalRanges3 = trees[0].GetCategoricalCategoricalSplitFeatureRangeAt(3);

            Assert.Equal(new int[] { 533, 983 }, actualCategoricalRanges3);

            // For each node, check the count plus the first and last categorical split
            // feature indexes (spot checks rather than full-array comparisons).
            int[] expectedCounts = { 62, 52, 54, 22 };
            int[] expectedStarts = { 5315, 10, 2141, 533 };
            int[] expectedEnds   = { 5782, 401, 2558, 874 };
            for (int i = 0; i < trees[0].NumNodes; ++i)
            {
                // Retrieve i-th node's split features.
                var actualCategoricalSplitFeatures = trees[0].GetCategoricalSplitFeaturesAt(i);
                Assert.Equal(expectedCounts[i], actualCategoricalSplitFeatures.Count);
                Assert.Equal(expectedStarts[i], actualCategoricalSplitFeatures[0]);
                Assert.Equal(expectedEnds[i], actualCategoricalSplitFeatures[expectedCounts[i] - 1]);
            }

            Assert.Equal(5, trees[0].NumLeaves);

            var expectedLeafValues = new double[] { 48.456055413607892, 86.584156799316418, 87.017326642027, 76.381184971185391, 117.68872643673058 };

            Assert.Equal(5, trees[0].LeafValues.Count);
            for (int i = 0; i < trees[0].LeafValues.Count; ++i)
            {
                Assert.Equal(expectedLeafValues[i], trees[0].LeafValues[i], 6);
            }
        }