Example #1
0
        /// <summary>
        /// Lazily classifies the requested rows: for each row the forest is evaluated and the
        /// entry of <c>classValues</c> at the index of the largest output is yielded.
        /// </summary>
        /// <param name="dataset">Dataset the input variable values are read from.</param>
        /// <param name="rows">Indices of the dataset rows to classify.</param>
        /// <returns>One class value per row of the extracted input matrix, in matrix row order.</returns>
        public override IEnumerable <double> GetEstimatedClassValues(IDataset dataset, IEnumerable <int> rows)
        {
            double[,] features = dataset.ToArray(AllowedInputVariables, rows);
            RandomForestUtil.AssertInputMatrix(features);

            int rowCount     = features.GetLength(0);
            int featureCount = features.GetLength(1);

            // Both buffers are allocated once and reused for every row.
            double[] sample = new double[featureCount];
            double[] scores = new double[RandomForest.innerobj.nclasses];

            for (int r = 0; r < rowCount; r++)
            {
                // Copy the current matrix row into the flat input vector.
                for (int c = 0; c < featureCount; c++)
                {
                    sample[c] = features[r, c];
                }

                alglib.dfprocess(randomForest, sample, ref scores);

                // Arg-max over the per-class outputs; the strict comparison keeps the
                // lowest index when several classes share the maximum.
                int winner = 0;
                for (int k = 1; k < scores.Length; k++)
                {
                    if (scores[winner] < scores[k])
                    {
                        winner = k;
                    }
                }

                yield return classValues[winner];
            }
        }
Example #2
0
        /// <summary>
        /// Builds a random forest for a regression problem from the given training rows and
        /// wraps the result in a <see cref="RandomForestModel"/>.
        /// </summary>
        /// <param name="problemData">Provides the dataset, the allowed input variables and the target variable.</param>
        /// <param name="trainingIndices">Dataset rows used to build the training matrix.</param>
        /// <param name="nTrees">Number of trees, forwarded to the forest builder.</param>
        /// <param name="r">Forwarded to the forest builder, which scales the row count by it to obtain the sample size.</param>
        /// <param name="m">Forwarded to the forest builder, which scales the input column count by it to obtain the feature count.</param>
        /// <param name="seed">Seed forwarded to the forest builder and stored in the returned model.</param>
        /// <param name="rmsError">Receives <c>rmserror</c> of the training report.</param>
        /// <param name="outOfBagRmsError">Receives <c>oobrmserror</c> of the training report.</param>
        /// <param name="avgRelError">Receives <c>avgrelerror</c> of the training report.</param>
        /// <param name="outOfBagAvgRelError">Receives <c>oobavgrelerror</c> of the training report.</param>
        /// <returns>A model constructed from the trained forest and the training parameters.</returns>
        public static RandomForestModel CreateRegressionModel(IRegressionProblemData problemData, IEnumerable <int> trainingIndices, int nTrees, double r, double m, int seed,
                                                              out double rmsError, out double outOfBagRmsError, out double avgRelError, out double outOfBagAvgRelError)
        {
            // Inputs first, target appended last (presumably making it the final matrix column).
            var columnNames = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
            double[,] trainingMatrix = problemData.Dataset.ToArray(columnNames, trainingIndices);

            // The regression path always passes 1 as the class count.
            const int classCount = 1;

            alglib.dfreport report;
            var forest = RandomForestUtil.CreateRandomForestModel(seed, trainingMatrix, nTrees, r, m, classCount, out report);

            // Hand the quality figures of the training report back to the caller.
            outOfBagAvgRelError = report.oobavgrelerror;
            avgRelError         = report.avgrelerror;
            outOfBagRmsError    = report.oobrmserror;
            rmsError            = report.rmserror;

            return new RandomForestModel(problemData.TargetVariable, forest, seed, problemData, nTrees, r, m);
        }
Example #3
0
        /// <summary>
        /// Lazily yields one variance figure per requested row: the raw forest outputs for the
        /// row are written into a buffer with one slot per tree and reduced with <c>VariancePop()</c>.
        /// </summary>
        /// <param name="dataset">Dataset the input variable values are read from.</param>
        /// <param name="rows">Indices of the dataset rows to evaluate.</param>
        /// <returns>One value per row of the extracted input matrix, in matrix row order.</returns>
        public IEnumerable <double> GetEstimatedVariances(IDataset dataset, IEnumerable <int> rows)
        {
            double[,] features = dataset.ToArray(AllowedInputVariables, rows);
            RandomForestUtil.AssertInputMatrix(features);

            int rowCount     = features.GetLength(0);
            int featureCount = features.GetLength(1);

            // Scratch buffers shared by all rows; the output buffer is sized to the tree count
            // (presumably one raw prediction per tree).
            double[] sample     = new double[featureCount];
            double[] rawOutputs = new double[this.RandomForest.innerobj.ntrees];

            for (int r = 0; r < rowCount; r++)
            {
                // Copy the current matrix row into the flat input vector.
                for (int c = 0; c < featureCount; c++)
                {
                    sample[c] = features[r, c];
                }

                alglib.dforest.dfprocessraw(RandomForest.innerobj, sample, ref rawOutputs);

                yield return rawOutputs.VariancePop();
            }
        }
Example #4
0
        /// <summary>
        /// Lazily evaluates the forest on the requested rows and yields the first (and only
        /// allocated) output component for each of them.
        /// </summary>
        /// <param name="dataset">Dataset the input variable values are read from.</param>
        /// <param name="rows">Indices of the dataset rows to evaluate.</param>
        /// <returns>One estimated value per row of the extracted input matrix, in matrix row order.</returns>
        public IEnumerable <double> GetEstimatedValues(IDataset dataset, IEnumerable <int> rows)
        {
            double[,] features = dataset.ToArray(AllowedInputVariables, rows);
            RandomForestUtil.AssertInputMatrix(features);

            int rowCount     = features.GetLength(0);
            int featureCount = features.GetLength(1);

            // Scratch buffers shared by all rows; the output buffer holds a single value.
            double[] sample = new double[featureCount];
            double[] output = new double[1];

            for (int r = 0; r < rowCount; r++)
            {
                // Copy the current matrix row into the flat input vector.
                for (int c = 0; c < featureCount; c++)
                {
                    sample[c] = features[r, c];
                }

                alglib.dfprocess(RandomForest, sample, ref output);

                yield return output[0];
            }
        }
Example #5
0
        /// <summary>
        /// Validates the parameters and the input matrix, seeds ALGLIB's random number source and
        /// builds a decision forest from the matrix.
        /// </summary>
        /// <param name="seed">Seed for the <see cref="System.Random"/> assigned to <c>alglib.math.rndobject</c>.</param>
        /// <param name="inputMatrix">Training data; all columns but the last are passed as input variables.</param>
        /// <param name="nTrees">Number of trees to build.</param>
        /// <param name="r">Factor applied to the row count to obtain the per-tree sample size (rounded, at least 1).</param>
        /// <param name="m">Factor applied to the input column count to obtain the feature count (rounded, at least 1).</param>
        /// <param name="nClasses">Class count handed to the ALGLIB builder.</param>
        /// <param name="rep">Receives the report object filled by the ALGLIB builder.</param>
        /// <returns>The decision forest that was built.</returns>
        /// <exception cref="ArgumentException">The ALGLIB builder reported a status other than 1.</exception>
        internal static alglib.decisionforest CreateRandomForestModel(int seed, double[,] inputMatrix, int nTrees, double r, double m, int nClasses, out alglib.dfreport rep)
        {
            RandomForestUtil.AssertParameters(r, m);
            RandomForestUtil.AssertInputMatrix(inputMatrix);

            // Note: this replaces ALGLIB's shared random object.
            alglib.math.rndobject = new System.Random(seed);

            var forest = new alglib.decisionforest();
            rep = new alglib.dfreport();

            int rowCount      = inputMatrix.GetLength(0);
            int variableCount = inputMatrix.GetLength(1) - 1; // last column is not an input variable

            // Scale by the ratios, round, and never go below one sample / one feature.
            int samplesPerTree = (int)Math.Round(r * rowCount);
            if (samplesPerTree < 1)
            {
                samplesPerTree = 1;
            }

            int featuresPerSplit = (int)Math.Round(m * variableCount);
            if (featuresPerSplit < 1)
            {
                featuresPerSplit = 1;
            }

            var buildFlags = alglib.dforest.dfusestrongsplits + alglib.dforest.dfuseevs;
            int status     = 0;

            alglib.dforest.dfbuildinternal(inputMatrix, rowCount, variableCount, nClasses, nTrees, samplesPerTree, featuresPerSplit, buildFlags, ref status, forest.innerobj, rep.innerobj);

            // Only status 1 is treated as success.
            if (status == 1)
            {
                return forest;
            }

            throw new ArgumentException("Error in calculation of random forest model");
        }
Example #6
0
        /// <summary>
        /// Builds a random forest for a classification problem from the given training rows and
        /// wraps the result in a <see cref="RandomForestModel"/>. Before training, the values in
        /// the last matrix column are replaced by their position within <c>problemData.ClassValues</c>.
        /// </summary>
        /// <param name="problemData">Provides the dataset, the allowed input variables, the target variable and the class values.</param>
        /// <param name="trainingIndices">Dataset rows used to build the training matrix.</param>
        /// <param name="nTrees">Number of trees, forwarded to the forest builder.</param>
        /// <param name="r">Forwarded to the forest builder, which scales the row count by it to obtain the sample size.</param>
        /// <param name="m">Forwarded to the forest builder, which scales the input column count by it to obtain the feature count.</param>
        /// <param name="seed">Seed forwarded to the forest builder and stored in the returned model.</param>
        /// <param name="rmsError">Receives <c>rmserror</c> of the training report.</param>
        /// <param name="outOfBagRmsError">Receives <c>oobrmserror</c> of the training report.</param>
        /// <param name="relClassificationError">Receives <c>relclserror</c> of the training report.</param>
        /// <param name="outOfBagRelClassificationError">Receives <c>oobrelclserror</c> of the training report.</param>
        /// <returns>A model constructed from the trained forest, the training parameters and the class values.</returns>
        /// <remarks>
        /// The dictionary lookup throws <c>KeyNotFoundException</c> when the last column contains a
        /// value that is not among <c>problemData.ClassValues</c>.
        /// </remarks>
        public static RandomForestModel CreateClassificationModel(IClassificationProblemData problemData, IEnumerable <int> trainingIndices, int nTrees, double r, double m, int seed,
                                                                  out double rmsError, out double outOfBagRmsError, out double relClassificationError, out double outOfBagRelClassificationError)
        {
            // Inputs first, target appended last, so the target occupies the final matrix column.
            var columnNames = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
            double[,] trainingMatrix = problemData.Dataset.ToArray(columnNames, trainingIndices);

            var classValues = problemData.ClassValues.ToArray();
            int classCount  = classValues.Length;

            // Lookup table: original class value -> index in [0..classCount-1].
            var indexOfClass = new Dictionary <double, double>();
            int nextIndex    = 0;
            foreach (var classValue in classValues)
            {
                indexOfClass[classValue] = nextIndex;
                nextIndex++;
            }

            // Rewrite the target column in place with the class indices.
            int targetColumn = trainingMatrix.GetLength(1) - 1;
            int rowCount     = trainingMatrix.GetLength(0);
            for (int r0 = 0; r0 < rowCount; r0++)
            {
                double original = trainingMatrix[r0, targetColumn];
                trainingMatrix[r0, targetColumn] = indexOfClass[original];
            }

            alglib.dfreport report;
            var forest = RandomForestUtil.CreateRandomForestModel(seed, trainingMatrix, nTrees, r, m, classCount, out report);

            // Hand the quality figures of the training report back to the caller.
            outOfBagRelClassificationError = report.oobrelclserror;
            relClassificationError         = report.relclserror;
            outOfBagRmsError               = report.oobrmserror;
            rmsError                       = report.rmserror;

            return new RandomForestModel(problemData.TargetVariable, forest, seed, problemData, nTrees, r, m, classValues);
        }