/// <summary>
/// Estimates the class value for each requested row by running the forest and
/// returning the class with the highest predicted probability.
/// </summary>
/// <param name="dataset">Dataset supplying the input variable columns.</param>
/// <param name="rows">Row indices to evaluate (lazily, one value per row).</param>
/// <returns>The estimated class value (from <c>classValues</c>) for each row.</returns>
public override IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows) {
  double[,] inputData = dataset.ToArray(AllowedInputVariables, rows);
  RandomForestUtil.AssertInputMatrix(inputData);

  int n = inputData.GetLength(0);
  int columns = inputData.GetLength(1);
  double[] x = new double[columns];
  double[] y = new double[RandomForest.innerobj.nclasses];

  for (int row = 0; row < n; row++) {
    for (int column = 0; column < columns; column++) {
      x[column] = inputData[row, column];
    }
    // Use the RandomForest property (as GetEstimatedValues does) rather than the
    // backing field, so any initialization the property getter performs is honored.
    alglib.dfprocess(RandomForest, x, ref y);
    // dfprocess yields one probability per class; pick the class with the largest probability.
    int maxProbClassIndex = 0;
    double maxProb = y[0];
    for (int i = 1; i < y.Length; i++) {
      if (maxProb < y[i]) {
        maxProb = y[i];
        maxProbClassIndex = i;
      }
    }
    yield return classValues[maxProbClassIndex];
  }
}
/// <summary>
/// Builds a random forest regression model on the given training rows and reports
/// training and out-of-bag error measures.
/// </summary>
/// <param name="problemData">Regression problem data providing dataset, inputs and target.</param>
/// <param name="trainingIndices">Row indices used for training.</param>
/// <param name="nTrees">Number of trees in the forest.</param>
/// <param name="r">Fraction of rows sampled per tree.</param>
/// <param name="m">Fraction of features considered per split.</param>
/// <param name="seed">Random seed for forest construction.</param>
public static RandomForestModel CreateRegressionModel(IRegressionProblemData problemData, IEnumerable<int> trainingIndices, int nTrees, double r, double m, int seed, out double rmsError, out double outOfBagRmsError, out double avgRelError, out double outOfBagAvgRelError) {
  // Training matrix layout: allowed input variables followed by the target as last column.
  var variableNames = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
  double[,] trainingData = problemData.Dataset.ToArray(variableNames, trainingIndices);

  alglib.dfreport report;
  var forest = RandomForestUtil.CreateRandomForestModel(seed, trainingData, nTrees, r, m, 1, out report);

  rmsError = report.rmserror;
  outOfBagRmsError = report.oobrmserror;
  avgRelError = report.avgrelerror;
  outOfBagAvgRelError = report.oobavgrelerror;

  return new RandomForestModel(problemData.TargetVariable, forest, seed, problemData, nTrees, r, m);
}
/// <summary>
/// Yields, for each requested row, the population variance of the individual tree
/// predictions — a measure of the forest's prediction uncertainty for that row.
/// </summary>
/// <param name="dataset">Dataset supplying the input variable columns.</param>
/// <param name="rows">Row indices to evaluate (lazily, one value per row).</param>
public IEnumerable<double> GetEstimatedVariances(IDataset dataset, IEnumerable<int> rows) {
  double[,] data = dataset.ToArray(AllowedInputVariables, rows);
  RandomForestUtil.AssertInputMatrix(data);

  int rowCount = data.GetLength(0);
  int columnCount = data.GetLength(1);
  double[] input = new double[columnCount];
  double[] treeOutputs = new double[this.RandomForest.innerobj.ntrees];

  for (int rowIdx = 0; rowIdx < rowCount; rowIdx++) {
    for (int colIdx = 0; colIdx < columnCount; colIdx++) {
      input[colIdx] = data[rowIdx, colIdx];
    }
    // dfprocessraw fills treeOutputs with each individual tree's raw prediction.
    alglib.dforest.dfprocessraw(RandomForest.innerobj, input, ref treeOutputs);
    yield return treeOutputs.VariancePop();
  }
}
/// <summary>
/// Yields the forest's regression estimate for each requested row.
/// </summary>
/// <param name="dataset">Dataset supplying the input variable columns.</param>
/// <param name="rows">Row indices to evaluate (lazily, one value per row).</param>
public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows) {
  double[,] data = dataset.ToArray(AllowedInputVariables, rows);
  RandomForestUtil.AssertInputMatrix(data);

  int rowCount = data.GetLength(0);
  int columnCount = data.GetLength(1);
  double[] input = new double[columnCount];
  // Regression: dfprocess writes a single output value per row.
  double[] output = new double[1];

  for (int rowIdx = 0; rowIdx < rowCount; rowIdx++) {
    for (int colIdx = 0; colIdx < columnCount; colIdx++) {
      input[colIdx] = data[rowIdx, colIdx];
    }
    alglib.dfprocess(RandomForest, input, ref output);
    yield return output[0];
  }
}
/// <summary>
/// Trains an alglib decision forest on the given matrix (last column = target).
/// </summary>
/// <param name="seed">Seed for alglib's random number generator.</param>
/// <param name="inputMatrix">Rows of inputs with the target value in the last column.</param>
/// <param name="nTrees">Number of trees to build.</param>
/// <param name="r">Fraction of rows sampled per tree (at least one row is used).</param>
/// <param name="m">Fraction of features considered per split (at least one feature is used).</param>
/// <param name="nClasses">1 for regression, otherwise the number of classes.</param>
/// <param name="rep">Receives alglib's training/OOB error report.</param>
/// <exception cref="ArgumentException">Thrown when alglib reports a build failure.</exception>
internal static alglib.decisionforest CreateRandomForestModel(int seed, double[,] inputMatrix, int nTrees, double r, double m, int nClasses, out alglib.dfreport rep) {
  RandomForestUtil.AssertParameters(r, m);
  RandomForestUtil.AssertInputMatrix(inputMatrix);

  // NOTE(review): this reseeds alglib's process-wide RNG, so concurrent alglib
  // calls share this generator — confirm callers do not rely on isolation.
  alglib.math.rndobject = new System.Random(seed);

  var forest = new alglib.decisionforest();
  rep = new alglib.dfreport();

  int rowCount = inputMatrix.GetLength(0);
  int columnCount = inputMatrix.GetLength(1);
  int sampleSize = Math.Max((int)Math.Round(r * rowCount), 1);
  int featureCount = Math.Max((int)Math.Round(m * (columnCount - 1)), 1);

  int info = 0;
  alglib.dforest.dfbuildinternal(inputMatrix, rowCount, columnCount - 1, nClasses, nTrees, sampleSize, featureCount, alglib.dforest.dfusestrongsplits + alglib.dforest.dfuseevs, ref info, forest.innerobj, rep.innerobj);
  if (info != 1) {
    throw new ArgumentException("Error in calculation of random forest model");
  }
  return forest;
}
/// <summary>
/// Builds a random forest classification model on the given training rows and reports
/// training and out-of-bag error measures.
/// </summary>
/// <param name="problemData">Classification problem data providing dataset, inputs, target and class values.</param>
/// <param name="trainingIndices">Row indices used for training.</param>
/// <param name="nTrees">Number of trees in the forest.</param>
/// <param name="r">Fraction of rows sampled per tree.</param>
/// <param name="m">Fraction of features considered per split.</param>
/// <param name="seed">Random seed for forest construction.</param>
public static RandomForestModel CreateClassificationModel(IClassificationProblemData problemData, IEnumerable<int> trainingIndices, int nTrees, double r, double m, int seed, out double rmsError, out double outOfBagRmsError, out double relClassificationError, out double outOfBagRelClassificationError) {
  var variableNames = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
  double[,] trainingData = problemData.Dataset.ToArray(variableNames, trainingIndices);

  var classValues = problemData.ClassValues.ToArray();
  int nClasses = classValues.Length;

  // alglib expects class labels in [0..nClasses-1]; remap the original class values
  // in the target column (last column) in place.
  var classIndices = new Dictionary<double, double>();
  for (int i = 0; i < nClasses; i++) {
    classIndices[classValues[i]] = i;
  }
  int rowCount = trainingData.GetLength(0);
  int targetColumn = trainingData.GetLength(1) - 1;
  for (int row = 0; row < rowCount; row++) {
    trainingData[row, targetColumn] = classIndices[trainingData[row, targetColumn]];
  }

  alglib.dfreport report;
  var forest = RandomForestUtil.CreateRandomForestModel(seed, trainingData, nTrees, r, m, nClasses, out report);

  rmsError = report.rmserror;
  outOfBagRmsError = report.oobrmserror;
  relClassificationError = report.relclserror;
  outOfBagRelClassificationError = report.oobrelclserror;

  return new RandomForestModel(problemData.TargetVariable, forest, seed, problemData, nTrees, r, m, classValues);
}