// Deep-copy constructor for HeuristicLab's cloning infrastructure: value fields are
// copied directly, object-valued fields go through the cloner to preserve identity sharing.
private RegressionTreeParameters(RegressionTreeParameters original, Cloner cloner) : base(original, cloner) {
  minLeafSize = original.minLeafSize;
  problemData = cloner.Clone(original.problemData);
  random = cloner.Clone(original.random);
  splitter = cloner.Clone(original.splitter);
  pruning = cloner.Clone(original.pruning);
  leafModel = cloner.Clone(original.leafModel);
}
// Builds the state scope that carries everything the decision-tree operators need:
// the reduced problem data, the shared tree parameters, the (still unbuilt) model,
// and the training/pruning row partitions.
private static IScope InitializeScope(IRandom random, IRegressionProblemData problemData, IPruning pruning, int minLeafSize, ILeafModel leafModel, ISplitter splitter, bool generateRules, bool useHoldout, double holdoutSize) {
  var scope = new Scope("RegressionTreeStateScope");

  // Reduce the problem data to AllowedInput + Target column-wise and to the
  // training partition row-wise; only numeric (double) features are supported.
  var numericVariables = new HashSet<string>(problemData.Dataset.DoubleVariables);
  var variables = problemData.AllowedInputVariables.Concat(new[] { problemData.TargetVariable }).ToArray();
  if (!variables.All(numericVariables.Contains))
    throw new NotSupportedException("Decision tree regression supports only double valued input or output features.");

  var columns = variables.Select(name => problemData.Dataset.GetDoubleValues(name, problemData.TrainingIndices).ToArray()).ToArray();
  if (columns.SelectMany(col => col).Any(v => double.IsNaN(v) || double.IsInfinity(v)))
    throw new NotSupportedException("Decision tree regression does not support NaN or infinity values in the input dataset.");

  var reducedData = new Dataset(variables, columns);
  var reducedProblemData = new RegressionProblemData(reducedData, problemData.AllowedInputVariables, problemData.TargetVariable);
  // All rows of the reduced dataset form the training partition; the test partition is empty.
  var rowCount = reducedProblemData.Dataset.Rows;
  reducedProblemData.TrainingPartition.Start = 0;
  reducedProblemData.TrainingPartition.End = rowCount;
  reducedProblemData.TestPartition.Start = rowCount;
  reducedProblemData.TestPartition.End = rowCount;

  // Make the shared tree parameters available to all operators via the scope.
  var treeParams = new RegressionTreeParameters(pruning, minLeafSize, leafModel, reducedProblemData, random, splitter);
  scope.Variables.Add(new Variable(RegressionTreeParameterVariableName, treeParams));

  // Let each tree operator install its own state into the scope.
  pruning.Initialize(scope);
  splitter.Initialize(scope);
  leafModel.Initialize(scope);

  // Create the unbuilt model: either a rule set or a single node tree.
  IItem model;
  if (generateRules) {
    model = RegressionRuleSetModel.CreateRuleModel(problemData.TargetVariable, treeParams);
    RegressionRuleSetModel.Initialize(scope);
  } else {
    model = RegressionNodeTreeModel.CreateTreeModel(problemData.TargetVariable, treeParams);
  }
  scope.Variables.Add(new Variable(ModelVariableName, model));

  // Split the training rows into a build set and a pruning (holdout) set.
  IReadOnlyList<int> trainingSet, pruningSet;
  GeneratePruningSet(reducedProblemData.TrainingIndices.ToArray(), random, useHoldout, holdoutSize, out trainingSet, out pruningSet);
  scope.Variables.Add(new Variable(TrainingSetVariableName, new IntArray(trainingSet.ToArray())));
  scope.Variables.Add(new Variable(PruningSetVariableName, new IntArray(pruningSet.ToArray())));

  return scope;
}
// Turns this leaf into an inner node: records the split decision and
// attaches two freshly created (empty) child nodes.
internal void Split(RegressionTreeParameters regressionTreeParams, string splitAttribute, double splitValue, int numSamples) {
  SplitAttribute = splitAttribute;
  SplitValue = splitValue;
  NumSamples = numSamples;
  Left = CreateNode(this, regressionTreeParams);
  Right = CreateNode(this, regressionTreeParams);
  IsLeaf = false;
}
/// <summary>
/// Re-fits the leaf models of an existing decision-tree model on the given problem data.
/// </summary>
/// <param name="model">The tree (or rule set) model whose leaves are rebuilt.</param>
/// <param name="problemData">Problem data whose training indices are used for the update.</param>
/// <param name="random">Random number generator forwarded to the tree parameters.</param>
/// <param name="leafModel">Leaf model prototype; initialized into the temporary scope.</param>
/// <param name="cancellationToken">Optional token; defaults to a non-cancellable token.</param>
public static void UpdateModel(IDecisionTreeModel model, IRegressionProblemData problemData, IRandom random, ILeafModel leafModel, CancellationToken? cancellationToken = null) {
  // Idiomatic null-coalescing instead of mutating the parameter with an explicit null check.
  var token = cancellationToken ?? CancellationToken.None;
  var regressionTreeParameters = new RegressionTreeParameters(leafModel, problemData, random);
  // The scope is the communication channel between the leaf model and the tree update.
  var scope = new Scope();
  scope.Variables.Add(new Variable(RegressionTreeParameterVariableName, regressionTreeParameters));
  leafModel.Initialize(scope);
  model.Update(problemData.TrainingIndices.ToList(), scope, token);
}
// Fits a leaf model on the given subset of rows: the data is first reduced to those
// rows, wrapped in a fresh problem data whose training partition covers everything,
// then handed to the concrete Build implementation (optionally dampened afterwards).
public IRegressionModel BuildModel(IReadOnlyList<int> rows, RegressionTreeParameters parameters, CancellationToken cancellation, out int numberOfParameters) {
  var inputVariables = parameters.AllowedInputVariables.ToArray();
  var reducedData = RegressionTreeUtilities.ReduceDataset(parameters.Data, rows, inputVariables, parameters.TargetVariable);
  var pd = new RegressionProblemData(reducedData, inputVariables, parameters.TargetVariable);
  // Every reduced row belongs to the training partition; the test partition is empty.
  pd.TrainingPartition.Start = 0;
  pd.TrainingPartition.End = reducedData.Rows;
  pd.TestPartition.Start = reducedData.Rows;
  pd.TestPartition.End = reducedData.Rows;

  int numP;
  var model = Build(pd, parameters.Random, cancellation, out numP);
  if (UseDampening && Dampening > 0.0) {
    model = DampenedModel.DampenModel(model, pd, Dampening);
  }

  numberOfParameters = numP;
  cancellation.ThrowIfCancellationRequested();
  return model;
}
// Factory: picks the confidence-capable rule model when the configured
// leaf model can provide confidence estimates, otherwise the plain one.
internal static RegressionRuleModel CreateRuleModel(string target, RegressionTreeParameters regressionTreeParams) {
  if (regressionTreeParams.LeafModel.ProvidesConfidence)
    return new ConfidenceRegressionRuleModel(target);
  return new RegressionRuleModel(target);
}
// Factory: creates a child node of the given parent, choosing the
// confidence-capable variant when the leaf model supports confidence estimates.
private static RegressionNodeModel CreateNode(RegressionNodeModel parent, RegressionTreeParameters regressionTreeParams) {
  if (regressionTreeParams.LeafModel.ProvidesConfidence)
    return new ConfidenceRegressionNodeModel(parent);
  return new RegressionNodeModel(parent);
}
// Factory: creates a root node for the given target attribute, choosing the
// confidence-capable variant when the leaf model supports confidence estimates.
public static RegressionNodeModel CreateNode(string targetAttr, RegressionTreeParameters regressionTreeParams) {
  if (regressionTreeParams.LeafModel.ProvidesConfidence)
    return new ConfidenceRegressionNodeModel(targetAttr);
  return new RegressionNodeModel(targetAttr);
}
// Fits a replacement leaf model for a single node and records the statistics the
// pruning decision needs: pruning-set RMSE, model parameter count, and accumulated
// subtree complexity. All results are written into the PruningState dictionaries.
private static void BuildPruningModel(RegressionNodeModel regressionNode, ILeafModel leaf, IReadOnlyList <int> trainingRows, IReadOnlyList <int> pruningRows, PruningState state, RegressionTreeParameters regressionTreeParams, CancellationToken cancellationToken) {
  //create regressionProblemdata from pruning data
  var vars = regressionTreeParams.AllowedInputVariables.Concat(new[] { regressionTreeParams.TargetVariable }).ToArray();
  var reducedData = new Dataset(vars, vars.Select(x => regressionTreeParams.Data.GetDoubleValues(x, pruningRows).ToList()));
  var pd = new RegressionProblemData(reducedData, regressionTreeParams.AllowedInputVariables, regressionTreeParams.TargetVariable);
  // Empty training partition; the whole reduced dataset (the pruning rows) forms the
  // test partition, so TestRootMeanSquaredError below is the error on the pruning set.
  pd.TrainingPartition.Start = pd.TrainingPartition.End = pd.TestPartition.Start = 0;
  pd.TestPartition.End = reducedData.Rows;
  //build pruning model
  // The candidate leaf model is fitted on the training rows but evaluated on the pruning rows.
  int numModelParams;
  var model = leaf.BuildModel(trainingRows, regressionTreeParams, cancellationToken, out numModelParams);
  //record error and complexities
  var rmsModel = model.CreateRegressionSolution(pd).TestRootMeanSquaredError;
  state.pruningSizes.Add(regressionNode, pruningRows.Count);
  state.modelErrors.Add(regressionNode, rmsModel);
  state.modelComplexities.Add(regressionNode, numModelParams);
  // For leaves the node complexity equals the model complexity; for inner nodes it is the
  // sum of both children's node complexities plus one for the split itself.
  // NOTE(review): the else-branch requires both children to already be present in
  // nodeComplexities — presumably guaranteed by the bottom-up processing order
  // established elsewhere (FillBottomUp); confirm against the caller.
  if (regressionNode.IsLeaf) { state.nodeComplexities[regressionNode] = state.modelComplexities[regressionNode]; }
  else { state.nodeComplexities.Add(regressionNode, state.nodeComplexities[regressionNode.Left] + state.nodeComplexities[regressionNode.Right] + 1); }
}
// Builds a pruning model for every node of the tree. The work is driven by the queues
// in PruningState so that a cancelled run can be resumed: state.Code acts as a
// checkpoint marker (0 = queues not yet filled, 1 = filling done).
private static void InstallModels(RegressionNodeTreeModel tree, PruningState state, IReadOnlyList <int> trainingRows, IReadOnlyList <int> pruningRows, ILeafModel leaf, RegressionTreeParameters regressionTreeParams, CancellationToken cancellationToken) {
  if (state.Code == 0) {
    state.FillBottomUp(tree, trainingRows, pruningRows, regressionTreeParams.Data);
    state.Code = 1;
  }
  while (state.nodeQueue.Count != 0) {
    cancellationToken.ThrowIfCancellationRequested();
    // Deliberately Peek first and Dequeue only after BuildPruningModel succeeds:
    // if cancellation (or an exception) interrupts the loop, the current entry is
    // still in the queues and will be re-processed on the next call.
    var n = state.nodeQueue.Peek();
    var training = state.trainingRowsQueue.Peek();
    var pruning = state.pruningRowsQueue.Peek();
    BuildPruningModel(n, leaf, training, pruning, state, regressionTreeParams, cancellationToken);
    state.nodeQueue.Dequeue();
    state.trainingRowsQueue.Dequeue();
    state.pruningRowsQueue.Dequeue();
  }
}