/// <summary>
/// Recursively builds a symbolic-expression display tree that mirrors the structure of
/// the regression tree rooted at <paramref name="regressionNode"/>. Leaves are assigned
/// a running id (via <paramref name="nextId"/>) and their model plus the training/test
/// rows reaching them are recorded in the three dictionaries; inner nodes become text
/// nodes labeled with their split condition.
/// </summary>
/// <param name="regressionNode">Root of the (sub)tree to mirror.</param>
/// <param name="dict">Receives leaf id → leaf model.</param>
/// <param name="trainingLeafRows">Receives leaf id → training rows reaching that leaf.</param>
/// <param name="testLeafRows">Receives leaf id → test rows reaching that leaf.</param>
/// <param name="nextId">Mutable counter used to assign unique leaf ids.</param>
/// <param name="data">Dataset used to route rows at each split.</param>
/// <param name="trainingRows">Training rows reaching this node.</param>
/// <param name="testRows">Test rows reaching this node.</param>
/// <returns>The display node mirroring <paramref name="regressionNode"/>.</returns>
private static SymbolicExpressionTreeNode MirrorTree(RegressionNodeModel regressionNode, IDictionary<int, RegressionNodeModel> dict, IDictionary<int, IReadOnlyList<int>> trainingLeafRows, IDictionary<int, IReadOnlyList<int>> testLeafRows, IntValue nextId, IDataset data, IReadOnlyList<int> trainingRows, IReadOnlyList<int> testRows) {
  if (regressionNode.IsLeaf) {
    var i = nextId.Value++;
    dict.Add(i, regressionNode);
    trainingLeafRows.Add(i, trainingRows);
    testLeafRows.Add(i, testRows);
    return new SymbolicExpressionTreeNode(new TextSymbol("Model " + i + "\n(" + trainingRows.Count + "/" + testRows.Count + ")"));
  }

  // Label the inner node with its split condition; the pruning-strength suffix is only
  // built (and appended) when a pruning strength was actually set — the original code
  // constructed it unconditionally. NOTE(review): ToString("0.###") is culture-sensitive;
  // left as-is since the surrounding project appears to use current-culture formatting.
  var text = regressionNode.SplitAttribute + " <= " + regressionNode.SplitValue.ToString("0.###");
  if (!double.IsNaN(regressionNode.PruningStrength)) {
    text += "\npf = " + regressionNode.PruningStrength.ToString("0.###");
  }
  var textNode = new SymbolicExpressionTreeNode(new TextSymbol(text));

  // Route the rows of this node to the left/right child according to the split.
  IReadOnlyList<int> lTrainingRows, rTrainingRows;
  IReadOnlyList<int> lTestRows, rTestRows;
  RegressionTreeUtilities.SplitRows(trainingRows, data, regressionNode.SplitAttribute, regressionNode.SplitValue, out lTrainingRows, out rTrainingRows);
  RegressionTreeUtilities.SplitRows(testRows, data, regressionNode.SplitAttribute, regressionNode.SplitValue, out lTestRows, out rTestRows);

  textNode.AddSubtree(MirrorTree(regressionNode.Left, dict, trainingLeafRows, testLeafRows, nextId, data, lTrainingRows, lTestRows));
  textNode.AddSubtree(MirrorTree(regressionNode.Right, dict, trainingLeafRows, testLeafRows, nextId, data, rTrainingRows, rTestRows));
  return textNode;
}
/// <summary>
/// Builds a leaf model for <paramref name="regressionNode"/> from the training rows,
/// evaluates its root-mean-squared error on the pruning rows, and records pruning-set
/// size, model error, model complexity and cumulative node complexity in
/// <paramref name="state"/>.
/// NOTE(review): for inner nodes this reads the children's nodeComplexities entries,
/// so callers presumably visit the tree bottom-up — confirm against the pruning driver.
/// </summary>
private static void BuildPruningModel(RegressionNodeModel regressionNode, ILeafModel leaf, IReadOnlyList<int> trainingRows, IReadOnlyList<int> pruningRows, PruningState state, RegressionTreeParameters regressionTreeParams, CancellationToken cancellationToken) {
  //create regressionProblemdata from pruning data
  var vars = regressionTreeParams.AllowedInputVariables.Concat(new[] { regressionTreeParams.TargetVariable }).ToArray();
  var reducedData = new Dataset(vars, vars.Select(x => regressionTreeParams.Data.GetDoubleValues(x, pruningRows).ToList()));
  var pd = new RegressionProblemData(reducedData, regressionTreeParams.AllowedInputVariables, regressionTreeParams.TargetVariable);
  // Empty training partition (Start == End == 0); the entire reduced dataset becomes the
  // test partition so TestRootMeanSquaredError below is computed over all pruning rows.
  pd.TrainingPartition.Start = pd.TrainingPartition.End = pd.TestPartition.Start = 0;
  pd.TestPartition.End = reducedData.Rows;

  //build pruning model
  int numModelParams;
  var model = leaf.BuildModel(trainingRows, regressionTreeParams, cancellationToken, out numModelParams);

  //record error and complexities
  var rmsModel = model.CreateRegressionSolution(pd).TestRootMeanSquaredError;
  state.pruningSizes.Add(regressionNode, pruningRows.Count);
  state.modelErrors.Add(regressionNode, rmsModel);
  state.modelComplexities.Add(regressionNode, numModelParams);
  if (regressionNode.IsLeaf) {
    // Indexer assignment: a leaf's entry may be (re)written without throwing.
    state.nodeComplexities[regressionNode] = state.modelComplexities[regressionNode];
  } else {
    // Inner node: subtree complexity = both children's complexities + 1 for the split.
    // Add() intentionally throws if this node was already processed.
    state.nodeComplexities.Add(regressionNode, state.nodeComplexities[regressionNode.Left] + state.nodeComplexities[regressionNode.Right] + 1);
  }
}
/// <summary>
/// Copy constructor used by the cloning infrastructure. Scalar state is copied
/// directly; model and neighboring nodes go through the <paramref name="cloner"/>
/// so object identity is preserved consistently across one cloning operation.
/// </summary>
protected RegressionNodeModel(RegressionNodeModel original, Cloner cloner) : base(original, cloner) {
  // Plain value state.
  IsLeaf = original.IsLeaf;
  NumSamples = original.NumSamples;
  SplitAttribute = original.SplitAttribute;
  SplitValue = original.SplitValue;
  // Cloned references (leaf model and tree links).
  Model = cloner.Clone(original.Model);
  Left = cloner.Clone(original.Left);
  Right = cloner.Clone(original.Right);
  Parent = cloner.Clone(original.Parent);
}
/// <summary>
/// Builds the regression tree in three phases: create a root node, grow a
/// deliberately overfitted tree on the training rows, then prune it back using
/// the held-out pruning rows. Splitter and pruning strategy come from the
/// RegressionTreeParameters stored in <paramref name="statescope"/>.
/// </summary>
public void BuildModel(IReadOnlyList<int> trainingRows, IReadOnlyList<int> pruningRows, IScope statescope, ResultCollection results, CancellationToken cancellationToken) {
  var parameters = (RegressionTreeParameters)statescope.Variables[DecisionTreeRegression.RegressionTreeParameterVariableName].Value;

  // 1) start from a single root node
  Root = RegressionNodeModel.CreateNode(parameters.TargetVariable, parameters);
  // 2) grow the (overfitted) tree
  parameters.Splitter.Split(this, trainingRows, statescope, cancellationToken);
  // 3) prune it back
  parameters.Pruning.Prune(this, trainingRows, pruningRows, statescope, cancellationToken);
}
/// <summary>
/// Returns the root-mean-squared error of the subtree rooted at
/// <paramref name="regressionNode"/>: a leaf reports its recorded model error;
/// an inner node combines its children's errors as a size-weighted RMS over
/// this node's pruning rows.
/// </summary>
private static double SubtreeError(RegressionNodeModel regressionNode, IDictionary<RegressionNodeModel, int> pruningSizes, IDictionary<RegressionNodeModel, double> modelErrors) {
  if (regressionNode.IsLeaf) {
    return modelErrors[regressionNode];
  }
  var left = regressionNode.Left;
  var right = regressionNode.Right;
  var leftError = SubtreeError(left, pruningSizes, modelErrors);
  var rightError = SubtreeError(right, pruningSizes, modelErrors);
  // Sum of squared errors weighted by each child's pruning-set size,
  // renormalized by this node's pruning-set size.
  var weightedSse = rightError * rightError * pruningSizes[right]
                  + leftError * leftError * pruningSizes[left];
  return Math.Sqrt(weightedSse / pruningSizes[regressionNode]);
}
/// <summary>
/// Collects the depth of every leaf beneath <paramref name="n"/> into
/// <paramref name="res"/>; a node with no children counts as a leaf.
/// </summary>
private static void GetLeafDepths(RegressionNodeModel n, int depth, ICollection<int> res) {
  if (n == null) {
    return;
  }
  var isLeaf = n.Left == null && n.Right == null;
  if (isLeaf) {
    res.Add(depth);
    return;
  }
  GetLeafDepths(n.Left, depth + 1, res);
  GetLeafDepths(n.Right, depth + 1, res);
}
/// <summary>
/// Creates a tree node for leaf models that provide confidence estimates;
/// all initialization is delegated to the base node constructor.
/// </summary>
public ConfidenceRegressionNodeModel(RegressionNodeModel parent) : base(parent) { }
/// <summary>
/// Factory for tree nodes: when the configured leaf model provides confidence
/// estimates, a confidence-capable node is created; otherwise a plain one.
/// </summary>
private static RegressionNodeModel CreateNode(RegressionNodeModel parent, RegressionTreeParameters regressionTreeParams) {
  if (regressionTreeParams.LeafModel.ProvidesConfidence) {
    return new ConfidenceRegressionNodeModel(parent);
  }
  return new RegressionNodeModel(parent);
}
/// <summary>
/// Creates a child node of <paramref name="parent"/>: inherits the parent's target
/// variable (via the chained constructor) and starts out as a leaf — splitting may
/// later turn it into an inner node.
/// </summary>
private RegressionNodeModel(RegressionNodeModel parent) : this(parent.TargetVariable) {
  Parent = parent;
  IsLeaf = true;
}
/// <summary>
/// Copy constructor for the cloning infrastructure; deep-clones the whole tree
/// by cloning its root (node constructors clone their children in turn).
/// </summary>
protected RegressionNodeTreeModel(RegressionNodeTreeModel original, Cloner cloner) : base(original, cloner) {
  Root = cloner.Clone(original.Root);
}
/// <summary>
/// Seeds the algorithm's state scope with a fresh root node, stored under
/// <c>RootVariableName</c>. Tree parameters are read from the well-known
/// parameter variable already present in <paramref name="stateScope"/>.
/// </summary>
public static void Initialize(IScope stateScope) {
  var treeParameters = (RegressionTreeParameters)stateScope.Variables[DecisionTreeRegression.RegressionTreeParameterVariableName].Value;
  var root = RegressionNodeModel.CreateNode(treeParameters.TargetVariable, treeParameters);
  stateScope.Variables.Add(new Variable(RootVariableName, root));
}