/// <summary>
/// Recursively builds a tree of text nodes that mirrors the regression tree rooted at
/// <paramref name="regressionNode"/>, while partitioning the training and test rows along the splits.
/// </summary>
/// <param name="regressionNode">The regression tree node to mirror.</param>
/// <param name="dict">Receives every leaf node, keyed by the id assigned to it.</param>
/// <param name="trainingLeafRows">Receives the training rows that reach each leaf, keyed by leaf id.</param>
/// <param name="testLeafRows">Receives the test rows that reach each leaf, keyed by leaf id.</param>
/// <param name="nextId">Counter providing the next leaf id; it is incremented once per leaf.</param>
/// <param name="data">Dataset passed to <c>RegressionTreeUtilities.SplitRows</c> to partition the rows.</param>
/// <param name="trainingRows">Training rows that reach <paramref name="regressionNode"/>.</param>
/// <param name="testRows">Test rows that reach <paramref name="regressionNode"/>.</param>
/// <returns>The text node representing <paramref name="regressionNode"/> and its subtrees.</returns>
private static SymbolicExpressionTreeNode MirrorTree(RegressionNodeModel regressionNode, IDictionary<int, RegressionNodeModel> dict, IDictionary<int, IReadOnlyList<int>> trainingLeafRows, IDictionary<int, IReadOnlyList<int>> testLeafRows, IntValue nextId, IDataset data, IReadOnlyList<int> trainingRows, IReadOnlyList<int> testRows) {
  if (!regressionNode.IsLeaf) {
    // Inner node: the label shows the split condition and, if defined, the pruning strength.
    var label = regressionNode.SplitAttribute + " <= " + regressionNode.SplitValue.ToString("0.###");
    if (!double.IsNaN(regressionNode.PruningStrength)) {
      label += "\npf = " + regressionNode.PruningStrength.ToString("0.###");
    }
    var innerNode = new SymbolicExpressionTreeNode(new TextSymbol(label));

    IReadOnlyList<int> leftTraining, rightTraining;
    RegressionTreeUtilities.SplitRows(trainingRows, data, regressionNode.SplitAttribute, regressionNode.SplitValue, out leftTraining, out rightTraining);
    IReadOnlyList<int> leftTest, rightTest;
    RegressionTreeUtilities.SplitRows(testRows, data, regressionNode.SplitAttribute, regressionNode.SplitValue, out leftTest, out rightTest);

    // The left subtree must be mirrored before the right one so that leaf ids are handed out in the same order.
    var leftMirror = MirrorTree(regressionNode.Left, dict, trainingLeafRows, testLeafRows, nextId, data, leftTraining, leftTest);
    innerNode.AddSubtree(leftMirror);
    var rightMirror = MirrorTree(regressionNode.Right, dict, trainingLeafRows, testLeafRows, nextId, data, rightTraining, rightTest);
    innerNode.AddSubtree(rightMirror);
    return innerNode;
  }

  // Leaf: take the next id, register the leaf and its rows, and label it with the row counts.
  var leafId = nextId.Value++;
  dict.Add(leafId, regressionNode);
  trainingLeafRows.Add(leafId, trainingRows);
  testLeafRows.Add(leafId, testRows);
  var caption = "Model " + leafId + "\n(" + trainingRows.Count + "/" + testRows.Count + ")";
  return new SymbolicExpressionTreeNode(new TextSymbol(caption));
}
/// <summary>
/// Walks <paramref name="tree"/> breadth-first from its root, partitioning the training rows at every
/// inner node, and stores each leaf together with the rows that reach it in this instance's
/// <c>nodeQueue</c> and <c>trainingRowsQueue</c>. Both queues are cleared before they are filled.
/// </summary>
/// <param name="tree">The regression tree whose leaves are collected.</param>
/// <param name="trainingRows">The training rows that enter the tree at its root.</param>
/// <param name="data">Dataset passed to <c>RegressionTreeUtilities.SplitRows</c> to partition the rows.</param>
public void FillLeafs(RegressionNodeTreeModel tree, IReadOnlyList<int> trainingRows, IDataset data) {
  nodeQueue.Clear();
  trainingRowsQueue.Clear();

  // Two work queues advanced in lock-step: a node and the rows that reach it.
  var pendingNodes = new Queue<RegressionNodeModel>();
  var pendingRows = new Queue<IReadOnlyList<int>>();
  pendingNodes.Enqueue(tree.Root);
  pendingRows.Enqueue(trainingRows);

  while (pendingNodes.Count > 0) {
    var current = pendingNodes.Dequeue();
    var currentRows = pendingRows.Dequeue();

    if (!current.IsLeaf) {
      // Inner node: push both children together with their share of the rows.
      IReadOnlyList<int> leftPart, rightPart;
      RegressionTreeUtilities.SplitRows(currentRows, data, current.SplitAttribute, current.SplitValue, out leftPart, out rightPart);
      pendingNodes.Enqueue(current.Left);
      pendingRows.Enqueue(leftPart);
      pendingNodes.Enqueue(current.Right);
      pendingRows.Enqueue(rightPart);
    } else {
      // Leaf: record it and the rows that ended up here.
      nodeQueue.Enqueue(current);
      trainingRowsQueue.Enqueue(currentRows);
    }
  }
}
/// <summary>
/// Grows <paramref name="tree"/> by repeatedly taking a node and its training rows from the splitting
/// state's queues, deciding whether the node should be split, and, if so, splitting it and enqueuing
/// both children with their share of the rows.
/// </summary>
/// <remarks>
/// The work queues live in the <c>SplittingState</c> stored in <paramref name="stateScope"/>. When the
/// state's <c>Code</c> is not positive, the queues are seeded with the tree root and
/// <paramref name="trainingRows"/>, and <c>Code</c> is set to 1; otherwise processing continues with
/// whatever the queues already contain.
/// </remarks>
/// <param name="tree">The tree whose nodes are split.</param>
/// <param name="trainingRows">The training rows assigned to the root when the queues are seeded.</param>
/// <param name="stateScope">Scope holding the regression tree parameters and the splitting state.</param>
/// <param name="cancellationToken">Token observed once per processed node.</param>
/// <exception cref="OperationCanceledException">
/// Thrown when <paramref name="cancellationToken"/> is cancelled. The check happens before a node is
/// dequeued, so the queues still contain all outstanding work when the exception is thrown.
/// </exception>
public void Split(RegressionNodeTreeModel tree, IReadOnlyList<int> trainingRows, IScope stateScope, CancellationToken cancellationToken) {
  var regressionTreeParams = (RegressionTreeParameters)stateScope.Variables[DecisionTreeRegression.RegressionTreeParameterVariableName].Value;
  var splittingState = (SplittingState)stateScope.Variables[SplittingStateVariableName].Value;
  var variables = regressionTreeParams.AllowedInputVariables.ToArray();
  var target = regressionTreeParams.TargetVariable;

  // Seed the queues only once; later calls pick up where the previous one stopped.
  if (splittingState.Code <= 0) {
    splittingState.nodeQueue.Enqueue(tree.Root);
    splittingState.trainingRowsQueue.Enqueue(trainingRows);
    splittingState.Code = 1;
  }

  while (splittingState.nodeQueue.Count != 0) {
    // Observe cancellation on every iteration, including those that end with a leaf decision.
    // Checking before the dequeue keeps the persisted queues consistent if the call is aborted here.
    cancellationToken.ThrowIfCancellationRequested();

    var n = splittingState.nodeQueue.Dequeue();
    var rows = splittingState.trainingRowsQueue.Dequeue();

    // Decide on a dataset reduced to the rows that reach this node.
    var reducedData = RegressionTreeUtilities.ReduceDataset(regressionTreeParams.Data, rows, variables, target);
    string attr;
    double splitValue;
    var isLeaf = !DecideSplit(new RegressionProblemData(reducedData, variables, target), regressionTreeParams.MinLeafSize, out attr, out splitValue);
    if (isLeaf) {
      continue; // no split was chosen: the node is left as it is
    }

    IReadOnlyList<int> leftRows, rightRows;
    RegressionTreeUtilities.SplitRows(rows, regressionTreeParams.Data, attr, splitValue, out leftRows, out rightRows);
    n.Split(regressionTreeParams, attr, splitValue, rows.Count);

    splittingState.nodeQueue.Enqueue(n.Left);
    splittingState.nodeQueue.Enqueue(n.Right);
    splittingState.trainingRowsQueue.Enqueue(leftRows);
    splittingState.trainingRowsQueue.Enqueue(rightRows);
  }
}