// Merging a state in the PruningInitialized phase with a state in the
// PruningPerformed phase (same owner) must yield the PruningPerformed phase,
// whichever side Merge is called on.
public void PruningState_should_merge_phase_correctly()
{
    var initialized = new PruningState(_node1, PruningInitialized.Empty);
    var performed = new PruningState(_node1, PruningPerformed.Instance);

    // initialized <- performed
    var forward = initialized.Merge(performed);
    Assert.Equal(PruningPerformed.Instance, forward.Phase);

    // performed <- initialized
    var backward = performed.Merge(initialized);
    Assert.Equal(PruningPerformed.Instance, backward.Phase);
}
// Two PruningInitialized states of the same owner, each constructed with a
// different address, must merge into a state equal to one constructed with
// both addresses. Checked in both merge directions.
public void PruningState_should_merge_seen_correctly()
{
    var withNode2 = new PruningState(_node1, new PruningInitialized(_node2.Address));
    var withNode4 = new PruningState(_node1, new PruningInitialized(_node4.Address));
    var withBoth = new PruningState(_node1, new PruningInitialized(_node2.Address, _node4.Address));

    var forward = withNode2.Merge(withNode4);
    Assert.Equal(withBoth, forward);

    var backward = withNode4.Merge(withNode2);
    Assert.Equal(withBoth, backward);
}
// Two empty PruningInitialized states owned by _node1 and _node2 must merge
// into a state equal to the one owned by _node1, whichever side Merge is
// called on.
public void PruningState_should_merge_owner_correctly()
{
    var ownedByNode1 = new PruningState(_node1, PruningInitialized.Empty);
    var ownedByNode2 = new PruningState(_node2, PruningInitialized.Empty);
    var winner = new PruningState(_node1, PruningInitialized.Empty);

    var forward = ownedByNode1.Merge(ownedByNode2);
    Assert.Equal(winner, forward);

    var backward = ownedByNode2.Merge(ownedByNode1);
    Assert.Equal(winner, backward);
}
/// <summary>
/// Cloning constructor: builds this state as a copy of <paramref name="original"/>.
/// </summary>
/// <param name="original">The state to copy from.</param>
/// <param name="cloner">Cloner used for the dictionary keys and the queued nodes.</param>
protected PruningState(PruningState original, Cloner cloner)
  : base(original, cloner) {
  // Per-node bookkeeping: every key goes through the cloner, values are taken over unchanged.
  modelComplexities = original.modelComplexities.ToDictionary(entry => cloner.Clone(entry.Key), entry => entry.Value);
  nodeComplexities = original.nodeComplexities.ToDictionary(entry => cloner.Clone(entry.Key), entry => entry.Value);
  pruningSizes = original.pruningSizes.ToDictionary(entry => cloner.Clone(entry.Key), entry => entry.Value);
  modelErrors = original.modelErrors.ToDictionary(entry => cloner.Clone(entry.Key), entry => entry.Value);

  // Pending nodes are cloned one by one, in queue order.
  var clonedNodes = original.nodeQueue.Select(cloner.Clone);
  nodeQueue = new Queue<RegressionNodeModel>(clonedNodes);

  // Row index lists are copied into fresh arrays so the two states do not share list instances.
  var copiedTrainingRows = original.trainingRowsQueue.Select(rows => (IReadOnlyList<int>)rows.ToArray());
  trainingRowsQueue = new Queue<IReadOnlyList<int>>(copiedTrainingRows);

  var copiedPruningRows = original.pruningRowsQueue.Select(rows => (IReadOnlyList<int>)rows.ToArray());
  pruningRowsQueue = new Queue<IReadOnlyList<int>>(copiedPruningRows);

  Code = original.Code;
}
/// <summary>
/// Drains the node queue of <paramref name="state"/> and converts every queued non-leaf node to a leaf
/// unless <paramref name="pruningStrength"/> is less than or equal to that node's own pruning strength.
/// </summary>
/// <param name="tree">Tree handed to <c>FillTopDown</c> when the queue has to be (re)filled.</param>
/// <param name="state">Pruning state; when its <c>Code</c> is 3 the queue is filled top-down and <c>Code</c> becomes 4.</param>
/// <param name="pruningStrength">Threshold compared against each node's <c>PruningStrength</c>.</param>
private static void Prune(RegressionNodeTreeModel tree, PruningState state, double pruningStrength) {
  if (state.Code == 3) {
    state.FillTopDown(tree);
    state.Code = 4;
  }

  while (state.nodeQueue.Count > 0) {
    var node = state.nodeQueue.Dequeue();

    if (node.IsLeaf) {
      continue;
    }

    // Deliberately kept as "<=" followed by continue (not rewritten to ">"):
    // the two forms differ when either operand is NaN.
    if (pruningStrength <= node.PruningStrength) {
      continue;
    }

    node.ToLeaf();
  }
}
/// <summary>
/// Drains the node queue of <paramref name="state"/> and caps the pruning strength of every queued
/// non-leaf node at its parent's pruning strength.
/// Leaves, nodes without a parent and nodes whose parent strength is NaN are left untouched.
/// </summary>
/// <param name="tree">Tree handed to <c>FillTopDown</c> when the queue has to be (re)filled.</param>
/// <param name="state">Pruning state; when its <c>Code</c> is 2 the queue is filled top-down and <c>Code</c> becomes 3.</param>
private static void UpdateThreshold(RegressionNodeTreeModel tree, PruningState state) {
  if (state.Code == 2) {
    state.FillTopDown(tree);
    state.Code = 3;
  }

  while (state.nodeQueue.Count > 0) {
    var node = state.nodeQueue.Dequeue();
    if (node.IsLeaf) {
      continue;
    }

    var parent = node.Parent;
    if (parent == null) {
      continue;
    }

    var parentStrength = parent.PruningStrength;
    if (double.IsNaN(parentStrength)) {
      continue;
    }

    node.PruningStrength = Math.Min(node.PruningStrength, parentStrength);
  }
}
/// <summary>
/// Drains the node queue of <paramref name="state"/> and assigns a pruning strength to every queued
/// non-leaf node, computed by <c>PruningThreshold</c> from the values recorded for that node in the state
/// and the error of its subtree.
/// </summary>
/// <param name="tree">Tree handed to <c>FillBottomUp</c> when the queue has to be (re)filled.</param>
/// <param name="state">Pruning state; when its <c>Code</c> is 1 the queue is filled bottom-up and <c>Code</c> becomes 2.</param>
/// <param name="pruningDecay">Passed through unchanged to <c>PruningThreshold</c>.</param>
private static void AssignPruningThresholds(RegressionNodeTreeModel tree, PruningState state, double pruningDecay) {
  if (state.Code == 1) {
    state.FillBottomUp(tree);
    state.Code = 2;
  }

  while (state.nodeQueue.Count > 0) {
    var node = state.nodeQueue.Dequeue();
    if (node.IsLeaf) {
      continue;
    }

    // Look-ups happen in the same order in which they are passed on below.
    var pruningSize = state.pruningSizes[node];
    var modelComplexity = state.modelComplexities[node];
    var nodeComplexity = state.nodeComplexities[node];
    var modelError = state.modelErrors[node];
    var subtreeError = SubtreeError(node, state.pruningSizes, state.modelErrors);

    node.PruningStrength = PruningThreshold(pruningSize, modelComplexity, nodeComplexity, modelError, subtreeError, pruningDecay);
  }
}
/// <summary>
/// Works through the node queue of <paramref name="state"/> together with its parallel queues of
/// training and pruning rows, calling <c>BuildPruningModel</c> once per queued node.
/// </summary>
/// <param name="tree">Tree handed to <c>FillBottomUp</c> when the queues have to be (re)filled.</param>
/// <param name="state">Pruning state; when its <c>Code</c> is 0 the queues are filled bottom-up and <c>Code</c> becomes 1.</param>
/// <param name="trainingRows">Training rows handed to <c>FillBottomUp</c>.</param>
/// <param name="pruningRows">Pruning rows handed to <c>FillBottomUp</c>.</param>
/// <param name="leaf">Leaf model passed on to <c>BuildPruningModel</c>.</param>
/// <param name="regressionTreeParams">Tree parameters; their <c>Data</c> is handed to <c>FillBottomUp</c>.</param>
/// <param name="cancellationToken">Checked before every node and passed on to <c>BuildPruningModel</c>.</param>
private static void InstallModels(RegressionNodeTreeModel tree, PruningState state, IReadOnlyList<int> trainingRows, IReadOnlyList<int> pruningRows, ILeafModel leaf, RegressionTreeParameters regressionTreeParams, CancellationToken cancellationToken) {
  if (state.Code == 0) {
    state.FillBottomUp(tree, trainingRows, pruningRows, regressionTreeParams.Data);
    state.Code = 1;
  }

  while (state.nodeQueue.Count > 0) {
    cancellationToken.ThrowIfCancellationRequested();

    // The heads of the three queues are only peeked here; they are removed further down,
    // after BuildPruningModel has returned. If it throws, the entries stay queued.
    BuildPruningModel(
      state.nodeQueue.Peek(),
      leaf,
      state.trainingRowsQueue.Peek(),
      state.pruningRowsQueue.Peek(),
      state,
      regressionTreeParams,
      cancellationToken);

    state.nodeQueue.Dequeue();
    state.trainingRowsQueue.Dequeue();
    state.pruningRowsQueue.Dequeue();
  }
}
/// <summary>
/// Builds a model for <paramref name="regressionNode"/> from the training rows, measures its test RMSE
/// on a problem built from the pruning rows, and records the pruning size, model error, model complexity
/// and node complexity of the node in <paramref name="state"/>.
/// </summary>
/// <param name="regressionNode">Node the values are recorded for.</param>
/// <param name="leaf">Leaf model whose <c>BuildModel</c> creates the model.</param>
/// <param name="trainingRows">Rows passed to <c>BuildModel</c>.</param>
/// <param name="pruningRows">Rows used to build the reduced dataset the model is evaluated on.</param>
/// <param name="state">Pruning state receiving the recorded values.</param>
/// <param name="regressionTreeParams">Supplies the data, the allowed input variables and the target variable.</param>
/// <param name="cancellationToken">Passed on to <c>BuildModel</c>.</param>
private static void BuildPruningModel(RegressionNodeModel regressionNode, ILeafModel leaf, IReadOnlyList<int> trainingRows, IReadOnlyList<int> pruningRows, PruningState state, RegressionTreeParameters regressionTreeParams, CancellationToken cancellationToken) {
  // Create the regression problem data from the pruning rows: input variables first, target last.
  var targetOnly = new[] { regressionTreeParams.TargetVariable };
  var variableNames = regressionTreeParams.AllowedInputVariables.Concat(targetOnly).ToArray();
  var pruningDataset = new Dataset(variableNames, variableNames.Select(name => regressionTreeParams.Data.GetDoubleValues(name, pruningRows).ToList()));
  var pruningProblem = new RegressionProblemData(pruningDataset, regressionTreeParams.AllowedInputVariables, regressionTreeParams.TargetVariable);

  // Empty training partition; the test partition spans every row of the reduced dataset.
  pruningProblem.TrainingPartition.Start = pruningProblem.TrainingPartition.End = pruningProblem.TestPartition.Start = 0;
  pruningProblem.TestPartition.End = pruningDataset.Rows;

  // Build the pruning model.
  int numModelParams;
  var pruningModel = leaf.BuildModel(trainingRows, regressionTreeParams, cancellationToken, out numModelParams);

  // Record error and complexities.
  var pruningSolution = pruningModel.CreateRegressionSolution(pruningProblem);
  var testRmse = pruningSolution.TestRootMeanSquaredError;
  state.pruningSizes.Add(regressionNode, pruningRows.Count);
  state.modelErrors.Add(regressionNode, testRmse);
  state.modelComplexities.Add(regressionNode, numModelParams);

  if (!regressionNode.IsLeaf) {
    // Inner node: complexities of both children plus one.
    // The children's entries must already be present in nodeComplexities.
    var leftComplexity = state.nodeComplexities[regressionNode.Left];
    var rightComplexity = state.nodeComplexities[regressionNode.Right];
    state.nodeComplexities.Add(regressionNode, leftComplexity + rightComplexity + 1);
  } else {
    // Leaf: node complexity equals the complexity of its model (indexer assignment, not Add).
    state.nodeComplexities[regressionNode] = state.modelComplexities[regressionNode];
  }
}