/// <summary>
/// Creates a new <see cref="RegressionProblemData"/> restricted to the given training and test rows.
/// </summary>
/// <param name="data">Problem data supplying the dataset, the allowed input variables and the target variable.</param>
/// <param name="training">Row indices placed first in the reduced dataset.</param>
/// <param name="test">Row indices placed after the training rows in the reduced dataset.</param>
/// <returns>
/// Problem data over the reduced dataset whose training partition is set to Start = 0, End = training.Count
/// and whose test partition is set to Start = training.Count, End = training.Count + test.Count.
/// </returns>
private static IRegressionProblemData Subselect(IRegressionProblemData data, IReadOnlyList<int> training, IReadOnlyList<int> test) {
  // Reduce the source dataset to the training rows followed by the test rows.
  var source = data.Dataset;
  var selectedRows = training.Concat(test).ToList();
  var inputVariables = data.AllowedInputVariables.ToList();
  var reduced = RegressionTreeUtilities.ReduceDataset(source, selectedRows, inputVariables, data.TargetVariable);

  var result = new RegressionProblemData(reduced, data.AllowedInputVariables, data.TargetVariable);

  // Training rows occupy the leading block of the reduced dataset, test rows the block right after it.
  var trainingCount = training.Count;
  var totalCount = trainingCount + test.Count;
  result.TrainingPartition.Start = 0;
  result.TrainingPartition.End = trainingCount;
  result.TestPartition.Start = trainingCount;
  result.TestPartition.End = totalCount;

  return result;
}
/// <summary>
/// Builds a regression model on the subset of the parameter data given by <paramref name="rows"/>.
/// </summary>
/// <param name="rows">Row indices of the data the model is built on.</param>
/// <param name="parameters">Supplies the data, allowed input variables, target variable and random source.</param>
/// <param name="cancellation">Passed to <c>Build</c> and checked once more before the model is returned.</param>
/// <param name="numberOfParameters">Receives the parameter count reported by <c>Build</c>.</param>
/// <returns>
/// The model produced by <c>Build</c>, wrapped via <c>DampenedModel.DampenModel</c> when
/// <c>UseDampening</c> is set and <c>Dampening</c> is greater than zero.
/// </returns>
public IRegressionModel BuildModel(IReadOnlyList<int> rows, RegressionTreeParameters parameters, CancellationToken cancellation, out int numberOfParameters) {
  var subset = RegressionTreeUtilities.ReduceDataset(
    parameters.Data,
    rows,
    parameters.AllowedInputVariables.ToArray(),
    parameters.TargetVariable);
  var problemData = new RegressionProblemData(
    subset,
    parameters.AllowedInputVariables.ToArray(),
    parameters.TargetVariable);

  // Training partition is set to Start = 0, End = row count; the test partition gets Start = End = row count.
  // The assignments are kept in the order the original chained assignment performed them.
  var rowCount = subset.Rows;
  problemData.TrainingPartition.Start = 0;
  problemData.TestPartition.End = rowCount;
  problemData.TestPartition.Start = rowCount;
  problemData.TrainingPartition.End = rowCount;

  int parameterCount;
  var model = Build(problemData, parameters.Random, cancellation, out parameterCount);

  var applyDampening = UseDampening && Dampening > 0.0;
  if (applyDampening) {
    model = DampenedModel.DampenModel(model, problemData, Dampening);
  }

  // The out parameter is written only after the (optionally dampened) model is ready.
  numberOfParameters = parameterCount;
  cancellation.ThrowIfCancellationRequested();
  return model;
}
/// <summary>
/// Grows <paramref name="tree"/> breadth-first by repeatedly splitting queued nodes until the node queue is empty.
/// </summary>
/// <remarks>
/// The pending nodes and their row sets are kept in the <c>SplittingState</c> read from
/// <paramref name="stateScope"/>. The queues are seeded with the root only while <c>Code</c> is
/// less than or equal to zero; afterwards <c>Code</c> is set to 1, so a later call with the same state
/// continues with whatever is still queued (presumably to resume after a cancellation; confirm with callers).
/// Cancellation is checked only after a node has actually been split, not after a node is left as a leaf.
/// </remarks>
/// <param name="tree">Tree whose root is enqueued on the first call.</param>
/// <param name="trainingRows">Rows associated with the root node on the first call.</param>
/// <param name="stateScope">Scope holding the regression tree parameters and the splitting state variables.</param>
/// <param name="cancellationToken">Token checked after each performed split.</param>
public void Split(RegressionNodeTreeModel tree, IReadOnlyList<int> trainingRows, IScope stateScope, CancellationToken cancellationToken) {
  var treeParameters = (RegressionTreeParameters)stateScope.Variables[DecisionTreeRegression.RegressionTreeParameterVariableName].Value;
  var state = (SplittingState)stateScope.Variables[SplittingStateVariableName].Value;
  var inputVariables = treeParameters.AllowedInputVariables.ToArray();
  var targetVariable = treeParameters.TargetVariable;

  // Seed the work queues with the root exactly once per splitting state.
  if (state.Code <= 0) {
    state.nodeQueue.Enqueue(tree.Root);
    state.trainingRowsQueue.Enqueue(trainingRows);
    state.Code = 1;
  }

  while (state.nodeQueue.Count != 0) {
    // Both queues advance in lock-step: the i-th node belongs to the i-th row set.
    var node = state.nodeQueue.Dequeue();
    var nodeRows = state.trainingRowsQueue.Dequeue();

    var nodeDataset = RegressionTreeUtilities.ReduceDataset(treeParameters.Data, nodeRows, inputVariables, targetVariable);
    var nodeProblem = new RegressionProblemData(nodeDataset, inputVariables, targetVariable);

    string splitAttribute;
    double splitThreshold;
    var shouldSplit = DecideSplit(nodeProblem, treeParameters.MinLeafSize, out splitAttribute, out splitThreshold);

    // A node for which no split is chosen stays a leaf and nothing further is queued for it.
    if (shouldSplit) {
      IReadOnlyList<int> leftRows, rightRows;
      RegressionTreeUtilities.SplitRows(nodeRows, treeParameters.Data, splitAttribute, splitThreshold, out leftRows, out rightRows);
      node.Split(treeParameters, splitAttribute, splitThreshold, nodeRows.Count);

      // Queue the children together with their rows, left before right in both queues.
      state.nodeQueue.Enqueue(node.Left);
      state.trainingRowsQueue.Enqueue(leftRows);
      state.nodeQueue.Enqueue(node.Right);
      state.trainingRowsQueue.Enqueue(rightRows);

      cancellationToken.ThrowIfCancellationRequested();
    }
  }
}