Example #1
0
 private RegressionTreeParameters(RegressionTreeParameters original, Cloner cloner) : base(original, cloner)
 {
     // Cloning constructor: value-typed settings are copied directly, while
     // reference-typed collaborators go through the Cloner so object identity
     // is preserved consistently across the cloned object graph.
     minLeafSize = original.minLeafSize;
     problemData = cloner.Clone(original.problemData);
     leafModel = cloner.Clone(original.leafModel);
     splitter = cloner.Clone(original.splitter);
     pruning = cloner.Clone(original.pruning);
     random = cloner.Clone(original.random);
 }
Example #2
0
        /// <summary>
        /// Builds the algorithm state scope for decision-tree regression: validates and
        /// reduces the problem data, stores the tree parameters, initializes the tree
        /// operators, creates the (not yet built) model, and splits the training rows
        /// into a training set and a pruning set.
        /// </summary>
        /// <param name="random">Random number generator stored in the tree parameters and used for the pruning-set split.</param>
        /// <param name="problemData">Original problem data; reduced column- and row-wise before use.</param>
        /// <param name="pruning">Pruning operator; initialized against the new scope.</param>
        /// <param name="minLeafSize">Minimum number of samples per leaf, stored in the tree parameters.</param>
        /// <param name="leafModel">Leaf model; initialized against the new scope.</param>
        /// <param name="splitter">Splitter; initialized against the new scope.</param>
        /// <param name="generateRules">If true, a rule-set model is created instead of a node-tree model.</param>
        /// <param name="useHoldout">Passed to GeneratePruningSet to control how pruning rows are selected.</param>
        /// <param name="holdoutSize">Passed to GeneratePruningSet; relative size of the holdout.</param>
        /// <returns>A scope holding the tree parameters, the unbuilt model, and the training/pruning index sets.</returns>
        /// <exception cref="NotSupportedException">If any used variable is not double-valued, or contains NaN/infinity.</exception>
        private static IScope InitializeScope(IRandom random, IRegressionProblemData problemData, IPruning pruning, int minLeafSize, ILeafModel leafModel, ISplitter splitter, bool generateRules, bool useHoldout, double holdoutSize)
        {
            var stateScope = new Scope("RegressionTreeStateScope");

            //reduce RegressionProblemData to AllowedInput & Target column wise and to TrainingSet row wise
            var doubleVars = new HashSet <string>(problemData.Dataset.DoubleVariables);
            var vars       = problemData.AllowedInputVariables.Concat(new[] { problemData.TargetVariable }).ToArray();

            // Tree building only supports double-valued features; reject anything else up front.
            if (vars.Any(v => !doubleVars.Contains(v)))
            {
                throw new NotSupportedException("Decision tree regression supports only double valued input or output features.");
            }
            var doubles = vars.Select(v => problemData.Dataset.GetDoubleValues(v, problemData.TrainingIndices).ToArray()).ToArray();

            // NaN/infinity would poison split thresholds and leaf model fits; reject them too.
            if (doubles.Any(v => v.Any(x => double.IsNaN(x) || double.IsInfinity(x))))
            {
                throw new NotSupportedException("Decision tree regression does not support NaN or infinity values in the input dataset.");
            }
            var trainingData = new Dataset(vars, doubles);
            var pd           = new RegressionProblemData(trainingData, problemData.AllowedInputVariables, problemData.TargetVariable);

            // All rows of the reduced dataset become the training partition; the test partition is empty.
            pd.TrainingPartition.End   = pd.TestPartition.Start = pd.TestPartition.End = pd.Dataset.Rows;
            pd.TrainingPartition.Start = 0;

            //store regression tree parameters
            var regressionTreeParams = new RegressionTreeParameters(pruning, minLeafSize, leafModel, pd, random, splitter);

            stateScope.Variables.Add(new Variable(RegressionTreeParameterVariableName, regressionTreeParams));

            //initialize tree operators
            // NOTE: the parameters variable must be in the scope before these calls,
            // since the operators are initialized against this scope.
            pruning.Initialize(stateScope);
            splitter.Initialize(stateScope);
            leafModel.Initialize(stateScope);

            //store unbuilt model
            IItem model;

            if (generateRules)
            {
                model = RegressionRuleSetModel.CreateRuleModel(problemData.TargetVariable, regressionTreeParams);
                // Rule-set models need additional per-scope state; tree models do not.
                RegressionRuleSetModel.Initialize(stateScope);
            }
            else
            {
                model = RegressionNodeTreeModel.CreateTreeModel(problemData.TargetVariable, regressionTreeParams);
            }
            stateScope.Variables.Add(new Variable(ModelVariableName, model));

            //store training & pruning indices
            IReadOnlyList <int> trainingSet, pruningSet;

            GeneratePruningSet(pd.TrainingIndices.ToArray(), random, useHoldout, holdoutSize, out trainingSet, out pruningSet);
            stateScope.Variables.Add(new Variable(TrainingSetVariableName, new IntArray(trainingSet.ToArray())));
            stateScope.Variables.Add(new Variable(PruningSetVariableName, new IntArray(pruningSet.ToArray())));

            return(stateScope);
        }
 internal void Split(RegressionTreeParameters regressionTreeParams, string splitAttribute, double splitValue, int numSamples)
 {
     // Turn this node into an inner node: record the split decision,
     // attach two freshly created child nodes, and clear the leaf flag.
     IsLeaf = false;
     SplitAttribute = splitAttribute;
     SplitValue = splitValue;
     NumSamples = numSamples;
     Left = CreateNode(this, regressionTreeParams);
     Right = CreateNode(this, regressionTreeParams);
 }
Example #4
0
        /// <summary>
        /// Re-fits the leaf models of an already built decision tree on the training
        /// rows of <paramref name="problemData"/> without changing the tree structure.
        /// </summary>
        /// <param name="model">The tree (or rule set) whose leaves are updated in place.</param>
        /// <param name="problemData">Provides the training indices the leaves are refit on.</param>
        /// <param name="random">Random number generator stored in the tree parameters.</param>
        /// <param name="leafModel">Leaf model used for refitting; initialized against a fresh scope.</param>
        /// <param name="cancellationToken">Optional cancellation token; defaults to <see cref="CancellationToken.None"/>.</param>
        public static void UpdateModel(IDecisionTreeModel model, IRegressionProblemData problemData, IRandom random, ILeafModel leafModel, CancellationToken? cancellationToken = null)
        {
            // Null-coalesce instead of the manual if-null/assign dance.
            var token = cancellationToken ?? CancellationToken.None;

            var regressionTreeParameters = new RegressionTreeParameters(leafModel, problemData, random);
            var scope = new Scope();

            // The parameters variable must be in the scope before the leaf model is initialized.
            scope.Variables.Add(new Variable(RegressionTreeParameterVariableName, regressionTreeParameters));
            leafModel.Initialize(scope);
            model.Update(problemData.TrainingIndices.ToList(), scope, token);
        }
Example #5
0
        /// <summary>
        /// Fits a leaf model on the given subset of rows and reports how many
        /// parameters the fitted model uses.
        /// </summary>
        /// <param name="rows">Row indices (into <c>parameters.Data</c>) to fit on.</param>
        /// <param name="parameters">Tree parameters supplying data, variables, and the RNG.</param>
        /// <param name="cancellation">Checked before the model is returned.</param>
        /// <param name="numberOfParameters">Number of parameters of the fitted model.</param>
        /// <returns>The fitted (and possibly dampened) regression model.</returns>
        public IRegressionModel BuildModel(IReadOnlyList <int> rows, RegressionTreeParameters parameters, CancellationToken cancellation, out int numberOfParameters)
        {
            // Restrict the dataset to the requested rows plus the allowed input and target columns.
            var inputVariables = parameters.AllowedInputVariables.ToArray();
            var subset = RegressionTreeUtilities.ReduceDataset(parameters.Data, rows, inputVariables, parameters.TargetVariable);
            var subsetProblem = new RegressionProblemData(subset, inputVariables, parameters.TargetVariable);

            // Every reduced row belongs to the training partition; the test partition stays empty.
            subsetProblem.TrainingPartition.Start = 0;
            subsetProblem.TrainingPartition.End = subset.Rows;
            subsetProblem.TestPartition.Start = subset.Rows;
            subsetProblem.TestPartition.End = subset.Rows;

            int paramCount;
            var model = Build(subsetProblem, parameters.Random, cancellation, out paramCount);

            // Optionally shrink the model's predictions via dampening.
            if (UseDampening && Dampening > 0.0)
            {
                model = DampenedModel.DampenModel(model, subsetProblem, Dampening);
            }

            numberOfParameters = paramCount;
            cancellation.ThrowIfCancellationRequested();
            return model;
        }
Example #6
0
 internal static RegressionRuleModel CreateRuleModel(string target, RegressionTreeParameters regressionTreeParams)
 {
     // Choose the confidence-aware rule model when the configured leaf model
     // can supply confidence estimates; otherwise use the plain variant.
     if (regressionTreeParams.LeafModel.ProvidesConfidence)
     {
         return new ConfidenceRegressionRuleModel(target);
     }
     return new RegressionRuleModel(target);
 }
 private static RegressionNodeModel CreateNode(RegressionNodeModel parent, RegressionTreeParameters regressionTreeParams)
 {
     // Child-node factory: pick the confidence-aware node type when the
     // leaf model provides confidence estimates; otherwise the plain one.
     if (regressionTreeParams.LeafModel.ProvidesConfidence)
     {
         return new ConfidenceRegressionNodeModel(parent);
     }
     return new RegressionNodeModel(parent);
 }
 public static RegressionNodeModel CreateNode(string targetAttr, RegressionTreeParameters regressionTreeParams)
 {
     // Root-node factory: pick the confidence-aware node type when the
     // leaf model provides confidence estimates; otherwise the plain one.
     if (regressionTreeParams.LeafModel.ProvidesConfidence)
     {
         return new ConfidenceRegressionNodeModel(targetAttr);
     }
     return new RegressionNodeModel(targetAttr);
 }
Example #9
0
        /// <summary>
        /// Fits a pruning model for one tree node and records its error and complexity
        /// in the pruning state: the leaf model is trained on <paramref name="trainingRows"/>
        /// and evaluated (RMSE) on <paramref name="pruningRows"/>.
        /// </summary>
        /// <param name="regressionNode">The node whose pruning statistics are computed.</param>
        /// <param name="leaf">Leaf model used to fit the pruning model.</param>
        /// <param name="trainingRows">Rows the pruning model is fitted on.</param>
        /// <param name="pruningRows">Rows the pruning model is evaluated on.</param>
        /// <param name="state">Accumulates per-node sizes, errors, and complexities.</param>
        /// <param name="regressionTreeParams">Supplies the dataset and variable lists.</param>
        /// <param name="cancellationToken">Forwarded to the leaf model's BuildModel.</param>
        private static void BuildPruningModel(RegressionNodeModel regressionNode, ILeafModel leaf, IReadOnlyList <int> trainingRows, IReadOnlyList <int> pruningRows, PruningState state, RegressionTreeParameters regressionTreeParams, CancellationToken cancellationToken)
        {
            //create regressionProblemdata from pruning data
            var vars        = regressionTreeParams.AllowedInputVariables.Concat(new[] { regressionTreeParams.TargetVariable }).ToArray();
            var reducedData = new Dataset(vars, vars.Select(x => regressionTreeParams.Data.GetDoubleValues(x, pruningRows).ToList()));
            var pd          = new RegressionProblemData(reducedData, regressionTreeParams.AllowedInputVariables, regressionTreeParams.TargetVariable);

            // Pruning rows form the TEST partition (training partition is empty),
            // so TestRootMeanSquaredError below evaluates on exactly these rows.
            pd.TrainingPartition.Start = pd.TrainingPartition.End = pd.TestPartition.Start = 0;
            pd.TestPartition.End       = reducedData.Rows;

            //build pruning model
            int numModelParams;
            var model = leaf.BuildModel(trainingRows, regressionTreeParams, cancellationToken, out numModelParams);

            //record error and complexities
            var rmsModel = model.CreateRegressionSolution(pd).TestRootMeanSquaredError;

            state.pruningSizes.Add(regressionNode, pruningRows.Count);
            state.modelErrors.Add(regressionNode, rmsModel);
            state.modelComplexities.Add(regressionNode, numModelParams);
            // Leaf: the node's subtree complexity equals its own model complexity.
            // Inner node: subtree complexity = left + right children + 1 (this split).
            // Assumes children were processed before their parent (bottom-up order) so
            // the children's nodeComplexities entries already exist.
            // NOTE(review): the leaf branch uses the indexer (overwrites) while the inner
            // branch uses Add (throws on duplicates) — looks intentional but asymmetric; verify.
            if (regressionNode.IsLeaf)
            {
                state.nodeComplexities[regressionNode] = state.modelComplexities[regressionNode];
            }
            else
            {
                state.nodeComplexities.Add(regressionNode, state.nodeComplexities[regressionNode.Left] + state.nodeComplexities[regressionNode.Right] + 1);
            }
        }
Example #10
0
 /// <summary>
 /// Builds a pruning model for every node queued in the pruning state,
 /// consuming the node/training-rows/pruning-rows queues in lock step.
 /// Resumable: the state's Code field marks whether the queues were already
 /// filled, so the method can be re-entered after cancellation.
 /// </summary>
 /// <param name="tree">The tree whose nodes are enumerated into the state (bottom-up).</param>
 /// <param name="state">Holds the work queues, the progress code, and the collected statistics.</param>
 /// <param name="trainingRows">Row indices used to fit each node's pruning model.</param>
 /// <param name="pruningRows">Row indices used to evaluate each node's pruning model.</param>
 /// <param name="leaf">Leaf model used to build the pruning models.</param>
 /// <param name="regressionTreeParams">Supplies the dataset and variable lists.</param>
 /// <param name="cancellationToken">Checked once per queued node.</param>
 private static void InstallModels(RegressionNodeTreeModel tree, PruningState state, IReadOnlyList <int> trainingRows, IReadOnlyList <int> pruningRows, ILeafModel leaf, RegressionTreeParameters regressionTreeParams, CancellationToken cancellationToken)
 {
     // Code == 0: queues not yet filled for this phase; do it once, then advance.
     if (state.Code == 0)
     {
         state.FillBottomUp(tree, trainingRows, pruningRows, regressionTreeParams.Data);
         state.Code = 1;
     }
     while (state.nodeQueue.Count != 0)
     {
         cancellationToken.ThrowIfCancellationRequested();
         // Peek first and dequeue only after the node was processed — presumably so
         // a cancellation mid-node leaves the item queued for the next resume (verify).
         var n        = state.nodeQueue.Peek();
         var training = state.trainingRowsQueue.Peek();
         var pruning  = state.pruningRowsQueue.Peek();
         BuildPruningModel(n, leaf, training, pruning, state, regressionTreeParams, cancellationToken);
         state.nodeQueue.Dequeue();
         state.trainingRowsQueue.Dequeue();
         state.pruningRowsQueue.Dequeue();
     }
 }