示例#1
0
        /// <summary>
        /// Creates a new <see cref="RegressionProblemData"/> from a reduced copy of
        /// <paramref name="data"/>'s dataset, built from the training rows followed by the test rows.
        /// </summary>
        /// <remarks>
        /// The partitions of the result are assigned as follows: training Start = 0 and
        /// End = <c>training.Count</c>; test Start = <c>training.Count</c> and
        /// End = <c>training.Count + test.Count</c>.
        /// NOTE(review): this presumes <c>RegressionTreeUtilities.ReduceDataset</c> keeps the rows in
        /// the order they are passed in - confirm against its implementation.
        /// </remarks>
        /// <param name="data">Problem data supplying the dataset, allowed input variables and target variable.</param>
        /// <param name="training">Row indices that are passed first to the dataset reduction.</param>
        /// <param name="test">Row indices that are appended after <paramref name="training"/>.</param>
        /// <returns>The newly created problem data with its partitions set as described above.</returns>
        private static IRegressionProblemData Subselect(IRegressionProblemData data, IReadOnlyList <int> training, IReadOnlyList <int> test)
        {
            var sourceDataset  = data.Dataset;
            var selectedRows   = training.Concat(test).ToList();
            var inputVariables = data.AllowedInputVariables.ToList();
            var reduced        = RegressionTreeUtilities.ReduceDataset(sourceDataset, selectedRows, inputVariables, data.TargetVariable);

            var subset = new RegressionProblemData(reduced, data.AllowedInputVariables, data.TargetVariable);

            var trainingCount = training.Count;
            var totalCount    = trainingCount + test.Count;

            // Keep this assignment order: the partition setters may react to each individual change.
            subset.TrainingPartition.Start = 0;
            subset.TrainingPartition.End   = trainingCount;
            subset.TestPartition.Start     = trainingCount;
            subset.TestPartition.End       = totalCount;

            return subset;
        }
示例#2
0
        /// <summary>
        /// Builds a regression model from the dataset obtained by reducing
        /// <c>parameters.Data</c> with the given <paramref name="rows"/>.
        /// </summary>
        /// <remarks>
        /// The temporary problem data gets a training partition starting at 0 and ending at the row count
        /// of the reduced dataset; the test partition's start and end are both set to that row count.
        /// When <c>UseDampening</c> is set and <c>Dampening</c> is greater than zero, the model returned by
        /// <c>Build</c> is passed through <c>DampenedModel.DampenModel</c>.
        /// </remarks>
        /// <param name="rows">Row indices handed to <c>RegressionTreeUtilities.ReduceDataset</c>.</param>
        /// <param name="parameters">Supplies the data, allowed input variables, target variable and random source.</param>
        /// <param name="cancellation">Token passed to <c>Build</c> and checked once more before returning.</param>
        /// <param name="numberOfParameters">Receives the parameter count reported by <c>Build</c>.</param>
        /// <returns>The built (and possibly dampened) model.</returns>
        /// <exception cref="OperationCanceledException">
        /// Thrown by the final cancellation check if <paramref name="cancellation"/> has been signalled.
        /// </exception>
        public IRegressionModel BuildModel(IReadOnlyList <int> rows, RegressionTreeParameters parameters, CancellationToken cancellation, out int numberOfParameters)
        {
            var subsetData = RegressionTreeUtilities.ReduceDataset(
                parameters.Data,
                rows,
                parameters.AllowedInputVariables.ToArray(),
                parameters.TargetVariable);
            var problem = new RegressionProblemData(
                subsetData,
                parameters.AllowedInputVariables.ToArray(),
                parameters.TargetVariable);

            problem.TrainingPartition.Start = 0;

            // Same effective order as a right-to-left chained assignment: test end, test start, training end.
            var rowCount = subsetData.Rows;
            problem.TestPartition.End     = rowCount;
            problem.TestPartition.Start   = rowCount;
            problem.TrainingPartition.End = rowCount;

            // Use a temporary so the caller's out variable is only written after building (and dampening) succeeded.
            int parameterCount;
            var model = Build(problem, parameters.Random, cancellation, out parameterCount);

            var applyDampening = UseDampening && Dampening > 0.0;
            if (applyDampening)
            {
                model = DampenedModel.DampenModel(model, problem, Dampening);
            }

            numberOfParameters = parameterCount;
            cancellation.ThrowIfCancellationRequested();
            return model;
        }
示例#3
0
        /// <summary>
        /// Splits the nodes of <paramref name="tree"/> in first-in-first-out order, using two parallel
        /// queues (nodes and their training rows) stored in the splitting state found in
        /// <paramref name="stateScope"/>.
        /// </summary>
        /// <remarks>
        /// If the state's <c>Code</c> is not positive, the queues are seeded with the tree root and
        /// <paramref name="trainingRows"/> and <c>Code</c> is set to 1. Otherwise the queues are processed
        /// as found and <paramref name="trainingRows"/> is not used.
        /// NOTE(review): this looks designed to let an interrupted run resume from the stored queues - confirm with callers.
        /// </remarks>
        /// <param name="tree">Tree whose root is enqueued when the state has not been seeded yet.</param>
        /// <param name="trainingRows">Rows associated with the root node when seeding the queues.</param>
        /// <param name="stateScope">Scope holding the regression tree parameters and the splitting state variables.</param>
        /// <param name="cancellationToken">Checked after every successful split; a node judged unsplittable skips the check.</param>
        /// <exception cref="OperationCanceledException">
        /// Thrown after a split has been carried out and its children enqueued, if cancellation was requested.
        /// </exception>
        public void Split(RegressionNodeTreeModel tree, IReadOnlyList <int> trainingRows, IScope stateScope, CancellationToken cancellationToken)
        {
            var treeParameters = (RegressionTreeParameters)stateScope.Variables[DecisionTreeRegression.RegressionTreeParameterVariableName].Value;
            var state          = (SplittingState)stateScope.Variables[SplittingStateVariableName].Value;
            var inputNames     = treeParameters.AllowedInputVariables.ToArray();
            var targetName     = treeParameters.TargetVariable;

            // Seed the work queues exactly once; afterwards Code is 1 and this branch is skipped.
            if (state.Code <= 0)
            {
                state.nodeQueue.Enqueue(tree.Root);
                state.trainingRowsQueue.Enqueue(trainingRows);
                state.Code = 1;
            }

            while (state.nodeQueue.Count > 0)
            {
                // Both queues are advanced in lock-step: the n-th node belongs to the n-th row list.
                var node     = state.nodeQueue.Dequeue();
                var nodeRows = state.trainingRowsQueue.Dequeue();

                var nodeDataset = RegressionTreeUtilities.ReduceDataset(treeParameters.Data, nodeRows, inputNames, targetName);
                var nodeProblem = new RegressionProblemData(nodeDataset, inputNames, targetName);

                string splitAttribute;
                double splitThreshold;
                var    canSplit = DecideSplit(nodeProblem, treeParameters.MinLeafSize, out splitAttribute, out splitThreshold);
                if (!canSplit)
                {
                    // Node is not split any further; move on to the next queued node.
                    continue;
                }

                IReadOnlyList <int> leftRows, rightRows;
                RegressionTreeUtilities.SplitRows(nodeRows, treeParameters.Data, splitAttribute, splitThreshold, out leftRows, out rightRows);
                node.Split(treeParameters, splitAttribute, splitThreshold, nodeRows.Count);

                // Left child before right child in both queues, so nodes and rows stay aligned.
                state.nodeQueue.Enqueue(node.Left);
                state.trainingRowsQueue.Enqueue(leftRows);
                state.nodeQueue.Enqueue(node.Right);
                state.trainingRowsQueue.Enqueue(rightRows);

                // Check only after the children are queued, so the stored state is consistent when this throws.
                cancellationToken.ThrowIfCancellationRequested();
            }
        }