Esempio n. 1
0
        /// <summary>
        /// Builds a Gaussian process regression model for the given problem data. The embedded GP
        /// sub-algorithm is run <c>Tries</c> times and the solution with the lowest negative log
        /// pseudo-likelihood (LOO-CV) is kept.
        /// </summary>
        /// <param name="pd">Problem data to train on; must have at least <c>MinLeafSize(pd)</c> rows.</param>
        /// <param name="random">Provides the seeds for the individual sub-algorithm runs.</param>
        /// <param name="cancellationToken">Propagated to the sub-algorithm runs.</param>
        /// <param name="numberOfParameters">Receives the parameter count: one per training row plus one,
        /// plus the hyperparameters of the covariance and mean functions.</param>
        /// <returns>The model of the best solution found.</returns>
        /// <exception cref="ArgumentException">Too few training rows, or no valid solution was produced.</exception>
        public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int numberOfParameters)
        {
            if (pd.Dataset.Rows < MinLeafSize(pd))
            {
                throw new ArgumentException("The number of training instances is too small to create a Gaussian process model");
            }

            Regression.Problem = new RegressionProblem {
                ProblemData = pd
            };

            GaussianProcessRegressionSolution best = null;
            var bestScore = double.MaxValue;

            for (var attempt = 0; attempt < Tries; attempt++)
            {
                var results   = RegressionTreeUtilities.RunSubAlgorithm(Regression, random.Next(), cancellationToken);
                var candidate = results.Select(x => x.Value).OfType <GaussianProcessRegressionSolution>().FirstOrDefault();
                var score     = ((DoubleValue)results["Negative log pseudo-likelihood (LOO-CV)"].Value).Value;

                // Keep only valid candidates whose CV score is an improvement. The negated
                // comparison (rather than score < bestScore) reproduces the original handling
                // of a NaN score, which is treated as an improvement.
                if (candidate != null && !double.IsNaN(candidate.TrainingRSquared) && !(score >= bestScore))
                {
                    bestScore = score;
                    best      = candidate;
                }
            }

            Regression.Runs.Clear();
            if (best == null)
            {
                throw new ArgumentException("Could not create Gaussian process model");
            }

            var inputCount = pd.AllowedInputVariables.Count();
            numberOfParameters = pd.Dataset.Rows + 1
                                 + Regression.CovarianceFunction.GetNumberOfParameters(inputCount)
                                 + Regression.MeanFunction.GetNumberOfParameters(inputCount);
            return best.Model;
        }
Esempio n. 2
0
            /// <summary>
            /// Traverses the tree breadth-first and refills the leaf queues: every leaf node is
            /// enqueued into <c>nodeQueue</c> together with the subset of training rows that
            /// reaches it (enqueued into <c>trainingRowsQueue</c>).
            /// </summary>
            /// <param name="tree">The tree whose leaves are collected.</param>
            /// <param name="trainingRows">All training rows; routed down the tree by the split rules.</param>
            /// <param name="data">Dataset used to evaluate the split attributes.</param>
            public void FillLeafs(RegressionNodeTreeModel tree, IReadOnlyList <int> trainingRows, IDataset data)
            {
                // Pending nodes and, in lockstep, the rows that reach each of them.
                var pendingNodes = new Queue <RegressionNodeModel>();
                var pendingRows  = new Queue <IReadOnlyList <int> >();

                nodeQueue.Clear();
                trainingRowsQueue.Clear();

                pendingNodes.Enqueue(tree.Root);
                pendingRows.Enqueue(trainingRows);

                while (pendingNodes.Count > 0)
                {
                    var node = pendingNodes.Dequeue();
                    var rows = pendingRows.Dequeue();

                    if (node.IsLeaf)
                    {
                        // Collect the leaf and its row subset for later model building.
                        nodeQueue.Enqueue(node);
                        trainingRowsQueue.Enqueue(rows);
                        continue;
                    }

                    // Route the rows to the children according to this node's split rule.
                    IReadOnlyList <int> leftRows, rightRows;
                    RegressionTreeUtilities.SplitRows(rows, data, node.SplitAttribute, node.SplitValue, out leftRows, out rightRows);

                    pendingNodes.Enqueue(node.Left);
                    pendingNodes.Enqueue(node.Right);
                    pendingRows.Enqueue(leftRows);
                    pendingRows.Enqueue(rightRows);
                }
            }
Esempio n. 3
0
        /// <summary>
        /// Recursively builds a symbolic-expression mirror of a regression tree for display.
        /// Each leaf receives the next free id, is registered in <paramref name="dict"/> together
        /// with the training/test rows reaching it, and is rendered as "Model i (train/test)".
        /// Inner nodes are rendered as "attribute &lt;= value", optionally with the pruning factor.
        /// </summary>
        private static SymbolicExpressionTreeNode MirrorTree(RegressionNodeModel regressionNode, IDictionary <int, RegressionNodeModel> dict,
                                                             IDictionary <int, IReadOnlyList <int> > trainingLeafRows,
                                                             IDictionary <int, IReadOnlyList <int> > testLeafRows,
                                                             IntValue nextId, IDataset data, IReadOnlyList <int> trainingRows, IReadOnlyList <int> testRows)
        {
            if (regressionNode.IsLeaf)
            {
                // Register the leaf under the next free id and emit a label node for it.
                var id = nextId.Value++;
                dict.Add(id, regressionNode);
                trainingLeafRows.Add(id, trainingRows);
                testLeafRows.Add(id, testRows);
                var label = "Model " + id + "\n(" + trainingRows.Count + "/" + testRows.Count + ")";
                return new SymbolicExpressionTreeNode(new TextSymbol(label));
            }

            var caption = regressionNode.SplitAttribute + " <= " + regressionNode.SplitValue.ToString("0.###");
            if (!double.IsNaN(regressionNode.PruningStrength))
            {
                // Only show the pruning factor when one was actually computed for this node.
                caption += "\npf = " + regressionNode.PruningStrength.ToString("0.###");
            }

            var mirrored = new SymbolicExpressionTreeNode(new TextSymbol(caption));

            // Partition both row sets by this node's split rule before recursing into the children.
            IReadOnlyList <int> leftTraining, rightTraining;
            IReadOnlyList <int> leftTest, rightTest;
            RegressionTreeUtilities.SplitRows(trainingRows, data, regressionNode.SplitAttribute, regressionNode.SplitValue, out leftTraining, out rightTraining);
            RegressionTreeUtilities.SplitRows(testRows, data, regressionNode.SplitAttribute, regressionNode.SplitValue, out leftTest, out rightTest);

            mirrored.AddSubtree(MirrorTree(regressionNode.Left, dict, trainingLeafRows, testLeafRows, nextId, data, leftTraining, leftTest));
            mirrored.AddSubtree(MirrorTree(regressionNode.Right, dict, trainingLeafRows, testLeafRows, nextId, data, rightTraining, rightTest));

            return mirrored;
        }
Esempio n. 4
0
        /// <summary>
        /// Creates a new problem-data instance restricted to the given rows. The reduced dataset
        /// contains the training rows first, followed by the test rows, and the partitions are
        /// set to match that layout.
        /// </summary>
        /// <param name="data">Source problem data supplying dataset, inputs and target.</param>
        /// <param name="training">Row indices that become the training partition.</param>
        /// <param name="test">Row indices that become the test partition.</param>
        private static IRegressionProblemData Subselect(IRegressionProblemData data, IReadOnlyList <int> training, IReadOnlyList <int> test)
        {
            // Concatenate so the reduced dataset is [training rows | test rows].
            var rowOrder = training.Concat(test).ToList();
            var reduced  = RegressionTreeUtilities.ReduceDataset(data.Dataset, rowOrder, data.AllowedInputVariables.ToList(), data.TargetVariable);
            var result   = new RegressionProblemData(reduced, data.AllowedInputVariables, data.TargetVariable);

            // Training occupies [0, training.Count); test occupies the remainder.
            result.TrainingPartition.Start = 0;
            result.TrainingPartition.End   = training.Count;
            result.TestPartition.Start     = training.Count;
            result.TestPartition.End       = training.Count + test.Count;
            return result;
        }
Esempio n. 5
0
        /// <summary>
        /// Builds a linear regression model by running the embedded sub-algorithm once and
        /// returning the model of the first regression solution it produces.
        /// </summary>
        /// <param name="pd">Problem data to train on; must have at least <c>MinLeafSize(pd)</c> rows.</param>
        /// <param name="random">Provides the seed for the sub-algorithm run.</param>
        /// <param name="cancellationToken">Propagated to the sub-algorithm run.</param>
        /// <param name="noParameters">Receives the parameter count: one per training row plus one.</param>
        /// <returns>The model of the produced regression solution.</returns>
        /// <exception cref="ArgumentException">Too few training rows, or the algorithm yielded no solution.</exception>
        public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int noParameters)
        {
            if (pd.Dataset.Rows < MinLeafSize(pd))
            {
                throw new ArgumentException("The number of training instances is too small to create a linear model");
            }

            noParameters       = pd.Dataset.Rows + 1;
            Regression.Problem = new RegressionProblem {
                ProblemData = pd
            };

            var results  = RegressionTreeUtilities.RunSubAlgorithm(Regression, random.Next(), cancellationToken);
            var solution = results.Select(x => x.Value).OfType <IRegressionSolution>().FirstOrDefault();
            if (solution == null)
            {
                throw new ArgumentException("No RegressionSolution was provided by the algorithm");
            }

            return solution.Model;
        }
Esempio n. 6
0
        /// <summary>
        /// Builds a leaf model on the given subset of rows. The dataset is reduced to those rows,
        /// the whole reduced set is used as the training partition (empty test partition), and the
        /// resulting model is optionally dampened.
        /// </summary>
        /// <param name="rows">Row indices the leaf model is trained on.</param>
        /// <param name="parameters">Tree parameters supplying data, inputs, target and RNG.</param>
        /// <param name="cancellation">Checked after building; propagated to the inner build.</param>
        /// <param name="numberOfParameters">Receives the parameter count reported by the inner build.</param>
        public IRegressionModel BuildModel(IReadOnlyList <int> rows, RegressionTreeParameters parameters, CancellationToken cancellation, out int numberOfParameters)
        {
            var inputs  = parameters.AllowedInputVariables.ToArray();
            var reduced = RegressionTreeUtilities.ReduceDataset(parameters.Data, rows, inputs, parameters.TargetVariable);
            var pd      = new RegressionProblemData(reduced, inputs, parameters.TargetVariable);

            // Every reduced row is training data; the test partition is empty.
            pd.TrainingPartition.Start = 0;
            pd.TrainingPartition.End   = pd.TestPartition.Start = pd.TestPartition.End = reduced.Rows;

            int paramCount;
            var model = Build(pd, parameters.Random, cancellation, out paramCount);

            if (UseDampening && Dampening > 0.0)
            {
                model = DampenedModel.DampenModel(model, pd, Dampening);
            }

            numberOfParameters = paramCount;
            cancellation.ThrowIfCancellationRequested();
            return model;
        }
Esempio n. 7
0
        /// <summary>
        /// Grows the tree breadth-first. Nodes and their training-row subsets are held in the
        /// queues of the splitting state stored in <paramref name="stateScope"/>, so the
        /// operation can be resumed; on the first invocation (Code &lt;= 0) the queues are
        /// seeded with the root. Each dequeued node is either left as a leaf or split into
        /// two children that are enqueued with their respective row subsets.
        /// </summary>
        public void Split(RegressionNodeTreeModel tree, IReadOnlyList <int> trainingRows, IScope stateScope, CancellationToken cancellationToken)
        {
            var regressionTreeParams = (RegressionTreeParameters)stateScope.Variables[DecisionTreeRegression.RegressionTreeParameterVariableName].Value;
            var splittingState       = (SplittingState)stateScope.Variables[SplittingStateVariableName].Value;
            var variables            = regressionTreeParams.AllowedInputVariables.ToArray();
            var target               = regressionTreeParams.TargetVariable;

            // First invocation: seed the work queues with the root and mark the state as started.
            if (splittingState.Code <= 0)
            {
                splittingState.nodeQueue.Enqueue(tree.Root);
                splittingState.trainingRowsQueue.Enqueue(trainingRows);
                splittingState.Code = 1;
            }

            while (splittingState.nodeQueue.Count > 0)
            {
                var node = splittingState.nodeQueue.Dequeue();
                var rows = splittingState.trainingRowsQueue.Dequeue();

                // Ask the split heuristic whether this node should become an inner node.
                string attribute;
                double threshold;
                var reducedProblem = new RegressionProblemData(RegressionTreeUtilities.ReduceDataset(regressionTreeParams.Data, rows, variables, target), variables, target);
                if (!DecideSplit(reducedProblem, regressionTreeParams.MinLeafSize, out attribute, out threshold))
                {
                    continue; // node stays a leaf
                }

                // Partition the rows by the chosen split and materialize the children.
                IReadOnlyList <int> leftRows, rightRows;
                RegressionTreeUtilities.SplitRows(rows, regressionTreeParams.Data, attribute, threshold, out leftRows, out rightRows);
                node.Split(regressionTreeParams, attribute, threshold, rows.Count);

                splittingState.nodeQueue.Enqueue(node.Left);
                splittingState.nodeQueue.Enqueue(node.Right);
                splittingState.trainingRowsQueue.Enqueue(leftRows);
                splittingState.trainingRowsQueue.Enqueue(rightRows);
                cancellationToken.ThrowIfCancellationRequested();
            }
        }