示例#1
0
        /// <summary>
        /// Recursively builds a tree of text nodes mirroring the regression tree rooted at
        /// <paramref name="regressionNode"/>.
        /// </summary>
        /// <remarks>
        /// Inner nodes are labelled "attribute &lt;= value" (value formatted with "0.###"), followed by a
        /// "pf = ..." line when the node's pruning strength is not NaN. Leaves are labelled
        /// "Model id" plus the number of training and test rows passed down to them.
        /// </remarks>
        /// <param name="regressionNode">Node of the regression tree to mirror.</param>
        /// <param name="dict">Receives an entry (leaf id to leaf node) for every leaf visited.</param>
        /// <param name="trainingLeafRows">Receives an entry (leaf id to training rows) for every leaf visited.</param>
        /// <param name="testLeafRows">Receives an entry (leaf id to test rows) for every leaf visited.</param>
        /// <param name="nextId">Counter supplying leaf ids; incremented once per leaf.</param>
        /// <param name="data">Dataset handed to <c>RegressionTreeUtilities.SplitRows</c>.</param>
        /// <param name="trainingRows">Training rows that reach <paramref name="regressionNode"/>.</param>
        /// <param name="testRows">Test rows that reach <paramref name="regressionNode"/>.</param>
        /// <returns>The text node representing <paramref name="regressionNode"/> and its subtrees.</returns>
        private static SymbolicExpressionTreeNode MirrorTree(RegressionNodeModel regressionNode, IDictionary <int, RegressionNodeModel> dict,
                                                             IDictionary <int, IReadOnlyList <int> > trainingLeafRows,
                                                             IDictionary <int, IReadOnlyList <int> > testLeafRows,
                                                             IntValue nextId, IDataset data, IReadOnlyList <int> trainingRows, IReadOnlyList <int> testRows)
        {
            if (!regressionNode.IsLeaf)
            {
                // Inner node: describe the split condition, optionally followed by the pruning strength.
                var label = regressionNode.SplitAttribute + " <= " + regressionNode.SplitValue.ToString("0.###");
                if (!double.IsNaN(regressionNode.PruningStrength))
                {
                    label += "\npf = " + regressionNode.PruningStrength.ToString("0.###");
                }

                var innerNode = new SymbolicExpressionTreeNode(new TextSymbol(label));

                // Partition both row sets with this node's split so each child only sees its own rows.
                IReadOnlyList <int> leftTraining, rightTraining;
                IReadOnlyList <int> leftTest, rightTest;
                RegressionTreeUtilities.SplitRows(trainingRows, data, regressionNode.SplitAttribute, regressionNode.SplitValue, out leftTraining, out rightTraining);
                RegressionTreeUtilities.SplitRows(testRows, data, regressionNode.SplitAttribute, regressionNode.SplitValue, out leftTest, out rightTest);

                // Left subtree is mirrored first, so leaf ids are handed out left to right.
                var leftMirror = MirrorTree(regressionNode.Left, dict, trainingLeafRows, testLeafRows, nextId, data, leftTraining, leftTest);
                innerNode.AddSubtree(leftMirror);
                var rightMirror = MirrorTree(regressionNode.Right, dict, trainingLeafRows, testLeafRows, nextId, data, rightTraining, rightTest);
                innerNode.AddSubtree(rightMirror);

                return innerNode;
            }

            // Leaf: take the current id, advance the counter, and register the leaf under that id.
            var leafId = nextId.Value;
            nextId.Value = leafId + 1;

            dict.Add(leafId, regressionNode);
            trainingLeafRows.Add(leafId, trainingRows);
            testLeafRows.Add(leafId, testRows);

            var leafLabel = "Model " + leafId + "\n(" + trainingRows.Count + "/" + testRows.Count + ")";
            return new SymbolicExpressionTreeNode(new TextSymbol(leafLabel));
        }
示例#2
0
            /// <summary>
            /// Refills <c>nodeQueue</c> and <c>trainingRowsQueue</c> with the leaves of <paramref name="tree"/>
            /// and the training rows that reach each leaf.
            /// </summary>
            /// <remarks>
            /// Both queues are cleared first. The tree is then traversed breadth-first from its root; at each
            /// inner node the rows are partitioned with <c>RegressionTreeUtilities.SplitRows</c> using the
            /// node's split attribute and value, and the two parts are passed to the left and right child.
            /// </remarks>
            /// <param name="tree">Tree whose leaves are collected.</param>
            /// <param name="trainingRows">Training rows that enter the tree at its root.</param>
            /// <param name="data">Dataset handed to <c>RegressionTreeUtilities.SplitRows</c>.</param>
            public void FillLeafs(RegressionNodeTreeModel tree, IReadOnlyList <int> trainingRows, IDataset data)
            {
                nodeQueue.Clear();
                trainingRowsQueue.Clear();

                // Two work queues kept in lock-step: entry k of one belongs to entry k of the other.
                var pendingNodes = new Queue <RegressionNodeModel>();
                var pendingRows  = new Queue <IReadOnlyList <int> >();
                pendingNodes.Enqueue(tree.Root);
                pendingRows.Enqueue(trainingRows);

                while (pendingNodes.Count > 0)
                {
                    var node = pendingNodes.Dequeue();
                    var rows = pendingRows.Dequeue();

                    if (!node.IsLeaf)
                    {
                        // Inner node: hand each child the part of the rows that falls on its side of the split.
                        IReadOnlyList <int> leftRows, rightRows;
                        RegressionTreeUtilities.SplitRows(rows, data, node.SplitAttribute, node.SplitValue, out leftRows, out rightRows);

                        pendingNodes.Enqueue(node.Left);
                        pendingRows.Enqueue(leftRows);
                        pendingNodes.Enqueue(node.Right);
                        pendingRows.Enqueue(rightRows);
                    }
                    else
                    {
                        // Leaf: record it together with the rows that arrived here.
                        nodeQueue.Enqueue(node);
                        trainingRowsQueue.Enqueue(rows);
                    }
                }
            }
示例#3
0
        /// <summary>
        /// Grows <paramref name="tree"/> breadth-first by repeatedly deciding, for each queued node, whether
        /// its training rows should be split, and enqueuing the resulting children.
        /// </summary>
        /// <remarks>
        /// The work queues live in the <c>SplittingState</c> stored in <paramref name="stateScope"/>. They are
        /// seeded with the root and <paramref name="trainingRows"/> only when the state's <c>Code</c> is
        /// not positive; afterwards <c>Code</c> is set to 1, so a later call with the same state continues
        /// with whatever is still queued instead of starting over.
        /// Cancellation is checked once at the end of every iteration (including iterations that end in a
        /// leaf), i.e. always at a point where the node just dequeued has been fully handled.
        /// </remarks>
        /// <param name="tree">Tree to grow; its root is the first node processed on a fresh state.</param>
        /// <param name="trainingRows">Training rows assigned to the root on a fresh state.</param>
        /// <param name="stateScope">Scope holding the regression tree parameters and the splitting state.</param>
        /// <param name="cancellationToken">Token observed after each processed node.</param>
        /// <exception cref="OperationCanceledException">
        /// Thrown when <paramref name="cancellationToken"/> has been cancelled.
        /// </exception>
        public void Split(RegressionNodeTreeModel tree, IReadOnlyList <int> trainingRows, IScope stateScope, CancellationToken cancellationToken)
        {
            var regressionTreeParams = (RegressionTreeParameters)stateScope.Variables[DecisionTreeRegression.RegressionTreeParameterVariableName].Value;
            var splittingState       = (SplittingState)stateScope.Variables[SplittingStateVariableName].Value;
            var variables            = regressionTreeParams.AllowedInputVariables.ToArray();
            var target               = regressionTreeParams.TargetVariable;

            if (splittingState.Code <= 0)
            {
                // Fresh state: start from the root with all training rows.
                splittingState.nodeQueue.Enqueue(tree.Root);
                splittingState.trainingRowsQueue.Enqueue(trainingRows);
                splittingState.Code = 1;
            }

            while (splittingState.nodeQueue.Count != 0)
            {
                var n    = splittingState.nodeQueue.Dequeue();
                var rows = splittingState.trainingRowsQueue.Dequeue();

                // Evaluate the split on a dataset restricted to this node's rows and the allowed variables.
                var reducedData = RegressionTreeUtilities.ReduceDataset(regressionTreeParams.Data, rows, variables, target);
                var problemData = new RegressionProblemData(reducedData, variables, target);

                string attr;
                double splitValue;
                var    shouldSplit = DecideSplit(problemData, regressionTreeParams.MinLeafSize, out attr, out splitValue);

                if (shouldSplit)
                {
                    IReadOnlyList <int> leftRows, rightRows;
                    RegressionTreeUtilities.SplitRows(rows, regressionTreeParams.Data, attr, splitValue, out leftRows, out rightRows);
                    n.Split(regressionTreeParams, attr, splitValue, rows.Count);

                    splittingState.nodeQueue.Enqueue(n.Left);
                    splittingState.nodeQueue.Enqueue(n.Right);
                    splittingState.trainingRowsQueue.Enqueue(leftRows);
                    splittingState.trainingRowsQueue.Enqueue(rightRows);
                }

                // Checked for leaves as well: previously the leaf path skipped this via 'continue', so a
                // long run of leaf decisions could not be cancelled. Throwing here keeps both queues in
                // sync because the current node has been completely processed.
                cancellationToken.ThrowIfCancellationRequested();
            }
        }