Ejemplo n.º 1
0
 /// <summary>
 /// Cloning constructor. Forwards <paramref name="original"/> and <paramref name="cloner"/> to the base
 /// constructor and, when the original has a rule list, stores a new list in which every rule has been
 /// passed through <c>cloner.Clone</c>.
 /// </summary>
 /// <param name="original">The model that is being copied.</param>
 /// <param name="cloner">The cloner handed to the base constructor and applied to each rule.</param>
 protected RegressionRuleSetModel(RegressionRuleSetModel original, Cloner cloner) : base(original, cloner)
 {
     // Without a rule list on the original there is nothing to copy; Rules is not assigned here.
     if (original.Rules == null)
     {
         return;
     }

     var clonedRules = original.Rules.Select(cloner.Clone);
     Rules = clonedRules.ToList();
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates the state scope for building a regression tree or rule set. The problem data is reduced to
        /// the allowed input variables plus the target variable and to the training rows, the tree parameters
        /// and the not yet built model are stored as scope variables, the tree operators are initialized on
        /// the scope, and the training and pruning index sets are stored.
        /// </summary>
        /// <param name="random">Random source stored in the tree parameters and passed to <c>GeneratePruningSet</c>.</param>
        /// <param name="problemData">Source problem data; only its training rows are copied.</param>
        /// <param name="pruning">Pruning operator; stored in the parameters and initialized on the scope.</param>
        /// <param name="minLeafSize">Minimum leaf size stored in the tree parameters.</param>
        /// <param name="leafModel">Leaf model; stored in the parameters and initialized on the scope.</param>
        /// <param name="splitter">Splitter; stored in the parameters and initialized on the scope.</param>
        /// <param name="generateRules">When true a rule set model is created, otherwise a node tree model.</param>
        /// <param name="useHoldout">Forwarded to <c>GeneratePruningSet</c>.</param>
        /// <param name="holdoutSize">Forwarded to <c>GeneratePruningSet</c>.</param>
        /// <returns>The populated state scope.</returns>
        /// <exception cref="NotSupportedException">
        /// Thrown when an allowed input or the target is not a double variable of the dataset, or when the
        /// training values contain NaN or infinity.
        /// </exception>
        private static IScope InitializeScope(IRandom random, IRegressionProblemData problemData, IPruning pruning, int minLeafSize, ILeafModel leafModel, ISplitter splitter, bool generateRules, bool useHoldout, double holdoutSize)
        {
            var stateScope = new Scope("RegressionTreeStateScope");

            // Reduce the problem data column-wise to allowed inputs + target and row-wise to the training rows.
            var doubleVariableNames = new HashSet<string>(problemData.Dataset.DoubleVariables);
            var usedVariables = problemData.AllowedInputVariables.Concat(new[] { problemData.TargetVariable }).ToArray();

            var allDoubleValued = usedVariables.All(name => doubleVariableNames.Contains(name));
            if (!allDoubleValued)
            {
                throw new NotSupportedException("Decision tree regression supports only double valued input or output features.");
            }

            var columns = usedVariables
                .Select(name => problemData.Dataset.GetDoubleValues(name, problemData.TrainingIndices).ToArray())
                .ToArray();

            var containsInvalidValue = columns.Any(column => column.Any(value => double.IsNaN(value) || double.IsInfinity(value)));
            if (containsInvalidValue)
            {
                throw new NotSupportedException("Decision tree regression does not support NaN or infinity values in the input dataset.");
            }

            var trainingData = new Dataset(usedVariables, columns);
            var pd = new RegressionProblemData(trainingData, problemData.AllowedInputVariables, problemData.TargetVariable);

            // The reduced dataset consists of training rows only: the training partition spans all rows and
            // the test partition is empty (start == end == row count).
            pd.TrainingPartition.End = pd.TestPartition.Start = pd.TestPartition.End = pd.Dataset.Rows;
            pd.TrainingPartition.Start = 0;

            // Store the regression tree parameters before the operators are initialized on the scope.
            var regressionTreeParams = new RegressionTreeParameters(pruning, minLeafSize, leafModel, pd, random, splitter);
            var parameterVariable = new Variable(RegressionTreeParameterVariableName, regressionTreeParams);
            stateScope.Variables.Add(parameterVariable);

            // Initialize the tree operators.
            pruning.Initialize(stateScope);
            splitter.Initialize(stateScope);
            leafModel.Initialize(stateScope);

            // Store the unbuilt model; only the rule set variant needs its own scope initialization.
            IItem model;
            if (!generateRules)
            {
                model = RegressionNodeTreeModel.CreateTreeModel(problemData.TargetVariable, regressionTreeParams);
            }
            else
            {
                model = RegressionRuleSetModel.CreateRuleModel(problemData.TargetVariable, regressionTreeParams);
                RegressionRuleSetModel.Initialize(stateScope);
            }
            stateScope.Variables.Add(new Variable(ModelVariableName, model));

            // Store the training and pruning indices.
            IReadOnlyList<int> trainingRows;
            IReadOnlyList<int> pruningRows;
            var allTrainingRows = pd.TrainingIndices.ToArray();

            GeneratePruningSet(allTrainingRows, random, useHoldout, holdoutSize, out trainingRows, out pruningRows);
            stateScope.Variables.Add(new Variable(TrainingSetVariableName, new IntArray(trainingRows.ToArray())));
            stateScope.Variables.Add(new Variable(PruningSetVariableName, new IntArray(pruningRows.ToArray())));

            return stateScope;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Creates one result entry per rule of <paramref name="ruleSetModel"/>, named "Rule0", "Rule1", ...
        /// in rule order, and wraps the entries in a single result named <paramref name="resultName"/>.
        /// </summary>
        /// <param name="ruleSetModel">Rule set whose rules are reported.</param>
        /// <param name="pd">
        /// Problem data passed to the first rule; every per-rule call writes the problem data used for the
        /// next rule back through its <c>out</c> argument.
        /// </param>
        /// <param name="resultName">Name of the returned result.</param>
        /// <param name="displayModels">Forwarded unchanged to the per-rule overload.</param>
        /// <returns>A result holding the collection of per-rule results.</returns>
        public static Result CreateRulesResult(RegressionRuleSetModel ruleSetModel, IRegressionProblemData pd, string resultName, bool displayModels)
        {
            var ruleResults = new ResultCollection();
            var ruleIndex = 0;

            foreach (var rule in ruleSetModel.Rules)
            {
                var entryName = "Rule" + ruleIndex;

                // pd is both the input for this rule and, via the out argument, replaced for the next one.
                var entryValue = CreateRulesResult(rule, pd, displayModels, out pd);

                ruleResults.Add(new Result(entryName, entryValue));
                ruleIndex++;
            }

            return new Result(resultName, ruleResults);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Counts how often each variable occurs among the split attributes of the rules in
        /// <paramref name="ruleSetModel"/>.
        /// </summary>
        /// <param name="ruleSetModel">Rule set whose rules are inspected.</param>
        /// <returns>
        /// A dictionary with one entry per element of <c>VariablesUsedForPrediction</c>, starting at zero and
        /// incremented once per occurrence in any rule's <c>SplitAttributes</c>.
        /// </returns>
        public static Dictionary <string, int> GetRuleVariableFrequences(RegressionRuleSetModel ruleSetModel)
        {
            // Every variable used for prediction gets a counter, including ones that never appear in a split.
            var frequencies = ruleSetModel.VariablesUsedForPrediction.ToDictionary(name => name, _ => 0);

            foreach (var rule in ruleSetModel.Rules)
            {
                foreach (var attribute in rule.SplitAttributes)
                {
                    // The indexer read throws if a split attribute has no counter in the dictionary.
                    frequencies[attribute] = frequencies[attribute] + 1;
                }
            }

            return frequencies;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Counts, for every rule of <paramref name="setModel"/>, the rows for which it is the first rule
        /// (in rule order) whose <c>Covers</c> check succeeds.
        /// </summary>
        /// <param name="setModel">Rule set providing the ordered rules.</param>
        /// <param name="data">Dataset the rows refer to.</param>
        /// <param name="rows">Row indices to examine.</param>
        /// <returns>
        /// One count per rule, in rule order. A row that no rule covers is not counted anywhere.
        /// </returns>
        private static IReadOnlyList <double> CountCoverage(RegressionRuleSetModel setModel, IDataset data, IEnumerable <int> rows)
        {
            var rules = setModel.Rules.ToArray();
            var counts = new double[rules.Length];

            foreach (var row in rows)
            {
                // Advance to the first rule that covers this row; later rules are not evaluated.
                var index = 0;
                while (index < rules.Length && !rules[index].Covers(data, row))
                {
                    index++;
                }

                if (index < rules.Length)
                {
                    counts[index] += 1;
                }
            }

            return counts;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Builds a column chart with the per-rule coverage counts of <paramref name="setModel"/> on the
        /// training and on the test indices of <paramref name="problemData"/>.
        /// </summary>
        /// <param name="setModel">Rule set whose coverage is counted.</param>
        /// <param name="problemData">Supplies the dataset and the training and test indices.</param>
        /// <returns>
        /// A result named <c>CoverageDiagramResultName</c> containing a data table with a "Training" and a
        /// "Test" row.
        /// </returns>
        public static IResult CreateCoverageDiagram(RegressionRuleSetModel setModel, IRegressionProblemData problemData)
        {
            var coverageTable    = new DataTable(CoverageDiagramResultName);
            var trainingCoverage = CountCoverage(setModel, problemData.Dataset, problemData.TrainingIndices);
            var testCoverage     = CountCoverage(setModel, problemData.Dataset, problemData.TestIndices);

            var trainingRow = new DataRow("Training", "", trainingCoverage);
            coverageTable.Rows.Add(trainingRow);

            var testRow = new DataRow("Test", "", testCoverage);
            coverageTable.Rows.Add(testRow);

            // Draw every row of the table as columns.
            foreach (var row in coverageTable.Rows)
            {
                row.VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Columns;
            }

            // Fix the x axis to [0, number of rules + 1] instead of letting the chart scale it automatically.
            var tableVisuals = coverageTable.VisualProperties;
            tableVisuals.XAxisMaximumFixedValue = trainingCoverage.Count + 1;
            tableVisuals.XAxisMaximumAuto       = false;
            tableVisuals.XAxisMinimumFixedValue = 0;
            tableVisuals.XAxisMinimumAuto       = false;
            tableVisuals.XAxisTitle             = "Rule";
            tableVisuals.YAxisTitle             = "Covered Instances";

            return new Result(CoverageDiagramResultName, coverageTable);
        }