/// <summary>
/// Cloning constructor. Delegates to the base cloning constructor and, when the
/// original model has a rule list, stores a new list containing a clone of every rule.
/// </summary>
/// <param name="original">The model to copy from.</param>
/// <param name="cloner">The cloner passed to the base constructor and used to clone each rule.</param>
protected RegressionRuleSetModel(RegressionRuleSetModel original, Cloner cloner)
  : base(original, cloner) {
  // Without rules on the source model there is nothing to copy; Rules is not assigned here.
  if (original.Rules == null) {
    return;
  }

  var clonedRules = original.Rules.Select(cloner.Clone);
  Rules = clonedRules.ToList();
}
/// <summary>
/// Creates the state scope for building a regression tree or rule set.
/// The scope receives, in this order: the regression tree parameters, the unbuilt model,
/// the training set indices and the pruning set indices.
/// </summary>
/// <param name="random">Random number generator stored in the tree parameters and passed to the pruning set generation.</param>
/// <param name="problemData">Source problem data; only its allowed input variables, target variable and training rows are used.</param>
/// <param name="pruning">Pruning operator; stored in the tree parameters and initialized on the scope.</param>
/// <param name="minLeafSize">Minimum leaf size stored in the tree parameters.</param>
/// <param name="leafModel">Leaf model operator; stored in the tree parameters and initialized on the scope.</param>
/// <param name="splitter">Splitter operator; stored in the tree parameters and initialized on the scope.</param>
/// <param name="generateRules">When true a rule set model is created, otherwise a node tree model.</param>
/// <param name="useHoldout">Forwarded to the pruning set generation.</param>
/// <param name="holdoutSize">Forwarded to the pruning set generation.</param>
/// <returns>The populated state scope.</returns>
/// <exception cref="NotSupportedException">
/// Thrown when an allowed input variable or the target variable is not a double variable,
/// or when any of their training values is NaN or infinite.
/// </exception>
private static IScope InitializeScope(IRandom random, IRegressionProblemData problemData, IPruning pruning, int minLeafSize, ILeafModel leafModel, ISplitter splitter, bool generateRules, bool useHoldout, double holdoutSize) {
  var scope = new Scope("RegressionTreeStateScope");

  // Reduce the problem data column-wise to allowed inputs + target and row-wise to the training rows.
  var doubleVariableNames = new HashSet<string>(problemData.Dataset.DoubleVariables);
  var usedVariables = problemData.AllowedInputVariables
    .Concat(new[] { problemData.TargetVariable })
    .ToArray();
  var allVariablesAreDouble = usedVariables.All(name => doubleVariableNames.Contains(name));
  if (!allVariablesAreDouble) {
    throw new NotSupportedException("Decision tree regression supports only double valued input or output features.");
  }

  var columns = usedVariables
    .Select(name => problemData.Dataset.GetDoubleValues(name, problemData.TrainingIndices).ToArray())
    .ToArray();
  var containsInvalidValue = columns
    .SelectMany(column => column)
    .Any(value => double.IsNaN(value) || double.IsInfinity(value));
  if (containsInvalidValue) {
    throw new NotSupportedException("Decision tree regression does not support NaN or infinity values in the input dataset.");
  }

  var reducedDataset = new Dataset(usedVariables, columns);
  var reducedProblemData = new RegressionProblemData(reducedDataset, problemData.AllowedInputVariables, problemData.TargetVariable);

  // Every row of the reduced dataset is a training row; the test partition is empty (start == end == row count).
  var rowCount = reducedProblemData.Dataset.Rows;
  reducedProblemData.TestPartition.End = rowCount;
  reducedProblemData.TestPartition.Start = rowCount;
  reducedProblemData.TrainingPartition.End = rowCount;
  reducedProblemData.TrainingPartition.Start = 0;

  // Store the regression tree parameters.
  var treeParameters = new RegressionTreeParameters(pruning, minLeafSize, leafModel, reducedProblemData, random, splitter);
  scope.Variables.Add(new Variable(RegressionTreeParameterVariableName, treeParameters));

  // Initialize the tree operators.
  pruning.Initialize(scope);
  splitter.Initialize(scope);
  leafModel.Initialize(scope);

  // Store the unbuilt model.
  IItem unbuiltModel;
  if (generateRules) {
    unbuiltModel = RegressionRuleSetModel.CreateRuleModel(problemData.TargetVariable, treeParameters);
    RegressionRuleSetModel.Initialize(scope);
  } else {
    unbuiltModel = RegressionNodeTreeModel.CreateTreeModel(problemData.TargetVariable, treeParameters);
  }
  scope.Variables.Add(new Variable(ModelVariableName, unbuiltModel));

  // Store the training and pruning indices.
  IReadOnlyList<int> trainingSet;
  IReadOnlyList<int> pruningSet;
  var allTrainingIndices = reducedProblemData.TrainingIndices.ToArray();
  GeneratePruningSet(allTrainingIndices, random, useHoldout, holdoutSize, out trainingSet, out pruningSet);
  scope.Variables.Add(new Variable(TrainingSetVariableName, new IntArray(trainingSet.ToArray())));
  scope.Variables.Add(new Variable(PruningSetVariableName, new IntArray(pruningSet.ToArray())));

  return scope;
}
/// <summary>
/// Creates a result named <paramref name="resultName"/> whose value is a collection with one
/// entry per rule of <paramref name="ruleSetModel"/>, named "Rule0", "Rule1", ... in rule order.
/// </summary>
/// <param name="ruleSetModel">The rule set whose rules are reported.</param>
/// <param name="pd">Problem data passed to the per-rule overload for the first rule.</param>
/// <param name="resultName">Name of the returned result.</param>
/// <param name="displayModels">Forwarded to the per-rule overload.</param>
/// <returns>A result wrapping the collection of per-rule results.</returns>
public static Result CreateRulesResult(RegressionRuleSetModel ruleSetModel, IRegressionProblemData pd, string resultName, bool displayModels) {
  var ruleResults = new ResultCollection();
  var ruleIndex = 0;
  foreach (var rule in ruleSetModel.Rules) {
    var ruleName = "Rule" + ruleIndex;
    ruleIndex++;

    // pd is both input and output: the per-rule overload replaces it through its out
    // parameter, and the replaced value is what the next rule receives.
    var ruleValue = CreateRulesResult(rule, pd, displayModels, out pd);
    ruleResults.Add(new Result(ruleName, ruleValue));
  }

  return new Result(resultName, ruleResults);
}
/// <summary>
/// Counts, for every variable in the model's VariablesUsedForPrediction, how many times
/// it occurs in the SplitAttributes of the model's rules.
/// </summary>
/// <param name="ruleSetModel">The rule set to analyze.</param>
/// <returns>
/// A dictionary keyed by variable name; variables that never occur as a split attribute map to 0.
/// </returns>
public static Dictionary<string, int> GetRuleVariableFrequences(RegressionRuleSetModel ruleSetModel) {
  // Seed every prediction variable with a zero count so unused variables still appear in the result.
  var frequencies = ruleSetModel.VariablesUsedForPrediction.ToDictionary(name => name, name => 0);

  foreach (var rule in ruleSetModel.Rules) {
    foreach (var attribute in rule.SplitAttributes) {
      // The indexer read throws for an attribute that was not seeded above.
      frequencies[attribute] = frequencies[attribute] + 1;
    }
  }

  return frequencies;
}
/// <summary>
/// Counts, per rule, how many of the given rows are covered by that rule.
/// A row is attributed only to the first rule (in rule order) that covers it;
/// rows covered by no rule are not counted.
/// </summary>
/// <param name="setModel">The rule set whose rules are tested in order.</param>
/// <param name="data">The dataset the row indices refer to.</param>
/// <param name="rows">The row indices to test.</param>
/// <returns>One count per rule, in rule order.</returns>
private static IReadOnlyList<double> CountCoverage(RegressionRuleSetModel setModel, IDataset data, IEnumerable<int> rows) {
  var rules = setModel.Rules.ToArray();
  var counts = new double[rules.Length];

  foreach (var row in rows) {
    // Advance to the first rule covering this row (or past the end if none does).
    var ruleIndex = 0;
    while (ruleIndex < rules.Length && !rules[ruleIndex].Covers(data, row)) {
      ruleIndex++;
    }

    if (ruleIndex < rules.Length) {
      counts[ruleIndex]++;
    }
  }

  return counts;
}
/// <summary>
/// Builds a column chart data table with two rows, "Training" and "Test", holding the
/// per-rule coverage counts for the training and test indices of <paramref name="problemData"/>.
/// </summary>
/// <param name="setModel">The rule set whose coverage is counted.</param>
/// <param name="problemData">Supplies the dataset and the training/test indices.</param>
/// <returns>A result named CoverageDiagramResultName wrapping the data table.</returns>
public static IResult CreateCoverageDiagram(RegressionRuleSetModel setModel, IRegressionProblemData problemData) {
  var table = new DataTable(CoverageDiagramResultName);

  var trainingCoverage = CountCoverage(setModel, problemData.Dataset, problemData.TrainingIndices);
  var testCoverage = CountCoverage(setModel, problemData.Dataset, problemData.TestIndices);

  var trainingRow = new DataRow("Training", "", trainingCoverage);
  table.Rows.Add(trainingRow);
  var testRow = new DataRow("Test", "", testCoverage);
  table.Rows.Add(testRow);

  foreach (var row in table.Rows) {
    row.VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Columns;
  }

  // Fix the x axis to the range [0, number of coverage entries + 1] instead of auto-scaling.
  var visualProperties = table.VisualProperties;
  visualProperties.XAxisMaximumFixedValue = trainingCoverage.Count + 1;
  visualProperties.XAxisMaximumAuto = false;
  visualProperties.XAxisMinimumFixedValue = 0;
  visualProperties.XAxisMinimumAuto = false;
  visualProperties.XAxisTitle = "Rule";
  visualProperties.YAxisTitle = "Covered Instances";

  return new Result(CoverageDiagramResultName, table);
}