private GradientChart CreateGradientChart(string variableName, ModifiableDataset sharedFixedVariables) {
  var gradientChart = new GradientChart {
    Dock = DockStyle.Fill,
    Margin = Padding.Empty,
    ShowLegend = false,
    ShowCursor = true,
    ShowConfigButton = false,
    YAxisTicks = 5,
  };
  gradientChart.VariableValueChanged += async (o, e) => {
    var recalculations = VisibleGradientCharts
      .Except(new[] { (GradientChart)o })
      .Select(async chart => {
        await chart.RecalculateAsync(updateOnFinish: false, resetYAxis: false);
      }).ToList();
    await Task.WhenAll(recalculations);
    if (recalculations.All(t => t.IsCompleted)) {
      SetupYAxis();
    }
  };
  gradientChart.Configure(new[] { Content }, sharedFixedVariables, variableName, Points);
  gradientChart.SolutionAdded += gradientChart_SolutionAdded;
  gradientChart.SolutionRemoved += gradientChart_SolutionRemoved;
  return gradientChart;
}

public ParameterizedMeanFunction GetParameterizedMeanFunction(double[] p, int[] columnIndices) {
  if (p.Length > 0) {
    throw new ArgumentException("No parameters allowed for model-based mean function.", "p");
  }
  var solution = RegressionSolution;
  var variableNames = solution.ProblemData.AllowedInputVariables.ToArray();
  if (variableNames.Length != columnIndices.Length) {
    throw new ArgumentException("The number of input variables does not match in MeanModel");
  }
  var variableValues = variableNames.Select(_ => new List<double>() { 0.0 }).ToArray(); // a single row of zeros
  // use a modifiable dataset to pass values to the model
  var ds = new ModifiableDataset(variableNames, variableValues);
  var mf = new ParameterizedMeanFunction();
  var model = solution.Model; // retrieve the model only once
  mf.Mean = (x, i) => {
    ds.ReplaceRow(0, Util.GetRow(x, i, columnIndices).OfType<object>());
    return model.GetEstimatedValues(ds, 0.ToEnumerable()).Single(); // evaluate the model on the specified row only
  };
  mf.Gradient = (x, i, k) => {
    if (k > 0) {
      throw new ArgumentException(); // no parameters -> no gradient components
    }
    return 0.0;
  };
  return mf;
}

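// Usage sketch (not from the original source): assumes a hypothetical MeanModel
// instance `meanModel` wrapping a trained regression solution with two input variables.
var mf = meanModel.GetParameterizedMeanFunction(new double[0], new[] { 0, 1 }); // empty p: no parameters allowed
var x = new double[,] { { 1.0, 2.0 } };  // a single evaluation point
double mean = mf.Mean(x, 0);             // routes row 0 through the wrapped regression model
double grad = mf.Gradient(x, 0, 0);      // always 0.0; the model-based mean has no tunable parameters
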
private void RecalculateInternalDataset() {
  if (sharedFixedVariables == null) {
    return;
  }

  var factorValues = new List<string>(variableValues);
  var variables = sharedFixedVariables.VariableNames.ToList();
  var values = new List<IList>();
  foreach (var varName in variables) {
    if (varName == FreeVariable) {
      values.Add(factorValues);
    } else if (sharedFixedVariables.VariableHasType<double>(varName)) {
      values.Add(Enumerable.Repeat(sharedFixedVariables.GetDoubleValue(varName, 0), factorValues.Count).ToList());
    } else if (sharedFixedVariables.VariableHasType<string>(varName)) {
      values.Add(Enumerable.Repeat(sharedFixedVariables.GetStringValue(varName, 0), factorValues.Count).ToList());
    }
  }
  internalDataset = new ModifiableDataset(variables, values);
}

public void Configure(IEnumerable<IRegressionSolution> solutions, ModifiableDataset sharedFixedVariables, string freeVariable, IList<string> variableValues, bool initializeAxisRanges = true) {
  if (!SolutionsCompatible(solutions)) {
    throw new ArgumentException("Solutions are not compatible with the problem data.");
  }
  this.freeVariable = freeVariable;
  this.variableValues = new List<string>(variableValues);

  this.solutions.Clear();
  this.solutions.AddRange(solutions);

  // add an event such that whenever a value is changed in the shared dataset,
  // this change is reflected in the internal dataset (where the value becomes a whole column)
  if (this.sharedFixedVariables != null) {
    this.sharedFixedVariables.ItemChanged -= sharedFixedVariables_ItemChanged;
    this.sharedFixedVariables.Reset -= sharedFixedVariables_Reset;
  }
  this.sharedFixedVariables = sharedFixedVariables;
  this.sharedFixedVariables.ItemChanged += sharedFixedVariables_ItemChanged;
  this.sharedFixedVariables.Reset += sharedFixedVariables_Reset;

  RecalculateInternalDataset();

  chart.Series.Clear();
  seriesCache.Clear();
  ciSeriesCache.Clear();
  foreach (var solution in this.solutions) {
    var series = CreateSeries(solution);
    seriesCache.Add(solution, series.Item1);
    if (series.Item2 != null) {
      ciSeriesCache.Add(solution, series.Item2);
    }
  }

  InitSeriesData();
  OrderAndColorSeries();
}

private void RecalculateInternalDataset() {
  if (sharedFixedVariables == null) {
    return;
  }

  // we expand the range in order to get nice tick intervals on the x axis
  double xmin, xmax, xinterval;
  ChartUtil.CalculateAxisInterval(trainingMin, trainingMax, XAxisTicks, out xmin, out xmax, out xinterval);

  if (FixedXAxisMin.HasValue) {
    xmin = FixedXAxisMin.Value;
  }
  if (FixedXAxisMax.HasValue) {
    xmax = FixedXAxisMax.Value;
  }
  double step = (xmax - xmin) / drawingSteps;

  var xvalues = new List<double>();
  for (int i = 0; i < drawingSteps; i++) {
    xvalues.Add(xmin + i * step);
  }

  var variables = sharedFixedVariables.DoubleVariables.ToList();
  internalDataset = new ModifiableDataset(variables,
    variables.Select(x => x == FreeVariable
      ? xvalues
      : Enumerable.Repeat(sharedFixedVariables.GetDoubleValue(x, 0), xvalues.Count).ToList()
    )
  );
}

private void SampleTrainingData(MersenneTwister rand, ModifiableDataset ds, int rRows, IDataset sourceDs, double[] curTarget, string targetVarName, IEnumerable<int> trainingIndices) {
  var selectedRows = trainingIndices.SampleRandomWithoutRepetition(rand, rRows).ToArray();
  int t = 0;
  object[] srcRow = new object[ds.Columns];
  var varNames = ds.DoubleVariables.ToArray();
  foreach (var r in selectedRows) {
    // take all values from the original dataset
    for (int c = 0; c < srcRow.Length; c++) {
      var col = sourceDs.GetReadOnlyDoubleValues(varNames[c]);
      srcRow[c] = col[r];
    }
    ds.ReplaceRow(t, srcRow);
    // but use the updated target values
    ds.SetVariableValue(curTarget[r], targetVarName, t);
    t++;
  }
}

public void TestDecisionTreePartialDependence() {
  var provider = new HeuristicLab.Problems.Instances.DataAnalysis.RegressionRealWorldInstanceProvider();
  var instance = provider.GetDataDescriptors().Single(x => x.Name.Contains("Tower"));
  var regProblem = new RegressionProblem();
  regProblem.Load(provider.LoadData(instance));

  var problemData = regProblem.ProblemData;
  var state = GradientBoostedTreesAlgorithmStatic.CreateGbmState(problemData, new SquaredErrorLoss(), randSeed: 31415, maxSize: 10, r: 0.5, m: 1, nu: 0.02);
  for (int i = 0; i < 1000; i++) {
    GradientBoostedTreesAlgorithmStatic.MakeStep(state);
  }

  var mostImportantVar = state.GetVariableRelevance().OrderByDescending(kvp => kvp.Value).First();
  Console.WriteLine("var: {0} relevance: {1}", mostImportantVar.Key, mostImportantVar.Value);
  var model = (IGradientBoostedTreesModel)state.GetModel();
  var treeM = model.Models.Skip(1).First();
  Console.WriteLine(treeM.ToString());
  Console.WriteLine();

  var mostImportantVarValues = problemData.Dataset.GetDoubleValues(mostImportantVar.Key).OrderBy(x => x).ToArray();
  var ds = new ModifiableDataset(new string[] { mostImportantVar.Key },
    new IList[] { mostImportantVarValues.ToList<double>() });

  var estValues = model.GetEstimatedValues(ds, Enumerable.Range(0, mostImportantVarValues.Length)).ToArray();

  for (int i = 0; i < mostImportantVarValues.Length; i += 10) {
    Console.WriteLine("{0,-5:N3} {1,-5:N3}", mostImportantVarValues[i], estValues[i]);
  }
}

// wraps the list of basis functions into an IRegressionProblemData object
private static IRegressionProblemData PrepareData(IRegressionProblemData problemData, IEnumerable<BasisFunction> basisFunctions) {
  HashSet<string> variableNames = new HashSet<string>();
  List<IList> variableVals = new List<IList>();
  foreach (var basisFunc in basisFunctions) {
    variableNames.Add(basisFunc.Var);
    variableVals.Add(new List<double>(basisFunc.Val));
  }
  var matrix = new ModifiableDataset(variableNames, variableVals);

  // add the unmodified target variable to the matrix
  matrix.AddVariable(problemData.TargetVariable, problemData.TargetVariableValues.ToList());
  IEnumerable<string> allowedInputVars = matrix.VariableNames.Where(x => !x.Equals(problemData.TargetVariable)).ToArray();
  IRegressionProblemData rpd = new RegressionProblemData(matrix, allowedInputVars, problemData.TargetVariable);
  rpd.TargetVariable = problemData.TargetVariable;
  rpd.TrainingPartition.Start = problemData.TrainingPartition.Start;
  rpd.TrainingPartition.End = problemData.TrainingPartition.End;
  rpd.TestPartition.Start = problemData.TestPartition.Start;
  rpd.TestPartition.End = problemData.TestPartition.End;
  return rpd;
}

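// Usage sketch (not from the original source): the ModifiableDataset calls above
// in isolation, with hypothetical feature columns for two basis functions.
IList logX1 = new List<double> { 0.00, 0.69, 1.10 };
IList x2Squared = new List<double> { 1.0, 4.0, 9.0 };
var matrix = new ModifiableDataset(new[] { "log(x1)", "x2^2" }, new IList[] { logX1, x2Squared });
matrix.AddVariable("y", new List<double> { 1.5, 3.2, 5.1 }); // append the target column
var rpd = new RegressionProblemData(matrix, new[] { "log(x1)", "x2^2" }, "y");
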
protected override void Run(CancellationToken cancellationToken) {
  // Set up the algorithm
  if (SetSeedRandomly) {
    Seed = RandomSeedGenerator.GetSeed();
  }
  var rand = new MersenneTwister((uint)Seed);

  // Set up the results display
  var iterations = new IntValue(0);
  Results.Add(new Result("Iterations", iterations));

  var table = new DataTable("Qualities");
  table.Rows.Add(new DataRow("R² (train)"));
  table.Rows.Add(new DataRow("R² (test)"));
  Results.Add(new Result("Qualities", table));
  var curLoss = new DoubleValue();
  var curTestLoss = new DoubleValue();
  Results.Add(new Result("R² (train)", curLoss));
  Results.Add(new Result("R² (test)", curTestLoss));
  var runCollection = new RunCollection();
  if (StoreRuns) {
    Results.Add(new Result("Runs", runCollection));
  }

  // init
  var problemData = Problem.ProblemData;
  var targetVarName = problemData.TargetVariable;
  var activeVariables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
  var modifiableDataset = new ModifiableDataset(
    activeVariables,
    activeVariables.Select(v => problemData.Dataset.GetDoubleValues(v).ToList()));

  var trainingRows = problemData.TrainingIndices;
  var testRows = problemData.TestIndices;
  var yPred = new double[trainingRows.Count()];
  var yPredTest = new double[testRows.Count()];
  var y = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
  var curY = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
  var yTest = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TestIndices).ToArray();
  var curYTest = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TestIndices).ToArray();
  var nu = Nu;
  var mVars = (int)Math.Ceiling(M * problemData.AllowedInputVariables.Count());
  var rRows = (int)Math.Ceiling(R * problemData.TrainingIndices.Count());
  var alg = RegressionAlgorithm;
  List<IRegressionModel> models = new List<IRegressionModel>();
  try {
    // Loop until iteration limit reached or canceled.
    for (int i = 0; i < Iterations; i++) {
      cancellationToken.ThrowIfCancellationRequested();

      modifiableDataset.RemoveVariable(targetVarName);
      modifiableDataset.AddVariable(targetVarName, curY.Concat(curYTest).ToList());

      SampleTrainingData(rand, modifiableDataset, rRows, problemData.Dataset, curY, problemData.TargetVariable, problemData.TrainingIndices);
      // all training indices from the original problem data are allowed
      var modifiableProblemData = new RegressionProblemData(modifiableDataset,
        problemData.AllowedInputVariables.SampleRandomWithoutRepetition(rand, mVars),
        problemData.TargetVariable);
      modifiableProblemData.TrainingPartition.Start = 0;
      modifiableProblemData.TrainingPartition.End = rRows;
      modifiableProblemData.TestPartition.Start = problemData.TestPartition.Start;
      modifiableProblemData.TestPartition.End = problemData.TestPartition.End;

      if (!TrySetProblemData(alg, modifiableProblemData)) {
        throw new NotSupportedException("The algorithm cannot be used with GBM.");
      }

      IRegressionModel model;
      IRun run;

      // try to find a model. The algorithm might fail to produce a model.
      // In this case we just retry until the iterations are exhausted.
      if (TryExecute(alg, rand.Next(), RegressionAlgorithmResult, out model, out run)) {
        int row = 0;
        // update predictions for training and test
        // update new targets (in the case of squared error loss we simply use negative residuals)
        foreach (var pred in model.GetEstimatedValues(problemData.Dataset, trainingRows)) {
          yPred[row] = yPred[row] + nu * pred;
          curY[row] = y[row] - yPred[row];
          row++;
        }
        row = 0;
        foreach (var pred in model.GetEstimatedValues(problemData.Dataset, testRows)) {
          yPredTest[row] = yPredTest[row] + nu * pred;
          curYTest[row] = yTest[row] - yPredTest[row];
          row++;
        }

        // determine quality
        OnlineCalculatorError error;
        var trainR = OnlinePearsonsRCalculator.Calculate(yPred, y, out error);
        var testR = OnlinePearsonsRCalculator.Calculate(yPredTest, yTest, out error);

        // iteration results
        curLoss.Value = error == OnlineCalculatorError.None ? trainR * trainR : 0.0;
        curTestLoss.Value = error == OnlineCalculatorError.None ? testR * testR : 0.0;

        models.Add(model);
      }

      if (StoreRuns) {
        runCollection.Add(run);
      }
      table.Rows["R² (train)"].Values.Add(curLoss.Value);
      table.Rows["R² (test)"].Values.Add(curTestLoss.Value);
      iterations.Value = i + 1;
    }

    // produce solution
    if (CreateSolution) {
      // when all models are symbolic models we can easily combine them into a single model
      if (models.All(m => m is ISymbolicRegressionModel)) {
        Results.Add(new Result("Solution", CreateSymbolicSolution(models, Nu, (IRegressionProblemData)problemData.Clone())));
      }
      // just produce an ensemble solution for now (TODO: correct scaling or linear regression for ensemble model weights)
      var ensembleSolution = CreateEnsembleSolution(models, (IRegressionProblemData)problemData.Clone());
      Results.Add(new Result("EnsembleSolution", ensembleSolution));
    }
  } finally {
    // reset everything
    alg.Prepare(true);
  }
}

private async void UpdateConfigurationControls() {
  variableNames.Clear();
  trackbars.Clear();

  tableLayoutPanel.SuspendRepaint();
  tableLayoutPanel.SuspendLayout();

  tableLayoutPanel.RowCount = 0;
  tableLayoutPanel.Controls.Clear();

  if (Content == null) {
    tableLayoutPanel.ResumeLayout(false);
    tableLayoutPanel.ResumeRepaint(false);
    return;
  }

  variableNames.AddRange(Content.ProblemData.AllowedInputVariables);

  var newTrackbars = CreateConfiguration();

  sharedFixedVariables = new ModifiableDataset(variableNames, newTrackbars.Select(tb => new List<double>(1) { (double)tb.Value }));
  _partialDependencePlot.Configure(new[] { Content }, sharedFixedVariables, variableNames.First(), DrawingSteps);
  await _partialDependencePlot.RecalculateAsync();

  // Add to table and observable lists
  tableLayoutPanel.RowCount = variableNames.Count;
  while (tableLayoutPanel.RowStyles.Count < variableNames.Count) {
    tableLayoutPanel.RowStyles.Add(new RowStyle(SizeType.AutoSize));
  }
  for (int i = 0; i < newTrackbars.Count; i++) {
    // events registered automatically
    trackbars.Add(newTrackbars[i]);
    tableLayoutPanel.Controls.Add(newTrackbars[i], 0, i);
  }

  tableLayoutPanel.ResumeLayout(true);
  tableLayoutPanel.ResumeRepaint(true);

  // Init Y-axis range
  var problemData = Content.ProblemData;
  double min = double.MaxValue, max = double.MinValue;
  var trainingTarget = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices);
  foreach (var t in trainingTarget) {
    if (t < min) { min = t; }
    if (t > max) { max = t; }
  }
  double range = max - min;
  const double scale = 1.0 / 3.0;
  double axisMin, axisMax, axisInterval;
  ChartUtil.CalculateAxisInterval(min - scale * range, max + scale * range, 5, out axisMin, out axisMax, out axisInterval);
  _partialDependencePlot.FixedYAxisMin = axisMin;
  _partialDependencePlot.FixedYAxisMax = axisMax;

  trackbars.First().Checked = true;
}

protected override void OnContentChanged() {
  base.OnContentChanged();
  if (Content == null) {
    return;
  }
  var problemData = Content.ProblemData;

  // Init Y-axis range
  double min = double.MaxValue, max = double.MinValue;
  var trainingTarget = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices);
  foreach (var t in trainingTarget) {
    if (t < min) { min = t; }
    if (t > max) { max = t; }
  }
  double range = max - min;
  const double scale = 1.0 / 3.0;
  double axisMin, axisMax, axisInterval;
  ChartUtil.CalculateAxisInterval(min - scale * range, max + scale * range, 5, out axisMin, out axisMax, out axisInterval);
  automaticYAxisCheckBox.Checked = false;
  limitView.ReadOnly = false;
  limitView.Content.Lower = axisMin;
  limitView.Content.Upper = axisMax;

  // create dataset
  var allowedInputVariables = Content.ProblemData.AllowedInputVariables;
  var variableValues = allowedInputVariables.Select(x => new List<double> {
    problemData.Dataset.GetDoubleValues(x, problemData.TrainingIndices).Median()
  });
  var sharedFixedVariables = new ModifiableDataset(allowedInputVariables, variableValues);

  // create controls
  gradientCharts.Clear();
  densityCharts.Clear();
  groupingPanels.Clear();
  foreach (var variableName in allowedInputVariables) {
    var gradientChart = CreateGradientChart(variableName, sharedFixedVariables);
    gradientCharts.Add(variableName, gradientChart);

    var densityChart = new DensityChart() {
      Anchor = AnchorStyles.Left | AnchorStyles.Top | AnchorStyles.Right,
      Margin = Padding.Empty,
      Height = 12,
      Visible = false,
      Top = (int)(gradientChart.Height * 0.1),
    };
    densityCharts.Add(variableName, densityChart);

    gradientChart.ZoomChanged += (o, e) => {
      var gradient = (GradientChart)o;
      var density = densityCharts[gradient.FreeVariable];
      density.Visible = densityComboBox.SelectedIndex != 0 && !gradient.IsZoomed;
      if (density.Visible) {
        UpdateDensityChart(density, gradient.FreeVariable);
      }
    };
    gradientChart.SizeChanged += (o, e) => {
      var gradient = (GradientChart)o;
      var density = densityCharts[gradient.FreeVariable];
      density.Top = (int)(gradient.Height * 0.1);
    };

    // Initially, the inner plot areas are not initialized for hidden charts (scrollpanel, ...).
    // This event handler listens for the paint event once (where everything is already initialized) to do some manual layout work.
    gradientChart.ChartPostPaint += OnGradientChartOnChartPostPaint;

    var panel = new Panel() {
      Dock = DockStyle.Fill,
      Margin = Padding.Empty,
      BackColor = Color.White
    };
    panel.Controls.Add(densityChart);
    panel.Controls.Add(gradientChart);
    groupingPanels.Add(variableName, panel);
  }

  // update variable list
  variableListView.ItemChecked -= variableListView_ItemChecked;
  variableListView.Items.Clear();
  foreach (var variable in allowedInputVariables) {
    variableListView.Items.Add(key: variable, text: variable, imageIndex: 0);
  }
  foreach (var variable in Content.Model.VariablesUsedForPrediction) {
    variableListView.Items[variable].Checked = true;
  }
  variableListView.ItemChecked += variableListView_ItemChecked;

  RecalculateAndRelayoutCharts();
}

private static IEnumerable<double> EvaluateModelWithReplacedVariable(IRegressionModel model, string variable, ModifiableDataset dataset, IEnumerable<int> rows, ReplacementMethodEnum replacement = ReplacementMethodEnum.Median) {
  var originalValues = dataset.GetReadOnlyDoubleValues(variable).ToList();
  double replacementValue;
  List<double> replacementValues;
  IRandom rand;

  switch (replacement) {
    case ReplacementMethodEnum.Median:
      replacementValue = rows.Select(r => originalValues[r]).Median();
      replacementValues = Enumerable.Repeat(replacementValue, dataset.Rows).ToList();
      break;
    case ReplacementMethodEnum.Average:
      replacementValue = rows.Select(r => originalValues[r]).Average();
      replacementValues = Enumerable.Repeat(replacementValue, dataset.Rows).ToList();
      break;
    case ReplacementMethodEnum.Shuffle:
      // the new variable has the same empirical distribution, but its relation to y is broken
      rand = new FastRandom(31415);
      replacementValues = rows.Select(r => originalValues[r]).Shuffle(rand).ToList();
      break;
    case ReplacementMethodEnum.Noise:
      var avg = rows.Select(r => originalValues[r]).Average();
      var stdDev = rows.Select(r => originalValues[r]).StandardDeviation();
      rand = new FastRandom(31415);
      replacementValues = rows.Select(_ => NormalDistributedRandom.NextDouble(rand, avg, stdDev)).ToList();
      break;
    default:
      throw new ArgumentException(string.Format("ReplacementMethod {0} cannot be handled.", replacement));
  }

  dataset.ReplaceVariable(variable, replacementValues);
  // mkommend: ToList is used on purpose to avoid lazy evaluation that could result in wrong estimates due to variable replacements
  var estimates = model.GetEstimatedValues(dataset, rows).ToList();
  dataset.ReplaceVariable(variable, originalValues);

  return estimates;
}

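// Usage sketch (not from the original source): a hypothetical driver that shuffles
// each input and measures how much R² degrades. Assumes `model`, `problemData`, and a
// ModifiableDataset `dataset` that holds exactly the training rows of the problem data.
var rows = problemData.TrainingIndices.ToArray();
var y = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows).ToArray();
foreach (var variable in model.VariablesUsedForPrediction) {
  var replaced = EvaluateModelWithReplacedVariable(model, variable, dataset, rows, ReplacementMethodEnum.Shuffle).ToArray();
  OnlineCalculatorError error;
  var r = OnlinePearsonsRCalculator.Calculate(y, replaced, out error);
  var quality = error == OnlineCalculatorError.None ? r * r : 0.0;
  Console.WriteLine("{0}: R² after shuffling = {1:N3}", variable, quality); // a larger drop indicates a more relevant variable
}
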
private FactorPartialDependencePlot CreateFactorPartialDependencePlot(string variableName, ModifiableDataset sharedFixedVariables) {
  var plot = new FactorPartialDependencePlot {
    Dock = DockStyle.Fill,
    Margin = Padding.Empty,
    ShowLegend = false,
    ShowCursor = true,
    YAxisTicks = 5,
  };
  plot.VariableValueChanged += async (o, e) => {
    var recalculations = VisiblePartialDependencePlots
      .Except(new[] { (FactorPartialDependencePlot)o })
      .Select(async chart => {
        await chart.RecalculateAsync(updateOnFinish: false, resetYAxis: false);
      }).ToList();
    await Task.WhenAll(recalculations);
    if (recalculations.All(t => t.IsCompleted)) {
      SetupYAxis();
    }
  };
  var variableValues = Content.ProblemData.Dataset.GetStringValues(variableName).Distinct().OrderBy(n => n).ToList();
  plot.Configure(new[] { Content }, sharedFixedVariables, variableName, variableValues);
  plot.SolutionAdded += partialDependencePlot_SolutionAdded;
  plot.SolutionRemoved += partialDependencePlot_SolutionRemoved;
  return plot;
}

protected override void OnContentChanged() {
  base.OnContentChanged();
  if (Content == null) {
    return;
  }
  var problemData = Content.ProblemData;

  if (sharedFixedVariables != null) {
    sharedFixedVariables.ItemChanged -= SharedFixedVariables_ItemChanged;
    sharedFixedVariables.Reset -= SharedFixedVariables_Reset;
  }

  // Init Y-axis range
  double min = double.MaxValue, max = double.MinValue;
  var trainingTarget = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices);
  foreach (var t in trainingTarget) {
    if (t < min) { min = t; }
    if (t > max) { max = t; }
  }
  double range = max - min;
  const double scale = 1.0 / 3.0;
  double axisMin, axisMax, axisInterval;
  ChartUtil.CalculateAxisInterval(min - scale * range, max + scale * range, 5, out axisMin, out axisMax, out axisInterval);
  automaticYAxisCheckBox.Checked = false;
  limitView.ReadOnly = false;
  limitView.Content.Lower = axisMin;
  limitView.Content.Upper = axisMax;

  // create a dataset of the problemData input variables and the model input variables
  // (necessary workaround to keep the variables in their occurring order)
  var inputvariables = new HashSet<string>(Content.ProblemData.AllowedInputVariables.Union(Content.Model.VariablesUsedForPrediction));
  var allowedInputVariables = Content.ProblemData.Dataset.VariableNames.Where(v => inputvariables.Contains(v)).ToList();

  var doubleVariables = allowedInputVariables.Where(problemData.Dataset.VariableHasType<double>);
  var doubleVariableValues = (IEnumerable<IList>)doubleVariables.Select(x => new List<double> {
    problemData.Dataset.GetDoubleValue(x, 0)
  });

  var factorVariables = allowedInputVariables.Where(problemData.Dataset.VariableHasType<string>);
  var factorVariableValues = (IEnumerable<IList>)factorVariables.Select(x => new List<string> {
    problemData.Dataset.GetStringValue(x, 0)
  });

  sharedFixedVariables = new ModifiableDataset(doubleVariables.Concat(factorVariables), doubleVariableValues.Concat(factorVariableValues));
  variableValuesModeComboBox.SelectedItem = "Median"; // triggers UpdateVariableValue and changes sharedFixedVariables

  // create controls
  partialDependencePlots.Clear();
  densityCharts.Clear();
  groupingPanels.Clear();

  foreach (var variableName in doubleVariables) {
    var plot = CreatePartialDependencePlot(variableName, sharedFixedVariables);
    partialDependencePlots.Add(variableName, plot);

    var densityChart = new DensityChart() {
      Anchor = AnchorStyles.Left | AnchorStyles.Top | AnchorStyles.Right,
      Margin = Padding.Empty,
      Height = 12,
      Visible = false,
      Top = (int)(plot.Height * 0.1),
    };
    densityCharts.Add(variableName, densityChart);

    plot.ZoomChanged += (o, e) => {
      var pdp = (PartialDependencePlot)o;
      var density = densityCharts[pdp.FreeVariable];
      density.Visible = densityComboBox.SelectedIndex != 0 && !pdp.IsZoomed;
      if (density.Visible) {
        UpdateDensityChart(density, pdp.FreeVariable);
      }
    };
    plot.SizeChanged += (o, e) => {
      var pdp = (PartialDependencePlot)o;
      var density = densityCharts[pdp.FreeVariable];
      density.Top = (int)(pdp.Height * 0.1);
    };

    // Initially, the inner plot areas are not initialized for hidden charts (scrollpanel, ...).
    // This event handler listens for the paint event once (where everything is already initialized) to do some manual layout work.
    plot.ChartPostPaint += OnPartialDependencePlotPostPaint;

    var panel = new Panel() {
      Dock = DockStyle.Fill,
      Margin = Padding.Empty,
      BackColor = Color.White
    };
    panel.Controls.Add(densityChart);
    panel.Controls.Add(plot);
    groupingPanels.Add(variableName, panel);
  }

  foreach (var variableName in factorVariables) {
    var plot = CreateFactorPartialDependencePlot(variableName, sharedFixedVariables);
    partialDependencePlots.Add(variableName, plot);

    var densityChart = new DensityChart() {
      Anchor = AnchorStyles.Left | AnchorStyles.Top | AnchorStyles.Right,
      Margin = Padding.Empty,
      Height = 12,
      Visible = false,
      Top = (int)(plot.Height * 0.1),
    };
    densityCharts.Add(variableName, densityChart);

    plot.ZoomChanged += (o, e) => {
      var pdp = (FactorPartialDependencePlot)o;
      var density = densityCharts[pdp.FreeVariable];
      density.Visible = densityComboBox.SelectedIndex != 0 && !pdp.IsZoomed;
      if (density.Visible) {
        UpdateDensityChart(density, pdp.FreeVariable);
      }
    };
    plot.SizeChanged += (o, e) => {
      var pdp = (FactorPartialDependencePlot)o;
      var density = densityCharts[pdp.FreeVariable];
      density.Top = (int)(pdp.Height * 0.1);
    };

    // Initially, the inner plot areas are not initialized for hidden charts (scrollpanel, ...).
    // This event handler listens for the paint event once (where everything is already initialized) to do some manual layout work.
    plot.ChartPostPaint += OnFactorPartialDependencePlotPostPaint;

    var panel = new Panel() {
      Dock = DockStyle.Fill,
      Margin = Padding.Empty,
      BackColor = Color.White
    };
    panel.Controls.Add(densityChart);
    panel.Controls.Add(plot);
    groupingPanels.Add(variableName, panel);
  }

  // update variable list
  variableListView.ItemChecked -= variableListView_ItemChecked;
  variableListView.Items.Clear();
  foreach (var variable in allowedInputVariables) {
    variableListView.Items.Add(key: variable, text: variable, imageIndex: 0);
  }
  foreach (var variable in Content.Model.VariablesUsedForPrediction) {
    variableListView.Items[variable].Checked = true;
  }
  variableListView.ItemChecked += variableListView_ItemChecked;

  sharedFixedVariables.ItemChanged += SharedFixedVariables_ItemChanged;
  sharedFixedVariables.Reset += SharedFixedVariables_Reset;

  rowNrNumericUpDown.Maximum = Content.ProblemData.Dataset.Rows - 1;

  RecalculateAndRelayoutCharts();
}

public void Configure(IEnumerable<IRegressionSolution> solutions, ModifiableDataset sharedFixedVariables, string freeVariable, int drawingSteps, bool initializeAxisRanges = true) {
  if (!SolutionsCompatible(solutions)) {
    throw new ArgumentException("Solutions are not compatible with the problem data.");
  }
  this.freeVariable = freeVariable;
  this.drawingSteps = drawingSteps;

  this.solutions.Clear();
  this.solutions.AddRange(solutions);

  // add an event such that whenever a value is changed in the shared dataset,
  // this change is reflected in the internal dataset (where the value becomes a whole column)
  if (this.sharedFixedVariables != null) {
    this.sharedFixedVariables.ItemChanged -= sharedFixedVariables_ItemChanged;
    this.sharedFixedVariables.Reset -= sharedFixedVariables_Reset;
  }
  this.sharedFixedVariables = sharedFixedVariables;
  this.sharedFixedVariables.ItemChanged += sharedFixedVariables_ItemChanged;
  this.sharedFixedVariables.Reset += sharedFixedVariables_Reset;

  RecalculateTrainingLimits(initializeAxisRanges);
  RecalculateInternalDataset();

  chart.Series.Clear();
  seriesCache.Clear();
  ciSeriesCache.Clear();
  foreach (var solution in this.solutions) {
    var series = CreateSeries(solution);
    seriesCache.Add(solution, series.Item1);
    if (series.Item2 != null) {
      ciSeriesCache.Add(solution, series.Item2);
    }
  }

  // Set cursor and x-axis
  // Make sure to allow a small offset to be able to distinguish the vertical line annotation from the axis
  var defaultValue = sharedFixedVariables.GetDoubleValue(freeVariable, 0);
  var step = (trainingMax - trainingMin) / drawingSteps;
  var minimum = chart.ChartAreas[0].AxisX.Minimum;
  var maximum = chart.ChartAreas[0].AxisX.Maximum;
  if (defaultValue <= minimum) {
    VerticalLineAnnotation.X = minimum + step;
  } else if (defaultValue >= maximum) {
    VerticalLineAnnotation.X = maximum - step;
  } else {
    VerticalLineAnnotation.X = defaultValue;
  }

  if (ShowCursor) {
    chart.Titles[0].Text = FreeVariable + " : " + defaultValue.ToString("G5", CultureInfo.CurrentCulture);
  }

  ResizeAllSeriesData();
  OrderAndColorSeries();
}

private void RecalculateInternalDataset() {
  if (sharedFixedVariables == null) {
    return;
  }

  // we expand the range in order to get nice tick intervals on the x axis
  double xmin, xmax, xinterval;
  // guard against the case that only one distinct value is present
  if (trainingMin.IsAlmost(trainingMax)) {
    ChartUtil.CalculateAxisInterval(trainingMin - 0.5, trainingMin + 0.5, XAxisTicks, out xmin, out xmax, out xinterval);
  } else {
    ChartUtil.CalculateAxisInterval(trainingMin, trainingMax, XAxisTicks, out xmin, out xmax, out xinterval);
  }

  if (FixedXAxisMin.HasValue) {
    xmin = FixedXAxisMin.Value;
  }
  if (FixedXAxisMax.HasValue) {
    xmax = FixedXAxisMax.Value;
  }
  double step = (xmax - xmin) / drawingSteps;

  var xvalues = new List<double>();
  for (int i = 0; i < drawingSteps; i++) {
    xvalues.Add(xmin + i * step);
  }

  var variables = sharedFixedVariables.VariableNames.ToList();
  var values = new List<IList>();
  foreach (var varName in variables) {
    if (varName == FreeVariable) {
      values.Add(xvalues);
    } else if (sharedFixedVariables.VariableHasType<double>(varName)) {
      values.Add(Enumerable.Repeat(sharedFixedVariables.GetDoubleValue(varName, 0), xvalues.Count).ToList());
    } else if (sharedFixedVariables.VariableHasType<string>(varName)) {
      values.Add(Enumerable.Repeat(sharedFixedVariables.GetStringValue(varName, 0), xvalues.Count).ToList());
    }
  }
  internalDataset = new ModifiableDataset(variables, values);
}
