// Adds a FastLine series with the residuals of the given solution to the chart.
// Does nothing when a series with the same name is already shown. The x-axis is
// grown to the next "nice" bound when the largest residual would fall outside it.
protected void AddSeries(IRegressionSolution solution) {
  bool alreadyShown = chart.Series.Any(s => s.Name == solution.Name);
  if (alreadyShown) return;

  var series = new Series(solution.Name) {
    Tag = solution,
    ChartType = SeriesChartType.FastLine
  };

  var residualValues = GetResiduals(GetOriginalValues(), GetEstimatedValues(solution));
  var largestResidual = residualValues.Max();
  var chartArea = chart.ChartAreas[0];
  if (largestResidual >= chartArea.AxisX.Maximum) {
    // Round the axis maximum up to the next multiple of the residual's power of ten.
    double magnitude = Math.Pow(10, Math.Floor(Math.Log10(largestResidual)));
    chartArea.AxisX.Maximum = magnitude * (1 + (int)(largestResidual / magnitude));
    chartArea.CursorX.Interval = residualValues.Min() / 100;
  }

  UpdateSeries(residualValues, series);
  series.ToolTip = "Area over Curve: " + CalculateAreaOverCurve(series);
  series.LegendToolTip = "Double-click to open model";
  chart.Series.Add(series);
}
// Re-creates the chart points of the given solution's cached series — one point
// per x-value with a placeholder y-value of 0.0. The optional confidence-interval
// series, when present, is sized identically with the dummy range [-1, 1].
// When xvalues is null the free variable's values from the internal dataset are used.
private void ResizeSeriesData(IRegressionSolution solution, IList<double> xvalues = null) {
  xvalues = xvalues ?? internalDataset.GetDoubleValues(FreeVariable).ToList();

  var series = seriesCache[solution];
  series.Points.SuspendUpdates();
  series.Points.Clear();
  foreach (var x in xvalues)
    series.Points.Add(new DataPoint(x, 0.0));
  series.Points.ResumeUpdates();

  Series ciSeries;
  if (!ciSeriesCache.TryGetValue(solution, out ciSeries)) return;
  ciSeries.Points.SuspendUpdates();
  ciSeries.Points.Clear();
  foreach (var x in xvalues)
    ciSeries.Points.Add(new DataPoint(x, new[] { -1.0, 1.0 }));
  ciSeries.Points.ResumeUpdates();
}
// Executes random forest regression: trains the forest, reports training and
// out-of-bag error measures as results, and — depending on the ModelCreation
// setting — stores either the full model's solution or a surrogate solution.
protected override void Run(CancellationToken cancellationToken) {
  double rmsError, avgRelError, outOfBagRmsError, outOfBagAvgRelError;
  // Draw the seed first so the run is reproducible from the reported Seed value.
  if (SetSeedRandomly) {
    Seed = Random.RandomSeedGenerator.GetSeed();
  }
  var model = CreateRandomForestRegressionModel(Problem.ProblemData, NumberOfTrees, R, M, Seed, out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError);
  Results.Add(new Result("Root mean square error", "The root of the mean of squared errors of the random forest regression solution on the training set.", new DoubleValue(rmsError)));
  Results.Add(new Result("Average relative error", "The average of relative errors of the random forest regression solution on the training set.", new PercentValue(avgRelError)));
  Results.Add(new Result("Root mean square error (out-of-bag)", "The out-of-bag root of the mean of squared errors of the random forest regression solution.", new DoubleValue(outOfBagRmsError)));
  Results.Add(new Result("Average relative error (out-of-bag)", "The out-of-bag average of relative errors of the random forest regression solution.", new PercentValue(outOfBagAvgRelError)));
  IRegressionSolution solution = null;
  if (ModelCreation == ModelCreation.Model) {
    solution = model.CreateRegressionSolution(Problem.ProblemData);
  } else if (ModelCreation == ModelCreation.SurrogateModel) {
    // The surrogate keeps only the training parameters and can re-grow the
    // forest on demand instead of persisting the full model.
    var problemData = Problem.ProblemData;
    var surrogateModel = new RandomForestModelSurrogate(model, problemData.TargetVariable, problemData, Seed, NumberOfTrees, R, M);
    solution = surrogateModel.CreateRegressionSolution(problemData);
  }
  // For any other ModelCreation value only the error metrics above are
  // reported — presumably intentional (quality-only mode); confirm with the enum.
  if (solution != null) {
    Results.Add(new Result(RandomForestRegressionModelResultName, "The random forest regression solution.", solution));
  }
}
// Fills the cached series of the given solution with one zero-valued point per
// (string) variable value, using the value itself as the point tooltip, and
// sizes the optional confidence-interval series to the same length
// (box-plot points carry 5 dummy y-values here).
private void InitSeriesData(IRegressionSolution solution, IList<string> values) {
  var series = seriesCache[solution];
  series.Points.SuspendUpdates();
  series.Points.Clear();
  for (int i = 0; i < values.Count; i++) {
    series.Points.AddXY(values[i], 0.0);
    series.Points.Last().ToolTip = values[i];
  }
  // Highlight the point corresponding to the currently fixed value of the
  // free variable. NOTE(review): called before ResumeUpdates — confirm styling
  // applied inside the suspended region is intended.
  UpdateAllSeriesStyles(variableValues.IndexOf(sharedFixedVariables.GetStringValue(FreeVariable, 0)));
  series.Points.ResumeUpdates();
  Series confidenceIntervalSeries;
  if (ciSeriesCache.TryGetValue(solution, out confidenceIntervalSeries)) {
    confidenceIntervalSeries.Points.SuspendUpdates();
    confidenceIntervalSeries.Points.Clear();
    for (int i = 0; i < values.Count; i++) {
      confidenceIntervalSeries.Points.AddXY(values[i], 0.0, 0.0, 0.0, 0.0, 0.0);
    }
    confidenceIntervalSeries.Points.ResumeUpdates();
  }
}
// Registers a new regression solution with the chart: validates compatibility
// with the solutions already shown, creates and caches its series, initializes
// the series data, and triggers a recalculation before raising SolutionAdded.
// Adding an already-registered solution is a no-op.
public async Task AddSolutionAsync(IRegressionSolution solution) {
  var candidateSet = solutions.Concat(new[] { solution });
  if (!SolutionsCompatible(candidateSet)) {
    throw new ArgumentException("The solution is not compatible with the problem data.");
  }
  if (solutions.Contains(solution)) return;

  solutions.Add(solution);
  var createdSeries = CreateSeries(solution);
  seriesCache.Add(solution, createdSeries.Item1);
  // The confidence-interval series is optional (Item2 may be null).
  if (createdSeries.Item2 != null) {
    ciSeriesCache.Add(solution, createdSeries.Item2);
  }

  InitSeriesData(solution, variableValues);
  OrderAndColorSeries();
  await RecalculateAsync();
  OnSolutionAdded(this, new EventArgs<IRegressionSolution>(solution));
}
// Computes variable impacts for a regression solution: for each allowed input
// variable, the drop in R² (squared Pearson correlation) when that variable's
// values are replaced (default: by the median). Larger drops mean more
// important variables. Returns (variable, impact) pairs ordered by descending impact.
public static IEnumerable<Tuple<string, double>> CalculateImpacts(IRegressionSolution solution, DataPartitionEnum data = DataPartitionEnum.Training, ReplacementMethodEnum replacement = ReplacementMethodEnum.Median) {
  var problemData = solution.ProblemData;
  var dataset = problemData.Dataset;
  IEnumerable<int> rows;
  IEnumerable<double> targetValues;
  double originalR2 = -1;
  OnlineCalculatorError error;
  switch (data) {
    case DataPartitionEnum.All:
      rows = solution.ProblemData.AllIndices;
      targetValues = problemData.TargetVariableValues.ToList();
      // R² over all rows is not cached on the solution, so it is computed here.
      originalR2 = OnlinePearsonsRCalculator.Calculate(problemData.TargetVariableValues, solution.EstimatedValues, out error);
      if (error != OnlineCalculatorError.None) { throw new InvalidOperationException("Error during R² calculation."); }
      originalR2 = originalR2 * originalR2;
      break;
    case DataPartitionEnum.Training:
      rows = problemData.TrainingIndices;
      targetValues = problemData.TargetVariableTrainingValues.ToList();
      originalR2 = solution.TrainingRSquared; // cached on the solution
      break;
    case DataPartitionEnum.Test:
      rows = problemData.TestIndices;
      targetValues = problemData.TargetVariableTestValues.ToList();
      originalR2 = solution.TestRSquared; // cached on the solution
      break;
    default:
      throw new ArgumentException(string.Format("DataPartition {0} cannot be handled.", data));
  }
  var impacts = new Dictionary<string, double>();
  // Work on a modifiable copy so variable columns can be replaced in place.
  var modifiableDataset = ((Dataset)dataset).ToModifiable();
  foreach (var inputVariable in problemData.AllowedInputVariables) {
    var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, replacement);
    var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error);
    if (error != OnlineCalculatorError.None) { throw new InvalidOperationException("Error during R² calculation with replaced inputs."); }
    newR2 = newR2 * newR2;
    var impact = originalR2 - newR2;
    impacts[inputVariable] = impact;
  }
  return (impacts.OrderByDescending(i => i.Value).Select(i => Tuple.Create(i.Key, i.Value)));
}
// Creates a mean function backed by an existing regression solution.
public MeanModel(IRegressionSolution solution) : this() {
  // Here we cannot check if the model is actually compatible (uses only input
  // variables that are available). We only assume that the list of allowed
  // inputs in the regression solution is the same as the list of allowed
  // inputs in the Gaussian process. Later we might get an error or bad
  // behaviour when the mean function is evaluated.
  RegressionSolution = solution;
}
// Convenience overload: resolves the rows of the requested data partition and
// the solution's estimated values for them, then delegates to the model-based
// impact calculation.
public static IEnumerable<Tuple<string, double>> CalculateImpacts(
  IRegressionSolution solution,
  ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Shuffle,
  FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best,
  DataPartitionEnum dataPartition = DataPartitionEnum.Training) {
  var rows = GetPartitionRows(dataPartition, solution.ProblemData);
  var estimates = solution.GetEstimatedValues(rows);
  return CalculateImpacts(solution.Model, solution.ProblemData, estimates, rows, replacementMethod, factorReplacementMethod);
}
// Recomputes, on a background task, the y-values of the solution's cached
// series over all rows of the internal dataset, plus the 95% confidence band
// (±1.96·sqrt(variance)) when the solution provides variance estimates.
// Returns the overall min/max of all plotted values for axis scaling.
// NOTE(review): chart.Invalidate() is invoked from the background task; only a
// few Control members are documented as thread-safe — confirm this is intended.
private Task<DoubleLimit> UpdateSeriesDataAsync(IRegressionSolution solution, CancellationToken cancellationToken) {
  return (Task.Run(() => {
    var xvalues = internalDataset.GetDoubleValues(FreeVariable).ToList();
    var yvalues = solution.Model.GetEstimatedValues(internalDataset, Enumerable.Range(0, internalDataset.Rows)).ToList();
    double min = double.MaxValue, max = double.MinValue;
    var series = seriesCache[solution];
    for (int i = 0; i < xvalues.Count; i++) {
      series.Points[i].SetValueXY(xvalues[i], yvalues[i]);
      if (yvalues[i] < min) { min = yvalues[i]; }
      if (yvalues[i] > max) { max = yvalues[i]; }
    }
    chart.Invalidate();
    cancellationToken.ThrowIfCancellationRequested();
    var confidenceBoundSolution = solution as IConfidenceRegressionSolution;
    if (confidenceBoundSolution != null) {
      var confidenceIntervalSeries = ciSeriesCache[solution];
      var variances = confidenceBoundSolution.Model.GetEstimatedVariances(internalDataset, Enumerable.Range(0, internalDataset.Rows)).ToList();
      for (int i = 0; i < xvalues.Count; i++) {
        // 1.96 standard deviations = half-width of the 95% interval (normal approximation).
        var lower = yvalues[i] - 1.96 * Math.Sqrt(variances[i]);
        var upper = yvalues[i] + 1.96 * Math.Sqrt(variances[i]);
        confidenceIntervalSeries.Points[i].SetValueXY(xvalues[i], lower, upper);
        if (lower < min) { min = lower; }
        if (upper > max) { max = upper; }
      }
      chart.Invalidate();
    }
    cancellationToken.ThrowIfCancellationRequested();
    return new DoubleLimit(min, max);
  }, cancellationToken));
}
// Registers the solution's model in the ensemble and records its training and
// test partitions; all evaluation caches are invalidated because the ensemble
// composition changed.
// Throws ArgumentException when the model is already part of the ensemble.
private void AddRegressionSolution(IRegressionSolution solution) {
  if (Model.Models.Contains(solution.Model)) {
    // Fix: the original threw a message-less ArgumentException, which gives no
    // diagnostic context; exception type is unchanged for callers.
    throw new ArgumentException("The solution's model is already part of the ensemble.", "solution");
  }
  Model.Add(solution.Model);
  trainingPartitions[solution.Model] = solution.ProblemData.TrainingPartition;
  testPartitions[solution.Model] = solution.ProblemData.TestPartition;
  trainingEvaluationCache.Clear();
  testEvaluationCache.Clear();
  evaluationCache.Clear();
}
// Removes the solution's model and its recorded partitions from the ensemble;
// all evaluation caches are invalidated because the ensemble composition changed.
// Throws ArgumentException when the model is not part of the ensemble.
private void RemoveRegressionSolution(IRegressionSolution solution) {
  if (!Model.Models.Contains(solution.Model)) {
    // Fix: the original threw a message-less ArgumentException, which gives no
    // diagnostic context; exception type is unchanged for callers.
    throw new ArgumentException("The solution's model is not part of the ensemble.", "solution");
  }
  Model.Remove(solution.Model);
  trainingPartitions.Remove(solution.Model);
  testPartitions.Remove(solution.Model);
  trainingEvaluationCache.Clear();
  testEvaluationCache.Clear();
  evaluationCache.Clear();
}
// Computes the signed residual (estimated - target) for every row of the
// solution's dataset.
// NOTE(review): the target column is looked up via Content.ProblemData while
// the dataset and estimates come from solution.ProblemData — confirm both
// always refer to the same target variable and dataset, otherwise residuals
// would be computed against the wrong column.
private List<double> CalculateResiduals(IRegressionSolution solution) {
  List<double> residuals = new List<double>();
  IRegressionProblemData problemdata = solution.ProblemData;
  List<double> targetValues = problemdata.Dataset.GetDoubleValues(Content.ProblemData.TargetVariable).ToList();
  List<double> estimatedValues = solution.EstimatedValues.ToList();
  for (int i = 0; i < solution.ProblemData.Dataset.Rows; i++) {
    double residual = estimatedValues[i] - targetValues[i];
    residuals.Add(residual);
  }
  return (residuals);
}
// Verifies that the solution-based and the model-based variable impact
// calculations agree, and that the computed impacts match the expected
// reference values (approximate double comparison via IsAlmost).
private void CheckDefaultAsserts(IRegressionSolution solution, Dictionary<string, double> expectedImpacts) {
  IRegressionProblemData problemData = solution.ProblemData;
  var estimates = solution.GetEstimatedValues(problemData.TrainingIndices);

  var solutionImpacts = RegressionSolutionVariableImpactsCalculator.CalculateImpacts(solution);
  var modelImpacts = RegressionSolutionVariableImpactsCalculator.CalculateImpacts(solution.Model, problemData, estimates, problemData.TrainingIndices);

  // Both calculation paths should return equal results.
  Assert.IsTrue(solutionImpacts.SequenceEqual(modelImpacts));

  // The impacts should match the expected reference values.
  Assert.AreEqual(modelImpacts.Count(), expectedImpacts.Count);
  Assert.IsTrue(modelImpacts.All(v => v.Item2.IsAlmost(expectedImpacts[v.Item1])));
}
// Detaches a solution from the chart: drops its cached series, recalculates,
// and raises SolutionRemoved. No-op when the solution was not being shown.
public async Task RemoveSolutionAsync(IRegressionSolution solution) {
  bool removed = solutions.Remove(solution);
  if (!removed) return;

  seriesCache.Remove(solution);
  ciSeriesCache.Remove(solution);
  await RecalculateAsync();
  OnSolutionRemoved(this, new EventArgs<IRegressionSolution>(solution));
}
// Runs nonlinear regression. With random parameter initialization the fit is
// restarted `Restarts` times and the solution with the lowest *training* RMSE
// is kept; per-restart train/test RMSE values are collected in a data table.
// Without random initialization a single deterministic fit is performed.
protected override void Run(CancellationToken cancellationToken) {
  IRegressionSolution bestSolution = null;
  if (InitializeParametersRandomly) {
    var qualityTable = new DataTable("RMSE table");
    qualityTable.VisualProperties.YAxisLogScale = true;
    var trainRMSERow = new DataRow("RMSE (train)");
    trainRMSERow.VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
    var testRMSERow = new DataRow("RMSE test");
    testRMSERow.VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
    qualityTable.Rows.Add(trainRMSERow);
    qualityTable.Rows.Add(testRMSERow);
    Results.Add(new Result(qualityTable.Name, qualityTable.Name + " for all restarts", qualityTable));
    // Draw the seed first so the whole multi-restart run is reproducible.
    if (SetSeedRandomly) {
      Seed = RandomSeedGenerator.GetSeed();
    }
    var rand = new MersenneTwister((uint)Seed);
    bestSolution = CreateRegressionSolution(Problem.ProblemData, ModelStructure, Iterations, ApplyLinearScaling, rand);
    trainRMSERow.Values.Add(bestSolution.TrainingRootMeanSquaredError);
    testRMSERow.Values.Add(bestSolution.TestRootMeanSquaredError);
    for (int r = 0; r < Restarts; r++) {
      var solution = CreateRegressionSolution(Problem.ProblemData, ModelStructure, Iterations, ApplyLinearScaling, rand);
      trainRMSERow.Values.Add(solution.TrainingRootMeanSquaredError);
      testRMSERow.Values.Add(solution.TestRootMeanSquaredError);
      // Model selection uses training error only; test RMSE is reported for information.
      if (solution.TrainingRootMeanSquaredError < bestSolution.TrainingRootMeanSquaredError) {
        bestSolution = solution;
      }
    }
  } else {
    bestSolution = CreateRegressionSolution(Problem.ProblemData, ModelStructure, Iterations, ApplyLinearScaling);
  }
  Results.Add(new Result(RegressionSolutionResultName, "The nonlinear regression solution.", bestSolution));
  Results.Add(new Result("Root mean square error (train)", "The root of the mean of squared errors of the regression solution on the training set.", new DoubleValue(bestSolution.TrainingRootMeanSquaredError)));
  Results.Add(new Result("Root mean square error (test)", "The root of the mean of squared errors of the regression solution on the test set.", new DoubleValue(bestSolution.TestRootMeanSquaredError)));
}
// Collects analysis results for a tree/rule-set regression solution: a clone
// of the solution, leaf-depth and rule diagrams, relative variable
// frequencies, and (for complexity pruning on tree models) a pruning chart.
private void AnalyzeSolution(IRegressionSolution solution, ResultCollection results, IRegressionProblemData problemData) {
  results.Add(new Result("RegressionSolution", (IItem)solution.Clone()));
  Dictionary<string, int> frequencies = null;
  var tree = solution.Model as RegressionNodeTreeModel;
  if (tree != null) {
    results.Add(RegressionTreeAnalyzer.CreateLeafDepthHistogram(tree));
    frequencies = RegressionTreeAnalyzer.GetTreeVariableFrequences(tree);
    RegressionTreeAnalyzer.AnalyzeNodes(tree, results, problemData);
  }
  var ruleSet = solution.Model as RegressionRuleSetModel;
  if (ruleSet != null) {
    results.Add(RegressionTreeAnalyzer.CreateRulesResult(ruleSet, problemData, "Rules", true));
    frequencies = RegressionTreeAnalyzer.GetRuleVariableFrequences(ruleSet);
    results.Add(RegressionTreeAnalyzer.CreateCoverageDiagram(ruleSet, problemData));
  }
  //Variable frequencies
  if (frequencies != null) {
    var sum = frequencies.Values.Sum();
    sum = sum == 0 ? 1 : sum; // guard against division by zero for empty models
    var impactArray = new DoubleArray(frequencies.Select(i => (double)i.Value / sum).ToArray()) {
      ElementNames = frequencies.Select(i => i.Key)
    };
    // "Frequences" is a typo, but it is a runtime result key and is kept as-is
    // because consumers may look results up by this exact name.
    results.Add(new Result("Variable Frequences", "relative frequencies of variables in rules and tree nodes", impactArray));
  }
  var pruning = Pruning as ComplexityPruning;
  if (pruning != null && tree != null) {
    RegressionTreeAnalyzer.PruningChart(tree, pruning, results);
  }
}
// Builds the column series for a solution plus an always-created box-plot
// series (5 y-values per point) that displays the 95% confidence interval.
// Series names are derived from the target variable and the solution's index.
private Tuple<Series, Series> CreateSeries(IRegressionSolution solution) {
  string seriesName = solution.ProblemData.TargetVariable + " " + solutions.IndexOf(solution);

  var series = new Series {
    ChartType = SeriesChartType.Column,
    Name = seriesName,
    XValueType = System.Windows.Forms.DataVisualization.Charting.ChartValueType.String
  };
  series.LegendText = seriesName;

  var ciSeries = new Series {
    ChartType = SeriesChartType.BoxPlot,
    XValueType = System.Windows.Forms.DataVisualization.Charting.ChartValueType.String,
    Color = Color.Black,
    YValuesPerPoint = 5,
    Name = "95% Conf. Interval " + seriesName,
    IsVisibleInLegend = false
  };
  return Tuple.Create(series, ciSeries);
}
// Returns the solution's estimated values for the sample range currently
// selected in the samples combo box (training, test, or all rows).
// Throws NotSupportedException for any other selection.
protected IEnumerable<double> GetEstimatedValues(IRegressionSolution solution) {
  var selection = cmbSamples.SelectedItem.ToString();
  switch (selection) {
    case TrainingSamples: return solution.EstimatedTrainingValues;
    case TestSamples: return solution.EstimatedTestValues;
    case AllSamples: return solution.EstimatedValues;
    default: throw new NotSupportedException();
  }
}
// Builds the line series for a solution. A 2-value Range series for the 95%
// confidence interval is created only when the solution can provide
// confidence bounds; otherwise Item2 of the returned tuple is null.
private Tuple<Series, Series> CreateSeries(IRegressionSolution solution) {
  string seriesName = solution.ProblemData.TargetVariable + " " + solutions.IndexOf(solution);

  var series = new Series {
    ChartType = SeriesChartType.Line,
    Name = seriesName
  };
  series.LegendText = seriesName;

  Series ciSeries = null;
  if (solution is IConfidenceRegressionSolution) {
    ciSeries = new Series {
      ChartType = SeriesChartType.Range,
      YValuesPerPoint = 2, // lower and upper bound
      Name = "95% Conf. Interval " + seriesName,
      IsVisibleInLegend = false
    };
  }
  return Tuple.Create(series, ciSeries);
}
// Detaches a solution from the chart: refreshes the training limits, drops the
// cached series, recalculates, and raises SolutionRemoved. No-op when the
// solution was not being shown.
public async Task RemoveSolutionAsync(IRegressionSolution solution) {
  bool removed = solutions.Remove(solution);
  if (!removed) return;

  RecalculateTrainingLimits(true);
  seriesCache.Remove(solution);
  ciSeriesCache.Remove(solution);
  await RecalculateAsync();
  OnSolutionRemoved(this, new EventArgs<IRegressionSolution>(solution));
}
//mkommend: annoying name clash with static method, open to better naming suggestions
// Instance-level wrapper: forwards to the static impact calculation using this
// instance's configured data partition and replacement method.
public IEnumerable<Tuple<string, double>> Calculate(IRegressionSolution solution) {
  var impacts = CalculateImpacts(solution, DataPartition, ReplacementMethod);
  return impacts;
}
// Computes variable impacts for a regression solution: for each allowed input
// variable, the drop in R² (squared Pearson correlation) when that variable's
// values are replaced (default: by the median). Larger drops mean more
// important variables. Returns (variable, impact) pairs, largest impact first.
public static IEnumerable<Tuple<string, double>> CalculateImpacts(IRegressionSolution solution, DataPartitionEnum data = DataPartitionEnum.Training, ReplacementMethodEnum replacement = ReplacementMethodEnum.Median) {
  var problemData = solution.ProblemData;

  IEnumerable<int> rows;
  IEnumerable<double> targetValues;
  double originalR2;
  OnlineCalculatorError error;
  switch (data) {
    case DataPartitionEnum.All:
      rows = problemData.AllIndices;
      targetValues = problemData.TargetVariableValues.ToList();
      // R² over all rows is not cached on the solution, so compute it here.
      var r = OnlinePearsonsRCalculator.Calculate(problemData.TargetVariableValues, solution.EstimatedValues, out error);
      if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during R² calculation.");
      originalR2 = r * r;
      break;
    case DataPartitionEnum.Training:
      rows = problemData.TrainingIndices;
      targetValues = problemData.TargetVariableTrainingValues.ToList();
      originalR2 = solution.TrainingRSquared; // cached on the solution
      break;
    case DataPartitionEnum.Test:
      rows = problemData.TestIndices;
      targetValues = problemData.TargetVariableTestValues.ToList();
      originalR2 = solution.TestRSquared; // cached on the solution
      break;
    default:
      throw new ArgumentException(string.Format("DataPartition {0} cannot be handled.", data));
  }

  // Work on a modifiable copy so variable columns can be replaced in place.
  var modifiableDataset = ((Dataset)problemData.Dataset).ToModifiable();
  var impacts = new Dictionary<string, double>();
  foreach (var inputVariable in problemData.AllowedInputVariables) {
    var replacedEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, replacement);
    var newR = OnlinePearsonsRCalculator.Calculate(targetValues, replacedEstimates, out error);
    if (error != OnlineCalculatorError.None) throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
    impacts[inputVariable] = originalR2 - newR * newR;
  }
  return impacts.OrderByDescending(i => i.Value).Select(i => Tuple.Create(i.Key, i.Value));
}
// Adds a FastLine series with the residuals of the given solution to the
// chart; skipped when a series with the same name is already shown. The x-axis
// maximum is grown when the largest residual does not fit the current range.
protected void AddSeries(IRegressionSolution solution) {
  if (chart.Series.Any(s => s.Name == solution.Name)) return;
  Series solutionSeries = new Series(solution.Name);
  solutionSeries.Tag = solution;
  solutionSeries.ChartType = SeriesChartType.FastLine;
  var residuals = GetResiduals(GetOriginalValues(), GetEstimatedValues(solution));
  var maxValue = residuals.Max();
  if (maxValue >= chart.ChartAreas[0].AxisX.Maximum) {
    // Round the axis maximum up to the next multiple of the residual's power of ten.
    double scale = Math.Pow(10, Math.Floor(Math.Log10(maxValue)));
    var maximum = scale * (1 + (int)(maxValue / scale));
    chart.ChartAreas[0].AxisX.Maximum = maximum;
    // NOTE(review): the cursor interval is derived from the *smallest* residual — confirm intent.
    chart.ChartAreas[0].CursorX.Interval = residuals.Min() / 100;
  }
  UpdateSeries(residuals, solutionSeries);
  solutionSeries.ToolTip = "Area over Curve: " + CalculateAreaOverCurve(solutionSeries);
  solutionSeries.LegendToolTip = "Double-click to open model";
  chart.Series.Add(solutionSeries);
}
// Registers the solution's model in the ensemble and records its training and
// test partitions; all evaluation caches are invalidated because the ensemble
// composition changed.
// Throws ArgumentException when the model is already part of the ensemble.
private void AddRegressionSolution(IRegressionSolution solution) {
  // Fix: the original threw a message-less ArgumentException, which gives no
  // diagnostic context; exception type is unchanged for callers.
  if (Model.Models.Contains(solution.Model)) throw new ArgumentException("The solution's model is already part of the ensemble.", "solution");
  Model.Add(solution.Model);
  trainingPartitions[solution.Model] = solution.ProblemData.TrainingPartition;
  testPartitions[solution.Model] = solution.ProblemData.TestPartition;
  trainingEvaluationCache.Clear();
  testEvaluationCache.Clear();
  evaluationCache.Clear();
}
// Removes the solution's model and its recorded partitions from the ensemble;
// all evaluation caches are invalidated because the ensemble composition changed.
// Throws ArgumentException when the model is not part of the ensemble.
private void RemoveRegressionSolution(IRegressionSolution solution) {
  // Fix: the original threw a message-less ArgumentException, which gives no
  // diagnostic context; exception type is unchanged for callers.
  if (!Model.Models.Contains(solution.Model)) throw new ArgumentException("The solution's model is not part of the ensemble.", "solution");
  Model.Remove(solution.Model);
  trainingPartitions.Remove(solution.Model);
  testPartitions.Remove(solution.Model);
  trainingEvaluationCache.Clear();
  testEvaluationCache.Clear();
  evaluationCache.Clear();
}
// Returns the solution's estimated values for the sample range currently
// selected in the samples combo box (training, test, or all rows).
// Throws NotSupportedException for any other selection.
protected IEnumerable<double> GetEstimatedValues(IRegressionSolution solution) {
  IEnumerable<double> estimatedValues;
  switch (cmbSamples.SelectedItem.ToString()) {
    case TrainingSamples:
      estimatedValues = solution.EstimatedTrainingValues;
      break;
    case TestSamples:
      estimatedValues = solution.EstimatedTestValues;
      break;
    case AllSamples:
      estimatedValues = solution.EstimatedValues;
      break;
    default:
      throw new NotSupportedException();
  }
  return estimatedValues;
}
// Registers a new regression solution with the chart: validates compatibility
// with the already shown solutions, refreshes the training limits, creates and
// caches the chart series, sizes the series data, and raises SolutionAdded.
// Adding an already-registered solution is a no-op.
public async Task AddSolutionAsync(IRegressionSolution solution) {
  if (!SolutionsCompatible(solutions.Concat(new[] { solution }))) throw new ArgumentException("The solution is not compatible with the problem data.");
  if (solutions.Contains(solution)) return;
  solutions.Add(solution);
  RecalculateTrainingLimits(true);
  var series = CreateSeries(solution);
  seriesCache.Add(solution, series.Item1);
  // The confidence-interval series is optional (Item2 may be null).
  if (series.Item2 != null) ciSeriesCache.Add(solution, series.Item2);
  ResizeSeriesData(solution);
  OrderAndColorSeries();
  await RecalculateAsync();
  var args = new EventArgs<IRegressionSolution>(solution);
  OnSolutionAdded(this, args);
}
// Re-creates the chart points of the given solution's cached series — one
// point per x-value with a placeholder y-value of 0.0. The optional
// confidence-interval series is sized identically with the dummy range [-1, 1].
// When xvalues is null the free variable's values from the internal dataset are used.
private void ResizeSeriesData(IRegressionSolution solution, IList<double> xvalues = null) {
  if (xvalues == null) xvalues = internalDataset.GetDoubleValues(FreeVariable).ToList();
  var series = seriesCache[solution];
  series.Points.SuspendUpdates();
  series.Points.Clear();
  for (int i = 0; i < xvalues.Count; i++)
    series.Points.Add(new DataPoint(xvalues[i], 0.0));
  series.Points.ResumeUpdates();
  Series confidenceIntervalSeries;
  if (ciSeriesCache.TryGetValue(solution, out confidenceIntervalSeries)) {
    confidenceIntervalSeries.Points.SuspendUpdates();
    confidenceIntervalSeries.Points.Clear();
    for (int i = 0; i < xvalues.Count; i++)
      confidenceIntervalSeries.Points.Add(new DataPoint(xvalues[i], new[] { -1.0, 1.0 }));
    confidenceIntervalSeries.Points.ResumeUpdates();
  }
}
// Writes the "estimated values" worksheet (EPPlus): a header row, per-row
// id/target/unbounded-estimate cells, and shared relative formulas for the
// derived error columns. Column layout: A=id, B=target, C=estimated (bounded),
// D=absolute error, E=relative error, F=error, G=squared error,
// I=unbounded estimates, J=bounded estimates.
private void WriteEstimatedWorksheet(ExcelWorksheet estimatedWorksheet, ExcelWorksheet datasetWorksheet, string[] formulaParts, IRegressionSolution solution) {
  string preparedFormula = PrepareFormula(formulaParts);
  int rows = solution.ProblemData.Dataset.Rows;
  estimatedWorksheet.Cells[1, 1].Value = "Id";
  estimatedWorksheet.Cells[1, 2].Value = "Target Variable";
  estimatedWorksheet.Cells[1, 3].Value = "Estimated Values";
  estimatedWorksheet.Cells[1, 4].Value = "Absolute Error";
  estimatedWorksheet.Cells[1, 5].Value = "Relative Error";
  estimatedWorksheet.Cells[1, 6].Value = "Error";
  estimatedWorksheet.Cells[1, 7].Value = "Squared Error";
  estimatedWorksheet.Cells[1, 9].Value = "Unbounded Estimated Values";
  estimatedWorksheet.Cells[1, 10].Value = "Bounded Estimated Values";
  estimatedWorksheet.Cells[1, 1, 1, 10].AutoFitColumns();
  // fill in id, target variable and unbounded estimated values
  int targetIndex = solution.ProblemData.Dataset.VariableNames.ToList().FindIndex(x => x.Equals(solution.ProblemData.TargetVariable)) + 1; // +1: Excel columns are 1-based
  for (int i = 0; i < rows; i++) {
    estimatedWorksheet.Cells[i + 2, 1].Value = i; // id
    estimatedWorksheet.Cells[i + 2, 2].Formula = datasetWorksheet.Cells[i + 2, targetIndex].FullAddress; // target variable
    estimatedWorksheet.Cells[i + 2, 9].Formula = string.Format(preparedFormula, i + 2); // unbounded estimated values
  }
  // Shared formulas below use relative references (e.g. "J2"); Excel adjusts
  // the row per cell within each assigned range.
  estimatedWorksheet.Cells["B2:B" + (rows + 1)].Style.Numberformat.Format = "0.000";
  estimatedWorksheet.Cells["C2:C" + (rows + 1)].Formula = "J2"; // estimated = bounded estimate
  estimatedWorksheet.Cells["C2:C" + (rows + 1)].Style.Numberformat.Format = "0.000";
  estimatedWorksheet.Cells["D2:D" + (rows + 1)].Formula = "ABS(B2 - C2)";
  estimatedWorksheet.Cells["D2:D" + (rows + 1)].Style.Numberformat.Format = "0.000";
  estimatedWorksheet.Cells["E2:E" + (rows + 1)].Formula = "ABS(D2 / B2)";
  estimatedWorksheet.Cells["E2:E" + (rows + 1)].Style.Numberformat.Format = "0.000";
  estimatedWorksheet.Cells["F2:F" + (rows + 1)].Formula = "C2 - B2";
  estimatedWorksheet.Cells["F2:F" + (rows + 1)].Style.Numberformat.Format = "0.000";
  estimatedWorksheet.Cells["G2:G" + (rows + 1)].Formula = "POWER(F2, 2)";
  estimatedWorksheet.Cells["G2:G" + (rows + 1)].Style.Numberformat.Format = "0.000";
  estimatedWorksheet.Cells["I2:I" + (rows + 1)].Style.Numberformat.Format = "0.000";
  // Clamp the unbounded estimate (I) into the model's estimation limits; on
  // error fall back to the midpoint of the limits.
  estimatedWorksheet.Cells["J2:J" + (rows + 1)].Formula = "IFERROR(IF(I2 > Model!EstimationLimitUpper, Model!EstimationLimitUpper, IF(I2 < Model!EstimationLimitLower, Model!EstimationLimitLower, I2)), AVERAGE(Model!EstimationLimitLower, Model!EstimationLimitUpper))";
  estimatedWorksheet.Cells["J2:J" + (rows + 1)].Style.Numberformat.Format = "0.000";
}
// Computes variable impacts (drop in R² when a variable's values are replaced)
// for both numeric and factor (string) variables. For factor variables the
// "Best" replacement strategy tries every distinct level and keeps the
// smallest resulting impact; otherwise the configured factor replacement
// method is applied directly. Results are ordered by descending impact.
public static IEnumerable<Tuple<string, double>> CalculateImpacts(
  IRegressionSolution solution,
  DataPartitionEnum data = DataPartitionEnum.Training,
  ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Median,
  FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best) {
  var problemData = solution.ProblemData;
  var dataset = problemData.Dataset;
  IEnumerable<int> rows;
  IEnumerable<double> targetValues;
  double originalR2 = -1;
  OnlineCalculatorError error;
  switch (data) {
    case DataPartitionEnum.All:
      rows = solution.ProblemData.AllIndices;
      targetValues = problemData.TargetVariableValues.ToList();
      // R² over all rows is not cached on the solution, so compute it here.
      originalR2 = OnlinePearsonsRCalculator.Calculate(problemData.TargetVariableValues, solution.EstimatedValues, out error);
      if (error != OnlineCalculatorError.None) { throw new InvalidOperationException("Error during R² calculation."); }
      originalR2 = originalR2 * originalR2;
      break;
    case DataPartitionEnum.Training:
      rows = problemData.TrainingIndices;
      targetValues = problemData.TargetVariableTrainingValues.ToList();
      originalR2 = solution.TrainingRSquared; // cached on the solution
      break;
    case DataPartitionEnum.Test:
      rows = problemData.TestIndices;
      targetValues = problemData.TargetVariableTestValues.ToList();
      originalR2 = solution.TestRSquared; // cached on the solution
      break;
    default:
      throw new ArgumentException(string.Format("DataPartition {0} cannot be handled.", data));
  }
  var impacts = new Dictionary<string, double>();
  // Work on a modifiable copy so variable columns can be replaced in place.
  var modifiableDataset = ((Dataset)dataset).ToModifiable();
  // Consider inputs allowed by the problem plus any variable the model
  // actually uses, restricted to columns present in the dataset.
  var inputvariables = new HashSet<string>(problemData.AllowedInputVariables.Union(solution.Model.VariablesUsedForPrediction));
  var allowedInputVariables = dataset.VariableNames.Where(v => inputvariables.Contains(v)).ToList();
  // calculate impacts for double variables
  foreach (var inputVariable in allowedInputVariables.Where(problemData.Dataset.VariableHasType<double>)) {
    var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, replacementMethod);
    var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error);
    if (error != OnlineCalculatorError.None) { throw new InvalidOperationException("Error during R² calculation with replaced inputs."); }
    newR2 = newR2 * newR2;
    var impact = originalR2 - newR2;
    impacts[inputVariable] = impact;
  }
  // calculate impacts for string variables
  foreach (var inputVariable in allowedInputVariables.Where(problemData.Dataset.VariableHasType<string>)) {
    if (factorReplacementMethod == FactorReplacementMethodEnum.Best) {
      // try replacing with all possible values and find the best replacement value
      // (i.e. the level that changes the prediction the least).
      var smallestImpact = double.PositiveInfinity;
      foreach (var repl in problemData.Dataset.GetStringValues(inputVariable, rows).Distinct()) {
        var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, Enumerable.Repeat(repl, dataset.Rows));
        var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error);
        if (error != OnlineCalculatorError.None) { throw new InvalidOperationException("Error during R² calculation with replaced inputs."); }
        newR2 = newR2 * newR2;
        var impact = originalR2 - newR2;
        if (impact < smallestImpact) { smallestImpact = impact; }
      }
      impacts[inputVariable] = smallestImpact;
    } else {
      // for replacement methods shuffle and mode
      // calculate impacts for factor variables
      var newEstimates = EvaluateModelWithReplacedVariable(solution.Model, inputVariable, modifiableDataset, rows, factorReplacementMethod);
      var newR2 = OnlinePearsonsRCalculator.Calculate(targetValues, newEstimates, out error);
      if (error != OnlineCalculatorError.None) { throw new InvalidOperationException("Error during R² calculation with replaced inputs."); }
      newR2 = newR2 * newR2;
      var impact = originalR2 - newR2;
      impacts[inputVariable] = impact;
    }
  } // foreach
  return (impacts.OrderByDescending(i => i.Value).Select(i => Tuple.Create(i.Key, i.Value)));
}
// Computes the signed residual (estimated - target) for every row of the
// solution's dataset.
// NOTE(review): the target column is looked up via Content.ProblemData while
// the dataset and estimates come from solution.ProblemData — confirm both
// always refer to the same target variable and dataset.
private List<double> CalculateResiduals(IRegressionSolution solution) {
  IRegressionProblemData problemdata = solution.ProblemData;
  var targetValues = problemdata.Dataset.GetDoubleValues(Content.ProblemData.TargetVariable).ToList();
  var estimatedValues = solution.EstimatedValues.ToList();

  var residuals = new List<double>();
  for (int row = 0; row < solution.ProblemData.Dataset.Rows; row++) {
    residuals.Add(estimatedValues[row] - targetValues[row]);
  }
  return residuals;
}
//mkommend: annoying name clash with static method, open to better naming suggestions
// Instance-level wrapper: forwards to the static impact calculation using this
// instance's configured data partition and replacement method.
public IEnumerable<Tuple<string, double>> Calculate(IRegressionSolution solution) {
  return (CalculateImpacts(solution, DataPartition, ReplacementMethod));
}
// Creates the line series for a solution. A 2-value Range series for the 95%
// confidence interval is created only when the solution supports confidence
// estimates (IConfidenceRegressionSolution); otherwise Item2 is null.
private Tuple<Series, Series> CreateSeries(IRegressionSolution solution) {
  var series = new Series {
    ChartType = SeriesChartType.Line,
    Name = solution.ProblemData.TargetVariable + " " + solutions.IndexOf(solution)
  };
  series.LegendText = series.Name;
  var confidenceBoundSolution = solution as IConfidenceRegressionSolution;
  Series confidenceIntervalSeries = null;
  if (confidenceBoundSolution != null) {
    confidenceIntervalSeries = new Series {
      ChartType = SeriesChartType.Range,
      YValuesPerPoint = 2, // lower and upper bound
      Name = "95% Conf. Interval " + series.Name,
      IsVisibleInLegend = false
    };
  }
  return Tuple.Create(series, confidenceIntervalSeries);
}
// Recomputes, on a background task, the y-values of the solution's cached
// series over all rows of the internal dataset, plus the 95% confidence band
// (±1.96·sqrt(variance)) when the solution provides variance estimates.
// Returns the overall min/max of all plotted values for axis scaling.
// NOTE(review): chart.Invalidate() runs on the background task — confirm this
// is safe for the chart control in use.
private Task<DoubleLimit> UpdateSeriesDataAsync(IRegressionSolution solution, CancellationToken cancellationToken) {
  return Task.Run(() => {
    var allRows = Enumerable.Range(0, internalDataset.Rows);
    var xvalues = internalDataset.GetDoubleValues(FreeVariable).ToList();
    var yvalues = solution.Model.GetEstimatedValues(internalDataset, allRows).ToList();

    double min = double.MaxValue, max = double.MinValue;
    var series = seriesCache[solution];
    for (int i = 0; i < xvalues.Count; i++) {
      series.Points[i].SetValueXY(xvalues[i], yvalues[i]);
      if (yvalues[i] < min) min = yvalues[i];
      if (yvalues[i] > max) max = yvalues[i];
    }
    chart.Invalidate();
    cancellationToken.ThrowIfCancellationRequested();

    var confidenceSolution = solution as IConfidenceRegressionSolution;
    if (confidenceSolution != null) {
      var ciSeries = ciSeriesCache[solution];
      var variances = confidenceSolution.Model.GetEstimatedVariances(internalDataset, allRows).ToList();
      for (int i = 0; i < xvalues.Count; i++) {
        // 1.96 standard deviations = half-width of the 95% interval (normal approximation).
        var lower = yvalues[i] - 1.96 * Math.Sqrt(variances[i]);
        var upper = yvalues[i] + 1.96 * Math.Sqrt(variances[i]);
        ciSeries.Points[i].SetValueXY(xvalues[i], lower, upper);
        if (lower < min) min = lower;
        if (upper > max) max = upper;
      }
      chart.Invalidate();
    }
    cancellationToken.ThrowIfCancellationRequested();
    return new DoubleLimit(min, max);
  }, cancellationToken);
}