/// <summary>
/// Adds a FastLine series for <paramref name="solution"/> to the chart. Nothing happens when the
/// chart already contains a series with the solution's name.
/// </summary>
/// <param name="solution">Solution whose residuals are plotted; it is stored in the series' Tag.</param>
protected void AddSeries(IRegressionSolution solution)
        {
            bool alreadyShown = chart.Series.Any(s => s.Name == solution.Name);
            if (alreadyShown)
            {
                return;
            }

            var newSeries = new Series(solution.Name)
            {
                Tag       = solution,
                ChartType = SeriesChartType.FastLine
            };

            var residuals = GetResiduals(GetOriginalValues(), GetEstimatedValues(solution));
            var largest   = residuals.Max();
            var chartArea = chart.ChartAreas[0];

            // enlarge the x axis when the largest residual reaches the current axis maximum
            if (largest >= chartArea.AxisX.Maximum)
            {
                // new maximum = next multiple (above the value) of the value's power-of-ten magnitude
                double magnitude = Math.Pow(10, Math.Floor(Math.Log10(largest)));
                chartArea.AxisX.Maximum    = magnitude * (1 + (int)(largest / magnitude));
                chartArea.CursorX.Interval = residuals.Min() / 100;
            }

            UpdateSeries(residuals, newSeries);

            newSeries.ToolTip       = "Area over Curve: " + CalculateAreaOverCurve(newSeries);
            newSeries.LegendToolTip = "Double-click to open model";
            chart.Series.Add(newSeries);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Replaces the points of the cached series of <paramref name="solution"/> (and of its cached
        /// confidence interval series, if there is one) with one placeholder point per x value.
        /// </summary>
        /// <param name="solution">Key into seriesCache / ciSeriesCache.</param>
        /// <param name="xvalues">X positions; when null, the values of FreeVariable in internalDataset are used.</param>
        private void ResizeSeriesData(IRegressionSolution solution, IList <double> xvalues = null)
        {
            var xs = xvalues ?? internalDataset.GetDoubleValues(FreeVariable).ToList();

            var points = seriesCache[solution].Points;
            points.SuspendUpdates();
            points.Clear();
            foreach (var x in xs)
            {
                // y is a placeholder value of 0.0
                points.Add(new DataPoint(x, 0.0));
            }
            points.ResumeUpdates();

            Series ciSeries;
            if (!ciSeriesCache.TryGetValue(solution, out ciSeries))
            {
                return;
            }

            var ciPoints = ciSeries.Points;
            ciPoints.SuspendUpdates();
            ciPoints.Clear();
            foreach (var x in xs)
            {
                // two placeholder y values (-1.0 and 1.0) per point
                ciPoints.Add(new DataPoint(x, new[] { -1.0, 1.0 }));
            }
            ciPoints.ResumeUpdates();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Creates a random forest regression model for the current problem data, adds its training and
        /// out-of-bag error values to Results and, depending on ModelCreation, also adds a regression solution.
        /// </summary>
        /// <param name="cancellationToken">Not referenced in this method body.</param>
        protected override void Run(CancellationToken cancellationToken)
        {
            if (SetSeedRandomly)
            {
                Seed = Random.RandomSeedGenerator.GetSeed();
            }

            double trainRmse, trainAvgRelError, oobRmse, oobAvgRelError;
            var forest = CreateRandomForestRegressionModel(Problem.ProblemData, NumberOfTrees, R, M, Seed,
                                                           out trainRmse, out trainAvgRelError, out oobRmse, out oobAvgRelError);

            Results.Add(new Result("Root mean square error", "The root of the mean of squared errors of the random forest regression solution on the training set.", new DoubleValue(trainRmse)));
            Results.Add(new Result("Average relative error", "The average of relative errors of the random forest regression solution on the training set.", new PercentValue(trainAvgRelError)));
            Results.Add(new Result("Root mean square error (out-of-bag)", "The out-of-bag root of the mean of squared errors of the random forest regression solution.", new DoubleValue(oobRmse)));
            Results.Add(new Result("Average relative error (out-of-bag)", "The out-of-bag average of relative errors of the random forest regression solution.", new PercentValue(oobAvgRelError)));

            // stays null for every ModelCreation value other than Model and SurrogateModel
            IRegressionSolution createdSolution = null;

            if (ModelCreation == ModelCreation.Model)
            {
                createdSolution = forest.CreateRegressionSolution(Problem.ProblemData);
            }
            else if (ModelCreation == ModelCreation.SurrogateModel)
            {
                var data      = Problem.ProblemData;
                var surrogate = new RandomForestModelSurrogate(forest, data.TargetVariable, data, Seed, NumberOfTrees, R, M);
                createdSolution = surrogate.CreateRegressionSolution(data);
            }

            if (createdSolution == null)
            {
                return;
            }

            Results.Add(new Result(RandomForestRegressionModelResultName, "The random forest regression solution.", createdSolution));
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Fills the cached series of <paramref name="solution"/> (and its cached confidence interval
        /// series, if there is one) with one zero-valued point per entry of <paramref name="values"/>.
        /// </summary>
        /// <param name="solution">Key into seriesCache / ciSeriesCache.</param>
        /// <param name="values">Used as x value of every point and as tooltip of the main series' points.</param>
        private void InitSeriesData(IRegressionSolution solution, IList <string> values)
        {
            var points = seriesCache[solution].Points;

            points.SuspendUpdates();
            points.Clear();
            for (int i = 0; i < values.Count; i++)
            {
                points.AddXY(values[i], 0.0);
                // the point that was just appended is the last one in the collection
                points[points.Count - 1].ToolTip = values[i];
            }

            // styles are updated before point updates are resumed
            var fixedValue = sharedFixedVariables.GetStringValue(FreeVariable, 0);
            UpdateAllSeriesStyles(variableValues.IndexOf(fixedValue));
            points.ResumeUpdates();

            Series ciSeries;
            if (!ciSeriesCache.TryGetValue(solution, out ciSeries))
            {
                return;
            }

            var ciPoints = ciSeries.Points;
            ciPoints.SuspendUpdates();
            ciPoints.Clear();
            for (int i = 0; i < values.Count; i++)
            {
                // five y values per point, all initialised to zero
                ciPoints.AddXY(values[i], 0.0, 0.0, 0.0, 0.0, 0.0);
            }
            ciPoints.ResumeUpdates();
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Adds <paramref name="solution"/> to the shown solutions, creates and caches its series,
        /// recalculates and finally raises the solution-added notification.
        /// A solution that is already contained is ignored.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// SolutionsCompatible rejects the current solutions together with <paramref name="solution"/>.
        /// </exception>
        public async Task AddSolutionAsync(IRegressionSolution solution)
        {
            var withCandidate = solutions.Concat(new[] { solution });
            if (!SolutionsCompatible(withCandidate))
            {
                throw new ArgumentException("The solution is not compatible with the problem data.");
            }
            if (solutions.Contains(solution))
            {
                return;
            }

            solutions.Add(solution);

            var created    = CreateSeries(solution);
            var mainSeries = created.Item1;
            var ciSeries   = created.Item2;

            seriesCache.Add(solution, mainSeries);
            // the second tuple item may be null; only a non-null series is cached
            if (ciSeries != null)
            {
                ciSeriesCache.Add(solution, ciSeries);
            }

            InitSeriesData(solution, variableValues);
            OrderAndColorSeries();

            await RecalculateAsync();

            OnSolutionAdded(this, new EventArgs <IRegressionSolution>(solution));
        }
        /// <summary>
        /// Calculates an impact value for every allowed input variable of <paramref name="solution"/>:
        /// the R² on the selected partition minus the R² obtained after that variable was replaced.
        /// </summary>
        /// <param name="solution">Solution whose model and problem data are evaluated.</param>
        /// <param name="data">Partition that determines the rows and target values.</param>
        /// <param name="replacement">Replacement method passed on to EvaluateModelWithReplacedVariable.</param>
        /// <returns>(variable name, impact) tuples ordered by descending impact.</returns>
        /// <exception cref="ArgumentException"><paramref name="data"/> is not All, Training or Test.</exception>
        /// <exception cref="InvalidOperationException">An R² calculation reported an error.</exception>
        public static IEnumerable <Tuple <string, double> > CalculateImpacts(IRegressionSolution solution,
                                                                             DataPartitionEnum data            = DataPartitionEnum.Training,
                                                                             ReplacementMethodEnum replacement = ReplacementMethodEnum.Median)
        {
            var problemData = solution.ProblemData;
            var dataset     = problemData.Dataset;

            IEnumerable <int>     rows;
            IEnumerable <double>  targets;
            double                baselineR2;
            OnlineCalculatorError calcError;

            if (data == DataPartitionEnum.All)
            {
                rows    = solution.ProblemData.AllIndices;
                targets = problemData.TargetVariableValues.ToList();

                // for all rows the R² is computed here as the squared Pearson's R
                double r = OnlinePearsonsRCalculator.Calculate(problemData.TargetVariableValues, solution.EstimatedValues, out calcError);
                if (calcError != OnlineCalculatorError.None)
                {
                    throw new InvalidOperationException("Error during R² calculation.");
                }
                baselineR2 = r * r;
            }
            else if (data == DataPartitionEnum.Training)
            {
                rows       = problemData.TrainingIndices;
                targets    = problemData.TargetVariableTrainingValues.ToList();
                baselineR2 = solution.TrainingRSquared;
            }
            else if (data == DataPartitionEnum.Test)
            {
                rows       = problemData.TestIndices;
                targets    = problemData.TargetVariableTestValues.ToList();
                baselineR2 = solution.TestRSquared;
            }
            else
            {
                throw new ArgumentException(string.Format("DataPartition {0} cannot be handled.", data));
            }

            var impacts    = new Dictionary <string, double>();
            var modifiable = ((Dataset)dataset).ToModifiable();

            foreach (var variable in problemData.AllowedInputVariables)
            {
                var replacedEstimates = EvaluateModelWithReplacedVariable(solution.Model, variable, modifiable, rows, replacement);
                var replacedR         = OnlinePearsonsRCalculator.Calculate(targets, replacedEstimates, out calcError);
                if (calcError != OnlineCalculatorError.None)
                {
                    throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
                }

                var replacedR2 = replacedR * replacedR;
                impacts[variable] = baselineR2 - replacedR2;
            }

            return impacts.OrderByDescending(entry => entry.Value)
                          .Select(entry => Tuple.Create(entry.Key, entry.Value));
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Runs the parameterless constructor and then stores <paramref name="solution"/> in RegressionSolution.
 /// </summary>
 public MeanModel(IRegressionSolution solution)
   : this() {
   // Whether the model is really compatible (i.e. uses only available input variables) cannot be
   // checked at this point. It is merely assumed that the allowed inputs of the regression solution
   // match the allowed inputs of the Gaussian process; a mismatch might only show up later as an
   // error or bad behaviour when the mean function is evaluated.
   this.RegressionSolution = solution;
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Runs the parameterless constructor and then stores <paramref name="solution"/> in RegressionSolution.
 /// </summary>
 public MeanModel(IRegressionSolution solution)
     : this()
 {
     // No compatibility check is possible here (i.e. that the model uses only available input variables).
     // The allowed inputs of the regression solution are assumed to be the same as the allowed inputs
     // of the Gaussian process; otherwise an error or bad behaviour might occur later, when the mean
     // function is evaluated.
     this.RegressionSolution = solution;
 }
Ejemplo n.º 9
0
        /// <summary>
        /// Convenience overload: determines the rows of <paramref name="dataPartition"/> and the solution's
        /// estimated values for these rows, then delegates to the overload working on model and problem data.
        /// </summary>
        /// <returns>The result of the delegated CalculateImpacts call.</returns>
        public static IEnumerable <Tuple <string, double> > CalculateImpacts(
            IRegressionSolution solution,
            ReplacementMethodEnum replacementMethod             = ReplacementMethodEnum.Shuffle,
            FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best,
            DataPartitionEnum dataPartition = DataPartitionEnum.Training)
        {
            IEnumerable <int>    partitionRows = GetPartitionRows(dataPartition, solution.ProblemData);
            IEnumerable <double> estimates     = solution.GetEstimatedValues(partitionRows);

            var impacts = CalculateImpacts(solution.Model, solution.ProblemData, estimates, partitionRows, replacementMethod, factorReplacementMethod);
            return impacts;
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Starts a task (via Task.Run) that writes the model's estimated values for internalDataset into
        /// the cached series of <paramref name="solution"/> and, for confidence solutions, writes the
        /// interval bounds into the cached confidence interval series.
        /// </summary>
        /// <param name="cancellationToken">Passed to Task.Run and checked after each of the two update phases.</param>
        /// <returns>A task yielding the smallest and largest y value that was written.</returns>
        private Task <DoubleLimit> UpdateSeriesDataAsync(IRegressionSolution solution, CancellationToken cancellationToken)
        {
            return Task.Run(() => {
                var xs        = internalDataset.GetDoubleValues(FreeVariable).ToList();
                var estimates = solution.Model.GetEstimatedValues(internalDataset, Enumerable.Range(0, internalDataset.Rows)).ToList();

                double lowest  = double.MaxValue;
                double highest = double.MinValue;

                var points = seriesCache[solution].Points;
                for (int i = 0; i < xs.Count; i++)
                {
                    var point = points[i];
                    double y  = estimates[i];
                    point.SetValueXY(xs[i], y);

                    if (y < lowest)
                    {
                        lowest = y;
                    }
                    if (y > highest)
                    {
                        highest = y;
                    }
                }
                chart.Invalidate();

                cancellationToken.ThrowIfCancellationRequested();

                if (solution is IConfidenceRegressionSolution)
                {
                    var ciSolution = (IConfidenceRegressionSolution)solution;
                    var ciPoints   = ciSeriesCache[solution].Points;
                    var variances  = ciSolution.Model.GetEstimatedVariances(internalDataset, Enumerable.Range(0, internalDataset.Rows)).ToList();

                    for (int i = 0; i < xs.Count; i++)
                    {
                        // bounds lie 1.96 standard deviations below and above the estimate
                        double halfWidth = 1.96 * Math.Sqrt(variances[i]);
                        double lower     = estimates[i] - halfWidth;
                        double upper     = estimates[i] + halfWidth;
                        ciPoints[i].SetValueXY(xs[i], lower, upper);

                        if (lower < lowest)
                        {
                            lowest = lower;
                        }
                        if (upper > highest)
                        {
                            highest = upper;
                        }
                    }
                    chart.Invalidate();
                }

                cancellationToken.ThrowIfCancellationRequested();
                return new DoubleLimit(lowest, highest);
            }, cancellationToken);
        }
        /// <summary>
        /// Adds the model of <paramref name="solution"/> to Model, records the solution's training and
        /// test partitions for that model and clears all evaluation caches.
        /// </summary>
        /// <param name="solution">Solution whose model is added.</param>
        /// <exception cref="ArgumentException">The solution's model is already contained in Model.Models.</exception>
        private void AddRegressionSolution(IRegressionSolution solution)
        {
            var model = solution.Model;
            if (Model.Models.Contains(model))
            {
                // give the exception a message and parameter name so the failure can be diagnosed
                throw new ArgumentException("The model of the given solution has already been added.", "solution");
            }
            Model.Add(model);
            trainingPartitions[model] = solution.ProblemData.TrainingPartition;
            testPartitions[model]     = solution.ProblemData.TestPartition;

            // the set of models changed, so the cached evaluations are discarded
            trainingEvaluationCache.Clear();
            testEvaluationCache.Clear();
            evaluationCache.Clear();
        }
        /// <summary>
        /// Removes the model of <paramref name="solution"/> from Model, drops the partitions recorded for
        /// that model and clears all evaluation caches.
        /// </summary>
        /// <param name="solution">Solution whose model is removed.</param>
        /// <exception cref="ArgumentException">The solution's model is not contained in Model.Models.</exception>
        private void RemoveRegressionSolution(IRegressionSolution solution)
        {
            var model = solution.Model;
            if (!Model.Models.Contains(model))
            {
                // give the exception a message and parameter name so the failure can be diagnosed
                throw new ArgumentException("The model of the given solution has not been added.", "solution");
            }
            Model.Remove(model);
            trainingPartitions.Remove(model);
            testPartitions.Remove(model);

            // the set of models changed, so the cached evaluations are discarded
            trainingEvaluationCache.Clear();
            testEvaluationCache.Clear();
            evaluationCache.Clear();
        }
        /// <summary>
        /// Computes, for every row of the solution's dataset, the estimated value minus the target value.
        /// </summary>
        /// <param name="solution">Provides the dataset and the estimated values.</param>
        /// <returns>One residual per dataset row, in row order.</returns>
        private List <double> CalculateResiduals(IRegressionSolution solution)
        {
            var residuals = new List <double>();

            // note: the target variable name is taken from Content.ProblemData, the values from the solution's dataset
            var dataset   = solution.ProblemData.Dataset;
            var targets   = dataset.GetDoubleValues(Content.ProblemData.TargetVariable).ToList();
            var estimates = solution.EstimatedValues.ToList();

            int rowCount = dataset.Rows;
            for (int row = 0; row < rowCount; row++)
            {
                residuals.Add(estimates[row] - targets[row]);
            }

            return residuals;
        }
        /// <summary>
        /// Asserts that the solution-based and the model-based impact calculation return the same sequence
        /// and that the calculated impacts match <paramref name="expectedImpacts"/>.
        /// </summary>
        /// <param name="solution">Solution whose variable impacts are calculated (training indices).</param>
        /// <param name="expectedImpacts">Expected impact per variable name.</param>
        private void CheckDefaultAsserts(IRegressionSolution solution, Dictionary <string, double> expectedImpacts)
        {
            IRegressionProblemData problemData     = solution.ProblemData;
            IEnumerable <double>   estimatedValues = solution.GetEstimatedValues(problemData.TrainingIndices);

            // materialize both results once, so the assertions below do not re-enumerate the sequences
            var solutionImpacts = RegressionSolutionVariableImpactsCalculator.CalculateImpacts(solution).ToList();
            var modelImpacts    = RegressionSolutionVariableImpactsCalculator.CalculateImpacts(solution.Model, problemData, estimatedValues, problemData.TrainingIndices).ToList();

            //Both ways should return equal results
            Assert.IsTrue(solutionImpacts.SequenceEqual(modelImpacts));

            //Check if impacts are as expected (expected value first, so failure messages read correctly)
            Assert.AreEqual(expectedImpacts.Count, modelImpacts.Count);
            Assert.IsTrue(modelImpacts.All(v => v.Item2.IsAlmost(expectedImpacts[v.Item1])));
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Removes <paramref name="solution"/> and its cached series, recalculates and then raises the
        /// solution-removed notification. Does nothing when the solution is not contained.
        /// </summary>
        public async Task RemoveSolutionAsync(IRegressionSolution solution)
        {
            bool wasContained = solutions.Remove(solution);
            if (!wasContained)
            {
                return;
            }

            seriesCache.Remove(solution);
            ciSeriesCache.Remove(solution);

            await RecalculateAsync();

            OnSolutionRemoved(this, new EventArgs <IRegressionSolution>(solution));
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Creates a nonlinear regression solution and adds it together with its training and test RMSE to
        /// Results. With InitializeParametersRandomly, one initial solution plus Restarts further solutions
        /// are created, the RMSE of each is recorded in a table, and the one with the lowest training RMSE is kept.
        /// </summary>
        /// <param name="cancellationToken">Not referenced in this method body.</param>
        protected override void Run(CancellationToken cancellationToken)
        {
            IRegressionSolution best;

            if (!InitializeParametersRandomly)
            {
                best = CreateRegressionSolution(Problem.ProblemData, ModelStructure, Iterations, ApplyLinearScaling);
            }
            else
            {
                var rmseTable = new DataTable("RMSE table");
                rmseTable.VisualProperties.YAxisLogScale = true;

                var trainRow = new DataRow("RMSE (train)");
                trainRow.VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;
                var testRow = new DataRow("RMSE test");
                testRow.VisualProperties.ChartType = DataRowVisualProperties.DataRowChartType.Points;

                rmseTable.Rows.Add(trainRow);
                rmseTable.Rows.Add(testRow);
                Results.Add(new Result(rmseTable.Name, rmseTable.Name + " for all restarts", rmseTable));

                if (SetSeedRandomly)
                {
                    Seed = RandomSeedGenerator.GetSeed();
                }
                var rng = new MersenneTwister((uint)Seed);

                // the first solution is the initial best; every restart may replace it
                best = CreateRegressionSolution(Problem.ProblemData, ModelStructure, Iterations, ApplyLinearScaling, rng);
                trainRow.Values.Add(best.TrainingRootMeanSquaredError);
                testRow.Values.Add(best.TestRootMeanSquaredError);

                for (int restart = 0; restart < Restarts; restart++)
                {
                    var candidate = CreateRegressionSolution(Problem.ProblemData, ModelStructure, Iterations, ApplyLinearScaling, rng);
                    trainRow.Values.Add(candidate.TrainingRootMeanSquaredError);
                    testRow.Values.Add(candidate.TestRootMeanSquaredError);

                    if (candidate.TrainingRootMeanSquaredError < best.TrainingRootMeanSquaredError)
                    {
                        best = candidate;
                    }
                }
            }

            Results.Add(new Result(RegressionSolutionResultName, "The nonlinear regression solution.", best));
            Results.Add(new Result("Root mean square error (train)", "The root of the mean of squared errors of the regression solution on the training set.", new DoubleValue(best.TrainingRootMeanSquaredError)));
            Results.Add(new Result("Root mean square error (test)", "The root of the mean of squared errors of the regression solution on the test set.", new DoubleValue(best.TestRootMeanSquaredError)));
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Adds a clone of <paramref name="solution"/> to <paramref name="results"/> and, depending on
        /// whether its model is a tree model or a rule set model, further analysis results including the
        /// relative variable frequencies.
        /// </summary>
        private void AnalyzeSolution(IRegressionSolution solution, ResultCollection results, IRegressionProblemData problemData)
        {
            results.Add(new Result("RegressionSolution", (IItem)solution.Clone()));

            Dictionary <string, int> variableCounts = null;

            var treeModel = solution.Model as RegressionNodeTreeModel;
            if (treeModel != null)
            {
                results.Add(RegressionTreeAnalyzer.CreateLeafDepthHistogram(treeModel));
                variableCounts = RegressionTreeAnalyzer.GetTreeVariableFrequences(treeModel);
                RegressionTreeAnalyzer.AnalyzeNodes(treeModel, results, problemData);
            }

            var ruleSetModel = solution.Model as RegressionRuleSetModel;
            if (ruleSetModel != null)
            {
                results.Add(RegressionTreeAnalyzer.CreateRulesResult(ruleSetModel, problemData, "Rules", true));
                variableCounts = RegressionTreeAnalyzer.GetRuleVariableFrequences(ruleSetModel);
                results.Add(RegressionTreeAnalyzer.CreateCoverageDiagram(ruleSetModel, problemData));
            }

            // variable frequencies, normalised by the total count
            if (variableCounts != null)
            {
                var total = variableCounts.Values.Sum();
                if (total == 0)
                {
                    // avoids a division by zero; all relative frequencies are 0 in this case
                    total = 1;
                }

                var relative    = variableCounts.Select(entry => (double)entry.Value / total).ToArray();
                var impactArray = new DoubleArray(relative);
                impactArray.ElementNames = variableCounts.Select(entry => entry.Key);

                results.Add(new Result("Variable Frequences", "relative frequencies of variables in rules and tree nodes", impactArray));
            }

            // the pruning chart requires both a tree model and complexity pruning
            var complexityPruning = Pruning as ComplexityPruning;
            if (complexityPruning != null && treeModel != null)
            {
                RegressionTreeAnalyzer.PruningChart(treeModel, complexityPruning, results);
            }
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Creates the column series for <paramref name="solution"/> and an accompanying box plot series
        /// for its confidence interval.
        /// </summary>
        /// <returns>Item1: the column series; Item2: the box plot series (never null here).</returns>
        private Tuple <Series, Series> CreateSeries(IRegressionSolution solution)
        {
            var series = new Series();
            series.ChartType  = SeriesChartType.Column;
            series.Name       = solution.ProblemData.TargetVariable + " " + solutions.IndexOf(solution);
            series.XValueType = System.Windows.Forms.DataVisualization.Charting.ChartValueType.String;
            series.LegendText = series.Name;

            var ciSeries = new Series();
            ciSeries.ChartType         = SeriesChartType.BoxPlot;
            ciSeries.XValueType        = System.Windows.Forms.DataVisualization.Charting.ChartValueType.String;
            ciSeries.Color             = Color.Black;
            ciSeries.YValuesPerPoint   = 5;
            ciSeries.Name              = "95% Conf. Interval " + series.Name;
            ciSeries.IsVisibleInLegend = false;

            return Tuple.Create(series, ciSeries);
        }
        /// <summary>
        /// Returns the estimated values of <paramref name="solution"/> that belong to the item currently
        /// selected in cmbSamples (training, test or all samples).
        /// </summary>
        /// <exception cref="NotSupportedException">The selected item matches none of the three sample constants.</exception>
        protected IEnumerable <double> GetEstimatedValues(IRegressionSolution solution)
        {
            switch (cmbSamples.SelectedItem.ToString())
            {
            case TrainingSamples:
                return solution.EstimatedTrainingValues;

            case TestSamples:
                return solution.EstimatedTestValues;

            case AllSamples:
                return solution.EstimatedValues;

            default:
                throw new NotSupportedException();
            }
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Creates the line series for <paramref name="solution"/> and, only when the solution is an
        /// IConfidenceRegressionSolution, a range series for its confidence interval.
        /// </summary>
        /// <returns>Item1: the line series; Item2: the range series, or null for other solutions.</returns>
        private Tuple <Series, Series> CreateSeries(IRegressionSolution solution)
        {
            var series = new Series();
            series.ChartType  = SeriesChartType.Line;
            series.Name       = solution.ProblemData.TargetVariable + " " + solutions.IndexOf(solution);
            series.LegendText = series.Name;

            Series ciSeries = null;
            if (solution is IConfidenceRegressionSolution)
            {
                ciSeries = new Series();
                ciSeries.ChartType         = SeriesChartType.Range;
                ciSeries.YValuesPerPoint   = 2;
                ciSeries.Name              = "95% Conf. Interval " + series.Name;
                ciSeries.IsVisibleInLegend = false;
            }

            return Tuple.Create(series, ciSeries);
        }
Ejemplo n.º 21
0
    /// <summary>
    /// Removes <paramref name="solution"/>, recalculates the training limits, drops the cached series,
    /// recalculates and then raises the solution-removed notification.
    /// Does nothing when the solution is not contained.
    /// </summary>
    public async Task RemoveSolutionAsync(IRegressionSolution solution) {
      bool wasContained = solutions.Remove(solution);
      if (!wasContained) {
        return;
      }

      RecalculateTrainingLimits(true);

      seriesCache.Remove(solution);
      ciSeriesCache.Remove(solution);

      await RecalculateAsync();
      OnSolutionRemoved(this, new EventArgs<IRegressionSolution>(solution));
    }
 // NOTE (mkommend): the name is the result of an annoying clash with the static method;
 // better naming suggestions are welcome.
 /// <summary>
 /// Calls the static CalculateImpacts with this instance's DataPartition and ReplacementMethod.
 /// </summary>
 public IEnumerable<Tuple<string, double>> Calculate(IRegressionSolution solution) {
   var partition = DataPartition;
   var replacementMethod = ReplacementMethod;
   return CalculateImpacts(solution, partition, replacementMethod);
 }
    /// <summary>
    /// Calculates an impact value for every allowed input variable of <paramref name="solution"/>:
    /// the R² on the selected partition minus the R² obtained after that variable was replaced.
    /// </summary>
    /// <param name="solution">Solution whose model and problem data are evaluated.</param>
    /// <param name="data">Partition that determines the rows and target values.</param>
    /// <param name="replacement">Replacement method passed on to EvaluateModelWithReplacedVariable.</param>
    /// <returns>(variable name, impact) tuples ordered by descending impact.</returns>
    /// <exception cref="ArgumentException"><paramref name="data"/> is not All, Training or Test.</exception>
    /// <exception cref="InvalidOperationException">An R² calculation reported an error.</exception>
    public static IEnumerable<Tuple<string, double>> CalculateImpacts(IRegressionSolution solution,
      DataPartitionEnum data = DataPartitionEnum.Training,
      ReplacementMethodEnum replacement = ReplacementMethodEnum.Median) {

      var problemData = solution.ProblemData;
      var dataset = problemData.Dataset;

      IEnumerable<int> rows;
      IEnumerable<double> targets;
      double baselineR2 = -1;
      OnlineCalculatorError calcError;

      switch (data) {
        case DataPartitionEnum.All: {
            rows = solution.ProblemData.AllIndices;
            targets = problemData.TargetVariableValues.ToList();
            // for all rows the R² is computed here as the squared Pearson's R
            var r = OnlinePearsonsRCalculator.Calculate(problemData.TargetVariableValues, solution.EstimatedValues, out calcError);
            if (calcError != OnlineCalculatorError.None) {
              throw new InvalidOperationException("Error during R² calculation.");
            }
            baselineR2 = r * r;
            break;
          }
        case DataPartitionEnum.Training:
          rows = problemData.TrainingIndices;
          targets = problemData.TargetVariableTrainingValues.ToList();
          baselineR2 = solution.TrainingRSquared;
          break;
        case DataPartitionEnum.Test:
          rows = problemData.TestIndices;
          targets = problemData.TargetVariableTestValues.ToList();
          baselineR2 = solution.TestRSquared;
          break;
        default:
          throw new ArgumentException(string.Format("DataPartition {0} cannot be handled.", data));
      }

      var impacts = new Dictionary<string, double>();
      var modifiable = ((Dataset)dataset).ToModifiable();

      foreach (var variable in problemData.AllowedInputVariables) {
        var replacedEstimates = EvaluateModelWithReplacedVariable(solution.Model, variable, modifiable, rows, replacement);
        var replacedR = OnlinePearsonsRCalculator.Calculate(targets, replacedEstimates, out calcError);
        if (calcError != OnlineCalculatorError.None) {
          throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
        }

        var replacedR2 = replacedR * replacedR;
        impacts[variable] = baselineR2 - replacedR2;
      }

      return impacts
        .OrderByDescending(entry => entry.Value)
        .Select(entry => Tuple.Create(entry.Key, entry.Value));
    }
    /// <summary>
    /// Adds a FastLine series for <paramref name="solution"/> to the chart. Nothing happens when the
    /// chart already contains a series with the solution's name.
    /// </summary>
    /// <param name="solution">Solution whose residuals are plotted; it is stored in the series' Tag.</param>
    protected void AddSeries(IRegressionSolution solution) {
      bool alreadyShown = chart.Series.Any(s => s.Name == solution.Name);
      if (alreadyShown) {
        return;
      }

      var newSeries = new Series(solution.Name) {
        Tag = solution,
        ChartType = SeriesChartType.FastLine
      };
      var residuals = GetResiduals(GetOriginalValues(), GetEstimatedValues(solution));

      var largest = residuals.Max();
      var chartArea = chart.ChartAreas[0];
      // enlarge the x axis when the largest residual reaches the current axis maximum
      if (largest >= chartArea.AxisX.Maximum) {
        // new maximum = next multiple (above the value) of the value's power-of-ten magnitude
        double magnitude = Math.Pow(10, Math.Floor(Math.Log10(largest)));
        chartArea.AxisX.Maximum = magnitude * (1 + (int)(largest / magnitude));
        chartArea.CursorX.Interval = residuals.Min() / 100;
      }

      UpdateSeries(residuals, newSeries);

      newSeries.ToolTip = "Area over Curve: " + CalculateAreaOverCurve(newSeries);
      newSeries.LegendToolTip = "Double-click to open model";
      chart.Series.Add(newSeries);
    }
    /// <summary>
    /// Adds the model of <paramref name="solution"/> to Model, records the solution's training and
    /// test partitions for that model and clears all evaluation caches.
    /// </summary>
    /// <param name="solution">Solution whose model is added.</param>
    /// <exception cref="ArgumentException">The solution's model is already contained in Model.Models.</exception>
    private void AddRegressionSolution(IRegressionSolution solution) {
      var model = solution.Model;
      // give the exception a message and parameter name so the failure can be diagnosed
      if (Model.Models.Contains(model))
        throw new ArgumentException("The model of the given solution has already been added.", "solution");
      Model.Add(model);
      trainingPartitions[model] = solution.ProblemData.TrainingPartition;
      testPartitions[model] = solution.ProblemData.TestPartition;

      // the set of models changed, so the cached evaluations are discarded
      trainingEvaluationCache.Clear();
      testEvaluationCache.Clear();
      evaluationCache.Clear();
    }
    /// <summary>
    /// Removes the model of <paramref name="solution"/> from Model, drops the partitions recorded for
    /// that model and clears all evaluation caches.
    /// </summary>
    /// <param name="solution">Solution whose model is removed.</param>
    /// <exception cref="ArgumentException">The solution's model is not contained in Model.Models.</exception>
    private void RemoveRegressionSolution(IRegressionSolution solution) {
      var model = solution.Model;
      // give the exception a message and parameter name so the failure can be diagnosed
      if (!Model.Models.Contains(model))
        throw new ArgumentException("The model of the given solution has not been added.", "solution");
      Model.Remove(model);
      trainingPartitions.Remove(model);
      testPartitions.Remove(model);

      // the set of models changed, so the cached evaluations are discarded
      trainingEvaluationCache.Clear();
      testEvaluationCache.Clear();
      evaluationCache.Clear();
    }
 /// <summary>
 /// Returns the estimated values of <paramref name="solution"/> that belong to the item currently
 /// selected in cmbSamples (training, test or all samples).
 /// </summary>
 /// <exception cref="NotSupportedException">The selected item matches none of the three sample constants.</exception>
 protected IEnumerable<double> GetEstimatedValues(IRegressionSolution solution) {
   string selection = cmbSamples.SelectedItem.ToString();

   if (selection == TrainingSamples) {
     return solution.EstimatedTrainingValues;
   }
   if (selection == TestSamples) {
     return solution.EstimatedTestValues;
   }
   if (selection == AllSamples) {
     return solution.EstimatedValues;
   }
   throw new NotSupportedException();
 }
Ejemplo n.º 28
0
    /// <summary>
    /// Adds <paramref name="solution"/> to the shown solutions, recalculates the training limits,
    /// creates and caches its series, recalculates and finally raises the solution-added notification.
    /// A solution that is already contained is ignored.
    /// </summary>
    /// <exception cref="ArgumentException">
    /// SolutionsCompatible rejects the current solutions together with <paramref name="solution"/>.
    /// </exception>
    public async Task AddSolutionAsync(IRegressionSolution solution) {
      var withCandidate = solutions.Concat(new[] { solution });
      if (!SolutionsCompatible(withCandidate)) {
        throw new ArgumentException("The solution is not compatible with the problem data.");
      }
      if (solutions.Contains(solution)) {
        return;
      }

      solutions.Add(solution);
      RecalculateTrainingLimits(true);

      var created = CreateSeries(solution);
      var mainSeries = created.Item1;
      var ciSeries = created.Item2;
      seriesCache.Add(solution, mainSeries);
      // the second tuple item may be null; only a non-null series is cached
      if (ciSeries != null) {
        ciSeriesCache.Add(solution, ciSeries);
      }

      ResizeSeriesData(solution);
      OrderAndColorSeries();

      await RecalculateAsync();
      OnSolutionAdded(this, new EventArgs<IRegressionSolution>(solution));
    }
Ejemplo n.º 29
0
    /// <summary>
    /// Replaces the points of the cached series of <paramref name="solution"/> (and of its cached
    /// confidence interval series, if there is one) with one placeholder point per x value.
    /// </summary>
    /// <param name="solution">Key into seriesCache / ciSeriesCache.</param>
    /// <param name="xvalues">X positions; when null, the values of FreeVariable in internalDataset are used.</param>
    private void ResizeSeriesData(IRegressionSolution solution, IList<double> xvalues = null) {
      var xs = xvalues ?? internalDataset.GetDoubleValues(FreeVariable).ToList();

      var points = seriesCache[solution].Points;
      points.SuspendUpdates();
      points.Clear();
      foreach (var x in xs) {
        // y is a placeholder value of 0.0
        points.Add(new DataPoint(x, 0.0));
      }
      points.ResumeUpdates();

      Series ciSeries;
      if (!ciSeriesCache.TryGetValue(solution, out ciSeries)) {
        return;
      }

      var ciPoints = ciSeries.Points;
      ciPoints.SuspendUpdates();
      ciPoints.Clear();
      foreach (var x in xs) {
        // two placeholder y values (-1.0 and 1.0) per point
        ciPoints.Add(new DataPoint(x, new[] { -1.0, 1.0 }));
      }
      ciPoints.ResumeUpdates();
    }
Ejemplo n.º 30
0
        /// <summary>
        /// Fills <paramref name="estimatedWorksheet"/> with a header row and, for every dataset row, the id,
        /// a reference to the target value in <paramref name="datasetWorksheet"/>, the model formula and
        /// derived error columns.
        /// </summary>
        /// <param name="estimatedWorksheet">Worksheet that is written.</param>
        /// <param name="datasetWorksheet">Worksheet whose cells are referenced for the target variable.</param>
        /// <param name="formulaParts">Passed to PrepareFormula; the result is formatted with the sheet row number.</param>
        /// <param name="solution">Provides the dataset (row count, variable names) and the target variable name.</param>
        private void WriteEstimatedWorksheet(ExcelWorksheet estimatedWorksheet, ExcelWorksheet datasetWorksheet, string[] formulaParts, IRegressionSolution solution)
        {
            const string numberFormat = "0.000";

            string rowFormula = PrepareFormula(formulaParts);
            int    rows       = solution.ProblemData.Dataset.Rows;
            int    lastRow    = rows + 1;

            // header row; column 8 is left empty
            int[]    headerColumns = { 1, 2, 3, 4, 5, 6, 7, 9, 10 };
            string[] headerTitles  =
            {
                "Id", "Target Variable", "Estimated Values", "Absolute Error", "Relative Error",
                "Error", "Squared Error", "Unbounded Estimated Values", "Bounded Estimated Values"
            };
            for (int h = 0; h < headerColumns.Length; h++)
            {
                estimatedWorksheet.Cells[1, headerColumns[h]].Value = headerTitles[h];
            }

            estimatedWorksheet.Cells[1, 1, 1, 10].AutoFitColumns();

            // fill in id, target variable and unbounded estimated values
            // (worksheet columns are 1-based, hence the + 1 on the variable's list index)
            int targetColumn = solution.ProblemData.Dataset.VariableNames.ToList().FindIndex(x => x.Equals(solution.ProblemData.TargetVariable)) + 1;

            for (int i = 0; i < rows; i++)
            {
                // data starts in sheet row 2, below the header
                int sheetRow = i + 2;
                estimatedWorksheet.Cells[sheetRow, 1].Value   = i;
                estimatedWorksheet.Cells[sheetRow, 2].Formula = datasetWorksheet.Cells[sheetRow, targetColumn].FullAddress;
                estimatedWorksheet.Cells[sheetRow, 9].Formula = string.Format(rowFormula, sheetRow);
            }

            // per column: optional formula (null = none) followed by the number format, in column order
            string[] columns  = { "B", "C", "D", "E", "F", "G", "I", "J" };
            string[] formulas =
            {
                null,
                "J2",
                "ABS(B2 - C2)",
                "ABS(D2 / B2)",
                "C2 - B2",
                "POWER(F2, 2)",
                null,
                "IFERROR(IF(I2 > Model!EstimationLimitUpper, Model!EstimationLimitUpper, IF(I2 < Model!EstimationLimitLower, Model!EstimationLimitLower, I2)), AVERAGE(Model!EstimationLimitLower, Model!EstimationLimitUpper))"
            };
            for (int c = 0; c < columns.Length; c++)
            {
                string address = columns[c] + "2:" + columns[c] + lastRow;
                if (formulas[c] != null)
                {
                    estimatedWorksheet.Cells[address].Formula = formulas[c];
                }
                estimatedWorksheet.Cells[address].Style.Numberformat.Format = numberFormat;
            }
        }
Ejemplo n.º 31
0
        /// <summary>
        /// Calculates, for each relevant input variable, how far the squared Pearson correlation (R²)
        /// between target values and model estimates changes when that variable is replaced.
        /// </summary>
        /// <param name="solution">Solution whose model and problem data are evaluated.</param>
        /// <param name="data">Partition (all, training or test) that supplies the rows, the target values and the baseline R².</param>
        /// <param name="replacementMethod">Replacement method forwarded to <c>EvaluateModelWithReplacedVariable</c> for double variables.</param>
        /// <param name="factorReplacementMethod">
        /// Handling of string variables: with <c>Best</c> every distinct value found in the selected rows is tried
        /// and the smallest resulting impact is kept; any other value is forwarded to <c>EvaluateModelWithReplacedVariable</c>.
        /// </param>
        /// <returns>Pairs of variable name and impact (baseline R² minus R² after replacement), ordered by descending impact.</returns>
        /// <exception cref="ArgumentException">The partition value is not one of the handled cases.</exception>
        /// <exception cref="InvalidOperationException">The correlation calculator reported an error.</exception>
        public static IEnumerable<Tuple<string, double>> CalculateImpacts(
            IRegressionSolution solution,
            DataPartitionEnum data = DataPartitionEnum.Training,
            ReplacementMethodEnum replacementMethod = ReplacementMethodEnum.Median,
            FactorReplacementMethodEnum factorReplacementMethod = FactorReplacementMethodEnum.Best)
        {
            var problemData = solution.ProblemData;
            var dataset     = problemData.Dataset;

            IEnumerable<int>    rows;
            IEnumerable<double> targetValues;
            double              baselineR2;

            if (data == DataPartitionEnum.All)
            {
                // No precomputed R² is read for the full data set, so it is derived from Pearson's R here.
                OnlineCalculatorError baselineError;
                rows         = solution.ProblemData.AllIndices;
                targetValues = problemData.TargetVariableValues.ToList();
                double baselineR = OnlinePearsonsRCalculator.Calculate(problemData.TargetVariableValues, solution.EstimatedValues, out baselineError);
                if (baselineError != OnlineCalculatorError.None)
                {
                    throw new InvalidOperationException("Error during R² calculation.");
                }
                baselineR2 = baselineR * baselineR;
            }
            else if (data == DataPartitionEnum.Training)
            {
                rows         = problemData.TrainingIndices;
                targetValues = problemData.TargetVariableTrainingValues.ToList();
                baselineR2   = solution.TrainingRSquared;
            }
            else if (data == DataPartitionEnum.Test)
            {
                rows         = problemData.TestIndices;
                targetValues = problemData.TargetVariableTestValues.ToList();
                baselineR2   = solution.TestRSquared;
            }
            else
            {
                throw new ArgumentException(string.Format("DataPartition {0} cannot be handled.", data));
            }

            // Impact of one replacement run: baseline R² minus the R² obtained with the replaced estimates.
            Func<IEnumerable<double>, double> impactOf = replacedEstimates =>
            {
                OnlineCalculatorError replacedError;
                double replacedR = OnlinePearsonsRCalculator.Calculate(targetValues, replacedEstimates, out replacedError);
                if (replacedError != OnlineCalculatorError.None)
                {
                    throw new InvalidOperationException("Error during R² calculation with replaced inputs.");
                }
                return baselineR2 - replacedR * replacedR;
            };

            var impacts           = new Dictionary<string, double>();
            var modifiableDataset = ((Dataset)dataset).ToModifiable();

            // Variables that are allowed as inputs or used by the model, in the dataset's column order.
            var relevantNames  = new HashSet<string>(problemData.AllowedInputVariables.Union(solution.Model.VariablesUsedForPrediction));
            var inputsInOrder  = dataset.VariableNames.Where(relevantNames.Contains).ToList();

            // double variables
            foreach (var doubleVariable in inputsInOrder.Where(problemData.Dataset.VariableHasType<double>))
            {
                impacts[doubleVariable] = impactOf(
                    EvaluateModelWithReplacedVariable(solution.Model, doubleVariable, modifiableDataset, rows, replacementMethod));
            }

            // string (factor) variables
            foreach (var factorVariable in inputsInOrder.Where(problemData.Dataset.VariableHasType<string>))
            {
                if (factorReplacementMethod != FactorReplacementMethodEnum.Best)
                {
                    // every method other than Best is delegated to the replacement helper
                    impacts[factorVariable] = impactOf(
                        EvaluateModelWithReplacedVariable(solution.Model, factorVariable, modifiableDataset, rows, factorReplacementMethod));
                    continue;
                }

                // Best: replace the whole column by each distinct value in turn and keep the smallest impact.
                // Stays at +infinity when the selected rows yield no value at all.
                var smallestImpact = double.PositiveInfinity;
                foreach (var candidate in problemData.Dataset.GetStringValues(factorVariable, rows).Distinct())
                {
                    var candidateImpact = impactOf(
                        EvaluateModelWithReplacedVariable(solution.Model, factorVariable, modifiableDataset, rows,
                                                          Enumerable.Repeat(candidate, dataset.Rows)));
                    if (candidateImpact < smallestImpact)
                    {
                        smallestImpact = candidateImpact;
                    }
                }
                impacts[factorVariable] = smallestImpact;
            }

            return impacts.OrderByDescending(entry => entry.Value)
                          .Select(entry => Tuple.Create(entry.Key, entry.Value));
        }
    /// <summary>
    /// Returns one residual (estimated value minus target value) for every row of the solution's dataset.
    /// </summary>
    /// <param name="solution">Solution that provides the dataset and the estimated values.</param>
    /// <returns>Residuals in row order.</returns>
    private List<double> CalculateResiduals(IRegressionSolution solution) {
      var dataset = solution.ProblemData.Dataset;

      // NOTE(review): the target column name comes from Content.ProblemData, not from the
      // solution passed in - confirm that both are guaranteed to share the same target variable.
      var targets = dataset.GetDoubleValues(Content.ProblemData.TargetVariable).ToList();
      var estimates = solution.EstimatedValues.ToList();

      return Enumerable.Range(0, dataset.Rows)
        .Select(row => estimates[row] - targets[row])
        .ToList();
    }
Ejemplo n.º 33
0
 //mkommend: annoying name clash with static method, open to better naming suggestions
 /// <summary>
 /// Forwards to <c>CalculateImpacts</c> with the current <c>DataPartition</c> and
 /// <c>ReplacementMethod</c> values.
 /// </summary>
 /// <param name="solution">Solution for which the impacts are requested.</param>
 /// <returns>Whatever <c>CalculateImpacts</c> returns for these arguments.</returns>
 public IEnumerable<Tuple<string, double>> Calculate(IRegressionSolution solution)
 {
     // NOTE(review): no factor replacement method is passed along, so the callee's default
     // (if it has such a parameter) applies - confirm this is intended.
     var partition   = DataPartition;
     var replacement = ReplacementMethod;
     return CalculateImpacts(solution, partition, replacement);
 }
Ejemplo n.º 34
0
    /// <summary>
    /// Creates the line series for a solution and, when the solution is an
    /// <c>IConfidenceRegressionSolution</c>, a matching range series for its confidence interval.
    /// </summary>
    /// <param name="solution">Solution the series are created for; its position in <c>solutions</c> becomes part of the series name.</param>
    /// <returns>The line series as first item; the confidence interval series, or null, as second item.</returns>
    private Tuple<Series, Series> CreateSeries(IRegressionSolution solution) {
      var lineSeries = new Series();
      lineSeries.ChartType = SeriesChartType.Line;
      lineSeries.Name = solution.ProblemData.TargetVariable + " " + solutions.IndexOf(solution);
      lineSeries.LegendText = lineSeries.Name;

      // Solutions without confidence information only get the line series.
      if (!(solution is IConfidenceRegressionSolution)) {
        return new Tuple<Series, Series>(lineSeries, null);
      }

      // Range series with two y-values per point; kept out of the legend.
      var intervalSeries = new Series();
      intervalSeries.ChartType = SeriesChartType.Range;
      intervalSeries.YValuesPerPoint = 2;
      intervalSeries.Name = "95% Conf. Interval " + lineSeries.Name;
      intervalSeries.IsVisibleInLegend = false;

      return new Tuple<Series, Series>(lineSeries, intervalSeries);
    }
    /// <summary>
    /// Fills the estimated-values worksheet: header row, one data row per dataset row
    /// (id, target reference, unbounded estimate) and the derived error / bounded-estimate columns.
    /// </summary>
    /// <param name="estimatedWorksheet">Worksheet that is written to.</param>
    /// <param name="datasetWorksheet">Worksheet whose cell addresses are referenced for the target values.</param>
    /// <param name="formulaParts">Input for <c>PrepareFormula</c>, which yields the per-row formula template.</param>
    /// <param name="solution">Source of the row count, variable names and target variable name.</param>
    private void WriteEstimatedWorksheet(ExcelWorksheet estimatedWorksheet, ExcelWorksheet datasetWorksheet, string[] formulaParts, IRegressionSolution solution) {
      string rowFormula = PrepareFormula(formulaParts);
      int rowCount = solution.ProblemData.Dataset.Rows;

      // Header texts indexed by (column - 1); the null entry marks column 8, which gets no header.
      string[] headers = {
        "Id", "Target Variable", "Estimated Values", "Absolute Error", "Relative Error",
        "Error", "Squared Error", null, "Unbounded Estimated Values", "Bounded Estimated Values"
      };
      for (int column = 1; column <= headers.Length; column++) {
        if (headers[column - 1] == null) continue;
        estimatedWorksheet.Cells[1, column].Value = headers[column - 1];
      }

      estimatedWorksheet.Cells[1, 1, 1, 10].AutoFitColumns();

      // fill in id, target variable and unbounded estimated values
      // (targetColumn is the index of the target variable in VariableNames plus one; 0 if it is not found)
      var names = solution.ProblemData.Dataset.VariableNames.ToList();
      int targetColumn = names.FindIndex(n => n.Equals(solution.ProblemData.TargetVariable)) + 1;
      int lastRow = rowCount + 1; // data rows are worksheet rows 2..lastRow
      for (int row = 2; row <= lastRow; row++) {
        estimatedWorksheet.Cells[row, 1].Value = row - 2; // id
        estimatedWorksheet.Cells[row, 2].Formula = datasetWorksheet.Cells[row, targetColumn].FullAddress; // target variable
        estimatedWorksheet.Cells[row, 9].Formula = string.Format(rowFormula, row); // unbounded estimated values
      }

      const string threeDecimals = "0.000";
      estimatedWorksheet.Cells["B2:B" + lastRow].Style.Numberformat.Format = threeDecimals;

      // column letter -> formula (written relative to row 2) for the derived columns C..G
      string[][] derived = {
        new[] { "C", "J2" },
        new[] { "D", "ABS(B2 - C2)" },
        new[] { "E", "ABS(D2 / B2)" },
        new[] { "F", "C2 - B2" },
        new[] { "G", "POWER(F2, 2)" }
      };
      foreach (string[] entry in derived) {
        string range = entry[0] + "2:" + entry[0] + lastRow;
        estimatedWorksheet.Cells[range].Formula = entry[1];
        estimatedWorksheet.Cells[range].Style.Numberformat.Format = threeDecimals;
      }

      estimatedWorksheet.Cells["I2:I" + lastRow].Style.Numberformat.Format = threeDecimals;

      // bounded estimate: column I clamped to the Model sheet's limits, average of the limits on error
      string boundedRange = "J2:J" + lastRow;
      estimatedWorksheet.Cells[boundedRange].Formula = "IFERROR(IF(I2 > Model!EstimationLimitUpper, Model!EstimationLimitUpper, IF(I2 < Model!EstimationLimitLower, Model!EstimationLimitLower, I2)), AVERAGE(Model!EstimationLimitLower, Model!EstimationLimitUpper))";
      estimatedWorksheet.Cells[boundedRange].Style.Numberformat.Format = threeDecimals;
    }
Ejemplo n.º 36
0
    /// <summary>
    /// Re-evaluates the solution's model on <c>internalDataset</c> inside a <c>Task.Run</c> delegate, writes the
    /// results into the cached series (and the cached confidence interval series, if the solution
    /// provides variances) and reports the smallest and largest y-value encountered.
    /// </summary>
    /// <param name="solution">Solution whose cached series are updated.</param>
    /// <param name="cancellationToken">Checked after each of the two update phases; also passed to <c>Task.Run</c>.</param>
    /// <returns>A task yielding the y-range covered by the estimates and, if present, the interval bounds.</returns>
    private Task<DoubleLimit> UpdateSeriesDataAsync(IRegressionSolution solution, CancellationToken cancellationToken) {
      // NOTE(review): the delegate touches chart and series objects from the task's thread - confirm
      // that callers rely on this being safe.
      return Task.Run(() => {
        var xs = internalDataset.GetDoubleValues(FreeVariable).ToList();
        var estimates = solution.Model.GetEstimatedValues(internalDataset, Enumerable.Range(0, internalDataset.Rows)).ToList();

        double lowest = double.MaxValue;
        double highest = double.MinValue;

        // Phase 1: point estimates. The series is expected to already hold one point per x-value.
        var linePoints = seriesCache[solution].Points;
        for (int i = 0; i < xs.Count; i++) {
          var point = linePoints[i];
          double x = xs[i];
          double y = estimates[i];
          point.SetValueXY(x, y);
          if (y < lowest) lowest = y;
          if (y > highest) highest = y;
        }
        chart.Invalidate();

        cancellationToken.ThrowIfCancellationRequested();

        // Phase 2: confidence band, only for solutions that can estimate variances.
        var confidenceSolution = solution as IConfidenceRegressionSolution;
        if (confidenceSolution != null) {
          var intervalSeries = ciSeriesCache[solution];
          var variances = confidenceSolution.Model.GetEstimatedVariances(internalDataset, Enumerable.Range(0, internalDataset.Rows)).ToList();
          for (int i = 0; i < xs.Count; i++) {
            double y = estimates[i];
            // band of +/- 1.96 standard deviations around the estimate
            double halfWidth = 1.96 * Math.Sqrt(variances[i]);
            double lower = y - halfWidth;
            double upper = y + halfWidth;
            intervalSeries.Points[i].SetValueXY(xs[i], lower, upper);
            if (lower < lowest) lowest = lower;
            if (upper > highest) highest = upper;
          }
          chart.Invalidate();
        }

        cancellationToken.ThrowIfCancellationRequested();
        return new DoubleLimit(lowest, highest);
      }, cancellationToken);
    }