public override double CalculateSolutionSimilarity(IScope leftSolution, IScope rightSolution)
        {
            // The very same scope is maximally similar to itself.
            if (leftSolution == rightSolution)
            {
                return 1.0;
            }

            // Both scopes must carry precomputed estimated values.
            if (!leftSolution.Variables.ContainsKey("EstimatedValues") || !rightSolution.Variables.ContainsKey("EstimatedValues"))
            {
                throw new ArgumentException("No estimated values are present in the subscopes.");
            }

            var left  = (DoubleArray)leftSolution.Variables["EstimatedValues"].Value;
            var right = (DoubleArray)rightSolution.Variables["EstimatedValues"].Value;

            // Two constant outputs are treated as identical phenotypes.
            if (left.Variance().IsAlmost(0) && right.Variance().IsAlmost(0))
            {
                return 1.0;
            }

            OnlineCalculatorError error;
            var r = OnlinePearsonsRCalculator.Calculate(left, right, out error);

            // Squared Pearson correlation, clamped at 1; a failed calculation
            // (e.g. one side has zero variance) counts as no similarity.
            var similarity = error == OnlineCalculatorError.None ? r * r : 0;
            return similarity > 1.0 ? 1.0 : similarity;
        }
        public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling)
        {
            // Evaluate the tree on the requested rows and fetch the matching target values.
            var estimated = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
            var targets   = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);

            OnlineCalculatorError errorState;
            double r;
            if (applyLinearScaling)
            {
                // Linear scaling is folded into the online R computation.
                var calculator = new OnlinePearsonsRCalculator();
                CalculateWithScaling(targets, estimated, lowerEstimationLimit, upperEstimationLimit, calculator, problemData.Dataset.Rows);
                errorState = calculator.ErrorState;
                r          = calculator.R;
            }
            else
            {
                // Clamp the raw estimates into the admissible range before correlating.
                var bounded = estimated.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
                r = OnlinePearsonsRCalculator.Calculate(targets, bounded, out errorState);
            }

            // NaN signals that the correlation could not be computed (e.g. zero variance).
            return errorState == OnlineCalculatorError.None ? r * r : double.NaN;
        }
        public double CalculateSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2)
        {
            // An interpreter and problem data are required to evaluate the trees.
            if (Interpreter == null || ProblemData == null)
            {
                throw new InvalidOperationException("Cannot calculate phenotypic similarity when no interpreter or problem data were set.");
            }

            // Evaluate both trees on the training partition.
            var outputs1 = Interpreter.GetSymbolicExpressionTreeValues(t1, ProblemData.Dataset, ProblemData.TrainingIndices);
            var outputs2 = Interpreter.GetSymbolicExpressionTreeValues(t2, ProblemData.Dataset, ProblemData.TrainingIndices);

            // Two constant outputs are considered identical phenotypes.
            if (outputs1.Variance().IsAlmost(0) && outputs2.Variance().IsAlmost(0))
            {
                return 1.0;
            }

            OnlineCalculatorError error;
            var r = OnlinePearsonsRCalculator.Calculate(outputs1, outputs2, out error);

            // Squared correlation, clamped at 1; a failed calculation yields zero similarity.
            var similarity = error == OnlineCalculatorError.None ? r * r : 0;
            return similarity > 1.0 ? 1.0 : similarity;
        }
// ---- Code example #4 (scraper separator; original marker: "コード例 #4", score 0) ----
        public static double CalculateQualityForImpacts(ISymbolicRegressionModel model, IRegressionProblemData problemData, IEnumerable<int> rows)
        {
            // The model's estimates are already bounded to the estimation limits.
            var estimated = model.GetEstimatedValues(problemData.Dataset, rows);
            var targets   = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);

            OnlineCalculatorError errorState;
            var r = OnlinePearsonsRCalculator.Calculate(targets, estimated, out errorState);

            // NaN marks an invalid correlation; otherwise report squared Pearson R.
            return errorState == OnlineCalculatorError.None ? r * r : double.NaN;
        }
    public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IClassificationProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling) {
      // Evaluate the tree and fetch the matching target class values.
      IEnumerable<double> estimated = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
      IEnumerable<double> targets = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);

      OnlineCalculatorError errorState;
      double r;
      if (applyLinearScaling) {
        // Linear scaling is folded into the online R computation.
        var calculator = new OnlinePearsonsRCalculator();
        CalculateWithScaling(targets, estimated, lowerEstimationLimit, upperEstimationLimit, calculator, problemData.Dataset.Rows);
        errorState = calculator.ErrorState;
        r = calculator.R;
      } else {
        // Clamp the raw estimates into the admissible range before correlating.
        var bounded = estimated.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
        r = OnlinePearsonsRCalculator.Calculate(targets, bounded, out errorState);
      }

      // NaN signals that the correlation could not be computed (e.g. zero variance).
      return errorState == OnlineCalculatorError.None ? r * r : double.NaN;
    }
// ---- Code example #6 (scraper separator; original marker: "コード例 #6", score 0) ----
        /// <summary>
        /// Determines the impact on model quality (squared Pearson R) of replacing the subtree
        /// rooted at <paramref name="node"/> with a single constant, and reports that constant.
        /// The impact is positive when the replacement makes the model worse.
        /// </summary>
        /// <param name="model">Model to analyze; must be an ISymbolicRegressionModel.</param>
        /// <param name="node">Subtree root that is temporarily replaced by a constant.</param>
        /// <param name="problemData">Must be an IRegressionProblemData.</param>
        /// <param name="rows">Row indices used for the quality evaluation.</param>
        /// <param name="impactValue">Original quality minus quality after replacement.</param>
        /// <param name="replacementValue">Constant value substituted for the subtree.</param>
        /// <param name="newQualityForImpactsCalculation">Squared R of the model with the replacement.</param>
        /// <param name="qualityForImpactsCalculation">Precomputed original quality; recomputed when NaN.</param>
        public override void CalculateImpactAndReplacementValues(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node,
                                                                 IDataAnalysisProblemData problemData, IEnumerable <int> rows, out double impactValue, out double replacementValue, out double newQualityForImpactsCalculation,
                                                                 double qualityForImpactsCalculation = Double.NaN)
        {
            var regressionModel       = (ISymbolicRegressionModel)model;
            var regressionProblemData = (IRegressionProblemData)problemData;

            var dataset      = regressionProblemData.Dataset;
            var targetValues = dataset.GetDoubleValues(regressionProblemData.TargetVariable, rows);

            OnlineCalculatorError errorState;

            // Baseline quality is only recomputed when the caller did not supply one.
            if (double.IsNaN(qualityForImpactsCalculation))
            {
                qualityForImpactsCalculation = CalculateQualityForImpacts(regressionModel, regressionProblemData, rows);
            }

            replacementValue = CalculateReplacementValue(regressionModel, node, regressionProblemData, rows);
            var constantNode = new ConstantTreeNode(new Constant())
            {
                Value = replacementValue
            };

            // Clone the whole model so the original tree is never mutated; GetClone maps
            // the original node to its counterpart inside the cloned tree.
            var cloner        = new Cloner();
            var tempModel     = cloner.Clone(regressionModel);
            var tempModelNode = (ISymbolicExpressionTreeNode)cloner.GetClone(node);

            // Splice the constant node into the clone at the position of the target subtree.
            var tempModelParentNode = tempModelNode.Parent;
            int i = tempModelParentNode.IndexOfSubtree(tempModelNode);

            tempModelParentNode.RemoveSubtree(i);
            tempModelParentNode.InsertSubtree(i, constantNode);

            var    estimatedValues = tempModel.GetEstimatedValues(dataset, rows);
            double r = OnlinePearsonsRCalculator.Calculate(targetValues, estimatedValues, out errorState);

            // A failed correlation (e.g. constant output) counts as quality zero.
            if (errorState != OnlineCalculatorError.None)
            {
                r = 0.0;
            }
            newQualityForImpactsCalculation = r * r;

            impactValue = qualityForImpactsCalculation - newQualityForImpactsCalculation;
        }
// ---- Code example #7 (scraper separator; original marker: "コード例 #7", score 0) ----
        public override double Evaluate(ISymbolicExpressionTree tree, IRandom random)
        {
            // Intentionally avoids HeuristicLab.Problems.DataAnalysis.Symbolic so the
            // implementation stays self-contained and easy to follow; that package would
            // provide the interpreter etc., but at much higher complexity, and skipping
            // it also removes the assembly reference.
            var problemData = ProblemData;
            var rows        = ProblemData.TrainingIndices.ToArray();
            var target      = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
            var predicted   = Interpret(tree, problemData.Dataset, rows);

            OnlineCalculatorError errorState;
            var r = OnlinePearsonsRCalculator.Calculate(target, predicted, out errorState);

            // A failed correlation (e.g. constant prediction) counts as quality zero.
            return errorState == OnlineCalculatorError.None ? r * r : 0.0;
        }
        /// <summary>
        /// For every node of the tree (postfix order), computes the quality impact of replacing
        /// that node's subtree with a constant, together with the constant used. Impact is
        /// (original R²) - (R² after replacement): 0 for no change, negative when the
        /// replacement improves the model, positive when it degrades it.
        /// </summary>
        /// <param name="tree">Tree to analyze; temporarily mutated and restored in place.</param>
        /// <returns>Map from tree node to (impact, replacement value).</returns>
        protected override Dictionary <ISymbolicExpressionTreeNode, Tuple <double, double> > CalculateImpactAndReplacementValues(ISymbolicExpressionTree tree)
        {
            var interpreter    = Content.Model.Interpreter;
            var rows           = Content.ProblemData.TrainingIndices;
            var dataset        = Content.ProblemData.Dataset;
            var targetVariable = Content.ProblemData.TargetVariable;
            var targetValues   = dataset.GetDoubleValues(targetVariable, rows);
            var originalOutput = interpreter.GetSymbolicExpressionTreeValues(tree, dataset, rows).ToArray();

            var impactAndReplacementValues           = new Dictionary <ISymbolicExpressionTreeNode, Tuple <double, double> >();
            // Skip the program/start wrapper nodes; iterate the actual expression body.
            List <ISymbolicExpressionTreeNode> nodes = tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPostfix().ToList();
            OnlineCalculatorError errorState;
            double originalR = OnlinePearsonsRCalculator.Calculate(targetValues, originalOutput, out errorState);

            // A failed correlation counts as quality zero.
            if (errorState != OnlineCalculatorError.None)
            {
                originalR = 0.0;
            }

            foreach (ISymbolicExpressionTreeNode node in nodes)
            {
                var parent = node.Parent;
                // constantNode is a field declared elsewhere in this class and is reused
                // (its Value overwritten) for every candidate node — TODO confirm.
                constantNode.Value = CalculateReplacementValue(node, tree);
                ISymbolicExpressionTreeNode replacementNode = constantNode;
                // Temporarily swap the node for the constant, re-evaluate, then swap back.
                SwitchNode(parent, node, replacementNode);
                var    newOutput = interpreter.GetSymbolicExpressionTreeValues(tree, dataset, rows);
                double newR      = OnlinePearsonsRCalculator.Calculate(targetValues, newOutput, out errorState);
                if (errorState != OnlineCalculatorError.None)
                {
                    newR = 0.0;
                }

                // impact = 0 if no change
                // impact < 0 if new solution is better
                // impact > 0 if new solution is worse
                double impact = (originalR * originalR) - (newR * newR);
                impactAndReplacementValues[node] = new Tuple <double, double>(impact, constantNode.Value);
                SwitchNode(parent, replacementNode, node);
            }
            return(impactAndReplacementValues);
        }
// ---- Code example #9 (scraper separator; original marker: "コード例 #9", score 0) ----
        /// <summary>
        /// Gradient-boosting meta-algorithm: repeatedly fits the configured regression
        /// algorithm to the current residuals, accumulates scaled predictions (learning
        /// rate Nu), and tracks train/test R² per iteration.
        /// </summary>
        /// <param name="cancellationToken">Cooperative cancellation between iterations.</param>
        protected override void Run(CancellationToken cancellationToken)
        {
            // Seed the RNG (optionally from a freshly generated seed) for reproducibility.
            if (SetSeedRandomly)
            {
                Seed = RandomSeedGenerator.GetSeed();
            }
            var rand = new MersenneTwister((uint)Seed);

            // Set up the result entries that are updated every iteration.
            var iterations = new IntValue(0);

            Results.Add(new Result("Iterations", iterations));

            var table = new DataTable("Qualities");

            table.Rows.Add(new DataRow("R² (train)"));
            table.Rows.Add(new DataRow("R² (test)"));
            Results.Add(new Result("Qualities", table));
            var curLoss     = new DoubleValue();
            var curTestLoss = new DoubleValue();

            Results.Add(new Result("R² (train)", curLoss));
            Results.Add(new Result("R² (test)", curTestLoss));
            var runCollection = new RunCollection();

            if (StoreRuns)
            {
                Results.Add(new Result("Runs", runCollection));
            }

            // init: build a modifiable copy of the dataset restricted to the active variables.
            var problemData       = Problem.ProblemData;
            var targetVarName     = problemData.TargetVariable;
            var activeVariables   = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
            var modifiableDataset = new ModifiableDataset(
                activeVariables,
                activeVariables.Select(v => problemData.Dataset.GetDoubleValues(v).ToList()));

            var trainingRows = problemData.TrainingIndices;
            var testRows     = problemData.TestIndices;
            var yPred        = new double[trainingRows.Count()]; // accumulated boosted predictions (train)
            var yPredTest    = new double[testRows.Count()];     // accumulated boosted predictions (test)
            var y            = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
            var curY         = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();

            var yTest    = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TestIndices).ToArray();
            var curYTest = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TestIndices).ToArray();
            var nu       = Nu;                                                               // learning rate
            var mVars    = (int)Math.Ceiling(M * problemData.AllowedInputVariables.Count()); // input variables sampled per iteration
            var rRows    = (int)Math.Ceiling(R * problemData.TrainingIndices.Count());       // training rows sampled per iteration
            var alg      = RegressionAlgorithm;
            List <IRegressionModel> models = new List <IRegressionModel>();

            try {
                // Loop until iteration limit reached or canceled.
                for (int i = 0; i < Iterations; i++)
                {
                    cancellationToken.ThrowIfCancellationRequested();

                    // Replace the target column with the current residuals.
                    // NOTE(review): assumes training rows precede test rows contiguously
                    // in the dataset layout — confirm against ModifiableDataset usage.
                    modifiableDataset.RemoveVariable(targetVarName);
                    modifiableDataset.AddVariable(targetVarName, curY.Concat(curYTest).ToList());

                    SampleTrainingData(rand, modifiableDataset, rRows, problemData.Dataset, curY, problemData.TargetVariable, problemData.TrainingIndices); // all training indices from the original problem data are allowed
                    var modifiableProblemData = new RegressionProblemData(modifiableDataset,
                                                                          problemData.AllowedInputVariables.SampleRandomWithoutRepetition(rand, mVars),
                                                                          problemData.TargetVariable);
                    modifiableProblemData.TrainingPartition.Start = 0;
                    modifiableProblemData.TrainingPartition.End   = rRows;
                    modifiableProblemData.TestPartition.Start     = problemData.TestPartition.Start;
                    modifiableProblemData.TestPartition.End       = problemData.TestPartition.End;

                    if (!TrySetProblemData(alg, modifiableProblemData))
                    {
                        throw new NotSupportedException("The algorithm cannot be used with GBM.");
                    }

                    IRegressionModel model;
                    IRun             run;

                    // try to find a model. The algorithm might fail to produce a model. In this case we just retry until the iterations are exhausted
                    if (TryExecute(alg, rand.Next(), RegressionAlgorithmResult, out model, out run))
                    {
                        int row = 0;
                        // update predictions for training and test
                        // update new targets (in the case of squared error loss we simply use negative residuals)
                        foreach (var pred in model.GetEstimatedValues(problemData.Dataset, trainingRows))
                        {
                            yPred[row] = yPred[row] + nu * pred;
                            curY[row]  = y[row] - yPred[row];
                            row++;
                        }
                        row = 0;
                        foreach (var pred in model.GetEstimatedValues(problemData.Dataset, testRows))
                        {
                            yPredTest[row] = yPredTest[row] + nu * pred;
                            curYTest[row]  = yTest[row] - yPredTest[row];
                            row++;
                        }

                        // determine quality
                        // BUGFIX: use separate error states per calculation; previously the
                        // test calculation overwrote the single shared error flag before the
                        // train R² was recorded, so the train quality was validated against
                        // the wrong error state.
                        OnlineCalculatorError trainError, testError;
                        var trainR = OnlinePearsonsRCalculator.Calculate(yPred, y, out trainError);
                        var testR  = OnlinePearsonsRCalculator.Calculate(yPredTest, yTest, out testError);

                        // iteration results (failed correlations count as quality 0)
                        curLoss.Value     = trainError == OnlineCalculatorError.None ? trainR * trainR : 0.0;
                        curTestLoss.Value = testError == OnlineCalculatorError.None ? testR * testR : 0.0;

                        models.Add(model);
                    }

                    if (StoreRuns)
                    {
                        runCollection.Add(run);
                    }
                    table.Rows["R² (train)"].Values.Add(curLoss.Value);
                    table.Rows["R² (test)"].Values.Add(curTestLoss.Value);
                    iterations.Value = i + 1;
                }

                // produce solution
                if (CreateSolution)
                {
                    // when all our models are symbolic models we can easily combine them to a single model
                    if (models.All(m => m is ISymbolicRegressionModel))
                    {
                        Results.Add(new Result("Solution", CreateSymbolicSolution(models, Nu, (IRegressionProblemData)problemData.Clone())));
                    }
                    // just produce an ensemble solution for now (TODO: correct scaling or linear regression for ensemble model weights)

                    var ensembleSolution = CreateEnsembleSolution(models, (IRegressionProblemData)problemData.Clone());
                    Results.Add(new Result("EnsembleSolution", ensembleSolution));
                }
            }
            finally {
                // reset everything
                alg.Prepare(true);
            }
        }