/// <summary>
/// Similarity of two solution scopes, measured as the squared Pearson correlation
/// of their cached "EstimatedValues" vectors (1.0 = identical behavior, 0.0 = uncorrelated).
/// </summary>
/// <exception cref="ArgumentException">Thrown when either scope lacks an "EstimatedValues" variable.</exception>
public override double CalculateSolutionSimilarity(IScope leftSolution, IScope rightSolution) {
  // The very same scope is trivially maximally similar to itself.
  if (leftSolution == rightSolution) return 1.0;

  if (!leftSolution.Variables.ContainsKey("EstimatedValues") || !rightSolution.Variables.ContainsKey("EstimatedValues"))
    throw new ArgumentException("No estimated values are present in the subscopes.");

  var leftValues = (DoubleArray)leftSolution.Variables["EstimatedValues"].Value;
  var rightValues = (DoubleArray)rightSolution.Variables["EstimatedValues"].Value;

  // Two constant output vectors are treated as maximally similar
  // (Pearson's R is undefined for zero variance).
  if (leftValues.Variance().IsAlmost(0) && rightValues.Variance().IsAlmost(0)) return 1.0;

  OnlineCalculatorError error;
  double r = OnlinePearsonsRCalculator.Calculate(leftValues, rightValues, out error);
  double similarity = error == OnlineCalculatorError.None ? r * r : 0;
  // Guard against round-off pushing r² marginally above 1.
  return similarity > 1.0 ? 1.0 : similarity;
}
/// <summary>
/// Computes the R² (squared Pearson correlation) of a symbolic regression tree's
/// output against the target variable on the given rows.
/// </summary>
/// <returns>R² of predictions vs. targets, or <c>double.NaN</c> when the calculator reports an error.</returns>
public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling) {
  // Evaluate the tree on the requested rows and fetch the matching target values.
  IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
  IEnumerable<double> targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);

  double r;
  OnlineCalculatorError errorState;
  if (!applyLinearScaling) {
    // Clamp raw predictions into the allowed estimation range before correlating.
    IEnumerable<double> boundedEstimatedValues = estimatedValues.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
    r = OnlinePearsonsRCalculator.Calculate(targetValues, boundedEstimatedValues, out errorState);
  } else {
    // Scaling path: the helper feeds scaled/bounded values into the online calculator.
    var rCalculator = new OnlinePearsonsRCalculator();
    CalculateWithScaling(targetValues, estimatedValues, lowerEstimationLimit, upperEstimationLimit, rCalculator, problemData.Dataset.Rows);
    errorState = rCalculator.ErrorState;
    r = rCalculator.R;
  }

  return errorState == OnlineCalculatorError.None ? r * r : double.NaN;
}
/// <summary>
/// Phenotypic similarity of two symbolic expression trees: the squared Pearson
/// correlation of their output vectors on the training rows.
/// </summary>
/// <exception cref="InvalidOperationException">Thrown when Interpreter or ProblemData is not set.</exception>
public double CalculateSimilarity(ISymbolicExpressionTree t1, ISymbolicExpressionTree t2) {
  if (Interpreter == null || ProblemData == null)
    throw new InvalidOperationException("Cannot calculate phenotypic similarity when no interpreter or problem data were set.");

  // The phenotype is the tree's output on the training partition.
  var v1 = Interpreter.GetSymbolicExpressionTreeValues(t1, ProblemData.Dataset, ProblemData.TrainingIndices);
  var v2 = Interpreter.GetSymbolicExpressionTreeValues(t2, ProblemData.Dataset, ProblemData.TrainingIndices);

  // Two constant outputs count as identical phenotypes (R is undefined for zero variance).
  if (v1.Variance().IsAlmost(0) && v2.Variance().IsAlmost(0)) return 1.0;

  OnlineCalculatorError error;
  double r = OnlinePearsonsRCalculator.Calculate(v1, v2, out error);
  double r2 = error == OnlineCalculatorError.None ? r * r : 0;
  // Clamp in case of numerical round-off above 1.
  return r2 > 1.0 ? 1.0 : r2;
}
/// <summary>
/// Baseline model quality (R² of estimated vs. target values on the given rows)
/// used as the reference point for node-impact calculations.
/// </summary>
/// <returns>R², or <c>double.NaN</c> when the correlation calculator reports an error.</returns>
public static double CalculateQualityForImpacts(ISymbolicRegressionModel model, IRegressionProblemData problemData, IEnumerable<int> rows) {
  var estimatedValues = model.GetEstimatedValues(problemData.Dataset, rows); // also bounds the values
  var targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);

  OnlineCalculatorError errorState;
  double r = OnlinePearsonsRCalculator.Calculate(targetValues, estimatedValues, out errorState);
  if (errorState != OnlineCalculatorError.None) return double.NaN;
  return r * r;
}
/// <summary>
/// Computes the R² (squared Pearson correlation) of a symbolic classification tree's
/// output against the target variable on the given rows.
/// </summary>
/// <returns>R² of predictions vs. targets, or <c>double.NaN</c> when the calculator reports an error.</returns>
public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IClassificationProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling) {
  // Tree output and the matching target column for the evaluated rows.
  IEnumerable<double> estimatedValues = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
  IEnumerable<double> targetValues = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);

  OnlineCalculatorError errorState;
  double r;
  if (applyLinearScaling) {
    // Scaling path: helper feeds scaled/bounded values into the online calculator.
    var rCalculator = new OnlinePearsonsRCalculator();
    CalculateWithScaling(targetValues, estimatedValues, lowerEstimationLimit, upperEstimationLimit, rCalculator, problemData.Dataset.Rows);
    errorState = rCalculator.ErrorState;
    r = rCalculator.R;
  } else {
    // Clamp predictions into the estimation range before correlating.
    IEnumerable<double> boundedEstimatedValues = estimatedValues.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
    r = OnlinePearsonsRCalculator.Calculate(targetValues, boundedEstimatedValues, out errorState);
  }

  if (errorState != OnlineCalculatorError.None) return double.NaN;
  return r * r;
}
/// <summary>
/// Computes the impact of replacing <paramref name="node"/> with a constant: the drop in
/// R² quality when the node's subtree is substituted by its replacement value.
/// Works on a clone of the model, so the original tree is never modified.
/// </summary>
/// <param name="qualityForImpactsCalculation">Baseline R²; recomputed via
/// <c>CalculateQualityForImpacts</c> when NaN is passed (the default).</param>
/// <param name="impactValue">baseline quality minus the quality with the node replaced
/// (positive = the node contributes to quality).</param>
/// <param name="replacementValue">the constant value substituted for the node's subtree.</param>
/// <param name="newQualityForImpactsCalculation">R² of the model with the node replaced (0 on calculator error).</param>
public override void CalculateImpactAndReplacementValues(ISymbolicDataAnalysisModel model, ISymbolicExpressionTreeNode node, IDataAnalysisProblemData problemData, IEnumerable<int> rows, out double impactValue, out double replacementValue, out double newQualityForImpactsCalculation, double qualityForImpactsCalculation = Double.NaN) {
  var regressionModel = (ISymbolicRegressionModel)model;
  var regressionProblemData = (IRegressionProblemData)problemData;
  var dataset = regressionProblemData.Dataset;
  var targetValues = dataset.GetDoubleValues(regressionProblemData.TargetVariable, rows);
  OnlineCalculatorError errorState;
  // Lazily establish the baseline quality only if the caller did not supply one.
  if (double.IsNaN(qualityForImpactsCalculation)) {
    qualityForImpactsCalculation = CalculateQualityForImpacts(regressionModel, regressionProblemData, rows);
  }
  replacementValue = CalculateReplacementValue(regressionModel, node, regressionProblemData, rows);
  var constantNode = new ConstantTreeNode(new Constant()) { Value = replacementValue };
  // Clone model and node together so the cloned node can be located inside the cloned tree;
  // the subtree swap below therefore only touches the temporary copy.
  var cloner = new Cloner();
  var tempModel = cloner.Clone(regressionModel);
  var tempModelNode = (ISymbolicExpressionTreeNode)cloner.GetClone(node);
  var tempModelParentNode = tempModelNode.Parent;
  int i = tempModelParentNode.IndexOfSubtree(tempModelNode);
  tempModelParentNode.RemoveSubtree(i);
  tempModelParentNode.InsertSubtree(i, constantNode);
  var estimatedValues = tempModel.GetEstimatedValues(dataset, rows);
  double r = OnlinePearsonsRCalculator.Calculate(targetValues, estimatedValues, out errorState);
  // A calculator error (e.g. constant output after replacement) counts as zero correlation.
  if (errorState != OnlineCalculatorError.None) { r = 0.0; }
  newQualityForImpactsCalculation = r * r;
  impactValue = qualityForImpactsCalculation - newQualityForImpactsCalculation;
}
/// <summary>
/// Fitness of a tree: R² (squared Pearson correlation) between the interpreted
/// tree output and the target values on the training rows.
/// </summary>
/// <returns>R² on the training partition; 0 when the correlation calculator reports an error.</returns>
public override double Evaluate(ISymbolicExpressionTree tree, IRandom random) {
  // Deliberately self-contained: HeuristicLab.Problems.DataAnalysis.Symbolic would
  // already provide all the necessary functionality (esp. the interpreter), but at a
  // much higher complexity — and we avoid the extra assembly reference so this
  // implementation can be fully understood on its own.
  var problemData = ProblemData;
  var rows = ProblemData.TrainingIndices.ToArray();
  var target = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
  var predicted = Interpret(tree, problemData.Dataset, rows);

  OnlineCalculatorError errorState;
  double r = OnlinePearsonsRCalculator.Calculate(target, predicted, out errorState);
  if (errorState != OnlineCalculatorError.None) return 0.0;
  return r * r;
}
/// <summary>
/// For every node of the model's tree, computes (impact, replacement value):
/// impact = baseline R² minus the R² obtained when the node is temporarily replaced
/// by a constant. The tree is modified in place and restored after each measurement.
/// </summary>
/// <returns>Map from tree node to Tuple(impact, replacement value).</returns>
protected override Dictionary<ISymbolicExpressionTreeNode, Tuple<double, double>> CalculateImpactAndReplacementValues(ISymbolicExpressionTree tree) {
  var interpreter = Content.Model.Interpreter;
  var rows = Content.ProblemData.TrainingIndices;
  var dataset = Content.ProblemData.Dataset;
  var targetVariable = Content.ProblemData.TargetVariable;
  var targetValues = dataset.GetDoubleValues(targetVariable, rows);
  // Materialize the baseline output before the loop starts mutating the tree.
  var originalOutput = interpreter.GetSymbolicExpressionTreeValues(tree, dataset, rows).ToArray();
  var impactAndReplacementValues = new Dictionary<ISymbolicExpressionTreeNode, Tuple<double, double>>();
  // Skip the ProgramRoot/Start scaffolding; iterate the actual expression nodes.
  List<ISymbolicExpressionTreeNode> nodes = tree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPostfix().ToList();
  OnlineCalculatorError errorState;
  double originalR = OnlinePearsonsRCalculator.Calculate(targetValues, originalOutput, out errorState);
  if (errorState != OnlineCalculatorError.None) { originalR = 0.0; }
  foreach (ISymbolicExpressionTreeNode node in nodes) {
    var parent = node.Parent;
    // NOTE(review): constantNode is declared outside this method (presumably a reusable
    // field of the enclosing class) — confirm it is not shared across threads.
    constantNode.Value = CalculateReplacementValue(node, tree);
    ISymbolicExpressionTreeNode replacementNode = constantNode;
    // Temporarily splice the constant in place of the node, re-evaluate, then restore below.
    SwitchNode(parent, node, replacementNode);
    var newOutput = interpreter.GetSymbolicExpressionTreeValues(tree, dataset, rows);
    double newR = OnlinePearsonsRCalculator.Calculate(targetValues, newOutput, out errorState);
    if (errorState != OnlineCalculatorError.None) { newR = 0.0; }
    // impact = 0 if no change
    // impact < 0 if new solution is better
    // impact > 0 if new solution is worse
    double impact = (originalR * originalR) - (newR * newR);
    impactAndReplacementValues[node] = new Tuple<double, double>(impact, constantNode.Value);
    // Undo the substitution so the next iteration sees the unmodified tree.
    SwitchNode(parent, replacementNode, node);
  }
  return impactAndReplacementValues;
}
/// <summary>
/// Main loop of the gradient-boosting meta-algorithm: repeatedly fits the inner
/// regression algorithm to the current residuals, accumulates the (nu-damped)
/// predictions, and tracks train/test R² per iteration.
/// </summary>
/// <param name="cancellationToken">Cooperatively checked once per boosting iteration.</param>
protected override void Run(CancellationToken cancellationToken) {
  // --- algorithm setup ---
  if (SetSeedRandomly) {
    Seed = RandomSeedGenerator.GetSeed();
  }
  var rand = new MersenneTwister((uint)Seed);

  // --- results display setup ---
  var iterations = new IntValue(0);
  Results.Add(new Result("Iterations", iterations));
  var table = new DataTable("Qualities");
  table.Rows.Add(new DataRow("R² (train)"));
  table.Rows.Add(new DataRow("R² (test)"));
  Results.Add(new Result("Qualities", table));
  var curLoss = new DoubleValue();
  var curTestLoss = new DoubleValue();
  Results.Add(new Result("R² (train)", curLoss));
  Results.Add(new Result("R² (test)", curTestLoss));
  var runCollection = new RunCollection();
  if (StoreRuns) {
    Results.Add(new Result("Runs", runCollection));
  }

  // --- init ---
  var problemData = Problem.ProblemData;
  var targetVarName = problemData.TargetVariable;
  var activeVariables = problemData.AllowedInputVariables.Concat(new string[] { problemData.TargetVariable });
  var modifiableDataset = new ModifiableDataset(
    activeVariables,
    activeVariables.Select(v => problemData.Dataset.GetDoubleValues(v).ToList()));

  var trainingRows = problemData.TrainingIndices;
  var testRows = problemData.TestIndices;
  // yPred/yPredTest: accumulated boosted predictions; curY/curYTest: current residuals
  // (for squared-error loss the pseudo-targets are simply the negative residuals).
  var yPred = new double[trainingRows.Count()];
  var yPredTest = new double[testRows.Count()];
  var y = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
  var curY = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices).ToArray();
  var yTest = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TestIndices).ToArray();
  var curYTest = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TestIndices).ToArray();
  var nu = Nu;                                                                   // learning rate
  var mVars = (int)Math.Ceiling(M * problemData.AllowedInputVariables.Count());  // variables sampled per iteration
  var rRows = (int)Math.Ceiling(R * problemData.TrainingIndices.Count());        // rows sampled per iteration
  var alg = RegressionAlgorithm;
  List<IRegressionModel> models = new List<IRegressionModel>();
  try {
    // Loop until iteration limit reached or canceled.
    for (int i = 0; i < Iterations; i++) {
      cancellationToken.ThrowIfCancellationRequested();

      // Re-target the working dataset to the current residuals.
      modifiableDataset.RemoveVariable(targetVarName);
      modifiableDataset.AddVariable(targetVarName, curY.Concat(curYTest).ToList());

      SampleTrainingData(rand, modifiableDataset, rRows, problemData.Dataset, curY, problemData.TargetVariable, problemData.TrainingIndices);
      // all training indices from the original problem data are allowed for training
      var modifiableProblemData = new RegressionProblemData(modifiableDataset,
        problemData.AllowedInputVariables.SampleRandomWithoutRepetition(rand, mVars),
        problemData.TargetVariable);
      modifiableProblemData.TrainingPartition.Start = 0;
      modifiableProblemData.TrainingPartition.End = rRows;
      modifiableProblemData.TestPartition.Start = problemData.TestPartition.Start;
      modifiableProblemData.TestPartition.End = problemData.TestPartition.End;

      if (!TrySetProblemData(alg, modifiableProblemData))
        throw new NotSupportedException("The algorithm cannot be used with GBM.");

      IRegressionModel model;
      IRun run;

      // try to find a model. The algorithm might fail to produce a model. In this case
      // we just retry until the iterations are exhausted.
      if (TryExecute(alg, rand.Next(), RegressionAlgorithmResult, out model, out run)) {
        int row = 0;
        // update predictions for training and test
        // update new targets (in the case of squared error loss we simply use negative residuals)
        foreach (var pred in model.GetEstimatedValues(problemData.Dataset, trainingRows)) {
          yPred[row] = yPred[row] + nu * pred;
          curY[row] = y[row] - yPred[row];
          row++;
        }
        row = 0;
        foreach (var pred in model.GetEstimatedValues(problemData.Dataset, testRows)) {
          yPredTest[row] = yPredTest[row] + nu * pred;
          curYTest[row] = yTest[row] - yPredTest[row];
          row++;
        }

        // determine quality
        // BUG FIX: use separate error states — previously a single variable was reused,
        // so the train calculator's error was overwritten by the test calculation and
        // both R² values were gated on the TEST error state only.
        OnlineCalculatorError trainError, testError;
        var trainR = OnlinePearsonsRCalculator.Calculate(yPred, y, out trainError);
        var testR = OnlinePearsonsRCalculator.Calculate(yPredTest, yTest, out testError);

        // iteration results
        curLoss.Value = trainError == OnlineCalculatorError.None ? trainR * trainR : 0.0;
        curTestLoss.Value = testError == OnlineCalculatorError.None ? testR * testR : 0.0;

        models.Add(model);
      }

      if (StoreRuns) {
        runCollection.Add(run);
      }
      table.Rows["R² (train)"].Values.Add(curLoss.Value);
      table.Rows["R² (test)"].Values.Add(curTestLoss.Value);
      iterations.Value = i + 1;
    }

    // produce solution
    if (CreateSolution) {
      // when all our models are symbolic models we can easily combine them to a single model
      if (models.All(m => m is ISymbolicRegressionModel)) {
        Results.Add(new Result("Solution", CreateSymbolicSolution(models, Nu, (IRegressionProblemData)problemData.Clone())));
      }
      // just produce an ensemble solution for now (TODO: correct scaling or linear regression for ensemble model weights)
      var ensembleSolution = CreateEnsembleSolution(models, (IRegressionProblemData)problemData.Clone());
      Results.Add(new Result("EnsembleSolution", ensembleSolution));
    }
  } finally {
    // reset everything
    alg.Prepare(true);
  }
}