/// <summary>
/// Evaluates the current tree and stores the result in <c>QualityParameter</c>.
/// With probability <c>ConstantOptimizationProbability</c> the constants are optimized first;
/// the quality returned by the optimization is kept only when the optimization rows
/// percentage equals the relative number of evaluated samples, otherwise the tree is
/// re-evaluated with the Pearson R² evaluator on freshly generated evaluation rows.
/// </summary>
/// <returns>The operation returned by the base implementation.</returns>
public override IOperation InstrumentedApply()
        {
            var tree = SymbolicExpressionTreeParameter.ActualValue;

            // One random draw decides whether constant optimization runs for this tree.
            bool optimize = RandomParameter.ActualValue.NextDouble() < ConstantOptimizationProbability.Value;

            // A plain R² evaluation is required unless the optimization result can be reused.
            bool evaluate = true;
            double quality = 0.0;

            if (optimize)
            {
                IEnumerable<int> optimizationRows = GenerateRowsToEvaluate(ConstantOptimizationRowsPercentage.Value);
                quality = OptimizeConstants(
                    SymbolicDataAnalysisTreeInterpreterParameter.ActualValue,
                    tree,
                    ProblemDataParameter.ActualValue,
                    optimizationRows,
                    ApplyLinearScalingParameter.ActualValue.Value,
                    ConstantOptimizationIterations.Value,
                    updateVariableWeights: UpdateVariableWeights,
                    lowerEstimationLimit: EstimationLimitsParameter.ActualValue.Lower,
                    upperEstimationLimit: EstimationLimitsParameter.ActualValue.Upper,
                    updateConstantsInTree: UpdateConstantsInTree);

                // The optimized quality is only reused when both row percentages are equal.
                evaluate = ConstantOptimizationRowsPercentage.Value != RelativeNumberOfEvaluatedSamplesParameter.ActualValue.Value;
            }

            if (evaluate)
            {
                var rows = GenerateRowsToEvaluate();
                quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(
                    SymbolicDataAnalysisTreeInterpreterParameter.ActualValue,
                    tree,
                    EstimationLimitsParameter.ActualValue.Lower,
                    EstimationLimitsParameter.ActualValue.Upper,
                    ProblemDataParameter.ActualValue,
                    rows,
                    ApplyLinearScalingParameter.ActualValue.Value);
            }

            QualityParameter.ActualValue = new DoubleValue(quality);
            return base.InstrumentedApply();
        }
        }                                                                                          // maximize R² and minimize average similarity

        /// <summary>
        /// Computes the two qualities of the current tree, the Pearson R² (optionally after
        /// constant optimization and rounding to <c>DecimalPlaces</c>) and the average similarity,
        /// and stores them in <c>QualitiesParameter</c>.
        /// </summary>
        /// <returns>The operation returned by the base implementation.</returns>
        public override IOperation InstrumentedApply()
        {
            IEnumerable<int> evaluationRows = GenerateRowsToEvaluate();
            var tree            = SymbolicExpressionTreeParameter.ActualValue;
            var data            = ProblemDataParameter.ActualValue;
            var treeInterpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
            var limits          = EstimationLimitsParameter.ActualValue;
            var linearScaling   = ApplyLinearScalingParameter.ActualValue.Value;

            if (UseConstantOptimization)
            {
                // Return value is ignored here; the R² is computed separately below.
                SymbolicRegressionConstantOptimizationEvaluator.OptimizeConstants(
                    treeInterpreter, tree, data, evaluationRows, linearScaling, ConstantOptimizationIterations,
                    updateVariableWeights: ConstantOptimizationUpdateVariableWeights,
                    lowerEstimationLimit: limits.Lower,
                    upperEstimationLimit: limits.Upper);
            }

            double rSquared = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(
                treeInterpreter, tree, limits.Lower, limits.Upper, data, evaluationRows, linearScaling);

            // A negative DecimalPlaces value disables rounding.
            rSquared = DecimalPlaces >= 0 ? Math.Round(rSquared, DecimalPlaces) : rSquared;

            lock (locker)
            {
                // Run the similarity calculator on the parent scope only if no value is available yet.
                bool similarityMissing = AverageSimilarityParameter.ActualValue == null;
                if (similarityMissing)
                {
                    var calculatorContext = new ExecutionContext(null, SimilarityCalculator, ExecutionContext.Scope.Parent);
                    SimilarityCalculator.StrictSimilarity = StrictSimilarity;
                    SimilarityCalculator.Execute(calculatorContext, CancellationToken);
                }
            }

            var similarity = AverageSimilarityParameter.ActualValue.Value;
            var qualities  = new[] { rSquared, similarity };

            QualitiesParameter.ActualValue = new DoubleArray(qualities);
            return base.InstrumentedApply();
        }
        /// <summary>
        /// Evaluates <paramref name="tree"/> outside of the regular operator execution and returns
        /// its Pearson R² together with the average similarity.
        /// </summary>
        /// <param name="context">Execution context used to resolve the lookup parameters.</param>
        /// <param name="tree">The symbolic expression tree to evaluate.</param>
        /// <param name="problemData">The regression problem data.</param>
        /// <param name="rows">The rows on which the R² is calculated.</param>
        /// <returns>An array containing the R² followed by the average similarity.</returns>
        public override double[] Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows)
        {
            SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
            AverageSimilarityParameter.ExecutionContext  = context;
            EstimationLimitsParameter.ExecutionContext   = context;
            ApplyLinearScalingParameter.ExecutionContext = context;

            var estimationLimits   = EstimationLimitsParameter.ActualValue;
            var applyLinearScaling = ApplyLinearScalingParameter.ActualValue.Value;

            double r2 = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(SymbolicDataAnalysisTreeInterpreterParameter.ActualValue, tree, estimationLimits.Lower, estimationLimits.Upper, problemData, rows, applyLinearScaling);

            lock (locker) {
                if (AverageSimilarityParameter.ActualValue == null)
                {
                    // The similarity calculator must run in its own context on the parent scope
                    // (same as in InstrumentedApply), not in the caller's context.
                    var ctx = new ExecutionContext(null, SimilarityCalculator, context.Scope.Parent);
                    SimilarityCalculator.StrictSimilarity = StrictSimilarity;
                    SimilarityCalculator.Execute(ctx, CancellationToken);
                }
            }
            var avgSimilarity = AverageSimilarityParameter.ActualValue.Value;

            // Detach every parameter that was bound to the context above.
            SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
            AverageSimilarityParameter.ExecutionContext  = null;
            EstimationLimitsParameter.ExecutionContext   = null;
            ApplyLinearScalingParameter.ExecutionContext = null;

            return(new[] { r2, avgSimilarity });
        }
        /// <summary>
        /// Calculates the Pearson R² of <paramref name="solution"/> and pairs it with the value
        /// returned by <c>SymbolicDataAnalysisModelComplexityCalculator.CalculateComplexity</c>.
        /// </summary>
        /// <param name="decimalPlaces">Number of decimal places the R² is rounded to; negative values disable rounding.</param>
        /// <returns>A two-element array: the (optionally rounded) R², then the complexity.</returns>
        public static double[] Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling, int decimalPlaces)
        {
            double rSquared = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(
                interpreter, solution, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);

            bool roundingRequested = decimalPlaces >= 0;
            double accuracy = roundingRequested ? Math.Round(rSquared, decimalPlaces) : rSquared;

            var objectives = new double[2];
            objectives[0] = accuracy;
            objectives[1] = SymbolicDataAnalysisModelComplexityCalculator.CalculateComplexity(solution);
            return objectives;
        }
        /// <summary>
        /// Calculates the Pearson R² of <paramref name="solution"/> and pairs it with the number
        /// of variable nodes (<c>IVariableTreeNode</c>) found in the tree.
        /// </summary>
        /// <param name="decimalPlaces">Number of decimal places the R² is rounded to; negative values disable rounding.</param>
        /// <returns>A two-element array: the (optionally rounded) R², then the variable node count.</returns>
        public static double[] Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling, int decimalPlaces)
        {
            double rSquared = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(
                interpreter, solution, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);

            bool roundingRequested = decimalPlaces >= 0;
            double accuracy = roundingRequested ? Math.Round(rSquared, decimalPlaces) : rSquared;

            // Count the variable nodes of the tree.
            int variableCount = solution.IterateNodesPostfix().Count(node => node is IVariableTreeNode);

            var objectives = new double[2];
            objectives[0] = accuracy;
            objectives[1] = variableCount;
            return objectives;
        }
        // Example no. 6
        // 0
        /// <summary>
        /// Calculates the Pearson R² of <paramref name="solution"/> and pairs it with the sum of
        /// <c>GetLength()</c> over all nodes of the tree.
        /// </summary>
        /// <param name="decimalPlaces">Number of decimal places the R² is rounded to; negative values disable rounding.</param>
        /// <returns>A two-element array: the (optionally rounded) R², then the summed lengths.</returns>
        public static double[] Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling, int decimalPlaces)
        {
            double rSquared = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(
                interpreter, solution, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);

            bool roundingRequested = decimalPlaces >= 0;
            double accuracy = roundingRequested ? Math.Round(rSquared, decimalPlaces) : rSquared;

            // Each node contributes the length of the whole sub-tree rooted at it.
            var subtreeLengths = solution.IterateNodesPostfix().Select(node => node.GetLength());

            var objectives = new double[2];
            objectives[0] = accuracy;
            objectives[1] = subtreeLengths.Sum();
            return objectives;
        }
        /// <summary>
        /// Evaluates <paramref name="tree"/> on the given rows and returns its Pearson R².
        /// The lookup parameters are bound to <paramref name="context"/> for the duration of the
        /// calculation and detached afterwards.
        /// </summary>
        /// <param name="context">Execution context used to resolve the lookup parameters.</param>
        /// <param name="tree">The symbolic expression tree to evaluate.</param>
        /// <param name="problemData">The regression problem data.</param>
        /// <param name="rows">The rows on which the R² is calculated.</param>
        /// <returns>The Pearson R² of the tree.</returns>
        public override double Evaluate(IExecutionContext context, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows)
        {
            SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = context;
            EstimationLimitsParameter.ExecutionContext   = context;
            ApplyLinearScalingParameter.ExecutionContext = context;

            var treeInterpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
            var limits          = EstimationLimitsParameter.ActualValue;
            var linearScaling   = ApplyLinearScalingParameter.ActualValue.Value;

            // The plain Pearson R² evaluator is used deliberately (no constant optimization):
            // Evaluate() reports the quality of already evolved models on other partitions
            // of the dataset (e.g., best validation model).
            double rSquared = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(
                treeInterpreter, tree, limits.Lower, limits.Upper, problemData, rows, linearScaling);

            SymbolicDataAnalysisTreeInterpreterParameter.ExecutionContext = null;
            EstimationLimitsParameter.ExecutionContext   = null;
            ApplyLinearScalingParameter.ExecutionContext = null;

            return rSquared;
        }
        /// <summary>
        /// Fits the numeric constants (and optionally the variable weights) of <paramref name="tree"/>
        /// to the target variable with alglib's least-squares fitter and returns the resulting
        /// Pearson R².
        /// </summary>
        /// <param name="interpreter">Interpreter used for the R² calculations before and after the fit.</param>
        /// <param name="tree">The tree whose constants are optimized; it is modified in place.</param>
        /// <param name="problemData">Supplies the dataset and the target variable.</param>
        /// <param name="rows">Rows used for fitting and for the R² calculations (enumerated several times).</param>
        /// <param name="applyLinearScaling">Forwarded to the R² calculations.</param>
        /// <param name="maxIterations">Iteration limit passed to <c>alglib.lsfitsetcond</c>.</param>
        /// <param name="updateVariableWeights">When true, variable node weights are fitted in addition to constant nodes.</param>
        /// <param name="lowerEstimationLimit">Lower estimation limit forwarded to the R² calculations.</param>
        /// <param name="upperEstimationLimit">Upper estimation limit forwarded to the R² calculations.</param>
        /// <param name="updateConstantsInTree">When false, the original constants are written back to the tree after the new quality was calculated.</param>
        /// <returns>
        /// 0.0 when the transformed tree references no variables; the original quality when the fit
        /// throws, yields NaN, or worsens the quality by more than 0.001; otherwise the quality
        /// calculated with the fitted constants.
        /// </returns>
        /// <exception cref="NotSupportedException">The tree could not be transformed to an AutoDiff term.</exception>
        public static double OptimizeConstants(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable <int> rows, bool applyLinearScaling, int maxIterations, bool updateVariableWeights = true, double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue, bool updateConstantsInTree = true)
        {
            List <AutoDiff.Variable> variables     = new List <AutoDiff.Variable>();
            List <AutoDiff.Variable> parameters    = new List <AutoDiff.Variable>();
            List <string>            variableNames = new List <string>();

            AutoDiff.Term func;
            if (!TryTransformToAutoDiff(tree.Root.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out func))
            {
                throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree.");
            }
            // nothing to fit against when the tree references no dataset variable
            if (variableNames.Count == 0)
            {
                return(0.0);
            }

            AutoDiff.IParametricCompiledTerm compiledFunc = func.Compile(variables.ToArray(), parameters.ToArray());

            List <SymbolicExpressionTreeTerminalNode> terminalNodes = null;

            // with weight updates all terminal nodes are considered, otherwise only constant nodes
            if (updateVariableWeights)
            {
                terminalNodes = tree.Root.IterateNodesPrefix().OfType <SymbolicExpressionTreeTerminalNode>().ToList();
            }
            else
            {
                terminalNodes = new List <SymbolicExpressionTreeTerminalNode>(tree.Root.IterateNodesPrefix().OfType <ConstantTreeNode>());
            }

            // extract initial constants
            // NOTE(review): c[0] and c[1] are presumably the linear scaling offset (0.0) and slope (1.0)
            // added by TryTransformToAutoDiff; this assumes variables.Count >= 2 - confirm.
            double[] c = new double[variables.Count];
            {
                c[0] = 0.0;
                c[1] = 1.0;
                int i = 2;
                foreach (var node in terminalNodes)
                {
                    ConstantTreeNode constantTreeNode = node as ConstantTreeNode;
                    VariableTreeNode variableTreeNode = node as VariableTreeNode;
                    if (constantTreeNode != null)
                    {
                        c[i++] = constantTreeNode.Value;
                    }
                    else if (updateVariableWeights && variableTreeNode != null)
                    {
                        c[i++] = variableTreeNode.Weight;
                    }
                }
            }
            // keep a copy so the tree can be restored, and the quality to compare against
            double[] originalConstants = (double[])c.Clone();
            double   originalQuality   = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);

            alglib.lsfitstate  state;
            alglib.lsfitreport rep;
            int info;

            IDataset ds = problemData.Dataset;

            // input matrix: one row per evaluated row, one column per referenced variable
            double[,] x = new double[rows.Count(), variableNames.Count];
            int row = 0;

            foreach (var r in rows)
            {
                for (int col = 0; col < variableNames.Count; col++)
                {
                    x[row, col] = ds.GetDoubleValue(variableNames[col], r);
                }
                row++;
            }
            double[] y = ds.GetDoubleValues(problemData.TargetVariable, rows).ToArray();
            int      n = x.GetLength(0);
            int      m = x.GetLength(1);
            int      k = c.Length;

            alglib.ndimensional_pfunc function_cx_1_func = CreatePFunc(compiledFunc);
            alglib.ndimensional_pgrad function_cx_1_grad = CreatePGrad(compiledFunc);

            // a failing fit leaves the tree untouched and reports the original quality
            try {
                alglib.lsfitcreatefg(x, y, c, n, m, k, false, out state);
                alglib.lsfitsetcond(state, 0.0, 0.0, maxIterations);
                //alglib.lsfitsetgradientcheck(state, 0.001);
                alglib.lsfitfit(state, function_cx_1_func, function_cx_1_grad, null, null);
                alglib.lsfitresults(state, out info, out c, out rep);
            }
            catch (ArithmeticException) {
                return(originalQuality);
            }
            catch (alglib.alglibexception) {
                return(originalQuality);
            }

            //info == -7  => constant optimization failed due to wrong gradient
            if (info != -7)
            {
                // the first two entries are skipped; only the remaining values are written to the tree
                UpdateConstants(tree, c.Skip(2).ToArray(), updateVariableWeights);
            }
            var quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);

            // the caller only wants the quality: restore the original constants in the tree
            if (!updateConstantsInTree)
            {
                UpdateConstants(tree, originalConstants.Skip(2).ToArray(), updateVariableWeights);
            }
            // reject the fit when it is noticeably worse than before or produced NaN
            if (originalQuality - quality > 0.001 || double.IsNaN(quality))
            {
                UpdateConstants(tree, originalConstants.Skip(2).ToArray(), updateVariableWeights);
                return(originalQuality);
            }
            return(quality);
        }
        /// <summary>
        /// Fits the numeric constants (and optionally the variable weights) of <paramref name="tree"/>
        /// to the target variable with alglib's least-squares fitter and returns the resulting
        /// Pearson R².
        /// </summary>
        /// <param name="interpreter">Interpreter used for the R² calculations before and after the fit.</param>
        /// <param name="tree">The tree whose constants are optimized; it is modified in place.</param>
        /// <param name="problemData">Supplies the dataset and the target variable.</param>
        /// <param name="rows">Rows used for fitting and for the R² calculations (enumerated several times).</param>
        /// <param name="applyLinearScaling">Forwarded to the converter and the R² calculations; when true two extra coefficients (0.0 and 1.0) are prepended to the fitted vector.</param>
        /// <param name="maxIterations">Iteration limit passed to <c>alglib.lsfitsetcond</c>.</param>
        /// <param name="updateVariableWeights">Forwarded to the converter and to <c>UpdateConstants</c>.</param>
        /// <param name="lowerEstimationLimit">Lower estimation limit forwarded to the R² calculations.</param>
        /// <param name="upperEstimationLimit">Upper estimation limit forwarded to the R² calculations.</param>
        /// <param name="updateConstantsInTree">When false, the initial constants are written back to the tree after the new quality was calculated.</param>
        /// <param name="iterationCallback">Optional callback forwarded to alglib's iteration reports; reporting is enabled only when it is not null.</param>
        /// <param name="counter">Optional counter that receives the function and gradient evaluation counts of this run, each divided by the number of rows. It is not updated when the fit throws.</param>
        /// <returns>
        /// 0.0 when the converted tree has no parameters; the original quality when the fit throws,
        /// yields NaN, or worsens the quality by more than 0.001; otherwise the quality calculated
        /// with the fitted constants.
        /// </returns>
        /// <exception cref="NotSupportedException">The tree could not be converted to an AutoDiff term.</exception>
        /// <exception cref="InvalidProgramException">A referenced dataset variable is neither of type double nor string.</exception>
        public static double OptimizeConstants(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
                                               ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable <int> rows, bool applyLinearScaling,
                                               int maxIterations, bool updateVariableWeights = true,
                                               double lowerEstimationLimit = double.MinValue, double upperEstimationLimit              = double.MaxValue,
                                               bool updateConstantsInTree  = true, Action <double[], double, object> iterationCallback = null, EvaluationsCounter counter = null)
        {
            // numeric constants in the tree become variables for constant opt
            // variables in the tree become parameters (fixed values) for constant opt
            // for each parameter (variable in the original tree) we store the
            // variable name, variable value (for factor vars) and lag as a DataForVariable object.
            // A dictionary is used to find parameters
            double[] initialConstants;
            var      parameters = new List <TreeToAutoDiffTermConverter.DataForVariable>();

            TreeToAutoDiffTermConverter.ParametricFunction         func;
            TreeToAutoDiffTermConverter.ParametricFunctionGradient func_grad;
            if (!TreeToAutoDiffTermConverter.TryConvertToAutoDiff(tree, updateVariableWeights, applyLinearScaling, out parameters, out initialConstants, out func, out func_grad))
            {
                throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree.");
            }
            if (parameters.Count == 0)
            {
                return(0.0);                             // gkronber: constant expressions always have a R² of 0.0
            }
            var parameterEntries = parameters.ToArray(); // order of entries must be the same for x

            // extract initial constants; with linear scaling two extra coefficients (0.0 and 1.0) come first
            double[] c;
            if (applyLinearScaling)
            {
                c    = new double[initialConstants.Length + 2];
                c[0] = 0.0;
                c[1] = 1.0;
                Array.Copy(initialConstants, 0, c, 2, initialConstants.Length);
            }
            else
            {
                c = (double[])initialConstants.Clone();
            }

            // quality before the fit, used as fallback and for the comparison at the end
            double originalQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);

            if (counter == null)
            {
                counter = new EvaluationsCounter();
            }
            // passed to lsfitfit as the user object; its counts are scaled by n afterwards
            var rowEvaluationsCounter = new EvaluationsCounter();

            alglib.lsfitstate  state;
            alglib.lsfitreport rep;
            int retVal;

            IDataset ds = problemData.Dataset;

            // input matrix: one row per evaluated row, one column per parameter entry
            double[,] x = new double[rows.Count(), parameters.Count];
            int row = 0;

            foreach (var r in rows)
            {
                int col = 0;
                foreach (var info in parameterEntries)
                {
                    if (ds.VariableHasType <double>(info.variableName))
                    {
                        // double variables are read at the row shifted by the entry's lag
                        x[row, col] = ds.GetDoubleValue(info.variableName, r + info.lag);
                    }
                    else if (ds.VariableHasType <string>(info.variableName))
                    {
                        // string variables are encoded as 1 when the value matches the entry, else 0
                        x[row, col] = ds.GetStringValue(info.variableName, r) == info.variableValue ? 1 : 0;
                    }
                    else
                    {
                        throw new InvalidProgramException("found a variable of unknown type");
                    }
                    col++;
                }
                row++;
            }
            double[] y = ds.GetDoubleValues(problemData.TargetVariable, rows).ToArray();
            int      n = x.GetLength(0);
            int      m = x.GetLength(1);
            int      k = c.Length;

            alglib.ndimensional_pfunc function_cx_1_func = CreatePFunc(func);
            alglib.ndimensional_pgrad function_cx_1_grad = CreatePGrad(func_grad);
            // forwards alglib's iteration reports; reporting is only switched on below when a callback was supplied
            alglib.ndimensional_rep   xrep = (p, f, obj) => iterationCallback(p, f, obj);

            // a failing fit leaves the tree untouched and reports the original quality
            try {
                alglib.lsfitcreatefg(x, y, c, n, m, k, false, out state);
                alglib.lsfitsetcond(state, 0.0, 0.0, maxIterations);
                alglib.lsfitsetxrep(state, iterationCallback != null);
                //alglib.lsfitsetgradientcheck(state, 0.001);
                alglib.lsfitfit(state, function_cx_1_func, function_cx_1_grad, xrep, rowEvaluationsCounter);
                alglib.lsfitresults(state, out retVal, out c, out rep);
            } catch (ArithmeticException) {
                return(originalQuality);
            } catch (alglib.alglibexception) {
                return(originalQuality);
            }

            // convert the row-level counts into counts relative to the number of rows
            counter.FunctionEvaluations += rowEvaluationsCounter.FunctionEvaluations / n;
            counter.GradientEvaluations += rowEvaluationsCounter.GradientEvaluations / n;

            //retVal == -7  => constant optimization failed due to wrong gradient
            if (retVal != -7)
            {
                if (applyLinearScaling)
                {
                    // drop the two leading scaling coefficients before writing back to the tree
                    var tmp = new double[c.Length - 2];
                    Array.Copy(c, 2, tmp, 0, tmp.Length);
                    UpdateConstants(tree, tmp, updateVariableWeights);
                }
                else
                {
                    UpdateConstants(tree, c, updateVariableWeights);
                }
            }
            var quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);

            // the caller only wants the quality: restore the initial constants in the tree
            if (!updateConstantsInTree)
            {
                UpdateConstants(tree, initialConstants, updateVariableWeights);
            }

            // reject the fit when it is noticeably worse than before or produced NaN
            if (originalQuality - quality > 0.001 || double.IsNaN(quality))
            {
                UpdateConstants(tree, initialConstants, updateVariableWeights);
                return(originalQuality);
            }
            return(quality);
        }