/// <summary>
/// Copy constructor used for cloning: initializes this instance from <paramref name="original"/>.
/// </summary>
/// <param name="original">The model whose state is copied.</param>
/// <param name="cloner">The cloner that is handed to the base constructor and used to clone the tree and the interpreter.</param>
protected SymbolicDataAnalysisModel(SymbolicDataAnalysisModel original, Cloner cloner)
  : base(original, cloner) {
  // the estimation limits are taken over by plain assignment
  lowerEstimationLimit = original.lowerEstimationLimit;
  upperEstimationLimit = original.upperEstimationLimit;

  // the expression tree and the interpreter are duplicated through the cloner
  symbolicExpressionTree = cloner.Clone(original.symbolicExpressionTree);
  interpreter = cloner.Clone(original.interpreter);
}
Exemplo n.º 2
0
        /// <summary>
        /// Calculates the mean squared error between the target variable and the values the
        /// interpreter produces for <paramref name="solution"/> on the given rows.
        /// </summary>
        /// <param name="interpreter">Interpreter used to evaluate the tree.</param>
        /// <param name="solution">The expression tree to evaluate.</param>
        /// <param name="lowerEstimationLimit">Lower limit passed to LimitToRange / CalculateWithScaling.</param>
        /// <param name="upperEstimationLimit">Upper limit passed to LimitToRange / CalculateWithScaling.</param>
        /// <param name="problemData">Supplies the dataset and the name of the target variable.</param>
        /// <param name="rows">Row indices to evaluate.</param>
        /// <param name="applyLinearScaling">When true the error is computed via CalculateWithScaling; otherwise the estimates are passed through LimitToRange first.</param>
        /// <returns>The mean squared error, or NaN when the calculator reports an error state.</returns>
        public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IClassificationProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling)
        {
            IEnumerable<double> predictions = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
            IEnumerable<double> targets = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);

            OnlineCalculatorError calculationError;
            double meanSquaredError;

            if (!applyLinearScaling)
            {
                IEnumerable<double> boundedPredictions = predictions.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
                meanSquaredError = OnlineMeanSquaredErrorCalculator.Calculate(targets, boundedPredictions, out calculationError);
            }
            else
            {
                var scalingCalculator = new OnlineMeanSquaredErrorCalculator();
                CalculateWithScaling(targets, predictions, lowerEstimationLimit, upperEstimationLimit, scalingCalculator, problemData.Dataset.Rows);
                calculationError = scalingCalculator.ErrorState;
                meanSquaredError = scalingCalculator.MeanSquaredError;
            }

            // any calculator error invalidates the result
            return calculationError == OnlineCalculatorError.None ? meanSquaredError : Double.NaN;
        }
Exemplo n.º 3
0
        /// <summary>
        /// Evaluates this model's expression tree with its interpreter on the given rows and
        /// hands the resulting values to the GetSignals overload that takes the raw values.
        /// </summary>
        /// <param name="dataset">Dataset the tree is evaluated on.</param>
        /// <param name="rows">Row indices to evaluate.</param>
        /// <returns>The sequence returned by the value-based GetSignals overload.</returns>
        public IEnumerable<double> GetSignals(IDataset dataset, IEnumerable<int> rows)
        {
            ISymbolicDataAnalysisExpressionTreeInterpreter treeInterpreter = Interpreter;
            ISymbolicExpressionTree modelTree = SymbolicExpressionTree;

            var treeOutputs = treeInterpreter.GetSymbolicExpressionTreeValues(modelTree, dataset, rows);
            return GetSignals(treeOutputs);
        }
 /// <summary>
 /// Creates a discriminant function classification model. Tree, interpreter and estimation
 /// limits are forwarded to the base constructor; thresholds and class values start out empty.
 /// </summary>
 /// <param name="tree">The expression tree of the model.</param>
 /// <param name="interpreter">The interpreter used to evaluate the tree.</param>
 /// <param name="thresholdCalculator">Assigned to <c>ThresholdCalculator</c>.</param>
 /// <param name="lowerEstimationLimit">Lower estimation limit (defaults to <c>double.MinValue</c>).</param>
 /// <param name="upperEstimationLimit">Upper estimation limit (defaults to <c>double.MaxValue</c>).</param>
 public SymbolicDiscriminantFunctionClassificationModel(
   ISymbolicExpressionTree tree,
   ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
   IDiscriminantFunctionThresholdCalculator thresholdCalculator,
   double lowerEstimationLimit = double.MinValue,
   double upperEstimationLimit = double.MaxValue)
   : base(tree, interpreter, lowerEstimationLimit, upperEstimationLimit) {
   // no thresholds or class values are known yet
   thresholds = new double[] { };
   classValues = new double[] { };
   ThresholdCalculator = thresholdCalculator;
 }
 /// <summary>
 /// Creates a symbolic regression model for the given target variable. Tree, interpreter
 /// and estimation limits are forwarded to the base constructor.
 /// </summary>
 /// <param name="targetVariable">Name stored in the <c>targetVariable</c> field.</param>
 /// <param name="tree">The expression tree of the model.</param>
 /// <param name="interpreter">The interpreter used to evaluate the tree.</param>
 /// <param name="lowerEstimationLimit">Lower estimation limit (defaults to <c>double.MinValue</c>).</param>
 /// <param name="upperEstimationLimit">Upper estimation limit (defaults to <c>double.MaxValue</c>).</param>
 public SymbolicRegressionModel(
     string targetVariable,
     ISymbolicExpressionTree tree,
     ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
     double lowerEstimationLimit = double.MinValue,
     double upperEstimationLimit = double.MaxValue)
     : base(tree, interpreter, lowerEstimationLimit, upperEstimationLimit)
 {
     // the qualifier is required because the parameter shadows the field
     this.targetVariable = targetVariable;
 }
 /// <summary>
 /// Creates a symbolic classification model through the configured model creator and
 /// recalculates its parameters on the rows given by the problem data's TrainingIndices.
 /// </summary>
 /// <param name="tree">The expression tree of the model.</param>
 /// <param name="interpreter">The interpreter used to evaluate the tree.</param>
 /// <param name="problemData">Problem data; it is cast to <see cref="IClassificationProblemData"/>.</param>
 /// <param name="estimationLimits">Supplies the lower and upper estimation limit.</param>
 /// <returns>The created model after its parameters have been recalculated.</returns>
 protected override ISymbolicDataAnalysisModel CreateModel(ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, IDataAnalysisProblemData problemData, DoubleLimit estimationLimits) {
   var classificationModel = ModelCreatorParameter.ActualValue.CreateSymbolicClassificationModel(tree, interpreter, estimationLimits.Lower, estimationLimits.Upper);

   // the cast happens after the model was created, as before
   var classificationData = (IClassificationProblemData)problemData;
   classificationModel.RecalculateModelParameters(classificationData, classificationData.TrainingIndices);

   return classificationModel;
 }
        /// <summary>
        /// Calculates two objectives for <paramref name="solution"/>: the squared Pearson
        /// correlation coefficient between targets and estimates, and the length of the tree.
        /// </summary>
        /// <param name="interpreter">Interpreter used to evaluate the tree.</param>
        /// <param name="solution">The expression tree to evaluate.</param>
        /// <param name="lowerEstimationLimit">Lower limit passed to LimitToRange / CalculateWithScaling.</param>
        /// <param name="upperEstimationLimit">Upper limit passed to LimitToRange / CalculateWithScaling.</param>
        /// <param name="problemData">Supplies the dataset and the name of the target variable.</param>
        /// <param name="rows">Row indices to evaluate.</param>
        /// <param name="applyLinearScaling">When true the coefficient is computed via CalculateWithScaling.</param>
        /// <returns>A two element array { R², solution.Length }; R² is NaN when the calculator reports an error state.</returns>
        public static double[] Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IClassificationProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling)
        {
            IEnumerable<double> predictions = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
            IEnumerable<double> targets = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);

            OnlineCalculatorError calculationError;
            double correlation;

            if (!applyLinearScaling)
            {
                IEnumerable<double> boundedPredictions = predictions.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
                correlation = OnlinePearsonsRCalculator.Calculate(targets, boundedPredictions, out calculationError);
            }
            else
            {
                var scalingCalculator = new OnlinePearsonsRCalculator();
                CalculateWithScaling(targets, predictions, lowerEstimationLimit, upperEstimationLimit, scalingCalculator, problemData.Dataset.Rows);
                calculationError = scalingCalculator.ErrorState;
                correlation = scalingCalculator.R;
            }

            // an error state turns the quality into NaN (NaN * NaN is NaN as well)
            double rSquared = calculationError == OnlineCalculatorError.None
                ? correlation * correlation
                : double.NaN;

            return new double[] { rSquared, solution.Length };
        }
        /// <summary>
        /// Checks the interpreter's results for lagged variables, integral, diff and lag
        /// expressions against reference values computed from columns "A" and "B" of <paramref name="ds"/>.
        /// </summary>
        /// <param name="interpreter">The interpreter under test.</param>
        /// <param name="ds">Dataset providing columns "A" and "B"; rows 0 to 5 are read.</param>
        private void EvaluateLaggedOperations(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, IDataset ds)
        {
            // lag
            var a0 = ds.GetDoubleValue("A", 0);
            var a1 = ds.GetDoubleValue("A", 1);
            var a2 = ds.GetDoubleValue("A", 2);

            Evaluate(interpreter, ds, "(lagVariable 1.0 a -1) ", 1, a0);
            Evaluate(interpreter, ds, "(lagVariable 1.0 a -1) ", 2, a1);
            Evaluate(interpreter, ds, "(lagVariable 1.0 a 0) ", 2, a2);
            Evaluate(interpreter, ds, "(lagVariable 1.0 a 1) ", 0, a1);

            // integral
            var b0 = ds.GetDoubleValue("B", 0);
            var b1 = ds.GetDoubleValue("B", 1);

            Evaluate(interpreter, ds, "(integral -1.0 (variable 1.0 a)) ", 1, a0 + a1);
            Evaluate(interpreter, ds, "(integral -1.0 (lagVariable 1.0 a 1)) ", 1, a1 + a2);
            Evaluate(interpreter, ds, "(integral -2.0 (variable 1.0 a)) ", 2, a0 + a1 + a2);
            Evaluate(interpreter, ds, "(integral -1.0 (* (variable 1.0 a) (variable 1.0 b)))", 1, a0 * b0 + a1 * b1);
            Evaluate(interpreter, ds, "(integral -2.0 3.0)", 1, 9.0);

            // derivative
            // (f_0 + 2 * f_1 - 2 * f_3 - f_4) / 8; // h = 1
            var a4 = ds.GetDoubleValue("A", 4);
            var a5 = ds.GetDoubleValue("A", 5);
            var b2 = ds.GetDoubleValue("B", 2);
            var b4 = ds.GetDoubleValue("B", 4);
            var b5 = ds.GetDoubleValue("B", 5);

            Evaluate(interpreter, ds, "(diff (variable 1.0 a)) ", 5, (a5 + 2 * a4 - 2 * a2 - a1) / 8.0);
            Evaluate(interpreter, ds, "(diff (variable 1.0 b)) ", 5, (b5 + 2 * b4 - 2 * b2 - b1) / 8.0);
            Evaluate(interpreter, ds, "(diff (* (variable 1.0 a) (variable 1.0 b)))", 5,
                     (a5 * b5 + 2 * a4 * b4 - 2 * a2 * b2 - a1 * b1) / 8.0);
            Evaluate(interpreter, ds, "(diff -2.0 3.0)", 5, 0.0);

            // timelag
            Evaluate(interpreter, ds, "(lag -1.0 (lagVariable 1.0 a 2)) ", 1, a2);
            Evaluate(interpreter, ds, "(lag -2.0 (lagVariable 1.0 a 2)) ", 2, a2);
            Evaluate(interpreter, ds, "(lag -1.0 (* (lagVariable 1.0 a 1) (lagVariable 1.0 b 2)))", 1, a1 * b2);
            Evaluate(interpreter, ds, "(lag -2.0 3.0)", 1, 3.0);
        }
    /// <summary>
    /// Works on a clone of <paramref name="tree"/> and replaces nodes by constant nodes,
    /// based on the impact and replacement values reported by <paramref name="impactValuesCalculator"/>.
    /// </summary>
    /// <param name="tree">The tree to prune; it is cloned first and the clone is modified.</param>
    /// <param name="impactValuesCalculator">Calculates impact value, replacement value and the new quality for a node.</param>
    /// <param name="interpreter">Interpreter handed to the temporary regression model.</param>
    /// <param name="problemData">Supplies the target variable and is passed on to the impact calculator.</param>
    /// <param name="estimationLimits">Lower and upper limit handed to the temporary regression model.</param>
    /// <param name="rows">Rows passed on to the impact calculator.</param>
    /// <param name="nodeImpactThreshold">Only used when <paramref name="pruneOnlyZeroImpactNodes"/> is false: nodes whose impact is greater than this value are kept.</param>
    /// <param name="pruneOnlyZeroImpactNodes">When true, only nodes whose impact <c>IsAlmost(0.0)</c> are replaced.</param>
    /// <returns>The <c>SymbolicExpressionTree</c> of the temporary model that was built around the cloned tree.</returns>
    public static ISymbolicExpressionTree Prune(ISymbolicExpressionTree tree, SymbolicRegressionSolutionImpactValuesCalculator impactValuesCalculator, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, IRegressionProblemData problemData, DoubleLimit estimationLimits, IEnumerable<int> rows, double nodeImpactThreshold = 0.0, bool pruneOnlyZeroImpactNodes = false) {
      var clonedTree = (ISymbolicExpressionTree)tree.Clone();
      var model = new SymbolicRegressionModel(problemData.TargetVariable, clonedTree, interpreter, estimationLimits.Lower, estimationLimits.Upper);
      // the node list is captured once, in prefix order, before any node is replaced
      var nodes = clonedTree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix().ToList(); // skip the nodes corresponding to the ProgramRootSymbol and the StartSymbol

      double qualityForImpactsCalculation = double.NaN; // pass a NaN value initially so the impact calculator will calculate the quality

      for (int i = 0; i < nodes.Count; ++i) {
        var node = nodes[i];
        // constant nodes are never replaced
        if (node is ConstantTreeNode) continue;

        double impactValue, replacementValue;
        double newQualityForImpactsCalculation;
        impactValuesCalculator.CalculateImpactAndReplacementValues(model, node, problemData, rows, out impactValue, out replacementValue, out newQualityForImpactsCalculation, qualityForImpactsCalculation);

        // zero-impact mode: keep every node whose impact is not (almost) zero
        if (pruneOnlyZeroImpactNodes && !impactValue.IsAlmost(0.0)) continue;
        // threshold mode: keep every node whose impact exceeds the threshold
        if (!pruneOnlyZeroImpactNodes && impactValue > nodeImpactThreshold) continue;

        // build the replacement from the grammar's "Constant" symbol and give it the calculated value
        var constantNode = (ConstantTreeNode)node.Grammar.GetSymbol("Constant").CreateTreeNode();
        constantNode.Value = replacementValue;

        ReplaceWithConstant(node, constantNode);
        // the captured list still contains the descendants of the replaced node, so jump over them
        i += node.GetLength() - 1; // skip subtrees under the node that was folded

        // the quality is only carried forward when a node was actually replaced
        qualityForImpactsCalculation = newQualityForImpactsCalculation;
      }
      return model.SymbolicExpressionTree;
    }
 /// <summary>
 /// Cloning constructor: copies the state of <paramref name="original"/> into the new instance.
 /// </summary>
 /// <param name="original">The model to copy from.</param>
 /// <param name="cloner">Passed to the base constructor and used to clone the expression tree and the interpreter.</param>
 protected SymbolicDataAnalysisModel(SymbolicDataAnalysisModel original, Cloner cloner)
     : base(original, cloner)
 {
     // limits: plain assignment
     lowerEstimationLimit = original.lowerEstimationLimit;
     upperEstimationLimit = original.upperEstimationLimit;

     // tree and interpreter: cloned through the cloner
     symbolicExpressionTree = cloner.Clone(original.symbolicExpressionTree);
     interpreter = cloner.Clone(original.interpreter);
 }
Exemplo n.º 11
0
 /// <summary>
 /// Calculates the Sharpe ratio of the signals that <paramref name="solution"/> produces on the given rows.
 /// </summary>
 /// <param name="interpreter">Interpreter passed on to GetSignals.</param>
 /// <param name="solution">The expression tree that generates the signals.</param>
 /// <param name="problemData">Supplies the dataset, the price change variable and the transaction costs.</param>
 /// <param name="rows">Row indices to evaluate.</param>
 /// <returns>The Sharpe ratio, or 0.0 when the calculator reports an error state.</returns>
 public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, IProblemData problemData, IEnumerable<int> rows) {
   IEnumerable<double> tradingSignals = GetSignals(interpreter, solution, problemData.Dataset, rows);
   IEnumerable<double> priceChanges = problemData.Dataset.GetDoubleValues(problemData.PriceChangeVariable, rows);

   OnlineCalculatorError calculationError;
   double sharpeRatio = OnlineSharpeRatioCalculator.Calculate(priceChanges, tradingSignals, problemData.TransactionCosts, out calculationError);

   // note: unlike a NaN result, an error is reported as a ratio of zero here
   return calculationError == OnlineCalculatorError.None ? sharpeRatio : 0.0;
 }
        /// <summary>
        /// Collects the interpreter, the rows to evaluate, the problem data and the size limits
        /// from the operator's parameters and delegates the actual crossover to <c>Cross</c>.
        /// </summary>
        /// <param name="random">Random number generator passed on to <c>Cross</c>.</param>
        /// <param name="parent0">First parent tree.</param>
        /// <param name="parent1">Second parent tree.</param>
        /// <returns>The tree returned by <c>Cross</c>.</returns>
        public override ISymbolicExpressionTree Crossover(IRandom random, ISymbolicExpressionTree parent0, ISymbolicExpressionTree parent1)
        {
            ISymbolicDataAnalysisExpressionTreeInterpreter treeInterpreter = SymbolicDataAnalysisTreeInterpreterParameter.ActualValue;
            // materialized so the rows are generated only once
            List<int> evaluationRows = GenerateRowsToEvaluate().ToList();
            T data = ProblemDataParameter.ActualValue;

            var maxDepth = MaximumSymbolicExpressionTreeDepth.Value;
            var maxLength = MaximumSymbolicExpressionTreeLength.Value;
            var similarityRange = SemanticSimilarityRange;

            return Cross(random, parent0, parent1, treeInterpreter, data, evaluationRows, maxDepth, maxLength, similarityRange);
        }
 /// <summary>
 /// Calculates two objectives for <paramref name="solution"/>: the mean squared error between
 /// the target values and the range-limited estimates, and the length of the tree.
 /// </summary>
 /// <param name="interpreter">Interpreter used to evaluate the tree.</param>
 /// <param name="solution">The expression tree to evaluate.</param>
 /// <param name="lowerEstimationLimit">Lower limit passed to LimitToRange.</param>
 /// <param name="upperEstimationLimit">Upper limit passed to LimitToRange.</param>
 /// <param name="problemData">Supplies the dataset and the name of the target variable.</param>
 /// <param name="rows">Row indices to evaluate.</param>
 /// <returns>A two element array { MSE, solution.Length }; the MSE is NaN when the calculator reports an error state.</returns>
 public static double[] Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IClassificationProblemData problemData, IEnumerable<int> rows) {
   IEnumerable<double> predictions = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
   IEnumerable<double> targets = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
   IEnumerable<double> boundedPredictions = predictions.LimitToRange(lowerEstimationLimit, upperEstimationLimit);

   OnlineCalculatorError calculationError;
   double calculated = OnlineMeanSquaredErrorCalculator.Calculate(targets, boundedPredictions, out calculationError);
   double meanSquaredError = calculationError == OnlineCalculatorError.None ? calculated : double.NaN;

   return new double[] { meanSquaredError, solution.Length };
 }
        /// <summary>
        /// Checks the interpreter's results for terminal symbols: a sum of two constants and
        /// a variable with weight 2.0, which is expected to yield 2.0 in row 0 and 4.0 in row 1.
        /// </summary>
        /// <param name="interpreter">The interpreter under test.</param>
        /// <param name="ds">Dataset the expressions are evaluated on.</param>
        private void EvaluateTerminals(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, Dataset ds)
        {
            // constants
            const string constantSum = "(+ 1.5 3.5)";
            Evaluate(interpreter, ds, constantSum, 0, 5.0);

            // variables
            const string weightedVariable = "(variable 2.0 a)";
            Evaluate(interpreter, ds, weightedVariable, 0, 2.0);
            Evaluate(interpreter, ds, weightedVariable, 1, 4.0);
        }
    /// <summary>
    /// Calculates two objectives for <paramref name="solution"/>: the mean squared error as computed by
    /// <c>SymbolicRegressionSingleObjectiveMeanSquaredErrorEvaluator</c> (optionally rounded) and the length of the tree.
    /// </summary>
    /// <param name="interpreter">Interpreter passed on to the single-objective evaluator.</param>
    /// <param name="solution">The expression tree to evaluate.</param>
    /// <param name="lowerEstimationLimit">Lower estimation limit passed on to the single-objective evaluator.</param>
    /// <param name="upperEstimationLimit">Upper estimation limit passed on to the single-objective evaluator.</param>
    /// <param name="problemData">The regression problem data.</param>
    /// <param name="rows">Row indices to evaluate.</param>
    /// <param name="applyLinearScaling">Passed on to the single-objective evaluator.</param>
    /// <param name="decimalPlaces">Number of decimal places the error is rounded to; a negative value disables rounding.</param>
    /// <returns>A two element array { MSE, solution.Length }.</returns>
    public static double[] Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling, int decimalPlaces) {
      var rawError = SymbolicRegressionSingleObjectiveMeanSquaredErrorEvaluator.Calculate(
        interpreter, solution, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);

      // negative decimalPlaces means: report the error unrounded
      var reportedError = decimalPlaces < 0 ? rawError : Math.Round(rawError, decimalPlaces);

      return new double[] { reportedError, solution.Length };
    }
Exemplo n.º 16
0
 /// <summary>
 /// Creates a model from an expression tree, an interpreter and the estimation limits.
 /// Name and description are initialized from <c>ItemName</c> and <c>ItemDescription</c>.
 /// </summary>
 /// <param name="tree">The expression tree of the model (stored as is, not cloned).</param>
 /// <param name="interpreter">The interpreter used to evaluate the tree (stored as is, not cloned).</param>
 /// <param name="lowerEstimationLimit">Lower estimation limit.</param>
 /// <param name="upperEstimationLimit">Upper estimation limit.</param>
 protected SymbolicDataAnalysisModel(
   ISymbolicExpressionTree tree,
   ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
   double lowerEstimationLimit,
   double upperEstimationLimit) {
   // the parameterless base constructor is invoked implicitly
   this.name = ItemName;
   this.description = ItemDescription;

   this.lowerEstimationLimit = lowerEstimationLimit;
   this.upperEstimationLimit = upperEstimationLimit;
   this.symbolicExpressionTree = tree;
   this.interpreter = interpreter;
 }
 /// <summary>
 /// Initializes a new model with the given expression tree, interpreter and estimation limits;
 /// name and description are taken from <c>ItemName</c> and <c>ItemDescription</c>.
 /// </summary>
 /// <param name="tree">The expression tree of the model (stored as is, not cloned).</param>
 /// <param name="interpreter">The interpreter used to evaluate the tree (stored as is, not cloned).</param>
 /// <param name="lowerEstimationLimit">Lower estimation limit.</param>
 /// <param name="upperEstimationLimit">Upper estimation limit.</param>
 protected SymbolicDataAnalysisModel(
     ISymbolicExpressionTree tree,
     ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
     double lowerEstimationLimit,
     double upperEstimationLimit)
 {
     // the parameterless base constructor is invoked implicitly
     this.name = ItemName;
     this.description = ItemDescription;

     this.lowerEstimationLimit = lowerEstimationLimit;
     this.upperEstimationLimit = upperEstimationLimit;
     this.symbolicExpressionTree = tree;
     this.interpreter = interpreter;
 }
        /// <summary>
        /// Calculates two objectives for <paramref name="solution"/>: the value returned by
        /// <c>SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator</c> (optionally rounded) and the
        /// value returned by <c>SymbolicDataAnalysisModelComplexityCalculator.CalculateComplexity</c>.
        /// </summary>
        /// <param name="interpreter">Interpreter passed on to the single-objective evaluator.</param>
        /// <param name="solution">The expression tree to evaluate.</param>
        /// <param name="lowerEstimationLimit">Lower estimation limit passed on to the single-objective evaluator.</param>
        /// <param name="upperEstimationLimit">Upper estimation limit passed on to the single-objective evaluator.</param>
        /// <param name="problemData">The regression problem data.</param>
        /// <param name="rows">Row indices to evaluate.</param>
        /// <param name="applyLinearScaling">Passed on to the single-objective evaluator.</param>
        /// <param name="decimalPlaces">Number of decimal places the quality is rounded to; a negative value disables rounding.</param>
        /// <returns>A two element array { quality, complexity }.</returns>
        public static double[] Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling, int decimalPlaces)
        {
            double rawQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(
                interpreter, solution, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);

            // negative decimalPlaces means: report the quality unrounded
            double quality = decimalPlaces < 0 ? rawQuality : Math.Round(rawQuality, decimalPlaces);

            var complexity = SymbolicDataAnalysisModelComplexityCalculator.CalculateComplexity(solution);
            return new double[] { quality, complexity };
        }
Exemplo n.º 19
0
        /// <summary>
        /// Calculates two objectives for <paramref name="solution"/>: the value returned by
        /// <c>SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator</c> (optionally rounded) and the
        /// sum of <c>GetLength()</c> over all nodes of the tree.
        /// </summary>
        /// <param name="interpreter">Interpreter passed on to the single-objective evaluator.</param>
        /// <param name="solution">The expression tree to evaluate.</param>
        /// <param name="lowerEstimationLimit">Lower estimation limit passed on to the single-objective evaluator.</param>
        /// <param name="upperEstimationLimit">Upper estimation limit passed on to the single-objective evaluator.</param>
        /// <param name="problemData">The regression problem data.</param>
        /// <param name="rows">Row indices to evaluate.</param>
        /// <param name="applyLinearScaling">Passed on to the single-objective evaluator.</param>
        /// <param name="decimalPlaces">Number of decimal places the quality is rounded to; a negative value disables rounding.</param>
        /// <returns>A two element array { quality, summed subtree lengths }.</returns>
        public static double[] Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling, int decimalPlaces)
        {
            double rawQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(
                interpreter, solution, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);

            // negative decimalPlaces means: report the quality unrounded
            double quality = decimalPlaces < 0 ? rawQuality : Math.Round(rawQuality, decimalPlaces);

            // sum of the length of the whole sub-tree for each node
            var summedSubtreeLengths = solution.IterateNodesPostfix().Sum(n => n.GetLength());
            return new double[] { quality, summedSubtreeLengths };
        }
        /// <summary>
        /// Calculates two objectives for <paramref name="solution"/>: the value returned by
        /// <c>SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator</c> (optionally rounded) and the
        /// number of nodes of type <c>IVariableTreeNode</c> in the tree.
        /// </summary>
        /// <param name="interpreter">Interpreter passed on to the single-objective evaluator.</param>
        /// <param name="solution">The expression tree to evaluate.</param>
        /// <param name="lowerEstimationLimit">Lower estimation limit passed on to the single-objective evaluator.</param>
        /// <param name="upperEstimationLimit">Upper estimation limit passed on to the single-objective evaluator.</param>
        /// <param name="problemData">The regression problem data.</param>
        /// <param name="rows">Row indices to evaluate.</param>
        /// <param name="applyLinearScaling">Passed on to the single-objective evaluator.</param>
        /// <param name="decimalPlaces">Number of decimal places the quality is rounded to; a negative value disables rounding.</param>
        /// <returns>A two element array { quality, number of variable nodes }.</returns>
        public static double[] Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling, int decimalPlaces)
        {
            double rawQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(
                interpreter, solution, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);

            // negative decimalPlaces means: report the quality unrounded
            double quality = decimalPlaces < 0 ? rawQuality : Math.Round(rawQuality, decimalPlaces);

            // count the number of variables
            var variableNodeCount = solution.IterateNodesPostfix().OfType<IVariableTreeNode>().Count();
            return new double[] { quality, variableNodeCount };
        }
        /// <summary>
        /// Calculates a weighted sum of the normalized mean squared error, the false negative rate
        /// and the false positive rate of the classification model created for <paramref name="tree"/>.
        /// </summary>
        /// <param name="interpreter">Interpreter used to evaluate the tree.</param>
        /// <param name="tree">The expression tree to evaluate.</param>
        /// <param name="lowerEstimationLimit">Lower limit passed to LimitToRange and to the model creator.</param>
        /// <param name="upperEstimationLimit">Upper limit passed to LimitToRange and to the model creator.</param>
        /// <param name="problemData">Supplies dataset, target variable and positive class.</param>
        /// <param name="rows">Row indices to evaluate.</param>
        /// <param name="applyLinearScaling">Must be false; see exceptions.</param>
        /// <param name="modelCreator">Creates the classification model for the tree.</param>
        /// <param name="normalizedMeanSquaredErrorWeightingFactor">Weight of the normalized mean squared error.</param>
        /// <param name="falseNegativeRateWeightingFactor">Weight of the false negative rate (1 - true positive rate).</param>
        /// <param name="falsePositiveRateWeightingFactor">Weight of the false positive rate.</param>
        /// <returns>The weighted sum, or NaN when one of the calculators reports an error state.</returns>
        /// <exception cref="NotSupportedException">
        /// Thrown when <paramref name="applyLinearScaling"/> is true. The check runs only after the
        /// performance measures were calculated without an error state.
        /// </exception>
        public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree tree, double lowerEstimationLimit, double upperEstimationLimit, IClassificationProblemData problemData,
                                       IEnumerable<int> rows, bool applyLinearScaling, ISymbolicClassificationModelCreator modelCreator, double normalizedMeanSquaredErrorWeightingFactor, double falseNegativeRateWeightingFactor, double falsePositiveRateWeightingFactor)
        {
            var rawEstimates = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, rows);
            var targets = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
            // materialized because the bounded estimates are consumed more than once below
            var boundedEstimates = rawEstimates.LimitToRange(lowerEstimationLimit, upperEstimationLimit).ToArray();

            //calculate performance measures
            string positiveClass = problemData.PositiveClass;

            var classifier = modelCreator.CreateSymbolicClassificationModel(problemData.TargetVariable, tree, interpreter, lowerEstimationLimit, upperEstimationLimit);
            var discriminantClassifier = classifier as ISymbolicDiscriminantFunctionClassificationModel;

            IEnumerable<double> predictedClasses = null;
            if (discriminantClassifier == null)
            {
                // generic model: let it recalculate its parameters and classify the rows itself
                classifier.RecalculateModelParameters(problemData, rows);
                predictedClasses = classifier.GetEstimatedClassValues(problemData.Dataset, rows);
            }
            else
            {
                // discriminant function model: derive thresholds from the already computed estimates
                double[] classValues, thresholds;
                discriminantClassifier.ThresholdCalculator.Calculate(problemData, boundedEstimates, targets, out classValues, out thresholds);
                discriminantClassifier.SetThresholdsAndClassValues(thresholds, classValues);
                predictedClasses = discriminantClassifier.GetEstimatedClassValues(boundedEstimates);
            }

            var performance = new ClassificationPerformanceMeasuresCalculator(positiveClass, problemData.GetClassValue(positiveClass));
            performance.Calculate(targets, predictedClasses);
            if (performance.ErrorState != OnlineCalculatorError.None)
            {
                return Double.NaN;
            }

            double falseNegativeRate = 1 - performance.TruePositiveRate;
            double falsePositiveRate = performance.FalsePositiveRate;

            if (applyLinearScaling)
            {
                throw new NotSupportedException("The Weighted Performance Measures Evaluator does not suppport linear scaling!");
            }

            OnlineCalculatorError nmseError;
            double nmse = OnlineNormalizedMeanSquaredErrorCalculator.Calculate(targets, boundedEstimates, out nmseError);
            if (nmseError != OnlineCalculatorError.None)
            {
                return Double.NaN;
            }

            return normalizedMeanSquaredErrorWeightingFactor * nmse
                   + falseNegativeRateWeightingFactor * falseNegativeRate
                   + falsePositiveRateWeightingFactor * falsePositiveRate;
        }
        /// <summary>
        /// Imports <paramref name="expr"/> as an expression tree, evaluates it with
        /// <paramref name="interpreter"/> for the single row <paramref name="index"/> and asserts
        /// that the result matches <paramref name="expected"/>.
        /// </summary>
        /// <param name="interpreter">The interpreter under test.</param>
        /// <param name="ds">Dataset the expression is evaluated on.</param>
        /// <param name="expr">The expression in the format understood by <c>SymbolicExpressionImporter</c>; also used as the assertion message.</param>
        /// <param name="index">The row to evaluate.</param>
        /// <param name="expected">The expected value; NaN is only matched by NaN, other values are compared with a tolerance of 1.0E-12.</param>
        private void Evaluate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, Dataset ds, string expr, int index, double expected)
        {
            var importer = new SymbolicExpressionImporter();
            ISymbolicExpressionTree tree = importer.Import(expr);

            double actual = interpreter.GetSymbolicExpressionTreeValues(tree, ds, Enumerable.Range(index, 1)).First();

            bool actualIsNaN = double.IsNaN(actual);
            bool expectedIsNaN = double.IsNaN(expected);

            // A NaN on only one side is a failure. The expression is passed as message so that
            // a failing run identifies which expression produced the mismatch.
            Assert.IsFalse(actualIsNaN && !expectedIsNaN, expr);
            Assert.IsFalse(!actualIsNaN && expectedIsNaN, expr);

            // two NaN values count as equal; only real numbers are compared numerically
            if (!actualIsNaN && !expectedIsNaN)
            {
                Assert.AreEqual(expected, actual, 1.0E-12, expr);
            }
        }
Exemplo n.º 23
0
        /// <summary>
        /// Calculates two objectives for <paramref name="solution"/>: the mean squared error as computed by
        /// <c>SymbolicRegressionSingleObjectiveMeanSquaredErrorEvaluator</c> (optionally rounded) and the length of the tree.
        /// </summary>
        /// <param name="interpreter">Interpreter passed on to the single-objective evaluator.</param>
        /// <param name="solution">The expression tree to evaluate.</param>
        /// <param name="lowerEstimationLimit">Lower estimation limit passed on to the single-objective evaluator.</param>
        /// <param name="upperEstimationLimit">Upper estimation limit passed on to the single-objective evaluator.</param>
        /// <param name="problemData">The regression problem data.</param>
        /// <param name="rows">Row indices to evaluate.</param>
        /// <param name="applyLinearScaling">Passed on to the single-objective evaluator.</param>
        /// <param name="decimalPlaces">Number of decimal places the error is rounded to; a negative value disables rounding.</param>
        /// <returns>A two element array { MSE, solution.Length }.</returns>
        public static double[] Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling, int decimalPlaces)
        {
            var rawError = SymbolicRegressionSingleObjectiveMeanSquaredErrorEvaluator.Calculate(
                interpreter, solution, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);

            // negative decimalPlaces means: report the error unrounded
            var reportedError = decimalPlaces < 0 ? rawError : Math.Round(rawError, decimalPlaces);

            return new double[] { reportedError, solution.Length };
        }
    /// <summary>
    /// Calculates the mean of log(1 + |estimate - target|) over the given rows, using the
    /// range-limited estimates of <paramref name="solution"/>.
    /// </summary>
    /// <param name="interpreter">Interpreter used to evaluate the tree.</param>
    /// <param name="solution">The expression tree to evaluate.</param>
    /// <param name="lowerEstimationLimit">Lower limit passed to LimitToRange.</param>
    /// <param name="upperEstimationLimit">Upper limit passed to LimitToRange.</param>
    /// <param name="problemData">Supplies the dataset and the name of the target variable.</param>
    /// <param name="rows">Row indices to evaluate.</param>
    /// <returns>The mean logarithmic residual, or NaN when the mean could not be calculated.</returns>
    public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows) {
      IEnumerable<double> predictions = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
      IEnumerable<double> targets = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
      IEnumerable<double> boundedPredictions = predictions.LimitToRange(lowerEstimationLimit, upperEstimationLimit);

      var logResiduals = boundedPredictions.Zip(targets, (estimate, target) => Math.Log(1.0 + Math.Abs(estimate - target)));

      // variance and its error state are required by the calculator's signature but not used here
      OnlineCalculatorError meanError, varianceError;
      double meanLogResidual, variance;
      OnlineMeanAndVarianceCalculator.Calculate(logResiduals, out meanLogResidual, out variance, out meanError, out varianceError);

      return meanError == OnlineCalculatorError.None ? meanLogResidual : double.NaN;
    }
Exemplo n.º 25
0
        /// <summary>
        /// Calculates the Sharpe ratio of the signals that <paramref name="solution"/> produces on the given rows.
        /// </summary>
        /// <param name="interpreter">Interpreter passed on to GetSignals.</param>
        /// <param name="solution">The expression tree that generates the signals.</param>
        /// <param name="problemData">Supplies the dataset, the price change variable and the transaction costs.</param>
        /// <param name="rows">Row indices to evaluate.</param>
        /// <returns>The Sharpe ratio, or 0.0 when the calculator reports an error state.</returns>
        public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, IProblemData problemData, IEnumerable<int> rows)
        {
            IEnumerable<double> tradingSignals = GetSignals(interpreter, solution, problemData.Dataset, rows);
            IEnumerable<double> priceChanges = problemData.Dataset.GetDoubleValues(problemData.PriceChangeVariable, rows);

            OnlineCalculatorError calculationError;
            double sharpeRatio = OnlineSharpeRatioCalculator.Calculate(priceChanges, tradingSignals, problemData.TransactionCosts, out calculationError);

            // note: an error is reported as a ratio of zero, not as NaN
            return calculationError == OnlineCalculatorError.None ? sharpeRatio : 0.0;
        }
        /// <summary>
        /// Creates random trees from the <c>FullFunctionalExpressionGrammar</c> with a fixed seed and
        /// measures how many nodes per second <paramref name="interpreter"/> evaluates on a random dataset.
        /// </summary>
        /// <param name="interpreter">The interpreter to measure.</param>
        /// <param name="nodesPerSecThreshold">Threshold for the (currently disabled) throughput assertion.</param>
        private void TestFullGrammarPerformance(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, double nodesPerSecThreshold)
        {
            // fixed seed, so every run works on the same dataset and trees
            var random = new MersenneTwister(31415);
            var data = Util.CreateRandomDataset(random, Rows, Columns);

            var fullGrammar = new FullFunctionalExpressionGrammar();
            var trees = Util.CreateRandomTrees(random, data, fullGrammar, N, 1, 100, 0, 0);
            foreach (ISymbolicExpressionTree tree in trees)
            {
                var variableNames = new List<string>(data.VariableNames);
                Util.InitTree(tree, random, variableNames);
            }

            // The measurement is still executed, but its result is not asserted at the moment.
            double nodesPerSec = Util.CalculateEvaluatedNodesPerSec(trees, interpreter, data, 3);
            //mkommend: commented due to performance issues on the builder
            //Assert.IsTrue(nodesPerSec > nodesPerSecThreshold); // evaluated nodes per seconds must be larger than 15mNodes/sec
        }
Exemplo n.º 27
0
        /// <summary>
        /// Calculates two objectives for <paramref name="solution"/>: the mean squared error between
        /// the target values and the range-limited estimates, and the length of the tree.
        /// </summary>
        /// <param name="interpreter">Interpreter used to evaluate the tree.</param>
        /// <param name="solution">The expression tree to evaluate.</param>
        /// <param name="lowerEstimationLimit">Lower limit passed to LimitToRange.</param>
        /// <param name="upperEstimationLimit">Upper limit passed to LimitToRange.</param>
        /// <param name="problemData">Supplies the dataset and the name of the target variable.</param>
        /// <param name="rows">Row indices to evaluate.</param>
        /// <returns>A two element array { MSE, solution.Length }; the MSE is NaN when the calculator reports an error state.</returns>
        public static double[] Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IClassificationProblemData problemData, IEnumerable<int> rows)
        {
            IEnumerable<double> predictions = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
            IEnumerable<double> targets = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
            IEnumerable<double> boundedPredictions = predictions.LimitToRange(lowerEstimationLimit, upperEstimationLimit);

            OnlineCalculatorError calculationError;
            double calculated = OnlineMeanSquaredErrorCalculator.Calculate(targets, boundedPredictions, out calculationError);
            double meanSquaredError = calculationError == OnlineCalculatorError.None ? calculated : double.NaN;

            return new double[] { meanSquaredError, solution.Length };
        }
        /// <summary>
        /// Calculates the weighted residuals mean squared error of <paramref name="tree"/> with the
        /// <c>OnlineWeightedClassificationMeanSquaredErrorCalculator</c>.
        /// </summary>
        /// <param name="interpreter">Interpreter used to evaluate the tree.</param>
        /// <param name="tree">The expression tree to evaluate.</param>
        /// <param name="lowerEstimationLimit">Lower limit passed to LimitToRange / CalculateWithScaling.</param>
        /// <param name="upperEstimationLimit">Upper limit passed to LimitToRange / CalculateWithScaling.</param>
        /// <param name="problemData">Supplies dataset, target variable, class values and positive class.</param>
        /// <param name="rows">Row indices to evaluate.</param>
        /// <param name="applyLinearScaling">When true the error is computed via CalculateWithScaling.</param>
        /// <param name="definiteResidualsWeight">Weight passed on to the calculator.</param>
        /// <param name="positiveClassResidualsWeight">Weight passed on to the calculator.</param>
        /// <param name="negativeClassesResidualsWeight">Weight passed on to the calculator.</param>
        /// <returns>The weighted error, or NaN when the calculator reports an error state.</returns>
        public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree tree, double lowerEstimationLimit, double upperEstimationLimit, IClassificationProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling,
                                       double definiteResidualsWeight, double positiveClassResidualsWeight, double negativeClassesResidualsWeight)
        {
            IEnumerable<double> predictions = interpreter.GetSymbolicExpressionTreeValues(tree, problemData.Dataset, rows);
            IEnumerable<double> targets = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);

            double positiveClassValue = problemData.GetClassValue(problemData.PositiveClass);

            //get class values min/max
            // both extremes start at the first class value (or at 0.0 when there are no class values)
            double smallestClassValue = problemData.ClassValues.ElementAtOrDefault(0);
            double largestClassValue = smallestClassValue;
            foreach (double candidate in problemData.ClassValues)
            {
                if (candidate < smallestClassValue)
                {
                    smallestClassValue = candidate;
                }
                if (candidate > largestClassValue)
                {
                    largestClassValue = candidate;
                }
            }

            OnlineCalculatorError calculationError;
            double weightedError;

            if (!applyLinearScaling)
            {
                IEnumerable<double> boundedPredictions = predictions.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
                weightedError = OnlineWeightedClassificationMeanSquaredErrorCalculator.Calculate(
                    targets, boundedPredictions, positiveClassValue, largestClassValue, smallestClassValue,
                    definiteResidualsWeight, positiveClassResidualsWeight, negativeClassesResidualsWeight, out calculationError);
            }
            else
            {
                var scalingCalculator = new OnlineWeightedClassificationMeanSquaredErrorCalculator(
                    positiveClassValue, largestClassValue, smallestClassValue,
                    definiteResidualsWeight, positiveClassResidualsWeight, negativeClassesResidualsWeight);
                CalculateWithScaling(targets, predictions, lowerEstimationLimit, upperEstimationLimit, scalingCalculator, problemData.Dataset.Rows);
                calculationError = scalingCalculator.ErrorState;
                weightedError = scalingCalculator.WeightedResidualsMeanSquaredError;
            }

            return calculationError == OnlineCalculatorError.None ? weightedError : Double.NaN;
        }
        /// <summary>
        /// Evaluates random trees with every interpreter in the list on a random dataset and asserts
        /// that the sums of the estimated values agree pairwise within a tolerance of 1e-12.
        /// The seed that was used is part of the failure message.
        /// </summary>
        public void TestInterpretersEstimatedValuesConsistency()
        {
            // draw a seed from an unseeded generator and re-seed with it, so the seed can be reported
            var random = new MersenneTwister();
            int seed = random.Next(0, int.MaxValue);
            random.Seed((uint)seed);

            const int numRows = 100;
            var dataset = Util.CreateRandomDataset(random, numRows, Columns);
            var grammar = new TypeCoherentExpressionGrammar();

            var interpreters = new ISymbolicDataAnalysisExpressionTreeInterpreter[] {
                new SymbolicDataAnalysisExpressionTreeLinearInterpreter(),
                new SymbolicDataAnalysisExpressionTreeInterpreter(),
            };

            var rows = Enumerable.Range(0, numRows).ToList();
            var randomTrees = Util.CreateRandomTrees(random, dataset, grammar, N, 1, 10, 0, 0);
            foreach (ISymbolicExpressionTree randomTree in randomTrees)
            {
                Util.InitTree(randomTree, random, new List<string>(dataset.VariableNames));
            }

            for (int treeIndex = 0; treeIndex < randomTrees.Length; ++treeIndex)
            {
                var tree = randomTrees[treeIndex];
                // one (lazily evaluated) sequence of estimated values per interpreter
                var estimatesPerInterpreter = interpreters.Select(x => x.GetSymbolicExpressionTreeValues(tree, dataset, rows)).ToList();

                for (int first = 0; first < interpreters.Length - 1; ++first)
                {
                    var firstSum = estimatesPerInterpreter[first].Sum();
                    for (int second = first + 1; second < interpreters.Length; ++second)
                    {
                        var secondSum = estimatesPerInterpreter[second].Sum();

                        // two NaN sums are treated as agreement
                        bool bothNaN = double.IsNaN(firstSum) && double.IsNaN(secondSum);
                        if (!bothNaN)
                        {
                            string errorMessage = string.Format("Interpreters {0} and {1} do not agree on tree {2} (seed = {3}).", interpreters[first].Name, interpreters[second].Name, treeIndex, seed);
                            Assert.AreEqual(firstSum, secondSum, 1e-12, errorMessage);
                        }
                    }
                }
            }
        }
    /// <summary>
    /// Calculates the mean squared error between the target variable and the values the
    /// interpreter produces for <paramref name="solution"/> on the given rows.
    /// </summary>
    /// <param name="interpreter">Interpreter used to evaluate the tree.</param>
    /// <param name="solution">The expression tree to evaluate.</param>
    /// <param name="lowerEstimationLimit">Lower limit passed to LimitToRange / CalculateWithScaling.</param>
    /// <param name="upperEstimationLimit">Upper limit passed to LimitToRange / CalculateWithScaling.</param>
    /// <param name="problemData">Supplies the dataset and the name of the target variable.</param>
    /// <param name="rows">Row indices to evaluate.</param>
    /// <param name="applyLinearScaling">When true the error is computed via CalculateWithScaling; otherwise the estimates are passed through LimitToRange first.</param>
    /// <returns>The mean squared error, or NaN when the calculator reports an error state.</returns>
    public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IClassificationProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling) {
      IEnumerable<double> predictions = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
      IEnumerable<double> targets = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);

      OnlineCalculatorError calculationError;
      double meanSquaredError;

      if (!applyLinearScaling) {
        IEnumerable<double> boundedPredictions = predictions.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
        meanSquaredError = OnlineMeanSquaredErrorCalculator.Calculate(targets, boundedPredictions, out calculationError);
      } else {
        var scalingCalculator = new OnlineMeanSquaredErrorCalculator();
        CalculateWithScaling(targets, predictions, lowerEstimationLimit, upperEstimationLimit, scalingCalculator, problemData.Dataset.Rows);
        calculationError = scalingCalculator.ErrorState;
        meanSquaredError = scalingCalculator.MeanSquaredError;
      }

      // any calculator error invalidates the result
      return calculationError == OnlineCalculatorError.None ? meanSquaredError : Double.NaN;
    }
    /// <summary>
    /// Determines the constant that can stand in for <paramref name="node"/>: the value of the node
    /// itself when it is a constant, otherwise the median of the values the node's subtree
    /// produces on the given rows.
    /// </summary>
    /// <param name="node">The node whose replacement value is calculated; it is cloned, not modified.</param>
    /// <param name="sourceTree">The tree the node comes from; its root subtrees from index 1 on are cloned into the temporary tree.</param>
    /// <param name="interpreter">Interpreter used to evaluate the temporary tree.</param>
    /// <param name="dataset">Dataset the temporary tree is evaluated on.</param>
    /// <param name="rows">Row indices to evaluate.</param>
    /// <returns>The replacement value for the node.</returns>
    protected static double CalculateReplacementValue(ISymbolicExpressionTreeNode node, ISymbolicExpressionTree sourceTree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
      IDataset dataset, IEnumerable<int> rows) {
      //optimization: constant nodes return always the same value
      ConstantTreeNode asConstant = node as ConstantTreeNode;
      if (asConstant != null) {
        return asConstant.Value;
      }

      // wrap a clone of the node into a minimal tree: program root -> start -> node
      var programRoot = new ProgramRootSymbol().CreateTreeNode();
      var start = new StartSymbol().CreateTreeNode();
      programRoot.AddSubtree(start);
      start.AddSubtree((ISymbolicExpressionTreeNode)node.Clone());
      var evaluationTree = new SymbolicExpressionTree(programRoot);

      // clone ADFs of source tree
      for (int branch = 1; branch < sourceTree.Root.SubtreeCount; branch++) {
        var clonedBranch = (ISymbolicExpressionTreeNode)sourceTree.Root.GetSubtree(branch).Clone();
        evaluationTree.Root.AddSubtree(clonedBranch);
      }

      var subtreeValues = interpreter.GetSymbolicExpressionTreeValues(evaluationTree, dataset, rows);
      return subtreeValues.Median();
    }
        /// <summary>
        /// Evaluates <paramref name="solution"/> on the given rows, limits the outputs to the estimation
        /// limits and feeds the per-row quantities <c>log(1 + |estimated - target|)</c> into
        /// <c>OnlineMeanAndVarianceCalculator</c>.
        /// </summary>
        /// <returns>
        /// The first value reported by the calculator (the mean, going by the calculator's name), or
        /// <see cref="double.NaN"/> when the first error state is not <c>None</c>. The variance and its
        /// error state are ignored.
        /// </returns>
        public static double Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows)
        {
            IEnumerable<double> predictions = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
            IEnumerable<double> targets = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);
            IEnumerable<double> clamped = predictions.LimitToRange(lowerEstimationLimit, upperEstimationLimit);

            // pair every clamped estimate with its target and take the log of the shifted absolute residual
            IEnumerable<double> logResiduals = clamped.Zip(targets, (estimate, target) => Math.Log(1.0 + Math.Abs(estimate - target)));

            double meanLogResidual, ignoredVariance;
            OnlineCalculatorError meanStatus, ignoredVarianceStatus;
            OnlineMeanAndVarianceCalculator.Calculate(logResiduals, out meanLogResidual, out ignoredVariance, out meanStatus, out ignoredVarianceStatus);

            return meanStatus == OnlineCalculatorError.None ? meanLogResidual : double.NaN;
        }
    /// <summary>
    /// Computes two quality values for <paramref name="solution"/>: the squared Pearson's R between the
    /// target variable and the (optionally linearly scaled) interpreter outputs, and the tree length.
    /// </summary>
    /// <returns>
    /// A two-element array <c>{ r * r, solution.Length }</c>; the first element is
    /// <see cref="double.NaN"/> when the calculator reports an error state.
    /// </returns>
    public static double[] Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IClassificationProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling) {
      IEnumerable<double> predictions = interpreter.GetSymbolicExpressionTreeValues(solution, problemData.Dataset, rows);
      IEnumerable<double> targets = problemData.Dataset.GetDoubleValues(problemData.TargetVariable, rows);

      OnlineCalculatorError status;
      double correlation;
      if (!applyLinearScaling) {
        // unscaled path: clamp the raw outputs and use the static helper
        IEnumerable<double> clamped = predictions.LimitToRange(lowerEstimationLimit, upperEstimationLimit);
        correlation = OnlinePearsonsRCalculator.Calculate(targets, clamped, out status);
      } else {
        // scaled path: CalculateWithScaling fills the calculator instance
        var calculator = new OnlinePearsonsRCalculator();
        CalculateWithScaling(targets, predictions, lowerEstimationLimit, upperEstimationLimit, calculator, problemData.Dataset.Rows);
        status = calculator.ErrorState;
        correlation = calculator.R;
      }

      // an invalid correlation propagates as NaN into the squared value
      if (status != OnlineCalculatorError.None) {
        correlation = double.NaN;
      }

      double squaredCorrelation = correlation * correlation;
      return new double[] { squaredCorrelation, solution.Length };
    }
        /// <summary>
        /// Creates a random dataset and random arithmetic trees from a fixed seed, initializes the trees
        /// and measures the interpreter's evaluation throughput via <c>Util.CalculateEvaluatedNodesPerSec</c>.
        /// The threshold check is currently disabled, so <paramref name="nodesPerSecThreshold"/> is unused.
        /// </summary>
        private void TestArithmeticGrammarPerformance(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, double nodesPerSecThreshold)
        {
            var random = new MersenneTwister(31415);
            var data = Util.CreateRandomDataset(random, Rows, Columns);

            // all function argument/definition limits are set to zero
            //grammar.Symbols.OfType<Variable>().First().Enabled = false;
            var grammar = new ArithmeticExpressionGrammar
            {
                MaximumFunctionArguments = 0,
                MaximumFunctionDefinitions = 0,
                MinimumFunctionArguments = 0,
                MinimumFunctionDefinitions = 0
            };

            var trees = Util.CreateRandomTrees(random, data, grammar, N, 1, 100, 0, 0);
            foreach (SymbolicExpressionTree current in trees)
            {
                // every tree receives its own copy of the variable name list
                Util.InitTree(current, random, new List<string>(data.VariableNames));
            }

            double nodesPerSec = Util.CalculateEvaluatedNodesPerSec(trees, interpreter, data, 3);
            //mkommend: commented due to performance issues on the builder
            //Assert.IsTrue(nodesPerSec > nodesPerSecThreshold); // evaluated nodes per seconds must be larger than 15mNodes/sec
        }
 /// <summary>
 /// Passes a series of programs that use automatically defined functions (ADFs) to Evaluate,
 /// each together with a row index and the expected result.
 /// NOTE(review): Evaluate is defined elsewhere; presumably it asserts that the interpreter's
 /// output for the given row matches the expected value - confirm.
 /// </summary>
 private void EvaluateAdf(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, Dataset ds)
 {
     // ADF without arguments that returns a constant
     Evaluate(interpreter, ds, @"(PROG 
                             (MAIN 
                               (CALL ADF0)) 
                             (defun ADF0 1.0))", 1, 1.0);
     // the same ADF called twice inside one expression: 2.0 * 2.0
     Evaluate(interpreter, ds, @"(PROG 
                             (MAIN 
                               (CALL ADF0) (CALL ADF0)))
                             (defun ADF0 2.0))", 1, 4.0);
     // ADF with two arguments: 2.0 + 3.0
     Evaluate(interpreter, ds, @"(PROG 
                             (MAIN 
                               (CALL ADF0 2.0 3.0))
                             (defun ADF0 
                               (+ (ARG 0) (ARG 1))))", 1, 5.0);
     // ADF1 calls ADF0 with both argument orders, so the two differences cancel
     Evaluate(interpreter, ds, @"(PROG 
                             (MAIN (CALL ADF1 2.0 3.0))
                             (defun ADF0 
                               (- (ARG 1) (ARG 0)))
                             (defun ADF1
                               (+ (CALL ADF0 (ARG 1) (ARG 0))
                                  (CALL ADF0 (ARG 0) (ARG 1)))))", 1, 0.0);
     // nested call with a variable as first argument and swapped argument order
     Evaluate(interpreter, ds, @"(PROG 
                             (MAIN (CALL ADF1 (variable 2.0 a) 3.0))
                             (defun ADF0 
                               (- (ARG 1) (ARG 0)))
                             (defun ADF1                                                                              
                               (CALL ADF0 (ARG 1) (ARG 0))))", 1, 1.0);
     // cancelling differences again, this time with a variable argument
     Evaluate(interpreter, ds,
              @"(PROG 
                             (MAIN (CALL ADF1 (variable 2.0 a) 3.0))
                             (defun ADF0 
                               (- (ARG 1) (ARG 0)))
                             (defun ADF1                                                                              
                               (+ (CALL ADF0 (ARG 1) (ARG 0))
                                  (CALL ADF0 (ARG 0) (ARG 1)))))", 1, 0.0);
 }
Exemplo n.º 36
0
    /// <summary>
    /// Measures the evaluation throughput of <paramref name="interpreter"/>: every tree is evaluated
    /// once untimed (warm up) and then <paramref name="repetitions"/> times under a stopwatch.
    /// The result is written to the console and returned.
    /// </summary>
    /// <param name="trees">Trees to evaluate.</param>
    /// <param name="interpreter">Interpreter under test.</param>
    /// <param name="dataset">Dataset whose rows 0..Rows-1 are evaluated.</param>
    /// <param name="repetitions">Number of timed passes over all trees.</param>
    /// <returns>The value computed by <c>Util.NodesPerSecond</c> for the accumulated node count and elapsed time.</returns>
    public static double CalculateEvaluatedNodesPerSec(ISymbolicExpressionTree[] trees, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, Dataset dataset, int repetitions) {
      IEnumerable<int> rows = Enumerable.Range(0, dataset.Rows).ToList();

      // warm up (untimed) and count the node evaluations of a single pass
      long nNodes = 0;
      for (int i = 0; i < trees.Length; i++) {
        // widen to long BEFORE multiplying; the int product could overflow for large trees or datasets
        // NOTE(review): the factor is Rows - 1 although all Rows rows are evaluated - confirm this is intended
        nNodes += (long)trees[i].Length * (dataset.Rows - 1);
        interpreter.GetSymbolicExpressionTreeValues(trees[i], dataset, rows).Count(); // count needs to evaluate all rows
      }

      // the stopwatch accumulates the elapsed time over all repetitions
      Stopwatch watch = new Stopwatch();
      for (int rep = 0; rep < repetitions; rep++) {
        watch.Start();
        for (int i = 0; i < trees.Length; i++) {
          interpreter.GetSymbolicExpressionTreeValues(trees[i], dataset, rows).Count(); // count needs to evaluate all rows
        }
        watch.Stop();
      }

      // compute the throughput once and use it for both the log line and the return value
      var nodesPerSec = Util.NodesPerSecond(nNodes * repetitions, watch);
      Console.WriteLine("Random tree evaluation performance of " + interpreter.GetType() + ": " +
        watch.ElapsedMilliseconds + "ms " +
        nodesPerSec + " nodes/sec");
      return nodesPerSec;
    }
 /// <summary>
 /// Creates a regression model by forwarding the tree, the interpreter and the estimation limits
 /// to the base class constructor. The limits default to the full <see cref="double"/> range.
 /// </summary>
 public SymbolicRegressionModel(
   ISymbolicExpressionTree tree,
   ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
   double lowerEstimationLimit = double.MinValue,
   double upperEstimationLimit = double.MaxValue)
   : base(tree, interpreter, lowerEstimationLimit, upperEstimationLimit) {
 }
        /// <summary>
        /// Prunes a clone of <paramref name="tree"/>: the nodes below the first branch of the first root
        /// subtree are visited in prefix order, and every non-constant node whose impact is low enough is
        /// replaced by a constant node carrying the calculated replacement value.
        /// </summary>
        /// <param name="nodeImpactThreshold">Nodes with an impact above this value are kept (only used when <paramref name="pruneOnlyZeroImpactNodes"/> is false).</param>
        /// <param name="pruneOnlyZeroImpactNodes">When true, only nodes whose impact <c>IsAlmost(0.0)</c> are replaced.</param>
        /// <returns>The <c>SymbolicExpressionTree</c> of the model that was created from the cloned tree.</returns>
        public static ISymbolicExpressionTree Prune(ISymbolicExpressionTree tree, ISymbolicClassificationModelCreator modelCreator,
                                                    SymbolicClassificationSolutionImpactValuesCalculator impactValuesCalculator, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
                                                    IClassificationProblemData problemData, DoubleLimit estimationLimits, IEnumerable<int> rows,
                                                    double nodeImpactThreshold = 0.0, bool pruneOnlyZeroImpactNodes = false)
        {
            var workingTree = (ISymbolicExpressionTree)tree.Clone();
            var model = modelCreator.CreateSymbolicClassificationModel(workingTree, interpreter, estimationLimits.Lower, estimationLimits.Upper);
            var candidates = workingTree.Root.GetSubtree(0).GetSubtree(0).IterateNodesPrefix().ToList();

            // quality handed to the impact calculator; starts as NaN and is refreshed after every replacement
            double referenceQuality = double.NaN;

            int position = 0;
            while (position < candidates.Count)
            {
                var candidate = candidates[position];
                position++;

                if (candidate is ConstantTreeNode)
                {
                    continue;
                }

                double impact, replacement, updatedQuality;
                impactValuesCalculator.CalculateImpactAndReplacementValues(model, candidate, problemData, rows, out impact, out replacement, out updatedQuality, referenceQuality);

                // zero-impact mode keeps everything that is not (almost) zero; threshold mode keeps everything above the threshold
                bool keepNode = pruneOnlyZeroImpactNodes
                    ? !impact.IsAlmost(0.0)
                    : impact > nodeImpactThreshold;
                if (keepNode)
                {
                    continue;
                }

                var replacementNode = (ConstantTreeNode)candidate.Grammar.GetSymbol("Constant").CreateTreeNode();
                replacementNode.Value = replacement;
                ReplaceWithConstant(candidate, replacementNode);

                // the list still contains the descendants of the folded node; jump over them
                position += candidate.GetLength() - 1;

                referenceQuality = updatedQuality;
            }

            return model.SymbolicExpressionTree;
        }
        /// <summary>
        /// Creates a symbolic classification model through the configured model creator and recalculates
        /// its model parameters on the training indices of the given problem data.
        /// </summary>
        /// <param name="problemData">Must be an <c>IClassificationProblemData</c>; it is cast without a type check.</param>
        protected override ISymbolicDataAnalysisModel CreateModel(ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, IDataAnalysisProblemData problemData, DoubleLimit estimationLimits)
        {
            var creator = ModelCreatorParameter.ActualValue;
            var classificationModel = creator.CreateSymbolicClassificationModel(tree, interpreter, estimationLimits.Lower, estimationLimits.Upper);

            var classificationData = (IClassificationProblemData)problemData;
            classificationModel.RecalculateModelParameters(classificationData, classificationData.TrainingIndices);

            return classificationModel;
        }
        /// <summary>
        /// Lazily yields candidate replacement values for <paramref name="node"/>:
        /// the node's own value for a constant node; 0 and 1 for a binary factor variable node;
        /// every weight followed by 0.0 for a factor variable node; and otherwise the median and the
        /// average of the values the interpreter produces for a temporary tree containing a clone of the node.
        /// </summary>
        /// <param name="node">The node (with its subtrees) to find replacement values for.</param>
        /// <param name="sourceTree">Tree whose root branches from index 1 onwards are cloned into the temporary tree.</param>
        /// <param name="interpreter">Interpreter used to evaluate the temporary tree.</param>
        /// <param name="dataset">Dataset the temporary tree is evaluated on.</param>
        /// <param name="rows">Rows of the dataset to evaluate.</param>
        protected IEnumerable<double> CalculateReplacementValues(ISymbolicExpressionTreeNode node, ISymbolicExpressionTree sourceTree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
                                                                  IDataset dataset, IEnumerable<int> rows)
        {
            //optimization: constant nodes return always the same value
            ConstantTreeNode constantNode = node as ConstantTreeNode;
            BinaryFactorVariableTreeNode binaryFactorNode = node as BinaryFactorVariableTreeNode;
            FactorVariableTreeNode factorNode = node as FactorVariableTreeNode;

            if (constantNode != null)
            {
                yield return constantNode.Value;
            }
            else if (binaryFactorNode != null)
            {
                // valid replacements are either all off or all on
                yield return 0;
                yield return 1;
            }
            else if (factorNode != null)
            {
                foreach (var w in factorNode.Weights)
                {
                    yield return w;
                }
                yield return 0.0;
            }
            else
            {
                var rootSymbol = new ProgramRootSymbol().CreateTreeNode();
                var startSymbol = new StartSymbol().CreateTreeNode();
                rootSymbol.AddSubtree(startSymbol);
                startSymbol.AddSubtree((ISymbolicExpressionTreeNode)node.Clone());

                var tempTree = new SymbolicExpressionTree(rootSymbol);
                // clone ADFs of source tree
                for (int i = 1; i < sourceTree.Root.SubtreeCount; i++)
                {
                    tempTree.Root.AddSubtree((ISymbolicExpressionTreeNode)sourceTree.Root.GetSubtree(i).Clone());
                }

                // interpret the temporary tree only once and derive both statistics from the cached values
                // (previously the tree was evaluated a second time just to compute the average)
                var values = interpreter.GetSymbolicExpressionTreeValues(tempTree, dataset, rows).ToList();
                yield return values.Median();
                yield return values.Average();
            }
        }
 /// <summary>
 /// Creates a nearest-neighbour classification model: the target variable, tree, interpreter and
 /// estimation limits are forwarded to the base class; <paramref name="k"/> is stored and a new
 /// <c>ClassFrequencyComparer</c> is created.
 /// </summary>
 public SymbolicNearestNeighbourClassificationModel(
     string targetVariable,
     int k,
     ISymbolicExpressionTree tree,
     ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
     double lowerEstimationLimit = double.MinValue,
     double upperEstimationLimit = double.MaxValue)
     : base(targetVariable, tree, interpreter, lowerEstimationLimit, upperEstimationLimit)
 {
     frequencyComparer = new ClassFrequencyComparer();
     this.k = k;
 }
Exemplo n.º 42
0
        /// <summary>
        /// Builds a discriminant function classification model for the problem's target variable using an
        /// <c>AccuracyMaximizationThresholdCalculator</c> and recalculates its model parameters on the given rows.
        /// </summary>
        private static SymbolicDiscriminantFunctionClassificationModel CreateDiscriminantFunctionModel(ISymbolicExpressionTree tree,
                                                                                                       ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
                                                                                                       IClassificationProblemData problemData,
                                                                                                       IEnumerable<int> rows)
        {
            var targetVariable = problemData.TargetVariable;
            var thresholdCalculator = new AccuracyMaximizationThresholdCalculator();
            var discriminantModel = new SymbolicDiscriminantFunctionClassificationModel(targetVariable, tree, interpreter, thresholdCalculator);

            discriminantModel.RecalculateModelParameters(problemData, rows);
            return discriminantModel;
        }
    /// <summary>
    /// Passes a fixed list of small expressions to Evaluate, each with a row index and the expected
    /// result. The expressions cover arithmetic, comparison, conditional and boolean symbols,
    /// trigonometric/exponential/power/root functions, mean, the time-series symbols (lag variable,
    /// integral, derivative, time lag) and special functions whose expected values come from alglib.
    /// NOTE(review): Evaluate is defined elsewhere; presumably it asserts that the interpreter's
    /// output for the given row matches the expected value - confirm.
    /// </summary>
    /// <param name="interpreter">Interpreter under test.</param>
    /// <param name="ds">Dataset providing the variables "a"/"b" referenced by the expressions.</param>
    private void EvaluateOperations(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, Dataset ds) {
      // addition
      Evaluate(interpreter, ds, "(+ (variable 2.0 a ))", 1, 4.0);
      Evaluate(interpreter, ds, "(+ (variable 2.0 a ) (variable 3.0 b ))", 0, 5.0);
      Evaluate(interpreter, ds, "(+ (variable 2.0 a ) (variable 3.0 b ))", 1, 10.0);
      Evaluate(interpreter, ds, "(+ (variable 2.0 a) (variable 3.0 b ))", 2, 8.0);
      Evaluate(interpreter, ds, "(+ 8.0 2.0 2.0)", 0, 12.0);

      // subtraction
      Evaluate(interpreter, ds, "(- (variable 2.0 a ))", 1, -4.0);
      Evaluate(interpreter, ds, "(- (variable 2.0 a ) (variable 3.0 b))", 0, -1.0);
      Evaluate(interpreter, ds, "(- (variable 2.0 a ) (variable 3.0 b ))", 1, -2.0);
      Evaluate(interpreter, ds, "(- (variable 2.0 a ) (variable 3.0 b ))", 2, -4.0);
      Evaluate(interpreter, ds, "(- 8.0 2.0 2.0)", 0, 4.0);

      // multiplication
      Evaluate(interpreter, ds, "(* (variable 2.0 a ))", 0, 2.0);
      Evaluate(interpreter, ds, "(* (variable 2.0 a ) (variable 3.0 b ))", 0, 6.0);
      Evaluate(interpreter, ds, "(* (variable 2.0 a ) (variable 3.0 b ))", 1, 24.0);
      Evaluate(interpreter, ds, "(* (variable 2.0 a ) (variable 3.0 b ))", 2, 12.0);
      Evaluate(interpreter, ds, "(* 8.0 2.0 2.0)", 0, 32.0);

      // division
      Evaluate(interpreter, ds, "(/ (variable 2.0 a ))", 1, 1.0 / 4.0);
      Evaluate(interpreter, ds, "(/ (variable 2.0 a ) 2.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(/ (variable 2.0 a ) 2.0)", 1, 2.0);
      Evaluate(interpreter, ds, "(/ (variable 3.0 b ) 2.0)", 2, 3.0);
      Evaluate(interpreter, ds, "(/ 8.0 2.0 2.0)", 0, 2.0);

      // gt
      Evaluate(interpreter, ds, "(> (variable 2.0 a) 2.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(> 2.0 (variable 2.0 a))", 0, -1.0);
      Evaluate(interpreter, ds, "(> (variable 2.0 a) 1.9)", 0, 1.0);
      Evaluate(interpreter, ds, "(> 1.9 (variable 2.0 a))", 0, -1.0);
      Evaluate(interpreter, ds, "(> (log -1.0) (log -1.0))", 0, -1.0); // (> nan nan) should be false

      // lt
      Evaluate(interpreter, ds, "(< (variable 2.0 a) 2.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(< 2.0 (variable 2.0 a))", 0, -1.0);
      Evaluate(interpreter, ds, "(< (variable 2.0 a) 1.9)", 0, -1.0);
      Evaluate(interpreter, ds, "(< 1.9 (variable 2.0 a))", 0, 1.0);
      Evaluate(interpreter, ds, "(< (log -1.0) (log -1.0))", 0, -1.0); // (< nan nan) should be false

      // If
      Evaluate(interpreter, ds, "(if -10.0 2.0 3.0)", 0, 3.0);
      Evaluate(interpreter, ds, "(if -1.0 2.0 3.0)", 0, 3.0);
      Evaluate(interpreter, ds, "(if 0.0 2.0 3.0)", 0, 3.0);
      Evaluate(interpreter, ds, "(if 1.0 2.0 3.0)", 0, 2.0);
      Evaluate(interpreter, ds, "(if 10.0 2.0 3.0)", 0, 2.0);
      Evaluate(interpreter, ds, "(if (log -1.0) 2.0 3.0)", 0, 3.0); // if(nan) should return the else branch

      // NOT
      Evaluate(interpreter, ds, "(not -1.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(not -2.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(not 1.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(not 2.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(not 0.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(not (log -1.0))", 0, 1.0);

      // AND
      Evaluate(interpreter, ds, "(and -1.0 -2.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(and -1.0 2.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(and 1.0 -2.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(and 1.0 0.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(and 0.0 0.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(and 1.0 2.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(and 1.0 2.0 3.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(and 1.0 -2.0 3.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(and (log -1.0))", 0, -1.0); // (and NaN)
      Evaluate(interpreter, ds, "(and (log -1.0)  1.0)", 0, -1.0); // (and NaN 1.0)

      // OR
      Evaluate(interpreter, ds, "(or -1.0 -2.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(or -1.0 2.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(or 1.0 -2.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(or 1.0 2.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(or 0.0 0.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(or -1.0 -2.0 -3.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(or -1.0 -2.0 3.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(or (log -1.0))", 0, -1.0); // (or NaN)
      Evaluate(interpreter, ds, "(or (log -1.0)  1.0)", 0, -1.0); // (or NaN 1.0)

      // XOR
      Evaluate(interpreter, ds, "(xor -1.0 -2.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(xor -1.0 2.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(xor 1.0 -2.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(xor 1.0 2.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(xor 0.0 0.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(xor -1.0 -2.0 -3.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(xor -1.0 -2.0 3.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(xor -1.0 2.0 3.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(xor 1.0 2.0 3.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(xor (log -1.0))", 0, -1.0);
      Evaluate(interpreter, ds, "(xor (log -1.0)  1.0)", 0, 1.0);

      // sin, cos, tan
      Evaluate(interpreter, ds, "(sin " + Math.PI.ToString(NumberFormatInfo.InvariantInfo) + ")", 0, 0.0);
      Evaluate(interpreter, ds, "(sin 0.0)", 0, 0.0);
      Evaluate(interpreter, ds, "(cos " + Math.PI.ToString(NumberFormatInfo.InvariantInfo) + ")", 0, -1.0);
      Evaluate(interpreter, ds, "(cos 0.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(tan " + Math.PI.ToString(NumberFormatInfo.InvariantInfo) + ")", 0, Math.Tan(Math.PI));
      // the expected value must belong to the argument 0.0 (it used to be a copy of the Math.PI case above)
      Evaluate(interpreter, ds, "(tan 0.0)", 0, Math.Tan(0.0));

      // exp, log
      Evaluate(interpreter, ds, "(log (exp 7.0))", 0, Math.Log(Math.Exp(7)));
      Evaluate(interpreter, ds, "(exp (log 7.0))", 0, Math.Exp(Math.Log(7)));
      Evaluate(interpreter, ds, "(log -3.0)", 0, Math.Log(-3));

      // power
      Evaluate(interpreter, ds, "(pow 2.0 3.0)", 0, 8.0);
      Evaluate(interpreter, ds, "(pow 4.0 0.5)", 0, 1.0); // interpreter should round to the nearest integer value (.5 is rounded to the even number)
      Evaluate(interpreter, ds, "(pow 4.0 2.5)", 0, 16.0); // interpreter should round to the nearest integer value (.5 is rounded to the even number)
      Evaluate(interpreter, ds, "(pow -2.0 3.0)", 0, -8.0);
      Evaluate(interpreter, ds, "(pow 2.0 -3.0)", 0, 1.0 / 8.0);
      Evaluate(interpreter, ds, "(pow -2.0 -3.0)", 0, -1.0 / 8.0);

      // root
      Evaluate(interpreter, ds, "(root 9.0 2.0)", 0, 3.0);
      Evaluate(interpreter, ds, "(root 27.0 3.0)", 0, 3.0);
      Evaluate(interpreter, ds, "(root 2.0 -3.0)", 0, Math.Pow(2.0, -1.0 / 3.0));

      // mean
      Evaluate(interpreter, ds, "(mean -1.0 1.0 -1.0)", 0, -1.0 / 3.0);

      // lag
      Evaluate(interpreter, ds, "(lagVariable 1.0 a -1) ", 1, ds.GetDoubleValue("A", 0));
      Evaluate(interpreter, ds, "(lagVariable 1.0 a -1) ", 2, ds.GetDoubleValue("A", 1));
      Evaluate(interpreter, ds, "(lagVariable 1.0 a 0) ", 2, ds.GetDoubleValue("A", 2));
      Evaluate(interpreter, ds, "(lagVariable 1.0 a 1) ", 0, ds.GetDoubleValue("A", 1));

      // integral
      Evaluate(interpreter, ds, "(integral -1.0 (variable 1.0 a)) ", 1, ds.GetDoubleValue("A", 0) + ds.GetDoubleValue("A", 1));
      Evaluate(interpreter, ds, "(integral -1.0 (lagVariable 1.0 a 1)) ", 1, ds.GetDoubleValue("A", 1) + ds.GetDoubleValue("A", 2));
      Evaluate(interpreter, ds, "(integral -2.0 (variable 1.0 a)) ", 2, ds.GetDoubleValue("A", 0) + ds.GetDoubleValue("A", 1) + ds.GetDoubleValue("A", 2));
      Evaluate(interpreter, ds, "(integral -1.0 (* (variable 1.0 a) (variable 1.0 b)))", 1, ds.GetDoubleValue("A", 0) * ds.GetDoubleValue("B", 0) + ds.GetDoubleValue("A", 1) * ds.GetDoubleValue("B", 1));
      Evaluate(interpreter, ds, "(integral -2.0 3.0)", 1, 9.0);

      // derivative
      // (f_0 + 2 * f_1 - 2 * f_3 - f_4) / 8; // h = 1
      Evaluate(interpreter, ds, "(diff (variable 1.0 a)) ", 5, (ds.GetDoubleValue("A", 5) + 2 * ds.GetDoubleValue("A", 4) - 2 * ds.GetDoubleValue("A", 2) - ds.GetDoubleValue("A", 1)) / 8.0);
      Evaluate(interpreter, ds, "(diff (variable 1.0 b)) ", 5, (ds.GetDoubleValue("B", 5) + 2 * ds.GetDoubleValue("B", 4) - 2 * ds.GetDoubleValue("B", 2) - ds.GetDoubleValue("B", 1)) / 8.0);
      Evaluate(interpreter, ds, "(diff (* (variable 1.0 a) (variable 1.0 b)))", 5,
        (ds.GetDoubleValue("A", 5) * ds.GetDoubleValue("B", 5) +
        2 * ds.GetDoubleValue("A", 4) * ds.GetDoubleValue("B", 4) -
        2 * ds.GetDoubleValue("A", 2) * ds.GetDoubleValue("B", 2) -
        ds.GetDoubleValue("A", 1) * ds.GetDoubleValue("B", 1)) / 8.0);
      Evaluate(interpreter, ds, "(diff -2.0 3.0)", 5, 0.0);

      // timelag
      Evaluate(interpreter, ds, "(lag -1.0 (lagVariable 1.0 a 2)) ", 1, ds.GetDoubleValue("A", 2));
      Evaluate(interpreter, ds, "(lag -2.0 (lagVariable 1.0 a 2)) ", 2, ds.GetDoubleValue("A", 2));
      Evaluate(interpreter, ds, "(lag -1.0 (* (lagVariable 1.0 a 1) (lagVariable 1.0 b 2)))", 1, ds.GetDoubleValue("A", 1) * ds.GetDoubleValue("B", 2));
      Evaluate(interpreter, ds, "(lag -2.0 3.0)", 1, 3.0);

      {
        // special functions: the expected values are taken from the corresponding alglib routines

        // Arguments are formatted culture-invariantly (like Math.PI above) so that the expression
        // text does not depend on the current culture's number format (e.g. its negative sign).
        Func<double, string> fmt = v => v.ToString(NumberFormatInfo.InvariantInfo);

        Action<double> checkAiry = (x) => {
          double ai, aip, bi, bip;
          alglib.airy(x, out ai, out aip, out bi, out bip);
          Evaluate(interpreter, ds, "(airya " + fmt(x) + ")", 0, ai);
          Evaluate(interpreter, ds, "(airyb " + fmt(x) + ")", 0, bi);
        };

        Action<double> checkBessel = (x) => {
          Evaluate(interpreter, ds, "(bessel " + fmt(x) + ")", 0, alglib.besseli0(x));
        };

        Action<double> checkSinCosIntegrals = (x) => {
          double si, ci;
          alglib.sinecosineintegrals(x, out si, out ci);
          Evaluate(interpreter, ds, "(cosint " + fmt(x) + ")", 0, ci);
          Evaluate(interpreter, ds, "(sinint " + fmt(x) + ")", 0, si);
        };
        Action<double> checkHypSinCosIntegrals = (x) => {
          double shi, chi;
          alglib.hyperbolicsinecosineintegrals(x, out shi, out chi);
          Evaluate(interpreter, ds, "(hypcosint " + fmt(x) + ")", 0, chi);
          Evaluate(interpreter, ds, "(hypsinint " + fmt(x) + ")", 0, shi);
        };
        Action<double> checkFresnelSinCosIntegrals = (x) => {
          double c = 0, s = 0;
          alglib.fresnelintegral(x, ref c, ref s);
          Evaluate(interpreter, ds, "(fresnelcosint " + fmt(x) + ")", 0, c);
          Evaluate(interpreter, ds, "(fresnelsinint " + fmt(x) + ")", 0, s);
        };
        Action<double> checkNormErf = (x) => {
          Evaluate(interpreter, ds, "(norm " + fmt(x) + ")", 0, alglib.normaldistribution(x));
          Evaluate(interpreter, ds, "(erf " + fmt(x) + ")", 0, alglib.errorfunction(x));
        };

        Action<double> checkGamma = (x) => {
          Evaluate(interpreter, ds, "(gamma " + fmt(x) + ")", 0, alglib.gammafunction(x));
        };
        Action<double> checkPsi = (x) => {
          try {
            Evaluate(interpreter, ds, "(psi " + fmt(x) + ")", 0, alglib.psi(x));
          }
          catch (alglib.alglibexception) { // ignore cases where alglib throws an exception
          }
        };
        Action<double> checkDawson = (x) => {
          Evaluate(interpreter, ds, "(dawson " + fmt(x) + ")", 0, alglib.dawsonintegral(x));
        };
        Action<double> checkExpInt = (x) => {
          Evaluate(interpreter, ds, "(expint " + fmt(x) + ")", 0, alglib.exponentialintegralei(x));
        };

        // run every special-function check on the same small set of arguments
        foreach (var e in new[] { -2.0, -1.0, 0.0, 1.0, 2.0 }) {
          checkAiry(e);
          checkBessel(e);
          checkSinCosIntegrals(e);
          checkGamma(e);
          checkExpInt(e);
          checkDawson(e);
          checkPsi(e);
          checkNormErf(e);
          checkFresnelSinCosIntegrals(e);
          checkHypSinCosIntegrals(e);
        }
      }
    }
 /// <summary>
 /// Implemented by derived classes to create the <c>ISymbolicDataAnalysisModel</c> for the given
 /// tree, interpreter, problem data and estimation limits.
 /// </summary>
 /// <param name="tree">The symbolic expression tree the model is created for.</param>
 /// <param name="interpreter">The interpreter handed to the model.</param>
 /// <param name="problemData">The problem data available to the implementation.</param>
 /// <param name="estimationLimits">The lower/upper limits available to the implementation.</param>
 protected abstract ISymbolicDataAnalysisModel CreateModel(ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, IDataAnalysisProblemData problemData, DoubleLimit estimationLimits);
Exemplo n.º 45
0
 /// <summary>
 /// Evaluates <paramref name="solution"/> on the given rows of <paramref name="dataset"/> and passes
 /// the resulting values to <c>Model.GetSignals</c>.
 /// </summary>
 private static IEnumerable<double> GetSignals(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, IDataset dataset, IEnumerable<int> rows) {
   var treeOutputs = interpreter.GetSymbolicExpressionTreeValues(solution, dataset, rows);
   return Model.GetSignals(treeOutputs);
 }
 /// <summary>
 /// Creates a <c>SymbolicDiscriminantFunctionClassificationModel</c> for the given tree and interpreter
 /// that uses a <c>NormalDistributionCutPointsThresholdCalculator</c> and the given estimation limits.
 /// </summary>
 public ISymbolicDiscriminantFunctionClassificationModel CreateSymbolicDiscriminantFunctionClassificationModel(ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue) {
   var thresholdCalculator = new NormalDistributionCutPointsThresholdCalculator();
   var discriminantModel = new SymbolicDiscriminantFunctionClassificationModel(tree, interpreter, thresholdCalculator, lowerEstimationLimit, upperEstimationLimit);
   return discriminantModel;
 }
    /// <summary>
    /// Builds a random dataset and a set of random arithmetic expression trees (fixed seed), initializes
    /// the trees and lets <c>Util.CalculateEvaluatedNodesPerSec</c> measure the interpreter's throughput.
    /// The comparison against <paramref name="nodesPerSecThreshold"/> is currently commented out.
    /// </summary>
    private void TestArithmeticGrammarPerformance(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, double nodesPerSecThreshold) {
      var rng = new MersenneTwister(31415);
      var randomData = Util.CreateRandomDataset(rng, Rows, Columns);

      // grammar with all function argument/definition limits set to zero
      var arithmeticGrammar = new ArithmeticExpressionGrammar();
      //grammar.Symbols.OfType<Variable>().First().Enabled = false;
      arithmeticGrammar.MaximumFunctionArguments = 0;
      arithmeticGrammar.MaximumFunctionDefinitions = 0;
      arithmeticGrammar.MinimumFunctionArguments = 0;
      arithmeticGrammar.MinimumFunctionDefinitions = 0;

      var generatedTrees = Util.CreateRandomTrees(rng, randomData, arithmeticGrammar, N, 1, 100, 0, 0);
      foreach (SymbolicExpressionTree generated in generatedTrees) {
        // a fresh list of variable names is handed to every tree
        var variableNames = new List<string>(randomData.VariableNames);
        Util.InitTree(generated, rng, variableNames);
      }

      double nodesPerSec = Util.CalculateEvaluatedNodesPerSec(generatedTrees, interpreter, randomData, 3);
      //mkommend: commented due to performance issues on the builder
      //Assert.IsTrue(nodesPerSec > nodesPerSecThreshold); // evaluated nodes per seconds must be larger than 15mNodes/sec
    }
    /// <summary>
    /// Passes expressions built only from terminals (constants and a weighted variable) to Evaluate,
    /// each with a row index and the expected result.
    /// </summary>
    private void EvaluateTerminals(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, Dataset ds) {
      const string constantSum = "(+ 1.5 3.5)";
      const string weightedVariable = "(variable 2.0 a)";

      // constants: 1.5 + 3.5
      Evaluate(interpreter, ds, constantSum, 0, 5.0);

      // variables: the same weighted variable checked on rows 0 and 1
      Evaluate(interpreter, ds, weightedVariable, 0, 2.0);
      Evaluate(interpreter, ds, weightedVariable, 1, 4.0);
    }
    /// <summary>
    /// Passes a series of programs that use automatically defined functions (ADFs) to Evaluate,
    /// each together with a row index and the expected result.
    /// NOTE(review): Evaluate is defined elsewhere; presumably it asserts that the interpreter's
    /// output for the given row matches the expected value - confirm.
    /// </summary>
    private void EvaluateAdf(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, Dataset ds) {

      // ADF without arguments that returns a constant
      Evaluate(interpreter, ds, @"(PROG 
                                    (MAIN 
                                      (CALL ADF0)) 
                                    (defun ADF0 1.0))", 1, 1.0);
      // the same ADF called twice inside one expression: 2.0 * 2.0
      Evaluate(interpreter, ds, @"(PROG 
                                    (MAIN 
                                      (* (CALL ADF0) (CALL ADF0)))
                                    (defun ADF0 2.0))", 1, 4.0);
      // ADF with two arguments: 2.0 + 3.0
      Evaluate(interpreter, ds, @"(PROG 
                                    (MAIN 
                                      (CALL ADF0 2.0 3.0))
                                    (defun ADF0 
                                      (+ (ARG 0) (ARG 1))))", 1, 5.0);
      // ADF1 calls ADF0 with both argument orders, so the two differences cancel
      Evaluate(interpreter, ds, @"(PROG 
                                    (MAIN (CALL ADF1 2.0 3.0))
                                    (defun ADF0 
                                      (- (ARG 1) (ARG 0)))
                                    (defun ADF1
                                      (+ (CALL ADF0 (ARG 1) (ARG 0))
                                         (CALL ADF0 (ARG 0) (ARG 1)))))", 1, 0.0);
      // nested call with a variable as first argument and swapped argument order
      Evaluate(interpreter, ds, @"(PROG 
                                    (MAIN (CALL ADF1 (variable 2.0 a) 3.0))
                                    (defun ADF0 
                                      (- (ARG 1) (ARG 0)))
                                    (defun ADF1                                                                              
                                      (CALL ADF0 (ARG 1) (ARG 0))))", 1, 1.0);
      // cancelling differences again, this time with a variable argument
      Evaluate(interpreter, ds,
               @"(PROG 
                                    (MAIN (CALL ADF1 (variable 2.0 a) 3.0))
                                    (defun ADF0 
                                      (- (ARG 1) (ARG 0)))
                                    (defun ADF1                                                                              
                                      (+ (CALL ADF0 (ARG 1) (ARG 0))
                                         (CALL ADF0 (ARG 0) (ARG 1)))))", 1, 0.0);
    }
Exemplo n.º 50
0
        /// <summary>
        /// Takes two parent individuals P0 and P1.
        /// Randomly choose a node i from the first parent, then for each matching node j from the second parent, calculate the behavioral distance based on the range:
        /// d(i,j) = 0.5 * ( abs(max(i) - max(j)) + abs(min(i) - min(j)) ).
        /// Next, assign probabilities for the selection of a node j based on the inversed and normalized behavioral distance, then make a random weighted choice.
        /// </summary>
        /// <param name="random">Random source used to sample the cut point in <paramref name="parent0"/> and to make the weighted choice among candidate branches.</param>
        /// <param name="parent0">First parent; the cut point is sampled from this tree and this same instance is returned.</param>
        /// <param name="parent1">Second parent; candidate replacement branches are collected from this tree.</param>
        /// <param name="interpreter">Interpreter used to evaluate the ad-hoc trees built around the candidate nodes.</param>
        /// <param name="problemData">Problem data whose <c>Dataset</c> is evaluated.</param>
        /// <param name="rows">Dataset rows on which the branches are evaluated.</param>
        /// <param name="maxDepth">Upper bound checked against (candidate depth + level of the cut point).</param>
        /// <param name="maxLength">Upper bound checked against (candidate length + length of parent0 without the removed child).</param>
        /// <returns>
        /// Always <paramref name="parent0"/>: unchanged when no candidate branch is available (or none has a finite distance),
        /// otherwise after <c>Swap</c> has been called with the sampled cut point and the selected branch.
        /// </returns>
        public static ISymbolicExpressionTree Cross(IRandom random, ISymbolicExpressionTree parent0, ISymbolicExpressionTree parent1,
                                                    ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, T problemData, IList <int> rows, int maxDepth, int maxLength)
        {
            var crossoverPoints0 = new List <CutPoint>();

            // collect every (parent, child) pair of parent0 that may serve as a cut point
            parent0.Root.ForEachNodePostfix((n) => {
                // the if clauses prevent the root or the startnode from being selected, although the startnode can be the parent of the node being swapped.
                if (n.Parent != null && n.Parent != parent0.Root)
                {
                    crossoverPoints0.Add(new CutPoint(n.Parent, n));
                }
            });

            var crossoverPoint0 = crossoverPoints0.SampleRandom(random);
            // level: position of the cut point in parent0; length: size of parent0 once the child branch is taken out
            int level           = parent0.Root.GetBranchLevel(crossoverPoint0.Child);
            int length          = parent0.Root.GetLength() - crossoverPoint0.Child.GetLength();

            var allowedBranches = new List <ISymbolicExpressionTreeNode>();

            // collect the branches of parent1 that respect the depth/length limits and match the cut point type
            parent1.Root.ForEachNodePostfix((n) => {
                if (n.Parent != null && n.Parent != parent1.Root)
                {
                    if (n.GetDepth() + level <= maxDepth && n.GetLength() + length <= maxLength && crossoverPoint0.IsMatchingPointType(n))
                    {
                        allowedBranches.Add(n);
                    }
                }
            });

            // nothing to swap in: hand back the first parent untouched
            if (allowedBranches.Count == 0)
            {
                return(parent0);
            }

            var dataset = problemData.Dataset;

            // create symbols in order to improvise an ad-hoc tree so that the child can be evaluated
            var    rootSymbol = new ProgramRootSymbol();
            var    startSymbol = new StartSymbol();
            var    tree0 = CreateTreeFromNode(random, crossoverPoint0.Child, rootSymbol, startSymbol); // this will change crossoverPoint0.Child.Parent
            // NOTE(review): min0/max0 (and min1/max1 below) start at 0.0, so the computed "min" is never above 0 and the
            // "max" never below 0; NaN outputs never replace them because both comparisons are false for NaN.
            // This differs from the plain min/max in the summary formula - confirm whether that is intended.
            double min0 = 0.0, max0 = 0.0;

            foreach (double v in interpreter.GetSymbolicExpressionTreeValues(tree0, dataset, rows))
            {
                if (min0 > v)
                {
                    min0 = v;
                }
                if (max0 < v)
                {
                    max0 = v;
                }
            }
            crossoverPoint0.Child.Parent = crossoverPoint0.Parent; // restore correct parent

            // weights[i] holds the behavioral distance of allowedBranches[i]; both lists are kept index-aligned
            var weights = new List <double>();

            foreach (var node in allowedBranches)
            {
                var    parent = node.Parent; // remembered because CreateTreeFromNode re-parents the node
                var    tree1 = CreateTreeFromNode(random, node, rootSymbol, startSymbol);
                double min1 = 0.0, max1 = 0.0;
                foreach (double v in interpreter.GetSymbolicExpressionTreeValues(tree1, dataset, rows))
                {
                    if (min1 > v)
                    {
                        min1 = v;
                    }
                    if (max1 < v)
                    {
                        max1 = v;
                    }
                }
                double behavioralDistance = (Math.Abs(min0 - min1) + Math.Abs(max0 - max1)) / 2; // this can be NaN or Infinity because some trees are crazy like exp(exp(exp(...))), we correct that below
                weights.Add(behavioralDistance);
                node.Parent = parent;                                                            // restore correct node parent
            }

            // remove branches with an infinite or NaN behavioral distance
            // (iterating backwards keeps the remaining indices valid while removing from both lists)
            for (int i = weights.Count - 1; i >= 0; --i)
            {
                if (Double.IsNaN(weights[i]) || Double.IsInfinity(weights[i]))
                {
                    weights.RemoveAt(i);
                    allowedBranches.RemoveAt(i);
                }
            }
            // check if there are any allowed branches left
            if (allowedBranches.Count == 0)
            {
                return(parent0);
            }

            ISymbolicExpressionTreeNode selectedBranch;
            double sum = weights.Sum();

            if (sum.IsAlmost(0.0) || weights.Count == 1) // if there is only one allowed branch, or if all weights are zero
            {
                // normalizing would divide by zero in both cases, so the first candidate is taken directly
                selectedBranch = allowedBranches[0];
            }
            else
            {
                for (int i = 0; i != weights.Count; ++i) // normalize and invert values
                {
                    weights[i] = 1 - weights[i] / sum; // small distance => large weight
                }

                sum = weights.Sum(); // take new sum

                // compute the probabilities (selection weights)
                for (int i = 0; i != weights.Count; ++i)
                {
                    weights[i] /= sum;
                }

                // 612/618 are the compiler's obsolete-member warnings; presumably SelectRandom is marked obsolete - TODO confirm
#pragma warning disable 612, 618
                selectedBranch = allowedBranches.SelectRandom(weights, random);
#pragma warning restore 612, 618
            }
            Swap(crossoverPoint0, selectedBranch);
            return(parent0);
        }
        /// <summary>
        /// Takes two parent individuals P0 and P1.
        /// Randomly chooses a node i from the first parent, then takes the first node j from the second parent that matches the semantic similarity criteria:
        /// the mean absolute error between the outputs of i and j on <paramref name="rows"/> must lie within <paramref name="range"/> (both bounds inclusive).
        /// </summary>
        /// <param name="random">Random source used to sample the cut point in <paramref name="parent0"/>.</param>
        /// <param name="parent0">First parent; the cut point is sampled from this tree and this same instance is returned.</param>
        /// <param name="parent1">Second parent; candidate replacement branches are collected from this tree.</param>
        /// <param name="interpreter">Interpreter used to evaluate the ad-hoc trees built around the candidate nodes.</param>
        /// <param name="problemData">Problem data whose <c>Dataset</c> is evaluated.</param>
        /// <param name="rows">Dataset rows on which the branches are evaluated.</param>
        /// <param name="maxDepth">Upper bound checked against (candidate depth + level of the cut point).</param>
        /// <param name="maxLength">Upper bound checked against (candidate length + length of parent0 without the removed child).</param>
        /// <param name="range">Accepted interval for the mean absolute error between the two branches' outputs.</param>
        /// <returns>
        /// Always <paramref name="parent0"/>: unchanged when no candidate fulfills the criteria,
        /// otherwise after <c>Swap</c> has been called with the sampled cut point and the selected branch.
        /// </returns>
        public static ISymbolicExpressionTree Cross(IRandom random, ISymbolicExpressionTree parent0, ISymbolicExpressionTree parent1, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
                                                    T problemData, List <int> rows, int maxDepth, int maxLength, DoubleRange range)
        {
            var crossoverPoints0 = new List <CutPoint>();

            // collect every (parent, child) pair of parent0 that may serve as a cut point;
            // the condition excludes the root and the node directly below it
            parent0.Root.ForEachNodePostfix((n) => {
                if (n.Parent != null && n.Parent != parent0.Root)
                {
                    crossoverPoints0.Add(new CutPoint(n.Parent, n));
                }
            });

            var crossoverPoint0 = crossoverPoints0.SampleRandom(random);
            int level           = parent0.Root.GetBranchLevel(crossoverPoint0.Child);
            int length          = parent0.Root.GetLength() - crossoverPoint0.Child.GetLength();

            var allowedBranches = new List <ISymbolicExpressionTreeNode>();

            // collect the branches of parent1 that respect the depth/length limits and match the cut point type
            parent1.Root.ForEachNodePostfix((n) => {
                if (n.Parent != null && n.Parent != parent1.Root)
                {
                    if (n.GetDepth() + level <= maxDepth && n.GetLength() + length <= maxLength && crossoverPoint0.IsMatchingPointType(n))
                    {
                        allowedBranches.Add(n);
                    }
                }
            });

            if (allowedBranches.Count == 0)
            {
                return(parent0);
            }

            var dataset = problemData.Dataset;

            // create symbols in order to improvise an ad-hoc tree so that the child can be evaluated
            var           rootSymbol       = new ProgramRootSymbol();
            var           startSymbol      = new StartSymbol();
            var           tree0            = CreateTreeFromNode(random, crossoverPoint0.Child, rootSymbol, startSymbol); // this will change crossoverPoint0.Child.Parent
            List <double> estimatedValues0 = interpreter.GetSymbolicExpressionTreeValues(tree0, dataset, rows).ToList();

            crossoverPoint0.Child.Parent = crossoverPoint0.Parent; // restore parent
            ISymbolicExpressionTreeNode selectedBranch = null;

            // pick the first node that fulfills the semantic similarity conditions
            foreach (var node in allowedBranches)
            {
                var           parent           = node.Parent;
                // root symbol first, start symbol second - same argument order as for tree0 above
                var           tree1            = CreateTreeFromNode(random, node, rootSymbol, startSymbol); // this will affect node.Parent
                List <double> estimatedValues1 = interpreter.GetSymbolicExpressionTreeValues(tree1, dataset, rows).ToList();
                node.Parent = parent;                                                                       // restore parent

                OnlineCalculatorError errorState;
                double meanAbsoluteError = OnlineMeanAbsoluteErrorCalculator.Calculate(estimatedValues0, estimatedValues1, out errorState);

                // a result flagged with an error is not a trustworthy similarity measure, skip this candidate
                if (errorState != OnlineCalculatorError.None)
                {
                    continue;
                }

                if (range.Start <= meanAbsoluteError && meanAbsoluteError <= range.End)
                {
                    selectedBranch = node;
                    break;
                }
            }

            // perform the actual swap
            if (selectedBranch != null)
            {
                Swap(crossoverPoint0, selectedBranch);
            }
            return(parent0);
        }
 /// <summary>
 /// Creates a <c>SymbolicNearestNeighbourClassificationModel</c> for the given tree and interpreter,
 /// passing the current value of <c>KParameter</c> as the second constructor argument.
 /// </summary>
 public ISymbolicClassificationModel CreateSymbolicClassificationModel(string targetVariable, ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue) {
   var neighbourSetting = KParameter.Value.Value;
   var model = new SymbolicNearestNeighbourClassificationModel(targetVariable, neighbourSetting, tree, interpreter, lowerEstimationLimit, upperEstimationLimit);
   return model;
 }
 /// <summary>
 /// Evaluates a solution on two objectives and returns them as a two-element array:
 /// index 0 is the Pearson R² reported by <c>SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator</c>
 /// (rounded to <paramref name="decimalPlaces"/> digits when that value is non-negative),
 /// index 1 is the result of <c>SymbolicDataAnalysisModelComplexityCalculator.CalculateComplexity</c>.
 /// </summary>
 public static double[] Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling, int decimalPlaces) {
   double rSquared = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);

   // a negative number of decimal places means "do not round"
   bool roundingRequested = decimalPlaces >= 0;
   double firstObjective = roundingRequested ? Math.Round(rSquared, decimalPlaces) : rSquared;

   var complexity = SymbolicDataAnalysisModelComplexityCalculator.CalculateComplexity(solution);
   return new double[2] { firstObjective, complexity };
 }
 /// <summary>
 /// Evaluates a solution on two objectives and returns them as a two-element array:
 /// index 0 is the Pearson R² reported by <c>SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator</c>
 /// (rounded to <paramref name="decimalPlaces"/> digits when that value is non-negative),
 /// index 1 is the number of <c>VariableTreeNode</c> instances in the solution tree.
 /// </summary>
 public static double[] Calculate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree solution, double lowerEstimationLimit, double upperEstimationLimit, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling, int decimalPlaces) {
   double rSquared = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, solution, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);

   // a negative number of decimal places means "do not round"
   bool roundingRequested = decimalPlaces >= 0;
   double firstObjective = roundingRequested ? Math.Round(rSquared, decimalPlaces) : rSquared;

   // second objective: how many variable nodes the tree contains
   var variableNodes = solution.IterateNodesPostfix().OfType<VariableTreeNode>();
   int variableCount = variableNodes.Count();
   return new double[2] { firstObjective, variableCount };
 }
    /// <summary>
    /// Tunes the numeric constants (and, when <paramref name="updateVariableWeights"/> is set, the variable weights)
    /// of a symbolic expression tree with ALGLIB's least-squares fitter, using a function and gradient compiled by AutoDiff.
    /// </summary>
    /// <param name="interpreter">Interpreter used to evaluate the tree's quality before and after fitting.</param>
    /// <param name="tree">Tree whose terminal values are optimized; it is modified in place via <c>UpdateConstants</c>.</param>
    /// <param name="problemData">Supplies the dataset and the target variable.</param>
    /// <param name="rows">Dataset rows used for fitting and for quality evaluation. Enumerated several times.</param>
    /// <param name="applyLinearScaling">Forwarded to the R² evaluator.</param>
    /// <param name="maxIterations">Iteration limit handed to <c>alglib.lsfitsetcond</c>.</param>
    /// <param name="updateVariableWeights">When true, variable weights are fitted in addition to constants.</param>
    /// <param name="lowerEstimationLimit">Forwarded to the R² evaluator.</param>
    /// <param name="upperEstimationLimit">Forwarded to the R² evaluator.</param>
    /// <param name="updateConstantsInTree">When false, the original values are written back to the tree after the new quality has been measured.</param>
    /// <returns>
    /// 0.0 when the transformed tree references no variables; the quality measured before fitting when ALGLIB throws,
    /// when the new quality is NaN, or when it is more than 0.001 lower than before; otherwise the quality measured after fitting.
    /// </returns>
    /// <exception cref="NotSupportedException">The tree could not be transformed by <c>TryTransformToAutoDiff</c>.</exception>
    public static double OptimizeConstants(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, ISymbolicExpressionTree tree, IRegressionProblemData problemData, IEnumerable<int> rows, bool applyLinearScaling, int maxIterations, bool updateVariableWeights = true, double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue, bool updateConstantsInTree = true) {

      // filled by TryTransformToAutoDiff below
      List<AutoDiff.Variable> variables = new List<AutoDiff.Variable>();
      List<AutoDiff.Variable> parameters = new List<AutoDiff.Variable>();
      List<string> variableNames = new List<string>();

      AutoDiff.Term func;
      if (!TryTransformToAutoDiff(tree.Root.GetSubtree(0), variables, parameters, variableNames, updateVariableWeights, out func))
        throw new NotSupportedException("Could not optimize constants of symbolic expression tree due to not supported symbols used in the tree.");
      if (variableNames.Count == 0) return 0.0;

      AutoDiff.IParametricCompiledTerm compiledFunc = func.Compile(variables.ToArray(), parameters.ToArray());

      // terminals whose values seed the fit: all terminal nodes when weights are fitted too, constants only otherwise
      List<SymbolicExpressionTreeTerminalNode> terminalNodes = null;
      if (updateVariableWeights)
        terminalNodes = tree.Root.IterateNodesPrefix().OfType<SymbolicExpressionTreeTerminalNode>().ToList();
      else
        terminalNodes = new List<SymbolicExpressionTreeTerminalNode>(tree.Root.IterateNodesPrefix().OfType<ConstantTreeNode>());

      //extract initial constants
      double[] c = new double[variables.Count];
      {
        // the first two slots are fixed start values that are not taken from the tree and are skipped again
        // (Skip(2)) when values are written back; presumably an additive and a multiplicative scaling term - TODO confirm
        c[0] = 0.0;
        c[1] = 1.0;
        int i = 2;
        foreach (var node in terminalNodes) {
          ConstantTreeNode constantTreeNode = node as ConstantTreeNode;
          VariableTreeNode variableTreeNode = node as VariableTreeNode;
          if (constantTreeNode != null)
            c[i++] = constantTreeNode.Value;
          else if (updateVariableWeights && variableTreeNode != null)
            c[i++] = variableTreeNode.Weight;
        }
      }
      // keep a copy so the tree can be reset if fitting does not help
      double[] originalConstants = (double[])c.Clone();
      double originalQuality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);

      alglib.lsfitstate state;
      alglib.lsfitreport rep;
      int info;

      // build the input matrix x (one row per data row, one column per referenced variable) and the target vector y
      IDataset ds = problemData.Dataset;
      double[,] x = new double[rows.Count(), variableNames.Count];
      int row = 0;
      foreach (var r in rows) {
        for (int col = 0; col < variableNames.Count; col++) {
          x[row, col] = ds.GetDoubleValue(variableNames[col], r);
        }
        row++;
      }
      double[] y = ds.GetDoubleValues(problemData.TargetVariable, rows).ToArray();
      int n = x.GetLength(0); // number of data points
      int m = x.GetLength(1); // number of input variables
      int k = c.Length;       // number of values to fit

      alglib.ndimensional_pfunc function_cx_1_func = CreatePFunc(compiledFunc);
      alglib.ndimensional_pgrad function_cx_1_grad = CreatePGrad(compiledFunc);

      try {
        alglib.lsfitcreatefg(x, y, c, n, m, k, false, out state);
        alglib.lsfitsetcond(state, 0.0, 0.0, maxIterations);
        //alglib.lsfitsetgradientcheck(state, 0.001);
        alglib.lsfitfit(state, function_cx_1_func, function_cx_1_grad, null, null);
        alglib.lsfitresults(state, out info, out c, out rep);
      }
      catch (ArithmeticException) {
        // fitting failed numerically: the tree has not been touched yet, report the quality measured before fitting
        return originalQuality;
      }
      catch (alglib.alglibexception) {
        return originalQuality;
      }

      //info == -7  => constant optimization failed due to wrong gradient
      if (info != -7) UpdateConstants(tree, c.Skip(2).ToArray(), updateVariableWeights);
      var quality = SymbolicRegressionSingleObjectivePearsonRSquaredEvaluator.Calculate(interpreter, tree, lowerEstimationLimit, upperEstimationLimit, problemData, rows, applyLinearScaling);

      // caller only wanted the achievable quality: put the original values back into the tree
      if (!updateConstantsInTree) UpdateConstants(tree, originalConstants.Skip(2).ToArray(), updateVariableWeights);
      // fitting made things worse (beyond a 0.001 tolerance) or produced NaN: revert and report the original quality
      if (originalQuality - quality > 0.001 || double.IsNaN(quality)) {
        UpdateConstants(tree, originalConstants.Skip(2).ToArray(), updateVariableWeights);
        return originalQuality;
      }
      return quality;
    }
    /// <summary>
    /// Imports <paramref name="expr"/> as a symbolic expression tree, evaluates it with <paramref name="interpreter"/>
    /// on the single dataset row <paramref name="index"/> and asserts that the result matches <paramref name="expected"/>:
    /// either both values are NaN, or they agree within a tolerance of 1.0E-12.
    /// </summary>
    private void Evaluate(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, Dataset ds, string expr, int index, double expected) {
      ISymbolicExpressionTree tree = new SymbolicExpressionImporter().Import(expr);

      IEnumerable<int> singleRow = Enumerable.Range(index, 1);
      double actual = interpreter.GetSymbolicExpressionTreeValues(tree, ds, singleRow).First();

      bool actualIsNaN = double.IsNaN(actual);
      bool expectedIsNaN = double.IsNaN(expected);

      // NaN-ness has to agree in both directions
      Assert.IsFalse(actualIsNaN && !expectedIsNaN);
      Assert.IsFalse(!actualIsNaN && expectedIsNaN);

      // only real numbers can be compared with a tolerance
      if (actualIsNaN || expectedIsNaN) return;
      Assert.AreEqual(expected, actual, 1.0E-12, expr);
    }
 /// <summary>
 /// Creates a classification model by delegating to <c>CreateSymbolicDiscriminantFunctionClassificationModel</c>
 /// with the same tree, interpreter and estimation limits.
 /// </summary>
 public ISymbolicClassificationModel CreateSymbolicClassificationModel(ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue) {
   var discriminantModel = CreateSymbolicDiscriminantFunctionClassificationModel(tree, interpreter, lowerEstimationLimit, upperEstimationLimit);
   return discriminantModel;
 }
 /// <summary>
 /// Creates a <c>SymbolicDiscriminantFunctionClassificationModel</c> for the given target variable, tree and interpreter,
 /// equipped with a new <c>AccuracyMaximizationThresholdCalculator</c> and the given estimation limits.
 /// </summary>
 public ISymbolicDiscriminantFunctionClassificationModel CreateSymbolicDiscriminantFunctionClassificationModel(string targetVariable, ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, double lowerEstimationLimit = double.MinValue, double upperEstimationLimit = double.MaxValue) {
   var thresholdCalculator = new AccuracyMaximizationThresholdCalculator();
   var model = new SymbolicDiscriminantFunctionClassificationModel(targetVariable, tree, interpreter, thresholdCalculator, lowerEstimationLimit, upperEstimationLimit);
   return model;
 }
    /// <summary>
    /// Runs a fixed catalogue of expression checks against <paramref name="interpreter"/>: every supported operation
    /// (arithmetic, comparisons, boolean logic, trigonometry, exp/log, power/root, mean, lagged variables, integral,
    /// derivative and time lag) is evaluated on a chosen row of <paramref name="ds"/> and compared with its expected value
    /// via <c>Evaluate</c>.
    /// </summary>
    /// <param name="interpreter">Interpreter under test.</param>
    /// <param name="ds">Dataset providing the variables a and b referenced by the expressions.</param>
    private void EvaluateOperations(ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, Dataset ds) {
      // addition
      Evaluate(interpreter, ds, "(+ (variable 2.0 a ))", 1, 4.0);
      Evaluate(interpreter, ds, "(+ (variable 2.0 a ) (variable 3.0 b ))", 0, 5.0);
      Evaluate(interpreter, ds, "(+ (variable 2.0 a ) (variable 3.0 b ))", 1, 10.0);
      Evaluate(interpreter, ds, "(+ (variable 2.0 a) (variable 3.0 b ))", 2, 8.0);
      Evaluate(interpreter, ds, "(+ 8.0 2.0 2.0)", 0, 12.0);

      // subtraction
      Evaluate(interpreter, ds, "(- (variable 2.0 a ))", 1, -4.0);
      Evaluate(interpreter, ds, "(- (variable 2.0 a ) (variable 3.0 b))", 0, -1.0);
      Evaluate(interpreter, ds, "(- (variable 2.0 a ) (variable 3.0 b ))", 1, -2.0);
      Evaluate(interpreter, ds, "(- (variable 2.0 a ) (variable 3.0 b ))", 2, -4.0);
      Evaluate(interpreter, ds, "(- 8.0 2.0 2.0)", 0, 4.0);

      // multiplication
      Evaluate(interpreter, ds, "(* (variable 2.0 a ))", 0, 2.0);
      Evaluate(interpreter, ds, "(* (variable 2.0 a ) (variable 3.0 b ))", 0, 6.0);
      Evaluate(interpreter, ds, "(* (variable 2.0 a ) (variable 3.0 b ))", 1, 24.0);
      Evaluate(interpreter, ds, "(* (variable 2.0 a ) (variable 3.0 b ))", 2, 12.0);
      Evaluate(interpreter, ds, "(* 8.0 2.0 2.0)", 0, 32.0);

      // division
      Evaluate(interpreter, ds, "(/ (variable 2.0 a ))", 1, 1.0 / 4.0);
      Evaluate(interpreter, ds, "(/ (variable 2.0 a ) 2.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(/ (variable 2.0 a ) 2.0)", 1, 2.0);
      Evaluate(interpreter, ds, "(/ (variable 3.0 b ) 2.0)", 2, 3.0);
      Evaluate(interpreter, ds, "(/ 8.0 2.0 2.0)", 0, 2.0);

      // gt
      Evaluate(interpreter, ds, "(> (variable 2.0 a) 2.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(> 2.0 (variable 2.0 a))", 0, -1.0);
      Evaluate(interpreter, ds, "(> (variable 2.0 a) 1.9)", 0, 1.0);
      Evaluate(interpreter, ds, "(> 1.9 (variable 2.0 a))", 0, -1.0);
      Evaluate(interpreter, ds, "(> (log -1.0) (log -1.0))", 0, -1.0); // (> nan nan) should be false

      // lt
      Evaluate(interpreter, ds, "(< (variable 2.0 a) 2.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(< 2.0 (variable 2.0 a))", 0, -1.0);
      Evaluate(interpreter, ds, "(< (variable 2.0 a) 1.9)", 0, -1.0);
      Evaluate(interpreter, ds, "(< 1.9 (variable 2.0 a))", 0, 1.0);
      Evaluate(interpreter, ds, "(< (log -1.0) (log -1.0))", 0, -1.0); // (< nan nan) should be false

      // If
      Evaluate(interpreter, ds, "(if -10.0 2.0 3.0)", 0, 3.0);
      Evaluate(interpreter, ds, "(if -1.0 2.0 3.0)", 0, 3.0);
      Evaluate(interpreter, ds, "(if 0.0 2.0 3.0)", 0, 3.0);
      Evaluate(interpreter, ds, "(if 1.0 2.0 3.0)", 0, 2.0);
      Evaluate(interpreter, ds, "(if 10.0 2.0 3.0)", 0, 2.0);
      Evaluate(interpreter, ds, "(if (log -1.0) 2.0 3.0)", 0, 3.0); // if(nan) should return the else branch

      // NOT
      Evaluate(interpreter, ds, "(not -1.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(not -2.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(not 1.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(not 2.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(not 0.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(not (log -1.0))", 0, 1.0);

      // AND
      Evaluate(interpreter, ds, "(and -1.0 -2.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(and -1.0 2.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(and 1.0 -2.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(and 1.0 0.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(and 0.0 0.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(and 1.0 2.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(and 1.0 2.0 3.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(and 1.0 -2.0 3.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(and (log -1.0))", 0, -1.0); // (and NaN)
      Evaluate(interpreter, ds, "(and (log -1.0)  1.0)", 0, -1.0); // (and NaN 1.0)


      // OR
      Evaluate(interpreter, ds, "(or -1.0 -2.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(or -1.0 2.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(or 1.0 -2.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(or 1.0 2.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(or 0.0 0.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(or -1.0 -2.0 -3.0)", 0, -1.0);
      Evaluate(interpreter, ds, "(or -1.0 -2.0 3.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(or (log -1.0))", 0, -1.0); // (or NaN)
      Evaluate(interpreter, ds, "(or (log -1.0)  1.0)", 0, -1.0); // (or NaN 1.0)

      // sin, cos, tan
      Evaluate(interpreter, ds, "(sin " + Math.PI.ToString(NumberFormatInfo.InvariantInfo) + ")", 0, 0.0);
      Evaluate(interpreter, ds, "(sin 0.0)", 0, 0.0);
      Evaluate(interpreter, ds, "(cos " + Math.PI.ToString(NumberFormatInfo.InvariantInfo) + ")", 0, -1.0);
      Evaluate(interpreter, ds, "(cos 0.0)", 0, 1.0);
      Evaluate(interpreter, ds, "(tan " + Math.PI.ToString(NumberFormatInfo.InvariantInfo) + ")", 0, Math.Tan(Math.PI));
      Evaluate(interpreter, ds, "(tan 0.0)", 0, Math.Tan(0.0)); // expected value must use the same argument as the expression

      // exp, log
      Evaluate(interpreter, ds, "(log (exp 7.0))", 0, Math.Log(Math.Exp(7)));
      Evaluate(interpreter, ds, "(exp (log 7.0))", 0, Math.Exp(Math.Log(7)));
      Evaluate(interpreter, ds, "(log -3.0)", 0, Math.Log(-3));

      // power
      Evaluate(interpreter, ds, "(pow 2.0 3.0)", 0, 8.0);
      Evaluate(interpreter, ds, "(pow 4.0 0.5)", 0, 1.0); // interpreter should round the exponent to the nearest integer value (.5 is rounded to the even number)
      Evaluate(interpreter, ds, "(pow 4.0 2.5)", 0, 16.0); // interpreter should round the exponent to the nearest integer value (.5 is rounded to the even number)
      Evaluate(interpreter, ds, "(pow -2.0 3.0)", 0, -8.0);
      Evaluate(interpreter, ds, "(pow 2.0 -3.0)", 0, 1.0 / 8.0);
      Evaluate(interpreter, ds, "(pow -2.0 -3.0)", 0, -1.0 / 8.0);

      // root
      Evaluate(interpreter, ds, "(root 9.0 2.0)", 0, 3.0);
      Evaluate(interpreter, ds, "(root 27.0 3.0)", 0, 3.0);
      Evaluate(interpreter, ds, "(root 2.0 -3.0)", 0, Math.Pow(2.0, -1.0 / 3.0));

      // mean
      Evaluate(interpreter, ds, "(mean -1.0 1.0 -1.0)", 0, -1.0 / 3.0);

      // lag
      Evaluate(interpreter, ds, "(lagVariable 1.0 a -1) ", 1, ds.GetDoubleValue("A", 0));
      Evaluate(interpreter, ds, "(lagVariable 1.0 a -1) ", 2, ds.GetDoubleValue("A", 1));
      Evaluate(interpreter, ds, "(lagVariable 1.0 a 0) ", 2, ds.GetDoubleValue("A", 2));
      Evaluate(interpreter, ds, "(lagVariable 1.0 a 1) ", 0, ds.GetDoubleValue("A", 1));

      // integral
      Evaluate(interpreter, ds, "(integral -1.0 (variable 1.0 a)) ", 1, ds.GetDoubleValue("A", 0) + ds.GetDoubleValue("A", 1));
      Evaluate(interpreter, ds, "(integral -1.0 (lagVariable 1.0 a 1)) ", 1, ds.GetDoubleValue("A", 1) + ds.GetDoubleValue("A", 2));
      Evaluate(interpreter, ds, "(integral -2.0 (variable 1.0 a)) ", 2, ds.GetDoubleValue("A", 0) + ds.GetDoubleValue("A", 1) + ds.GetDoubleValue("A", 2));
      Evaluate(interpreter, ds, "(integral -1.0 (* (variable 1.0 a) (variable 1.0 b)))", 1, ds.GetDoubleValue("A", 0) * ds.GetDoubleValue("B", 0) + ds.GetDoubleValue("A", 1) * ds.GetDoubleValue("B", 1));
      Evaluate(interpreter, ds, "(integral -2.0 3.0)", 1, 9.0);

      // derivative
      // (f_0 + 2 * f_1 - 2 * f_3 - f_4) / 8; // h = 1
      Evaluate(interpreter, ds, "(diff (variable 1.0 a)) ", 5, (ds.GetDoubleValue("A", 5) + 2 * ds.GetDoubleValue("A", 4) - 2 * ds.GetDoubleValue("A", 2) - ds.GetDoubleValue("A", 1)) / 8.0);
      Evaluate(interpreter, ds, "(diff (variable 1.0 b)) ", 5, (ds.GetDoubleValue("B", 5) + 2 * ds.GetDoubleValue("B", 4) - 2 * ds.GetDoubleValue("B", 2) - ds.GetDoubleValue("B", 1)) / 8.0);
      Evaluate(interpreter, ds, "(diff (* (variable 1.0 a) (variable 1.0 b)))", 5,
        (ds.GetDoubleValue("A", 5) * ds.GetDoubleValue("B", 5) +
        2 * ds.GetDoubleValue("A", 4) * ds.GetDoubleValue("B", 4) -
        2 * ds.GetDoubleValue("A", 2) * ds.GetDoubleValue("B", 2) -
        ds.GetDoubleValue("A", 1) * ds.GetDoubleValue("B", 1)) / 8.0);
      Evaluate(interpreter, ds, "(diff -2.0 3.0)", 5, 0.0);

      // timelag
      Evaluate(interpreter, ds, "(lag -1.0 (lagVariable 1.0 a 2)) ", 1, ds.GetDoubleValue("A", 2));
      Evaluate(interpreter, ds, "(lag -2.0 (lagVariable 1.0 a 2)) ", 2, ds.GetDoubleValue("A", 2));
      Evaluate(interpreter, ds, "(lag -1.0 (* (lagVariable 1.0 a 1) (lagVariable 1.0 b 2)))", 1, ds.GetDoubleValue("A", 1) * ds.GetDoubleValue("B", 2));
      Evaluate(interpreter, ds, "(lag -2.0 3.0)", 1, 3.0);
    }
 /// <summary>
 /// Creates a <c>SymbolicRegressionModel</c> for the given tree and interpreter. The problem data is cast to
 /// <c>IRegressionProblemData</c> to obtain the target variable; the estimation limits supply the lower and upper bound.
 /// </summary>
 protected override ISymbolicDataAnalysisModel CreateModel(ISymbolicExpressionTree tree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter, IDataAnalysisProblemData problemData, DoubleLimit estimationLimits) {
   var targetVariable = ((IRegressionProblemData)problemData).TargetVariable;
   var lowerLimit = estimationLimits.Lower;
   var upperLimit = estimationLimits.Upper;
   return new SymbolicRegressionModel(targetVariable, tree, interpreter, lowerLimit, upperLimit);
 }