/// <summary>
/// Determines a single value that can stand in for <paramref name="node"/>: the node's own value when it is a
/// constant node, otherwise the median of the values obtained by interpreting a clone of the node.
/// </summary>
/// <param name="node">Node to find a replacement value for; only a clone of it is placed into the temporary tree.</param>
/// <param name="sourceTree">Tree whose root subtrees at index 1 and above are cloned into the temporary tree.</param>
/// <param name="interpreter">Interpreter used to evaluate the temporary tree.</param>
/// <param name="dataset">Dataset handed to the interpreter.</param>
/// <param name="rows">Rows handed to the interpreter.</param>
/// <returns>The constant's value, or the median of the interpreted values.</returns>
protected static double CalculateReplacementValue(ISymbolicExpressionTreeNode node, ISymbolicExpressionTree sourceTree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
  IDataset dataset, IEnumerable<int> rows) {
  // shortcut: a constant node always yields the same value, no interpretation necessary
  var asConstant = node as ConstantTreeNode;
  if (asConstant != null) {
    return asConstant.Value;
  }

  // temporary tree: program root, then start node, then a clone of the node
  var programRoot = new ProgramRootSymbol().CreateTreeNode();
  var start = new StartSymbol().CreateTreeNode();
  programRoot.AddSubtree(start);
  start.AddSubtree((ISymbolicExpressionTreeNode)node.Clone());
  var evaluationTree = new SymbolicExpressionTree(programRoot);

  // clone ADFs of source tree (all root subtrees except the first one)
  int adfIndex = 1;
  while (adfIndex < sourceTree.Root.SubtreeCount) {
    var adfClone = (ISymbolicExpressionTreeNode)sourceTree.Root.GetSubtree(adfIndex).Clone();
    evaluationTree.Root.AddSubtree(adfClone);
    adfIndex++;
  }

  return interpreter.GetSymbolicExpressionTreeValues(evaluationTree, dataset, rows).Median();
}
/// <summary>
/// Combines all symbolic models found in <paramref name="models"/> into a single symbolic regression solution
/// whose expression is (sum of the individual model expressions) * <paramref name="nu"/>.
/// </summary>
/// <param name="models">Candidate models; entries that are not <c>ISymbolicRegressionModel</c> are skipped.</param>
/// <param name="nu">Scaling factor multiplied with the sum of all model expressions.</param>
/// <param name="problemData">Supplies the target variable and is attached to the created solution.</param>
/// <returns>A solution wrapping the combined model.</returns>
private static ISymbolicRegressionSolution CreateSymbolicSolution(List<IRegressionModel> models, double nu, IRegressionProblemData problemData) {
  // materialize once: the filtered sequence is consumed three times below (Min, Max, foreach)
  var symbModels = models.OfType<ISymbolicRegressionModel>().ToList();
  // the combined model uses the widest estimation limits of all parts
  var lowerLimit = symbModels.Min(m => m.LowerEstimationLimit);
  var upperLimit = symbModels.Max(m => m.UpperEstimationLimit);
  var interpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter();

  var progRootNode = new ProgramRootSymbol().CreateTreeNode();
  var startNode = new StartSymbol().CreateTreeNode();
  var addNode = new Addition().CreateTreeNode();
  var mulNode = new Multiplication().CreateTreeNode();
  var scaleNode = (ConstantTreeNode)new Constant().CreateTreeNode();
  // all models are scaled using the same nu
  scaleNode.Value = nu;

  foreach (var m in symbModels) {
    // skip root and start
    var relevantPart = m.SymbolicExpressionTree.Root.GetSubtree(0).GetSubtree(0);
    addNode.AddSubtree((ISymbolicExpressionTreeNode)relevantPart.Clone());
  }

  // resulting structure: root, start, mul(add(model parts...), nu)
  mulNode.AddSubtree(addNode);
  mulNode.AddSubtree(scaleNode);
  startNode.AddSubtree(mulNode);
  progRootNode.AddSubtree(startNode);

  var t = new SymbolicExpressionTree(progRootNode);
  var combinedModel = new SymbolicRegressionModel(problemData.TargetVariable, t, interpreter, lowerLimit, upperLimit);
  return new SymbolicRegressionSolution(combinedModel, problemData);
}
/// <summary>
/// Builds a complete expression tree (program root, start node, expression) around the subtree
/// that <c>ToSubtree</c> produces for <paramref name="nodes"/>.
/// </summary>
/// <param name="nodes">Hash nodes to convert.</param>
/// <returns>A new tree whose start node has the converted subtree as its only child.</returns>
public static ISymbolicExpressionTree ToTree(this HashNode<ISymbolicExpressionTreeNode>[] nodes) {
  var programRoot = new ProgramRootSymbol().CreateTreeNode();
  var startNode = new StartSymbol().CreateTreeNode();
  programRoot.AddSubtree(startNode);

  var expression = nodes.ToSubtree();
  startNode.AddSubtree(expression);

  return new SymbolicExpressionTree(programRoot);
}
/// <summary>
/// Runs Fisher LDA (<c>alglib.fisherlda</c>) on the training rows of <paramref name="problemData"/> and wraps the
/// resulting weight vector into a symbolic discriminant function: a sum of weighted input variables.
/// </summary>
/// <param name="problemData">Classification problem providing dataset, target, allowed inputs and training rows.</param>
/// <returns>A classification solution based on the discriminant function; it holds a clone of the problem data.</returns>
/// <exception cref="NotSupportedException">The prepared input matrix contains NaN or infinity.</exception>
/// <exception cref="ArgumentException">alglib reports a result code below 1.</exception>
public static IClassificationSolution CreateLinearDiscriminantAnalysisSolution(IClassificationProblemData problemData) {
  var dataset = problemData.Dataset;
  string targetVariable = problemData.TargetVariable;
  IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
  IEnumerable<int> rows = problemData.TrainingIndices;
  int nClasses = problemData.ClassNames.Count();

  // input columns first, target variable in the last column
  var matrixColumns = allowedInputVariables.Concat(new string[] { targetVariable });
  double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, matrixColumns, rows);
  bool containsInvalidValues = inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x));
  if (containsInvalidValues) {
    throw new NotSupportedException("Linear discriminant analysis does not support NaN or infinity values in the input dataset.");
  }

  // change class values into class index (position within the ascending list of class values)
  int nRows = inputMatrix.GetLength(0);
  int targetVariableColumn = inputMatrix.GetLength(1) - 1;
  List<double> classValues = problemData.ClassValues.OrderBy(x => x).ToList();
  for (int row = 0; row < nRows; row++) {
    double classValue = inputMatrix[row, targetVariableColumn];
    inputMatrix[row, targetVariableColumn] = classValues.IndexOf(classValue);
  }

  int info;
  double[] w;
  alglib.fisherlda(inputMatrix, inputMatrix.GetLength(0), allowedInputVariables.Count(), nClasses, out info, out w);
  if (info < 1) {
    throw new ArgumentException("Error in calculation of linear discriminant analysis solution");
  }

  // discriminant function: root, start, addition of weighted variables
  ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
  ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
  tree.Root.AddSubtree(startNode);
  ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
  startNode.AddSubtree(addition);

  // w is indexed in the enumeration order of the allowed input variables
  int weightIndex = 0;
  foreach (string variableName in allowedInputVariables) {
    var term = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
    term.VariableName = variableName;
    term.Weight = w[weightIndex];
    addition.AddSubtree(term);
    weightIndex++;
  }

  var model = LinearDiscriminantAnalysis.CreateDiscriminantFunctionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter(), problemData, rows);
  return new SymbolicDiscriminantFunctionClassificationSolution(model, (IClassificationProblemData)problemData.Clone());
}
/// <summary>
/// Creates one tree whose single addition node sums weighted double variables, weighted binary factor
/// variables and an optional constant.
/// </summary>
/// <param name="factors">Factor variables (name and values); only used when <paramref name="factorCoefficients"/> is non-empty.</param>
/// <param name="factorCoefficients">Weights for the binary factor nodes.</param>
/// <param name="variableNames">Names of the double variables; only used when <paramref name="coefficients"/> is non-empty.</param>
/// <param name="coefficients">Weights for the double variables.</param>
/// <param name="const">Constant term; a constant node is only added when it is not equal to zero.</param>
/// <returns>The combined tree (program root, start node, addition).</returns>
/// <exception cref="ArgumentException">Both coefficient arrays are empty and the constant is zero.</exception>
public static ISymbolicExpressionTree CreateTree(
  IEnumerable<KeyValuePair<string, IEnumerable<string>>> factors, double[] factorCoefficients,
  string[] variableNames, double[] coefficients,
  double @const = 0) {
  bool hasFactorTerms = factorCoefficients.Length > 0;
  bool hasVariableTerms = coefficients.Length > 0;
  bool hasConstant = @const != 0.0;
  if (!hasFactorTerms && !hasVariableTerms && !hasConstant) {
    throw new ArgumentException();
  }

  // the terms of both partial trees are collected below this node
  ISymbolicExpressionTreeNode sum = (new Addition()).CreateTreeNode();

  // terms for double variables (all lags are zero)
  if (hasVariableTerms) {
    var noLags = new int[variableNames.Length];
    var variableTree = CreateTree(variableNames, noLags, coefficients);
    foreach (var variableTerm in variableTree.IterateNodesPrefix().OfType<VariableTreeNode>()) {
      sum.AddSubtree(variableTerm);
    }
  }

  // terms for string (factor) variables
  if (hasFactorTerms) {
    var factorTree = CreateTree(factors, factorCoefficients);
    foreach (var factorTerm in factorTree.IterateNodesPrefix().OfType<BinaryFactorVariableTreeNode>()) {
      sum.AddSubtree(factorTerm);
    }
  }

  if (hasConstant) {
    var constantTerm = (ConstantTreeNode)new Constant().CreateTreeNode();
    constantTerm.Value = @const;
    sum.AddSubtree(constantTerm);
  }

  ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
  ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
  tree.Root.AddSubtree(startNode);
  startNode.AddSubtree(sum);
  return tree;
}
/// <summary>
/// Creates the tree of a linear combination of (optionally lagged) variables: one weighted node per entry of
/// <paramref name="variableNames"/> below a single addition node, plus an optional constant node.
/// </summary>
/// <param name="variableNames">Variable names; must not be empty.</param>
/// <param name="lags">Lag per variable; 0 creates a plain variable node, any other value a lagged variable node.</param>
/// <param name="coefficients">Weight per variable.</param>
/// <param name="const">Constant term; a constant node is only added when it is not equal to zero.</param>
/// <returns>The tree (program root, start node, addition).</returns>
/// <exception cref="ArgumentException">
/// <paramref name="variableNames"/> is empty or the three arrays differ in length.
/// </exception>
public static ISymbolicExpressionTree CreateTree(string[] variableNames, int[] lags, double[] coefficients, double @const = 0) {
  if (variableNames.Length == 0 ||
      variableNames.Length != coefficients.Length ||
      variableNames.Length != lags.Length) {
    throw new ArgumentException("The length of the variable names, lags, and coefficients vectors must match");
  }

  ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
  ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
  tree.Root.AddSubtree(startNode);
  ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
  startNode.AddSubtree(addition);

  for (int i = 0; i < variableNames.Length; i++) {
    if (lags[i] == 0) {
      VariableTreeNode vNode = (VariableTreeNode)new Variable().CreateTreeNode();
      vNode.VariableName = variableNames[i];
      vNode.Weight = coefficients[i];
      addition.AddSubtree(vNode);
    } else {
      LaggedVariableTreeNode vNode = (LaggedVariableTreeNode)new LaggedVariable().CreateTreeNode();
      vNode.VariableName = variableNames[i];
      vNode.Weight = coefficients[i];
      vNode.Lag = lags[i];
      addition.AddSubtree(vNode);
    }
  }

  // NOTE(review): the zero test on @const was mangled in the source text and did not compile.
  // It is restored here as an exact comparison; confirm whether a tolerance-based check was intended.
  if (@const != 0.0) {
    ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode();
    cNode.Value = @const;
    addition.AddSubtree(cNode);
  }
  return tree;
}
/// <summary>
/// Lazily yields candidate values that may replace <paramref name="node"/>:
/// a constant node yields its value; a binary factor node yields 0 and 1; a factor node yields each of its
/// weights followed by 0.0; any other node yields the median and then the average of its interpreted values.
/// </summary>
/// <param name="node">Node to find replacement values for; only a clone of it is placed into the temporary tree.</param>
/// <param name="sourceTree">Tree whose root subtrees at index 1 and above are cloned into the temporary tree.</param>
/// <param name="interpreter">Interpreter used to evaluate the temporary tree.</param>
/// <param name="dataset">Dataset handed to the interpreter.</param>
/// <param name="rows">Rows handed to the interpreter.</param>
/// <returns>The candidate replacement values in the order described above.</returns>
protected IEnumerable<double> CalculateReplacementValues(ISymbolicExpressionTreeNode node, ISymbolicExpressionTree sourceTree, ISymbolicDataAnalysisExpressionTreeInterpreter interpreter,
  IDataset dataset, IEnumerable<int> rows) {
  //optimization: constant nodes return always the same value
  ConstantTreeNode constantNode = node as ConstantTreeNode;
  BinaryFactorVariableTreeNode binaryFactorNode = node as BinaryFactorVariableTreeNode;
  FactorVariableTreeNode factorNode = node as FactorVariableTreeNode;

  if (constantNode != null) {
    yield return constantNode.Value;
  } else if (binaryFactorNode != null) {
    // valid replacements are either all off or all on
    yield return 0;
    yield return 1;
  } else if (factorNode != null) {
    foreach (var w in factorNode.Weights) {
      yield return w;
    }
    yield return 0.0;
  } else {
    var rootSymbol = new ProgramRootSymbol().CreateTreeNode();
    var startSymbol = new StartSymbol().CreateTreeNode();
    rootSymbol.AddSubtree(startSymbol);
    startSymbol.AddSubtree((ISymbolicExpressionTreeNode)node.Clone());

    var tempTree = new SymbolicExpressionTree(rootSymbol);
    // clone ADFs of source tree
    for (int i = 1; i < sourceTree.Root.SubtreeCount; i++) {
      tempTree.Root.AddSubtree((ISymbolicExpressionTreeNode)sourceTree.Root.GetSubtree(i).Clone());
    }

    // Interpret the tree once and reuse the values for both statistics (previously evaluated twice).
    // NOTE(review): the interpreter is now invoked once instead of twice per node; confirm that nothing
    // relies on the number of interpreter invocations.
    IEnumerable<double> values = interpreter.GetSymbolicExpressionTreeValues(tempTree, dataset, rows).ToList();
    yield return values.Median();
    yield return values.Average();
  }
}
/// <summary>
/// Creates the tree of a linear combination of binary factor variables: one weighted node per
/// (factor variable, factor value) pair below a single addition node, plus an optional constant node.
/// </summary>
/// <param name="factors">Factor variables, each with the values a node is created for.</param>
/// <param name="factorCoefficients">
/// Weights, consumed sequentially in the enumeration order of <paramref name="factors"/> and their values.
/// </param>
/// <param name="const">Constant term; a constant node is only added when it is not equal to zero.</param>
/// <returns>The tree (program root, start node, addition).</returns>
public static ISymbolicExpressionTree CreateTree(IEnumerable<KeyValuePair<string, IEnumerable<string>>> factors, double[] factorCoefficients, double @const = 0) {
  ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
  ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
  tree.Root.AddSubtree(startNode);
  ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
  startNode.AddSubtree(addition);

  // i runs over all (variable, value) pairs and selects the matching coefficient
  int i = 0;
  foreach (var factor in factors) {
    var varName = factor.Key;
    foreach (var factorValue in factor.Value) {
      var node = (BinaryFactorVariableTreeNode)new BinaryFactorVariable().CreateTreeNode();
      node.VariableValue = factorValue;
      node.VariableName = varName;
      node.Weight = factorCoefficients[i];
      addition.AddSubtree(node);
      i++;
    }
  }

  // NOTE(review): the zero test on @const was mangled in the source text and did not compile.
  // It is restored here as an exact comparison; confirm whether a tolerance-based check was intended.
  if (@const != 0.0) {
    ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode();
    cNode.Value = @const;
    addition.AddSubtree(cNode);
  }
  return tree;
}
/// <summary>
/// Converts one tree of the random forest into a symbolic expression tree by reading alglib's packed
/// tree array and delegating the node conversion to <c>CreateRegressionTreeRec</c>.
/// </summary>
/// <param name="treeIdx">
/// Selects the tree: <c>treeIdx - 1</c> trees are skipped, so the values 0 and 1 both address the first tree.
/// NOTE(review): this suggests a 1-based index; confirm against callers.
/// </param>
/// <returns>A tree consisting of program root, start node and the converted regression tree.</returns>
public ISymbolicExpressionTree ExtractTree(int treeIdx) {
  var forest = RandomForest;
  // hoping that the internal representation of alglib is stable

  // TREE FORMAT
  // W[Offs]      -   size of sub-array (for the tree)
  //     node info:
  // W[K+0]       -   variable number        (-1 for leaf mode)
  // W[K+1]       -   threshold              (class/value for leaf node)
  // W[K+2]       -   ">=" branch index      (absent for leaf node)

  // skip irrelevant trees: the first entry of each tree is its size, so adding it jumps to the next tree
  int treeStart = 0;
  for (int skipped = 0; skipped < treeIdx - 1; skipped++) {
    treeStart += (int)Math.Round(forest.innerobj.trees[treeStart]);
  }

  var constSy = new Constant();
  var varCondSy = new VariableCondition() { IgnoreSlope = true };
  // the node data starts right after the size entry
  var regressionTree = CreateRegressionTreeRec(forest.innerobj.trees, treeStart, treeStart + 1, constSy, varCondSy);

  var start = new StartSymbol().CreateTreeNode();
  start.AddSubtree(regressionTree);
  var programRoot = new ProgramRootSymbol().CreateTreeNode();
  programRoot.AddSubtree(start);
  return new SymbolicExpressionTree(programRoot);
}
// Checks the formatted result of symbolic differentiation: first for expressions given as infix strings
// (through the Derive helper), then for hand-built trees that use division/multiplication with an
// unusual number of arguments.
public void DeriveExpressions() {
  var formatter = new InfixExpressionFormatter();
  var parser = new InfixExpressionParser(); // not referenced by the checks below

  // each row: { expected derivative, expression, variable to derive for }
  var cases = new string[][] {
    new[] { "0", "3", "x" },
    new[] { "1", "x", "x" },
    new[] { "10", "10*x", "x" },
    new[] { "10", "x*10", "x" },
    new[] { "(2*'x')", "x*x", "x" },
    // simplifier does not merge (x*x)*2 + x*x to 3*x*x
    new[] { "((('x' * 'x') * 2) + ('x' * 'x'))", "x*x*x", "x" },
    new[] { "0", "10*x", "y" },
    new[] { "20", "10*x+20*y", "y" },
    new[] { "6", "2*3*x", "x" },
    new[] { "(10*'y')", "10*x*y+20*y", "x" },
    new[] { "(1 / (SQR('x') * (-1)))", "1/x", "x" },
    new[] { "('y' / (SQR('x') * (-1)))", "y/x", "x" },
    new[] { "((((-2*'x') + (-1)) * ('a' + 'b')) / SQR(('x' + ('x' * 'x'))))", "(a+b)/(x+x*x)", "x" },
    new[] { "((((-2*'x') + (-1)) * ('a' + 'b')) / SQR(('x' + SQR('x'))))", "(a+b)/(x+SQR(x))", "x" },
    new[] { "EXP('x')", "exp(x)", "x" },
    new[] { "(EXP((3*'x')) * 3)", "exp(3*x)", "x" },
    new[] { "(1 / 'x')", "log(x)", "x" },
    // 3 * 1/(3*x)
    new[] { "(1 / 'x')", "log(3*x)", "x" },
    // simplifier does not try to keep fractions
    new[] { "(1 / ('x' + (0.333333333333333*'y')))", "log(3*x+y)", "x" },
    // 3 / (2 * sqrt(3*x+y)) = 1 / ((2/3) * sqrt(3*x+y))
    new[] { "(1 / (SQRT(((3*'x') + 'y')) * 0.666666666666667))", "sqrt(3*x+y)", "x" },
    new[] { "(COS((3*'x')) * 3)", "sin(3*x)", "x" },
    new[] { "(SIN((3*'x')) * (-3))", "cos(3*x)", "x" },
    // diff(tan(f(x)), x) = 1.0 / cos²(f(x)), simplifier puts constant factor into the denominator
    new[] { "(1 / (SQR(COS((3*'x'))) * 0.333333333333333))", "tan(3*x)", "x" },
    new[] { "((9*'x') / ABS((3*'x')))", "abs(3*x)", "x" },
    new[] { "(SQR('x') * 3)", "cube(x)", "x" },
    new[] { "(1 / (SQR(CUBEROOT('x')) * 3))", "cuberoot(x)", "x" },
    // df(a,b,x) / dy = 0
    new[] { "0", "(a+b)/(x+SQR(x))", "y" },
    new[] { "('a' * 'b' * 'c')", "a*b*c*d", "d" },
    new[] { "('a' / ('b' * 'c' * SQR('d') * (-1)))", "a/b/c/d", "d" },
    // (2*'x'*(1 - SQR(TANH(SQR('x'))))
    new[] { "('x' * ((SQR(TANH(SQR('x'))) * (-1)) + 1) * 2)", "tanh(sqr(x))", "x" },
  };
  foreach (var testCase in cases) {
    Assert.AreEqual(testCase[0], Derive(testCase[1], testCase[2]));
  }

  {
    // special case: Inv(x) using only one argument to the division symbol
    // f(x) = 1/x
    var programRoot = new ProgramRootSymbol().CreateTreeNode();
    var startNode = new StartSymbol().CreateTreeNode();
    var division = new Division().CreateTreeNode();
    var x = (VariableTreeNode)(new Variable().CreateTreeNode());
    x.Weight = 1.0;
    x.VariableName = "x";
    division.AddSubtree(x);
    startNode.AddSubtree(division);
    programRoot.AddSubtree(startNode);
    var tree = new SymbolicExpressionTree(programRoot);

    var derivative = formatter.Format(DerivativeCalculator.Derive(tree, "x"));
    Assert.AreEqual("(1 / (SQR('x') * (-1)))", derivative);
  }

  {
    // special case: multiplication with only one argument
    var programRoot = new ProgramRootSymbol().CreateTreeNode();
    var startNode = new StartSymbol().CreateTreeNode();
    var product = new Multiplication().CreateTreeNode();
    var x = (VariableTreeNode)(new Variable().CreateTreeNode());
    x.Weight = 3.0;
    x.VariableName = "x";
    product.AddSubtree(x);
    startNode.AddSubtree(product);
    programRoot.AddSubtree(startNode);
    var tree = new SymbolicExpressionTree(programRoot);

    var derivative = formatter.Format(DerivativeCalculator.Derive(tree, "x"));
    Assert.AreEqual("3", derivative);
  }

  {
    // division with multiple arguments
    // div(x, y, z) is interpreted as (x / y) / z
    var programRoot = new ProgramRootSymbol().CreateTreeNode();
    var startNode = new StartSymbol().CreateTreeNode();
    var division = new Division().CreateTreeNode();
    var x = (VariableTreeNode)(new Variable().CreateTreeNode());
    x.Weight = 3.0;
    x.VariableName = "x";
    var y = (VariableTreeNode)(new Variable().CreateTreeNode());
    y.Weight = 4.0;
    y.VariableName = "y";
    var z = (VariableTreeNode)(new Variable().CreateTreeNode());
    z.Weight = 5.0;
    z.VariableName = "z";
    division.AddSubtree(x);
    division.AddSubtree(y);
    division.AddSubtree(z);
    startNode.AddSubtree(division);
    programRoot.AddSubtree(startNode);
    var tree = new SymbolicExpressionTree(programRoot);

    // actually 3 / (4y 5z) but simplifier is not smart enough to cancel numerator and denominator
    // 60 y z / y² z² 20² == 6 / y z 40 == 3 / y z 20
    Assert.AreEqual("(('y' * 'z' * 60) / (SQR('y') * SQR('z') * 400))",
      formatter.Format(DerivativeCalculator.Derive(tree, "x")));

    // actually 3x * -(4 5 z) / (4y 5z)² = -3x / (20 y² z)
    // -3 4 5 x z / 4² y² 5² z² = -60 x z / 20² z² y² == -60 x z / y² z² 20²
    Assert.AreEqual("(('x' * 'z' * (-60)) / (SQR('y') * SQR('z') * 400))",
      formatter.Format(DerivativeCalculator.Derive(tree, "y")));

    Assert.AreEqual("(('x' * 'y' * (-60)) / (SQR('y') * SQR('z') * 400))",
      formatter.Format(DerivativeCalculator.Derive(tree, "z")));
  }
}
/// <summary>
/// Fits a linear model (<c>alglib.lrbuild</c>) that predicts the target variable from its own
/// <paramref name="timeOffset"/> preceding values and wraps it into a time series prognosis solution:
/// a sum of weighted lagged target variables plus a constant.
/// </summary>
/// <param name="problemData">Problem providing the dataset, target variable and training partition.</param>
/// <param name="timeOffset">Number of preceding target values used as inputs.</param>
/// <param name="rmsError">Receives the RMS error reported by alglib.</param>
/// <param name="cvRmsError">Receives the cross-validation RMS error reported by alglib.</param>
/// <returns>A solution created from the model; it holds a clone of the problem data.</returns>
/// <exception cref="NotSupportedException">The input matrix contains NaN or infinity.</exception>
/// <exception cref="ArgumentException">alglib reports a result code other than 1.</exception>
private static ITimeSeriesPrognosisSolution CreateAutoRegressiveSolution(ITimeSeriesPrognosisProblemData problemData, int timeOffset, out double rmsError, out double cvRmsError) {
  string targetVariable = problemData.TargetVariable;

  double[,] inputMatrix = new double[problemData.TrainingPartition.Size, timeOffset + 1];
  var targetValues = problemData.Dataset.GetDoubleValues(targetVariable).ToList();
  // Column col holds the target value (col + 1) rows before the current row.
  // The smallest index read is TrainingPartition.Start - timeOffset, so the partition must not start earlier than row timeOffset.
  for (int i = 0, row = problemData.TrainingPartition.Start; i < problemData.TrainingPartition.Size; i++, row++) {
    for (int col = 0; col < timeOffset; col++) {
      inputMatrix[i, col] = targetValues[row - col - 1];
    }
  }
  // set target values in last column
  for (int i = 0; i < inputMatrix.GetLength(0); i++) {
    inputMatrix[i, timeOffset] = targetValues[i + problemData.TrainingPartition.Start];
  }

  if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) {
    throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
  }

  // model, report, result code and coefficients are all assigned through out parameters,
  // so no placeholder instances are created for them
  alglib.linearmodel lm;
  alglib.lrreport ar;
  int nRows = inputMatrix.GetLength(0);
  int nFeatures = inputMatrix.GetLength(1) - 1;
  double[] coefficients; // last coefficient is for the constant

  int retVal;
  alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
  if (retVal != 1) {
    throw new ArgumentException("Error in calculation of linear regression solution");
  }
  rmsError = ar.rmserror;
  cvRmsError = ar.cvrmserror;

  alglib.lrunpack(lm, out coefficients, out nFeatures);

  ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
  ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
  tree.Root.AddSubtree(startNode);
  ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
  startNode.AddSubtree(addition);

  // coefficient i belongs to matrix column i, i.e. to the target value i + 1 rows back
  for (int i = 0; i < timeOffset; i++) {
    LaggedVariableTreeNode node = (LaggedVariableTreeNode)new LaggedVariable().CreateTreeNode();
    node.VariableName = targetVariable;
    node.Weight = coefficients[i];
    node.Lag = (i + 1) * -1;
    addition.AddSubtree(node);
  }

  ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode();
  cNode.Value = coefficients[coefficients.Length - 1];
  addition.AddSubtree(cNode);

  var interpreter = new SymbolicTimeSeriesPrognosisExpressionTreeInterpreter(problemData.TargetVariable);
  var model = new SymbolicTimeSeriesPrognosisModel(problemData.TargetVariable, tree, interpreter);
  var solution = model.CreateTimeSeriesPrognosisSolution((ITimeSeriesPrognosisProblemData)problemData.Clone());
  return solution;
}
/// <summary>
/// Fits a linear model (<c>alglib.lrbuild</c>) on the training rows of <paramref name="problemData"/> and wraps it
/// into a symbolic regression solution: a sum of weighted input variables plus a constant.
/// </summary>
/// <param name="problemData">Regression problem providing dataset, target, allowed inputs and training rows.</param>
/// <param name="rmsError">Receives the RMS error reported by alglib.</param>
/// <param name="cvRmsError">Receives the cross-validation RMS error reported by alglib.</param>
/// <returns>The solution named "Linear Regression Solution"; it holds a clone of the problem data.</returns>
/// <exception cref="NotSupportedException">The prepared input matrix contains NaN or infinity.</exception>
/// <exception cref="ArgumentException">alglib reports a result code other than 1.</exception>
public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) {
  var dataset = problemData.Dataset;
  string targetVariable = problemData.TargetVariable;
  IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
  IEnumerable<int> rows = problemData.TrainingIndices;

  // input columns first, target variable in the last column
  double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
  if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) {
    throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
  }

  // model, report, result code and coefficients are all assigned through out parameters,
  // so no placeholder instances are created for them
  alglib.linearmodel lm;
  alglib.lrreport ar;
  int nRows = inputMatrix.GetLength(0);
  int nFeatures = inputMatrix.GetLength(1) - 1;
  double[] coefficients; // last coefficient is for the constant

  int retVal;
  alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
  if (retVal != 1) {
    throw new ArgumentException("Error in calculation of linear regression solution");
  }
  rmsError = ar.rmserror;
  cvRmsError = ar.cvrmserror;

  alglib.lrunpack(lm, out coefficients, out nFeatures);

  ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
  ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
  tree.Root.AddSubtree(startNode);
  ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
  startNode.AddSubtree(addition);

  // coefficients are indexed in the enumeration order of the allowed input variables
  int col = 0;
  foreach (string column in allowedInputVariables) {
    VariableTreeNode vNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
    vNode.VariableName = column;
    vNode.Weight = coefficients[col];
    addition.AddSubtree(vNode);
    col++;
  }

  ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode();
  cNode.Value = coefficients[coefficients.Length - 1];
  addition.AddSubtree(cNode);

  SymbolicRegressionSolution solution = new SymbolicRegressionSolution(
    new SymbolicRegressionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter()),
    (IRegressionProblemData)problemData.Clone());
  solution.Model.Name = "Linear Regression Model";
  solution.Name = "Linear Regression Solution";
  return solution;
}