private static ISymbolicRegressionSolution CreateSymbolicSolution(List <IRegressionModel> models, double nu, IRegressionProblemData problemData) { var symbModels = models.OfType <ISymbolicRegressionModel>(); var lowerLimit = symbModels.Min(m => m.LowerEstimationLimit); var upperLimit = symbModels.Max(m => m.UpperEstimationLimit); var interpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter(); var progRootNode = new ProgramRootSymbol().CreateTreeNode(); var startNode = new StartSymbol().CreateTreeNode(); var addNode = new Addition().CreateTreeNode(); var mulNode = new Multiplication().CreateTreeNode(); var scaleNode = (ConstantTreeNode) new Constant().CreateTreeNode(); // all models are scaled using the same nu scaleNode.Value = nu; foreach (var m in symbModels) { var relevantPart = m.SymbolicExpressionTree.Root.GetSubtree(0).GetSubtree(0); // skip root and start addNode.AddSubtree((ISymbolicExpressionTreeNode)relevantPart.Clone()); } mulNode.AddSubtree(addNode); mulNode.AddSubtree(scaleNode); startNode.AddSubtree(mulNode); progRootNode.AddSubtree(startNode); var t = new SymbolicExpressionTree(progRootNode); var combinedModel = new SymbolicRegressionModel(problemData.TargetVariable, t, interpreter, lowerLimit, upperLimit); var sol = new SymbolicRegressionSolution(combinedModel, problemData); return(sol); }
public static IClassificationSolution CreateLinearDiscriminantAnalysisSolution(IClassificationProblemData problemData) { var dataset = problemData.Dataset; string targetVariable = problemData.TargetVariable; IEnumerable <string> allowedInputVariables = problemData.AllowedInputVariables; IEnumerable <int> rows = problemData.TrainingIndices; int nClasses = problemData.ClassNames.Count(); double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows); if (inputMatrix.Cast <double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) { throw new NotSupportedException("Linear discriminant analysis does not support NaN or infinity values in the input dataset."); } // change class values into class index int targetVariableColumn = inputMatrix.GetLength(1) - 1; List <double> classValues = problemData.ClassValues.OrderBy(x => x).ToList(); for (int row = 0; row < inputMatrix.GetLength(0); row++) { inputMatrix[row, targetVariableColumn] = classValues.IndexOf(inputMatrix[row, targetVariableColumn]); } int info; double[] w; alglib.fisherlda(inputMatrix, inputMatrix.GetLength(0), allowedInputVariables.Count(), nClasses, out info, out w); if (info < 1) { throw new ArgumentException("Error in calculation of linear discriminant analysis solution"); } ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode()); ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode(); tree.Root.AddSubtree(startNode); ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode(); startNode.AddSubtree(addition); int col = 0; foreach (string column in allowedInputVariables) { VariableTreeNode vNode = (VariableTreeNode) new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode(); vNode.VariableName = column; vNode.Weight = w[col]; addition.AddSubtree(vNode); col++; } var model = LinearDiscriminantAnalysis.CreateDiscriminantFunctionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter(), problemData, rows); SymbolicDiscriminantFunctionClassificationSolution solution = new SymbolicDiscriminantFunctionClassificationSolution(model, (IClassificationProblemData)problemData.Clone()); return(solution); }
private static ITimeSeriesPrognosisSolution CreateAutoRegressiveSolution(ITimeSeriesPrognosisProblemData problemData, int timeOffset, out double rmsError, out double cvRmsError) { string targetVariable = problemData.TargetVariable; double[,] inputMatrix = new double[problemData.TrainingPartition.Size, timeOffset + 1]; var targetValues = problemData.Dataset.GetDoubleValues(targetVariable).ToList(); for (int i = 0, row = problemData.TrainingPartition.Start; i < problemData.TrainingPartition.Size; i++, row++) { for (int col = 0; col < timeOffset; col++) { inputMatrix[i, col] = targetValues[row - col - 1]; } } // set target values in last column for (int i = 0; i < inputMatrix.GetLength(0); i++) { inputMatrix[i, timeOffset] = targetValues[i + problemData.TrainingPartition.Start]; } if (inputMatrix.Cast <double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) { throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset."); } alglib.linearmodel lm = new alglib.linearmodel(); alglib.lrreport ar = new alglib.lrreport(); int nRows = inputMatrix.GetLength(0); int nFeatures = inputMatrix.GetLength(1) - 1; double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant int retVal = 1; alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar); if (retVal != 1) { throw new ArgumentException("Error in calculation of linear regression solution"); } rmsError = ar.rmserror; cvRmsError = ar.cvrmserror; alglib.lrunpack(lm, out coefficients, out nFeatures); ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode()); ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode(); tree.Root.AddSubtree(startNode); ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode(); startNode.AddSubtree(addition); for (int i = 0; i < timeOffset; i++) { LaggedVariableTreeNode node = (LaggedVariableTreeNode) new LaggedVariable().CreateTreeNode(); node.VariableName = targetVariable; node.Weight = coefficients[i]; node.Lag = (i + 1) * -1; addition.AddSubtree(node); } ConstantTreeNode cNode = (ConstantTreeNode) new Constant().CreateTreeNode(); cNode.Value = coefficients[coefficients.Length - 1]; addition.AddSubtree(cNode); var interpreter = new SymbolicTimeSeriesPrognosisExpressionTreeInterpreter(problemData.TargetVariable); var model = new SymbolicTimeSeriesPrognosisModel(problemData.TargetVariable, tree, interpreter); var solution = model.CreateTimeSeriesPrognosisSolution((ITimeSeriesPrognosisProblemData)problemData.Clone()); return(solution); }
public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) { var dataset = problemData.Dataset; string targetVariable = problemData.TargetVariable; IEnumerable <string> allowedInputVariables = problemData.AllowedInputVariables; IEnumerable <int> rows = problemData.TrainingIndices; double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows); if (inputMatrix.Cast <double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) { throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset."); } alglib.linearmodel lm = new alglib.linearmodel(); alglib.lrreport ar = new alglib.lrreport(); int nRows = inputMatrix.GetLength(0); int nFeatures = inputMatrix.GetLength(1) - 1; double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant int retVal = 1; alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar); if (retVal != 1) { throw new ArgumentException("Error in calculation of linear regression solution"); } rmsError = ar.rmserror; cvRmsError = ar.cvrmserror; alglib.lrunpack(lm, out coefficients, out nFeatures); ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode()); ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode(); tree.Root.AddSubtree(startNode); ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode(); startNode.AddSubtree(addition); int col = 0; foreach (string column in allowedInputVariables) { VariableTreeNode vNode = (VariableTreeNode) new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode(); vNode.VariableName = column; vNode.Weight = coefficients[col]; addition.AddSubtree(vNode); col++; } ConstantTreeNode cNode = (ConstantTreeNode) new Constant().CreateTreeNode(); cNode.Value = coefficients[coefficients.Length - 1]; addition.AddSubtree(cNode); SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), (IRegressionProblemData)problemData.Clone()); solution.Model.Name = "Linear Regression Model"; solution.Name = "Linear Regression Solution"; return(solution); }