/// <summary>
/// Fits a linear autoregressive model to the target variable of <paramref name="problemData"/>
/// and wraps it in a time series prognosis solution.
/// </summary>
/// <remarks>
/// Every row of the training partition becomes one regression sample: the
/// <paramref name="timeOffset"/> target values preceding the row (lags -1 .. -timeOffset) are the
/// inputs and the row's own target value is the output. The model is fitted with
/// <c>alglib.lrbuild</c> and converted to a tree by <c>LinearModelToTreeConverter.CreateTree</c>.
/// </remarks>
/// <param name="problemData">Supplies the dataset, the target variable and the training partition.</param>
/// <param name="timeOffset">Number of lagged target values used as inputs.</param>
/// <param name="rmsError">Receives <c>rmserror</c> from the ALGLIB regression report.</param>
/// <param name="cvRmsError">Receives <c>cvrmserror</c> from the ALGLIB regression report.</param>
/// <returns>The prognosis solution created from the fitted model and a clone of <paramref name="problemData"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The training partition is not empty and starts before row <paramref name="timeOffset"/>, so the
/// lagged inputs of the first training rows would lie before the start of the dataset.
/// </exception>
/// <exception cref="NotSupportedException">The regression matrix contains NaN or infinity values.</exception>
/// <exception cref="ArgumentException"><c>alglib.lrbuild</c> did not report success.</exception>
private static ITimeSeriesPrognosisSolution CreateAutoRegressiveSolution(ITimeSeriesPrognosisProblemData problemData, int timeOffset, out double rmsError, out double cvRmsError) {
    string targetVariable = problemData.TargetVariable;

    double[,] inputMatrix = new double[problemData.TrainingPartition.Size, timeOffset + 1];
    int nRows = inputMatrix.GetLength(0);
    int firstRow = problemData.TrainingPartition.Start;
    var targetValues = problemData.Dataset.GetDoubleValues(targetVariable).ToList();

    // The oldest value read below is targetValues[firstRow - timeOffset]; fail with a clear
    // message instead of letting the list indexer throw on a negative index.
    if (nRows > 0 && firstRow < timeOffset) {
        throw new ArgumentOutOfRangeException("timeOffset", string.Format(
            "The training partition starts at row {0}, but {1} preceding rows are required to build the lagged inputs.",
            firstRow, timeOffset));
    }

    for (int i = 0; i < nRows; i++) {
        int row = firstRow + i;
        // columns 0 .. timeOffset - 1 hold the lagged values (lag 1 first)
        for (int lag = 1; lag <= timeOffset; lag++) {
            inputMatrix[i, lag - 1] = targetValues[row - lag];
        }
        // the last column holds the value to be predicted
        inputMatrix[i, timeOffset] = targetValues[row];
    }

    if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) {
        throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
    }

    int nFeatures = inputMatrix.GetLength(1) - 1;
    alglib.linearmodel lm;
    alglib.lrreport ar;
    int retVal;
    alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
    if (retVal != 1) {
        throw new ArgumentException("Error in calculation of linear regression solution");
    }
    rmsError = ar.rmserror;
    cvRmsError = ar.cvrmserror;

    // lrunpack yields nFeatures weights followed by the constant term
    double[] coefficients;
    alglib.lrunpack(lm, out coefficients, out nFeatures);

    var tree = LinearModelToTreeConverter.CreateTree(
        variableNames: Enumerable.Repeat(problemData.TargetVariable, nFeatures).ToArray(),
        lags: Enumerable.Range(0, timeOffset).Select(i => (i + 1) * -1).ToArray(),
        coefficients: coefficients.Take(nFeatures).ToArray(),
        @const: coefficients[nFeatures]
    );

    var interpreter = new SymbolicTimeSeriesPrognosisExpressionTreeInterpreter(problemData.TargetVariable);
    var model = new SymbolicTimeSeriesPrognosisModel(problemData.TargetVariable, tree, interpreter);
    var solution = model.CreateTimeSeriesPrognosisSolution((ITimeSeriesPrognosisProblemData)problemData.Clone());
    return solution;
}
/// <summary>
/// Builds a weighted regression ensemble from <paramref name="models"/>. The weights are obtained
/// by regressing the training targets on the models' training estimates with <c>alglib.lrbuildz</c>.
/// </summary>
/// <param name="models">Models whose estimates form the input columns of the regression.</param>
/// <param name="problemData">Supplies the dataset, the target variable and the training rows.</param>
/// <returns>The ensemble solution created from the weighted ensemble model.</returns>
/// <exception cref="ArgumentException"><c>alglib.lrbuildz</c> did not report success.</exception>
private static IRegressionEnsembleSolution CreateEnsembleSolution(List<IRegressionModel> models, IRegressionProblemData problemData) {
    var rows = problemData.TrainingPartition.Size;
    var features = models.Count;
    double[,] inputMatrix = new double[rows, features + 1];

    // one column per model, filled with that model's estimates for the training rows
    int column = 0;
    foreach (var model in models) {
        int row = 0;
        foreach (var estimate in model.GetEstimatedValues(problemData.Dataset, problemData.TrainingIndices)) {
            inputMatrix[row, column] = estimate;
            row++;
        }
        column++;
    }

    // the column after the last model holds the target values
    int targetColumn = models.Count;
    int targetRow = 0;
    foreach (var target in problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices)) {
        inputMatrix[targetRow, targetColumn] = target;
        targetRow++;
    }

    alglib.linearmodel lm = new alglib.linearmodel();
    alglib.lrreport ar = new alglib.lrreport();
    int retVal = 1;
    alglib.lrbuildz(inputMatrix, rows, features, out retVal, out lm, out ar);
    if (retVal != 1) {
        throw new ArgumentException("Error in calculation of linear regression solution");
    }

    double[] coefficients;
    alglib.lrunpack(lm, out coefficients, out features);

    // only the first models.Count coefficients are used as model weights
    var ensembleModel = new RegressionEnsembleModel(models, coefficients.Take(models.Count)) { AverageModelEstimates = false };
    return (IRegressionEnsembleSolution)ensembleModel.CreateRegressionSolution(problemData);
}
/// <summary>
/// Fits a linear regression model on the training rows of <paramref name="problemData"/> and
/// returns it as a symbolic regression solution.
/// </summary>
/// <remarks>
/// Allowed input variables of type <c>string</c> are treated as factor variables and expanded via
/// <c>dataset.ToArray(factorVariables, rows)</c>; allowed input variables of type <c>double</c> are
/// used directly. The factor columns come first in the regression matrix, then the double
/// variables, and the target variable is the last column.
/// </remarks>
/// <param name="problemData">Supplies the dataset, allowed inputs, target variable and training rows.</param>
/// <param name="rmsError">Receives <c>rmserror</c> from the ALGLIB regression report.</param>
/// <param name="cvRmsError">Receives <c>cvrmserror</c> from the ALGLIB regression report.</param>
/// <returns>A solution named "Linear Regression Solution" built on a clone of <paramref name="problemData"/>.</returns>
/// <exception cref="NotSupportedException">The regression matrix contains NaN or infinity values.</exception>
/// <exception cref="ArgumentException"><c>alglib.lrbuild</c> did not report success.</exception>
public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) {
    var dataset = problemData.Dataset;
    string targetVariable = problemData.TargetVariable;
    IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    IEnumerable<int> rows = problemData.TrainingIndices;

    // Materialized once: the names are needed for the matrix, the coefficient count and the tree,
    // and re-running the deferred query would repeat the type check for every variable each time.
    string[] doubleVariables = allowedInputVariables.Where(dataset.VariableHasType<double>).ToArray();
    var factorVariableNames = allowedInputVariables.Where(dataset.VariableHasType<string>);
    var factorVariables = dataset.GetFactorVariableValues(factorVariableNames, rows);

    double[,] binaryMatrix = dataset.ToArray(factorVariables, rows);
    double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows);
    var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix);

    if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) {
        throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
    }

    int nRows = inputMatrix.GetLength(0);
    int nFeatures = inputMatrix.GetLength(1) - 1;
    alglib.linearmodel lm;
    alglib.lrreport ar;
    int retVal;
    alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
    if (retVal != 1) {
        throw new ArgumentException("Error in calculation of linear regression solution");
    }
    rmsError = ar.rmserror;
    cvRmsError = ar.cvrmserror;

    // coefficients follow the column order of inputMatrix; the last one is the constant
    double[] coefficients;
    alglib.lrunpack(lm, out coefficients, out nFeatures);

    int nFactorCoeff = binaryMatrix.GetLength(1);
    int nVarCoeff = doubleVariables.Length;
    var tree = LinearModelToTreeConverter.CreateTree(
        factorVariables, coefficients.Take(nFactorCoeff).ToArray(),
        doubleVariables, coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray(),
        @const: coefficients[nFeatures]);

    SymbolicRegressionSolution solution = new SymbolicRegressionSolution(
        new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter()),
        (IRegressionProblemData)problemData.Clone());
    solution.Model.Name = "Linear Regression Model";
    solution.Name = "Linear Regression Solution";
    return solution;
}
/// <summary>
/// Fits a linear regression with <c>alglib.lrbuild</c>, using one column of
/// <paramref name="data"/> as the response and all other columns as predictors.
/// </summary>
/// <remarks>
/// <c>alglib.lrbuild</c> is called with the response in the last column, so a different response
/// column is temporarily swapped with the last column of <paramref name="data"/>. The swap is
/// undone before the method returns or throws.
/// </remarks>
/// <param name="data">Sample matrix, one row per sample. Modified only temporarily (see remarks).</param>
/// <param name="response_variable_index">
/// Column index of the response variable; -1 (the default) selects the last column.
/// </param>
/// <returns>
/// An array with one more entry than <paramref name="data"/> has columns: each predictor's
/// coefficient at that predictor's column index, -1 at the response column's index, and the
/// intercept in the final entry.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="response_variable_index"/> is neither -1 nor an existing column index.
/// </exception>
/// <exception cref="ArgumentException"><c>alglib.lrbuild</c> did not report success.</exception>
public static double[] Build(double[,] data, int response_variable_index = -1) {
    if (data == null) {
        throw new ArgumentNullException("data");
    }

    int n = data.GetLength(0);
    int m = data.GetLength(1);
    int lastColumn = m - 1;

    if (response_variable_index == -1) {
        response_variable_index = lastColumn;
    }
    if (response_variable_index < 0 || response_variable_index > lastColumn) {
        throw new ArgumentOutOfRangeException("response_variable_index",
            "The response variable index must be -1 or refer to an existing column of the data matrix.");
    }

    bool swapped = response_variable_index < lastColumn;
    double[] w;

    if (swapped) {
        SwapColumns(data, lastColumn, response_variable_index);
    }
    try {
        alglib.linearmodel lm;
        alglib.lrreport ar;
        int res;
        int nvars;
        alglib.lrbuild(data, n, m - 1, out res, out lm, out ar);
        if (res != 1) {
            throw new ArgumentException(string.Format(
                "Error in calculation of linear regression solution (lrbuild returned {0}).", res));
        }
        alglib.lrunpack(lm, out w, out nvars);
    } finally {
        // restore the caller's matrix even if the fit failed
        if (swapped) {
            SwapColumns(data, lastColumn, response_variable_index);
        }
    }

    double[] weight = new double[m + 1];
    Array.Copy(w, weight, lastColumn);

    // During the fit the original last column sat at response_variable_index, so its coefficient
    // is moved back to the last column's slot and the response slot is marked with -1.
    weight[lastColumn] = weight[response_variable_index];
    weight[response_variable_index] = -1;
    weight[m] = w[w.Length - 1]; // intercept
    return weight;
}

/// <summary>Exchanges two columns of <paramref name="matrix"/> in place.</summary>
private static void SwapColumns(double[,] matrix, int first, int second) {
    int rowCount = matrix.GetLength(0);
    for (int i = 0; i < rowCount; i++) {
        double temp = matrix[i, first];
        matrix[i, first] = matrix[i, second];
        matrix[i, second] = temp;
    }
}
/// <summary>
/// Fits a linear regression model to the matrix produced by <c>PrepareData</c> and returns it as
/// a symbolic regression solution.
/// </summary>
/// <param name="problemData">Problem data handed to <c>PrepareData</c> and cloned into the solution.</param>
/// <param name="rmsError">Receives <c>rmserror</c> from the ALGLIB regression report.</param>
/// <param name="cvRmsError">Receives <c>cvrmserror</c> from the ALGLIB regression report.</param>
/// <returns>A solution named "Linear Regression Solution" whose model is named "Linear Regression Model".</returns>
/// <exception cref="ArgumentException"><c>alglib.lrbuild</c> did not report success.</exception>
public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) {
    double[,] inputMatrix;
    IEnumerable<string> doubleVariables;
    IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables;
    PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables);

    alglib.linearmodel linearModel = new alglib.linearmodel();
    alglib.lrreport report = new alglib.lrreport();
    int sampleCount = inputMatrix.GetLength(0);
    int featureCount = inputMatrix.GetLength(1) - 1; // the last column is the target
    int status = 1;
    alglib.lrbuild(inputMatrix, sampleCount, featureCount, out status, out linearModel, out report);
    if (status != 1) {
        throw new ArgumentException("Error in calculation of linear regression solution");
    }
    rmsError = report.rmserror;
    cvRmsError = report.cvrmserror;

    // the last coefficient is the constant
    double[] coefficients = new double[featureCount + 1];
    alglib.lrunpack(linearModel, out coefficients, out featureCount);

    // factor coefficients come first, followed by one coefficient per double variable
    int factorCoefficientCount = factorVariables.Sum(kvp => kvp.Value.Count());
    int variableCoefficientCount = doubleVariables.Count();
    var tree = LinearModelToTreeConverter.CreateTree(
        factorVariables,
        coefficients.Take(factorCoefficientCount).ToArray(),
        doubleVariables.ToArray(),
        coefficients.Skip(factorCoefficientCount).Take(variableCoefficientCount).ToArray(),
        @const: coefficients[featureCount]);

    var regressionModel = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter());
    var solution = new SymbolicRegressionSolution(regressionModel, (IRegressionProblemData)problemData.Clone());
    solution.Model.Name = "Linear Regression Model";
    solution.Name = "Linear Regression Solution";
    return solution;
}
/// <summary>
/// Fits a linear regression model to the matrix produced by <c>PrepareData</c> and returns the
/// regression solution created by a <c>LinearRegressionModel</c>.
/// </summary>
/// <param name="problemData">Problem data handed to <c>PrepareData</c> and cloned into the solution.</param>
/// <param name="rmsError">Receives <c>rmserror</c> from the ALGLIB regression report.</param>
/// <param name="cvRmsError">Receives <c>cvrmserror</c> from the ALGLIB regression report.</param>
/// <returns>A solution named "Linear Regression Solution".</returns>
/// <exception cref="ArgumentException"><c>alglib.lrbuild</c> did not report success.</exception>
public static IRegressionSolution CreateSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) {
    double[,] inputMatrix;
    IEnumerable<string> doubleVariables;
    IEnumerable<KeyValuePair<string, IEnumerable<string>>> factorVariables;
    PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables);

    alglib.linearmodel linearModel = new alglib.linearmodel();
    alglib.lrreport report = new alglib.lrreport();
    int sampleCount = inputMatrix.GetLength(0);
    int featureCount = inputMatrix.GetLength(1) - 1; // the last column is the target
    int status = 1;
    alglib.lrbuild(inputMatrix, sampleCount, featureCount, out status, out linearModel, out report);
    if (status != 1) {
        throw new ArgumentException("Error in calculation of linear regression solution");
    }
    rmsError = report.rmserror;
    cvRmsError = report.cvrmserror;

    // get parameters of the model
    double[] weights;
    int unpackedVariableCount;
    alglib.lrunpack(linearModel, out weights, out unpackedVariableCount);

    // report.c is the covariation matrix, array[0..NVars,0..NVars].
    // C[i, j] = Cov(A[i], A[j])
    var regressionModel = new LinearRegressionModel(weights, report.c, cvRmsError, problemData.TargetVariable, doubleVariables, factorVariables);
    var solution = regressionModel.CreateRegressionSolution((IRegressionProblemData)problemData.Clone());
    solution.Name = "Linear Regression Solution";
    return solution;
}
/// <summary>
/// Fits a four-input linear regression of <c>SteelCarbonPercent</c> on the samples in
/// <paramref name="matrixStateData"/> and evaluates it for <paramref name="currentStateData"/>.
/// </summary>
/// <param name="matrixStateData">Samples used to fit the model, one regression row per item.</param>
/// <param name="currentStateData">State for which the fitted model is evaluated.</param>
/// <returns>
/// The model output for <paramref name="currentStateData"/>. If <c>alglib.lrbuild</c> does not
/// report success, its status code is returned instead; if the unpacked model does not have four
/// inputs, -2.011 is returned.
/// </returns>
public static double MFactorCarbonPlus(List<MFCPData> matrixStateData, MFCPData currentStateData) {
    const int nFeatures = 4;

    int status = 0;
    var samples = new double[matrixStateData.Count, nFeatures + 1];
    var model = new alglib.linearmodel();
    var report = new alglib.lrreport();

    int sampleCount = matrixStateData.Count;
    for (int row = 0; row < sampleCount; row++) {
        var sample = matrixStateData[row];
        samples[row, 0] = sample.TimeFromX;                 // X1
        samples[row, 1] = sample.CarbonOxideIVP;            // X2
        samples[row, 2] = sample.CarbonMonoxideVP;          // X3
        samples[row, 3] = sample.CarbonOxideVP;             // X4
        samples[row, nFeatures] = sample.SteelCarbonPercent; // Y
    }

    alglib.lrbuild(samples, sampleCount, nFeatures, out status, out model, out report);
    if (status != 1) {
        // failure: hand the ALGLIB status code back to the caller
        return status;
    }

    double[] coefficients;
    int unpackedFeatureCount;
    alglib.lrunpack(model, out coefficients, out unpackedFeatureCount);
    if (unpackedFeatureCount != nFeatures) {
        return -2.011;
    }

    // start from the constant term (stored after the feature weights), then add each weighted input
    double calculatedCarbon = coefficients[nFeatures];
    calculatedCarbon += coefficients[0] * currentStateData.TimeFromX;
    calculatedCarbon += coefficients[1] * currentStateData.CarbonOxideIVP;
    calculatedCarbon += coefficients[2] * currentStateData.CarbonMonoxideVP;
    calculatedCarbon += coefficients[3] * currentStateData.CarbonOxideVP;
    return calculatedCarbon;
}
/// <summary>
/// Creates an ensemble solution whose model weights come from a linear regression
/// (<c>alglib.lrbuildz</c>) of the training targets on the individual models' training estimates.
/// </summary>
/// <param name="models">The models to combine; each contributes one input column.</param>
/// <param name="problemData">Supplies the dataset, the target variable and the training rows.</param>
/// <returns>The solution created by the weighted <c>RegressionEnsembleModel</c>.</returns>
/// <exception cref="ArgumentException"><c>alglib.lrbuildz</c> did not report success.</exception>
private static IRegressionEnsembleSolution CreateEnsembleSolution(List<IRegressionModel> models, IRegressionProblemData problemData) {
    var sampleCount = problemData.TrainingPartition.Size;
    var modelCount = models.Count;
    double[,] regressionData = new double[sampleCount, modelCount + 1];

    // add model estimates
    for (int column = 0; column < models.Count; column++) {
        int row = 0;
        foreach (var estimate in models[column].GetEstimatedValues(problemData.Dataset, problemData.TrainingIndices)) {
            regressionData[row++, column] = estimate;
        }
    }

    // add target
    int targetRow = 0;
    foreach (var target in problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices)) {
        regressionData[targetRow++, models.Count] = target;
    }

    alglib.linearmodel fittedModel = new alglib.linearmodel();
    alglib.lrreport report = new alglib.lrreport();
    double[] weights;
    int status = 1;
    alglib.lrbuildz(regressionData, sampleCount, modelCount, out status, out fittedModel, out report);
    if (status != 1) {
        throw new ArgumentException("Error in calculation of linear regression solution");
    }

    alglib.lrunpack(fittedModel, out weights, out modelCount);

    // one weight per model; any remaining unpacked entries are not used
    var ensembleModel = new RegressionEnsembleModel(models, weights.Take(models.Count)) { AverageModelEstimates = false };
    var ensembleSolution = (IRegressionEnsembleSolution)ensembleModel.CreateRegressionSolution(problemData);
    return ensembleSolution;
}
/// <summary>
/// Fits a linear autoregressive model to the target variable of <paramref name="problemData"/>
/// and returns it as a time series prognosis solution whose tree is a sum of weighted lagged
/// variables plus a constant.
/// </summary>
/// <remarks>
/// Every row of the training partition becomes one regression sample: the
/// <paramref name="timeOffset"/> target values preceding the row (lags -1 .. -timeOffset) are the
/// inputs and the row's own target value is the output. The model is fitted with
/// <c>alglib.lrbuild</c>.
/// </remarks>
/// <param name="problemData">Supplies the dataset, the target variable and the training partition.</param>
/// <param name="timeOffset">Number of lagged target values used as inputs.</param>
/// <param name="rmsError">Receives <c>rmserror</c> from the ALGLIB regression report.</param>
/// <param name="cvRmsError">Receives <c>cvrmserror</c> from the ALGLIB regression report.</param>
/// <returns>The prognosis solution created from the fitted model and a clone of <paramref name="problemData"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The training partition is not empty and starts before row <paramref name="timeOffset"/>, so the
/// lagged inputs of the first training rows would lie before the start of the dataset.
/// </exception>
/// <exception cref="NotSupportedException">The regression matrix contains NaN or infinity values.</exception>
/// <exception cref="ArgumentException"><c>alglib.lrbuild</c> did not report success.</exception>
private static ITimeSeriesPrognosisSolution CreateAutoRegressiveSolution(ITimeSeriesPrognosisProblemData problemData, int timeOffset, out double rmsError, out double cvRmsError) {
    string targetVariable = problemData.TargetVariable;

    double[,] inputMatrix = new double[problemData.TrainingPartition.Size, timeOffset + 1];
    int nRows = inputMatrix.GetLength(0);
    int firstRow = problemData.TrainingPartition.Start;
    var targetValues = problemData.Dataset.GetDoubleValues(targetVariable).ToList();

    // The oldest value read below is targetValues[firstRow - timeOffset]; report a missing
    // history explicitly instead of failing inside the list indexer on a negative index.
    if (nRows > 0 && firstRow < timeOffset) {
        throw new ArgumentOutOfRangeException("timeOffset", string.Format(
            "The training partition starts at row {0}, but {1} preceding rows are required to build the lagged inputs.",
            firstRow, timeOffset));
    }

    for (int i = 0; i < nRows; i++) {
        int row = firstRow + i;
        for (int col = 0; col < timeOffset; col++) {
            inputMatrix[i, col] = targetValues[row - col - 1];
        }
        // set target value in last column
        inputMatrix[i, timeOffset] = targetValues[row];
    }

    if (inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x))) {
        throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
    }

    int nFeatures = inputMatrix.GetLength(1) - 1;
    alglib.linearmodel lm;
    alglib.lrreport ar;
    int retVal;
    alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
    if (retVal != 1) {
        throw new ArgumentException("Error in calculation of linear regression solution");
    }
    rmsError = ar.rmserror;
    cvRmsError = ar.cvrmserror;

    // the last coefficient is the constant
    double[] coefficients;
    alglib.lrunpack(lm, out coefficients, out nFeatures);

    // tree layout: root -> start -> addition(lagged variables..., constant)
    ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
    ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
    tree.Root.AddSubtree(startNode);
    ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
    startNode.AddSubtree(addition);

    for (int lag = 1; lag <= timeOffset; lag++) {
        LaggedVariableTreeNode node = (LaggedVariableTreeNode)new LaggedVariable().CreateTreeNode();
        node.VariableName = targetVariable;
        node.Weight = coefficients[lag - 1];
        node.Lag = -lag;
        addition.AddSubtree(node);
    }

    ConstantTreeNode cNode = (ConstantTreeNode)new Constant().CreateTreeNode();
    cNode.Value = coefficients[coefficients.Length - 1];
    addition.AddSubtree(cNode);

    var interpreter = new SymbolicTimeSeriesPrognosisExpressionTreeInterpreter(problemData.TargetVariable);
    var model = new SymbolicTimeSeriesPrognosisModel(problemData.TargetVariable, tree, interpreter);
    var solution = model.CreateTimeSeriesPrognosisSolution((ITimeSeriesPrognosisProblemData)problemData.Clone());
    return solution;
}
/// <summary>
/// Fits a linear regression model on the training rows of <paramref name="problemData"/> and
/// returns it as a symbolic regression solution whose tree is a sum of weighted variables plus a
/// constant.
/// </summary>
/// <param name="problemData">Supplies the dataset, allowed inputs, target variable and training rows.</param>
/// <param name="rmsError">Receives <c>rmserror</c> from the ALGLIB regression report.</param>
/// <param name="cvRmsError">Receives <c>cvrmserror</c> from the ALGLIB regression report.</param>
/// <returns>A solution named "Linear Regression Solution" whose model is named "Linear Regression Model".</returns>
/// <exception cref="NotSupportedException">The regression matrix contains NaN or infinity values.</exception>
/// <exception cref="ArgumentException"><c>alglib.lrbuild</c> did not report success.</exception>
public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError) {
    var dataset = problemData.Dataset;
    string targetVariable = problemData.TargetVariable;
    IEnumerable<string> allowedInputVariables = problemData.AllowedInputVariables;
    IEnumerable<int> rows = problemData.TrainingIndices;

    // inputs first, target variable as the last column
    var matrixColumns = allowedInputVariables.Concat(new string[] { targetVariable });
    double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, matrixColumns, rows);

    bool hasInvalidValue = inputMatrix.Cast<double>().Any(x => double.IsNaN(x) || double.IsInfinity(x));
    if (hasInvalidValue) {
        throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
    }

    alglib.linearmodel linearModel = new alglib.linearmodel();
    alglib.lrreport report = new alglib.lrreport();
    int sampleCount = inputMatrix.GetLength(0);
    int featureCount = inputMatrix.GetLength(1) - 1;
    double[] coefficients = new double[featureCount + 1]; // last coefficient is for the constant
    int status = 1;
    alglib.lrbuild(inputMatrix, sampleCount, featureCount, out status, out linearModel, out report);
    if (status != 1) {
        throw new ArgumentException("Error in calculation of linear regression solution");
    }
    rmsError = report.rmserror;
    cvRmsError = report.cvrmserror;

    alglib.lrunpack(linearModel, out coefficients, out featureCount);

    // tree layout: root -> start -> addition(variables..., constant)
    ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
    ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
    tree.Root.AddSubtree(startNode);
    ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
    startNode.AddSubtree(addition);

    // coefficients are consumed in the order of the allowed input variables
    int coefficientIndex = 0;
    foreach (string variableName in allowedInputVariables) {
        VariableTreeNode variableNode = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
        variableNode.VariableName = variableName;
        variableNode.Weight = coefficients[coefficientIndex++];
        addition.AddSubtree(variableNode);
    }

    ConstantTreeNode constantNode = (ConstantTreeNode)new Constant().CreateTreeNode();
    constantNode.Value = coefficients[coefficients.Length - 1];
    addition.AddSubtree(constantNode);

    var regressionModel = new SymbolicRegressionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter());
    SymbolicRegressionSolution solution = new SymbolicRegressionSolution(regressionModel, (IRegressionProblemData)problemData.Clone());
    solution.Model.Name = "Linear Regression Model";
    solution.Name = "Linear Regression Solution";
    return solution;
}