/// <summary>
/// Fits a linear autoregressive model with <paramref name="timeOffset"/> lagged target values on the
/// training partition and wraps the result in a time-series prognosis solution.
/// </summary>
/// <param name="problemData">Supplies the dataset, the target variable and the training partition.</param>
/// <param name="timeOffset">Number of lagged target values (lags -1 .. -timeOffset) used as regressors.</param>
/// <param name="rmsError">Receives the RMS error from the ALGLIB regression report.</param>
/// <param name="cvRmsError">Receives the cross-validation RMS error from the ALGLIB regression report.</param>
/// <returns>A prognosis solution created on a clone of <paramref name="problemData"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The training partition starts before row <paramref name="timeOffset"/>, so the lagged values for the first training row do not exist.
/// </exception>
/// <exception cref="NotSupportedException">The regression matrix contains NaN or infinity.</exception>
/// <exception cref="ArgumentException">ALGLIB did not report success (return code other than 1).</exception>
private static ITimeSeriesPrognosisSolution CreateAutoRegressiveSolution(ITimeSeriesPrognosisProblemData problemData, int timeOffset, out double rmsError, out double cvRmsError)
        {
            string targetVariable = problemData.TargetVariable;
            int trainingStart     = problemData.TrainingPartition.Start;
            int trainingSize      = problemData.TrainingPartition.Size;

            // The first training row reads targetValues[trainingStart - timeOffset]; fail with a clear
            // message instead of letting the list indexer report a negative index.
            if (trainingSize > 0 && timeOffset > trainingStart)
            {
                throw new ArgumentOutOfRangeException("timeOffset",
                    "The training partition starts at row " + trainingStart + " but " + timeOffset +
                    " preceding target values are required to build the lagged inputs.");
            }

            double[,] inputMatrix = new double[trainingSize, timeOffset + 1];
            var targetValues = problemData.Dataset.GetDoubleValues(targetVariable).ToList();

            for (int i = 0, row = trainingStart; i < trainingSize; i++, row++)
            {
                // columns 0 .. timeOffset-1 hold the target at lags -1 .. -timeOffset
                for (int col = 0; col < timeOffset; col++)
                {
                    inputMatrix[i, col] = targetValues[row - col - 1];
                }
                // the last column holds the value to be predicted
                inputMatrix[i, timeOffset] = targetValues[row];
            }

            if (inputMatrix.Cast <double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
            {
                throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
            }

            // All three are produced by lrbuild through out parameters, so no placeholder instances are needed.
            alglib.linearmodel lm;
            alglib.lrreport    ar;
            int retVal;
            int nRows     = inputMatrix.GetLength(0);
            int nFeatures = inputMatrix.GetLength(1) - 1;

            alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
            if (retVal != 1)
            {
                throw new ArgumentException("Error in calculation of linear regression solution");
            }
            rmsError   = ar.rmserror;
            cvRmsError = ar.cvrmserror;

            double[] coefficients; // feature weights followed by the constant term in the last slot
            alglib.lrunpack(lm, out coefficients, out nFeatures);

            var tree = LinearModelToTreeConverter.CreateTree(
                variableNames: Enumerable.Repeat(targetVariable, nFeatures).ToArray(),
                lags: Enumerable.Range(1, timeOffset).Select(lag => -lag).ToArray(),
                coefficients: coefficients.Take(nFeatures).ToArray(),
                @const: coefficients[nFeatures]
                );

            var interpreter = new SymbolicTimeSeriesPrognosisExpressionTreeInterpreter(targetVariable);
            var model       = new SymbolicTimeSeriesPrognosisModel(targetVariable, tree, interpreter);
            var solution    = model.CreateTimeSeriesPrognosisSolution((ITimeSeriesPrognosisProblemData)problemData.Clone());

            return solution;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Combines the given models into a weighted ensemble. The weights are obtained by regressing the
        /// training targets on the models' training estimates with <c>alglib.lrbuildz</c>.
        /// </summary>
        /// <param name="models">Models whose estimates form the regressor columns.</param>
        /// <param name="problemData">Supplies the dataset, training rows and target variable.</param>
        /// <returns>The ensemble solution created for <paramref name="problemData"/>.</returns>
        /// <exception cref="ArgumentException">ALGLIB did not report success (return code other than 1).</exception>
        private static IRegressionEnsembleSolution CreateEnsembleSolution(List <IRegressionModel> models,
                                                                          IRegressionProblemData problemData)
        {
            // NOTE(review): the matrix is sized by TrainingPartition.Size but filled from TrainingIndices;
            // confirm that both always describe the same number of rows.
            int sampleCount = problemData.TrainingPartition.Size;
            int modelCount  = models.Count;

            var regressionData = new double[sampleCount, modelCount + 1];

            // one regressor column per model, filled with that model's training estimates
            int column = 0;
            foreach (var model in models)
            {
                int sampleRow = 0;
                foreach (var estimate in model.GetEstimatedValues(problemData.Dataset, problemData.TrainingIndices))
                {
                    regressionData[sampleRow++, column] = estimate;
                }
                column++;
            }

            // the observed target goes into the last column
            int targetRow = 0;
            foreach (var observed in problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices))
            {
                regressionData[targetRow++, modelCount] = observed;
            }

            alglib.linearmodel fitted;
            alglib.lrreport    report;
            int status;

            alglib.lrbuildz(regressionData, sampleCount, modelCount, out status, out fitted, out report);
            if (status != 1)
            {
                throw new ArgumentException("Error in calculation of linear regression solution");
            }

            double[] weights;
            int      unpackedCount;
            alglib.lrunpack(fitted, out weights, out unpackedCount);

            // only the first modelCount entries are model weights
            var ensembleModel = new RegressionEnsembleModel(models, weights.Take(modelCount));
            ensembleModel.AverageModelEstimates = false;

            return (IRegressionEnsembleSolution)ensembleModel.CreateRegressionSolution(problemData);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Fits a linear regression on the training rows, using the allowed double-valued variables and the
        /// matrix that <c>dataset.ToArray</c> produces for the factor (string) variables, and returns it as a
        /// symbolic regression solution.
        /// </summary>
        /// <param name="problemData">Supplies the dataset, allowed inputs, training rows and target variable.</param>
        /// <param name="rmsError">Receives the RMS error from the ALGLIB regression report.</param>
        /// <param name="cvRmsError">Receives the cross-validation RMS error from the ALGLIB regression report.</param>
        /// <returns>A solution named "Linear Regression Solution" created on a clone of <paramref name="problemData"/>.</returns>
        /// <exception cref="NotSupportedException">The regression matrix contains NaN or infinity.</exception>
        /// <exception cref="ArgumentException">ALGLIB did not report success (return code other than 1).</exception>
        public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError)
        {
            var    dataset        = problemData.Dataset;
            string targetVariable = problemData.TargetVariable;
            IEnumerable <string> allowedInputVariables = problemData.AllowedInputVariables;
            IEnumerable <int>    rows = problemData.TrainingIndices;

            // Materialized once: the names are needed for the matrix, the coefficient count and the tree,
            // and re-running the deferred type filter for each of those uses is wasted work.
            string[] doubleVariables = allowedInputVariables.Where(dataset.VariableHasType <double>).ToArray();
            var factorVariableNames  = allowedInputVariables.Where(dataset.VariableHasType <string>);
            var factorVariables      = dataset.GetFactorVariableValues(factorVariableNames, rows);

            // column layout: [factor columns | double variables | target]
            double[,] binaryMatrix    = dataset.ToArray(factorVariables, rows);
            double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows);
            var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix);

            if (inputMatrix.Cast <double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
            {
                throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
            }

            // All three are produced by lrbuild through out parameters, so no placeholder instances are needed.
            alglib.linearmodel lm;
            alglib.lrreport    ar;
            int retVal;
            int nRows     = inputMatrix.GetLength(0);
            int nFeatures = inputMatrix.GetLength(1) - 1;

            alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
            if (retVal != 1)
            {
                throw new ArgumentException("Error in calculation of linear regression solution");
            }
            rmsError   = ar.rmserror;
            cvRmsError = ar.cvrmserror;

            double[] coefficients; // feature weights followed by the constant term in the last slot
            alglib.lrunpack(lm, out coefficients, out nFeatures);

            // coefficients follow the column layout above: factor weights first, then double-variable weights
            int nFactorCoeff = binaryMatrix.GetLength(1);
            int nVarCoeff    = doubleVariables.Length;
            var tree         = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(),
                                                                     doubleVariables, coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray(),
                                                                     @const: coefficients[nFeatures]);

            SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter()), (IRegressionProblemData)problemData.Clone());

            solution.Model.Name = "Linear Regression Model";
            solution.Name       = "Linear Regression Solution";
            return solution;
        }
        /// <summary>
        /// Fits a linear regression with <c>alglib.lrbuild</c>, treating one column of <paramref name="data"/>
        /// as the response and all other columns as predictors.
        /// </summary>
        /// <param name="data">
        /// Sample matrix, one row per observation. The response column is temporarily swapped into the last
        /// position while fitting; the matrix is restored before the method returns or throws.
        /// </param>
        /// <param name="response_variable_index">
        /// Column index of the response variable; -1 (the default) selects the last column.
        /// </param>
        /// <returns>
        /// An array of length <c>columns + 1</c>: entry j holds the coefficient of predictor column j, the
        /// entry at the response index holds -1, and the final entry holds the intercept.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="response_variable_index"/> is not -1 and not a valid column index.</exception>
        /// <exception cref="ArgumentException"><paramref name="data"/> has no columns, or ALGLIB did not report success (return code other than 1).</exception>
        public static double[] Build(double[,] data, int response_variable_index = -1)
        {
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }

            int n = data.GetLength(0);
            int m = data.GetLength(1);

            if (m == 0)
            {
                throw new ArgumentException("The data matrix must contain at least one column.", "data");
            }
            if (response_variable_index == -1)
            {
                response_variable_index = m - 1;
            }
            if (response_variable_index < 0 || response_variable_index >= m)
            {
                throw new ArgumentOutOfRangeException("response_variable_index",
                    "The response variable index must be -1 or a valid column index of the data matrix.");
            }

            // lrbuild expects the response in the last column
            bool responseMoved = response_variable_index < m - 1;

            double[] w;
            int      res;
            int      nvars;
            alglib.linearmodel lm;
            alglib.lrreport    ar;

            if (responseMoved)
            {
                SwapColumns(data, n, m - 1, response_variable_index);
            }
            try
            {
                alglib.lrbuild(data, n, m - 1, out res, out lm, out ar);
                if (res != 1)
                {
                    throw new ArgumentException("Error in calculation of linear regression solution");
                }
                alglib.lrunpack(lm, out w, out nvars);
            }
            finally
            {
                // always hand the caller's matrix back in its original column order
                if (responseMoved)
                {
                    SwapColumns(data, n, m - 1, response_variable_index);
                }
            }

            double[] weight = new double[m + 1];
            Array.Copy(w, weight, m - 1);

            // While fitting, the original last column sat at the response index, so its coefficient is
            // moved back to slot m - 1 and the response slot is marked with -1.
            weight[m - 1] = weight[response_variable_index];
            weight[response_variable_index] = -1;
            weight[m] = w[w.Length - 1]; //Intercept
            return weight;
        }

        /// <summary>Exchanges two columns of <paramref name="data"/> in place for the first <paramref name="rowCount"/> rows.</summary>
        private static void SwapColumns(double[,] data, int rowCount, int first, int second)
        {
            for (int i = 0; i < rowCount; i++)
            {
                double temp = data[i, first];
                data[i, first]  = data[i, second];
                data[i, second] = temp;
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Fits a linear regression on the matrix produced by <c>PrepareData</c> and returns it as a
        /// symbolic regression solution.
        /// </summary>
        /// <param name="problemData">Problem data handed to <c>PrepareData</c> and cloned into the solution.</param>
        /// <param name="rmsError">Receives the RMS error from the ALGLIB regression report.</param>
        /// <param name="cvRmsError">Receives the cross-validation RMS error from the ALGLIB regression report.</param>
        /// <returns>A solution named "Linear Regression Solution" whose model is named "Linear Regression Model".</returns>
        /// <exception cref="ArgumentException">ALGLIB did not report success (return code other than 1).</exception>
        public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError)
        {
            double[,] regressionMatrix;
            IEnumerable <string> numericInputs;
            IEnumerable <KeyValuePair <string, IEnumerable <string> > > factorInputs;
            PrepareData(problemData, out regressionMatrix, out numericInputs, out factorInputs);

            // every column except the last one is a regressor
            int sampleCount  = regressionMatrix.GetLength(0);
            int featureCount = regressionMatrix.GetLength(1) - 1;

            alglib.linearmodel fitted;
            alglib.lrreport    report;
            int status;
            alglib.lrbuild(regressionMatrix, sampleCount, featureCount, out status, out fitted, out report);
            if (status != 1)
            {
                throw new ArgumentException("Error in calculation of linear regression solution");
            }

            rmsError   = report.rmserror;
            cvRmsError = report.cvrmserror;

            double[] weights; // feature weights followed by the constant term in the last slot
            alglib.lrunpack(fitted, out weights, out featureCount);

            // factor weights come first, followed by the weights of the double variables
            int factorWeightCount  = factorInputs.Sum(kvp => kvp.Value.Count());
            int numericWeightCount = numericInputs.Count();

            double[] factorWeights  = weights.Take(factorWeightCount).ToArray();
            string[] numericNames   = numericInputs.ToArray();
            double[] numericWeights = weights.Skip(factorWeightCount).Take(numericWeightCount).ToArray();

            var tree = LinearModelToTreeConverter.CreateTree(factorInputs, factorWeights,
                                                             numericNames, numericWeights,
                                                             @const: weights[featureCount]);

            var model    = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter());
            var solution = new SymbolicRegressionSolution(model, (IRegressionProblemData)problemData.Clone());

            solution.Model.Name = "Linear Regression Model";
            solution.Name       = "Linear Regression Solution";
            return solution;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Fits a linear regression on the matrix produced by <c>PrepareData</c> and wraps the weights and
        /// the covariance matrix from the ALGLIB report in a <c>LinearRegressionModel</c>.
        /// </summary>
        /// <param name="problemData">Problem data handed to <c>PrepareData</c> and cloned into the solution.</param>
        /// <param name="rmsError">Receives the RMS error from the ALGLIB regression report.</param>
        /// <param name="cvRmsError">Receives the cross-validation RMS error from the ALGLIB regression report.</param>
        /// <returns>A regression solution named "Linear Regression Solution".</returns>
        /// <exception cref="ArgumentException">ALGLIB did not report success (return code other than 1).</exception>
        public static IRegressionSolution CreateSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError)
        {
            double[,] regressionMatrix;
            IEnumerable <string> numericInputs;
            IEnumerable <KeyValuePair <string, IEnumerable <string> > > factorInputs;
            PrepareData(problemData, out regressionMatrix, out numericInputs, out factorInputs);

            // every column except the last one is a regressor
            int sampleCount  = regressionMatrix.GetLength(0);
            int featureCount = regressionMatrix.GetLength(1) - 1;

            alglib.linearmodel fitted;
            alglib.lrreport    report;
            int status;
            alglib.lrbuild(regressionMatrix, sampleCount, featureCount, out status, out fitted, out report);
            if (status != 1)
            {
                throw new ArgumentException("Error in calculation of linear regression solution");
            }

            rmsError   = report.rmserror;
            cvRmsError = report.cvrmserror;

            // extract the fitted parameters
            double[] weights;
            int      unpackedCount;
            alglib.lrunpack(fitted, out weights, out unpackedCount);

            // report.c is the covariance matrix, array[0..NVars,0..NVars], with C[i, j] = Cov(A[i], A[j])
            var model = new LinearRegressionModel(weights, report.c, cvRmsError, problemData.TargetVariable, numericInputs, factorInputs);
            var solution = model.CreateRegressionSolution((IRegressionProblemData)problemData.Clone());

            solution.Name = "Linear Regression Solution";
            return solution;
        }
        /// <summary>
        /// Fits a four-predictor linear regression of <c>SteelCarbonPercent</c> on <c>TimeFromX</c>,
        /// <c>CarbonOxideIVP</c>, <c>CarbonMonoxideVP</c> and <c>CarbonOxideVP</c> over
        /// <paramref name="matrixStateData"/> and evaluates it for <paramref name="currentStateData"/>.
        /// </summary>
        /// <param name="matrixStateData">Observations used to fit the regression.</param>
        /// <param name="currentStateData">State whose carbon value is computed from the fitted coefficients.</param>
        /// <returns>
        /// The computed carbon value. If ALGLIB reports a status other than 1, that status code is returned
        /// instead; if the unpacked model does not have four features, -2.011 is returned.
        /// </returns>
        public static double MFactorCarbonPlus(List<MFCPData> matrixStateData, MFCPData currentStateData)
        {
            const int predictorCount = 4;
            int sampleCount = matrixStateData.Count;
            var samples = new double[sampleCount, predictorCount + 1];

            int rowIndex = 0;
            foreach (var state in matrixStateData)
            {
                samples[rowIndex, 0] = state.TimeFromX;            // X1
                samples[rowIndex, 1] = state.CarbonOxideIVP;       // X2
                samples[rowIndex, 2] = state.CarbonMonoxideVP;     // X3
                samples[rowIndex, 3] = state.CarbonOxideVP;        // X4
                samples[rowIndex, 4] = state.SteelCarbonPercent;   // Y
                rowIndex++;
            }

            int status;
            alglib.linearmodel fitted;
            alglib.lrreport report;
            alglib.lrbuild(samples, sampleCount, predictorCount, out status, out fitted, out report);
            if (status != 1)
            {
                // the ALGLIB status code doubles as the error result
                return status;
            }

            double[] weights;
            int unpackedCount;
            alglib.lrunpack(fitted, out weights, out unpackedCount);
            if (unpackedCount != predictorCount)
            {
                return -2.011;
            }

            // constant term first, then the four weighted predictors in column order
            return weights[4]
                   + weights[0] * currentStateData.TimeFromX
                   + weights[1] * currentStateData.CarbonOxideIVP
                   + weights[2] * currentStateData.CarbonMonoxideVP
                   + weights[3] * currentStateData.CarbonOxideVP;
        }
    /// <summary>
    /// Builds a weighted regression ensemble. The weights come from <c>alglib.lrbuildz</c>, fitted to the
    /// training targets with the models' training estimates as regressors.
    /// </summary>
    /// <param name="models">Models whose estimates form the regressor columns.</param>
    /// <param name="problemData">Supplies the dataset, training rows and target variable.</param>
    /// <returns>The ensemble solution created for <paramref name="problemData"/>.</returns>
    /// <exception cref="ArgumentException">ALGLIB did not report success (return code other than 1).</exception>
    private static IRegressionEnsembleSolution CreateEnsembleSolution(List<IRegressionModel> models,
      IRegressionProblemData problemData) {
      int sampleCount = problemData.TrainingPartition.Size;
      int modelCount = models.Count;
      var regressionData = new double[sampleCount, modelCount + 1];

      // regressor columns: training estimates of each model
      int column = 0;
      foreach (var model in models) {
        int sampleRow = 0;
        foreach (var estimate in model.GetEstimatedValues(problemData.Dataset, problemData.TrainingIndices)) {
          regressionData[sampleRow, column] = estimate;
          sampleRow++;
        }
        column++;
      }

      // last column: observed target values
      int targetRow = 0;
      foreach (var observed in problemData.Dataset.GetDoubleValues(problemData.TargetVariable, problemData.TrainingIndices)) {
        regressionData[targetRow, modelCount] = observed;
        targetRow++;
      }

      alglib.linearmodel fitted;
      alglib.lrreport report;
      int status;
      alglib.lrbuildz(regressionData, sampleCount, modelCount, out status, out fitted, out report);
      if (status != 1) throw new ArgumentException("Error in calculation of linear regression solution");

      double[] weights;
      int unpackedCount;
      alglib.lrunpack(fitted, out weights, out unpackedCount);

      // only the first modelCount entries are model weights
      var ensembleModel = new RegressionEnsembleModel(models, weights.Take(modelCount));
      ensembleModel.AverageModelEstimates = false;
      return (IRegressionEnsembleSolution)ensembleModel.CreateRegressionSolution(problemData);
    }
        /// <summary>
        /// Fits a linear autoregressive model with <paramref name="timeOffset"/> lagged target values on the
        /// training partition, builds the matching expression tree by hand and wraps it in a time-series
        /// prognosis solution.
        /// </summary>
        /// <param name="problemData">Supplies the dataset, the target variable and the training partition.</param>
        /// <param name="timeOffset">Number of lagged target values (lags -1 .. -timeOffset) used as regressors.</param>
        /// <param name="rmsError">Receives the RMS error from the ALGLIB regression report.</param>
        /// <param name="cvRmsError">Receives the cross-validation RMS error from the ALGLIB regression report.</param>
        /// <returns>A prognosis solution created on a clone of <paramref name="problemData"/>.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The training partition starts before row <paramref name="timeOffset"/>, so the lagged values for the first training row do not exist.
        /// </exception>
        /// <exception cref="NotSupportedException">The regression matrix contains NaN or infinity.</exception>
        /// <exception cref="ArgumentException">ALGLIB did not report success (return code other than 1).</exception>
        private static ITimeSeriesPrognosisSolution CreateAutoRegressiveSolution(ITimeSeriesPrognosisProblemData problemData, int timeOffset, out double rmsError, out double cvRmsError)
        {
            string targetVariable = problemData.TargetVariable;
            int trainingStart     = problemData.TrainingPartition.Start;
            int trainingSize      = problemData.TrainingPartition.Size;

            // The first training row reads targetValues[trainingStart - timeOffset]; fail with a clear
            // message instead of letting the list indexer report a negative index.
            if (trainingSize > 0 && timeOffset > trainingStart)
            {
                throw new ArgumentOutOfRangeException("timeOffset",
                    "The training partition starts at row " + trainingStart + " but " + timeOffset +
                    " preceding target values are required to build the lagged inputs.");
            }

            double[,] inputMatrix = new double[trainingSize, timeOffset + 1];
            var targetValues = problemData.Dataset.GetDoubleValues(targetVariable).ToList();

            for (int i = 0, row = trainingStart; i < trainingSize; i++, row++)
            {
                // columns 0 .. timeOffset-1 hold the target at lags -1 .. -timeOffset
                for (int col = 0; col < timeOffset; col++)
                {
                    inputMatrix[i, col] = targetValues[row - col - 1];
                }
                // the last column holds the value to be predicted
                inputMatrix[i, timeOffset] = targetValues[row];
            }

            if (inputMatrix.Cast <double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
            {
                throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
            }

            // All three are produced by lrbuild through out parameters, so no placeholder instances are needed.
            alglib.linearmodel lm;
            alglib.lrreport    ar;
            int retVal;
            int nRows     = inputMatrix.GetLength(0);
            int nFeatures = inputMatrix.GetLength(1) - 1;

            alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
            if (retVal != 1)
            {
                throw new ArgumentException("Error in calculation of linear regression solution");
            }
            rmsError   = ar.rmserror;
            cvRmsError = ar.cvrmserror;

            double[] coefficients; // feature weights followed by the constant term in the last slot
            alglib.lrunpack(lm, out coefficients, out nFeatures);

            // tree layout: ProgramRoot -> Start -> Addition(lagged terms ..., constant)
            ISymbolicExpressionTree     tree      = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
            ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();
            tree.Root.AddSubtree(startNode);

            ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();
            startNode.AddSubtree(addition);

            // one weighted lagged-variable node per regressor column
            for (int i = 0; i < timeOffset; i++)
            {
                LaggedVariableTreeNode node = (LaggedVariableTreeNode) new LaggedVariable().CreateTreeNode();
                node.VariableName = targetVariable;
                node.Weight       = coefficients[i];
                node.Lag          = -(i + 1);
                addition.AddSubtree(node);
            }

            ConstantTreeNode cNode = (ConstantTreeNode) new Constant().CreateTreeNode();
            cNode.Value = coefficients[coefficients.Length - 1];
            addition.AddSubtree(cNode);

            var interpreter = new SymbolicTimeSeriesPrognosisExpressionTreeInterpreter(targetVariable);
            var model       = new SymbolicTimeSeriesPrognosisModel(targetVariable, tree, interpreter);
            var solution    = model.CreateTimeSeriesPrognosisSolution((ITimeSeriesPrognosisProblemData)problemData.Clone());

            return solution;
        }
Exemplo n.º 10
0
        /// <summary>
        /// Fits a linear regression of the target on the allowed input variables over the training rows and
        /// returns it as a symbolic regression solution whose tree is a sum of weighted variables plus a constant.
        /// </summary>
        /// <param name="problemData">Supplies the dataset, allowed inputs, training rows and target variable.</param>
        /// <param name="rmsError">Receives the RMS error from the ALGLIB regression report.</param>
        /// <param name="cvRmsError">Receives the cross-validation RMS error from the ALGLIB regression report.</param>
        /// <returns>A solution named "Linear Regression Solution" whose model is named "Linear Regression Model".</returns>
        /// <exception cref="NotSupportedException">The regression matrix contains NaN or infinity.</exception>
        /// <exception cref="ArgumentException">ALGLIB did not report success (return code other than 1).</exception>
        public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError)
        {
            var dataset = problemData.Dataset;
            string targetName = problemData.TargetVariable;
            IEnumerable <string> inputNames   = problemData.AllowedInputVariables;
            IEnumerable <int>    trainingRows = problemData.TrainingIndices;

            // regressors first, target in the last column
            IEnumerable <string> matrixColumns = inputNames.Concat(new string[] { targetName });
            double[,] regressionMatrix = AlglibUtil.PrepareInputMatrix(dataset, matrixColumns, trainingRows);

            bool hasInvalidValue = regressionMatrix.Cast <double>().Any(x => double.IsNaN(x) || double.IsInfinity(x));
            if (hasInvalidValue)
            {
                throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
            }

            int sampleCount  = regressionMatrix.GetLength(0);
            int featureCount = regressionMatrix.GetLength(1) - 1;

            alglib.linearmodel fitted;
            alglib.lrreport    report;
            int status;
            alglib.lrbuild(regressionMatrix, sampleCount, featureCount, out status, out fitted, out report);
            if (status != 1)
            {
                throw new ArgumentException("Error in calculation of linear regression solution");
            }

            rmsError   = report.rmserror;
            cvRmsError = report.cvrmserror;

            double[] weights; // feature weights followed by the constant term in the last slot
            alglib.lrunpack(fitted, out weights, out featureCount);

            // tree layout: ProgramRoot -> Start -> Addition(weighted variables ..., constant)
            ISymbolicExpressionTree tree = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
            ISymbolicExpressionTreeNode start = new StartSymbol().CreateTreeNode();
            ISymbolicExpressionTreeNode sum   = new Addition().CreateTreeNode();
            tree.Root.AddSubtree(start);
            start.AddSubtree(sum);

            // weights are consumed in the order of the allowed input variables
            int weightIndex = 0;
            foreach (string inputName in inputNames)
            {
                var term = (VariableTreeNode) new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
                term.VariableName = inputName;
                term.Weight       = weights[weightIndex++];
                sum.AddSubtree(term);
            }

            var constantTerm = (ConstantTreeNode) new Constant().CreateTreeNode();
            constantTerm.Value = weights[weights.Length - 1];
            sum.AddSubtree(constantTerm);

            var model    = new SymbolicRegressionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter());
            var solution = new SymbolicRegressionSolution(model, (IRegressionProblemData)problemData.Clone());

            solution.Model.Name = "Linear Regression Model";
            solution.Name       = "Linear Regression Solution";
            return solution;
        }