/// <summary>
/// Creates an ensemble solution in which every member model has its own training/test
/// partition (e.g. one model per fold of a cross-validation run).
/// </summary>
/// <param name="models">The member models of the ensemble.</param>
/// <param name="problemData">Problem data that is cloned once per member model.</param>
/// <param name="trainingPartitions">Per-model training ranges; must match <paramref name="models"/> in length.</param>
/// <param name="testPartitions">Per-model test ranges; must match <paramref name="models"/> in length.</param>
/// <exception cref="ArgumentException">Thrown when the three sequences differ in length.</exception>
public RegressionEnsembleSolution(IEnumerable <IRegressionModel> models, IRegressionProblemData problemData, IEnumerable <IntRange> trainingPartitions, IEnumerable <IntRange> testPartitions)
            : base(new RegressionEnsembleModel(Enumerable.Empty <IRegressionModel>()), new RegressionEnsembleProblemData(problemData))
        {
            this.trainingPartitions  = new Dictionary <IRegressionModel, IntRange>();
            this.testPartitions      = new Dictionary <IRegressionModel, IntRange>();
            this.regressionSolutions = new ItemCollection <IRegressionSolution>();

            List <IRegressionSolution> solutions = new List <IRegressionSolution>();
            var modelEnumerator             = models.GetEnumerator();
            var trainingPartitionEnumerator = trainingPartitions.GetEnumerator();
            var testPartitionEnumerator     = testPartitions.GetEnumerator();

            // non-short-circuiting '&' is intentional: all three enumerators must advance
            // in lockstep even when one of them is already exhausted
            while (modelEnumerator.MoveNext() & trainingPartitionEnumerator.MoveNext() & testPartitionEnumerator.MoveNext())
            {
                // clone the shared problem data and narrow it to this model's ranges
                var p = (IRegressionProblemData)problemData.Clone();
                p.TrainingPartition.Start = trainingPartitionEnumerator.Current.Start;
                p.TrainingPartition.End   = trainingPartitionEnumerator.Current.End;
                p.TestPartition.Start     = testPartitionEnumerator.Current.Start;
                p.TestPartition.End       = testPartitionEnumerator.Current.End;

                solutions.Add(modelEnumerator.Current.CreateRegressionSolution(p));
            }
            // non-short-circuiting '|' likewise: if any sequence still has elements the
            // three argument sequences had different lengths
            if (modelEnumerator.MoveNext() | trainingPartitionEnumerator.MoveNext() | testPartitionEnumerator.MoveNext())
            {
                throw new ArgumentException();
            }

            trainingEvaluationCache = new Dictionary <int, double>(problemData.TrainingIndices.Count());
            testEvaluationCache     = new Dictionary <int, double>(problemData.TestIndices.Count());

            // handlers are registered before AddRange, presumably so the additions below
            // are observed by the solution-collection handler — confirm against handler code
            RegisterRegressionSolutionsEventHandler();
            regressionSolutions.AddRange(solutions);
        }
        /// <summary>
        /// Converts an elastic-net coefficient vector into a symbolic regression solution.
        /// Coefficients are ordered factor-variable values first, then double variables,
        /// with the intercept as the last entry; near-zero coefficients are dropped.
        /// </summary>
        public static IRegressionSolution CreateSymbolicSolution(double[] coeff, IRegressionProblemData problemData)
        {
            var dataset          = problemData.Dataset;
            var inputVariables   = problemData.AllowedInputVariables.ToArray();
            var numericVariables = inputVariables.Where(dataset.VariableHasType <double>);
            var factorNames      = inputVariables.Where(dataset.VariableHasType <string>);
            // factor values must cover the whole dataset (training and test partitions)
            var factorsWithValues = dataset.GetFactorVariableValues(factorNames, Enumerable.Range(0, dataset.Rows));

            var keptFactors          = new List <KeyValuePair <string, IEnumerable <string> > >();
            var keptFactorCoeffs     = new List <double>();
            var keptNumericVariables = new List <string>();
            var keptNumericCoeffs    = new List <double>();

            int idx = 0;
            // keep only factor variable/value combinations with a non-zero coefficient
            foreach (var factor in factorsWithValues)
            {
                var nonZeroValues = new List <string>();
                foreach (var value in factor.Value)
                {
                    if (!coeff[idx].IsAlmost(0.0))
                    {
                        nonZeroValues.Add(value);
                        keptFactorCoeffs.Add(coeff[idx]);
                    }
                    idx++;
                }
                if (nonZeroValues.Any())
                {
                    keptFactors.Add(new KeyValuePair <string, IEnumerable <string> >(factor.Key, nonZeroValues));
                }
            }
            // keep only double variables with a non-zero coefficient
            foreach (var variable in numericVariables)
            {
                if (!coeff[idx].IsAlmost(0.0))
                {
                    keptNumericVariables.Add(variable);
                    keptNumericCoeffs.Add(coeff[idx]);
                }
                idx++;
            }

            // the last coefficient is the intercept of the linear model
            var tree = LinearModelToTreeConverter.CreateTree(
                keptFactors, keptFactorCoeffs.ToArray(),
                keptNumericVariables.ToArray(), keptNumericCoeffs.ToArray(),
                coeff.Last());

            var solution = new SymbolicRegressionSolution(
                new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter()),
                (IRegressionProblemData)problemData.Clone());

            solution.Model.Name = "Elastic-net Linear Regression Model";
            solution.Name       = "Elastic-net Linear Regression Solution";

            return(solution);
        }
        // keep for compatibility with old API
        /// <summary>
        /// Trains a random forest and wraps it, together with a clone of the problem
        /// data, into a solution object. Error measures are reported via out parameters.
        /// </summary>
        public static RandomForestRegressionSolution CreateRandomForestRegressionSolution(IRegressionProblemData problemData, int nTrees, double r, double m, int seed,
                                                                                          out double rmsError, out double avgRelError, out double outOfBagRmsError, out double outOfBagAvgRelError)
        {
            var forestModel = CreateRandomForestRegressionModel(problemData, nTrees, r, m, seed,
                                                                out rmsError, out avgRelError, out outOfBagRmsError, out outOfBagAvgRelError);
            var clonedProblemData = (IRegressionProblemData)problemData.Clone();
            return new RandomForestRegressionSolution(forestModel, clonedProblemData);
        }
Example #4
0
        /// <summary>
        /// Trains a multilayer-perceptron regression model (0, 1 or 2 hidden layers)
        /// with alglib's Levenberg-Marquardt trainer.
        /// </summary>
        /// <param name="problemData">Training data; the returned solution holds a clone.</param>
        /// <param name="nLayers">Number of hidden layers; must be 0, 1 or 2.</param>
        /// <param name="nHiddenNodes1">Nodes in the first hidden layer (unused when nLayers == 0).</param>
        /// <param name="nHiddenNodes2">Nodes in the second hidden layer (only used when nLayers == 2).</param>
        /// <param name="decay">Weight decay passed to the trainer.</param>
        /// <param name="restarts">Number of training restarts.</param>
        /// <param name="rmsError">Root mean squared error on the training matrix.</param>
        /// <param name="avgRelError">Average relative error on the training matrix.</param>
        /// <exception cref="NotSupportedException">Thrown when the input data contains NaN or infinity.</exception>
        /// <exception cref="ArgumentException">Thrown for an invalid layer count or a trainer failure.</exception>
        public static IRegressionSolution CreateNeuralNetworkRegressionSolution(IRegressionProblemData problemData, int nLayers, int nHiddenNodes1, int nHiddenNodes2, double decay, int restarts,
                                                                                out double rmsError, out double avgRelError)
        {
            var    dataset        = problemData.Dataset;
            string targetVariable = problemData.TargetVariable;
            IEnumerable <string> allowedInputVariables = problemData.AllowedInputVariables;
            IEnumerable <int>    rows = problemData.TrainingIndices;

            // the target variable is appended as the last column of the training matrix
            double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
            if (inputMatrix.Cast <double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
            {
                throw new NotSupportedException("Neural network regression does not support NaN or infinity values in the input dataset.");
            }

            // count the inputs once instead of re-enumerating the sequence in every branch
            int nInputs = allowedInputVariables.Count();
            alglib.multilayerperceptron multiLayerPerceptron;
            switch (nLayers)
            {
                case 0:
                    alglib.mlpcreate0(nInputs, 1, out multiLayerPerceptron);
                    break;
                case 1:
                    alglib.mlpcreate1(nInputs, nHiddenNodes1, 1, out multiLayerPerceptron);
                    break;
                case 2:
                    alglib.mlpcreate2(nInputs, nHiddenNodes1, nHiddenNodes2, 1, out multiLayerPerceptron);
                    break;
                default:
                    throw new ArgumentException("Number of layers must be zero, one, or two.", "nLayers");
            }
            alglib.mlpreport rep;
            int nRows = inputMatrix.GetLength(0);

            int info;

            // using mlptrainlm instead of mlptraines or mlptrainbfgs because only one parameter is necessary
            alglib.mlptrainlm(multiLayerPerceptron, inputMatrix, nRows, decay, restarts, out info, out rep);
            if (info != 2)
            {
                throw new ArgumentException("Error in calculation of neural network regression solution");
            }

            rmsError    = alglib.mlprmserror(multiLayerPerceptron, inputMatrix, nRows);
            avgRelError = alglib.mlpavgrelerror(multiLayerPerceptron, inputMatrix, nRows);

            return(new NeuralNetworkRegressionSolution((IRegressionProblemData)problemData.Clone(), new NeuralNetworkModel(multiLayerPerceptron, targetVariable, allowedInputVariables)));
        }
Example #5
0
        /// <summary>
        /// Fits a kernel ridge regression model and returns it as a regression solution.
        /// rmsError is the test-partition RMSE, or NaN when there is no test data;
        /// looCvRMSE is the model's leave-one-out cross-validation RMSE.
        /// </summary>
        public static IRegressionSolution CreateRadialBasisRegressionSolution(IRegressionProblemData problemData, ICovarianceFunction kernel, double lambda, bool scaleInputs, out double rmsError, out double looCvRMSE)
        {
            var model = KernelRidgeRegressionModel.Create(problemData.Dataset, problemData.TargetVariable, problemData.AllowedInputVariables, problemData.TrainingIndices, scaleInputs, kernel, lambda);

            rmsError = double.NaN;
            if (problemData.TestIndices.Any())
            {
                var predicted        = model.GetEstimatedValues(problemData.Dataset, problemData.TestIndices);
                var squaredResiduals = predicted.Zip(problemData.TargetVariableTestValues, (est, target) => (est - target) * (est - target));
                rmsError = Math.Sqrt(squaredResiduals.Average());
            }

            var solution = model.CreateRegressionSolution((IRegressionProblemData)problemData.Clone());
            solution.Model.Name = "Kernel ridge regression model";
            solution.Name       = SolutionResultName;
            looCvRMSE           = model.LooCvRMSE;
            return solution;
        }
        // simple interface
        /// <summary>
        /// Runs gradient boosting for a fixed number of iterations and returns the
        /// resulting ensemble paired with a clone of the problem data.
        /// </summary>
        public static GradientBoostedTreesSolution TrainGbm(IRegressionProblemData problemData, ILossFunction lossFunction, int maxSize, double nu, double r, double m, int maxIterations, uint randSeed = 31415)
        {
            // row/feature sampling fractions and the learning rate must lie in (0, 1]
            Contract.Assert(r > 0);
            Contract.Assert(r <= 1.0);
            Contract.Assert(nu > 0);
            Contract.Assert(nu <= 1.0);

            var boostingState = (GbmState)CreateGbmState(problemData, lossFunction, randSeed, maxSize, r, m, nu);

            int iteration = 0;
            while (iteration < maxIterations)
            {
                MakeStep(boostingState);
                iteration++;
            }

            var boostedModel = boostingState.GetModel();

            return new GradientBoostedTreesSolution(boostedModel, (IRegressionProblemData)problemData.Clone());
        }
Example #7
0
        /// <summary>
        /// Builds an ordinary linear regression model via alglib and wraps it into a
        /// regression solution over a clone of the problem data.
        /// </summary>
        /// <param name="problemData">Training data (converted to a matrix by PrepareData).</param>
        /// <param name="rmsError">Root mean squared error reported by alglib.</param>
        /// <param name="cvRmsError">Leave-one-out CV RMS error reported by alglib.</param>
        /// <exception cref="ArgumentException">Thrown when alglib reports a build failure.</exception>
        public static IRegressionSolution CreateSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError)
        {
            IEnumerable <string> doubleVariables;
            IEnumerable <KeyValuePair <string, IEnumerable <string> > > factorVariables;

            double[,] inputMatrix;
            PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables);

            int nRows     = inputMatrix.GetLength(0);
            // last column of the matrix is the target, so it is not a feature
            int nFeatures = inputMatrix.GetLength(1) - 1;

            // lm, ar and retVal are assigned by lrbuild's out parameters;
            // the previous pre-initializations were dead stores
            alglib.linearmodel lm;
            alglib.lrreport    ar;
            int retVal;

            alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
            if (retVal != 1)
            {
                throw new ArgumentException("Error in calculation of linear regression solution");
            }
            rmsError   = ar.rmserror;
            cvRmsError = ar.cvrmserror;

            // get parameters of the model
            double[] w;
            int      nVars;

            alglib.lrunpack(lm, out w, out nVars);

            // ar.c is the covariation matrix,  array[0..NVars,0..NVars].
            // C[i, j] = Cov(A[i], A[j])

            var solution = new LinearRegressionModel(w, ar.c, cvRmsError, problemData.TargetVariable, doubleVariables, factorVariables)
                           .CreateRegressionSolution((IRegressionProblemData)problemData.Clone());

            solution.Name = "Linear Regression Solution";
            return(solution);
        }
 /// <summary>Pairs this (already trained) model with a clone of the given problem data.</summary>
 public override IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData) {
   var problemDataClone = (IRegressionProblemData)problemData.Clone();
   return new RegressionSolution(this, problemDataClone);
 }
        /// <summary>
        /// Builds a regression solution from a hand-crafted expression tree and checks
        /// its variable impacts against the precomputed expected values.
        /// </summary>
        public void CustomModelVariableImpactTest()
        {
            var problemData     = CreateDefaultProblem();
            var tree            = CreateCustomExpressionTree();
            IRegressionModel model = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter());
            IRegressionSolution solution = new RegressionSolution(model, (IRegressionProblemData)problemData.Clone());
            var expectedImpacts = GetExpectedValuesForCustomProblem();

            CheckDefaultAsserts(solution, expectedImpacts);
        }
Example #10
0
        /// <summary>
        /// Trains an ensemble of multilayer perceptrons (0, 1 or 2 hidden layers) with
        /// alglib's early-stopping trainer and wraps it into a regression solution.
        /// </summary>
        /// <param name="problemData">Training data; the returned solution holds a clone.</param>
        /// <param name="ensembleSize">Number of networks in the ensemble.</param>
        /// <param name="nLayers">Number of hidden layers; must be 0, 1 or 2.</param>
        /// <param name="nHiddenNodes1">Nodes in the first hidden layer (unused when nLayers == 0).</param>
        /// <param name="nHiddenNodes2">Nodes in the second hidden layer (only used when nLayers == 2).</param>
        /// <param name="decay">Weight decay passed to the trainer.</param>
        /// <param name="restarts">Number of training restarts.</param>
        /// <param name="rmsError">Root mean squared error on the training matrix.</param>
        /// <param name="avgRelError">Average relative error on the training matrix.</param>
        /// <exception cref="NotSupportedException">Thrown when the input data contains NaN or infinity.</exception>
        /// <exception cref="ArgumentException">Thrown for an invalid layer count or a trainer failure.</exception>
        public static IRegressionSolution CreateNeuralNetworkEnsembleRegressionSolution(IRegressionProblemData problemData, int ensembleSize, int nLayers, int nHiddenNodes1, int nHiddenNodes2, double decay, int restarts,
                                                                                        out double rmsError, out double avgRelError)
        {
            var    dataset        = problemData.Dataset;
            string targetVariable = problemData.TargetVariable;
            IEnumerable <string> allowedInputVariables = problemData.AllowedInputVariables;
            IEnumerable <int>    rows = problemData.TrainingIndices;

            // the target variable is appended as the last column of the training matrix
            double[,] inputMatrix = dataset.ToArray(allowedInputVariables.Concat(new string[] { targetVariable }), rows);
            if (inputMatrix.ContainsNanOrInfinity())
            {
                throw new NotSupportedException("Neural network ensemble regression does not support NaN or infinity values in the input dataset.");
            }

            // count the inputs once instead of re-enumerating the sequence in every branch
            int nInputs = allowedInputVariables.Count();
            alglib.mlpensemble mlpEnsemble;
            switch (nLayers)
            {
                case 0:
                    alglib.mlpecreate0(nInputs, 1, ensembleSize, out mlpEnsemble);
                    break;
                case 1:
                    alglib.mlpecreate1(nInputs, nHiddenNodes1, 1, ensembleSize, out mlpEnsemble);
                    break;
                case 2:
                    alglib.mlpecreate2(nInputs, nHiddenNodes1, nHiddenNodes2, 1, ensembleSize, out mlpEnsemble);
                    break;
                default:
                    throw new ArgumentException("Number of layers must be zero, one, or two.", "nLayers");
            }
            alglib.mlpreport rep;
            int nRows = inputMatrix.GetLength(0);

            int info;

            alglib.mlpetraines(mlpEnsemble, inputMatrix, nRows, decay, restarts, out info, out rep);
            if (info != 6)
            {
                throw new ArgumentException("Error in calculation of neural network ensemble regression solution");
            }

            rmsError    = alglib.mlpermserror(mlpEnsemble, inputMatrix, nRows);
            avgRelError = alglib.mlpeavgrelerror(mlpEnsemble, inputMatrix, nRows);

            return(new NeuralNetworkEnsembleRegressionSolution(new NeuralNetworkEnsembleModel(mlpEnsemble, targetVariable, allowedInputVariables), (IRegressionProblemData)problemData.Clone()));
        }
 /// <summary>Pairs this (already trained) model with a clone of the given problem data.</summary>
 public IRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData)
 {
     var dataClone = (IRegressionProblemData)problemData.Clone();
     return new RegressionSolution(this, dataClone);
 }
    // simple interface
    /// <summary>
    /// Runs gradient boosting for maxIterations steps and returns the resulting
    /// ensemble together with a clone of the problem data.
    /// </summary>
    public static GradientBoostedTreesSolution TrainGbm(IRegressionProblemData problemData, ILossFunction lossFunction, int maxSize, double nu, double r, double m, int maxIterations, uint randSeed = 31415) {
      // sampling fractions and learning rate are expected in (0, 1]
      Contract.Assert(r > 0);
      Contract.Assert(r <= 1.0);
      Contract.Assert(nu > 0);
      Contract.Assert(nu <= 1.0);

      var gbmState = (GbmState)CreateGbmState(problemData, lossFunction, randSeed, maxSize, r, m, nu);

      for (int step = 0; step < maxIterations; step++) {
        MakeStep(gbmState);
      }

      var trainedModel = gbmState.GetModel();
      var clonedData = (IRegressionProblemData)problemData.Clone();
      return new GradientBoostedTreesSolution(trainedModel, clonedData);
    }
        /// <summary>
        /// Accepts dropped regression problem data (possibly wrapped in a value
        /// parameter or a problem) and installs it as the ensemble's problem data.
        /// </summary>
        protected override void itemsListView_DragDrop(object sender, DragEventArgs e)
        {
            if (e.Effect == DragDropEffects.None)
            {
                return;
            }

            var dropped = e.Data.GetData(HeuristicLab.Common.Constants.DragDropDataFormat);
            // unwrap value parameters and problems down to their problem data
            if (dropped is IValueParameter)
            {
                dropped = ((IValueParameter)dropped).Value;
            }
            if (dropped is IRegressionProblem)
            {
                dropped = ((IRegressionProblem)dropped).ProblemData;
            }

            // ensemble problem data is used directly; plain problem data is wrapped
            var ensembleProblemData = dropped as RegressionEnsembleProblemData;
            if (ensembleProblemData != null)
            {
                Content.ProblemData = (RegressionEnsembleProblemData)ensembleProblemData.Clone();
                return;
            }
            var regressionProblemData = dropped as IRegressionProblemData;
            if (regressionProblemData != null)
            {
                Content.ProblemData = new RegressionEnsembleProblemData((IRegressionProblemData)regressionProblemData.Clone());
            }
        }
Example #14
0
        /// <summary>
        /// Builds a linear regression model via alglib and converts it into a symbolic
        /// regression solution (an expression tree of the fitted linear function).
        /// </summary>
        /// <param name="problemData">Training data; the solution receives a clone.</param>
        /// <param name="rmsError">Root mean squared error reported by alglib.</param>
        /// <param name="cvRmsError">Leave-one-out CV RMS error reported by alglib.</param>
        /// <exception cref="ArgumentException">Thrown when alglib reports a build failure.</exception>
        public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError)
        {
            IEnumerable <string> doubleVariables;
            IEnumerable <KeyValuePair <string, IEnumerable <string> > > factorVariables;

            double[,] inputMatrix;
            PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables);

            int nRows     = inputMatrix.GetLength(0);
            // last column of the matrix is the target, so it is not a feature
            int nFeatures = inputMatrix.GetLength(1) - 1;

            // lm, ar and retVal are assigned by lrbuild's out parameters;
            // the previous pre-initializations were dead stores
            alglib.linearmodel lm;
            alglib.lrreport    ar;
            int retVal;

            alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
            if (retVal != 1)
            {
                throw new ArgumentException("Error in calculation of linear regression solution");
            }
            rmsError   = ar.rmserror;
            cvRmsError = ar.cvrmserror;

            // lrunpack allocates the coefficient array itself (last entry is the constant
            // term), so the previous pre-allocation was immediately overwritten
            double[] coefficients;
            alglib.lrunpack(lm, out coefficients, out nFeatures);

            // factor-value coefficients come first, then the double-variable coefficients
            int nFactorCoeff = factorVariables.Sum(kvp => kvp.Value.Count());
            int nVarCoeff    = doubleVariables.Count();
            var tree         = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(),
                                                                     doubleVariables.ToArray(), coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray(),
                                                                     @const: coefficients[nFeatures]);

            SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter()), (IRegressionProblemData)problemData.Clone());

            solution.Model.Name = "Linear Regression Model";
            solution.Name       = "Linear Regression Solution";
            return(solution);
        }
        /// <summary>
        /// Trains a support vector regression model with the configured parameters and
        /// adds the solution (optional) plus train/test error metrics to the results.
        /// </summary>
        protected override void Run()
        {
            IRegressionProblemData problemData            = Problem.ProblemData;
            IEnumerable <string>   selectedInputVariables = problemData.AllowedInputVariables;
            int nSv;
            ISupportVectorMachineModel model;

            Run(problemData, selectedInputVariables, SvmType.Value, KernelType.Value, Cost.Value, Nu.Value, Gamma.Value, Epsilon.Value, Degree.Value, out model, out nSv);

            if (CreateSolution)
            {
                var solution = new SupportVectorRegressionSolution((SupportVectorMachineModel)model, (IRegressionProblemData)problemData.Clone());
                Results.Add(new Result("Support vector regression solution", "The support vector regression solution.", solution));
            }

            Results.Add(new Result("Number of support vectors", "The number of support vectors of the SVR solution.", new IntValue(nSv)));

            {
                // calculate regression model metrics
                var ds         = problemData.Dataset;
                var trainRows  = problemData.TrainingIndices;
                var testRows   = problemData.TestIndices;
                var yTrain     = ds.GetDoubleValues(problemData.TargetVariable, trainRows);
                var yTest      = ds.GetDoubleValues(problemData.TargetVariable, testRows);
                var yPredTrain = model.GetEstimatedValues(ds, trainRows).ToArray();
                var yPredTest  = model.GetEstimatedValues(ds, testRows).ToArray();

                OnlineCalculatorError error;

                // each metric maps a calculator failure to double.MaxValue via ValueOrWorst
                double trainMse = ValueOrWorst(OnlineMeanSquaredErrorCalculator.Calculate(yPredTrain, yTrain, out error), error);
                double testMse  = ValueOrWorst(OnlineMeanSquaredErrorCalculator.Calculate(yPredTest, yTest, out error), error);
                Results.Add(new Result("Mean squared error (training)", "The mean of squared errors of the SVR solution on the training partition.", new DoubleValue(trainMse)));
                Results.Add(new Result("Mean squared error (test)", "The mean of squared errors of the SVR solution on the test partition.", new DoubleValue(testMse)));

                double trainMae = ValueOrWorst(OnlineMeanAbsoluteErrorCalculator.Calculate(yPredTrain, yTrain, out error), error);
                double testMae  = ValueOrWorst(OnlineMeanAbsoluteErrorCalculator.Calculate(yPredTest, yTest, out error), error);
                Results.Add(new Result("Mean absolute error (training)", "The mean of absolute errors of the SVR solution on the training partition.", new DoubleValue(trainMae)));
                Results.Add(new Result("Mean absolute error (test)", "The mean of absolute errors of the SVR solution on the test partition.", new DoubleValue(testMae)));

                double trainRelErr = ValueOrWorst(OnlineMeanAbsolutePercentageErrorCalculator.Calculate(yPredTrain, yTrain, out error), error);
                double testRelErr  = ValueOrWorst(OnlineMeanAbsolutePercentageErrorCalculator.Calculate(yPredTest, yTest, out error), error);
                Results.Add(new Result("Average relative error (training)", "The mean of relative errors of the SVR solution on the training partition.", new DoubleValue(trainRelErr)));
                Results.Add(new Result("Average relative error (test)", "The mean of relative errors of the SVR solution on the test partition.", new DoubleValue(testRelErr)));
            }
        }

        // Collapses the previously repeated "double.MaxValue on calculator failure" blocks.
        private static double ValueOrWorst(double value, OnlineCalculatorError error)
        {
            return error == OnlineCalculatorError.None ? value : double.MaxValue;
        }
    /// <summary>
    /// Creates an ensemble solution in which every member model has its own
    /// training/test partition (e.g. one model per cross-validation fold).
    /// </summary>
    /// <exception cref="ArgumentException">Thrown when models, trainingPartitions and testPartitions differ in length.</exception>
    public RegressionEnsembleSolution(IEnumerable<IRegressionModel> models, IRegressionProblemData problemData, IEnumerable<IntRange> trainingPartitions, IEnumerable<IntRange> testPartitions)
      : base(new RegressionEnsembleModel(Enumerable.Empty<IRegressionModel>()), new RegressionEnsembleProblemData(problemData)) {
      this.trainingPartitions = new Dictionary<IRegressionModel, IntRange>();
      this.testPartitions = new Dictionary<IRegressionModel, IntRange>();
      this.regressionSolutions = new ItemCollection<IRegressionSolution>();

      List<IRegressionSolution> solutions = new List<IRegressionSolution>();
      var modelEnumerator = models.GetEnumerator();
      var trainingPartitionEnumerator = trainingPartitions.GetEnumerator();
      var testPartitionEnumerator = testPartitions.GetEnumerator();

      // non-short-circuiting '&' keeps all three enumerators advancing in lockstep
      while (modelEnumerator.MoveNext() & trainingPartitionEnumerator.MoveNext() & testPartitionEnumerator.MoveNext()) {
        // each model gets a clone of the problem data narrowed to its own ranges
        var p = (IRegressionProblemData)problemData.Clone();
        p.TrainingPartition.Start = trainingPartitionEnumerator.Current.Start;
        p.TrainingPartition.End = trainingPartitionEnumerator.Current.End;
        p.TestPartition.Start = testPartitionEnumerator.Current.Start;
        p.TestPartition.End = testPartitionEnumerator.Current.End;

        solutions.Add(modelEnumerator.Current.CreateRegressionSolution(p));
      }
      // if any sequence still has elements the argument sequences differed in length
      if (modelEnumerator.MoveNext() | trainingPartitionEnumerator.MoveNext() | testPartitionEnumerator.MoveNext()) {
        throw new ArgumentException();
      }

      trainingEvaluationCache = new Dictionary<int, double>(problemData.TrainingIndices.Count());
      testEvaluationCache = new Dictionary<int, double>(problemData.TestIndices.Count());

      // handlers are registered before AddRange, presumably so the additions are
      // observed by the solution-collection handler — confirm against handler code
      RegisterRegressionSolutionsEventHandler();
      regressionSolutions.AddRange(solutions);
    }
        /// <summary>
        /// Trains a k-nearest-neighbour model and wraps it into a solution.
        /// Note: training reads the original problem data; only the solution holds a clone.
        /// </summary>
        public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k)
        {
            return new NearestNeighbourRegressionSolution((IRegressionProblemData)problemData.Clone(), Train(problemData, k));
        }
        /// <summary>
        /// Builds a linear regression model (factor variables binary-encoded, double
        /// variables appended, target last) via alglib and converts it into a symbolic
        /// regression solution.
        /// </summary>
        /// <param name="problemData">Training data; the solution receives a clone.</param>
        /// <param name="rmsError">Root mean squared error reported by alglib.</param>
        /// <param name="cvRmsError">Leave-one-out CV RMS error reported by alglib.</param>
        /// <exception cref="NotSupportedException">Thrown when the input data contains NaN or infinity.</exception>
        /// <exception cref="ArgumentException">Thrown when alglib reports a build failure.</exception>
        public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError)
        {
            var    dataset        = problemData.Dataset;
            string targetVariable = problemData.TargetVariable;
            IEnumerable <string> allowedInputVariables = problemData.AllowedInputVariables;
            IEnumerable <int>    rows = problemData.TrainingIndices;
            var doubleVariables       = allowedInputVariables.Where(dataset.VariableHasType <double>);
            var factorVariableNames   = allowedInputVariables.Where(dataset.VariableHasType <string>);
            var factorVariables       = dataset.GetFactorVariableValues(factorVariableNames, rows);

            // binary-encoded factor columns first, then double variables, target last
            double[,] binaryMatrix    = dataset.ToArray(factorVariables, rows);
            double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows);
            var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix);

            if (inputMatrix.Cast <double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
            {
                throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
            }

            int nRows     = inputMatrix.GetLength(0);
            // last column of the matrix is the target, so it is not a feature
            int nFeatures = inputMatrix.GetLength(1) - 1;

            // lm, ar and retVal are assigned by lrbuild's out parameters;
            // the previous pre-initializations were dead stores
            alglib.linearmodel lm;
            alglib.lrreport    ar;
            int retVal;

            alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
            if (retVal != 1)
            {
                throw new ArgumentException("Error in calculation of linear regression solution");
            }
            rmsError   = ar.rmserror;
            cvRmsError = ar.cvrmserror;

            // lrunpack allocates the coefficient array itself (last entry is the constant
            // term), so the previous pre-allocation was immediately overwritten
            double[] coefficients;
            alglib.lrunpack(lm, out coefficients, out nFeatures);

            int nFactorCoeff = binaryMatrix.GetLength(1);
            int nVarCoeff    = doubleVariables.Count();
            var tree         = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(),
                                                                     doubleVariables.ToArray(), coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray(),
                                                                     @const: coefficients[nFeatures]);

            SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter()), (IRegressionProblemData)problemData.Clone());

            solution.Model.Name = "Linear Regression Model";
            solution.Name       = "Linear Regression Solution";
            return(solution);
        }
Example #19
0
        /// <summary>
        /// Builds a linear regression model via alglib and converts it into a symbolic
        /// expression tree (weighted variable nodes plus a constant) wrapped in a
        /// symbolic regression solution. Older API variant without factor variables.
        /// </summary>
        /// <param name="problemData">Training data; the solution receives a clone.</param>
        /// <param name="rmsError">Root mean squared error reported by alglib.</param>
        /// <param name="cvRmsError">Leave-one-out CV RMS error reported by alglib.</param>
        /// <exception cref="NotSupportedException">Thrown when the input data contains NaN or infinity.</exception>
        /// <exception cref="ArgumentException">Thrown when alglib reports a build failure.</exception>
        public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError)
        {
            var    dataset        = problemData.Dataset;
            string targetVariable = problemData.TargetVariable;
            IEnumerable <string> allowedInputVariables = problemData.AllowedInputVariables;
            IEnumerable <int>    rows = problemData.TrainingIndices;

            // the target variable is appended as the last column of the training matrix
            double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
            if (inputMatrix.Cast <double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
            {
                throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
            }

            int nRows     = inputMatrix.GetLength(0);
            // last column of the matrix is the target, so it is not a feature
            int nFeatures = inputMatrix.GetLength(1) - 1;

            // lm, ar and retVal are assigned by lrbuild's out parameters;
            // the previous pre-initializations were dead stores
            alglib.linearmodel lm;
            alglib.lrreport    ar;
            int retVal;

            alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
            if (retVal != 1)
            {
                throw new ArgumentException("Error in calculation of linear regression solution");
            }
            rmsError   = ar.rmserror;
            cvRmsError = ar.cvrmserror;

            // lrunpack allocates the coefficient array itself (last entry is the constant
            // term), so the previous pre-allocation was immediately overwritten
            double[] coefficients;
            alglib.lrunpack(lm, out coefficients, out nFeatures);

            // build the tree:  root -> start -> addition(weighted vars..., constant)
            ISymbolicExpressionTree     tree      = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
            ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();

            tree.Root.AddSubtree(startNode);
            ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();

            startNode.AddSubtree(addition);

            // one weighted variable node per input, in the order of allowedInputVariables
            int col = 0;

            foreach (string column in allowedInputVariables)
            {
                VariableTreeNode vNode = (VariableTreeNode) new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
                vNode.VariableName = column;
                vNode.Weight       = coefficients[col];
                addition.AddSubtree(vNode);
                col++;
            }

            // the intercept is the last unpacked coefficient
            ConstantTreeNode cNode = (ConstantTreeNode) new Constant().CreateTreeNode();

            cNode.Value = coefficients[coefficients.Length - 1];
            addition.AddSubtree(cNode);

            SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), (IRegressionProblemData)problemData.Clone());

            solution.Model.Name = "Linear Regression Model";
            solution.Name       = "Linear Regression Solution";
            return(solution);
        }
        /// <summary>
        /// Builds an ensemble solution from an existing ensemble model: each member model gets
        /// its own regression solution over a private clone of the problem data, and the
        /// per-model training/test partitions are recorded for later lookup.
        /// </summary>
        public RegressionEnsembleSolution(IRegressionEnsembleModel model, IRegressionProblemData problemData)
            : base(model, new RegressionEnsembleProblemData(problemData))
        {
            // Per-model partition bookkeeping plus the backing collection of member solutions.
            trainingPartitions  = new Dictionary <IRegressionModel, IntRange>();
            testPartitions      = new Dictionary <IRegressionModel, IntRange>();
            regressionSolutions = new ItemCollection <IRegressionSolution>();

            // Pre-size the estimation caches to the respective row counts to avoid rehashing.
            evaluationCache         = new Dictionary <int, double>(problemData.Dataset.Rows);
            trainingEvaluationCache = new Dictionary <int, double>(problemData.TrainingIndices.Count());
            testEvaluationCache     = new Dictionary <int, double>(problemData.TestIndices.Count());

            // Wrap every ensemble member in a solution over its own clone of the problem data.
            foreach (var memberModel in model.Models)
            {
                var memberSolution = memberModel.CreateRegressionSolution((IRegressionProblemData)problemData.Clone());
                regressionSolutions.Add(memberSolution);
                trainingPartitions.Add(memberSolution.Model, memberSolution.ProblemData.TrainingPartition);
                testPartitions.Add(memberSolution.Model, memberSolution.ProblemData.TestPartition);
            }

            RecalculateResults();
            RegisterModelEvents();
            RegisterRegressionSolutionsEventHandler();
        }
Example #21
0
        /// <summary>
        /// Fits a model to the data by optimizing the numeric constants.
        /// Model is specified as infix expression containing variable names and numbers.
        /// The starting point for the numeric constants is initialized randomly if a random number generator is specified (~N(0,1)). Otherwise the user specified constants are
        /// used as a starting point.
        /// </summary>
        /// <param name="problemData">Training and test data</param>
        /// <param name="modelStructure">The function as infix expression</param>
        /// <param name="maxIterations">Number of constant optimization iterations (using Levenberg-Marquardt algorithm)</param>
        /// <param name="applyLinearScaling">If true, linear scaling is used during constant optimization and the final model is scaled to the target.</param>
        /// <param name="rand">Optional random number generator for random initialization of numeric constants.</param>
        /// <returns>A symbolic regression solution built over a clone of <paramref name="problemData"/>.</returns>
        public static ISymbolicRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData, string modelStructure, int maxIterations, bool applyLinearScaling, IRandom rand = null)
        {
            var parser = new InfixExpressionParser();
            var tree   = parser.Parse(modelStructure);
            // parser handles double and string variables equally by creating a VariableTreeNode
            // post-process to replace VariableTreeNodes by FactorVariableTreeNodes for all string variables
            var factorSymbol = new FactorVariable();

            // The factor symbol covers every allowed input variable whose dataset column is
            // string-typed; each distinct string value is mapped to a stable integer index.
            // NOTE(review): VariableNames/VariableValues are deferred LINQ queries that are
            // re-enumerated on every Contains/GetVariableValues call below.
            factorSymbol.VariableNames =
                problemData.AllowedInputVariables.Where(name => problemData.Dataset.VariableHasType <string>(name));
            factorSymbol.AllVariableNames = factorSymbol.VariableNames;
            factorSymbol.VariableValues   =
                factorSymbol.VariableNames.Select(name =>
                                                  new KeyValuePair <string, Dictionary <string, int> >(name,
                                                                                                       problemData.Dataset.GetReadOnlyStringValues(name).Distinct()
                                                                                                       .Select((n, i) => Tuple.Create(n, i))
                                                                                                       .ToDictionary(tup => tup.Item1, tup => tup.Item2)));

            // Materialize the node list up front (ToArray) because subtrees are replaced in place below.
            foreach (var parent in tree.IterateNodesPrefix().ToArray())
            {
                for (int i = 0; i < parent.SubtreeCount; i++)
                {
                    var varChild       = parent.GetSubtree(i) as VariableTreeNode;
                    var factorVarChild = parent.GetSubtree(i) as FactorVariableTreeNode;
                    if (varChild != null && factorSymbol.VariableNames.Contains(varChild.VariableName))
                    {
                        // Plain variable node referring to a string column: swap in a factor node.
                        parent.RemoveSubtree(i);
                        var factorTreeNode = (FactorVariableTreeNode)factorSymbol.CreateTreeNode();
                        factorTreeNode.VariableName = varChild.VariableName;
                        factorTreeNode.Weights      =
                            factorTreeNode.Symbol.GetVariableValues(factorTreeNode.VariableName).Select(_ => 1.0).ToArray();
                        // weight = 1.0 for each value
                        parent.InsertSubtree(i, factorTreeNode);
                    }
                    else if (factorVarChild != null && factorSymbol.VariableNames.Contains(factorVarChild.VariableName))
                    {
                        // User already wrote a factor node: validate that its weight vector matches
                        // the number of distinct values observed in the dataset, then rebuild the
                        // node from the configured factor symbol.
                        if (factorSymbol.GetVariableValues(factorVarChild.VariableName).Count() != factorVarChild.Weights.Length)
                        {
                            throw new ArgumentException(
                                      string.Format("Factor variable {0} needs exactly {1} weights",
                                                    factorVarChild.VariableName,
                                                    factorSymbol.GetVariableValues(factorVarChild.VariableName).Count()));
                        }
                        parent.RemoveSubtree(i);
                        var factorTreeNode = (FactorVariableTreeNode)factorSymbol.CreateTreeNode();
                        factorTreeNode.VariableName = factorVarChild.VariableName;
                        factorTreeNode.Weights      = factorVarChild.Weights;
                        parent.InsertSubtree(i, factorTreeNode);
                    }
                }
            }

            if (!SymbolicRegressionConstantOptimizationEvaluator.CanOptimizeConstants(tree))
            {
                throw new ArgumentException("The optimizer does not support the specified model structure.");
            }

            // initialize constants randomly: keep the parsed value as a base, scale its magnitude
            // by a log-normal factor and flip the sign with probability 0.5
            if (rand != null)
            {
                foreach (var node in tree.IterateNodesPrefix().OfType <ConstantTreeNode>())
                {
                    double f = Math.Exp(NormalDistributedRandom.NextDouble(rand, 0, 1));
                    double s = rand.NextDouble() < 0.5 ? -1 : 1;
                    node.Value = s * node.Value * f;
                }
            }
            var interpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter();

            SymbolicRegressionConstantOptimizationEvaluator.OptimizeConstants(interpreter, tree, problemData, problemData.TrainingIndices,
                                                                              applyLinearScaling: applyLinearScaling, maxIterations: maxIterations,
                                                                              updateVariableWeights: false, updateConstantsInTree: true);

            var model = new SymbolicRegressionModel(problemData.TargetVariable, tree, (ISymbolicDataAnalysisExpressionTreeInterpreter)interpreter.Clone());

            if (applyLinearScaling)
            {
                model.Scale(problemData);
            }

            SymbolicRegressionSolution solution = new SymbolicRegressionSolution(model, (IRegressionProblemData)problemData.Clone());

            solution.Model.Name = "Regression Model";
            solution.Name       = "Regression Solution";
            return(solution);
        }
        // BackwardsCompatibility3.4
        #region Backwards compatible code, remove with 3.5
        // for compatibility with old API
        /// <summary>
        /// Trains a support vector regression machine via <c>Run</c> and wraps the resulting
        /// model in a solution over a clone of the problem data. Kept for compatibility with
        /// the old API; also reports training/test R² and the support vector count.
        /// </summary>
        public static SupportVectorRegressionSolution CreateSupportVectorRegressionSolution(
            IRegressionProblemData problemData, IEnumerable <string> allowedInputVariables,
            string svmType, string kernelType, double cost, double nu, double gamma, double epsilon, int degree,
            out double trainingR2, out double testR2, out int nSv)
        {
            ISupportVectorMachineModel svmModel;

            // Actual training is delegated; Run also reports the number of support vectors.
            Run(problemData, allowedInputVariables, svmType, kernelType, cost, nu, gamma, epsilon, degree, out svmModel, out nSv);

            var result = new SupportVectorRegressionSolution((SupportVectorMachineModel)svmModel, (IRegressionProblemData)problemData.Clone());

            // Surface the quality measures the legacy callers expect.
            trainingR2 = result.TrainingRSquared;
            testR2     = result.TestRSquared;
            return result;
        }
        /// <summary>
        /// Fits a model given as infix expression by optimizing its numeric constants
        /// (Levenberg-Marquardt) and linearly scaling the resulting model to the target.
        /// </summary>
        /// <param name="problemData">Training and test data</param>
        /// <param name="modelStructure">The function as infix expression</param>
        /// <param name="maxIterations">Number of constant optimization iterations</param>
        /// <returns>A symbolic regression solution built over a clone of <paramref name="problemData"/>.</returns>
        /// <exception cref="ArgumentException">Thrown when the parsed model structure is not supported by the constant optimizer.</exception>
        public static ISymbolicRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData, string modelStructure, int maxIterations)
        {
            var parser = new InfixExpressionParser();
            var tree   = parser.Parse(modelStructure);
            // (fixed) removed an unused SymbolicDataAnalysisExpressionTreeSimplifier local that
            // was allocated but never applied to the tree

            if (!SymbolicRegressionConstantOptimizationEvaluator.CanOptimizeConstants(tree))
            {
                throw new ArgumentException("The optimizer does not support the specified model structure.");
            }

            var interpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter();

            // Optimize only the constants in place; variable weights stay fixed and linear
            // scaling is applied afterwards via Scale instead of during optimization.
            SymbolicRegressionConstantOptimizationEvaluator.OptimizeConstants(interpreter, tree, problemData, problemData.TrainingIndices,
                                                                              applyLinearScaling: false, maxIterations: maxIterations,
                                                                              updateVariableWeights: false, updateConstantsInTree: true);

            var scaledModel = new SymbolicRegressionModel(problemData.TargetVariable, tree, (ISymbolicDataAnalysisExpressionTreeInterpreter)interpreter.Clone());

            scaledModel.Scale(problemData);
            SymbolicRegressionSolution solution = new SymbolicRegressionSolution(scaledModel, (IRegressionProblemData)problemData.Clone());

            solution.Model.Name = "Regression Model";
            solution.Name       = "Regression Solution";
            return solution;
        }
    /// <summary>
    /// Creates an ensemble solution for an existing ensemble model. A regression solution is
    /// created for each member over its own clone of the problem data, and each member's
    /// training/test partitions are stored for partition-aware evaluation.
    /// </summary>
    public RegressionEnsembleSolution(IRegressionEnsembleModel model, IRegressionProblemData problemData)
      : base(model, new RegressionEnsembleProblemData(problemData)) {
      // Backing stores for the ensemble members and their data partitions.
      trainingPartitions = new Dictionary<IRegressionModel, IntRange>();
      testPartitions = new Dictionary<IRegressionModel, IntRange>();
      regressionSolutions = new ItemCollection<IRegressionSolution>();

      // Caches are sized up front so estimate lookups do not trigger rehashing.
      evaluationCache = new Dictionary<int, double>(problemData.Dataset.Rows);
      trainingEvaluationCache = new Dictionary<int, double>(problemData.TrainingIndices.Count());
      testEvaluationCache = new Dictionary<int, double>(problemData.TestIndices.Count());

      // One solution per member model, each over a private clone of the problem data.
      var memberSolutions = model.Models
        .Select(m => m.CreateRegressionSolution((IRegressionProblemData)problemData.Clone()))
        .ToList();
      memberSolutions.ForEach(s => {
        regressionSolutions.Add(s);
        trainingPartitions.Add(s.Model, s.ProblemData.TrainingPartition);
        testPartitions.Add(s.Model, s.ProblemData.TestPartition);
      });

      RecalculateResults();
      RegisterModelEvents();
      RegisterRegressionSolutionsEventHandler();
    }
        /// <summary>
        /// Builds a k-nearest-neighbour regression solution. The problem data is cloned before
        /// training so the returned solution owns an independent copy.
        /// </summary>
        public static IRegressionSolution CreateNearestNeighbourRegressionSolution(IRegressionProblemData problemData, int k, double[] weights = null)
        {
            // Clone first (matching the original call order), then train on the original data.
            var dataCopy = (IRegressionProblemData)problemData.Clone();
            var knnModel = Train(problemData, k, weights);
            return new NearestNeighbourRegressionSolution(knnModel, dataCopy);
        }