/// <summary>
/// Parses <paramref name="modelStructure"/> as an infix expression, optimizes the numeric constants of the
/// resulting tree on the training rows of <paramref name="problemData"/> and wraps the scaled model
/// into a regression solution.
/// </summary>
/// <param name="problemData">Supplies the target variable and the training indices; a clone of it is stored in the returned solution.</param>
/// <param name="modelStructure">The model as infix expression.</param>
/// <param name="maxIterations">Iteration limit handed to the constant optimizer.</param>
/// <returns>A solution named "Regression Solution" whose model is named "Regression Model".</returns>
/// <exception cref="ArgumentException">Thrown when the constant optimizer reports that it cannot handle the parsed tree.</exception>
public static ISymbolicRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData, string modelStructure, int maxIterations)
        {
            var parser = new InfixExpressionParser();
            var tree   = parser.Parse(modelStructure);

            if (!SymbolicRegressionConstantOptimizationEvaluator.CanOptimizeConstants(tree))
            {
                throw new ArgumentException("The optimizer does not support the specified model structure.");
            }

            var interpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter();

            // The constants are written back into 'tree' (updateConstantsInTree: true), so the model
            // created below already contains the optimized values.
            SymbolicRegressionConstantOptimizationEvaluator.OptimizeConstants(interpreter, tree, problemData, problemData.TrainingIndices,
                                                                              applyLinearScaling: false, maxIterations: maxIterations,
                                                                              updateVariableWeights: false, updateConstantsInTree: true);

            // The model gets its own interpreter instance (clone) instead of sharing the one used for optimization.
            var scaledModel = new SymbolicRegressionModel(problemData.TargetVariable, tree, (ISymbolicDataAnalysisExpressionTreeInterpreter)interpreter.Clone());

            // NOTE(review): optimization runs with applyLinearScaling: false, yet the model is scaled afterwards
            // - kept as in the original implementation.
            scaledModel.Scale(problemData);
            SymbolicRegressionSolution solution = new SymbolicRegressionSolution(scaledModel, (IRegressionProblemData)problemData.Clone());

            solution.Model.Name = "Regression Model";
            solution.Name       = "Regression Solution";
            return(solution);
        }
Пример #2
0
        /// <summary>
        /// Merges all symbolic regression models contained in <paramref name="models"/> into a single
        /// symbolic model of the form (m1 + m2 + ... + mk) * nu and wraps it into a solution.
        /// </summary>
        /// <param name="models">Candidate models; entries that are not <see cref="ISymbolicRegressionModel"/> are ignored.</param>
        /// <param name="nu">Constant factor the summed model expressions are multiplied with.</param>
        /// <param name="problemData">Problem data supplying the target variable; stored in the solution as passed (not cloned).</param>
        /// <returns>The solution wrapping the combined model.</returns>
        private static ISymbolicRegressionSolution CreateSymbolicSolution(List <IRegressionModel> models, double nu, IRegressionProblemData problemData)
        {
            var symbolicModels = models.OfType <ISymbolicRegressionModel>();

            // the combined model uses the widest estimation limits of all parts
            var minEstimate = symbolicModels.Min(model => model.LowerEstimationLimit);
            var maxEstimate = symbolicModels.Max(model => model.UpperEstimationLimit);

            // sum of all model expressions (root and start node of every model tree are skipped)
            var sumNode = new Addition().CreateTreeNode();
            foreach (var expression in symbolicModels.Select(model => model.SymbolicExpressionTree.Root.GetSubtree(0).GetSubtree(0)))
            {
                sumNode.AddSubtree((ISymbolicExpressionTreeNode)expression.Clone());
            }

            // one shared factor: all models are scaled using the same nu
            var nuNode = (ConstantTreeNode) new Constant().CreateTreeNode();
            nuNode.Value = nu;

            var productNode = new Multiplication().CreateTreeNode();
            productNode.AddSubtree(sumNode);
            productNode.AddSubtree(nuNode);

            var start = new StartSymbol().CreateTreeNode();
            start.AddSubtree(productNode);

            var root = new ProgramRootSymbol().CreateTreeNode();
            root.AddSubtree(start);

            var model = new SymbolicRegressionModel(problemData.TargetVariable,
                                                    new SymbolicExpressionTree(root),
                                                    new SymbolicDataAnalysisExpressionTreeLinearInterpreter(),
                                                    minEstimate, maxEstimate);

            return(new SymbolicRegressionSolution(model, problemData));
        }
        /// <summary>
        /// Converts a coefficient vector into a symbolic regression solution.
        /// The vector is read in this order: one entry per factor variable value (for every factor variable),
        /// then one entry per double variable; its last element is passed on as the constant.
        /// Entries that are almost zero are left out of the resulting tree.
        /// </summary>
        /// <param name="coeff">Coefficient vector in the layout described above.</param>
        /// <param name="problemData">Supplies dataset, allowed input variables and target variable; a clone is stored in the solution.</param>
        /// <returns>A solution named "Elastic-net Linear Regression Solution".</returns>
        public static IRegressionSolution CreateSymbolicSolution(double[] coeff, IRegressionProblemData problemData)
        {
            var dataset        = problemData.Dataset;
            var inputVariables = problemData.AllowedInputVariables.ToArray();
            var numericNames   = inputVariables.Where(dataset.VariableHasType <double>);
            var factorNames    = inputVariables.Where(dataset.VariableHasType <string>);
            // all rows are used so that factor values occurring only in the test set are considered as well
            var factorsWithValues = dataset.GetFactorVariableValues(factorNames, Enumerable.Range(0, dataset.Rows));

            var keptFactors        = new List <KeyValuePair <string, IEnumerable <string> > >();
            var keptFactorCoeff    = new List <double>();
            var keptNumericNames   = new List <string>();
            var keptNumericCoeff   = new List <double>();

            int position = 0; // running index into coeff

            // factor variable / value combinations with a non-zero coefficient
            foreach (var factor in factorsWithValues)
            {
                var keptValues = new List <string>();
                foreach (var value in factor.Value)
                {
                    double c = coeff[position];
                    position++;
                    if (c.IsAlmost(0.0))
                    {
                        continue;
                    }
                    keptValues.Add(value);
                    keptFactorCoeff.Add(c);
                }
                if (keptValues.Count > 0)
                {
                    keptFactors.Add(new KeyValuePair <string, IEnumerable <string> >(factor.Key, keptValues));
                }
            }

            // double variables with a non-zero coefficient
            foreach (var name in numericNames)
            {
                double c = coeff[position];
                position++;
                if (c.IsAlmost(0.0))
                {
                    continue;
                }
                keptNumericNames.Add(name);
                keptNumericCoeff.Add(c);
            }

            var tree = LinearModelToTreeConverter.CreateTree(
                keptFactors, keptFactorCoeff.ToArray(),
                keptNumericNames.ToArray(), keptNumericCoeff.ToArray(),
                coeff.Last());

            var model    = new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeInterpreter());
            var solution = new SymbolicRegressionSolution(model, (IRegressionProblemData)problemData.Clone());

            solution.Model.Name = "Elastic-net Linear Regression Model";
            solution.Name       = "Elastic-net Linear Regression Solution";

            return(solution);
        }
    /// <summary>
    /// Click handler: back-transforms the current model using the transformations stored in the
    /// problem data and shows the resulting solution in the main form.
    /// </summary>
    private void transformModelButton_Click(object sender, EventArgs e) {
      var backtransformator = new SymbolicExpressionTreeBacktransformator(new TransformationToSymbolicTreeMapper());
      var problemData = Content.ProblemData;

      var backtransformedModel = (ISymbolicRegressionModel)backtransformator.Backtransform(
        Content.Model, problemData.Transformations, problemData.TargetVariable);

      // the new solution gets its own copy of the problem data
      var problemDataCopy = (IRegressionProblemData)Content.ProblemData.Clone();
      MainFormManager.MainForm.ShowContent(new SymbolicRegressionSolution(backtransformedModel, problemDataCopy));
    }
Пример #5
0
        /// <summary>
        /// Click handler: applies the back-transformation of the problem data's transformations to the
        /// current model and opens the resulting solution in the main form.
        /// </summary>
        private void transformModelButton_Click(object sender, EventArgs e)
        {
            var treeMapper = new TransformationToSymbolicTreeMapper();
            var backtransformator = new SymbolicExpressionTreeBacktransformator(treeMapper);

            var result = backtransformator.Backtransform(Content.Model,
                                                         Content.ProblemData.Transformations,
                                                         Content.ProblemData.TargetVariable);

            // the solution is built on a clone of the problem data
            var solution = new SymbolicRegressionSolution((ISymbolicRegressionModel)result,
                                                          (IRegressionProblemData)Content.ProblemData.Clone());

            MainFormManager.MainForm.ShowContent(solution);
        }
Пример #6
0
        /// <summary>
        /// Fits a linear model on the training rows of <paramref name="problemData"/> with ALGLIB
        /// (<c>lrbuild</c>) and returns it as a symbolic regression solution.
        /// String (factor) variables are converted to a binary matrix by <c>dataset.ToArray</c>; double variables are used as they are.
        /// </summary>
        /// <param name="problemData">Supplies dataset, allowed input variables, target variable and training indices; a clone is stored in the solution.</param>
        /// <param name="rmsError">Receives <c>rmserror</c> of the ALGLIB report.</param>
        /// <param name="cvRmsError">Receives <c>cvrmserror</c> of the ALGLIB report.</param>
        /// <returns>A solution named "Linear Regression Solution".</returns>
        /// <exception cref="NotSupportedException">Thrown when the input matrix contains NaN or infinity.</exception>
        /// <exception cref="ArgumentException">Thrown when <c>lrbuild</c> returns a code other than 1.</exception>
        public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError)
        {
            var    dataset        = problemData.Dataset;
            string targetVariable = problemData.TargetVariable;
            IEnumerable <string> allowedInputVariables = problemData.AllowedInputVariables;
            IEnumerable <int>    rows = problemData.TrainingIndices;

            // materialize once: the names are needed for the matrix, the coefficient count and the tree
            string[] doubleVariables = allowedInputVariables.Where(dataset.VariableHasType <double>).ToArray();
            var factorVariableNames  = allowedInputVariables.Where(dataset.VariableHasType <string>);
            var factorVariables      = dataset.GetFactorVariableValues(factorVariableNames, rows);

            // column layout: [factor columns | double variables | target]
            double[,] binaryMatrix    = dataset.ToArray(factorVariables, rows);
            double[,] doubleVarMatrix = dataset.ToArray(doubleVariables.Concat(new string[] { targetVariable }), rows);
            var inputMatrix = binaryMatrix.HorzCat(doubleVarMatrix);

            if (inputMatrix.Cast <double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
            {
                throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
            }

            int nRows     = inputMatrix.GetLength(0);
            int nFeatures = inputMatrix.GetLength(1) - 1; // last column is the target

            // all three are assigned by lrbuild through 'out' arguments, no pre-initialization required
            alglib.linearmodel lm;
            alglib.lrreport    ar;
            int retVal;

            alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
            if (retVal != 1)
            {
                throw new ArgumentException("Error in calculation of linear regression solution");
            }
            rmsError   = ar.rmserror;
            cvRmsError = ar.cvrmserror;

            // lrunpack allocates the coefficient array itself; the entry at index nFeatures is used as the constant
            double[] coefficients;
            alglib.lrunpack(lm, out coefficients, out nFeatures);

            int nFactorCoeff = binaryMatrix.GetLength(1);
            int nVarCoeff    = doubleVariables.Length;
            var tree         = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(),
                                                                     doubleVariables, coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray(),
                                                                     @const: coefficients[nFeatures]);

            SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter()), (IRegressionProblemData)problemData.Clone());

            solution.Model.Name = "Linear Regression Model";
            solution.Name       = "Linear Regression Solution";
            return(solution);
        }
Пример #7
0
        /// <summary>
        /// Fits a linear model with ALGLIB (<c>lrbuild</c>) on the input matrix produced by <c>PrepareData</c>
        /// and returns it as a symbolic regression solution.
        /// </summary>
        /// <param name="problemData">Problem data handed to <c>PrepareData</c>; also supplies the target variable. A clone is stored in the solution.</param>
        /// <param name="rmsError">Receives <c>rmserror</c> of the ALGLIB report.</param>
        /// <param name="cvRmsError">Receives <c>cvrmserror</c> of the ALGLIB report.</param>
        /// <returns>A solution named "Linear Regression Solution".</returns>
        /// <exception cref="ArgumentException">Thrown when <c>lrbuild</c> returns a code other than 1.</exception>
        public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError)
        {
            IEnumerable <string> doubleVariables;
            IEnumerable <KeyValuePair <string, IEnumerable <string> > > factorVariables;

            double[,] inputMatrix;
            PrepareData(problemData, out inputMatrix, out doubleVariables, out factorVariables);

            // materialize once: needed both for the coefficient count and for the tree
            string[] doubleVariableNames = doubleVariables.ToArray();

            int nRows     = inputMatrix.GetLength(0);
            int nFeatures = inputMatrix.GetLength(1) - 1; // all columns except the last one are treated as features

            // all three are assigned by lrbuild through 'out' arguments, no pre-initialization required
            alglib.linearmodel lm;
            alglib.lrreport    ar;
            int retVal;

            alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
            if (retVal != 1)
            {
                throw new ArgumentException("Error in calculation of linear regression solution");
            }
            rmsError   = ar.rmserror;
            cvRmsError = ar.cvrmserror;

            // lrunpack allocates the coefficient array itself; the entry at index nFeatures is used as the constant
            double[] coefficients;
            alglib.lrunpack(lm, out coefficients, out nFeatures);

            // coefficient layout: [one per factor value | one per double variable | constant]
            int nFactorCoeff = factorVariables.Sum(kvp => kvp.Value.Count());
            int nVarCoeff    = doubleVariableNames.Length;
            var tree         = LinearModelToTreeConverter.CreateTree(factorVariables, coefficients.Take(nFactorCoeff).ToArray(),
                                                                     doubleVariableNames, coefficients.Skip(nFactorCoeff).Take(nVarCoeff).ToArray(),
                                                                     @const: coefficients[nFeatures]);

            SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(problemData.TargetVariable, tree, new SymbolicDataAnalysisExpressionTreeLinearInterpreter()), (IRegressionProblemData)problemData.Clone());

            solution.Model.Name = "Linear Regression Model";
            solution.Name       = "Linear Regression Solution";
            return(solution);
        }
Пример #8
0
        /// <summary>
        /// Fits a model to the data by optimizing the numeric constants.
        /// Model is specified as infix expression containing variable names and numbers.
        /// If a random number generator is specified, every constant given by the user is multiplied by exp(z),
        /// where z is drawn with NormalDistributedRandom.NextDouble(rand, 0, 1), and its sign is flipped when
        /// rand.NextDouble() is below 0.5. Otherwise the user specified constants are used as a starting point.
        /// Variables of type string are represented as factor variable nodes in the expression tree.
        /// </summary>
        /// <param name="problemData">Training and test data</param>
        /// <param name="modelStructure">The function as infix expression</param>
        /// <param name="maxIterations">Number of constant optimization iterations (the original documentation names the Levenberg-Marquardt algorithm)</param>
        /// <param name="applyLinearScaling">Passed on to the constant optimizer; if true, the resulting model is additionally scaled on the problem data.</param>
        /// <param name="rand">Optional random number generator for random initialization of numeric constants.</param>
        /// <returns>A solution named "Regression Solution" (model name "Regression Model") holding a clone of the problem data.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when a factor variable node has a different number of weights than the variable has distinct values,
        /// or when the constant optimizer does not support the model structure.
        /// </exception>
        public static ISymbolicRegressionSolution CreateRegressionSolution(IRegressionProblemData problemData, string modelStructure, int maxIterations, bool applyLinearScaling, IRandom rand = null)
        {
            var parser = new InfixExpressionParser();
            var tree   = parser.Parse(modelStructure);
            // parser handles double and string variables equally by creating a VariableTreeNode
            // post-process to replace VariableTreeNodes by FactorVariableTreeNodes for all string variables
            var factorSymbol = new FactorVariable();

            factorSymbol.VariableNames =
                problemData.AllowedInputVariables.Where(name => problemData.Dataset.VariableHasType <string>(name));
            factorSymbol.AllVariableNames = factorSymbol.VariableNames;
            // for every factor variable: map each distinct string value to its position in order of first occurrence
            factorSymbol.VariableValues   =
                factorSymbol.VariableNames.Select(name =>
                                                  new KeyValuePair <string, Dictionary <string, int> >(name,
                                                                                                       problemData.Dataset.GetReadOnlyStringValues(name).Distinct()
                                                                                                       .Select((n, i) => Tuple.Create(n, i))
                                                                                                       .ToDictionary(tup => tup.Item1, tup => tup.Item2)));

            // iterate over a snapshot of the nodes (ToArray) because subtrees are replaced inside the loop
            foreach (var parent in tree.IterateNodesPrefix().ToArray())
            {
                for (int i = 0; i < parent.SubtreeCount; i++)
                {
                    var varChild       = parent.GetSubtree(i) as VariableTreeNode;
                    var factorVarChild = parent.GetSubtree(i) as FactorVariableTreeNode;
                    if (varChild != null && factorSymbol.VariableNames.Contains(varChild.VariableName))
                    {
                        // plain variable node that refers to a string variable: replace it by a factor node at the same position
                        parent.RemoveSubtree(i);
                        var factorTreeNode = (FactorVariableTreeNode)factorSymbol.CreateTreeNode();
                        factorTreeNode.VariableName = varChild.VariableName;
                        factorTreeNode.Weights      =
                            factorTreeNode.Symbol.GetVariableValues(factorTreeNode.VariableName).Select(_ => 1.0).ToArray();
                        // weight = 1.0 for each value
                        parent.InsertSubtree(i, factorTreeNode);
                    }
                    else if (factorVarChild != null && factorSymbol.VariableNames.Contains(factorVarChild.VariableName))
                    {
                        // node is already a factor node: check the number of weights, then re-create it from
                        // the factorSymbol configured above while keeping the given weights
                        if (factorSymbol.GetVariableValues(factorVarChild.VariableName).Count() != factorVarChild.Weights.Length)
                        {
                            throw new ArgumentException(
                                      string.Format("Factor variable {0} needs exactly {1} weights",
                                                    factorVarChild.VariableName,
                                                    factorSymbol.GetVariableValues(factorVarChild.VariableName).Count()));
                        }
                        parent.RemoveSubtree(i);
                        var factorTreeNode = (FactorVariableTreeNode)factorSymbol.CreateTreeNode();
                        factorTreeNode.VariableName = factorVarChild.VariableName;
                        factorTreeNode.Weights      = factorVarChild.Weights;
                        parent.InsertSubtree(i, factorTreeNode);
                    }
                }
            }

            if (!SymbolicRegressionConstantOptimizationEvaluator.CanOptimizeConstants(tree))
            {
                throw new ArgumentException("The optimizer does not support the specified model structure.");
            }

            // initialize constants randomly
            if (rand != null)
            {
                foreach (var node in tree.IterateNodesPrefix().OfType <ConstantTreeNode>())
                {
                    // f: positive random factor, s: random sign; the user supplied value is scaled, not replaced
                    double f = Math.Exp(NormalDistributedRandom.NextDouble(rand, 0, 1));
                    double s = rand.NextDouble() < 0.5 ? -1 : 1;
                    node.Value = s * node.Value * f;
                }
            }
            var interpreter = new SymbolicDataAnalysisExpressionTreeLinearInterpreter();

            // optimized constants are written back into 'tree' (updateConstantsInTree: true)
            SymbolicRegressionConstantOptimizationEvaluator.OptimizeConstants(interpreter, tree, problemData, problemData.TrainingIndices,
                                                                              applyLinearScaling: applyLinearScaling, maxIterations: maxIterations,
                                                                              updateVariableWeights: false, updateConstantsInTree: true);

            // the model receives a clone of the interpreter used for optimization
            var model = new SymbolicRegressionModel(problemData.TargetVariable, tree, (ISymbolicDataAnalysisExpressionTreeInterpreter)interpreter.Clone());

            if (applyLinearScaling)
            {
                model.Scale(problemData);
            }

            SymbolicRegressionSolution solution = new SymbolicRegressionSolution(model, (IRegressionProblemData)problemData.Clone());

            solution.Model.Name = "Regression Model";
            solution.Name       = "Regression Solution";
            return(solution);
        }
Пример #9
0
        /// <summary>
        /// Fits a linear model on the training rows of <paramref name="problemData"/> with ALGLIB
        /// (<c>lrbuild</c>) and returns it as a symbolic regression solution whose tree is a sum of
        /// weighted variables plus a constant.
        /// </summary>
        /// <param name="problemData">Supplies dataset, allowed input variables, target variable and training indices; a clone is stored in the solution.</param>
        /// <param name="rmsError">Receives <c>rmserror</c> of the ALGLIB report.</param>
        /// <param name="cvRmsError">Receives <c>cvrmserror</c> of the ALGLIB report.</param>
        /// <returns>A solution named "Linear Regression Solution".</returns>
        /// <exception cref="NotSupportedException">Thrown when the input matrix contains NaN or infinity.</exception>
        /// <exception cref="ArgumentException">Thrown when <c>lrbuild</c> returns a code other than 1.</exception>
        public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError)
        {
            var    dataset        = problemData.Dataset;
            string targetVariable = problemData.TargetVariable;
            // materialize once so that matrix columns and tree nodes are built from the very same sequence
            string[] allowedInputVariables = problemData.AllowedInputVariables.ToArray();
            IEnumerable <int> rows = problemData.TrainingIndices;

            // the target variable is appended as the last column
            double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
            if (inputMatrix.Cast <double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
            {
                throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
            }

            int nRows     = inputMatrix.GetLength(0);
            int nFeatures = inputMatrix.GetLength(1) - 1;

            // all three are assigned by lrbuild through 'out' arguments, no pre-initialization required
            alglib.linearmodel lm;
            alglib.lrreport    ar;
            int retVal;

            alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
            if (retVal != 1)
            {
                throw new ArgumentException("Error in calculation of linear regression solution");
            }
            rmsError   = ar.rmserror;
            cvRmsError = ar.cvrmserror;

            // lrunpack allocates the coefficient array itself; its last element is used as the constant
            double[] coefficients;
            alglib.lrunpack(lm, out coefficients, out nFeatures);

            ISymbolicExpressionTree     tree      = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
            ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();

            tree.Root.AddSubtree(startNode);
            ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();

            startNode.AddSubtree(addition);

            // one weighted variable node per input variable; coefficient i belongs to input variable i
            for (int col = 0; col < allowedInputVariables.Length; col++)
            {
                VariableTreeNode vNode = (VariableTreeNode) new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
                vNode.VariableName = allowedInputVariables[col];
                vNode.Weight       = coefficients[col];
                addition.AddSubtree(vNode);
            }

            ConstantTreeNode cNode = (ConstantTreeNode) new Constant().CreateTreeNode();

            cNode.Value = coefficients[coefficients.Length - 1];
            addition.AddSubtree(cNode);

            SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), (IRegressionProblemData)problemData.Clone());

            solution.Model.Name = "Linear Regression Model";
            solution.Name       = "Linear Regression Solution";
            return(solution);
        }