Beispiel #1
0
        private static ISymbolicRegressionSolution CreateSymbolicSolution(List <IRegressionModel> models, double nu, IRegressionProblemData problemData)
        {
            var symbModels   = models.OfType <ISymbolicRegressionModel>();
            var lowerLimit   = symbModels.Min(m => m.LowerEstimationLimit);
            var upperLimit   = symbModels.Max(m => m.UpperEstimationLimit);
            var interpreter  = new SymbolicDataAnalysisExpressionTreeLinearInterpreter();
            var progRootNode = new ProgramRootSymbol().CreateTreeNode();
            var startNode    = new StartSymbol().CreateTreeNode();

            var addNode   = new Addition().CreateTreeNode();
            var mulNode   = new Multiplication().CreateTreeNode();
            var scaleNode = (ConstantTreeNode) new Constant().CreateTreeNode(); // all models are scaled using the same nu

            scaleNode.Value = nu;

            foreach (var m in symbModels)
            {
                var relevantPart = m.SymbolicExpressionTree.Root.GetSubtree(0).GetSubtree(0); // skip root and start
                addNode.AddSubtree((ISymbolicExpressionTreeNode)relevantPart.Clone());
            }

            mulNode.AddSubtree(addNode);
            mulNode.AddSubtree(scaleNode);
            startNode.AddSubtree(mulNode);
            progRootNode.AddSubtree(startNode);
            var t             = new SymbolicExpressionTree(progRootNode);
            var combinedModel = new SymbolicRegressionModel(problemData.TargetVariable, t, interpreter, lowerLimit, upperLimit);
            var sol           = new SymbolicRegressionSolution(combinedModel, problemData);

            return(sol);
        }
        public static IClassificationSolution CreateLinearDiscriminantAnalysisSolution(IClassificationProblemData problemData)
        {
            var    dataset        = problemData.Dataset;
            string targetVariable = problemData.TargetVariable;
            IEnumerable <string> allowedInputVariables = problemData.AllowedInputVariables;
            IEnumerable <int>    rows = problemData.TrainingIndices;
            int nClasses = problemData.ClassNames.Count();

            double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
            if (inputMatrix.Cast <double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
            {
                throw new NotSupportedException("Linear discriminant analysis does not support NaN or infinity values in the input dataset.");
            }

            // change class values into class index
            int           targetVariableColumn = inputMatrix.GetLength(1) - 1;
            List <double> classValues          = problemData.ClassValues.OrderBy(x => x).ToList();

            for (int row = 0; row < inputMatrix.GetLength(0); row++)
            {
                inputMatrix[row, targetVariableColumn] = classValues.IndexOf(inputMatrix[row, targetVariableColumn]);
            }
            int info;

            double[] w;
            alglib.fisherlda(inputMatrix, inputMatrix.GetLength(0), allowedInputVariables.Count(), nClasses, out info, out w);
            if (info < 1)
            {
                throw new ArgumentException("Error in calculation of linear discriminant analysis solution");
            }

            ISymbolicExpressionTree     tree      = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
            ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();

            tree.Root.AddSubtree(startNode);
            ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();

            startNode.AddSubtree(addition);

            int col = 0;

            foreach (string column in allowedInputVariables)
            {
                VariableTreeNode vNode = (VariableTreeNode) new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
                vNode.VariableName = column;
                vNode.Weight       = w[col];
                addition.AddSubtree(vNode);
                col++;
            }

            var model = LinearDiscriminantAnalysis.CreateDiscriminantFunctionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter(), problemData, rows);
            SymbolicDiscriminantFunctionClassificationSolution solution = new SymbolicDiscriminantFunctionClassificationSolution(model, (IClassificationProblemData)problemData.Clone());

            return(solution);
        }
        private static ITimeSeriesPrognosisSolution CreateAutoRegressiveSolution(ITimeSeriesPrognosisProblemData problemData, int timeOffset, out double rmsError, out double cvRmsError)
        {
            string targetVariable = problemData.TargetVariable;

            double[,] inputMatrix = new double[problemData.TrainingPartition.Size, timeOffset + 1];
            var targetValues = problemData.Dataset.GetDoubleValues(targetVariable).ToList();

            for (int i = 0, row = problemData.TrainingPartition.Start; i < problemData.TrainingPartition.Size; i++, row++)
            {
                for (int col = 0; col < timeOffset; col++)
                {
                    inputMatrix[i, col] = targetValues[row - col - 1];
                }
            }
            // set target values in last column
            for (int i = 0; i < inputMatrix.GetLength(0); i++)
            {
                inputMatrix[i, timeOffset] = targetValues[i + problemData.TrainingPartition.Start];
            }

            if (inputMatrix.Cast <double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
            {
                throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
            }


            alglib.linearmodel lm = new alglib.linearmodel();
            alglib.lrreport    ar = new alglib.lrreport();
            int nRows             = inputMatrix.GetLength(0);
            int nFeatures         = inputMatrix.GetLength(1) - 1;

            double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant

            int retVal = 1;

            alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
            if (retVal != 1)
            {
                throw new ArgumentException("Error in calculation of linear regression solution");
            }
            rmsError   = ar.rmserror;
            cvRmsError = ar.cvrmserror;

            alglib.lrunpack(lm, out coefficients, out nFeatures);


            ISymbolicExpressionTree     tree      = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
            ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();

            tree.Root.AddSubtree(startNode);
            ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();

            startNode.AddSubtree(addition);

            for (int i = 0; i < timeOffset; i++)
            {
                LaggedVariableTreeNode node = (LaggedVariableTreeNode) new LaggedVariable().CreateTreeNode();
                node.VariableName = targetVariable;
                node.Weight       = coefficients[i];
                node.Lag          = (i + 1) * -1;
                addition.AddSubtree(node);
            }

            ConstantTreeNode cNode = (ConstantTreeNode) new Constant().CreateTreeNode();

            cNode.Value = coefficients[coefficients.Length - 1];
            addition.AddSubtree(cNode);

            var interpreter = new SymbolicTimeSeriesPrognosisExpressionTreeInterpreter(problemData.TargetVariable);
            var model       = new SymbolicTimeSeriesPrognosisModel(problemData.TargetVariable, tree, interpreter);
            var solution    = model.CreateTimeSeriesPrognosisSolution((ITimeSeriesPrognosisProblemData)problemData.Clone());

            return(solution);
        }
Beispiel #4
0
        public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError)
        {
            var    dataset        = problemData.Dataset;
            string targetVariable = problemData.TargetVariable;
            IEnumerable <string> allowedInputVariables = problemData.AllowedInputVariables;
            IEnumerable <int>    rows = problemData.TrainingIndices;

            double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
            if (inputMatrix.Cast <double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
            {
                throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
            }

            alglib.linearmodel lm = new alglib.linearmodel();
            alglib.lrreport    ar = new alglib.lrreport();
            int nRows             = inputMatrix.GetLength(0);
            int nFeatures         = inputMatrix.GetLength(1) - 1;

            double[] coefficients = new double[nFeatures + 1]; // last coefficient is for the constant

            int retVal = 1;

            alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
            if (retVal != 1)
            {
                throw new ArgumentException("Error in calculation of linear regression solution");
            }
            rmsError   = ar.rmserror;
            cvRmsError = ar.cvrmserror;

            alglib.lrunpack(lm, out coefficients, out nFeatures);

            ISymbolicExpressionTree     tree      = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
            ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();

            tree.Root.AddSubtree(startNode);
            ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();

            startNode.AddSubtree(addition);

            int col = 0;

            foreach (string column in allowedInputVariables)
            {
                VariableTreeNode vNode = (VariableTreeNode) new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
                vNode.VariableName = column;
                vNode.Weight       = coefficients[col];
                addition.AddSubtree(vNode);
                col++;
            }

            ConstantTreeNode cNode = (ConstantTreeNode) new Constant().CreateTreeNode();

            cNode.Value = coefficients[coefficients.Length - 1];
            addition.AddSubtree(cNode);

            SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), (IRegressionProblemData)problemData.Clone());

            solution.Model.Name = "Linear Regression Model";
            solution.Name       = "Linear Regression Solution";
            return(solution);
        }