/// <summary>
        /// Predicts one class value per requested row: the row's input values are passed to
        /// <c>alglib.mlpeprocess</c> and the entry of <c>classValues</c> at the position of the
        /// largest output is returned.
        /// </summary>
        /// <param name="dataset">Dataset handed to <c>AlglibUtil.PrepareInputMatrix</c>.</param>
        /// <param name="rows">Row indices to evaluate.</param>
        /// <returns>An iterator yielding one class value per row of the prepared input matrix.</returns>
        public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows)
        {
            double[,] inputs = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);

            int rowCount     = inputs.GetLength(0);
            int featureCount = inputs.GetLength(1);

            // Buffers are allocated once and reused for every row.
            double[] sample  = new double[featureCount];
            double[] outputs = new double[classValues.Length];

            for (int r = 0; r < rowCount; r++)
            {
                for (int c = 0; c < featureCount; c++)
                {
                    sample[c] = inputs[r, c];
                }
                alglib.mlpeprocess(mlpEnsemble, sample, ref outputs);

                // Arg-max over the outputs; ties keep the lowest index.
                int    winner      = 0;
                double winnerScore = outputs[0];
                for (int candidate = 1; candidate < outputs.Length; candidate++)
                {
                    if (outputs[candidate] > winnerScore)
                    {
                        winnerScore = outputs[candidate];
                        winner      = candidate;
                    }
                }
                yield return classValues[winner];
            }
        }
Exemple #2
0
        /// <summary>
        /// Builds the initial transformation matrix by running ALGLIB's <c>fisherldan</c> on the
        /// training rows of <paramref name="data"/>.
        /// </summary>
        /// <param name="data">Supplies the dataset, allowed input variables, target variable and training indices.</param>
        /// <param name="dimensions">Not read by this implementation.</param>
        /// <returns>The matrix written by <c>alglib.fisherldan</c>.</returns>
        /// <exception cref="System.ArgumentException">ALGLIB reported a completion code below 1.</exception>
        public override double[,] Initialize(IClassificationProblemData data, int dimensions)
        {
            var instances  = data.TrainingIndices.Count();
            var attributes = data.AllowedInputVariables.Count();

            // The target variable is requested as the last variable; the loop below
            // addresses it as column index `attributes`.
            var ldaDs = AlglibUtil.PrepareInputMatrix(data.Dataset,
                                                      data.AllowedInputVariables.Concat(data.TargetVariable.ToEnumerable()),
                                                      data.TrainingIndices);

            // map class values to sequential natural numbers (required by alglib)
            var uniqueClasses = data.Dataset.GetDoubleValues(data.TargetVariable, data.TrainingIndices)
                                .Distinct()
                                .Select((v, i) => new { v, i })
                                .ToDictionary(x => x.v, x => x.i);

            for (int row = 0; row < instances; row++)
            {
                ldaDs[row, attributes] = uniqueClasses[ldaDs[row, attributes]];
            }

            int info;
            double[,] matrix;
            alglib.fisherldan(ldaDs, instances, attributes, uniqueClasses.Count, out info, out matrix);

            // The completion code used to be ignored, so a failed run handed an unusable
            // matrix to the caller. Fail loudly instead (ALGLIB documents codes below 1 as errors).
            if (info < 1)
            {
                throw new System.ArgumentException("Error in calculation of linear discriminant analysis initialization");
            }

            return matrix;
        }
        /// <summary>
        /// Estimates a value for each requested row as the inverse-distance weighted mean of the
        /// last column of the neighbours returned by a k-nearest-neighbour query on <c>kdTree</c>.
        /// </summary>
        /// <param name="dataset">Dataset handed to <c>AlglibUtil.PrepareInputMatrix</c>.</param>
        /// <param name="rows">Row indices to evaluate.</param>
        /// <returns>An iterator yielding one estimate per row of the prepared input matrix.</returns>
        public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows)
        {
            double[,] inputs = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);

            int rowCount     = inputs.GetLength(0);
            int featureCount = inputs.GetLength(1);

            // Query and result buffers are reused across rows.
            double[] query         = new double[featureCount];
            double[] distances     = new double[k];
            double[,] neighbourhood = new double[k, featureCount + 1];

            for (int r = 0; r < rowCount; r++)
            {
                for (int c = 0; c < featureCount; c++)
                {
                    query[c] = inputs[r, c];
                }

                // The query may return fewer than k neighbours; only the returned ones are used.
                int found = alglib.nearestneighbor.kdtreequeryknn(kdTree, query, k, false);
                alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref distances);
                alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbourhood);

                double weightedTargets = 0.0;
                double weightTotal     = 0.0;
                for (int nb = 0; nb < found; nb++)
                {
                    // column `featureCount` holds the value stored alongside the coordinates
                    weightedTargets += neighbourhood[nb, featureCount] / distances[nb];
                    weightTotal     += 1.0 / distances[nb];
                }
                yield return weightedTargets / weightTotal;
            }
        }
Exemple #4
0
        /// <summary>
        /// Reads the operator's parameters, makes sure a gradient vector exists, evaluates
        /// <c>Gradient</c> on the training data and stores the returned quality in
        /// <c>QualityParameter</c> before delegating to the base implementation.
        /// </summary>
        /// <returns>The result of <c>base.Apply()</c>.</returns>
        public override IOperation Apply()
        {
            var trainingProblem = ProblemDataParameter.ActualValue;
            var targetDims      = DimensionsParameter.ActualValue.Value;
            var sampleCount     = NeighborSamplesParameter.ActualValue.Value;
            var penalty         = RegularizationParameter.ActualValue.Value;

            var matrixVector   = NcaMatrixParameter.ActualValue;
            var gradientVector = NcaMatrixGradientsParameter.ActualValue;

            // Create the gradient vector on first use and publish it through the parameter.
            if (gradientVector == null)
            {
                gradientVector = new RealVector(matrixVector.Length);
                NcaMatrixGradientsParameter.ActualValue = gradientVector;
            }

            var inputs = AlglibUtil.PrepareInputMatrix(trainingProblem.Dataset,
                                                       trainingProblem.AllowedInputVariables,
                                                       trainingProblem.TrainingIndices);
            var labels = trainingProblem.Dataset
                                        .GetDoubleValues(trainingProblem.TargetVariable, trainingProblem.TrainingIndices)
                                        .ToArray();

            QualityParameter.ActualValue = new DoubleValue(
                Gradient(matrixVector, gradientVector, inputs, labels, targetDims, sampleCount, penalty));

            return base.Apply();
        }
Exemple #5
0
        /// <summary>
        /// Runs <c>alglib.kmeansgenerate</c> on the training rows of <paramref name="problemData"/>
        /// and wraps the resulting cluster centers in a <c>KMeansClusteringSolution</c>.
        /// </summary>
        /// <param name="problemData">Clustering problem; a clone of it is stored in the solution.</param>
        /// <param name="k">Number of clusters passed to ALGLIB.</param>
        /// <param name="restarts">Restart count; ALGLIB receives <c>restarts + 1</c>.</param>
        /// <returns>The solution built from the computed centers.</returns>
        /// <exception cref="NotSupportedException">The input matrix contains NaN or infinity.</exception>
        /// <exception cref="ArgumentException">ALGLIB returned an info code other than 1.</exception>
        public static KMeansClusteringSolution CreateKMeansSolution(IClusteringProblemData problemData, int k, int restarts)
        {
            var source = problemData.Dataset;
            IEnumerable<string> inputNames   = problemData.AllowedInputVariables;
            IEnumerable<int>    trainingRows = problemData.TrainingIndices;

            double[,] points = AlglibUtil.PrepareInputMatrix(source, inputNames, trainingRows);
            foreach (double value in points)
            {
                if (double.IsNaN(value) || double.IsInfinity(value))
                {
                    throw new NotSupportedException("k-Means clustering does not support NaN or infinity values in the input dataset.");
                }
            }

            int status;
            double[,] centers;
            int[] assignments;
            alglib.kmeansgenerate(points, points.GetLength(0), points.GetLength(1), k, restarts + 1, out status, out centers, out assignments);
            if (status != 1)
            {
                throw new ArgumentException("Error in calculation of k-Means clustering solution");
            }

            var model = new KMeansClusteringModel(centers, inputNames);
            return new KMeansClusteringSolution(model, (IClusteringProblemData)problemData.Clone());
        }
        /// <summary>
        /// Fits a linear discriminant with <c>alglib.fisherlda</c> on the training rows and turns
        /// the resulting weights into a symbolic expression tree (a sum of weighted variables)
        /// that backs the returned classification solution.
        /// </summary>
        /// <param name="problemData">Classification problem; a clone of it is stored in the solution.</param>
        /// <returns>A discriminant-function classification solution.</returns>
        /// <exception cref="NotSupportedException">The input matrix contains NaN or infinity.</exception>
        /// <exception cref="ArgumentException">ALGLIB returned an info code below 1.</exception>
        public static IClassificationSolution CreateLinearDiscriminantAnalysisSolution(IClassificationProblemData problemData)
        {
            var    source = problemData.Dataset;
            string target = problemData.TargetVariable;
            IEnumerable<string> inputNames   = problemData.AllowedInputVariables;
            IEnumerable<int>    trainingRows = problemData.TrainingIndices;
            int classCount = problemData.ClassNames.Count();

            // Inputs first, target as the last column.
            double[,] xy = AlglibUtil.PrepareInputMatrix(source, inputNames.Concat(new[] { target }), trainingRows);
            foreach (double value in xy)
            {
                if (double.IsNaN(value) || double.IsInfinity(value))
                {
                    throw new NotSupportedException("Linear discriminant analysis does not support NaN or infinity values in the input dataset.");
                }
            }

            // Replace each class value by its position in the ascending list of class values.
            int           targetColumn = xy.GetLength(1) - 1;
            List<double>  sortedValues = problemData.ClassValues.OrderBy(v => v).ToList();
            int           rowCount     = xy.GetLength(0);
            for (int r = 0; r < rowCount; r++)
            {
                xy[r, targetColumn] = sortedValues.IndexOf(xy[r, targetColumn]);
            }

            int status;
            double[] weights;
            alglib.fisherlda(xy, xy.GetLength(0), inputNames.Count(), classCount, out status, out weights);
            if (status < 1)
            {
                throw new ArgumentException("Error in calculation of linear discriminant analysis solution");
            }

            // root -> start -> addition -> one weighted variable node per input
            ISymbolicExpressionTree     tree  = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
            ISymbolicExpressionTreeNode start = new StartSymbol().CreateTreeNode();
            tree.Root.AddSubtree(start);
            ISymbolicExpressionTreeNode sum = new Addition().CreateTreeNode();
            start.AddSubtree(sum);

            int weightIndex = 0;
            foreach (string inputName in inputNames)
            {
                var term = (VariableTreeNode)new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
                term.VariableName = inputName;
                term.Weight       = weights[weightIndex];
                sum.AddSubtree(term);
                weightIndex++;
            }

            var model = LinearDiscriminantAnalysis.CreateDiscriminantFunctionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter(), problemData, trainingRows);
            return new SymbolicDiscriminantFunctionClassificationSolution(model, (IClassificationProblemData)problemData.Clone());
        }
 /// <summary>
 /// Builds the input matrix for the given rows, going through
 /// <c>AlglibUtil.PrepareAndScaleInputMatrix</c> when a scaling is supplied and through
 /// <c>AlglibUtil.PrepareInputMatrix</c> otherwise.
 /// </summary>
 /// <param name="ds">Dataset to read from.</param>
 /// <param name="allowedInputs">Names of the variables to include.</param>
 /// <param name="rows">Row indices to include.</param>
 /// <param name="scaling">Optional scaling; <c>null</c> selects the unscaled path.</param>
 /// <returns>The matrix produced by the selected helper.</returns>
 private static double[,] GetData(IDataset ds, IEnumerable<string> allowedInputs, IEnumerable<int> rows, Scaling scaling)
 {
     return scaling != null
         ? AlglibUtil.PrepareAndScaleInputMatrix(ds, allowedInputs, rows, scaling)
         : AlglibUtil.PrepareInputMatrix(ds, allowedInputs, rows);
 }
Exemple #8
0
        /// <summary>
        /// Builds the initial transformation matrix by running ALGLIB's <c>pcabuildbasis</c> on
        /// the training rows of <paramref name="data"/>.
        /// </summary>
        /// <param name="data">Supplies the dataset, allowed input variables and training indices.</param>
        /// <param name="dimensions">Not read by this implementation.</param>
        /// <returns>The basis matrix written by <c>alglib.pcabuildbasis</c>.</returns>
        /// <exception cref="System.ArgumentException">ALGLIB reported a completion code below 1.</exception>
        public override double[,] Initialize(IClassificationProblemData data, int dimensions)
        {
            var instances  = data.TrainingIndices.Count();
            var attributes = data.AllowedInputVariables.Count();

            var pcaDs = AlglibUtil.PrepareInputMatrix(data.Dataset, data.AllowedInputVariables, data.TrainingIndices);

            int info;
            double[] varianceValues; // required by the ALGLIB signature; not used here
            double[,] matrix;
            alglib.pcabuildbasis(pcaDs, instances, attributes, out info, out varianceValues, out matrix);

            // The completion code used to be ignored, so a failed run handed an unusable
            // matrix to the caller. Fail loudly instead (ALGLIB documents codes below 1 as errors).
            if (info < 1)
            {
                throw new System.ArgumentException("Error in calculation of principal component analysis initialization");
            }

            return matrix;
        }
        /// <summary>
        /// Creates a multilayer perceptron with zero, one or two hidden layers and a single
        /// output, trains it with <c>alglib.mlptrainlm</c> on the training rows and returns it
        /// wrapped in a regression solution.
        /// </summary>
        /// <param name="problemData">Regression problem; a clone of it is stored in the solution.</param>
        /// <param name="nLayers">Number of hidden layers (0, 1 or 2).</param>
        /// <param name="nHiddenNodes1">Size of the first hidden layer (used when <paramref name="nLayers"/> is 1 or 2).</param>
        /// <param name="nHiddenNodes2">Size of the second hidden layer (used when <paramref name="nLayers"/> is 2).</param>
        /// <param name="decay">Decay value forwarded to the trainer.</param>
        /// <param name="restarts">Restart count forwarded to the trainer.</param>
        /// <param name="rmsError">Receives <c>alglib.mlprmserror</c> on the training matrix.</param>
        /// <param name="avgRelError">Receives <c>alglib.mlpavgrelerror</c> on the training matrix.</param>
        /// <returns>The trained network as a regression solution.</returns>
        /// <exception cref="NotSupportedException">The input matrix contains NaN or infinity.</exception>
        /// <exception cref="ArgumentException"><paramref name="nLayers"/> is not 0, 1 or 2, or the trainer's info code is not 2.</exception>
        public static IRegressionSolution CreateNeuralNetworkRegressionSolution(IRegressionProblemData problemData, int nLayers, int nHiddenNodes1, int nHiddenNodes2, double decay, int restarts,
                                                                                out double rmsError, out double avgRelError)
        {
            var    source = problemData.Dataset;
            string target = problemData.TargetVariable;
            IEnumerable<string> inputNames   = problemData.AllowedInputVariables;
            IEnumerable<int>    trainingRows = problemData.TrainingIndices;

            // Inputs first, target as the last column.
            double[,] xy = AlglibUtil.PrepareInputMatrix(source, inputNames.Concat(new[] { target }), trainingRows);
            foreach (double value in xy)
            {
                if (double.IsNaN(value) || double.IsInfinity(value))
                {
                    throw new NotSupportedException("Neural network regression does not support NaN or infinity values in the input dataset.");
                }
            }

            alglib.multilayerperceptron network;
            switch (nLayers)
            {
                case 0:
                    alglib.mlpcreate0(inputNames.Count(), 1, out network);
                    break;
                case 1:
                    alglib.mlpcreate1(inputNames.Count(), nHiddenNodes1, 1, out network);
                    break;
                case 2:
                    alglib.mlpcreate2(inputNames.Count(), nHiddenNodes1, nHiddenNodes2, 1, out network);
                    break;
                default:
                    throw new ArgumentException("Number of layers must be zero, one, or two.", "nLayers");
            }

            int sampleCount = xy.GetLength(0);
            int status;
            alglib.mlpreport report;

            // using mlptrainlm instead of mlptraines or mlptrainbfgs because only one parameter is necessary
            alglib.mlptrainlm(network, xy, sampleCount, decay, restarts, out status, out report);
            if (status != 2)
            {
                throw new ArgumentException("Error in calculation of neural network regression solution");
            }

            rmsError    = alglib.mlprmserror(network, xy, sampleCount);
            avgRelError = alglib.mlpavgrelerror(network, xy, sampleCount);

            var model = new NeuralNetworkModel(network, target, inputNames);
            return new NeuralNetworkRegressionSolution(model, (IRegressionProblemData)problemData.Clone());
        }
        /// <summary>
        /// Creates an ensemble of multilayer perceptrons with zero, one or two hidden layers and
        /// a single output, trains it with <c>alglib.mlpetraines</c> on the training rows and
        /// returns it wrapped in a regression solution.
        /// </summary>
        /// <param name="problemData">Regression problem; a clone of it is stored in the solution.</param>
        /// <param name="ensembleSize">Number of networks in the ensemble.</param>
        /// <param name="nLayers">Number of hidden layers (0, 1 or 2).</param>
        /// <param name="nHiddenNodes1">Size of the first hidden layer (used when <paramref name="nLayers"/> is 1 or 2).</param>
        /// <param name="nHiddenNodes2">Size of the second hidden layer (used when <paramref name="nLayers"/> is 2).</param>
        /// <param name="decay">Decay value forwarded to the trainer.</param>
        /// <param name="restarts">Restart count forwarded to the trainer.</param>
        /// <param name="rmsError">Receives <c>alglib.mlpermserror</c> on the training matrix.</param>
        /// <param name="avgRelError">Receives <c>alglib.mlpeavgrelerror</c> on the training matrix.</param>
        /// <returns>The trained ensemble as a regression solution.</returns>
        /// <exception cref="NotSupportedException">The input matrix contains NaN or infinity.</exception>
        /// <exception cref="ArgumentException"><paramref name="nLayers"/> is not 0, 1 or 2, or the trainer's info code is not 6.</exception>
        public static IRegressionSolution CreateNeuralNetworkEnsembleRegressionSolution(IRegressionProblemData problemData, int ensembleSize, int nLayers, int nHiddenNodes1, int nHiddenNodes2, double decay, int restarts,
                                                                                        out double rmsError, out double avgRelError)
        {
            var    source = problemData.Dataset;
            string target = problemData.TargetVariable;
            IEnumerable<string> inputNames   = problemData.AllowedInputVariables;
            IEnumerable<int>    trainingRows = problemData.TrainingIndices;

            // Inputs first, target as the last column.
            double[,] xy = AlglibUtil.PrepareInputMatrix(source, inputNames.Concat(new[] { target }), trainingRows);
            foreach (double value in xy)
            {
                if (double.IsNaN(value) || double.IsInfinity(value))
                {
                    throw new NotSupportedException("Neural network ensemble regression does not support NaN or infinity values in the input dataset.");
                }
            }

            alglib.mlpensemble ensemble;
            switch (nLayers)
            {
                case 0:
                    alglib.mlpecreate0(inputNames.Count(), 1, ensembleSize, out ensemble);
                    break;
                case 1:
                    alglib.mlpecreate1(inputNames.Count(), nHiddenNodes1, 1, ensembleSize, out ensemble);
                    break;
                case 2:
                    alglib.mlpecreate2(inputNames.Count(), nHiddenNodes1, nHiddenNodes2, 1, ensembleSize, out ensemble);
                    break;
                default:
                    throw new ArgumentException("Number of layers must be zero, one, or two.", "nLayers");
            }

            int sampleCount = xy.GetLength(0);
            int status;
            alglib.mlpreport report;

            alglib.mlpetraines(ensemble, xy, sampleCount, decay, restarts, out status, out report);
            if (status != 6)
            {
                throw new ArgumentException("Error in calculation of neural network ensemble regression solution");
            }

            rmsError    = alglib.mlpermserror(ensemble, xy, sampleCount);
            avgRelError = alglib.mlpeavgrelerror(ensemble, xy, sampleCount);

            // The problem data is cloned before the model is constructed, as in the original expression.
            var clonedProblem = (IRegressionProblemData)problemData.Clone();
            var model         = new NeuralNetworkEnsembleModel(ensemble, target, inputNames);
            return new NeuralNetworkEnsembleRegressionSolution(clonedProblem, model);
        }
        /// <summary>
        /// Classifies each requested row by majority vote among the neighbours returned by a
        /// k-nearest-neighbour query on <c>kdTree</c>; the last column of each neighbour is
        /// rounded and used as an index into the vote counters.
        /// </summary>
        /// <param name="dataset">Dataset handed to <c>AlglibUtil.PrepareInputMatrix</c>.</param>
        /// <param name="rows">Row indices to evaluate.</param>
        /// <returns>An iterator yielding one entry of <c>classValues</c> per row.</returns>
        /// <exception cref="InvalidOperationException">
        /// <c>classValues</c> is null. Because this is an iterator, the check runs when
        /// enumeration starts, not when the method is called.
        /// </exception>
        public IEnumerable<double> GetEstimatedClassValues(IDataset dataset, IEnumerable<int> rows)
        {
            if (classValues == null)
            {
                throw new InvalidOperationException("No class values are defined.");
            }

            double[,] inputs = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);

            int rowCount     = inputs.GetLength(0);
            int featureCount = inputs.GetLength(1);

            // Query and result buffers are reused across rows.
            double[] query          = new double[featureCount];
            int[]    votes          = new int[classValues.Length];
            double[] distances      = new double[k];
            double[,] neighbourhood = new double[k, featureCount + 1];

            for (int r = 0; r < rowCount; r++)
            {
                for (int c = 0; c < featureCount; c++)
                {
                    query[c] = inputs[r, c];
                }

                int found = alglib.nearestneighbor.kdtreequeryknn(kdTree, query, k, false);
                alglib.nearestneighbor.kdtreequeryresultsdistances(kdTree, ref distances);
                alglib.nearestneighbor.kdtreequeryresultsxy(kdTree, ref neighbourhood);

                // Count one vote per returned neighbour.
                Array.Clear(votes, 0, votes.Length);
                for (int nb = 0; nb < found; nb++)
                {
                    int classIndex = (int)Math.Round(neighbourhood[nb, featureCount]);
                    votes[classIndex]++;
                }

                // Pick the class with the most votes; ties keep the lowest index.
                int winner      = 0;
                int winnerVotes = votes[0];
                for (int candidate = 1; candidate < votes.Length; candidate++)
                {
                    if (votes[candidate] > winnerVotes)
                    {
                        winnerVotes = votes[candidate];
                        winner      = candidate;
                    }
                }
                yield return classValues[winner];
            }
        }
        /// <summary>
        /// Trains a random forest for regression on the given rows via
        /// <c>CreateRandomForestModel</c> (called with a class count of 1) and reports the error
        /// measures contained in the ALGLIB report.
        /// </summary>
        /// <param name="problemData">Regression problem providing dataset, inputs and target.</param>
        /// <param name="trainingIndices">Rows used for training.</param>
        /// <param name="nTrees">Forwarded to <c>CreateRandomForestModel</c> and the model constructor.</param>
        /// <param name="r">Forwarded to <c>CreateRandomForestModel</c> and the model constructor.</param>
        /// <param name="m">Forwarded to <c>CreateRandomForestModel</c> and the model constructor.</param>
        /// <param name="seed">Forwarded to <c>CreateRandomForestModel</c> and the model constructor.</param>
        /// <param name="rmsError">Receives <c>rep.rmserror</c>.</param>
        /// <param name="outOfBagRmsError">Receives <c>rep.oobrmserror</c>.</param>
        /// <param name="avgRelError">Receives <c>rep.avgrelerror</c>.</param>
        /// <param name="outOfBagAvgRelError">Receives <c>rep.oobavgrelerror</c>.</param>
        /// <returns>The random forest model wrapping the trained forest.</returns>
        public static RandomForestModel CreateRegressionModel(IRegressionProblemData problemData, IEnumerable <int> trainingIndices, int nTrees, double r, double m, int seed,
                                                              out double rmsError, out double outOfBagRmsError, out double avgRelError, out double outOfBagAvgRelError)
        {
            // Inputs first, target as the last column.
            var columnNames = problemData.AllowedInputVariables.Concat(new[] { problemData.TargetVariable });
            double[,] xy = AlglibUtil.PrepareInputMatrix(problemData.Dataset, columnNames, trainingIndices);

            alglib.dfreport report;
            var forest = CreateRandomForestModel(seed, xy, nTrees, r, m, 1, out report);

            rmsError            = report.rmserror;
            outOfBagRmsError    = report.oobrmserror;
            avgRelError         = report.avgrelerror;
            outOfBagAvgRelError = report.oobavgrelerror;

            return new RandomForestModel(forest, seed, problemData, nTrees, r, m);
        }
Exemple #13
0
        /// <summary>
        /// Trains a multinomial logit model with <c>alglib.mnltrainh</c> on the training rows and
        /// returns it wrapped in a classification solution.
        /// </summary>
        /// <param name="problemData">Classification problem; a clone of it is stored in the solution.</param>
        /// <param name="rmsError">Receives <c>alglib.mnlrmserror</c> on the training matrix.</param>
        /// <param name="relClassError">Receives <c>alglib.mnlrelclserror</c> on the training matrix.</param>
        /// <returns>The trained model as a classification solution.</returns>
        /// <exception cref="NotSupportedException">The input matrix contains NaN or infinity.</exception>
        /// <exception cref="ArgumentException">ALGLIB returned an info code other than 1.</exception>
        public static IClassificationSolution CreateLogitClassificationSolution(IClassificationProblemData problemData, out double rmsError, out double relClassError)
        {
            var    dataset        = problemData.Dataset;
            string targetVariable = problemData.TargetVariable;
            IEnumerable <string> allowedInputVariables = problemData.AllowedInputVariables;
            IEnumerable <int>    rows = problemData.TrainingIndices;

            // Inputs first, target as the last column (index nFeatures).
            double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
            if (inputMatrix.Cast <double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
            {
                throw new NotSupportedException("Multinomial logit classification does not support NaN or infinity values in the input dataset.");
            }

            int nRows     = inputMatrix.GetLength(0);
            int nFeatures = inputMatrix.GetLength(1) - 1;

            // Class values are collected over the whole target column, not only the training rows.
            double[] classValues = dataset.GetDoubleValues(targetVariable).Distinct().OrderBy(x => x).ToArray();
            int      nClasses    = classValues.Length;

            // map original class values to values [0..nClasses-1]
            Dictionary <double, double> classIndices = new Dictionary <double, double>();
            for (int i = 0; i < nClasses; i++)
            {
                classIndices[classValues[i]] = i;
            }
            for (int row = 0; row < nRows; row++)
            {
                inputMatrix[row, nFeatures] = classIndices[inputMatrix[row, nFeatures]];
            }

            // lm and rep are pure outputs of mnltrainh; no need to allocate placeholders first.
            int info;
            alglib.logitmodel lm;
            alglib.mnlreport  rep;
            alglib.mnltrainh(inputMatrix, nRows, nFeatures, nClasses, out info, out lm, out rep);
            if (info != 1)
            {
                throw new ArgumentException("Error in calculation of logit classification solution");
            }

            rmsError      = alglib.mnlrmserror(lm, inputMatrix, nRows);
            relClassError = alglib.mnlrelclserror(lm, inputMatrix, nRows);

            MultinomialLogitClassificationSolution solution = new MultinomialLogitClassificationSolution((IClassificationProblemData)problemData.Clone(), new MultinomialLogitModel(lm, targetVariable, allowedInputVariables, classValues));

            return solution;
        }
        /// <summary>
        /// Projects the requested rows through <c>transformationMatrix</c> and appends the
        /// target value of each row as an extra last column.
        /// </summary>
        /// <param name="dataset">Dataset providing input and target values.</param>
        /// <param name="rows">Row indices to transform.</param>
        /// <returns>
        /// A matrix with one row per input row and <c>transformationMatrix.GetLength(1) + 1</c>
        /// columns: the projected values followed by the target value.
        /// </returns>
        public double[,] Reduce(IDataset dataset, IEnumerable <int> rows)
        {
            var data    = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);
            var targets = dataset.GetDoubleValues(TargetVariable, rows).ToArray();

            int nRows    = data.GetLength(0);
            int nInputs  = data.GetLength(1);
            int nReduced = transformationMatrix.GetLength(1);

            var result = new double[nRows, nReduced + 1];

            for (int i = 0; i < nRows; i++)
            {
                // result[i, 0..nReduced-1] = data[i, *] * transformationMatrix
                for (int j = 0; j < nInputs; j++)
                {
                    for (int x = 0; x < nReduced; x++)
                    {
                        result[i, x] += data[i, j] * transformationMatrix[j, x];
                    }
                }

                // Written once per row. It used to sit inside the loop over input columns,
                // which repeated the write needlessly and skipped it when there were no input columns.
                result[i, nReduced] = targets[i];
            }
            return result;
        }
        /// <summary>
        /// Estimates one value per requested row by passing the row's input values to
        /// <c>alglib.mlpeprocess</c> and returning the first output.
        /// </summary>
        /// <param name="dataset">Dataset handed to <c>AlglibUtil.PrepareInputMatrix</c>.</param>
        /// <param name="rows">Row indices to evaluate.</param>
        /// <returns>An iterator yielding one estimate per row of the prepared input matrix.</returns>
        public IEnumerable<double> GetEstimatedValues(IDataset dataset, IEnumerable<int> rows)
        {
            double[,] inputs = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables, rows);

            int rowCount     = inputs.GetLength(0);
            int featureCount = inputs.GetLength(1);

            // Buffers are allocated once and reused for every row.
            double[] sample = new double[featureCount];
            double[] output = new double[1];

            for (int r = 0; r < rowCount; r++)
            {
                for (int c = 0; c < featureCount; c++)
                {
                    sample[c] = inputs[r, c];
                }
                alglib.mlpeprocess(mlpEnsemble, sample, ref output);
                yield return output[0];
            }
        }
        /// <summary>
        /// Builds a nearest-neighbour model: prepares the matrix of inputs plus target for the
        /// given rows, optionally replaces class values by their index in
        /// <paramref name="classValues"/>, and builds the kd-tree used for later queries.
        /// </summary>
        /// <param name="dataset">Dataset to read the training data from.</param>
        /// <param name="rows">Rows stored in the kd-tree.</param>
        /// <param name="k">Number of neighbours used by later queries.</param>
        /// <param name="targetVariable">Name of the target variable (stored as the last matrix column).</param>
        /// <param name="allowedInputVariables">Names of the input variables; copied into an array.</param>
        /// <param name="classValues">
        /// Optional class values. When given, a copy is stored and every target entry is replaced
        /// by the index of its class value in this array.
        /// </param>
        /// <exception cref="NotSupportedException">The prepared matrix contains NaN or infinity.</exception>
        public NearestNeighbourModel(IDataset dataset, IEnumerable <int> rows, int k, string targetVariable, IEnumerable <string> allowedInputVariables, double[] classValues = null)
        {
            Name        = ItemName;
            Description = ItemDescription;
            this.k              = k;
            this.targetVariable = targetVariable;
            this.allowedInputVariables = allowedInputVariables.ToArray();

            // Inputs first, target as the last column.
            var columnNames = allowedInputVariables.Concat(new[] { targetVariable });
            var xy          = AlglibUtil.PrepareInputMatrix(dataset, columnNames, rows);

            foreach (double value in xy)
            {
                if (double.IsNaN(value) || double.IsInfinity(value))
                {
                    throw new NotSupportedException(
                              "Nearest neighbour classification does not support NaN or infinity values in the input dataset.");
                }
            }

            this.kdTree = new alglib.nearestneighbor.kdtree();

            var sampleCount  = xy.GetLength(0);
            var featureCount = xy.GetLength(1) - 1;

            if (classValues != null)
            {
                this.classValues = (double[])classValues.Clone();

                // map original class values to values [0..nClasses-1]
                var indexOfClass = new Dictionary<double, double>();
                for (int c = 0; c < classValues.Length; c++)
                {
                    indexOfClass[classValues[c]] = c;
                }

                for (int r = 0; r < sampleCount; r++)
                {
                    xy[r, featureCount] = indexOfClass[xy[r, featureCount]];
                }
            }

            // featureCount coordinate columns, 1 value column, norm type 2
            alglib.nearestneighbor.kdtreebuild(xy, sampleCount, featureCount, 1, 2, kdTree);
        }
        /// <summary>
        /// Trains a random forest for classification on the given rows via
        /// <c>CreateRandomForestModel</c>, after replacing each class value in the target column
        /// by its index in <c>problemData.ClassValues</c>, and reports the error measures
        /// contained in the ALGLIB report.
        /// </summary>
        /// <param name="problemData">Classification problem providing dataset, inputs, target and class values.</param>
        /// <param name="trainingIndices">Rows used for training.</param>
        /// <param name="nTrees">Forwarded to <c>CreateRandomForestModel</c> and the model constructor.</param>
        /// <param name="r">Forwarded to <c>CreateRandomForestModel</c> and the model constructor.</param>
        /// <param name="m">Forwarded to <c>CreateRandomForestModel</c> and the model constructor.</param>
        /// <param name="seed">Forwarded to <c>CreateRandomForestModel</c> and the model constructor.</param>
        /// <param name="rmsError">Receives <c>rep.rmserror</c>.</param>
        /// <param name="outOfBagRmsError">Receives <c>rep.oobrmserror</c>.</param>
        /// <param name="relClassificationError">Receives <c>rep.relclserror</c>.</param>
        /// <param name="outOfBagRelClassificationError">Receives <c>rep.oobrelclserror</c>.</param>
        /// <returns>The random forest model wrapping the trained forest and the class values.</returns>
        public static RandomForestModel CreateClassificationModel(IClassificationProblemData problemData, IEnumerable <int> trainingIndices, int nTrees, double r, double m, int seed,
                                                                  out double rmsError, out double outOfBagRmsError, out double relClassificationError, out double outOfBagRelClassificationError)
        {
            // Inputs first, target as the last column.
            var columnNames = problemData.AllowedInputVariables.Concat(new[] { problemData.TargetVariable });
            double[,] xy = AlglibUtil.PrepareInputMatrix(problemData.Dataset, columnNames, trainingIndices);

            var classValues = problemData.ClassValues.ToArray();
            int classCount  = classValues.Length;

            // map original class values to values [0..nClasses-1]
            var indexOfClass = new Dictionary<double, double>();
            for (int c = 0; c < classCount; c++)
            {
                indexOfClass[classValues[c]] = c;
            }

            int sampleCount  = xy.GetLength(0);
            int targetColumn = xy.GetLength(1) - 1;
            for (int row = 0; row < sampleCount; row++)
            {
                xy[row, targetColumn] = indexOfClass[xy[row, targetColumn]];
            }

            alglib.dfreport report;
            var forest = CreateRandomForestModel(seed, xy, nTrees, r, m, classCount, out report);

            rmsError                       = report.rmserror;
            outOfBagRmsError               = report.oobrmserror;
            relClassificationError         = report.relclserror;
            outOfBagRelClassificationError = report.oobrelclserror;

            return new RandomForestModel(forest, seed, problemData, nTrees, r, m, classValues);
        }
Exemple #18
0
        /// <summary>
        /// Fits a linear model with <c>alglib.lrbuild</c> on the training rows and converts the
        /// unpacked coefficients into a symbolic expression tree: a sum of one weighted variable
        /// per input plus a constant node.
        /// </summary>
        /// <param name="problemData">Regression problem; a clone of it is stored in the solution.</param>
        /// <param name="rmsError">Receives <c>rmserror</c> from the ALGLIB report.</param>
        /// <param name="cvRmsError">Receives <c>cvrmserror</c> from the ALGLIB report.</param>
        /// <returns>A symbolic regression solution named "Linear Regression Solution".</returns>
        /// <exception cref="NotSupportedException">The input matrix contains NaN or infinity.</exception>
        /// <exception cref="ArgumentException">ALGLIB returned an info code other than 1.</exception>
        public static ISymbolicRegressionSolution CreateLinearRegressionSolution(IRegressionProblemData problemData, out double rmsError, out double cvRmsError)
        {
            var    dataset        = problemData.Dataset;
            string targetVariable = problemData.TargetVariable;
            IEnumerable <string> allowedInputVariables = problemData.AllowedInputVariables;
            IEnumerable <int>    rows = problemData.TrainingIndices;

            // Inputs first, target as the last column.
            double[,] inputMatrix = AlglibUtil.PrepareInputMatrix(dataset, allowedInputVariables.Concat(new string[] { targetVariable }), rows);
            if (inputMatrix.Cast <double>().Any(x => double.IsNaN(x) || double.IsInfinity(x)))
            {
                throw new NotSupportedException("Linear regression does not support NaN or infinity values in the input dataset.");
            }

            int nRows     = inputMatrix.GetLength(0);
            int nFeatures = inputMatrix.GetLength(1) - 1;

            // retVal, lm and ar are pure outputs of lrbuild; no placeholder values are needed.
            int retVal;
            alglib.linearmodel lm;
            alglib.lrreport    ar;
            alglib.lrbuild(inputMatrix, nRows, nFeatures, out retVal, out lm, out ar);
            if (retVal != 1)
            {
                throw new ArgumentException("Error in calculation of linear regression solution");
            }
            rmsError   = ar.rmserror;
            cvRmsError = ar.cvrmserror;

            // lrunpack allocates the coefficient array itself; the last coefficient is the constant.
            double[] coefficients;
            alglib.lrunpack(lm, out coefficients, out nFeatures);

            // root -> start -> addition -> weighted variables + constant
            ISymbolicExpressionTree     tree      = new SymbolicExpressionTree(new ProgramRootSymbol().CreateTreeNode());
            ISymbolicExpressionTreeNode startNode = new StartSymbol().CreateTreeNode();

            tree.Root.AddSubtree(startNode);
            ISymbolicExpressionTreeNode addition = new Addition().CreateTreeNode();

            startNode.AddSubtree(addition);

            int col = 0;

            foreach (string column in allowedInputVariables)
            {
                VariableTreeNode vNode = (VariableTreeNode) new HeuristicLab.Problems.DataAnalysis.Symbolic.Variable().CreateTreeNode();
                vNode.VariableName = column;
                vNode.Weight       = coefficients[col];
                addition.AddSubtree(vNode);
                col++;
            }

            ConstantTreeNode cNode = (ConstantTreeNode) new Constant().CreateTreeNode();

            cNode.Value = coefficients[coefficients.Length - 1];
            addition.AddSubtree(cNode);

            SymbolicRegressionSolution solution = new SymbolicRegressionSolution(new SymbolicRegressionModel(tree, new SymbolicDataAnalysisExpressionTreeInterpreter()), (IRegressionProblemData)problemData.Clone());

            solution.Model.Name = "Linear Regression Model";
            solution.Name       = "Linear Regression Solution";
            return solution;
        }
Exemple #19
0
        /// <summary>
        /// Creates a classifier multilayer perceptron with zero, one or two hidden layers, trains
        /// it with <c>alglib.mlptrainlm</c> on the training rows (class values replaced by their
        /// index in the sorted list of distinct target values) and returns it wrapped in a
        /// classification solution.
        /// </summary>
        /// <param name="problemData">Classification problem; a clone of it is stored in the solution.</param>
        /// <param name="nLayers">Number of hidden layers (0, 1 or 2).</param>
        /// <param name="nHiddenNodes1">Size of the first hidden layer (used when <paramref name="nLayers"/> is 1 or 2).</param>
        /// <param name="nHiddenNodes2">Size of the second hidden layer (used when <paramref name="nLayers"/> is 2).</param>
        /// <param name="decay">Decay value forwarded to the trainer.</param>
        /// <param name="restarts">Restart count forwarded to the trainer.</param>
        /// <param name="rmsError">Receives <c>alglib.mlprmserror</c> on the training matrix.</param>
        /// <param name="avgRelError">Receives <c>alglib.mlpavgrelerror</c> on the training matrix.</param>
        /// <param name="relClassError">Receives <c>alglib.mlpclserror</c> divided by the number of training rows.</param>
        /// <returns>The trained network as a classification solution.</returns>
        /// <exception cref="NotSupportedException">The input matrix contains NaN or infinity.</exception>
        /// <exception cref="ArgumentException"><paramref name="nLayers"/> is not 0, 1 or 2, or the trainer's info code is not 2.</exception>
        public static IClassificationSolution CreateNeuralNetworkClassificationSolution(IClassificationProblemData problemData, int nLayers, int nHiddenNodes1, int nHiddenNodes2, double decay, int restarts,
                                                                                        out double rmsError, out double avgRelError, out double relClassError)
        {
            var    source = problemData.Dataset;
            string target = problemData.TargetVariable;
            IEnumerable<string> inputNames   = problemData.AllowedInputVariables;
            IEnumerable<int>    trainingRows = problemData.TrainingIndices;

            // Inputs first, target as the last column.
            double[,] xy = AlglibUtil.PrepareInputMatrix(source, inputNames.Concat(new[] { target }), trainingRows);
            foreach (double value in xy)
            {
                if (double.IsNaN(value) || double.IsInfinity(value))
                {
                    throw new NotSupportedException("Neural network classification does not support NaN or infinity values in the input dataset.");
                }
            }

            int sampleCount  = xy.GetLength(0);
            int targetColumn = xy.GetLength(1) - 1;

            // Class values are collected over the whole target column, not only the training rows.
            double[] sortedClassValues = source.GetDoubleValues(target).Distinct().OrderBy(v => v).ToArray();
            int      classCount        = sortedClassValues.Length;

            // map original class values to values [0..nClasses-1]
            var indexOfClass = new Dictionary<double, double>();
            for (int c = 0; c < classCount; c++)
            {
                indexOfClass[sortedClassValues[c]] = c;
            }
            for (int r = 0; r < sampleCount; r++)
            {
                xy[r, targetColumn] = indexOfClass[xy[r, targetColumn]];
            }

            alglib.multilayerperceptron network;
            switch (nLayers)
            {
                case 0:
                    alglib.mlpcreatec0(inputNames.Count(), classCount, out network);
                    break;
                case 1:
                    alglib.mlpcreatec1(inputNames.Count(), nHiddenNodes1, classCount, out network);
                    break;
                case 2:
                    alglib.mlpcreatec2(inputNames.Count(), nHiddenNodes1, nHiddenNodes2, classCount, out network);
                    break;
                default:
                    throw new ArgumentException("Number of layers must be zero, one, or two.", "nLayers");
            }

            int status;
            alglib.mlpreport report;

            // using mlptrainlm instead of mlptraines or mlptrainbfgs because only one parameter is necessary
            alglib.mlptrainlm(network, xy, sampleCount, decay, restarts, out status, out report);
            if (status != 2)
            {
                throw new ArgumentException("Error in calculation of neural network classification solution");
            }

            rmsError      = alglib.mlprmserror(network, xy, sampleCount);
            avgRelError   = alglib.mlpavgrelerror(network, xy, sampleCount);
            relClassError = alglib.mlpclserror(network, xy, sampleCount) / (double)sampleCount;

            // The model receives the class values of the cloned problem data, not the local array.
            var clonedProblem = (IClassificationProblemData)problemData.Clone();
            var model         = new NeuralNetworkModel(network, target, inputNames, clonedProblem.ClassValues.ToArray());
            return new NeuralNetworkClassificationSolution(model, clonedProblem);
        }