Beispiel #1
0
        /// <summary>
        /// Fits linear models on an increasing number of projected components and returns the
        /// candidate with the lowest RMSE reported by
        /// <c>PreconstructedLinearModel.CreateLinearModel</c>.
        /// </summary>
        /// <param name="pd">Problem data; the projection is created from its training rows and allowed input variables.</param>
        /// <param name="random">Not used by this implementation.</param>
        /// <param name="cancellationToken">Checked once per candidate so a long search can be aborted.</param>
        /// <param name="numberOfParameters">
        /// Set to the number of components of the returned model plus one, or 1 if no candidate was accepted.
        /// </param>
        /// <returns>
        /// The best candidate wrapped in a <c>ComponentReducedLinearModel</c>, or <c>null</c> if no
        /// candidate was accepted (for example when the component limit is below 1).
        /// </returns>
        /// <exception cref="OperationCanceledException">Cancellation was requested on <paramref name="cancellationToken"/>.</exception>
        public override IRegressionModel Build(IRegressionProblemData pd, IRandom random,
                                               CancellationToken cancellationToken, out int numberOfParameters)
        {
            var pca    = PrincipleComponentTransformation.CreateProjection(pd.Dataset, pd.TrainingIndices, pd.AllowedInputVariables, normalize: true);
            var pcdata = pca.TransformProblemData(pd);

            // The upper bound does not change inside the loop; evaluate it once instead of
            // re-enumerating the allowed input variables on every iteration.
            var maxComponents = Math.Min(NumberOfComponents, pd.AllowedInputVariables.Count());

            ComponentReducedLinearModel bestModel = null;
            var bestCvrmse = double.MaxValue;

            numberOfParameters = 1;
            for (var i = 1; i <= maxComponents; i++)
            {
                // Every iteration clones the problem data and fits a model, so this is the
                // natural point to honor a cancellation request.
                cancellationToken.ThrowIfCancellationRequested();

                // Work on a clone so the checked state of the shared transformed data is not altered.
                var pd2    = (IRegressionProblemData)pcdata.Clone();
                var inputs = new HashSet<string>(pca.ComponentNames.Take(i));

                // Keep only the first i components checked as inputs.
                foreach (var v in pd2.InputVariables.CheckedItems.ToArray())
                {
                    pd2.InputVariables.SetItemCheckedState(v.Value, inputs.Contains(v.Value.Value));
                }

                double rmse;
                var    model = PreconstructedLinearModel.CreateLinearModel(pd2, out rmse);

                // Ties are resolved in favor of the later (larger) candidate. Note that a NaN
                // rmse compares false here and is therefore accepted as well.
                if (rmse > bestCvrmse)
                {
                    continue;
                }

                bestModel          = new ComponentReducedLinearModel(pd2.TargetVariable, model, pca);
                numberOfParameters = i + 1;
                bestCvrmse         = rmse;
            }
            return bestModel;
        }
        /// <summary>
        /// Fits a linear regression model with ALGLIB's <c>lrbuild</c> on the training rows of
        /// <paramref name="pd"/> and wraps the unpacked coefficients in a
        /// <see cref="PreconstructedLinearModel"/>.
        /// </summary>
        /// <param name="pd">Problem data supplying the dataset, allowed input variables, target variable and training rows.</param>
        /// <returns>A model mapping each allowed input variable to its coefficient, plus the intercept.</returns>
        /// <exception cref="ArgumentException">ALGLIB reported a non-success completion code.</exception>
        private static PreconstructedLinearModel ClassicCalculation(IRegressionProblemData pd)
        {
            // Materialize once: the names define both the matrix column order and the coefficient keys.
            var variables = pd.AllowedInputVariables.ToList();

            // Columns: one per input variable, target variable last (the layout lrbuild is given below).
            // Rows: the training partition only, so rows outside it do not influence the fit.
            var inputMatrix = pd.Dataset.ToArray(variables.Concat(new[] {
                pd.TargetVariable
            }), pd.TrainingIndices);

            var nFeatures = inputMatrix.GetLength(1) - 1;

            alglib.linearmodel lm;
            alglib.lrreport    ar;
            int retVal;

            alglib.lrbuild(inputMatrix, inputMatrix.GetLength(0), nFeatures, out retVal, out lm, out ar);
            if (retVal != 1)
            {
                throw new ArgumentException("Error in calculation of linear regression solution");
            }

            // The unpacked array holds one weight per feature followed by the intercept at index nFeatures.
            double[] coefficients;
            alglib.lrunpack(lm, out coefficients, out nFeatures);

            // Zip stops at the shorter sequence, so the trailing intercept is not paired with a variable.
            var coeffs = variables.Zip(coefficients, (s, d) => new { s, d }).ToDictionary(x => x.s, x => x.d);

            return new PreconstructedLinearModel(coeffs, coefficients[nFeatures], pd.TargetVariable);
        }
 /// <summary>
 /// Cloning constructor: forwards to the base cloning constructor, copies the intercept and,
 /// when present, copies the coefficients into a new dictionary so the clone does not share
 /// the original's dictionary instance.
 /// </summary>
 /// <param name="original">Model to copy from.</param>
 /// <param name="cloner">Cloner passed through to the base constructor.</param>
 private PreconstructedLinearModel(PreconstructedLinearModel original, Cloner cloner) : base(original, cloner)
 {
     Intercept = original.Intercept;

     var source = original.Coefficients;
     if (source == null)
     {
         // Nothing to copy; Coefficients is left untouched.
         return;
     }

     Coefficients = source.ToDictionary(pair => pair.Key, pair => pair.Value);
 }
        /// <summary>
        /// Fits a linear model on the training rows of <paramref name="pd"/> by solving the normal
        /// equations (X^T X) b = X^T y with a Cholesky factorization. If the factorization or the
        /// solve does not succeed, falls back to ALGLIB's <c>lrbuild</c> on the same training rows.
        /// </summary>
        /// <param name="pd">Problem data supplying the dataset, allowed input variables, target variable and training rows.</param>
        /// <param name="rmse">Root mean squared error of the returned model over the training rows.</param>
        /// <returns>A model mapping each allowed input variable to its coefficient, plus the intercept.</returns>
        /// <exception cref="ArgumentException">The fallback regression reported a non-success completion code.</exception>
        private static PreconstructedLinearModel AlternativeCalculation(IRegressionProblemData pd, out double rmse)
        {
            var variables = pd.AllowedInputVariables.ToList();
            var n         = variables.Count;
            var m         = pd.TrainingIndices.Count();

            // Set up X^T: one row per input variable plus a final row of ones for the intercept.
            var inTr = new double[n + 1, m];

            for (var i = 0; i < n; i++)
            {
                var vdata = pd.Dataset.GetDoubleValues(variables[i], pd.TrainingIndices).ToArray();
                for (var j = 0; j < m; j++)
                {
                    inTr[i, j] = vdata[j];
                }
            }
            for (var i = 0; i < m; i++)
            {
                inTr[n, i] = 1;
            }

            // Set up y as an m x 1 matrix so it can be passed to the matrix product below.
            var y     = new double[m, 1];
            var ydata = pd.TargetVariableTrainingValues.ToArray();

            for (var i = 0; i < m; i++)
            {
                y[i, 0] = ydata[i];
            }

            // Build the normal equations. beta is 0 in both calls, so the targets are overwritten,
            // not accumulated into.
            var aTy = new double[n + 1, 1];
            var aTa = new double[n + 1, n + 1];

            alglib.rmatrixgemm(n + 1, 1, m, 1, inTr, 0, 0, 0, y, 0, 0, 0, 0, ref aTy, 0, 0);        // aTy = inTr * y
            alglib.rmatrixgemm(n + 1, n + 1, m, 1, inTr, 0, 0, 0, inTr, 0, 0, 1, 0, ref aTa, 0, 0); // aTa = inTr * t(inTr)

            double[] coefficients = null;
            var solved = false;

            // Only solve when the factorization itself succeeded: after a failed factorization
            // the matrix contents are not usable and the solver status must not be trusted.
            if (alglib.spdmatrixcholesky(ref aTa, n + 1, true))
            {
                var aTyVector = new double[n + 1];
                for (var i = 0; i < n + 1; i++)
                {
                    aTyVector[i] = aTy[i, 0];
                }

                int info;
                alglib.densesolverreport report;
                alglib.spdmatrixcholeskysolve(aTa, n + 1, true, aTyVector, out info, out report, out coefficients);
                solved = info == 1;
            }

            // If the Cholesky calculation fails, fall back to classic linear regression.
            if (!solved)
            {
                alglib.linearmodel lm;
                alglib.lrreport    ar;
                int retVal;

                // Same rows as the primary path (and as the rmse below): the training partition.
                var inputMatrix = pd.Dataset.ToArray(variables.Concat(new[] {
                    pd.TargetVariable
                }), pd.TrainingIndices);
                alglib.lrbuild(inputMatrix, inputMatrix.GetLength(0), n, out retVal, out lm, out ar);
                if (retVal != 1)
                {
                    throw new ArgumentException("Error in calculation of linear regression solution");
                }
                alglib.lrunpack(lm, out coefficients, out n);
            }

            // In both paths the first n entries are the variable weights and entry n is the intercept.
            var coeffs = Enumerable.Range(0, n).ToDictionary(i => variables[i], i => coefficients[i]);
            var model  = new PreconstructedLinearModel(coeffs, coefficients[n], pd.TargetVariable);

            var sumSquaredResiduals = pd.TrainingIndices
                .Select(i => pd.Dataset.GetDoubleValue(pd.TargetVariable, i) - model.GetEstimatedValue(pd.Dataset, i))
                .Sum(r => r * r);
            rmse = Math.Sqrt(sumSquaredResiduals / m);
            return model;
        }