/// <summary>
/// Builds a linear model on the leading principal components of the training data.
/// For i = 1 .. min(NumberOfComponents, number of allowed input variables) a linear model
/// is fitted on the first i components; the candidate whose error (as reported by
/// PreconstructedLinearModel.CreateLinearModel) is lowest is returned. On ties the
/// candidate with more components wins, because only strictly worse candidates are skipped.
/// </summary>
/// <param name="pd">Problem data the projection and the candidate models are created from.</param>
/// <param name="random">Not used by this method.</param>
/// <param name="cancellationToken">Checked once per candidate; a cancelled token aborts the build.</param>
/// <param name="numberOfParameters">
/// Number of components of the selected model plus one; 1 if no candidate was selected.
/// </param>
/// <returns>The selected model, or null if no candidate was evaluated or accepted.</returns>
/// <exception cref="OperationCanceledException">Cancellation was requested during the build.</exception>
public override IRegressionModel Build(IRegressionProblemData pd, IRandom random, CancellationToken cancellationToken, out int numberOfParameters) {
  var pca = PrincipleComponentTransformation.CreateProjection(pd.Dataset, pd.TrainingIndices, pd.AllowedInputVariables, normalize: true);
  var pcdata = pca.TransformProblemData(pd);
  ComponentReducedLinearModel bestModel = null;
  var bestCvrmse = double.MaxValue;
  numberOfParameters = 1;

  // The upper bound does not change while iterating, so compute it only once
  // instead of re-enumerating the allowed input variables on every pass.
  var maxComponents = Math.Min(NumberOfComponents, pd.AllowedInputVariables.Count());

  for (var i = 1; i <= maxComponents; i++) {
    // Every iteration fits a complete linear model; stop early when the caller cancelled.
    cancellationToken.ThrowIfCancellationRequested();

    var pd2 = (IRegressionProblemData)pcdata.Clone();
    var inputs = new HashSet<string>(pca.ComponentNames.Take(i));

    // Iterate over a snapshot of the checked items, since the checked state is modified inside the loop.
    // Only currently checked inputs are visited: those outside the first i components get unchecked.
    foreach (var v in pd2.InputVariables.CheckedItems.ToArray()) {
      pd2.InputVariables.SetItemCheckedState(v.Value, inputs.Contains(v.Value.Value));
    }

    double rmse;
    var model = PreconstructedLinearModel.CreateLinearModel(pd2, out rmse);

    // NOTE(review): a NaN rmse is never "greater than" the best value, so such a candidate is accepted.
    if (rmse > bestCvrmse) {
      continue;
    }

    bestModel = new ComponentReducedLinearModel(pd2.TargetVariable, model, pca);
    numberOfParameters = i + 1;
    bestCvrmse = rmse;
  }

  return bestModel;
}
/// <summary>
/// Fits a linear regression with ALGLIB's lrbuild and wraps the result in a
/// <see cref="PreconstructedLinearModel"/>.
/// The data matrix holds the allowed input variables followed by the target variable
/// as its last column and is built from all rows of the dataset (pd.AllIndices).
/// </summary>
/// <param name="pd">Problem data providing the dataset, the input variables and the target variable.</param>
/// <returns>A model with one coefficient per allowed input variable and a constant term.</returns>
/// <exception cref="ArgumentException">lrbuild reported a status other than 1.</exception>
private static PreconstructedLinearModel ClassicCalculation(IRegressionProblemData pd) {
  // Columns: all allowed inputs first, the target last.
  var columnNames = pd.AllowedInputVariables.Concat(new[] { pd.TargetVariable });
  var data = pd.Dataset.ToArray(columnNames, pd.AllIndices);
  var rowCount = data.GetLength(0);
  var featureCount = data.GetLength(1) - 1;

  int status;
  alglib.linearmodel linearModel;
  alglib.lrreport buildReport;
  alglib.lrbuild(data, rowCount, featureCount, out status, out linearModel, out buildReport);
  if (status != 1) {
    throw new ArgumentException("Error in calculation of linear regression solution");
  }

  // lrunpack yields the weights followed by the constant term and reports the feature count again.
  double[] weights;
  alglib.lrunpack(linearModel, out weights, out featureCount);

  // Pair every input variable with its weight; the trailing constant term has no partner and is left out.
  var weightByVariable = pd.AllowedInputVariables
    .Zip(weights, (name, weight) => new { Name = name, Weight = weight })
    .ToDictionary(pair => pair.Name, pair => pair.Weight);

  return new PreconstructedLinearModel(weightByVariable, weights[featureCount], pd.TargetVariable);
}
/// <summary>
/// Cloning constructor: copies the intercept and, if the original has coefficients,
/// a new dictionary containing the same key/value pairs.
/// </summary>
/// <param name="original">The model to copy from.</param>
/// <param name="cloner">The cloner forwarded to the base class constructor.</param>
private PreconstructedLinearModel(PreconstructedLinearModel original, Cloner cloner)
  : base(original, cloner) {
  Intercept = original.Intercept;

  var sourceCoefficients = original.Coefficients;
  if (sourceCoefficients == null) {
    // Nothing to copy; Coefficients is left untouched.
    return;
  }

  // Build a separate dictionary so the clone does not share the original's instance.
  Coefficients = sourceCoefficients.ToDictionary(entry => entry.Key, entry => entry.Value);
}
/// <summary>
/// Fits a linear model with a constant term by solving the normal equations
/// (X^T X) b = X^T y with a Cholesky decomposition, using the training rows of
/// <paramref name="pd"/>. If the decomposition or the solve fails, the model is
/// fitted with ALGLIB's lrbuild instead.
/// </summary>
/// <param name="pd">Problem data providing the dataset, the input variables and the target variable.</param>
/// <param name="rmse">
/// Root of the mean squared residual of the returned model over the training rows
/// (NaN when there are no training rows, because the sum is divided by the row count).
/// </param>
/// <returns>A model with one coefficient per allowed input variable and a constant term.</returns>
/// <exception cref="ArgumentException">The lrbuild fallback reported a status other than 1.</exception>
private static PreconstructedLinearModel AlternativeCalculation(IRegressionProblemData pd, out double rmse) {
  var variables = pd.AllowedInputVariables.ToList();
  var n = variables.Count;
  var m = pd.TrainingIndices.Count();

  //Set up X^T: one row per input variable and a final row of ones for the constant term
  var inTr = new double[n + 1, m];
  for (var i = 0; i < n; i++) {
    var vdata = pd.Dataset.GetDoubleValues(variables[i], pd.TrainingIndices).ToArray();
    for (var j = 0; j < m; j++) {
      inTr[i, j] = vdata[j];
    }
  }
  for (var i = 0; i < m; i++) {
    inTr[n, i] = 1;
  }

  //Set up y
  var y = new double[m, 1];
  var ydata = pd.TargetVariableTrainingValues.ToArray();
  for (var i = 0; i < m; i++) {
    y[i, 0] = ydata[i];
  }

  //Set up the normal equations
  var aTy = new double[n + 1, 1];
  var aTa = new double[n + 1, n + 1];
  var aTyVector = new double[n + 1];
  alglib.rmatrixgemm(n + 1, 1, m, 1, inTr, 0, 0, 0, y, 0, 0, 0, 0, ref aTy, 0, 0); //aTy = inTr * y
  alglib.rmatrixgemm(n + 1, n + 1, m, 1, inTr, 0, 0, 0, inTr, 0, 0, 1, 0, ref aTa, 0, 0); //aTa = inTr * t(inTr)
  for (var i = 0; i < n + 1; i++) {
    aTyVector[i] = aTy[i, 0];
  }

  //Solve via Cholesky. spdmatrixcholeskysolve expects the Cholesky factor, so the matrix has to be
  //decomposed first. The decomposition returns false if the matrix is not positive definite; its
  //contents are undefined in that case and must not be handed to the solver.
  var info = 0;
  double[] coefficients = null;
  alglib.densesolverreport report;
  var decomposed = alglib.spdmatrixcholesky(ref aTa, n + 1, true);
  if (decomposed) {
    alglib.spdmatrixcholeskysolve(aTa, n + 1, true, aTyVector, out info, out report, out coefficients);
  }

  //if the Cholesky calculation fails fall back to classic linear regression
  if (!decomposed || info != 1) {
    alglib.linearmodel lm;
    alglib.lrreport ar;
    int retVal;
    //NOTE(review): the fallback fits on pd.AllIndices while the Cholesky path and the rmse below
    //use pd.TrainingIndices - confirm that this difference is intended.
    var inputMatrix = pd.Dataset.ToArray(pd.AllowedInputVariables.Concat(new[] { pd.TargetVariable }), pd.AllIndices);
    alglib.lrbuild(inputMatrix, inputMatrix.GetLength(0), n, out retVal, out lm, out ar);
    if (retVal != 1) {
      throw new ArgumentException("Error in calculation of linear regression solution");
    }
    alglib.lrunpack(lm, out coefficients, out n);
  }

  //The first n entries belong to the input variables, entry n is the constant term
  var coeffs = Enumerable.Range(0, n).ToDictionary(i => variables[i], i => coefficients[i]);
  var model = new PreconstructedLinearModel(coeffs, coefficients[n], pd.TargetVariable);

  //Training error of the final model
  var sumOfSquaredResiduals = pd.TrainingIndices
    .Select(i => pd.Dataset.GetDoubleValue(pd.TargetVariable, i) - model.GetEstimatedValue(pd.Dataset, i))
    .Sum(r => r * r);
  rmse = Math.Sqrt(sumOfSquaredResiduals / m);
  return model;
}