/// <summary>
/// Creates a copy of this instance carrying only the basic statistics:
/// coefficients, R^2, adjusted R^2, standard error, t-stats, coefficient
/// standard errors, CV and NMBE. Array members are copied element-wise so
/// the clone shares no mutable state with this instance.
/// </summary>
/// <returns>A new RegressionOutputs holding the basic statistics.</returns>
public RegressionOutputs CloneBasicStatistics()
{
    RegressionOutputs copy = new RegressionOutputs();

    // Scalar statistics copy by value.
    copy.Rsquared = Rsquared;
    copy.AdjRsquared = AdjRsquared;
    copy.StandardError = StandardError;
    copy.CV = CV;
    copy.NMBE = NMBE;

    // Array statistics are duplicated (Array.Clone is a shallow copy, which
    // is a full copy for double[]) so the clone is independent; null stays null.
    if (Coeffs != null)
    {
        copy.Coeffs = (double[])Coeffs.Clone();
    }
    if (Tstats != null)
    {
        copy.Tstats = (double[])Tstats.Clone();
    }
    if (CoeffsStandardErrors != null)
    {
        copy.CoeffsStandardErrors = (double[])CoeffsStandardErrors.Clone();
    }

    return copy;
}
/// <summary>
/// SimpleLinearRegression returns the regression outputs for a simple y = mx + b model.
/// </summary>
/// <param name="y">List of double dependent variables</param>
/// <param name="x">List of double independent variables</param>
/// <returns>Regression outputs with Coeffs[0] = intercept, Coeffs[1] = slope,
/// plus StandardError and Rsquared.</returns>
/// <exception cref="ArgumentException">Thrown when y and x differ in length.</exception>
public static RegressionOutputs SimpleLinearRegression(List<double> y, List<double> x)
{
    RegressionOutputs result = new RegressionOutputs();

    // The two series must be paired observations.
    if (y.Count != x.Count)
    {
        // ArgumentException is more specific than the bare Exception thrown
        // previously, and is still caught by any existing catch (Exception).
        throw new ArgumentException("Matrices must be the same size for regression");
    }

    int n = y.Count;
    double yMean = y.Average();
    double xMean = x.Average();

    // Accumulate the sums for the normal equations directly instead of
    // materializing three O(n) scratch arrays; the left-to-right summation
    // order matches the previous array-then-Sum() version bit-for-bit.
    double sumXX = 0, sumYY = 0, sumXY = 0;
    for (int i = 0; i < n; i++)
    {
        sumXX += x[i] * x[i];
        sumYY += y[i] * y[i];
        sumXY += x[i] * y[i];
    }

    // Corrected sums of squares / cross-products about the means.
    double ssXY = sumXY - n * xMean * yMean;
    double ssXX = sumXX - n * xMean * xMean;
    double ssYY = sumYY - n * yMean * yMean;

    double slope = ssXY / ssXX;       // NaN/Infinity if x is constant (ssXX == 0)
    double constant = yMean - slope * xMean;
    double sse = ssYY - slope * ssXY; // sum of squared residuals

    result.Coeffs = new double[2];
    result.Coeffs[0] = constant;
    result.Coeffs[1] = slope;
    result.StandardError = Math.Sqrt(sse / (n - 2)); // residual std error, n - 2 dof
    result.Rsquared = 1 - (sse / ssYY);
    return result;
}
/// <summary>
/// Reads whitespace-delimited numeric records from stdin (when redirected)
/// or from a file named in args[0], treats the first column as the
/// dependent variable and the rest as regressors, runs a multiple linear
/// regression, and prints each coefficient rounded to about five
/// significant figures.
/// </summary>
/// <param name="args">Optional: args[0] is a path to the input file.</param>
static void Main(string[] args)
{
    // Prefer redirected stdin; fall back to a file path argument.
    string input;
    if (Console.IsInputRedirected)
    {
        input = Console.In.ReadToEnd();
    }
    else if (args.Length > 0)
    {
        input = File.ReadAllText(args[0]);
    }
    else
    {
        Console.Out.WriteLine("No input.");
        return;
    }

    if (input.Length == 0)
    {
        Console.Out.WriteLine("No input.");
        return;
    }

    // Split into lines tolerating any newline convention, then into
    // whitespace-separated fields (Split(null) splits on whitespace).
    List<string> lines = input.Split(new[] { "\r\n", "\r", "\n" }, StringSplitOptions.None).ToList();
    List<string[]> splitLines = lines.Select(line => line.Split(null)).ToList();

    // Use the most common field count as the expected record width so that
    // short/garbage lines are discarded below.
    IEnumerable<IGrouping<int, string[]>> columnCounts = splitLines.GroupBy(cols => cols.Length).ToList();
    int mostCommonColumnLength = columnCounts.OrderByDescending(grouping => grouping.Count()).First().Key;

    // NOTE(review): double.Parse uses the current culture here; presumably
    // IsValidLine applies the same culture — confirm before changing either.
    List<List<double>> data = splitLines
        .Where(line => IsValidLine(line, mostCommonColumnLength))
        .Select(datas => datas.Select(double.Parse).ToList()).ToList();

    if (!data.Any())
    {
        Console.Out.WriteLine($"None of the original {splitLines.Count} lines contained records of completely clean data.");
        return;
    }

    // First column is y; remaining columns form the X matrix.
    double[,] xArray = new double[data.Count, mostCommonColumnLength - 1];
    double[] yArray = new double[data.Count];
    foreach ((List<double> record, int rowIndex) in data.WithIndex())
    {
        foreach ((double dataPoint, int colIndex) in record.Skip(1).WithIndex())
        {
            xArray[rowIndex, colIndex] = dataPoint;
        }
        yArray[rowIndex] = record[0];
    }

    RegressionOutputs outputs = Regression.MultipleLinearRegression(yArray, xArray, false);

    foreach (double coefficient in outputs.Coeffs)
    {
        // BUG FIX: for a coefficient of exactly 0, Math.Log10(0) is
        // -Infinity and casting -Infinity to int is an unspecified
        // conversion in unchecked C#. Handle zero explicitly instead of
        // relying on that behavior.
        double magnitude = Math.Abs(coefficient);
        int numberOfSignificantFigures = magnitude == 0
            ? 0
            : Math.Min(10, Math.Max(0, 5 - (int)Math.Floor(Math.Log10(magnitude))));
        Console.Out.WriteLine($"{Math.Round(coefficient, numberOfSignificantFigures).ToString($"f{numberOfSignificantFigures}").TrimZeros()}");
    }
}
/// <summary>
/// MultipleLinearRegression runs a MLR on y and x data. A column of 1's is
/// added internally so the regression fits a constant term.
/// </summary>
/// <param name="y">1-D array of double dependent variable values</param>
/// <param name="x">n x p array of independent variable observations. Do not add a column of 1's</param>
/// <param name="advancedStats">true will additionally return:
/// predictions, residuals, standardized residuals, standard errors for the
/// coefficients, t-stats, the F-statistic and adjusted R^2.
/// (Cook's distance is currently disabled — see note in the body.)</param>
/// <returns>RegressionOutputs class</returns>
/// <exception cref="ArgumentException">Thrown when y and x have different row counts.</exception>
/// <exception cref="InvalidOperationException">Thrown when X'X is singular (e.g. a constant input column).</exception>
public static RegressionOutputs MultipleLinearRegression(double[] y, double[,] x, bool advancedStats)
{
    RegressionOutputs toReturn = new RegressionOutputs();

    // y must supply one observation per row of x.
    if (y.Length != x.GetLength(0))
    {
        // ArgumentException replaces the bare Exception thrown previously;
        // existing catch (Exception) handlers still see it.
        throw new ArgumentException("Matrices must be the same size for regression");
    }

    // Prepend a column of 1's to X so the regression estimates a constant.
    double[,] xFull = new double[x.GetLength(0), x.GetLength(1) + 1];
    for (int i = 0; i < x.GetLength(0); i++)
    {
        xFull[i, 0] = 1;
        for (int j = 0; j < x.GetLength(1); j++)
        {
            xFull[i, j + 1] = x[i, j];
        }
    }

    // The matrix helpers operate on 2-D arrays, so lift y into an n x 1 matrix.
    double[,] yAdjusted = new double[y.Length, 1];
    for (int i = 0; i < y.Length; i++)
    {
        yAdjusted[i, 0] = y[i];
    }

    double yAve = y.Average();
    int n = y.Length;            // observations
    int p = xFull.GetLength(1);  // coefficients, including the constant

    // Normal equations: b = (X'X)^-1 X'y.
    var xTrans = MatrixMethods.MatTranspose(xFull);
    var xTransX = MatrixMethods.MatMultiply(xTrans, xFull);
    var xTransY = MatrixMethods.MatMultiply(xTrans, yAdjusted);

    // Invert X'X via its Cholesky decomposition; a NaN anywhere in the
    // result means X'X was singular (e.g. a constant regressor column).
    var choleskyDecomp = MatrixMethods.CholeskyDecomp(xTransX);
    double[,] c = MatrixMethods.MatrixInv(xTransX, choleskyDecomp);
    for (int i = 0; i < c.GetLength(0); i++)
    {
        for (int j = 0; j < c.GetLength(1); j++)
        {
            if (double.IsNaN(c[i, j]))
            {
                // BUG FIX: this message was previously broken across two
                // physical source lines inside a non-verbatim string literal.
                throw new InvalidOperationException(
                    "Matrix inverse not defined. Check for constant input array.");
            }
        }
    }

    double[,] coeffs = MatrixMethods.MatMultiply(c, xTransY);

    // SSE = y'y - b'X'y  (sum of squared residuals).
    double[,] yTransY = MatrixMethods.MatMultiply(MatrixMethods.MatTranspose(yAdjusted), yAdjusted);
    double[,] betaTransXtransY = MatrixMethods.MatMultiply(MatrixMethods.MatTranspose(coeffs), xTransY);
    double sse = yTransY[0, 0] - betaTransXtransY[0, 0];

    // Round-off can push SSE slightly negative when the fit is exact
    // (e.g. constant data was passed); clamp it at zero.
    if (sse < 0)
    {
        sse = 0;
    }

    // Total sum of squares about the mean: sum((y_i - yAve)^2), accumulated
    // in the same left-to-right order as the previous array + Sum() version.
    double ssm = 0;
    for (int i = 0; i < n; i++)
    {
        ssm += (y[i] - yAve) * (y[i] - yAve);
    }

    // Basic statistics
    //==========================================================================================
    toReturn.Coeffs = new double[p];
    for (int i = 0; i < coeffs.GetLength(0); i++)
    {
        toReturn.Coeffs[i] = coeffs[i, 0];
    }
    toReturn.NMBE = 0; //This is defined to be zero, other than round-off errors.
    toReturn.StandardError = Math.Sqrt(sse / (n - p));
    toReturn.CV = toReturn.StandardError / yAve; // NaN/Infinity when yAve == 0
    toReturn.Tstats = new double[p];             // filled only when advancedStats is true
    toReturn.Rsquared = 1 - (sse / ssm);         // NaN when y is constant (ssm == 0)

    // Advanced statistics
    //============================================================================================
    if (advancedStats)
    {
        // Echo back the data used for the fit (references, not copies).
        toReturn.Ydata = y;
        toReturn.Xdata = x;

        // Predictions yhat_i = sum_j b_j * X_ij and residuals y_i - yhat_i.
        double[] predictions = new double[n];
        double[] residuals = new double[n];
        for (int i = 0; i < n; i++)
        {
            predictions[i] = 0;
            for (int j = 0; j < coeffs.GetLength(0); j++)
            {
                predictions[i] = predictions[i] + coeffs[j, 0] * xFull[i, j];
            }
            residuals[i] = y[i] - predictions[i];
        }

        // Standardized residuals: residual / sample std-dev of the residuals.
        double tempSum = 0;
        double residAve = residuals.Average();
        for (int i = 0; i < n; i++)
        {
            tempSum += (residuals[i] - residAve) * (residuals[i] - residAve);
        }
        double residualStandDev = Math.Sqrt(tempSum / (n - 1));
        double[] standResiduals = new double[n];
        for (int i = 0; i < n; i++)
        {
            standResiduals[i] = residuals[i] / residualStandDev;
        }

        // NOTE(review): Cook's distance (hat matrix X (X'X)^-1 X', then
        // D_i = r_i^2 h_ii / (p * MSE * (1 - h_ii)^2)) was commented out
        // here in the original; re-enable and validate before documenting
        // it as an output.

        // Coefficient standard errors se(b_i) = s * sqrt([(X'X)^-1]_ii)
        // and t-statistics t_i = b_i / se(b_i).
        toReturn.CoeffsStandardErrors = new double[p];
        for (int i = 0; i < p; i++)
        {
            toReturn.CoeffsStandardErrors[i] = toReturn.StandardError * Math.Sqrt(c[i, i]);
            toReturn.Tstats[i] = toReturn.Coeffs[i] / toReturn.CoeffsStandardErrors[i];
        }

        toReturn.Residuals = residuals;
        toReturn.StandResiduals = standResiduals;
        toReturn.Yhat = predictions;

        // F = (R^2 / (p - 1)) / ((1 - R^2) / (n - p)).
        toReturn.Fstatistic = ((toReturn.Rsquared) / (p - 1)) / ((1 - toReturn.Rsquared) / (n - p));
        // Adjusted R^2 = 1 - ((n - 1) / (n - p)) * (1 - R^2).
        toReturn.AdjRsquared = 1 - ((((double)n - 1) / ((double)n - (double)p)) * (1 - toReturn.Rsquared));
    }

    return toReturn;
}
/// <summary>
/// Returns an independent copy of the given RegressionOutputs.
/// </summary>
/// <param name="regOutputToClone">The outputs to copy; may be null.</param>
/// <returns>A new RegressionOutputs whose mutable array members are
/// duplicated, or null when the input is null.</returns>
public static RegressionOutputs CloneRegressionOutputs(RegressionOutputs regOutputToClone)
{
    // BUG FIX: this method previously returned the input reference
    // unchanged, so callers mutating the "clone" silently mutated the
    // original outputs.
    if (regOutputToClone == null)
    {
        return null;
    }

    // Basic statistics (coefficients, R^2, errors, t-stats, CV, NMBE) are
    // deep-copied by the existing instance helper.
    RegressionOutputs copy = regOutputToClone.CloneBasicStatistics();

    // Advanced statistics: copy scalars by value and duplicate arrays so
    // the clone shares no mutable state with the source.
    copy.Fstatistic = regOutputToClone.Fstatistic;
    if (regOutputToClone.Ydata != null)
    {
        copy.Ydata = (double[])regOutputToClone.Ydata.Clone();
    }
    if (regOutputToClone.Xdata != null)
    {
        copy.Xdata = (double[,])regOutputToClone.Xdata.Clone();
    }
    if (regOutputToClone.Yhat != null)
    {
        copy.Yhat = (double[])regOutputToClone.Yhat.Clone();
    }
    if (regOutputToClone.Residuals != null)
    {
        copy.Residuals = (double[])regOutputToClone.Residuals.Clone();
    }
    if (regOutputToClone.StandResiduals != null)
    {
        copy.StandResiduals = (double[])regOutputToClone.StandResiduals.Clone();
    }

    return copy;
}