/// <summary>
/// Creates a new "Regression" worksheet and fills it with a regression summary,
/// an ANOVA table and a regression (coefficient) table, then optionally runs the
/// Durbin-Watson check, writes predictions and draws the requested scatter plots.
/// </summary>
/// <param name="variablesD">Variables passed to <c>calcYdata</c> to build the Y vector; the name of the first one is forwarded to <c>prediction</c>. Presumably the dependent variable(s) - confirm with callers.</param>
/// <param name="variablesI">Variables passed to <c>calcXdata</c> to build the X matrix. Their names label the rows of the regression table, and the range of the first one determines the number of observations. Must contain at least one element.</param>
/// <param name="dataSet">Data set whose worksheet holds the observations used for fitting.</param>
/// <param name="dataSet2">Data set used only when <c>model.doPrediction</c> is set; predictions are written next to its range.</param>
/// <param name="graphs">Five flags selecting the plots to draw: [0] fitted vs. actual, [1] residuals vs. fitted, [2] residuals vs. each variable, [3] actual Y vs. each variable, [4] fitted Y vs. each variable.</param>
/// <param name="variablesPrediction">Variables passed to <c>calcXdata</c> to build the X matrix for prediction.</param>
public void createRegression(List<Variable> variablesD, List<Variable> variablesI, DataSet dataSet, DataSet dataSet2, bool[] graphs, List<Variable> variablesPrediction)
{
    _Worksheet sheet = WorksheetHelper.NewWorksheet("Regression");
    double confLevel = model.confidenceLevel;

    // Count the observations by letting Excel evaluate ROWS() in a scratch cell.
    // The sheet name is wrapped in single quotes (embedded quotes doubled) so that
    // names containing spaces or punctuation still yield a valid reference.
    string quotedSheetName = "'" + dataSet.getWorksheet().Name.Replace("'", "''") + "'";
    sheet.Cells[100, 100] = "=ROWS(" + quotedSheetName + "!" + variablesI[0].Range + ")";
    int length = Convert.ToInt32(((Range)sheet.Cells[100, 100]).Value);
    sheet.Cells[100, 100] = "";

    // Residual degrees of freedom (n - k - 1), shared by every statistic below.
    int residualDof = length - variablesI.Count - 1;

    //
    // Static labels
    //
    sheet.Cells[1, 1] = "Regression Summary";
    sheet.Cells[2, 1] = "R";
    sheet.Cells[3, 1] = "R-square";
    sheet.Cells[4, 1] = "adjusted R-square";
    sheet.Cells[5, 1] = "stErr of Estimate";

    sheet.Cells[9, 1] = "ANOVA Table";
    sheet.Cells[10, 1] = "Explained";
    sheet.Cells[11, 1] = "Unexplained";
    sheet.Cells[8, 3] = "Sum of";
    sheet.Cells[9, 3] = "Squares";
    sheet.Cells[8, 2] = "Degrees of";
    sheet.Cells[9, 2] = "Freedom";
    sheet.Cells[8, 4] = "Mean";
    sheet.Cells[9, 4] = "Squares";
    sheet.Cells[9, 5] = "F-Ratio";
    sheet.Cells[9, 6] = "p-Value";

    sheet.Cells[15, 1] = "Regression Table";
    sheet.Cells[16, 1] = "Constant";
    int row = 17;
    foreach (Variable variable in variablesI)
    {
        sheet.Cells[row, 1] = variable.name;
        row++;
    }
    lowestDataRow = row;

    sheet.Cells[15, 2] = "Coefficient";
    sheet.Cells[14, 3] = "Standard";
    sheet.Cells[15, 3] = "Error";
    sheet.Cells[15, 4] = "t-value";
    sheet.Cells[15, 5] = "p-value";
    sheet.Cells[14, 6] = "Confidence Interval " + confLevel.ToString() + "%";
    sheet.Range[sheet.Cells[14, 6], sheet.Cells[14, 7]].Merge();
    sheet.Cells[15, 6] = "Lower";
    sheet.Cells[15, 7] = "Upper";

    //
    // Formatting
    //
    sheet.get_Range("B1", "J200").Cells.HorizontalAlignment = XlHAlign.xlHAlignCenter;
    sheet.get_Range("A1", "B1").Borders[XlBordersIndex.xlEdgeBottom].LineStyle = XlLineStyle.xlDouble;
    sheet.get_Range("A9", "F9").Borders[XlBordersIndex.xlEdgeBottom].LineStyle = XlLineStyle.xlDouble;
    sheet.get_Range("A15", "G15").Borders[XlBordersIndex.xlEdgeBottom].LineStyle = XlLineStyle.xlDouble;
    sheet.get_Range("B3", "B5").NumberFormat = "0.0000";
    // NOTE(review): these two ranges are fixed and do not follow the number of
    // variables in the regression table - confirm whether that is intended.
    sheet.get_Range("B18", "B20").NumberFormat = "0.0000";
    sheet.get_Range("D18", "E19").NumberFormat = "0.0000";
    Globals.ExcelAddIn.Application.ActiveWindow.DisplayGridlines = false;

    //
    // Calculate values of regression
    //
    double[] b = new double[variablesI.Count];
    double[] yData = calcYdata(variablesD, dataSet, length);
    double[,] xData = calcXdata(length, dataSet, variablesI);
    b = calculateCoefB(yData, xData, b);
    double a = b[0]; // b[0] is reported as the constant; b[1..] belong to the variables

    var X = DenseMatrix.OfArray(xData);
    var B = new DenseVector(b);
    var Y = new DenseVector(yData);
    var fitted = X.Multiply(B);
    double[] error = Y.Subtract(fitted).ToArray();
    double[] yhat = fitted.ToArray();

    // R is the correlation between actual and fitted values; R-square is its square.
    double multipleR = sheet.Application.WorksheetFunction.Correl(yData, yhat);
    double rSquared = Math.Pow(multipleR, 2);
    double adjustedRSquared = 1.0 - ((1 - rSquared) * (length - 1) / residualDof);

    // Standard error of estimate: sqrt(SSE / (n - k - 1)).
    // The divisor used to be the literal (length - 4), which is only right for k = 3.
    double sumSquaredResiduals = 0;
    foreach (double residual in error)
    {
        sumSquaredResiduals += Math.Pow(residual, 2);
    }
    double standardError = Math.Sqrt(sumSquaredResiduals / residualDof);

    double[] anovaResults = calculateAnova(yData, yhat, variablesI.Count, sheet);
    multicollinearity(sheet, xData, variablesI.Count);
    if (model.doDurbinWatson)
    {
        DurbinWatson(sheet, error);
    }

    //
    // Calculate regression table
    //
    double MSE = anovaResults[6];
    // Coefficient variances are the diagonal of MSE * (X'X)^-1.
    double[] variances = X.Transpose().Multiply(X).Inverse().Multiply(MSE).Diagonal().ToArray();
    double[] std = new double[variances.Length];
    for (int i = 0; i < variances.Length; i++)
    {
        std[i] = Math.Sqrt(variances[i]);
    }

    double[] tValue = new double[b.Length];
    double[] pValue = new double[b.Length];
    double[] lowerInt = new double[b.Length];
    double[] higherInt = new double[b.Length];
    // Critical t-value is the same for every coefficient, so ask Excel for it once.
    double tCritical = sheet.Application.WorksheetFunction.TInv(1 - confLevel / 100, residualDof);
    for (int i = 0; i < b.Length; i++)
    {
        tValue[i] = b[i] / std[i];
        pValue[i] = sheet.Application.WorksheetFunction.TDist(Math.Abs(tValue[i]), residualDof, 2);
        double halfWidth = tCritical * std[i];
        lowerInt[i] = b[i] - halfWidth;
        higherInt[i] = b[i] + halfWidth;
    }

    //
    // Prediction
    //
    if (model.doPrediction)
    {
        double[,] xDataPred = calcXdata(dataSet2.getNrDataRows(), dataSet2, variablesPrediction);
        int[] rightCell = { dataSet2.getRange().Row, dataSet2.getRange().Column + dataSet2.amountOfVariables() }; // {row, column}
        prediction(dataSet2.getWorksheet(), xDataPred, b, length, variablesI.Count, X, MSE, rightCell, variablesD[0].name);
        setPrediction(false);
    }

    //
    // Draw graphs
    //
    if (graphs[0])
    {
        drawGraphs(sheet, yData, yhat, "Scatter plot of fitted values vs. actual values");
    }
    if (graphs[1])
    {
        // Plot the residuals against the fitted values, as the title says
        // (the actual values used to be passed here by mistake).
        drawGraphs(sheet, yhat, error, "Scatter plot of residuals vs. fitted values");
    }
    if (graphs[2])
    {
        drawGraphsPerVariable(sheet, xData, variablesI, error, "Scatter plot of residuals vs. ");
    }
    if (graphs[3])
    {
        drawGraphsPerVariable(sheet, xData, variablesI, yData, "Scatter plot of actual Y values vs. ");
    }
    if (graphs[4])
    {
        drawGraphsPerVariable(sheet, xData, variablesI, yhat, "Scatter plot of fitted Y values vs. ");
    }

    //
    // Print results to excel sheet
    //
    sheet.Cells[16, 2] = a;
    for (int i = 0; i < variablesI.Count; i++)
    {
        sheet.Cells[17 + i, 2] = b[i + 1];
    }
    FillR(sheet, multipleR, rSquared, adjustedRSquared, standardError);
    FillAnova(sheet, anovaResults);
    FillRegressionTable(sheet, std, tValue, pValue, lowerInt, higherInt);

    for (int column = 1; column <= 5; column++)
    {
        ((Range)sheet.Cells[1, column]).EntireColumn.AutoFit();
    }
    for (int column = 6; column <= 9; column++)
    {
        ((Range)sheet.Cells[1, column]).EntireColumn.ColumnWidth = 13;
    }
    ((Range)sheet.Cells[1, 10]).EntireColumn.AutoFit();
}

/// <summary>
/// Draws one scatter plot per entry of <paramref name="variablesI"/>, pairing that
/// variable's column of <paramref name="xData"/> with <paramref name="yValues"/>.
/// Column 0 of <paramref name="xData"/> is skipped: variables start at column 1.
/// </summary>
private void drawGraphsPerVariable(_Worksheet sheet, double[,] xData, List<Variable> variablesI, double[] yValues, string titlePrefix)
{
    int column = 1;
    foreach (Variable variable in variablesI)
    {
        drawGraphs(sheet, extractColumn(xData, column), yValues, titlePrefix + variable.name);
        column++;
    }
}

/// <summary>Copies a single column of a two-dimensional array into a new one-dimensional array.</summary>
private static double[] extractColumn(double[,] data, int column)
{
    int rowCount = data.GetLength(0);
    double[] values = new double[rowCount];
    for (int r = 0; r < rowCount; r++)
    {
        values[r] = data[r, column];
    }
    return values;
}