Exemplo n.º 1
0
        /// <summary>
        /// Fits a linear regression of the dependent data on the independent variables and
        /// writes the summary, ANOVA table, coefficient table and any requested scatter plots
        /// to a newly created "Regression" worksheet.
        /// </summary>
        /// <param name="variablesD">Dependent variable(s), forwarded to <c>calcYdata</c>; the name of the first entry labels the prediction output.</param>
        /// <param name="variablesI">Independent variables. Must contain at least one entry: the range of the first one is used to count the observations.</param>
        /// <param name="dataSet">Data set holding the observations used to fit the model.</param>
        /// <param name="dataSet2">Data set used for prediction; only read when <c>model.doPrediction</c> is set.</param>
        /// <param name="graphs">
        /// Plot switches, indices 0..4 are read: [0] fitted vs. actual, [1] residuals vs. fitted,
        /// [2] residuals vs. each independent variable, [3] actual Y vs. each independent variable,
        /// [4] fitted Y vs. each independent variable.
        /// </param>
        /// <param name="variablesPrediction">Variables read from <paramref name="dataSet2"/> to build the prediction inputs.</param>
        public void createRegression(List <Variable> variablesD, List <Variable> variablesI, DataSet dataSet, DataSet dataSet2, bool[] graphs, List <Variable> variablesPrediction)
        {
            _Worksheet sheet          = WorksheetHelper.NewWorksheet("Regression");
            double     confLevel      = model.confidenceLevel;
            int        predictorCount = variablesI.Count;

            // Let Excel count the observations by evaluating ROWS() in a scratch cell.
            // The sheet name is wrapped in single quotes (embedded quotes doubled) so that
            // names containing spaces or punctuation still yield a valid reference.
            string quotedSheetName = "'" + dataSet.getWorksheet().Name.Replace("'", "''") + "'";
            sheet.Cells[100, 100] = "=ROWS(" + quotedSheetName + "!" + variablesI[0].Range + ")";
            int length = Convert.ToInt32(((Range)sheet.Cells[100, 100]).Value);
            sheet.Cells[100, 100] = "";

            // Residual degrees of freedom: observations - predictors - 1 (for the constant).
            int residualDf = length - predictorCount - 1;

            //
            //Static labels of the three result tables
            //
            sheet.Cells[1, 1] = "Regression Summary";
            sheet.Cells[2, 1] = "R";
            sheet.Cells[3, 1] = "R-square";
            sheet.Cells[4, 1] = "adjusted R-square";
            sheet.Cells[5, 1] = "stErr of Estimate";

            sheet.Cells[9, 1]  = "ANOVA Table";
            sheet.Cells[10, 1] = "Explained";
            sheet.Cells[11, 1] = "Unexplained";
            sheet.Cells[8, 3]  = "Sum of";
            sheet.Cells[9, 3]  = "Squares";
            sheet.Cells[8, 2]  = "Degrees of";
            sheet.Cells[9, 2]  = "Freedom";
            sheet.Cells[8, 4]  = "Mean";
            sheet.Cells[9, 4]  = "Squares";
            sheet.Cells[9, 5]  = "F-Ratio";
            sheet.Cells[9, 6]  = "p-Value";

            sheet.Cells[15, 1] = "Regression Table";
            sheet.Cells[16, 1] = "Constant";
            int row = 17;
            foreach (Variable variable in variablesI)
            {
                sheet.Cells[row, 1] = variable.name;
                row++;
            }
            // First row below the regression table; stored in a field for use outside this method.
            lowestDataRow = row;

            sheet.Cells[15, 2] = "Coefficient";
            sheet.Cells[14, 3] = "Standard";
            sheet.Cells[15, 3] = "Error";
            sheet.Cells[15, 4] = "t-value";
            sheet.Cells[15, 5] = "p-value";
            sheet.Cells[14, 6] = "Confidence Interval " + confLevel.ToString() + "%";
            sheet.Range[sheet.Cells[14, 6], sheet.Cells[14, 7]].Merge();
            sheet.Cells[15, 6] = "Lower";
            sheet.Cells[15, 7] = "Upper";

            sheet.get_Range("B1", "J200").Cells.HorizontalAlignment = XlHAlign.xlHAlignCenter;
            sheet.get_Range("A1", "B1").Borders[XlBordersIndex.xlEdgeBottom].LineStyle   = XlLineStyle.xlDouble;
            sheet.get_Range("A9", "F9").Borders[XlBordersIndex.xlEdgeBottom].LineStyle   = XlLineStyle.xlDouble;
            sheet.get_Range("A15", "G15").Borders[XlBordersIndex.xlEdgeBottom].LineStyle = XlLineStyle.xlDouble;
            sheet.get_Range("B3", "B5").NumberFormat   = "0.0000";
            // NOTE(review): the two ranges below are fixed and do not follow the number of
            // independent variables - confirm whether they should scale with the table.
            sheet.get_Range("B18", "B20").NumberFormat = "0.0000";
            sheet.get_Range("D18", "E19").NumberFormat = "0.0000";

            Globals.ExcelAddIn.Application.ActiveWindow.DisplayGridlines = false;

            //
            //calculate values of regression
            //
            // NOTE(review): b[0] is read as the constant and b[1..] as the slopes further down,
            // so calculateCoefB presumably returns predictorCount + 1 entries - confirm.
            double[] b     = new double[predictorCount];
            double[] yData = calcYdata(variablesD, dataSet, length);
            double[,] xData = calcXdata(length, dataSet, variablesI);

            b = calculateCoefB(yData, xData, b);

            var X         = DenseMatrix.OfArray(xData);
            var fitted    = X.Multiply(new DenseVector(b));
            var residuals = new DenseVector(yData).Subtract(fitted);

            double[] error = residuals.ToArray();
            double[] yhat  = fitted.ToArray();

            // Correlation between actual and fitted values is the multiple R; its square is R-square.
            double multipleR       = sheet.Application.WorksheetFunction.Correl(yData, yhat);
            double rSquare         = Math.Pow(multipleR, 2);
            double adjustedRSquare = 1.0 - ((1 - rSquare) * (length - 1) / residualDf);

            // Standard error of estimate = sqrt(SSE / residual degrees of freedom).
            // The divisor used to be a hard-coded (length - 4), which is only right for
            // exactly three independent variables.
            double sumSquaredResiduals = 0;
            foreach (double residual in error)
            {
                sumSquaredResiduals += residual * residual;
            }
            double stdErrOfEstimate = Math.Sqrt(sumSquaredResiduals / residualDf);

            double[] anovaResults = calculateAnova(yData, yhat, predictorCount, sheet);

            multicollinearity(sheet, xData, predictorCount);
            if (model.doDurbinWatson == true)
            {
                DurbinWatson(sheet, error);
            }

            //
            //Calculate regressionTable
            //
            // Coefficient covariance matrix: MSE * (X'X)^-1; its diagonal holds the variances.
            double MSE        = anovaResults[6];
            var    variancesM = X.Transpose().Multiply(X).Inverse().Multiply(MSE);

            double[] variances = variancesM.Diagonal().ToArray();
            double[] std       = new double[variances.Length];
            for (int v = 0; v < variances.Length; v++)
            {
                std[v] = Math.Sqrt(variances[v]);
            }

            // The two-tailed critical t-value does not depend on the coefficient, so it is
            // fetched from Excel once instead of twice per coefficient.
            double tCritical = sheet.Application.WorksheetFunction.TInv(1 - confLevel / 100, residualDf);

            double[] tValue    = new double[b.Length];
            double[] pValue    = new double[b.Length];
            double[] lowerInt  = new double[b.Length];
            double[] higherInt = new double[b.Length];
            for (int index = 0; index < b.Length; index++)
            {
                double halfWidth = tCritical * std[index];

                tValue[index]    = b[index] / std[index];
                pValue[index]    = sheet.Application.WorksheetFunction.TDist(Math.Abs(tValue[index]), residualDf, 2);
                lowerInt[index]  = b[index] - halfWidth;
                higherInt[index] = b[index] + halfWidth;
            }

            //
            //Prediction
            //
            if (model.doPrediction)
            {
                double[,] xDataPred = calcXdata(dataSet2.getNrDataRows(), dataSet2, variablesPrediction);
                // {row, column} of the cell directly right of the prediction data set's variables.
                int[] rightCell = { dataSet2.getRange().Row, dataSet2.getRange().Column + dataSet2.amountOfVariables() };

                prediction(dataSet2.getWorksheet(), xDataPred, b, length, predictorCount, X, MSE, rightCell, variablesD[0].name);
                setPrediction(false);
            }

            //
            //Draw graphs
            //
            if (graphs[0])
            {
                drawGraphs(sheet, yData, yhat, "Scatter plot of fitted values vs. actual values");
            }
            if (graphs[1])
            {
                // x series is the fitted values, as the title says (the actual values were passed before).
                drawGraphs(sheet, yhat, error, "Scatter plot of residuals vs. fitted values");
            }
            if (graphs[2])
            {
                drawGraphsPerVariable(sheet, variablesI, xData, error, "Scatter plot of residuals vs. ");
            }
            if (graphs[3])
            {
                drawGraphsPerVariable(sheet, variablesI, xData, yData, "Scatter plot of actual Y values vs. ");
            }
            if (graphs[4])
            {
                drawGraphsPerVariable(sheet, variablesI, xData, yhat, "Scatter plot of fitted Y values vs. ");
            }

            //
            //print results to excel sheet
            //
            sheet.Cells[16, 2] = b[0];
            for (int v = 0; v < predictorCount; v++)
            {
                sheet.Cells[17 + v, 2] = b[v + 1];
            }

            FillR(sheet, multipleR, rSquare, adjustedRSquare, stdErrOfEstimate);
            FillAnova(sheet, anovaResults);
            FillRegressionTable(sheet, std, tValue, pValue, lowerInt, higherInt);

            // Columns A-E and J are auto-fitted; F-I get a fixed width.
            foreach (int column in new[] { 1, 2, 3, 4, 5 })
            {
                ((Range)sheet.Cells[1, column]).EntireColumn.AutoFit();
            }
            foreach (int column in new[] { 6, 7, 8, 9 })
            {
                ((Range)sheet.Cells[1, column]).EntireColumn.ColumnWidth = 13;
            }
            ((Range)sheet.Cells[1, 10]).EntireColumn.AutoFit();
        }

        /// <summary>
        /// Draws one scatter plot per independent variable, using that variable's column of
        /// <paramref name="xData"/> as the x series and <paramref name="yValues"/> as the y series.
        /// </summary>
        private void drawGraphsPerVariable(_Worksheet sheet, List <Variable> variablesI, double[,] xData, double[] yValues, string titlePrefix)
        {
            // Columns are taken starting at index 1.
            // NOTE(review): column 0 is presumably the intercept column built by calcXdata - confirm.
            int column = 1;
            foreach (Variable variable in variablesI)
            {
                drawGraphs(sheet, extractColumn(xData, column), yValues, titlePrefix + variable.name);
                column++;
            }
        }

        /// <summary>Copies a single column of a rectangular array into a new one-dimensional array.</summary>
        private static double[] extractColumn(double[,] matrix, int column)
        {
            int rowCount = matrix.GetLength(0);
            double[] values = new double[rowCount];
            for (int r = 0; r < rowCount; r++)
            {
                values[r] = matrix[r, column];
            }
            return values;
        }