Beispiel #1
0
        /// <summary>
        /// Fits a least-squares linear regression (with a constant term) of
        /// <paramref name="dependentVariable"/> on <paramref name="independentVariables"/> and writes the
        /// results to a new worksheet named "Simple Regression".
        /// </summary>
        /// <remarks>
        /// The coefficients are obtained from the normal equations, inverse(X'X) * X' * Y. The sheet receives
        /// a coefficient table (beta, standard error, p-value, lower/upper limit), a regression summary,
        /// an ANOVA table and the headers of the data block. Optional output (regression equation and
        /// scatter plots) is switched on by flags read from <c>model</c>, a member declared outside this method.
        /// </remarks>
        /// <param name="dataSet">The data set that supplies the number of observations and the values of each variable.</param>
        /// <param name="independentVariables">The predictor variables; each one becomes a column of the design matrix.</param>
        /// <param name="dependentVariable">The response variable.</param>
        public void createRegression(DataSet dataSet, List<Variable> independentVariables, Variable dependentVariable)
        {
            _Worksheet sheet = WorksheetHelper.NewWorksheet("Simple Regression");

            // Read the sizes once instead of re-querying them for every cell and formula below.
            var rowCount       = dataSet.rangeSize();
            var predictorCount = independentVariables.Count;

            // Residual degrees of freedom: observations - 1 (constant) - number of predictors.
            var dF = rowCount - 1 - predictorCount;

            // Design matrix X: column 0 is the constant term, columns 1..k hold the predictors.
            var matrixX = Matrix<double>.Build.Dense(rowCount, predictorCount + 1);

            for (var i = 0; i < rowCount; i++)
            {
                matrixX[i, 0] = 1;
            }

            for (var j = 0; j < predictorCount; j++)
            {
                // Fetch each predictor's values once per column rather than once per cell.
                var columnValues = dataSet.getValuesArray(independentVariables[j]);
                for (var i = 0; i < rowCount; i++)
                {
                    matrixX[i, j + 1] = columnValues[i];
                }
            }

            // Response vector Y as a single-column matrix.
            var matrixY      = Matrix<double>.Build.Dense(rowCount, 1);
            var responseData = dataSet.getValuesArray(dependentVariable);

            for (var i = 0; i < rowCount; i++)
            {
                matrixY[i, 0] = responseData[i];
            }

            // Normal equations: beta = inverse(X'X) * X' * Y.
            var matrixXt     = matrixX.Transpose();
            var matrixXtX    = matrixXt * matrixX;
            var matrixInv    = matrixXtX.Inverse();
            var matrixInvXt  = matrixInv * matrixXt;
            var matrixResult = matrixInvXt * matrixY;

            // Sheet layout. "title", "name", "summaryName", "anovaTableName" and "dataName" are ROW numbers;
            // the remaining constants are COLUMN numbers inside the coefficient and ANOVA tables.
            const int title                = 1;
            const int name                 = 3;
            const int betaCoeff            = 2;
            const int stdErrorName         = betaCoeff + 1;
            const int pValuesName          = stdErrorName + 1;
            const int lowerLimitName       = pValuesName + 1;
            const int upperLimitName       = lowerLimitName + 1;
            var       summaryName          = name + predictorCount + 3;
            var       anovaTableName       = summaryName + 3;
            const int degreesOfFreedomName = 2;
            const int sumOfSquaresName     = 3;
            const int meanOfSquaresName    = 4;
            const int anovaFValueName      = 5;
            const int anovaPValueName      = 6;
            var       dataName             = anovaTableName + 4;

            // Static headers of the coefficient table and the regression summary.
            sheet.Cells[title, 1]             = "Regression";
            sheet.Cells[name + 1, 1]          = "Constant";
            sheet.Cells[name, betaCoeff]      = "Beta";
            sheet.Cells[name, stdErrorName]   = "Standard Error";
            sheet.Cells[name, pValuesName]    = "P-Value";
            sheet.Cells[name, lowerLimitName] = "Lower Limit";
            sheet.Cells[name, upperLimitName] = "Upper Limit";
            sheet.Cells[summaryName + 1, 1]   = "Regression Summary";
            sheet.Cells[summaryName, 2]       = "R-Square";
            sheet.Cells[summaryName, 3]       = "Adjusted R-Square";
            sheet.Cells[summaryName, 4]       = "Standard Error of estimation";

            if (model.regressionEquation)
            {
                // Text form: "<Y> = b0 + (b1*<X1>) + (b2*<X2>) ...".
                sheet.Cells[summaryName, 5] = "Regression Equation";
                var regressionEquation = "" + matrixResult[0, 0];
                for (var i = 0; i < predictorCount; i++)
                {
                    regressionEquation = regressionEquation + " + (" + matrixResult[i + 1, 0] + "*" + independentVariables[i].name + ")";
                }
                sheet.Cells[summaryName + 1, 5] = dependentVariable.name + " = " + regressionEquation;
            }

            // ANOVA table: index 0 of sumOfSquare is written to the "Explained" row, index 1 to "Unexplained".
            var sumOfSquare           = CalculateSumOfSquares(dataSet, independentVariables, dependentVariable, matrixResult);
            var meanSquareExplained   = sumOfSquare[0] / predictorCount;
            var meanSquareUnexplained = sumOfSquare[1] / dF;
            var fTest                 = meanSquareExplained / meanSquareUnexplained;

            sheet.Cells[anovaTableName, 1]     = "Anova Table";
            sheet.Cells[anovaTableName + 1, 1] = "Explained";
            sheet.Cells[anovaTableName + 2, 1] = "Unexplained";
            sheet.Cells[anovaTableName, degreesOfFreedomName]     = "Degrees of Freedom";
            sheet.Cells[anovaTableName + 1, degreesOfFreedomName] = predictorCount;
            sheet.Cells[anovaTableName + 2, degreesOfFreedomName] = dF;
            sheet.Cells[anovaTableName, sumOfSquaresName]         = "Sum of Squares";
            sheet.Cells[anovaTableName + 1, sumOfSquaresName]     = sumOfSquare[0];
            sheet.Cells[anovaTableName + 2, sumOfSquaresName]     = sumOfSquare[1];
            sheet.Cells[anovaTableName, meanOfSquaresName]        = "Mean of Squares";
            sheet.Cells[anovaTableName + 1, meanOfSquaresName]    = meanSquareExplained;
            sheet.Cells[anovaTableName + 2, meanOfSquaresName]    = meanSquareUnexplained;
            sheet.Cells[anovaTableName, anovaFValueName]          = "F";
            sheet.Cells[anovaTableName, anovaPValueName]          = "P-Value";
            sheet.Cells[anovaTableName + 1, anovaFValueName]      = fTest;
            sheet.Cells[anovaTableName + 1, anovaPValueName]      = _functions.FDist(fTest, predictorCount, dF);

            // Headers of the data block; the predictor headers follow from column 5 onwards.
            sheet.Cells[dataName, 1] = "Data";
            sheet.Cells[dataName, 2] = "Y: " + dependentVariable.name;
            sheet.Cells[dataName, 3] = "Fit";
            sheet.Cells[dataName, 4] = "Residuals";

            // calculate r-square, adj r-square and std error of estimation
            var rSquare            = CalculateRsquare(dataSet, independentVariables, dependentVariable, matrixResult);
            var adjRSquare         = CalculateAdjRSquare(rSquare, matrixX);
            var stdErrorEstimation = CalculateStdErrorEstimation(dataSet, independentVariables, dependentVariable, matrixResult);

            sheet.Cells[summaryName + 1, 2] = rSquare;
            sheet.Cells[summaryName + 1, 3] = adjRSquare;
            sheet.Cells[summaryName + 1, 4] = stdErrorEstimation;

            // Row labels of the coefficient table: the constant sits on row name + 1, predictors follow.
            for (var j = 0; j < predictorCount; j++)
            {
                sheet.Cells[name + j + 2, 1] = independentVariables[j].name;
            }

            // The two-tailed t multiplier depends only on the confidence level and dF, so it is
            // computed once instead of once per coefficient row.
            var confidenceConstant = _functions.T_Inv_2T(1 - model.confidenceLevel, dF);

            for (var i = 0; i < matrixResult.RowCount; i++)
            {
                var coeff    = matrixResult[i, 0];
                // Standard error of a coefficient: sqrt(diagonal of inverse(X'X) * residual mean square).
                var stdError = Math.Sqrt(matrixInv[i, i] * meanSquareUnexplained);
                var pValue   = _functions.TDist(Math.Abs(coeff / stdError), dF, 2);
                var lower    = coeff - stdError * confidenceConstant;
                var upper    = coeff + stdError * confidenceConstant;

                var row = name + i + 1;
                sheet.Cells[row, betaCoeff]      = coeff;
                sheet.Cells[row, stdErrorName]   = stdError;
                sheet.Cells[row, pValuesName]    = pValue;
                sheet.Cells[row, lowerLimitName] = lower;
                sheet.Cells[row, upperLimitName] = upper;
            }

            for (var i = 0; i < predictorCount; i++)
            {
                sheet.Cells[dataName, 5 + i] = independentVariables[i].name;
            }

            // CreateDataRegression is defined elsewhere; its return value is threaded through
            // CreateNewFigure for each chart that is added.
            var nextFigure = CreateDataRegression(matrixX, matrixY, matrixResult, dataName, sheet);

            // The charts reference the data block rows directly below the header row.
            var firstDataRow = dataName + 1;
            var lastDataRow  = dataName + rowCount;

            if (model.fittedVSActual)
            {
                var rangeX = sheet.Range[sheet.Cells[firstDataRow, 2], sheet.Cells[lastDataRow, 2]];
                var rangeY = sheet.Range[sheet.Cells[firstDataRow, 3], sheet.Cells[lastDataRow, 3]];
                nextFigure = CreateNewFigure(rangeX, rangeY, nextFigure, "Fitted Values vs Actual Y-Values: " + dependentVariable.name, sheet);
            }

            if (model.residualsVSFitted)
            {
                var rangeX = sheet.Range[sheet.Cells[firstDataRow, 3], sheet.Cells[lastDataRow, 3]];
                var rangeY = sheet.Range[sheet.Cells[firstDataRow, 4], sheet.Cells[lastDataRow, 4]];
                nextFigure = CreateNewFigure(rangeX, rangeY, nextFigure, "Residuals vs Fitted Values", sheet);
            }

            if (model.residualsVSX)
            {
                // One chart per predictor, all sharing the residual column as Y.
                var rangeY = sheet.Range[sheet.Cells[firstDataRow, 4], sheet.Cells[lastDataRow, 4]];
                for (var i = 0; i < predictorCount; i++)
                {
                    var rangeX = sheet.Range[sheet.Cells[firstDataRow, 5 + i], sheet.Cells[lastDataRow, 5 + i]];
                    var nameX  = independentVariables[i].name;
                    nextFigure = CreateNewFigure(rangeX, rangeY, nextFigure, "Residuals vs " + nameX, sheet);
                }
            }
        }
Beispiel #2
0
        /// <summary>
        /// Fits a least-squares linear regression (with a constant term) of the selected Y variable on the
        /// selected X variables and prints the results to a new <see cref="Microsoft.Office.Interop.Excel._Worksheet"/>.
        /// </summary>
        /// <param name="dataSet">The <see cref="DataSet"/> that contains the variables to regress.</param>
        /// <param name="doIncludeX">A <see cref="List{T}"/> of <see cref="bool"/>s, indexed like <see cref="DataSet.DataList"/>, that marks which <see cref="Models.Data"/> are used as X. A marked entry that contains a null value is skipped after a warning.</param>
        /// <param name="doIncludeY">A <see cref="List{T}"/> of <see cref="bool"/>s, indexed like <see cref="DataSet.DataList"/>, that marks the Y variable. Only the first marked entry is used.</param>
        /// <param name="doCalculate">Flags that switch on the optional output: the regression equation and the scatter plots.</param>
        /// <param name="confidenceLevel">The confidence level for the coefficient limits, as a percentage (it is divided by 100 before use).</param>
        /// <returns>
        /// <c>true</c> when the regression was written to a worksheet; <c>false</c> when no usable X or Y variable
        /// was selected, the selected variables do not all have the same number of values, or there are too few
        /// observations to leave at least one residual degree of freedom. No worksheet is created in those cases.
        /// </returns>
        public bool Print(DataSet dataSet, List<bool> doIncludeX, List<bool> doIncludeY, SummaryStatisticsBool doCalculate, int confidenceLevel)
        {
            var valuesArraysX = new List<Models.Data>();
            var valuesArraysY = new List<Models.Data>();

            // Collect the X variables, skipping any that contain null values.
            for (var j = 0; j < dataSet.DataList.Count; j++)
            {
                // Check if the Set of Data is an X.
                if (!doIncludeX[j])
                {
                    continue;
                }

                var candidate = dataSet.DataList[j];
                var hasNull   = false;
                foreach (var value in candidate.GetValuesList())
                {
                    if (value == null)
                    {
                        // One null is enough to reject the variable; stop scanning so the user is warned once.
                        hasNull = true;
                        break;
                    }
                }

                if (hasNull)
                {
                    MessageBox.Show(candidate.Name + " has null data and will not be included.", "NoruST - Regression", MessageBoxButtons.OK, MessageBoxIcon.Warning);
                    continue;
                }

                valuesArraysX.Add(candidate);
            }

            // Collect the Y variable.
            for (var j = 0; j < dataSet.DataList.Count; j++)
            {
                // Check if the Set of Data is an Y.
                if (!doIncludeY[j])
                {
                    continue;
                }

                // only one Y (currently) so stop at the first one found.
                valuesArraysY.Add(dataSet.DataList[j]);
                break;
            }

            // Validate before touching Excel so a rejected request does not leave an empty sheet behind.
            if (valuesArraysX.Count == 0 || valuesArraysY.Count == 0)
            {
                return false;
            }

            var response       = valuesArraysY[0];
            var predictorCount = valuesArraysX.Count;
            var rowCount       = response.GetValuesList().Count;

            // Every X column must line up row-for-row with Y, otherwise the matrices cannot be multiplied.
            foreach (var predictor in valuesArraysX)
            {
                if (predictor.GetValuesList().Count != rowCount)
                {
                    return false;
                }
            }

            // Residual degrees of freedom: observations - 1 (constant) - number of predictors.
            // Below 1 the mean squares and the F/t distributions are undefined.
            var dF = rowCount - 1 - predictorCount;
            if (dF < 1)
            {
                return false;
            }

            var sheet = WorksheetHelper.NewWorksheet("Regression");

            // Design matrix X: column 0 is the constant term, columns 1..k hold the predictors.
            var matrixX = Matrix<double>.Build.Dense(rowCount, predictorCount + 1);

            for (var i = 0; i < rowCount; i++)
            {
                matrixX[i, 0] = 1;
            }

            for (var j = 0; j < predictorCount; j++)
            {
                // Fetch each predictor's values once per column rather than once per cell.
                var columnValues = valuesArraysX[j].GetValuesArray();
                for (var i = 0; i < rowCount; i++)
                {
                    matrixX[i, j + 1] = columnValues[i];
                }
            }

            // Response vector Y as a single-column matrix.
            var matrixY        = Matrix<double>.Build.Dense(rowCount, 1);
            var responseValues = response.GetValuesArray();

            for (var i = 0; i < rowCount; i++)
            {
                matrixY[i, 0] = responseValues[i];
            }

            // Normal equations: beta = inverse(X'X) * X' * Y.
            var matrixXt     = matrixX.Transpose();
            var matrixXtX    = matrixXt * matrixX;
            var matrixInv    = matrixXtX.Inverse();
            var matrixInvXt  = matrixInv * matrixXt;
            var matrixResult = matrixInvXt * matrixY;

            // Sheet layout. "title", "name", "summaryName", "anovaTableName" and "dataName" are ROW numbers;
            // the remaining constants are COLUMN numbers inside the coefficient and ANOVA tables.
            const int title                = 1;
            const int name                 = 3;
            const int betaCoeff            = 2;
            const int stdErrorName         = betaCoeff + 1;
            const int pValuesName          = stdErrorName + 1;
            const int lowerLimitName       = pValuesName + 1;
            const int upperLimitName       = lowerLimitName + 1;
            var       summaryName          = name + predictorCount + 3;
            var       anovaTableName       = summaryName + 3;
            const int degreesOfFreedomName = 2;
            const int sumOfSquaresName     = 3;
            const int meanOfSquaresName    = 4;
            const int anovaFValueName      = 5;
            const int anovaPValueName      = 6;
            var       dataName             = anovaTableName + 4;

            // names of variables on sheet
            sheet.Cells[title, 1]             = "Regression";
            sheet.Cells[name + 1, 1]          = "Constant";
            sheet.Cells[name, betaCoeff]      = "Beta";
            sheet.Cells[name, stdErrorName]   = "Standard Error";
            sheet.Cells[name, pValuesName]    = "P-Value";
            sheet.Cells[name, lowerLimitName] = "Lower Limit";
            sheet.Cells[name, upperLimitName] = "Upper Limit";
            sheet.Cells[summaryName + 1, 1]   = "Regression Summary";
            sheet.Cells[summaryName, 2]       = "R-Square";
            sheet.Cells[summaryName, 3]       = "Adjusted R-Square";
            sheet.Cells[summaryName, 4]       = "Standard Error of estimation";

            if (doCalculate.DisplayRegressionEquation)
            {
                // Text form: "<Y> = b0 + (b1*<X1>) + (b2*<X2>) ...".
                sheet.Cells[summaryName, 5] = "Regression Equation";
                var regressionEquation = "" + matrixResult[0, 0];
                for (var i = 0; i < predictorCount; i++)
                {
                    regressionEquation = regressionEquation + " + (" + matrixResult[i + 1, 0] + "*" + valuesArraysX[i].Name + ")";
                }
                sheet.Cells[summaryName + 1, 5] = response.Name + " = " + regressionEquation;
            }

            // ANOVA table: index 0 of sumOfSquare is written to the "Explained" row, index 1 to "Unexplained".
            var sumOfSquare           = CalculateSumOfSquares(valuesArraysX, valuesArraysY, matrixResult);
            var meanSquareExplained   = sumOfSquare[0] / predictorCount;
            var meanSquareUnexplained = sumOfSquare[1] / dF;
            var fTest                 = meanSquareExplained / meanSquareUnexplained;

            sheet.Cells[anovaTableName, 1]     = "Anova Table";
            sheet.Cells[anovaTableName + 1, 1] = "Explained";
            sheet.Cells[anovaTableName + 2, 1] = "Unexplained";
            sheet.Cells[anovaTableName, degreesOfFreedomName]     = "Degrees of Freedom";
            sheet.Cells[anovaTableName + 1, degreesOfFreedomName] = predictorCount;
            sheet.Cells[anovaTableName + 2, degreesOfFreedomName] = dF;
            sheet.Cells[anovaTableName, sumOfSquaresName]         = "Sum of Squares";
            sheet.Cells[anovaTableName + 1, sumOfSquaresName]     = sumOfSquare[0];
            sheet.Cells[anovaTableName + 2, sumOfSquaresName]     = sumOfSquare[1];
            sheet.Cells[anovaTableName, meanOfSquaresName]        = "Mean of Squares";
            sheet.Cells[anovaTableName + 1, meanOfSquaresName]    = meanSquareExplained;
            sheet.Cells[anovaTableName + 2, meanOfSquaresName]    = meanSquareUnexplained;
            sheet.Cells[anovaTableName, anovaFValueName]          = "F";
            sheet.Cells[anovaTableName, anovaPValueName]          = "P-Value";
            sheet.Cells[anovaTableName + 1, anovaFValueName]      = fTest;
            sheet.Cells[anovaTableName + 1, anovaPValueName]      = _functions.FDist(fTest, predictorCount, dF);

            // Headers of the data block; the predictor headers follow from column 5 onwards.
            sheet.Cells[dataName, 1] = "Data";
            sheet.Cells[dataName, 2] = "Y: " + response.Name;
            sheet.Cells[dataName, 3] = "Fit";
            sheet.Cells[dataName, 4] = "Residuals";

            // calculate r-square, adj r-square and std error of estimation
            var rSquare            = CalculateRsquare(valuesArraysX, valuesArraysY, matrixResult);
            var adjRSquare         = CalculateAdjRSquare(rSquare, matrixX);
            var stdErrorEstimation = CalculateStdErrorEstimation(valuesArraysX, valuesArraysY, matrixResult);

            sheet.Cells[summaryName + 1, 2] = rSquare;
            sheet.Cells[summaryName + 1, 3] = adjRSquare;
            sheet.Cells[summaryName + 1, 4] = stdErrorEstimation;

            // Row labels of the coefficient table: the constant sits on row name + 1, predictors follow.
            for (var j = 0; j < predictorCount; j++)
            {
                sheet.Cells[name + j + 2, 1] = valuesArraysX[j].Name;
            }

            // The two-tailed t multiplier depends only on the confidence level and dF, so it is
            // computed once instead of once per coefficient row.
            var confidenceConstant = _functions.T_Inv_2T(1 - confidenceLevel / 100.0, dF);

            for (var i = 0; i < matrixResult.RowCount; i++)
            {
                var coeff    = matrixResult[i, 0];
                // Standard error of a coefficient: sqrt(diagonal of inverse(X'X) * residual mean square).
                var stdError = Math.Sqrt(matrixInv[i, i] * meanSquareUnexplained);
                var pValue   = _functions.TDist(Math.Abs(coeff / stdError), dF, 2);
                var lower    = coeff - stdError * confidenceConstant;
                var upper    = coeff + stdError * confidenceConstant;

                var row = name + i + 1;
                sheet.Cells[row, betaCoeff]      = coeff;
                sheet.Cells[row, stdErrorName]   = stdError;
                sheet.Cells[row, pValuesName]    = pValue;
                sheet.Cells[row, lowerLimitName] = lower;
                sheet.Cells[row, upperLimitName] = upper;
            }

            for (var i = 0; i < predictorCount; i++)
            {
                sheet.Cells[dataName, 5 + i] = valuesArraysX[i].Name;
            }

            // CreateDataRegression is defined elsewhere; its return value is threaded through
            // CreateNewFigure for each chart that is added.
            var nextFigure = CreateDataRegression(matrixX, matrixY, matrixResult, dataName, sheet);

            // The charts reference the data block rows directly below the header row.
            var firstDataRow = dataName + 1;
            var lastDataRow  = dataName + rowCount;

            if (doCalculate.FittedValuesVsActualYValues)
            {
                var rangeX = sheet.Range[sheet.Cells[firstDataRow, 2], sheet.Cells[lastDataRow, 2]];
                var rangeY = sheet.Range[sheet.Cells[firstDataRow, 3], sheet.Cells[lastDataRow, 3]];
                nextFigure = CreateNewFigure(rangeX, rangeY, nextFigure, "Fitted Values vs Actual Y-Values: " + response.Name, sheet);
            }

            if (doCalculate.ResidualsVsFittedValues)
            {
                var rangeX = sheet.Range[sheet.Cells[firstDataRow, 3], sheet.Cells[lastDataRow, 3]];
                var rangeY = sheet.Range[sheet.Cells[firstDataRow, 4], sheet.Cells[lastDataRow, 4]];
                nextFigure = CreateNewFigure(rangeX, rangeY, nextFigure, "Residuals vs Fitted Values", sheet);
            }

            if (doCalculate.ResidualsVsXValues)
            {
                // One chart per predictor, all sharing the residual column as Y.
                var rangeY = sheet.Range[sheet.Cells[firstDataRow, 4], sheet.Cells[lastDataRow, 4]];
                for (var i = 0; i < predictorCount; i++)
                {
                    var rangeX = sheet.Range[sheet.Cells[firstDataRow, 5 + i], sheet.Cells[lastDataRow, 5 + i]];
                    var nameX  = valuesArraysX[i].Name;
                    nextFigure = CreateNewFigure(rangeX, rangeY, nextFigure, "Residuals vs " + nameX, sheet);
                }
            }

            return true;
        }