예제 #1
0
        /// <summary>
        /// Creates lagged copies of <paramref name="variable"/> for lags 1..<paramref name="numberOfLags"/>
        /// and then replaces the range and variables of <paramref name="dataSet"/> with those returned by
        /// <c>DataSetFactory.modify</c>.
        /// </summary>
        /// <param name="variable">Variable whose range is copied once per lag.</param>
        /// <param name="numberOfLags">Number of lagged copies to write.</param>
        /// <param name="dataSet">Data set that is updated after the copies have been written.</param>
        public void addLags(Variable variable, int numberOfLags, DataSet dataSet)
        {
            bool dataInColumns = rangeLayout == COLUMNS;

            for (int lag = 1; lag <= numberOfLags; lag++)
            {
                // Distance from this variable to the slot used for its lag-th copy:
                // the variables that follow it in the list, plus the lags already placed.
                int shift = variables.Count - variables.IndexOf(variable) + lag - 1;

                // NOTE(review): the extend*/shift* helpers are defined elsewhere; presumably a negative
                // amount trims the range by `lag` cells and the `false` flag selects the opposite end.
                Range source;
                Range destination;
                if (dataInColumns)
                {
                    source      = variable.getRange().extendRangeByRows(-lag);
                    destination = variable.getRange().extendRangeByRows(-lag, false).shiftRangeByColumns(shift);
                }
                else
                {
                    source      = variable.getRange().extendRangeByColumns(-lag);
                    destination = variable.getRange().extendRangeByColumns(-lag, false).shiftRangeByRows(shift);
                }
                source.Copy(destination);

                if (variableNamesInFirstRowOrColumn)
                {
                    // The name cell sits one step before the first data cell; label the copy "<name> Lag <n>".
                    if (dataInColumns)
                    {
                        source      = variable.getRange().first().shiftRangeByRows(-1);
                        destination = variable.getRange().first().shiftRangeByRows(-1).shiftRangeByColumns(shift);
                    }
                    else
                    {
                        source      = variable.getRange().first().shiftRangeByColumns(-1);
                        destination = variable.getRange().first().shiftRangeByColumns(-1).shiftRangeByRows(shift);
                    }

                    destination.Value = source.Value + " Lag " + lag.ToString();
                }
            }

            Globals.ExcelAddIn.Application.CutCopyMode = XlCutCopyMode.xlCopy;

            // Rebuild the data set up to the column that accounts for the added lags.
            string  lastColumnLetter = ColumnIndexToColumnLetter(range.Columns.Count + numberOfLags);
            DataSet widened          = DataSetFactory.modify(dataSet, lastColumnLetter);

            dataSet.setRange(widened.getRange());
            dataSet.setVariables(widened.getVariables());
        }
예제 #2
0
        /// <summary>
        /// Rebuilds <paramref name="dataSet"/> with a new "variable names in first row/column" setting
        /// and swaps the rebuilt data set into the model. Does nothing when <paramref name="dataSet"/> is null.
        /// </summary>
        /// <param name="dataSet">Data set to rebuild; may be null.</param>
        /// <param name="variableNamesInFirstRowOrColumn">Flag passed to the factory for the rebuilt data set.</param>
        public void setVariableNamesInFirstRowOrColumn(DataSet dataSet, bool variableNamesInFirstRowOrColumn)
        {
            if (dataSet != null)
            {
                // Everything except the flag is carried over from the existing data set.
                var sourceSheet  = dataSet.getWorksheet();
                var sourceRange  = dataSet.getRange();
                var sourceName   = dataSet.getName();
                var sourceLayout = dataSet.getRangeLayout();

                DataSet rebuilt = DataSetFactory.create(sourceSheet, sourceRange, sourceName, sourceLayout,
                                                        variableNamesInFirstRowOrColumn);

                model.swapDataSets(dataSet, rebuilt);
            }
        }
예제 #3
0
        /// <summary>
        /// Writes one 0/1 dummy column per distinct value of <paramref name="variable"/> to the right of the
        /// last variable of <paramref name="dataSet"/>, then replaces the data set's range and variables
        /// with those returned by <c>DataSetFactory.modify</c>.
        /// </summary>
        /// <remarks>
        /// The first non-empty cell decides whether the variable is treated as text or as numbers.
        /// Text levels keep their order of first appearance; numeric levels are sorted ascending.
        /// Headers ("name=level") go to row 1 and the indicators start in row 2.
        /// </remarks>
        /// <param name="variable">Variable to expand into dummy columns.</param>
        /// <param name="dataSet">Data set that owns the variable and receives the new columns.</param>
        public void addDummy(Variable variable, DataSet dataSet)
        {
            string ran          = variable.Range.ToString();
            String colLetter2   = variable.Range[1].ToString();
            // NOTE(review): the column index is used as an index into getVariables(); this presumably
            // assumes the data set starts in the first worksheet column - confirm.
            int    columnIndex2 = ColumnLetterToColumnIndex(colLetter2) - 1;
            String colLetter    = dataSet.getVariables()[dataSet.getVariables().Count - 1].Range[1].ToString();
            int    columnIndex  = ColumnLetterToColumnIndex(colLetter) + 1;
            Array  dist         = dataSet.getWorksheet().Range[ran].Value;

            // Skip leading empty cells instead of dereferencing them; a null first cell used to throw.
            bool isText = dist.Cast <object>().FirstOrDefault(cell => cell != null) is string;

            int levelCount;
            if (isText)
            {
                List <String> values = dist.OfType <String>().ToList();
                String[]      levels = values.Distinct().ToArray();
                levelCount = levels.Length;

                for (int level = 0; level < levels.Length; level++)
                {
                    worksheet.Cells[1, columnIndex + level] = dataSet.getVariables()[columnIndex2].name + "=" + levels[level];
                }
                for (int row = 0; row < values.Count; row++)
                {
                    for (int level = 0; level < levels.Length; level++)
                    {
                        worksheet.Cells[row + 2, columnIndex + level] = values[row].Equals(levels[level]) ? "1" : "0";
                    }
                }
            }
            else
            {
                List <Double> values = dist.OfType <Double>().ToList();
                double[]      levels = values.Distinct().OrderBy(level => level).ToArray();
                levelCount = levels.Length;

                for (int level = 0; level < levels.Length; level++)
                {
                    worksheet.Cells[1, columnIndex + level] = dataSet.getVariables()[columnIndex2].name + "=" + levels[level].ToString();
                }
                for (int row = 0; row < values.Count; row++)
                {
                    for (int level = 0; level < levels.Length; level++)
                    {
                        // Compare the doubles directly. Going through Convert.ToInt16 rounded fractional
                        // levels (so 1.5 matched rows holding 2) and overflowed above 32767.
                        worksheet.Cells[row + 2, columnIndex + level] = values[row] == levels[level] ? "1" : "0";
                    }
                }
            }

            // Last column written; equals the previous last column when no level was found.
            int     lastColumn = columnIndex + levelCount - 1;
            DataSet nwRange    = DataSetFactory.modify(dataSet, ColumnIndexToColumnLetter(lastColumn));

            dataSet.setRange(nwRange.getRange());
            dataSet.setVariables(nwRange.getVariables());
        }
예제 #4
0
        /// <summary>
        /// Adds an interaction column (row-wise product of two numeric variables) to the right of the last
        /// variable of <paramref name="dataSet"/>, then replaces the data set's range and variables with
        /// those returned by <c>DataSetFactory.modify</c>.
        /// </summary>
        /// <remarks>
        /// If the first non-empty cell of either variable is text, a message box is shown and nothing is
        /// written. When the two variables yield a different number of numeric values, the shorter one is
        /// paired with the tail of the longer one and the leading rows of the new column stay empty.
        /// </remarks>
        /// <param name="variable">First variable (e.g. a category such as gender).</param>
        /// <param name="variable2">Second variable (e.g. a measure such as salary).</param>
        /// <param name="dataSet">Data set that owns both variables and receives the new column.</param>
        public void Interaction(Variable variable, Variable variable2, DataSet dataSet)
        {
            String colLetter      = dataSet.getVariables()[dataSet.getVariables().Count - 1].Range[1].ToString();
            int    columnIndex    = ColumnLetterToColumnIndex(colLetter) + 1;
            string ran2           = variable.Range.ToString();
            String colLetterCat   = variable.Range[1].ToString();
            int    columnIndexCat = ColumnLetterToColumnIndex(colLetterCat) - 1;
            Array  distCat        = dataSet.getWorksheet().Range[ran2].Value;

            string ran            = variable2.Range.ToString();
            String colLetterVar   = variable2.Range[1].ToString();
            int    columnIndexVar = ColumnLetterToColumnIndex(colLetterVar) - 1;
            Array  dist           = dataSet.getWorksheet().Range[ran].Value;

            // Each variable is classified by its first non-empty cell. Both results are checked;
            // previously the second check overwrote the first, letting a text first variable through.
            bool firstIsText  = distCat.Cast <object>().FirstOrDefault(cell => cell != null) is string;
            bool secondIsText = dist.Cast <object>().FirstOrDefault(cell => cell != null) is string;
            if (firstIsText || secondIsText)
            {
                MessageBox.Show("Interaction can only be calculated for numbers.");
                return;
            }

            double[] var1 = distCat.OfType <double>().ToArray();
            double[] var2 = dist.OfType <double>().ToArray();

            System.Diagnostics.Debug.WriteLine(var1.Length);
            System.Diagnostics.Debug.WriteLine(var2.Length);

            worksheet.Cells[1, columnIndex] = dataSet.getVariables()[columnIndexCat].name + " - " + dataSet.getVariables()[columnIndexVar].name;

            // Pair the values from the end: the longer series skips its surplus leading values and
            // the output starts that many rows further down.
            int len       = Math.Min(var1.Length, var2.Length);
            int skip1     = var1.Length - len;
            int skip2     = var2.Length - len;
            int rowOffset = Math.Max(skip1, skip2);

            for (int row = 0; row < len; row++)
            {
                worksheet.Cells[row + 2 + rowOffset, columnIndex] = var1[row + skip1] * var2[row + skip2];
            }

            DataSet nwRange = DataSetFactory.modify(dataSet, ColumnIndexToColumnLetter(columnIndex));

            dataSet.setRange(nwRange.getRange());
            dataSet.setVariables(nwRange.getVariables());
        }
예제 #5
0
        /// <summary>
        /// Runs a multiple linear regression and writes the summary, ANOVA table and regression table to a
        /// new "Regression" worksheet; optionally runs a prediction and draws scatter plots.
        /// </summary>
        /// <param name="variablesD">Dependent variable(s); the name of the first one is passed to the prediction step.</param>
        /// <param name="variablesI">Independent variables; must contain at least one entry.</param>
        /// <param name="dataSet">Data set the regression is fitted on.</param>
        /// <param name="dataSet2">Data set used for prediction when <c>model.doPrediction</c> is set.</param>
        /// <param name="graphs">
        /// Five flags: [0] fitted vs. actual, [1] residuals vs. fitted, [2] residuals vs. each regressor,
        /// [3] actual Y vs. each regressor, [4] fitted Y vs. each regressor.
        /// </param>
        /// <param name="variablesPrediction">Variables read from <paramref name="dataSet2"/> for the prediction.</param>
        public void createRegression(List <Variable> variablesD, List <Variable> variablesI, DataSet dataSet, DataSet dataSet2, bool[] graphs, List <Variable> variablesPrediction)
        {
            _Worksheet sheet     = WorksheetHelper.NewWorksheet("Regression");
            double     confLevel = model.confidenceLevel;

            // The observation count is obtained by letting Excel evaluate ROWS() in a scratch cell.
            // The sheet name is quoted (apostrophes doubled) so names containing spaces or
            // punctuation still yield a valid formula.
            string quotedSheetName = "'" + dataSet.getWorksheet().Name.Replace("'", "''") + "'";
            sheet.Cells[100, 100] = "=ROWS(" + quotedSheetName + "!" + variablesI[0].Range + ")";
            int length = Convert.ToInt32((sheet.Cells[100, 100] as Range).Value);

            sheet.Cells[100, 100] = "";

            //
            // static labels
            //
            sheet.Cells[1, 1] = "Regression Summary";
            sheet.Cells[2, 1] = "R";
            sheet.Cells[3, 1] = "R-square";
            sheet.Cells[4, 1] = "adjusted R-square";
            sheet.Cells[5, 1] = "stErr of Estimate";

            sheet.Cells[9, 1]  = "ANOVA Table";
            sheet.Cells[10, 1] = "Explained";
            sheet.Cells[11, 1] = "Unexplained";
            sheet.Cells[8, 3]  = "Sum of";
            sheet.Cells[9, 3]  = "Squares";
            sheet.Cells[8, 2]  = "Degrees of";
            sheet.Cells[9, 2]  = "Freedom";
            sheet.Cells[8, 4]  = "Mean";
            sheet.Cells[9, 4]  = "Squares";
            sheet.Cells[9, 5]  = "F-Ratio";
            sheet.Cells[9, 6]  = "p-Value";

            sheet.Cells[15, 1] = "Regression Table";
            sheet.Cells[16, 1] = "Constant";
            int row = 17;
            foreach (Variable regressor in variablesI)
            {
                sheet.Cells[row, 1] = regressor.name;
                row++;
            }
            lowestDataRow = row;

            sheet.Cells[15, 2] = "Coefficient";
            sheet.Cells[14, 3] = "Standard";
            sheet.Cells[15, 3] = "Error";
            sheet.Cells[15, 4] = "t-value";
            sheet.Cells[15, 5] = "p-value";
            sheet.Cells[14, 6] = "Confidence Interval " + confLevel.ToString() + "%";
            sheet.Range[sheet.Cells[14, 6], sheet.Cells[14, 7]].Merge();
            sheet.Cells[15, 6] = "Lower";
            sheet.Cells[15, 7] = "Upper";

            sheet.get_Range("B1", "J200").Cells.HorizontalAlignment = XlHAlign.xlHAlignCenter;
            sheet.get_Range("A1", "B1").Borders[XlBordersIndex.xlEdgeBottom].LineStyle   = XlLineStyle.xlDouble;
            sheet.get_Range("A9", "F9").Borders[XlBordersIndex.xlEdgeBottom].LineStyle   = XlLineStyle.xlDouble;
            sheet.get_Range("A15", "G15").Borders[XlBordersIndex.xlEdgeBottom].LineStyle = XlLineStyle.xlDouble;
            sheet.get_Range("B3", "B5").NumberFormat   = "0.0000";
            sheet.get_Range("B18", "B20").NumberFormat = "0.0000";
            sheet.get_Range("D18", "E19").NumberFormat = "0.0000";

            Globals.ExcelAddIn.Application.ActiveWindow.DisplayGridlines = false;

            //
            // calculate values of regression
            //
            double[] b     = new double[variablesI.Count];
            double[] yData = calcYdata(variablesD, dataSet, length);
            double[,] xData = calcXdata(length, dataSet, variablesI);

            // b[0] is the constant; b[1..] are used below as the coefficients of variablesI, in order.
            b = calculateCoefB(yData, xData, b);
            double a = b[0];

            var X  = DenseMatrix.OfArray(xData);
            var B  = new DenseVector(b);
            var Y  = new DenseVector(yData);
            var yh = X.Multiply(B);
            var er = Y.Subtract(yh);

            double[] error = er.ToArray();
            double[] yhat  = yh.ToArray();

            // Residual degrees of freedom: n - k - 1.
            int residualDf = length - variablesI.Count - 1;

            double multipleR = sheet.Application.WorksheetFunction.Correl(yData, yhat);
            double rSquare   = Math.Pow(multipleR, 2);
            double Radj      = 1.0 - ((1 - rSquare) * (length - 1) / residualDf);

            double sumSquaredError = 0;
            foreach (double residual in error)
            {
                sumSquaredError += Math.Pow(residual, 2);
            }
            // Standard error of estimate = sqrt(SSE / (n - k - 1)). The divisor used to be a
            // hard-coded (n - 4), which is only right for exactly three regressors.
            double err = Math.Sqrt(sumSquaredError / residualDf);

            double[] anovaResults = calculateAnova(yData, yhat, variablesI.Count, sheet);

            multicollinearity(sheet, xData, variablesI.Count);
            if (model.doDurbinWatson)
            {
                DurbinWatson(sheet, error);
            }

            //
            // Calculate regressionTable
            //
            // Coefficient variances are the diagonal of MSE * (X'X)^-1.
            double MSE        = anovaResults[6];
            var    Xt         = X.Transpose();
            var    tempM      = (Xt.Multiply(X)).Inverse();
            var    variancesM = tempM.Multiply(MSE);

            double[] variances = variancesM.Diagonal().ToArray();
            double[] std       = variances.Select(variance => Math.Sqrt(variance)).ToArray();

            // The two-tailed critical t value is the same for every coefficient, so ask Excel once.
            double tCritical = sheet.Application.WorksheetFunction.TInv(1 - confLevel / 100, residualDf);

            double[] tValue    = new double[b.Length];
            double[] pValue    = new double[b.Length];
            double[] lowerInt  = new double[b.Length];
            double[] higherInt = new double[b.Length];
            for (int coef = 0; coef < b.Length; coef++)
            {
                tValue[coef]    = b[coef] / std[coef];
                pValue[coef]    = sheet.Application.WorksheetFunction.TDist(Math.Abs(tValue[coef]), residualDf, 2);
                lowerInt[coef]  = b[coef] - tCritical * std[coef];
                higherInt[coef] = b[coef] + tCritical * std[coef];
            }

            //
            // Prediction
            //
            if (model.doPrediction)
            {
                double[,] xDataPred = calcXdata(dataSet2.getNrDataRows(), dataSet2, variablesPrediction);
                int[] rightCell = { dataSet2.getRange().Row, dataSet2.getRange().Column + dataSet2.amountOfVariables() };                 //{row, column}

                prediction(dataSet2.getWorksheet(), xDataPred, b, length, variablesI.Count, X, MSE, rightCell, variablesD[0].name);
                setPrediction(false);
            }

            //
            // Draw graphs
            //
            // Copies one column of xData; column 0 is skipped by the callers (regressors start at 1).
            Func <int, double[]> regressorColumn = columnNumber =>
            {
                double[] columnValues = new double[xData.GetLength(0)];
                for (int rowNumber = 0; rowNumber < columnValues.Length; rowNumber++)
                {
                    columnValues[rowNumber] = xData[rowNumber, columnNumber];
                }
                return columnValues;
            };

            if (graphs[0])
            {
                drawGraphs(sheet, yData, yhat, "Scatter plot of fitted values vs. actual values");
            }
            if (graphs[1])
            {
                // x series must be the fitted values to match the title; it used to be the actual Y values.
                drawGraphs(sheet, yhat, error, "Scatter plot of residuals vs. fitted values");
            }
            if (graphs[2])
            {
                for (int v = 0; v < variablesI.Count; v++)
                {
                    drawGraphs(sheet, regressorColumn(v + 1), error, "Scatter plot of residuals vs. " + variablesI[v].name);
                }
            }
            if (graphs[3])
            {
                for (int v = 0; v < variablesI.Count; v++)
                {
                    drawGraphs(sheet, regressorColumn(v + 1), yData, "Scatter plot of actual Y values vs. " + variablesI[v].name);
                }
            }
            if (graphs[4])
            {
                for (int v = 0; v < variablesI.Count; v++)
                {
                    drawGraphs(sheet, regressorColumn(v + 1), yhat, "Scatter plot of fitted Y values vs. " + variablesI[v].name);
                }
            }

            //
            // print results to excel sheet
            //
            sheet.Cells[16, 2] = a;
            for (int v = 0; v < variablesI.Count; v++)
            {
                sheet.Cells[17 + v, 2] = b[v + 1];
            }

            FillR(sheet, multipleR, rSquare, Radj, err);
            FillAnova(sheet, anovaResults);
            FillRegressionTable(sheet, std, tValue, pValue, lowerInt, higherInt);

            // Columns A-E and J fit their content; F-I get a fixed width.
            for (int autoFitColumn = 1; autoFitColumn <= 5; autoFitColumn++)
            {
                ((Range)sheet.Cells[1, autoFitColumn]).EntireColumn.AutoFit();
            }
            for (int fixedColumn = 6; fixedColumn <= 9; fixedColumn++)
            {
                ((Range)sheet.Cells[1, fixedColumn]).EntireColumn.ColumnWidth = 13;
            }
            ((Range)sheet.Cells[1, 10]).EntireColumn.AutoFit();
        }