Esempio n. 1
0
        /// <summary>
        /// Creates a new <c>RegressionOutputs</c> holding only the basic statistics of this instance:
        /// Coeffs, Rsquared, AdjRsquared, StandardError, Tstats, CoeffsStandardErrors, CV and NMBE.
        /// </summary>
        /// <remarks>
        /// Array members are duplicated, so the returned object does not share them with this instance.
        /// An array that is null here is not assigned on the copy. Members not listed above keep
        /// whatever value a freshly constructed <c>RegressionOutputs</c> has.
        /// </remarks>
        /// <returns>The new, partially populated instance.</returns>
        public RegressionOutputs CloneBasicStatistics()
        {
            var clone = new RegressionOutputs();

            // Each array gets its own storage; a null array is simply skipped.
            if (Coeffs != null)
            {
                clone.Coeffs = (double[])Coeffs.Clone();
            }

            clone.Rsquared      = Rsquared;
            clone.AdjRsquared   = AdjRsquared;
            clone.StandardError = StandardError;

            if (Tstats != null)
            {
                clone.Tstats = (double[])Tstats.Clone();
            }

            if (CoeffsStandardErrors != null)
            {
                clone.CoeffsStandardErrors = (double[])CoeffsStandardErrors.Clone();
            }

            clone.CV   = CV;
            clone.NMBE = NMBE;

            return clone;
        }
Esempio n. 2
0
        /// <summary>
        /// Fits the simple linear model y = b + m*x by ordinary least squares.
        /// </summary>
        /// <param name="y">List of double dependent variables</param>
        /// <param name="x">List of double independent variables; must contain as many values as <paramref name="y"/></param>
        /// <returns>
        /// Regression outputs with Coeffs[0] = constant (b), Coeffs[1] = slope (m), plus
        /// StandardError and Rsquared. No other member is set by this method.
        /// </returns>
        /// <remarks>
        /// StandardError divides by (count - 2), so it is not a meaningful value for fewer than three
        /// observations. When every x value is identical the slope is a division by zero and the
        /// results are NaN or infinite.
        /// </remarks>
        /// <exception cref="ArgumentNullException"><paramref name="y"/> or <paramref name="x"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="y"/> and <paramref name="x"/> have different counts.</exception>
        public static RegressionOutputs SimpleLinearRegression(List<double> y, List<double> x)
        {
            if (y == null)
            {
                throw new ArgumentNullException(nameof(y));
            }
            if (x == null)
            {
                throw new ArgumentNullException(nameof(x));
            }

            // Make sure the counts for Y and X are the same.
            if (y.Count != x.Count)
            {
                throw new ArgumentException("Matrices must be the same size for regression");
            }

            int count = y.Count;

            double yMean = y.Average();
            double xMean = x.Average();

            // Raw sums of squares and cross-products, accumulated in index order.
            double sumXX = 0;
            double sumYY = 0;
            double sumXY = 0;
            for (int i = 0; i < count; i++)
            {
                sumXX += x[i] * x[i];
                sumYY += y[i] * y[i];
                sumXY += x[i] * y[i];
            }

            // Centre the raw sums about the means.
            double ssXY = sumXY - count * xMean * yMean;
            double ssXX = sumXX - count * xMean * xMean;
            double ssYY = sumYY - count * yMean * yMean;

            double slope    = ssXY / ssXX;
            double constant = yMean - slope * xMean;

            // Sum of squared errors of the fitted line.
            double sse = ssYY - slope * ssXY;

            RegressionOutputs result = new RegressionOutputs();
            result.Coeffs        = new double[] { constant, slope };
            result.StandardError = Math.Sqrt(sse / (count - 2));
            result.Rsquared      = 1 - (sse / ssYY);

            return result;
        }
Esempio n. 3
0
        /// <summary>
        /// Console entry point. Reads white-space separated numeric records, fits a multiple linear
        /// regression with the first column as the dependent variable and the remaining columns as
        /// independent variables, and writes each fitted coefficient on its own line.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments; args[0] is read as the input file path when standard input is not redirected.
        /// </param>
        static void Main(string[] args)
        {
            // Redirected standard input takes precedence over a file named on the command line.
            string input = null;
            if (Console.IsInputRedirected)
            {
                input = Console.In.ReadToEnd();
            }
            else if (args.Length > 0)
            {
                input = File.ReadAllText(args[0]);
            }

            if (string.IsNullOrEmpty(input))
            {
                Console.Out.WriteLine("No input.");
                return;
            }

            List<string> lines = input.Split(new[] { "\r\n", "\r", "\n" }, StringSplitOptions.None).ToList();

            // A null separator array makes Split break on white-space characters.
            List<string[]> splitLines = lines.Select(line => line.Split((char[])null)).ToList();

            // The column count shared by the most lines defines the expected record width.
            int mostCommonColumnLength = splitLines
                                         .GroupBy(cols => cols.Length)
                                         .OrderByDescending(grouping => grouping.Count())
                                         .First()
                                         .Key;

            List<List<double>> data = splitLines
                                      .Where(line => IsValidLine(line, mostCommonColumnLength))
                                      .Select(fields => fields.Select(double.Parse).ToList())
                                      .ToList();

            if (data.Count == 0)
            {
                Console.Out.WriteLine($"None of the original {splitLines.Count} lines contained records of completely clean data.");
                return;
            }

            // Column 0 of every record is y; the remaining columns form the x matrix.
            double[,] xArray = new double[data.Count, mostCommonColumnLength - 1];
            double[] yArray = new double[data.Count];

            foreach ((List<double> record, int rowIndex) in data.WithIndex())
            {
                foreach ((double dataPoint, int colIndex) in record.Skip(1).WithIndex())
                {
                    xArray[rowIndex, colIndex] = dataPoint;
                }

                yArray[rowIndex] = record[0];
            }

            RegressionOutputs outputs = Regression.MultipleLinearRegression(yArray, xArray, false);

            foreach (double coefficient in outputs.Coeffs)
            {
                // Decimal places are chosen from the order of magnitude, clamped to 0..10.
                // Zero and non-finite values have no usable magnitude: Log10 yields -Infinity/NaN/Infinity
                // and converting those to int is unspecified, so they are printed with 0 decimals.
                int decimalPlaces = 0;
                if (coefficient != 0 && !double.IsNaN(coefficient) && !double.IsInfinity(coefficient))
                {
                    int magnitude = (int)Math.Floor(Math.Log10(Math.Abs(coefficient)));
                    decimalPlaces = Math.Min(10, Math.Max(0, 5 - magnitude));
                }

                string formatted = Math.Round(coefficient, decimalPlaces).ToString($"f{decimalPlaces}");
                Console.Out.WriteLine($"{formatted.TrimZeros()}");
            }
        }
Esempio n. 4
0
        /// <summary>
        /// MultipleLinearRegression runs a MLR on y and x data.  A column of 1's is added for a constant value,
        /// so Coeffs[0] is the constant and Coeffs[j + 1] belongs to column j of <paramref name="x"/>.
        /// </summary>
        /// <param name="y">1-D array of double dependent variable values</param>
        /// <param name="x">n x p array of independent variable observations.  Do not add a column of 1's</param>
        /// <param name="advancedStats">true will additionally return:
        ///                                 the original y and x data
        ///                                 predictions
        ///                                 residuals
        ///                                 standardized residuals
        ///                                 standard error for coefficients
        ///                                 t-stats
        ///                                 F-statistic
        ///                                 adjusted R^2
        ///                             Cook's distance is NOT computed by this method.</param>
        /// <returns>
        /// RegressionOutputs class.  Coeffs, NMBE, StandardError, CV and Rsquared are always set;
        /// Tstats is always allocated but only filled in when <paramref name="advancedStats"/> is true.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="y"/> or <paramref name="x"/> is null.</exception>
        /// <exception cref="ArgumentException">The length of <paramref name="y"/> differs from the row count of <paramref name="x"/>.</exception>
        /// <exception cref="InvalidOperationException">The computed inverse of X'X contains NaN.</exception>
        public static RegressionOutputs MultipleLinearRegression(double[] y, double[,] x, bool advancedStats)
        {
            if (y == null)
            {
                throw new ArgumentNullException(nameof(y));
            }
            if (x == null)
            {
                throw new ArgumentNullException(nameof(x));
            }

            int n              = y.Length;
            int predictorCount = x.GetLength(1);

            //Make sure that the length for y and x are the same
            if (n != x.GetLength(0))
            {
                throw new ArgumentException("Matrices must be the same size for regression");
            }

            RegressionOutputs toReturn = new RegressionOutputs();

            //Add a column of 1's to the X matrix to calculate a constant in the regression
            double[,] xFull = new double[n, predictorCount + 1];
            for (int i = 0; i < n; i++)
            {
                xFull[i, 0] = 1;
                for (int j = 0; j < predictorCount; j++)
                {
                    xFull[i, j + 1] = x[i, j];
                }
            }

            //y has to be a two dimensional (n x 1) array to pass to the matrix methods.
            double[,] yAdjusted = new double[n, 1];
            for (int i = 0; i < n; i++)
            {
                yAdjusted[i, 0] = y[i];
            }

            double yAve = y.Average();
            int    p    = xFull.GetLength(1);   //number of fitted coefficients, constant included

            //Calculate X'X and X'Y
            double[,] xTrans  = MatrixMethods.MatTranspose(xFull);
            double[,] xTransX = MatrixMethods.MatMultiply(xTrans, xFull);
            double[,] xTransY = MatrixMethods.MatMultiply(xTrans, yAdjusted);

            //Calculate [X'X]^-1
            var choleskyDecomp = MatrixMethods.CholeskyDecomp(xTransX);
            double[,] c = MatrixMethods.MatrixInv(xTransX, choleskyDecomp);

            //A NaN anywhere in the inverse means the fit cannot be trusted.
            foreach (double entry in c)
            {
                if (double.IsNaN(entry))
                {
                    throw new InvalidOperationException("Matrix inverse not defined. Check for constant input array.");
                }
            }

            //Coefficients: B = [X'X]^-1 X'Y
            double[,] coeffs = MatrixMethods.MatMultiply(c, xTransY);

            // Calculate Y'Y
            double[,] yTransY = MatrixMethods.MatMultiply(MatrixMethods.MatTranspose(yAdjusted), yAdjusted);
            // Calculate B'X'Y
            double[,] betaTransXtransY = MatrixMethods.MatMultiply(MatrixMethods.MatTranspose(coeffs), xTransY);

            double sse = yTransY[0, 0] - betaTransXtransY[0, 0];    //Sum of squared errors (sum of squared residuals)

            //Check if sse is less than zero.  Ran into round off problem when passed constant data.
            if (sse < 0)
            {
                sse = 0;
            }

            //Sum of (y - y_ave)^2, used as the denominator of R^2
            double ssm = 0;
            for (int i = 0; i < n; i++)
            {
                ssm += (y[i] - yAve) * (y[i] - yAve);
            }

            //Returning Basic Statistics
            //==========================================================================================

            toReturn.Coeffs = new double[p];
            for (int i = 0; i < coeffs.GetLength(0); i++)
            {
                toReturn.Coeffs[i] = coeffs[i, 0];
            }

            toReturn.NMBE          = 0;             //This is defined to be zero, other than round-off errors.
            toReturn.StandardError = Math.Sqrt(sse / (n - p));
            toReturn.CV            = toReturn.StandardError / yAve;
            toReturn.Tstats        = new double[p];
            toReturn.Rsquared      = 1 - (sse / ssm);

            //Advanced Statistics
            //============================================================================================
            if (advancedStats)
            {
                //Send back original data (same array references, not copies)
                //-----------------------------------------------------------
                toReturn.Ydata = y;
                toReturn.Xdata = x;

                //Calculate Predictions and Residuals
                //-----------------------------------
                double[] predictions = new double[n];
                double[] residuals   = new double[n];
                for (int i = 0; i < n; i++)
                {
                    double fitted = 0;
                    for (int j = 0; j < coeffs.GetLength(0); j++)
                    {
                        fitted = fitted + coeffs[j, 0] * xFull[i, j];
                    }
                    predictions[i] = fitted;
                    residuals[i]   = y[i] - fitted;
                }

                //Calculate standardized residuals (residual / sample standard deviation of the residuals)
                //----------------------------------------------------------------------------------------
                double residAve = residuals.Average();
                double tempSum  = 0;
                for (int i = 0; i < n; i++)
                {
                    tempSum += (residuals[i] - residAve) * (residuals[i] - residAve);
                }
                double residualStandDev = Math.Sqrt(tempSum / (n - 1));

                double[] standResiduals = new double[n];
                for (int i = 0; i < n; i++)
                {
                    standResiduals[i] = residuals[i] / residualStandDev;
                }

                //Cook's distance is intentionally not computed here.  It would need the hat matrix
                //H = X [X'X]^-1 X' and D_i = e_i^2 * h_ii / (p * (sse / (n - p)) * (1 - h_ii)^2).

                //Coefficient standard errors and t-stats from the diagonal of [X'X]^-1
                //---------------------------------------------------------------------
                toReturn.CoeffsStandardErrors = new double[p];
                for (int i = 0; i < p; i++)
                {
                    toReturn.CoeffsStandardErrors[i] = toReturn.StandardError * Math.Sqrt(c[i, i]);
                    toReturn.Tstats[i] = toReturn.Coeffs[i] / toReturn.CoeffsStandardErrors[i];
                }

                toReturn.Residuals      = residuals;
                toReturn.StandResiduals = standResiduals;
                toReturn.Yhat           = predictions;
                toReturn.Fstatistic     = ((toReturn.Rsquared) / (p - 1)) / ((1 - toReturn.Rsquared) / (n - p));

                toReturn.AdjRsquared = 1 - ((((double)n - 1) / ((double)n - (double)p)) * (1 - toReturn.Rsquared));
            }

            return toReturn;
        }
Esempio n. 5
0
 /// <summary>
 /// Returns the supplied <c>RegressionOutputs</c> reference unchanged.
 /// </summary>
 /// <remarks>
 /// NOTE(review): despite the name, no copy is made. The caller receives the very same
 /// instance, so changes made through the result are visible through the argument.
 /// </remarks>
 /// <param name="regOutputToClone">Instance to hand back; may be null, in which case null is returned.</param>
 /// <returns>The same reference that was passed in.</returns>
 public static RegressionOutputs CloneRegressionOutputs(RegressionOutputs regOutputToClone) => regOutputToClone;