public void RegressTest()
        {
            MultipleLinearRegression target = new MultipleLinearRegression(1, true);

            double[][] inputs = 
            {
                new double[] { 80 },
                new double[] { 60 },
                new double[] { 10 },
                new double[] { 20 },
                new double[] { 30 },
            };

            double[] outputs = { 20, 40, 30, 50, 60 };


            double error = target.Regress(inputs, outputs);

            double slope = target.Coefficients[0];
            double intercept = target.Coefficients[1];

            Assert.AreEqual(-0.264706, slope, 1e-5);
            Assert.AreEqual(50.588235, intercept, 1e-5);
            Assert.AreEqual(761.764705, error, 1e-5);


            double r = target.CoefficientOfDetermination(inputs, outputs);
            Assert.AreEqual(0.23823529, r, 1e-6);

            string str = target.ToString(null, System.Globalization.CultureInfo.GetCultureInfo("pt-BR"));

            Assert.AreEqual("y(x0) = -0,264705882352941*x0 + 50,5882352941176", str);
        }
        /// <summary>
        ///   Gets the coefficient of determination, as known as R² (r-squared).
        /// </summary>
        ///
        /// <remarks>
        ///   <para>
        ///    The coefficient of determination is used in the context of statistical models
        ///    whose main purpose is the prediction of future outcomes on the basis of other
        ///    related information. It is the proportion of variability in a data set that
        ///    is accounted for by the statistical model. It provides a measure of how well
        ///    future outcomes are likely to be predicted by the model.</para>
        ///   <para>
        ///    The R² coefficient of determination is a statistical measure of how well the
        ///    regression line approximates the real data points. An R² of 1.0 indicates
        ///    that the regression line perfectly fits the data.</para>
        /// </remarks>
        ///
        /// <returns>The R² (r-squared) coefficient for the given data.</returns>
        ///
        public double CoefficientOfDetermination(double[] inputs, double[] outputs, bool adjust)
        {
            double[][] X = new double[inputs.Length][];

            for (int i = 0; i < inputs.Length; i++)
            {
                // b[0]*1 + b[1]*inputs[i]
                X[i] = new double[] { 1.0, inputs[i] };
            }

            return(regression.CoefficientOfDetermination(X, outputs, adjust));
        }
Beispiel #3
0
        /// <summary>
        ///   Gets the coefficient of determination, as known as R² (r-squared).
        /// </summary>
        ///
        /// <remarks>
        ///   <para>
        ///    The coefficient of determination is used in the context of statistical models
        ///    whose main purpose is the prediction of future outcomes on the basis of other
        ///    related information. It is the proportion of variability in a data set that
        ///    is accounted for by the statistical model. It provides a measure of how well
        ///    future outcomes are likely to be predicted by the model.</para>
        ///   <para>
        ///    The R² coefficient of determination is a statistical measure of how well the
        ///    regression line approximates the real data points. An R² of 1.0 indicates
        ///    that the regression line perfectly fits the data.</para>
        /// </remarks>
        ///
        /// <returns>The R² (r-squared) coefficient for the given data.</returns>
        ///
        public double CoefficientOfDetermination(double[] inputs, double[] outputs, bool adjust)
        {
            double[][] X = new double[inputs.Length][];
            for (int i = 0; i < inputs.Length; i++)
            {
                X[i] = new double[NumberOfInputs];
                for (int j = 0; j < X[i].Length; j++)
                {
                    X[i][j] = Math.Pow(inputs[i], j);
                }
            }

            return(regression.CoefficientOfDetermination(X, outputs, adjust));
        }
        /// <summary>
        ///   Gets the coefficient of determination, as known as R² (r-squared).
        /// </summary>
        ///
        /// <remarks>
        ///   <para>
        ///    The coefficient of determination is used in the context of statistical models
        ///    whose main purpose is the prediction of future outcomes on the basis of other
        ///    related information. It is the proportion of variability in a data set that
        ///    is accounted for by the statistical model. It provides a measure of how well
        ///    future outcomes are likely to be predicted by the model.</para>
        ///   <para>
        ///    The R² coefficient of determination is a statistical measure of how well the
        ///    regression line approximates the real data points. An R² of 1.0 indicates
        ///    that the regression line perfectly fits the data.</para>
        /// </remarks>
        ///
        /// <returns>The R² (r-squared) coefficient for the given data.</returns>
        ///
        public double CoefficientOfDetermination(double[] inputs, double[] outputs, bool adjust, double[] weights = null)
        {
            var polynomial = new double[inputs.Length][];

            for (int i = 0; i < inputs.Length; i++)
            {
                polynomial[i] = new double[this.Degree];
                for (int j = 0; j < polynomial[i].Length; j++)
                {
                    polynomial[i][j] = Math.Pow(inputs[i], this.Degree - j);
                }
            }

            return(regression.CoefficientOfDetermination(polynomial, outputs, adjust, weights));
        }
        public void RegressTest3()
        {
            // We will try to model a plane as an equation in the form
            // "ax + by + c = z". We have two input variables (x and y)
            // and we will be trying to find two parameters a and b and 
            // an intercept term c.

            // Create a multiple linear regression for two input and an intercept
            MultipleLinearRegression target = new MultipleLinearRegression(2, true);

            // Now suppose you have some points
            double[][] inputs = 
            {
                new double[] { 1, 1 },
                new double[] { 0, 1 },
                new double[] { 1, 0 },
                new double[] { 0, 0 },
            };

            // located in the same Z (z = 1)
            double[] outputs = { 1, 1, 1, 1 };


            // Now we will try to fit a regression model
            double error = target.Regress(inputs, outputs);

            // As result, we will be given the following:
            double a = target.Coefficients[0]; // a = 0
            double b = target.Coefficients[1]; // b = 0
            double c = target.Coefficients[2]; // c = 1

            // This is the plane described by the equation
            // ax + by + c = z => 0x + 0y + 1 = z => 1 = z.


            Assert.AreEqual(0.0, a, 1e-6);
            Assert.AreEqual(0.0, b, 1e-6);
            Assert.AreEqual(1.0, c, 1e-6);
            Assert.AreEqual(0.0, error, 1e-6);


            double r = target.CoefficientOfDetermination(inputs, outputs);
            Assert.AreEqual(1.0, r);

        }
        public void RegressTest2()
        {
            MultipleLinearRegression target = new MultipleLinearRegression(1, false);

            double[][] inputs = 
            {
                new double[] { 80, 1 },
                new double[] { 60, 1 },
                new double[] { 10, 1 },
                new double[] { 20, 1 },
                new double[] { 30, 1 },
            };

            double[] outputs = { 20, 40, 30, 50, 60 };


            double error = target.Regress(inputs, outputs);

            double slope = target.Coefficients[0];
            double intercept = target.Coefficients[1];

            Assert.AreEqual(-0.264706, slope, 1e-5);
            Assert.AreEqual(50.588235, intercept, 1e-5);
            Assert.AreEqual(761.764705, error, 1e-5);


            double r = target.CoefficientOfDetermination(inputs, outputs);
            Assert.AreEqual(0.23823529, r, 1e-6);

            string str = target.ToString();

            Assert.AreEqual("y(x0, x1) = -0,264705882352942*x0 + 50,5882352941177*x1", str);
        }
        public void RegressTest6()
        {
            MultipleLinearRegression target = new MultipleLinearRegression(2, false);

            double[][] inputs = 
            {
                new double[] { 0, 0 },
                new double[] { 0, 0 },
                new double[] { 0, 0 },
                new double[] { 0, 0 },
                new double[] { 0, 0 },
            };

            double[] outputs = { 20, 40, 30, 50, 60 };


            double error = target.Regress(inputs, outputs);

            double slope = target.Coefficients[0];
            double intercept = target.Coefficients[1];

            Assert.AreEqual(0, slope, 1e-5);
            Assert.AreEqual(0, intercept, 1e-5);
            Assert.AreEqual(9000, error);


            double r = target.CoefficientOfDetermination(inputs, outputs);
            Assert.AreEqual(-8, r, 1e-6);

            string str = target.ToString(null, System.Globalization.CultureInfo.GetCultureInfo("pt-BR"));

            Assert.AreEqual("y(x0, x1) = 0*x0 + 0*x1", str);
        }
        public void ComputeTest()
        {
            // Example 5.1 from 
            // http://www.weibull.com/DOEWeb/estimating_regression_models_using_least_squares.htm

            double[][] inputs = 
            {
                new double[] { 41.9, 29.1 }, // 1 
                new double[] { 43.4, 29.3 }, // 2
                new double[] { 43.9, 29.5 }, // 3
                new double[] { 44.5, 29.7 }, // 4
                new double[] { 47.3, 29.9 }, // 5
                new double[] { 47.5, 30.3 }, // 6
                new double[] { 47.9, 30.5 }, // 7
                new double[] { 50.2, 30.7 }, // 8
                new double[] { 52.8, 30.8 }, // 9
                new double[] { 53.2, 30.9 }, // 10
                new double[] { 56.7, 31.5 }, // 11
                new double[] { 57.0, 31.7 }, // 12
                new double[] { 63.5, 31.9 }, // 13
                new double[] { 65.3, 32.0 }, // 14
                new double[] { 71.1, 32.1 }, // 15
                new double[] { 77.0, 32.5 }, // 16
                new double[] { 77.8, 32.9 }, // 17
            };

            double[] outputs = 
            {
                251.3,
                251.3,
                248.3,
                267.5,
                273.0,
                276.5,
                270.3,
                274.9,
                285.0,
                290.0,
                297.0,
                302.5,
                304.5,
                309.3,
                321.7,
                330.7,
                349.0,
            };

            var target = new MultipleLinearRegressionAnalysis(inputs, outputs, intercept: true);

            target.Compute();

            Assert.AreEqual(0.968022, target.RSquared, 1e-5);
            Assert.AreEqual(0.963454, target.RSquareAdjusted, 1e-5);

            Assert.AreEqual(2, target.Table[0].DegreesOfFreedom);
            Assert.AreEqual(14, target.Table[1].DegreesOfFreedom);
            Assert.AreEqual(16, target.Table[2].DegreesOfFreedom);

            Assert.AreEqual(12816.345909673832, target.Table[0].SumOfSquares, 1e-10);
            Assert.AreEqual(423.37409032616614, target.Table[1].SumOfSquares, 1e-10);
            Assert.AreEqual(13239.719999999998, target.Table[2].SumOfSquares, 1e-10);

            Assert.IsFalse(Double.IsNaN(target.Table[0].SumOfSquares));
            Assert.IsFalse(Double.IsNaN(target.Table[1].SumOfSquares));
            Assert.IsFalse(Double.IsNaN(target.Table[2].SumOfSquares));

            Assert.AreEqual(6408.1729548369158, target.Table[0].MeanSquares, 1e-10);
            Assert.AreEqual(30.241006451869008, target.Table[1].MeanSquares, 1e-10);

            Assert.IsFalse(Double.IsNaN(target.Table[0].MeanSquares));
            Assert.IsFalse(Double.IsNaN(target.Table[1].MeanSquares));

            Assert.AreEqual(211.90342871147618, target.Table[0].Statistic.Value, 1e-10);
            Assert.AreEqual(0.000000000034191538489380946, target.Table[0].Significance.PValue, 1e-16);
            Assert.IsFalse(Double.IsNaN(target.Table[0].Significance.PValue));

            Assert.AreEqual(1.2387232694931045, target.Coefficients[0].Value, 1e-10);
            Assert.AreEqual(12.082353323342893, target.Coefficients[1].Value, 1e-10);
            Assert.AreEqual(-153.51169396147372, target.Coefficients[2].Value, 1e-10);

            Assert.IsFalse(Double.IsNaN(target.Coefficients[0].Value));
            Assert.IsFalse(Double.IsNaN(target.Coefficients[1].Value));
            Assert.IsFalse(Double.IsNaN(target.Coefficients[2].Value));

            Assert.IsFalse(target.Coefficients[0].IsIntercept);
            Assert.IsFalse(target.Coefficients[1].IsIntercept);
            Assert.IsTrue(target.Coefficients[2].IsIntercept);

            Assert.AreEqual(0.394590262021004, target.Coefficients[0].StandardError, 1e-10);
            Assert.AreEqual(3.9322914100115307, target.Coefficients[1].StandardError, 1e-10);

            Assert.IsFalse(Double.IsNaN(target.Coefficients[0].StandardError));
            Assert.IsFalse(Double.IsNaN(target.Coefficients[1].StandardError));

            Assert.AreEqual(3.1392646720388844, target.Coefficients[0].TTest.Statistic, 1e-10);
            Assert.AreEqual(3.0725986615797285, target.Coefficients[1].TTest.Statistic, 1e-10);

            Assert.IsFalse(Double.IsNaN(target.Coefficients[0].TTest.Statistic));
            Assert.IsFalse(Double.IsNaN(target.Coefficients[1].TTest.Statistic));

            DoubleRange range;

            range = target.Coefficients[0].TTest.GetConfidenceInterval(0.9);
            Assert.AreEqual(0.54372744151743968, range.Min, 1e-10);
            Assert.AreEqual(1.9337190974687695, range.Max, 1e-10);

            range = target.Coefficients[1].TTest.GetConfidenceInterval(0.9);
            Assert.AreEqual(5.1563686060690417, range.Min, 1e-10);
            Assert.AreEqual(19.008338040616746, range.Max, 1e-10);


            MultipleLinearRegression mlr = new MultipleLinearRegression(2, true);
            mlr.Regress(inputs, outputs);
            double r2 = mlr.CoefficientOfDetermination(inputs, outputs, false);
            double r2a = mlr.CoefficientOfDetermination(inputs, outputs, true);

            Assert.AreEqual(r2, target.RSquared);
            Assert.AreEqual(r2a, target.RSquareAdjusted);
        }
        public void RegressTest7()
        {
            double[][] example2 =
            {
                new double[] { -0.47, 1.16, -1.25 },
                new double[] {  0.55, 1.15, -0.78 },
                new double[] {  1.38, 0.63, -0.84 },
                new double[] {  0.99, 0.63, -0.81 },
                new double[] {  1.72, 0.62, -1.59 },
                new double[] {  1.05, 0.62, -1.05 },
                new double[] { -0.51, 0.62, -0.98 },
                new double[] {  1.83, 0.61,  0.86 },
                new double[] {  1.16, 0.61,  0.15 },
                new double[] {  0.59, 0.61, -0.28 },
                new double[] {  0.40, 0.60, -0.30 },
                new double[] {  0.48, 0.60, -0.41 },
                new double[] {  1.28, 0.53, -0.31 },
                new double[] {  0.36, 0.53, -0.41 },
                new double[] {  0.93, 0.16, -0.19 },
                new double[] { -0.61, 0.16, -0.32 },
                new double[] { -0.58, 0.16, -0.01 },
                new double[] {  0.53, 0.16, -0.13 },
                new double[] {  1.48, 0.16,  1.12 },
                new double[] { -0.34, 0.15, -0.10 },
                new double[] {  0.81, 0.15,  0.14 },
                new double[] {  0.85, 0.15, -0.02 },
                new double[] {  0.69, 0.15, -0.16 },
                new double[] {  0.39, 0.15, -0.33 },
                new double[] {  0.70, 0.00,  2.00 },
                new double[] {  0.25, 0.00, -0.01 },
                new double[] { -0.96, 0.85, -0.19 },
                new double[] {  1.04, 0.84,  0.35 },
                new double[] {  0.30, 0.83,  0.05 },
                new double[] {  0.28, 0.83,  0.84 },
                new double[] {  0.18, 0.82,  0.06 },
                new double[] {  0.49, 0.81,  0.41 },
                new double[] {  0.40, 0.81,  0.50 },
                new double[] {  0.41, 0.80,  0.00 },
                new double[] {  0.06, 0.79,  0.39 },
                new double[] {  0.55, 0.79,  0,55 },
            };


            double[][] inputs = example2.GetColumns(1, 2);
            double[] outputs = example2.GetColumn(0);

            bool thrown = false;

            MultipleLinearRegressionAnalysis target;

            try
            {
                target = new MultipleLinearRegressionAnalysis(inputs, outputs, new string[0], "Test", false);
            }
            catch (ArgumentException) { thrown = true; }

            Assert.IsTrue(thrown);

            target = new MultipleLinearRegressionAnalysis(inputs, outputs, new string[2], "Test", false);

            target.Compute();

            Assert.AreEqual(target.Array, inputs);
            Assert.AreEqual(target.Outputs, outputs);

            Assert.AreEqual(-0.19371930561139417, target.RSquared, 1e-5);
            Assert.AreEqual(-0.26606593019390279, target.RSquareAdjusted, 1e-5);

            Assert.AreEqual(2, target.Table[0].DegreesOfFreedom);
            Assert.AreEqual(33, target.Table[1].DegreesOfFreedom);
            Assert.AreEqual(35, target.Table[2].DegreesOfFreedom);

            Assert.AreEqual(-2.9165797494934651, target.Table[0].SumOfSquares, 1e-10);
            Assert.AreEqual(17.972279749493463, target.Table[1].SumOfSquares, 1e-10);
            Assert.AreEqual(15.055699999999998, target.Table[2].SumOfSquares, 1e-10);

            Assert.IsFalse(Double.IsNaN(target.Table[0].SumOfSquares));
            Assert.IsFalse(Double.IsNaN(target.Table[1].SumOfSquares));
            Assert.IsFalse(Double.IsNaN(target.Table[2].SumOfSquares));

            Assert.AreEqual(-1.4582898747467326, target.Table[0].MeanSquares, 1e-10);
            Assert.AreEqual(0.54461453786343827, target.Table[1].MeanSquares, 1e-10);

            Assert.IsFalse(Double.IsNaN(target.Table[0].MeanSquares));
            Assert.IsFalse(Double.IsNaN(target.Table[1].MeanSquares));

            Assert.AreEqual(-2.6776550630978524, target.Table[0].Statistic.Value, 1e-10);
            Assert.AreEqual(1, target.Table[0].Significance.PValue, 1e-16);
            Assert.IsFalse(Double.IsNaN(target.Table[0].Significance.PValue));

            Assert.AreEqual(0.72195200211671728, target.Coefficients[0].Value, 1e-10);
            Assert.AreEqual(0.15872233321508125, target.Coefficients[1].Value, 1e-10);

            Assert.IsFalse(Double.IsNaN(target.Coefficients[0].Value));
            Assert.IsFalse(Double.IsNaN(target.Coefficients[1].Value));

            Assert.IsFalse(target.Coefficients[0].IsIntercept);
            Assert.IsFalse(target.Coefficients[1].IsIntercept);

            Assert.AreEqual(0.20506051379737225, target.Coefficients[0].StandardError, 1e-10);
            Assert.AreEqual(0.18842330299464302, target.Coefficients[1].StandardError, 1e-10);

            Assert.IsFalse(Double.IsNaN(target.Coefficients[0].StandardError));
            Assert.IsFalse(Double.IsNaN(target.Coefficients[1].StandardError));

            Assert.AreEqual(3.5206778172325479, target.Coefficients[0].TTest.Statistic, 1e-10);
            Assert.AreEqual(0.84237103740609942, target.Coefficients[1].TTest.Statistic, 1e-10);

            Assert.IsFalse(Double.IsNaN(target.Coefficients[0].TTest.Statistic));
            Assert.IsFalse(Double.IsNaN(target.Coefficients[1].TTest.Statistic));

            DoubleRange range;

            range = target.Coefficients[0].TTest.GetConfidenceInterval(0.9);
            Assert.AreEqual(0.37491572761667513, range.Min, 1e-10);
            Assert.AreEqual(1.0689882766167593, range.Max, 1e-10);

            range = target.Coefficients[1].TTest.GetConfidenceInterval(0.9);
            Assert.AreEqual(-0.16015778606945111, range.Min, 1e-10);
            Assert.AreEqual(0.47760245249961364, range.Max, 1e-10);


            MultipleLinearRegression mlr = new MultipleLinearRegression(2, false);
            mlr.Regress(inputs, outputs);
            double r2 = mlr.CoefficientOfDetermination(inputs, outputs, false);
            double r2a = mlr.CoefficientOfDetermination(inputs, outputs, true);

            Assert.AreEqual(r2, target.RSquared);
            Assert.AreEqual(r2a, target.RSquareAdjusted);
        }
        public void RegressTest2()
        {
            var target = new MultipleLinearRegression(2, false);

            Assert.IsFalse(target.HasIntercept);

            double[][] inputs = 
            {
                new double[] { 80, 1 },
                new double[] { 60, 1 },
                new double[] { 10, 1 },
                new double[] { 20, 1 },
                new double[] { 30, 1 },
            };

            double[] outputs = { 20, 40, 30, 50, 60 };


            double error = target.Regress(inputs, outputs);
            Assert.AreEqual(2, target.NumberOfInputs);
            Assert.AreEqual(1, target.NumberOfOutputs);
            double slope = target.Coefficients[0];
            double intercept = target.Coefficients[1];

            Assert.AreEqual(-0.264706, slope, 1e-5);
            Assert.AreEqual(50.588235, intercept, 1e-5);
            Assert.AreEqual(761.764705, error, 1e-5);


            double r = target.CoefficientOfDetermination(inputs, outputs);
            Assert.AreEqual(0.23823529, r, 1e-6);

            string str = target.ToString(null, System.Globalization.CultureInfo.GetCultureInfo("en-US"));

            Assert.AreEqual("y(x0, x1) = -0.264705882352941*x0 + 50.5882352941176*x1", str);
        }
Beispiel #11
0
        private void btnSampleRunAnalysis_Click(object sender, EventArgs e)
        {
            // Check requirements
            if (sourceTable == null)
            {
                MessageBox.Show("A sample spreadsheet can be found in the " +
                    "Resources folder in the same directory as this application.",
                    "Please load some data before attempting an analysis");
                return;
            }

            if (checkedListBox1.CheckedItems.Count == 0)
            {
                MessageBox.Show("Please select the dependent input variables to be used in the regression model.",
                    "Please choose at least one input variable");
            }

            // Finishes and save any pending changes to the given data
            dgvAnalysisSource.EndEdit();
            sourceTable.AcceptChanges();

            // Gets the column of the dependent variable
            String dependentName = (string)comboBox1.SelectedItem;
            DataTable dependent = sourceTable.DefaultView.ToTable(false, dependentName);

            // Gets the columns of the independent variables
            List<string> names = new List<string>();
            foreach (string name in checkedListBox1.CheckedItems)
            {
                names.Add(name);
            }
            String[] independentNames = names.ToArray();
            DataTable independent = sourceTable.DefaultView.ToTable(false, independentNames);

            // Creates the input and output matrices from the source data table
            double[][] input = independent.ToArray();
            double[] output = dependent.Columns[dependentName].ToArray();

            String[] sourceColumns;
            double[,] sourceMatrix = sourceTable.ToMatrix(out sourceColumns);

            // Creates the Simple Descriptive Analysis of the given source
            DescriptiveAnalysis sda = new DescriptiveAnalysis(sourceMatrix, sourceColumns);
            sda.Compute();

            // Populates statistics overview tab with analysis data
            dgvDistributionMeasures.DataSource = sda.Measures;

            // Creates the Logistic Regression Analysis of the given source
            lra = new LogisticRegressionAnalysis(input, output, independentNames, dependentName);

            // Compute the Logistic Regression Analysis
            lra.Compute();

            // Populates coefficient overview with analysis data
            dgvLogisticCoefficients.DataSource = lra.Coefficients;

            // Populate details about the fitted model
            tbChiSquare.Text = lra.ChiSquare.Statistic.ToString("N5");
            tbPValue.Text = lra.ChiSquare.PValue.ToString("N5");
            checkBox1.Checked = lra.ChiSquare.Significant;
            tbDeviance.Text = lra.Deviance.ToString("N5");
            tbLogLikelihood.Text = lra.LogLikelihood.ToString("N5");

            // Perform linear regression
            mlr = new MultipleLinearRegression(independentNames.Length, true);
            mlr.Regress(input, output);

            tbLinearExpression.Text = mlr.ToString();
            tbLinearR.Text = mlr.CoefficientOfDetermination(input, output, false).ToString("N5");
            tbLinearAdjustedR.Text = mlr.CoefficientOfDetermination(input, output, true).ToString("N5");

            // Populate projection source table
            string[] cols = independentNames;
            if (!independentNames.Contains(dependentName))
                cols = independentNames.Concatenate(dependentName);

            DataTable projSource = sourceTable.DefaultView.ToTable(false, cols);
            dgvProjectionSource.DataSource = projSource;
        }