Esempio n. 1
0
        public void LinearLogisticRegressionSimple()
        {
            Polynomial m = Polynomial.FromCoefficients(-1.0, 2.0);

            FrameTable table = new FrameTable();

            table.AddColumn <double>("x");
            table.AddColumn <string>("z");

            Random rng = new Random(2);
            ContinuousDistribution xDistribution = new CauchyDistribution(4.0, 2.0);

            for (int i = 0; i < 24; i++)
            {
                double x = xDistribution.GetRandomValue(rng);
                double y = m.Evaluate(x);
                double p = 1.0 / (1.0 + Math.Exp(-y));
                bool   z = (rng.NextDouble() < p);
                table.AddRow(x, z.ToString());
            }

            LinearLogisticRegressionResult fit = table["z"].As((string s) => Boolean.Parse(s)).LinearLogisticRegression(table["x"].As <double>());

            Assert.IsTrue(fit.Intercept.ConfidenceInterval(0.99).ClosedContains(m.Coefficient(0)));
            Assert.IsTrue(fit.Slope.ConfidenceInterval(0.99).ClosedContains(m.Coefficient(1)));
        }
Esempio n. 2
0
        public void LinearLogisticRegressionVariances()
        {
            // define model y = a + b0 * x0 + b1 * x1 + noise
            double a = -2.0;
            double b = 1.0;
            ContinuousDistribution xDistribution = new StudentDistribution(2.0);

            FrameTable data = new FrameTable();

            data.AddColumns <double>("a", "da", "b", "db", "abcov", "p", "dp");

            // draw a sample from the model
            Random rng = new Random(3);

            for (int j = 0; j < 32; j++)
            {
                List <double> xs = new List <double>();
                List <bool>   ys = new List <bool>();

                for (int i = 0; i < 32; i++)
                {
                    double x = xDistribution.GetRandomValue(rng);
                    double t = a + b * x;
                    double p = 1.0 / (1.0 + Math.Exp(-t));
                    bool   y = (rng.NextDouble() < p);
                    xs.Add(x);
                    ys.Add(y);
                }

                // do a linear regression fit on the model
                LinearLogisticRegressionResult result = ys.LinearLogisticRegression(xs);
                UncertainValue pp = result.Predict(1.0);

                data.AddRow(
                    result.Intercept.Value, result.Intercept.Uncertainty,
                    result.Slope.Value, result.Slope.Uncertainty,
                    result.Parameters.CovarianceMatrix[0, 1],
                    pp.Value, pp.Uncertainty
                    );
            }

            // The estimated parameters should agree with the model that generated the data.

            // The variances of the estimates should agree with the claimed variances
            Assert.IsTrue(data["a"].As <double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["da"].As <double>().Mean()));
            Assert.IsTrue(data["b"].As <double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["db"].As <double>().Mean()));
            Assert.IsTrue(data["a"].As <double>().PopulationCovariance(data["b"].As <double>()).ConfidenceInterval(0.99).ClosedContains(data["abcov"].As <double>().Mean()));
            Assert.IsTrue(data["p"].As <double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["dp"].As <double>().Mean()));
        }
Esempio n. 3
0
        public void LinearLogisticRegression()
        {
            // do a set of logistic regression fits
            // make sure not only that the fit parameters are what they should be, but that their variances/covariances are as returned

            Random rng = new Random(314159);

            // define logistic parameters
            double a0 = 1.0; double b0 = -1.0 / 2.0;
            //double a0 = -0.5; double b0 = 2.0;

            // keep track of sample of returned a and b fit parameters
            BivariateSample ps = new BivariateSample();

            // also keep track of returned covariance estimates
            // since these vary slightly from fit to fit, we will average them
            double caa = 0.0;
            double cbb = 0.0;
            double cab = 0.0;

            // do 50 fits
            for (int k = 0; k < 50; k++)
            {
                Console.WriteLine("k={0}", k);

                // generate a synthetic data set
                BivariateSample s = new BivariateSample();
                for (int i = 0; i < 50; i++)
                {
                    double x  = 2.0 * rng.NextDouble() - 1.0;
                    double ez = Math.Exp(a0 + b0 * x);
                    double P  = ez / (1.0 + ez);
                    if (rng.NextDouble() < P)
                    {
                        s.Add(x, 1.0);
                    }
                    else
                    {
                        s.Add(x, 0.0);
                    }
                }

                // do the regression
                LinearLogisticRegressionResult r = s.LinearLogisticRegression();

                // record best fit parameters
                double a = r.Intercept.Value;
                double b = r.Slope.Value;
                ps.Add(a, b);

                Console.WriteLine("{0}, {1}", a, b);

                // record estimated covariances
                caa += r.Parameters.CovarianceMatrix[0, 0];
                cbb += r.Parameters.CovarianceMatrix[1, 1];
                cab += r.Parameters.CovarianceMatrix[0, 1];
            }

            caa /= ps.Count;
            cbb /= ps.Count;
            cab /= ps.Count;

            // check that mean parameter estimates are what they should be: the underlying population parameters
            Assert.IsTrue(ps.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(a0));
            Assert.IsTrue(ps.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(b0));

            // check that parameter covarainces are what they should be: the reported covariance estimates
            Assert.IsTrue(ps.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(caa));
            Assert.IsTrue(ps.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(cbb));
            Assert.IsTrue(ps.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(cab));
        }