public void LinearLogisticRegression() {

    // Monte Carlo check of the logistic fit: repeat the fit on many synthetic data sets and
    // verify that (1) the fitted parameters scatter around the true values and (2) the scatter
    // of the fitted parameters matches the covariances that the fits themselves report.
    const int fitCount = 50;
    const int pointsPerFit = 50;

    Random rng = new Random(314159);

    // true parameters of the logistic model P(x) = e^z / (1 + e^z) with z = a0 + b0 * x
    double a0 = 1.0;
    double b0 = -1.0 / 2.0;

    // best-fit (a, b) pair returned by each fit
    BivariateSample ps = new BivariateSample();

    // running sums of the reported covariance entries; they differ slightly from fit to fit,
    // so they are averaged over all fits before being compared with the observed scatter
    double sumCaa = 0.0;
    double sumCbb = 0.0;
    double sumCab = 0.0;

    for (int fit = 0; fit < fitCount; fit++) {

        Console.WriteLine("k={0}", fit);

        // synthetic data set: x drawn from 2 * NextDouble() - 1, outcome 1 with probability P(x), else 0
        BivariateSample s = new BivariateSample();
        for (int point = 0; point < pointsPerFit; point++) {
            double x = 2.0 * rng.NextDouble() - 1.0;
            double ez = Math.Exp(a0 + b0 * x);
            double P = ez / (1.0 + ez);
            double outcome = (rng.NextDouble() < P) ? 1.0 : 0.0;
            s.Add(x, outcome);
        }

        // fit, then record the best-fit parameters
        FitResult r = s.LinearLogisticRegression();
        double a = r.Parameter(0).Value;
        double b = r.Parameter(1).Value;
        ps.Add(a, b);
        Console.WriteLine("{0}, {1}", a, b);

        // accumulate the covariance estimates reported by this fit
        sumCaa += r.Covariance(0, 0);
        sumCbb += r.Covariance(1, 1);
        sumCab += r.Covariance(0, 1);

    }

    // average reported covariances over all recorded fits
    double caa = sumCaa / ps.Count;
    double cbb = sumCbb / ps.Count;
    double cab = sumCab / ps.Count;

    // the mean of the fitted parameters should be consistent with the underlying population parameters
    Assert.IsTrue(ps.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(a0));
    Assert.IsTrue(ps.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(b0));

    // the observed parameter variances and covariance should be consistent with the reported estimates
    Assert.IsTrue(ps.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(caa));
    Assert.IsTrue(ps.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(cbb));
    Assert.IsTrue(ps.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(cab));

}
public void BivariateLogisticRegression() {

    // Builds the same logistic data set in a BivariateSample and in a two-column
    // MultivariateSample, fits both, and checks that the bivariate fit and the fit on the
    // two-column view of the multivariate sample agree.

    // true model: z = c[0] * x + c[1], P(y = 1) = e^z / (1 + e^z)
    double[] c = new double[] { -0.1, 1.0 };

    Random rng = new Random(1);
    UniformDistribution pointDistribution = new UniformDistribution(Interval.FromEndpoints(-4.0, 4.0));

    BivariateSample sample1 = new BivariateSample();
    MultivariateSample sample2 = new MultivariateSample(2);

    // both samples receive exactly the same (x, y) points in the same order
    for (int k = 0; k < 1000; k++) {
        double x = pointDistribution.GetRandomValue(rng);
        double z = c[0] * x + c[1];
        double ez = Math.Exp(z);
        double p = ez / (1.0 + ez);
        double y = (rng.NextDouble() < p) ? 1.0 : 0.0;
        sample1.Add(x, y);
        sample2.Add(x, y);
    }

    // diagnostic output only: moment-based ratios printed for comparison with the fitted parameters below
    Console.WriteLine(sample1.Covariance / sample1.X.Variance / sample1.Y.Mean / (1.0 - sample1.Y.Mean));
    Console.WriteLine(sample1.Covariance / sample1.X.Variance / sample1.Y.Variance);

    FitResult result1 = sample1.LinearLogisticRegression();
    FitResult result2 = sample2.TwoColumns(0, 1).LinearLogisticRegression();
    FitResult result3 = sample2.LogisticLinearRegression(1);

    // result1 and result2 come from the same fitting routine applied to the same points,
    // so they must have the same number of parameters and the same parameter values
    Assert.IsTrue(result1.Dimension == result2.Dimension);

    for (int i = 0; i < result1.Dimension; i++) {

        double v1 = result1.Parameter(i).Value;
        double v2 = result2.Parameter(i).Value;
        // relative tolerance; also holds when both values are exactly zero
        Assert.IsTrue(Math.Abs(v1 - v2) <= 1.0E-8 * (Math.Abs(v1) + Math.Abs(v2)));

        // NOTE(review): the parameter ordering of the multivariate fit (result3) may differ from
        // the bivariate fit -- confirm before asserting agreement; printed side by side for inspection
        Console.WriteLine("{0} {1} {2}",
            i, result1.Parameter(i), result3.Parameter(i)
        );

    }

}