Example #1
 public static DTOs.Responses.LinearRegressionResponse ToDto(this LinearRegressionResult result)
 {
     return(new DTOs.Responses.LinearRegressionResponse
     {
         Intercept = new DTOs.Responses.CoefficientResponse
         {
             PValue = Number(result.Intercept.PValue),
             StandardError = result.Intercept.StandardError,
             TStatistic = result.Intercept.TStatistic,
             Value = result.Intercept.Value
         },
         Slope = result.SlopeLst.Select(_ => new DTOs.Responses.CoefficientResponse
         {
             PValue = Number(_.PValue),
             StandardError = _.StandardError,
             TStatistic = _.TStatistic,
             Value = _.Value
         }).ToList(),
         ResidualStandardError = result.ResidualStandardError,
         ResidualSumOfSquares = result.ResidualSumOfSquare,
         RSquare = result.RSquare
     });
 }
        public void MultivariateLinearRegressionAgreement2()
        {
            // A multivariate linear regression with just one x-column should be the same as a bivariate linear regression.

            double intercept = 1.0;
            double slope     = -2.0;
            ContinuousDistribution yErrDist = new NormalDistribution(0.0, 3.0);
            UniformDistribution    xDist    = new UniformDistribution(Interval.FromEndpoints(-2.0, 3.0));
            Random rng = new Random(1111111);

            MultivariateSample multi = new MultivariateSample("x", "y");

            for (int i = 0; i < 10; i++)
            {
                double x = xDist.GetRandomValue(rng);
                double y = intercept + slope * x + yErrDist.GetRandomValue(rng);
                multi.Add(x, y);
            }

            // Old multi linear regression code.
            MultiLinearRegressionResult result1 = multi.LinearRegression(1);

            // Simple linear regression code.
            LinearRegressionResult result2 = multi.TwoColumns(0, 1).LinearRegression();

            Assert.IsTrue(TestUtilities.IsNearlyEqual(result1.Parameters["Intercept"].Estimate, result2.Parameters["Intercept"].Estimate));

            // New multi linear regression code.
            MultiLinearRegressionResult result3 = multi.Column(1).ToList().MultiLinearRegression(multi.Column(0).ToList());

            Assert.IsTrue(TestUtilities.IsNearlyEqual(result1.Parameters["Intercept"].Estimate, result3.Parameters["Intercept"].Estimate));
        }
Example #3
        public void TestBivariateRegression()
        {
            // Do a bunch of linear regressions. r^2 should be distributed as expected.

            double a0 = 1.0;
            double b0 = 0.0;

            Random rng = new Random(1001110000);
            ContinuousDistribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-2.0, 4.0));
            ContinuousDistribution eDistribution = new NormalDistribution();

            List <double> r2Sample = new List <double>();

            for (int i = 0; i < 500; i++)
            {
                BivariateSample xySample = new BivariateSample();
                for (int k = 0; k < 10; k++)
                {
                    double x = xDistribution.GetRandomValue(rng);
                    double y = a0 + b0 * x + eDistribution.GetRandomValue(rng);
                    xySample.Add(x, y);
                }
                LinearRegressionResult fit = xySample.LinearRegression();
                double a = fit.Intercept.Value;
                double b = fit.Slope.Value;

                r2Sample.Add(fit.RSquared);
            }

            ContinuousDistribution r2Distribution = new BetaDistribution((2 - 1) / 2.0, (10 - 2) / 2.0);
            TestResult             ks             = r2Sample.KolmogorovSmirnovTest(r2Distribution);

            Assert.IsTrue(ks.Probability > 0.05);
        }
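A note on the Beta parameters used above (my own summary of the standard result, not part of the example code): for an ordinary least-squares fit of m parameters (here intercept plus slope, so m = 2) to n points with no true dependence, the coefficient of determination follows a Beta distribution,

    R^2 \sim \mathrm{Beta}\!\left(\frac{m - 1}{2},\; \frac{n - m}{2}\right)

With m = 2 and n = 10 points per fit this is Beta(0.5, 4), which is exactly the BetaDistribution((2 - 1) / 2.0, (10 - 2) / 2.0) constructed before the Kolmogorov-Smirnov test.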
Example #4
        public void InternetSampleDownload()
        {
            FrameTable table = DownloadFrameTable(new Uri("https://raw.githubusercontent.com/Dataweekends/zero_to_deep_learning_udemy/master/data/weight-height.csv"));
            FrameView  view  = table.WhereNotNull();

            view.AddComputedColumn("Bmi", (FrameRow r) => {
                double h = (double)r["Height"];
                double w = (double)r["Weight"];
                return(w / (h * h));
            });

            FrameView males   = view.Where("Gender", (string s) => (s == "Male"));
            FrameView females = view.Where("Gender", (string s) => (s == "Female"));

            SummaryStatistics maleSummary   = new SummaryStatistics(males["Height"].As <double>());
            SummaryStatistics femaleSummary = new SummaryStatistics(females["Height"].As <double>());

            TestResult allNormal    = view["Height"].As <double>().ShapiroFranciaTest();
            TestResult maleNormal   = males["Height"].As <double>().ShapiroFranciaTest();
            TestResult femaleNormal = females["Height"].As <double>().ShapiroFranciaTest();

            TestResult tTest  = Univariate.StudentTTest(males["Height"].As <double>(), females["Height"].As <double>());
            TestResult mwTest = Univariate.MannWhitneyTest(males["Height"].As <double>(), females["Height"].As <double>());

            LinearRegressionResult     result0 = males["Weight"].As <double>().LinearRegression(males["Height"].As <double>());
            PolynomialRegressionResult result1 = males["Height"].As <double>().PolynomialRegression(males["Weight"].As <double>(), 1);
            PolynomialRegressionResult result2 = males["Height"].As <double>().PolynomialRegression(males["Weight"].As <double>(), 2);
            PolynomialRegressionResult result3 = males["Height"].As <double>().PolynomialRegression(males["Weight"].As <double>(), 3);

            //MultiLinearRegressionResult multi = view["Weight"].As<double>().MultiLinearRegression(view["Height"].As<double>(), view["Gender"].As<string>().Select(s => (s == "Male") ? 1.0 : 0.0).ToList());
        }
Example #5
 /// <summary>
 /// Constructor
 /// </summary>
 public LcmsDataSet()
 {
     PreviouslyAnalyzed = false;
     Name             = String.Empty;
     Tool             = LcmsIdentificationTool.Raw;
     Evidences        = new List <Evidence>();
     RegressionResult = new LinearRegressionResult();
 }
Example #6
 /// <summary>
 /// Constructor
 /// </summary>
 /// <param name="name">Name of dataset</param>
 /// <param name="tool">Format of dataset</param>
 /// <param name="evidences">Evidences in dataset</param>
 public LcmsDataSet(string name, LcmsIdentificationTool tool, IEnumerable <Evidence> evidences)
 {
     PreviouslyAnalyzed = false;
     Name             = name;
     Tool             = tool;
     Evidences        = new List <Evidence>(evidences);
     RegressionResult = new LinearRegressionResult();
 }
Example #7
        public void BivariateLinearRegressionNullDistribution()
        {
            // create uncorrelated x and y values
            // the distribution of F-test statistics returned by linear fits should follow the expected F-distribution

            Random             rng = new Random(987654321);
            NormalDistribution xd  = new NormalDistribution(1.0, 2.0);
            NormalDistribution yd  = new NormalDistribution(-3.0, 4.0);

            Sample fs = new Sample();

            Sample rSample = new Sample();
            ContinuousDistribution rDistribution = null;

            Sample fSample = new Sample();
            ContinuousDistribution fDistribution = null;

            for (int i = 0; i < 127; i++)
            {
                BivariateSample sample = new BivariateSample();
                for (int j = 0; j < 7; j++)
                {
                    sample.Add(xd.GetRandomValue(rng), yd.GetRandomValue(rng));
                }
                LinearRegressionResult result = sample.LinearRegression();

                double f = result.F.Statistic;
                fs.Add(f);

                rSample.Add(result.R.Statistic);
                rDistribution = result.R.Distribution;

                fSample.Add(result.F.Statistic);
                fDistribution = result.F.Distribution;

                Assert.IsTrue(result.F.Statistic == result.Anova.Result.Statistic);

                Assert.IsTrue(TestUtilities.IsNearlyEqual(
                    result.R.Probability, result.F.Probability,
                    new EvaluationSettings() { RelativePrecision = 1.0E-14, AbsolutePrecision = 1.0E-16 }
                ));
            }

            ContinuousDistribution fd = new FisherDistribution(1, 5);

            Console.WriteLine("{0} v. {1}", fs.PopulationMean, fd.Mean);
            TestResult t = fs.KolmogorovSmirnovTest(fd);

            Console.WriteLine(t.LeftProbability);
            Assert.IsTrue(t.LeftProbability < 0.95);

            Assert.IsTrue(rSample.KuiperTest(rDistribution).Probability > 0.05);
            Assert.IsTrue(fSample.KuiperTest(fDistribution).Probability > 0.05);
        }
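For orientation (my note, not from the test source): when x and y are independent, the F statistic of a simple linear regression over n points follows a Fisher distribution with 1 and n - 2 degrees of freedom,

    F \sim F(1,\; n - 2)

With n = 7 points per fit this is F(1, 5), which is why the statistics collected above are compared against new FisherDistribution(1, 5).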
        public void LinearRegressionVariances()
        {
            // do a set of linear regression fits
            // make sure not only that the fit parameters are what they should be, but also that their variances/covariances agree with the values each fit reports

            Random rng = new Random(314159);

            // define line parameters
            double a0 = 2.0; double b0 = -1.0;

            // do a lot of fits, recording results of each
            FrameTable data = new FrameTable();

            data.AddColumns <double>("a", "va", "b", "vb", "abCov", "p", "dp");

            for (int k = 0; k < 128; k++)
            {
                // we should be able to draw x's from any distribution; noise should be drawn from a normal distribution
                ContinuousDistribution xd = new LogisticDistribution();
                ContinuousDistribution nd = new NormalDistribution(0.0, 2.0);

                // generate a synthetic data set
                BivariateSample sample = new BivariateSample();
                for (int i = 0; i < 12; i++)
                {
                    double x = xd.GetRandomValue(rng);
                    double y = a0 + b0 * x + nd.GetRandomValue(rng);
                    sample.Add(x, y);
                }

                // do the regression
                LinearRegressionResult result = sample.LinearRegression();

                // record result
                UncertainValue p = result.Predict(12.0);
                data.AddRow(new Dictionary <string, object>()
                {
                    { "a", result.Intercept.Value },
                    { "va", result.Parameters.VarianceOf("Intercept") },
                    { "b", result.Slope.Value },
                    { "vb", result.Parameters.VarianceOf("Slope") },
                    { "abCov", result.Parameters.CovarianceOf("Slope", "Intercept") },
                    { "p", p.Value },
                    { "dp", p.Uncertainty }
                });
            }

            // variances of parameters should agree with predictions
            Assert.IsTrue(data["a"].As <double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(data["va"].As <double>().Median()));
            Assert.IsTrue(data["b"].As <double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(data["vb"].As <double>().Median()));
            Assert.IsTrue(data["a"].As <double>().PopulationCovariance(data["b"].As <double>()).ConfidenceInterval(0.99).ClosedContains(data["abCov"].As <double>().Median()));

            // variance of prediction should agree with claim
            Assert.IsTrue(data["p"].As <double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["dp"].As <double>().Median()));
        }
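The variance and covariance checks above correspond to the textbook ordinary least-squares results (my notation; the test itself only compares sampled and reported values). For the model y = a + b x with noise variance \sigma^2, n points, and mean regressor \bar{x},

    \operatorname{Var}(\hat{b}) = \frac{\sigma^2}{\sum_i (x_i - \bar{x})^2}, \qquad
    \operatorname{Var}(\hat{a}) = \sigma^2 \left( \frac{1}{n} + \frac{\bar{x}^2}{\sum_i (x_i - \bar{x})^2} \right), \qquad
    \operatorname{Cov}(\hat{a}, \hat{b}) = -\,\frac{\sigma^2 \bar{x}}{\sum_i (x_i - \bar{x})^2}

The test verifies that the spread of fitted parameters across many synthetic data sets agrees with the variances and covariance each fit reports.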
Example #9
        public void TestBivariateRegression()
        {
            double a0 = 1.0;
            double b0 = 0.0;

            Random rng = new Random(1001110000);
            ContinuousDistribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-2.0, 4.0));
            ContinuousDistribution eDistribution = new NormalDistribution();

            Sample r2Sample = new Sample();

            for (int i = 0; i < 500; i++)
            {
                BivariateSample xySample = new BivariateSample();
                for (int k = 0; k < 10; k++)
                {
                    double x = xDistribution.GetRandomValue(rng);
                    double y = a0 + b0 * x + eDistribution.GetRandomValue(rng);
                    xySample.Add(x, y);
                }
                LinearRegressionResult fit = xySample.LinearRegression();
                double a = fit.Intercept.Value;
                double b = fit.Slope.Value;

                double ss2 = 0.0;
                double ss1 = 0.0;
                foreach (XY xy in xySample)
                {
                    ss2 += MoreMath.Sqr(xy.Y - (a + b * xy.X));
                    ss1 += MoreMath.Sqr(xy.Y - xySample.Y.Mean);
                }
                double r2 = fit.RSquared;
                r2Sample.Add(r2);
            }

            Console.WriteLine("{0} {1} {2} {3} {4}", r2Sample.Count, r2Sample.PopulationMean, r2Sample.StandardDeviation, r2Sample.Minimum, r2Sample.Maximum);

            ContinuousDistribution r2Distribution = new BetaDistribution((2 - 1) / 2.0, (10 - 2) / 2.0);

            //Distribution r2Distribution = new BetaDistribution((10 - 2) / 2.0, (2 - 1) / 2.0);
            Console.WriteLine("{0} {1}", r2Distribution.Mean, r2Distribution.StandardDeviation);

            TestResult ks = r2Sample.KolmogorovSmirnovTest(r2Distribution);

            Console.WriteLine(ks.RightProbability);
            Console.WriteLine(ks.Probability);
        }
        public void BivariateLinearRegressionNullDistribution()
        {
            // Create uncorrelated x and y values and do a linear fit.
            // The r-tests and F-test statistics returned by the linear fits
            // should agree and both test statistics should follow their claimed
            // distributions.

            Random             rng = new Random(987654321);
            NormalDistribution xd  = new NormalDistribution(1.0, 2.0);
            NormalDistribution yd  = new NormalDistribution(-3.0, 4.0);

            Sample rSample = new Sample();
            ContinuousDistribution rDistribution = null;

            Sample fSample = new Sample();
            ContinuousDistribution fDistribution = null;

            for (int i = 0; i < 127; i++)
            {
                BivariateSample sample = new BivariateSample();
                for (int j = 0; j < 7; j++)
                {
                    sample.Add(xd.GetRandomValue(rng), yd.GetRandomValue(rng));
                }
                LinearRegressionResult result = sample.LinearRegression();

                rSample.Add(result.R.Statistic.Value);
                rDistribution = result.R.Statistic.Distribution;

                fSample.Add(result.F.Statistic.Value);
                fDistribution = result.F.Statistic.Distribution;

                Assert.IsTrue(result.F.Statistic.Value == result.Anova.Result.Statistic.Value);

                Assert.IsTrue(TestUtilities.IsNearlyEqual(
                    result.R.Probability, result.F.Probability,
                    new EvaluationSettings() { RelativePrecision = 1.0E-13, AbsolutePrecision = 1.0E-16 }
                ));
            }

            Assert.IsTrue(rSample.KuiperTest(rDistribution).Probability > 0.05);
            Assert.IsTrue(fSample.KuiperTest(fDistribution).Probability > 0.05);
        }
Example #11
        public static void LinearRegression()
        {
            List <double> x = new List <double>()
            {
                -1.1, 2.2, 1.4, 0.5, 3.7, 2.8
            };
            List <double> y = new List <double>()
            {
                -2.9, 3.4, 0.9, 0.1, 6.8, 5.7
            };

            LinearRegressionResult result = y.LinearRegression(x);

            Console.WriteLine($"y = ({result.Intercept}) + ({result.Slope}) x");

            Console.WriteLine($"Fit explains {result.RSquared * 100.0}% of the variance");

            Console.WriteLine($"Probability of no dependence {result.R.Probability}.");

            OneWayAnovaResult anova = result.Anova;

            Console.WriteLine("Fit        dof = {0} SS = {1}", anova.Factor.DegreesOfFreedom, anova.Factor.SumOfSquares);
            Console.WriteLine("Residual   dof = {0} SS = {1}", anova.Residual.DegreesOfFreedom, anova.Residual.SumOfSquares);
            Console.WriteLine("Total      dof = {0} SS = {1}", anova.Total.DegreesOfFreedom, anova.Total.SumOfSquares);
            Console.WriteLine($"Probability of no dependence {anova.Result.Probability}.");

            // Print a 95% confidence interval on the slope
            Console.WriteLine($"slope is in {result.Slope.ConfidenceInterval(0.95)} with 95% confidence");

            IReadOnlyList <double> residuals = result.Residuals;

            ColumnVector    parameters = result.Parameters.ValuesVector;
            SymmetricMatrix covariance = result.Parameters.CovarianceMatrix;

            result.Parameters.CovarianceOf("Intercept", "Slope");

            double         x1 = 3.0;
            UncertainValue y1 = result.Predict(x1);

            Console.WriteLine($"Predicted y({x1}) = {y1}.");
        }
Example #12
 /// <summary>
 /// Constructor.
 /// </summary>
 /// <param name="matches">Feature matches between observed and target features.</param>
 /// <param name="alignmentFunction">Linear regression result used as the drift time alignment function.</param>
 public DriftTimeAlignmentResults(List <FeatureMatch <TTarget, TObserved> > matches, LinearRegressionResult alignmentFunction)
 {
     Matches           = matches;
     AlignmentFunction = alignmentFunction;
 }
        public void LinearRegressionSimple()
        {
            double a = -1.0;
            double b = 2.0;

            ContinuousDistribution xDistribution = new CauchyDistribution();
            ContinuousDistribution eDistribution = new NormalDistribution();

            int    n   = 16;
            Random rng = new Random(1);

            double[] x = new double[n];
            double[] y = new double[n];
            for (int i = 0; i < n; i++)
            {
                x[i] = xDistribution.GetRandomValue(rng);
                y[i] = a + b * x[i] + eDistribution.GetRandomValue(rng);
            }

            LinearRegressionResult result = y.LinearRegression(x);

            // Parameters should be right
            Assert.IsTrue(result.Intercept.ConfidenceInterval(0.95).ClosedContains(a));
            Assert.IsTrue(result.Slope.ConfidenceInterval(0.95).ClosedContains(b));

            // Reported values should be consistent
            Assert.IsTrue(result.Intercept == result.Parameters["Intercept"].Estimate);
            Assert.IsTrue(result.Intercept.Value == result.Parameters.ValuesVector[result.Parameters.IndexOf("Intercept")]);
            Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Intercept.Uncertainty, Math.Sqrt(result.Parameters.VarianceOf("Intercept"))));
            Assert.IsTrue(result.Slope == result.Parameters["Slope"].Estimate);
            Assert.IsTrue(result.Slope.Value == result.Parameters.ValuesVector[result.Parameters.IndexOf("Slope")]);
            Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Slope.Uncertainty, Math.Sqrt(result.Parameters.VarianceOf("Slope"))));

            // Residuals should agree with definition
            for (int i = 0; i < x.Length; i++)
            {
                double yp = result.Predict(x[i]).Value;
                Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Residuals[i], y[i] - yp));
            }

            // R and R-squared agree
            Assert.IsTrue(TestUtilities.IsNearlyEqual(result.RSquared, MoreMath.Sqr(result.R.Statistic.Value)));

            // F-test and R-test agree
            Assert.IsTrue(TestUtilities.IsNearlyEqual(result.F.Probability, result.R.Probability));

            // ANOVA's sums of squares are correct
            double SST = y.Variance() * y.Length;

            Assert.IsTrue(TestUtilities.IsNearlyEqual(SST, result.Anova.Total.SumOfSquares));
            double SSR = 0.0;

            foreach (double z in result.Residuals)
            {
                SSR += z * z;
            }
            Assert.IsTrue(TestUtilities.IsNearlyEqual(SSR, result.Anova.Residual.SumOfSquares));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(SSR, result.SumOfSquaredResiduals));

            // R is same as correlation coefficient
            Assert.IsTrue(TestUtilities.IsNearlyEqual(x.CorrelationCoefficient(y), result.R.Statistic.Value));
        }
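The ANOVA assertions in this test rest on the usual sum-of-squares decomposition, stated here in my own notation for orientation:

    \mathrm{SS}_{\mathrm{total}} = \mathrm{SS}_{\mathrm{fit}} + \mathrm{SS}_{\mathrm{residual}}, \qquad
    R^2 = 1 - \frac{\mathrm{SS}_{\mathrm{residual}}}{\mathrm{SS}_{\mathrm{total}}} = r^2

For a simple linear regression with an intercept, R^2 equals the square of the correlation coefficient, which is what the final assertion checks against x.CorrelationCoefficient(y).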
Example #14
        /// <summary>
        /// Performs a zero-mean drift time correction.
        /// </summary>
        /// <param name="observedEnumerable">All observed features to shift; these should already be drift time aligned.</param>
        /// <param name="targetEnumerable">Expected features.</param>
        /// <param name="massTolerance">Mass tolerance in ppm.</param>
        /// <param name="netTolerance">Normalized elution time tolerance.</param>
        /// <param name="driftTimeTolerance">Drift time tolerance to use.</param>
        public static DriftTimeAlignmentResults <TTarget, TObserved> CorrectForOffset(IEnumerable <TTarget> observedEnumerable, IEnumerable <TObserved> targetEnumerable, double massTolerance, double netTolerance, double driftTimeTolerance)
        {
            // Setup Tolerance for Feature Matching
            var featureMatcherParameters = new FeatureMatcherParameters();

            featureMatcherParameters.SetTolerances(massTolerance, netTolerance, (float)driftTimeTolerance);
            featureMatcherParameters.UseDriftTime = true;

            // Find all matches based on defined tolerances
            var featureMatcher = new FeatureMatcher <TTarget, TObserved>(observedEnumerable.ToList(), targetEnumerable.ToList(), featureMatcherParameters);
            var matchList      = featureMatcher.FindMatches(observedEnumerable.ToList(), targetEnumerable.ToList(), featureMatcherParameters.UserTolerances, 0);

            // Create List of Drift Time differences
            var differenceList = new List <double>(matchList.Count);

            foreach (var featureMatch in matchList)
            {
                var observedFeature = featureMatch.ObservedFeature;
                var targetFeature   = featureMatch.TargetFeature;

                double observedDriftTime;
                if (!double.IsNaN(observedFeature.DriftTimeAligned) && observedFeature.DriftTimeAligned > 0.0)
                {
                    observedDriftTime = observedFeature.DriftTimeAligned;
                }
                else
                {
                    observedDriftTime = observedFeature.DriftTime;
                }

                double targetDriftTime;
                if (!double.IsNaN(targetFeature.DriftTimeAligned) && targetFeature.DriftTimeAligned > 0.0)
                {
                    targetDriftTime = targetFeature.DriftTimeAligned;
                }
                else
                {
                    targetDriftTime = targetFeature.DriftTime;
                }

                differenceList.Add(observedDriftTime - targetDriftTime);
            }

            // Create bins for histogram
            var bins = new List <double>();

            for (var i = -driftTimeTolerance; i <= driftTimeTolerance; i += (driftTimeTolerance / 100.0))
            {
                bins.Add(i);
            }
            bins.Add(driftTimeTolerance);

            // Group drift time differences into the bins
            var groupings = differenceList.GroupBy(difference => bins.First(bin => bin >= difference));

            // Order the groupings by their count, so the group with the highest count will be first
            var orderGroupingsByCount = from singleGroup in groupings
                                        orderby singleGroup.Count() descending
                                        select singleGroup;

            // Grab the drift time from the group with the most counts
            var driftTimeOffset = orderGroupingsByCount.First().Key;

            // Update all of the observed features with the new drift time
            foreach (var observedFeature in observedEnumerable)
            {
                if (!double.IsNaN(observedFeature.DriftTimeAligned) && observedFeature.DriftTimeAligned > 0.0)
                {
                    observedFeature.DriftTimeAligned -= driftTimeOffset;
                }
                else
                {
                    observedFeature.DriftTime -= (float)driftTimeOffset;
                }
            }

            var linearEquation = new LinearRegressionResult {
                Slope = 0, Intercept = driftTimeOffset
            };
            var results = new DriftTimeAlignmentResults <TTarget, TObserved>(matchList, linearEquation);

            return(results);
        }
Example #15
        public static void AnalyzingData()
        {
            FrameTable table;
            Uri        url     = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");
            WebRequest request = WebRequest.Create(url);

            using (WebResponse response = request.GetResponse()) {
                using (StreamReader reader = new StreamReader(response.GetResponseStream())) {
                    table = FrameTable.FromCsv(reader);
                }
            }
            FrameView view = table.WhereNotNull();

            // Get the column with (zero-based) index 4.
            FrameColumn column4 = view.Columns[4];
            // Get the column named "Height".
            FrameColumn heightsColumn = view.Columns["Height"];
            // Even easier way to get the column named "Height".
            FrameColumn alsoHeightsColumn = view["Height"];

            IReadOnlyList <double> heights = view["Height"].As <double>();

            SummaryStatistics summary = new SummaryStatistics(view["Height"].As <double>());

            Console.WriteLine($"Count = {summary.Count}");
            Console.WriteLine($"Mean = {summary.Mean}");
            Console.WriteLine($"Standard Deviation = {summary.StandardDeviation}");
            Console.WriteLine($"Skewness = {summary.Skewness}");
            Console.WriteLine($"Estimated population mean = {summary.PopulationMean}");
            Console.WriteLine($"Estimated population standard deviation = {summary.PopulationStandardDeviation}");

            IReadOnlyList <double> maleHeights =
                view.Where <string>("Sex", s => s == "M").Columns["Height"].As <double>();
            IReadOnlyList <double> femaleHeights =
                view.Where <string>("Sex", s => s == "F").Columns["Height"].As <double>();
            TestResult test = Univariate.StudentTTest(maleHeights, femaleHeights);

            Console.WriteLine($"{test.Statistic.Name} = {test.Statistic.Value}");
            Console.WriteLine($"P = {test.Probability}");

            TestResult maleHeightNormality  = maleHeights.ShapiroFranciaTest();
            TestResult totalHeightNormality = view["Height"].As <double>().ShapiroFranciaTest();
            TestResult heightCompatibility  = Univariate.KolmogorovSmirnovTest(maleHeights, femaleHeights);

            LinearRegressionResult fit =
                view["Weight"].As <double>().LinearRegression(view["Height"].As <double>());

            Console.WriteLine($"Model weight = ({fit.Slope}) * height + ({fit.Intercept}).");
            Console.WriteLine($"Model explains {fit.RSquared * 100.0}% of variation.");

            ContingencyTable <string, bool> contingency =
                Bivariate.Crosstabs(view["Sex"].As <string>(), view["Result"].As <bool>());

            Console.WriteLine($"Male incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "M")}");
            Console.WriteLine($"Female incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "F")}");
            Console.WriteLine($"Log odds ratio = {contingency.Binary.LogOddsRatio}");

            view.AddComputedColumn("Bmi", r => ((double)r["Weight"]) / MoreMath.Sqr((double)r["Height"] / 100.0));
            view.AddComputedColumn("Age", r => (DateTime.Now - (DateTime)r["Birthdate"]).TotalDays / 365.24);

            MultiLinearLogisticRegressionResult result =
                view["Result"].As <bool>().MultiLinearLogisticRegression(
                    view["Bmi"].As <double>(),
                    view["Sex"].As <string, double>(s => s == "M" ? 1.0 : 0.0)
                    );

            foreach (Parameter parameter in result.Parameters)
            {
                Console.WriteLine($"{parameter.Name} = {parameter.Estimate}");
            }

            TestResult spearman = Bivariate.SpearmanRhoTest(view["Age"].As <double>(), view["Result"].As <double>());

            Console.WriteLine($"{spearman.Statistic.Name} = {spearman.Statistic.Value} P = {spearman.Probability}");
        }
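For context on the contingency-table output (my own note): the standard log odds ratio of a 2 x 2 table with cell counts n_ij is

    \ln \mathrm{OR} = \ln \frac{n_{11}\, n_{22}}{n_{12}\, n_{21}}

which is presumably the quantity printed from contingency.Binary.LogOddsRatio above.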
Example #16
        public void BivariateLinearRegression()
        {
            // do a set of linear regression fits
            // make sure not only that the fit parameters are what they should be, but also that their variances/covariances agree with the values each fit reports

            Random rng = new Random(314159);

            // define line parameters
            double a0 = 2.0; double b0 = -1.0;

            // keep track of sample of returned a and b fit parameters
            BivariateSample pSample = new BivariateSample();

            // also keep track of returned covariance estimates
            // since these vary slightly from fit to fit, we will average them
            double caa = 0.0;
            double cbb = 0.0;
            double cab = 0.0;

            // Record predictions for a new point
            double x0      = 12.0;
            Sample ySample = new Sample();
            double ySigma  = 0.0;

            // do 128 fits
            for (int k = 0; k < 128; k++)
            {
                // we should be able to draw x's from any distribution; noise should be drawn from a normal distribution
                ContinuousDistribution xd = new LogisticDistribution();
                ContinuousDistribution nd = new NormalDistribution(0.0, 2.0);

                // generate a synthetic data set
                BivariateSample sample = new BivariateSample();
                for (int i = 0; i < 16; i++)
                {
                    double x = xd.GetRandomValue(rng);
                    double y = a0 + b0 * x + nd.GetRandomValue(rng);
                    sample.Add(x, y);
                }

                // do the regression
                LinearRegressionResult result = sample.LinearRegression();

                // test consistency
                Assert.IsTrue(result.Intercept == result.Parameters[0].Estimate);
                Assert.IsTrue(result.Intercept.Value == result.Parameters.Best[0]);
                Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Intercept.Uncertainty, Math.Sqrt(result.Parameters.Covariance[0, 0])));
                Assert.IsTrue(result.Slope == result.Parameters[1].Estimate);
                Assert.IsTrue(result.Slope.Value == result.Parameters.Best[1]);
                Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Slope.Uncertainty, Math.Sqrt(result.Parameters.Covariance[1, 1])));
                Assert.IsTrue(TestUtilities.IsNearlyEqual(result.R.Statistic, sample.CorrelationCoefficient));

                // record best fit parameters
                double a = result.Parameters.Best[0];
                double b = result.Parameters.Best[1];
                pSample.Add(a, b);

                // record estimated covariances
                caa += result.Parameters.Covariance[0, 0];
                cbb += result.Parameters.Covariance[1, 1];
                cab += result.Parameters.Covariance[0, 1];

                UncertainValue yPredict = result.Predict(x0);
                ySample.Add(yPredict.Value);
                ySigma += yPredict.Uncertainty;

                double SST = 0.0;
                foreach (double y in sample.Y)
                {
                    SST += MoreMath.Sqr(y - sample.Y.Mean);
                }
                Assert.IsTrue(TestUtilities.IsNearlyEqual(SST, result.Anova.Total.SumOfSquares));

                double SSR = 0.0;
                foreach (double z in result.Residuals)
                {
                    SSR += z * z;
                }
                Assert.IsTrue(TestUtilities.IsNearlyEqual(SSR, result.Anova.Residual.SumOfSquares));
            }

            caa    /= pSample.Count;
            cbb    /= pSample.Count;
            cab    /= pSample.Count;
            ySigma /= pSample.Count;

            // check that mean parameter estimates are what they should be: the underlying population parameters
            Assert.IsTrue(pSample.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(a0));
            Assert.IsTrue(pSample.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(b0));

            Console.WriteLine("{0} {1}", caa, pSample.X.PopulationVariance);
            Console.WriteLine("{0} {1}", cbb, pSample.Y.PopulationVariance);

            // check that parameter covariances are what they should be: the reported covariance estimates
            Assert.IsTrue(pSample.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(caa));
            Assert.IsTrue(pSample.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(cbb));
            Assert.IsTrue(pSample.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(cab));

            // Check that the predicted ys conform to the model and the asserted uncertainty.
            Assert.IsTrue(ySample.PopulationMean.ConfidenceInterval(0.95).ClosedContains(a0 + x0 * b0));
            //Assert.IsTrue(ySample.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(ySigma));
        }