/// <summary>
/// Maps a <see cref="LinearRegressionResult"/> to its response DTO.
/// </summary>
/// <param name="result">The regression result to convert.</param>
/// <returns>A populated <see cref="DTOs.Responses.LinearRegressionResponse"/>.</returns>
public static DTOs.Responses.LinearRegressionResponse ToDto(this LinearRegressionResult result)
{
    return new DTOs.Responses.LinearRegressionResponse
    {
        Intercept = new DTOs.Responses.CoefficientResponse
        {
            // BUG FIX: was Number(result.Intercept.Value) — the p-value field was
            // being populated from the coefficient value. Map from PValue, mirroring
            // the slope mapping below.
            PValue = Number(result.Intercept.PValue),
            StandardError = result.Intercept.StandardError,
            TStatistic = result.Intercept.TStatistic,
            Value = result.Intercept.Value
        },
        Slope = result.SlopeLst.Select(s => new DTOs.Responses.CoefficientResponse
        {
            PValue = Number(s.PValue),
            StandardError = s.StandardError,
            TStatistic = s.TStatistic,
            Value = s.Value
        }).ToList(),
        ResidualStandardError = result.ResidualStandardError,
        ResidualSumOfSquares = result.ResidualSumOfSquare,
        RSquare = result.RSquare
    };
}
public void MultivariateLinearRegressionAgreement2()
{
    // A multivariate linear regression with just one x-column should be the same
    // as a bivariate linear regression.
    const double intercept = 1.0;
    const double slope = -2.0;
    ContinuousDistribution noiseDistribution = new NormalDistribution(0.0, 3.0);
    UniformDistribution abscissaDistribution = new UniformDistribution(Interval.FromEndpoints(-2.0, 3.0));
    Random rng = new Random(1111111);

    // Build a small synthetic (x, y) sample from the known line plus noise.
    MultivariateSample multi = new MultivariateSample("x", "y");
    for (int count = 0; count < 10; count++)
    {
        double abscissa = abscissaDistribution.GetRandomValue(rng);
        double ordinate = intercept + slope * abscissa + noiseDistribution.GetRandomValue(rng);
        multi.Add(abscissa, ordinate);
    }

    // Old multi linear regression code.
    MultiLinearRegressionResult oldMultiFit = multi.LinearRegression(1);

    // Simple (bivariate) linear regression code must agree on the intercept.
    LinearRegressionResult simpleFit = multi.TwoColumns(0, 1).LinearRegression();
    Assert.IsTrue(TestUtilities.IsNearlyEqual(oldMultiFit.Parameters["Intercept"].Estimate, simpleFit.Parameters["Intercept"].Estimate));

    // New multi linear regression code must agree as well.
    MultiLinearRegressionResult newMultiFit = multi.Column(1).ToList().MultiLinearRegression(multi.Column(0).ToList());
    Assert.IsTrue(TestUtilities.IsNearlyEqual(oldMultiFit.Parameters["Intercept"].Estimate, newMultiFit.Parameters["Intercept"].Estimate));
}
public void TestBivariateRegression()
{
    // Do a bunch of linear regressions on data with no true dependence (b0 = 0);
    // r^2 should be distributed as expected: Beta((m-1)/2, (n-m)/2) with
    // m = 2 fit parameters and n = 10 points per fit.
    double a0 = 1.0;
    double b0 = 0.0;
    Random rng = new Random(1001110000);
    ContinuousDistribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-2.0, 4.0));
    ContinuousDistribution eDistribution = new NormalDistribution();
    List<double> r2Sample = new List<double>();
    for (int i = 0; i < 500; i++)
    {
        BivariateSample xySample = new BivariateSample();
        for (int k = 0; k < 10; k++)
        {
            double x = xDistribution.GetRandomValue(rng);
            double y = a0 + b0 * x + eDistribution.GetRandomValue(rng);
            xySample.Add(x, y);
        }
        LinearRegressionResult fit = xySample.LinearRegression();
        // FIX: removed unused locals a (fit.Intercept.Value) and b (fit.Slope.Value);
        // neither was referenced after assignment.
        r2Sample.Add(fit.RSquared);
    }
    ContinuousDistribution r2Distribution = new BetaDistribution((2 - 1) / 2.0, (10 - 2) / 2.0);
    TestResult ks = r2Sample.KolmogorovSmirnovTest(r2Distribution);
    Assert.IsTrue(ks.Probability > 0.05);
}
// Smoke test: downloads a real-world weight/height data set over the network and
// exercises frame filtering, computed columns, summary statistics, normality and
// two-sample tests, and several regressions.
// NOTE(review): most results are computed but never asserted — this test only
// verifies that the calls run without throwing. Requires network access.
public void InternetSampleDownload()
{
    FrameTable table = DownloadFrameTable(new Uri("https://raw.githubusercontent.com/Dataweekends/zero_to_deep_learning_udemy/master/data/weight-height.csv"));
    FrameView view = table.WhereNotNull();
    // BMI computed directly from the Weight and Height columns (units as given by the data set).
    view.AddComputedColumn("Bmi", (FrameRow r) => {
        double h = (double)r["Height"];
        double w = (double)r["Weight"];
        return (w / (h * h));
    });
    // Split the view by gender for per-group statistics.
    FrameView males = view.Where("Gender", (string s) => (s == "Male"));
    FrameView females = view.Where("Gender", (string s) => (s == "Female"));
    SummaryStatistics maleSummary = new SummaryStatistics(males["Height"].As<double>());
    SummaryStatistics femaleSummary = new SummaryStatistics(females["Height"].As<double>());
    // Normality tests on the full sample and each subgroup.
    TestResult allNormal = view["Height"].As<double>().ShapiroFranciaTest();
    TestResult maleNormal = males["Height"].As<double>().ShapiroFranciaTest();
    TestResult femaleNormal = females["Height"].As<double>().ShapiroFranciaTest();
    // Parametric and non-parametric two-sample comparisons of male vs. female heights.
    TestResult tTest = Univariate.StudentTTest(males["Height"].As<double>(), females["Height"].As<double>());
    TestResult mwTest = Univariate.MannWhitneyTest(males["Height"].As<double>(), females["Height"].As<double>());
    // Linear and polynomial (degree 1-3) regressions of weight on height.
    LinearRegressionResult result0 = males["Weight"].As<double>().LinearRegression(males["Height"].As<double>());
    PolynomialRegressionResult result1 = males["Height"].As<double>().PolynomialRegression(males["Weight"].As<double>(), 1);
    PolynomialRegressionResult result2 = males["Height"].As<double>().PolynomialRegression(males["Weight"].As<double>(), 2);
    PolynomialRegressionResult result3 = males["Height"].As<double>().PolynomialRegression(males["Weight"].As<double>(), 3);
    //MultiLinearRegressionResult multi = view["Weight"].As<double>().MultiLinearRegression(view["Height"].As<double>(), view["Gender"].As<string>().Select(s => (s == "Male") ? 1.0 : 0.0).ToList());
}
/// <summary>
/// Initializes an empty, not-yet-analyzed dataset with default name, raw tool,
/// no evidences, and a fresh regression result.
/// </summary>
public LcmsDataSet()
{
    Name = string.Empty;
    Tool = LcmsIdentificationTool.Raw;
    PreviouslyAnalyzed = false;
    Evidences = new List<Evidence>();
    RegressionResult = new LinearRegressionResult();
}
/// <summary>
/// Initializes a not-yet-analyzed dataset with the given name, tool format,
/// and a defensive copy of the supplied evidences.
/// </summary>
/// <param name="name">Name of dataset</param>
/// <param name="tool">Format of dataset</param>
/// <param name="evidences">Evidences in dataset</param>
public LcmsDataSet(string name, LcmsIdentificationTool tool, IEnumerable<Evidence> evidences)
{
    Name = name;
    Tool = tool;
    PreviouslyAnalyzed = false;
    // Copy into a new list so later mutations of the caller's sequence do not affect us.
    Evidences = new List<Evidence>(evidences);
    RegressionResult = new LinearRegressionResult();
}
public void BivariateLinearRegressionNullDistribution()
{
    // create uncorrelated x and y values
    // the distribution of F-test statistics returned by linear fits should follow the expected F-distribution
    Random rng = new Random(987654321);
    NormalDistribution xd = new NormalDistribution(1.0, 2.0);
    NormalDistribution yd = new NormalDistribution(-3.0, 4.0);
    // fs collects F statistics for the KS test; rSample/fSample collect statistics
    // together with their claimed null distributions for Kuiper tests.
    Sample fs = new Sample();
    Sample rSample = new Sample();
    ContinuousDistribution rDistribution = null;
    Sample fSample = new Sample();
    ContinuousDistribution fDistribution = null;
    for (int i = 0; i < 127; i++)
    {
        // 7 points of independent x and y per fit.
        BivariateSample sample = new BivariateSample();
        for (int j = 0; j < 7; j++)
        {
            sample.Add(xd.GetRandomValue(rng), yd.GetRandomValue(rng));
        }
        LinearRegressionResult result = sample.LinearRegression();
        double f = result.F.Statistic;
        fs.Add(f);
        rSample.Add(result.R.Statistic);
        // The claimed null distributions are the same every iteration; keeping the last is enough.
        rDistribution = result.R.Distribution;
        fSample.Add(result.F.Statistic);
        fDistribution = result.F.Distribution;
        // The F statistic from the fit must equal the one from the associated ANOVA.
        Assert.IsTrue(result.F.Statistic == result.Anova.Result.Statistic);
        // r-test and F-test must yield (nearly) the same p-value.
        Assert.IsTrue(TestUtilities.IsNearlyEqual(
            result.R.Probability, result.F.Probability,
            new EvaluationSettings() { RelativePrecision = 1.0E-14, AbsolutePrecision = 1.0E-16 }
        ));
    }
    // With n = 7 points and 2 parameters, F follows F(1, n - 2) = F(1, 5) under the null.
    ContinuousDistribution fd = new FisherDistribution(1, 5);
    Console.WriteLine("{0} v. {1}", fs.PopulationMean, fd.Mean);
    TestResult t = fs.KolmogorovSmirnovTest(fd);
    Console.WriteLine(t.LeftProbability);
    Assert.IsTrue(t.LeftProbability < 0.95);
    // The r and F samples should be compatible with their claimed distributions.
    Assert.IsTrue(rSample.KuiperTest(rDistribution).Probability > 0.05);
    Assert.IsTrue(fSample.KuiperTest(fDistribution).Probability > 0.05);
}
public void LinearRegressionVariances()
{
    // do a set of linear regression fits
    // make sure not only that the fit parameters are what they should be, but that their variances/covariances are as returned
    Random rng = new Random(314159);
    // define line parameters
    double a0 = 2.0; double b0 = -1.0;
    // do a lot of fits, recording results of each: fitted a and b, their claimed
    // variances/covariance, and a prediction with its claimed uncertainty.
    FrameTable data = new FrameTable();
    data.AddColumns<double>("a", "va", "b", "vb", "abCov", "p", "dp");
    for (int k = 0; k < 128; k++)
    {
        // we should be able to draw x's from any distribution; noise should be drawn from a normal distribution
        ContinuousDistribution xd = new LogisticDistribution();
        ContinuousDistribution nd = new NormalDistribution(0.0, 2.0);
        // generate a synthetic data set
        BivariateSample sample = new BivariateSample();
        for (int i = 0; i < 12; i++)
        {
            double x = xd.GetRandomValue(rng);
            double y = a0 + b0 * x + nd.GetRandomValue(rng);
            sample.Add(x, y);
        }
        // do the regression
        LinearRegressionResult result = sample.LinearRegression();
        // record result
        UncertainValue p = result.Predict(12.0);
        data.AddRow(new Dictionary<string, object>() {
            { "a", result.Intercept.Value }, { "va", result.Parameters.VarianceOf("Intercept") },
            { "b", result.Slope.Value }, { "vb", result.Parameters.VarianceOf("Slope") },
            { "abCov", result.Parameters.CovarianceOf("Slope", "Intercept") },
            { "p", p.Value }, { "dp", p.Uncertainty }
        });
    }
    // variances of parameters should agree with predictions: the observed population
    // variance of each fitted parameter should contain the median claimed variance.
    Assert.IsTrue(data["a"].As<double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(data["va"].As<double>().Median()));
    Assert.IsTrue(data["b"].As<double>().PopulationVariance().ConfidenceInterval(0.99).ClosedContains(data["vb"].As<double>().Median()));
    Assert.IsTrue(data["a"].As<double>().PopulationCovariance(data["b"].As<double>()).ConfidenceInterval(0.99).ClosedContains(data["abCov"].As<double>().Median()));
    // variance of prediction should agree with claim
    Assert.IsTrue(data["p"].As<double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["dp"].As<double>().Median()));
}
public void TestBivariateRegression()
{
    // r^2 for regressions on data with no true dependence (b0 = 0) should follow
    // Beta((m-1)/2, (n-m)/2) with m = 2 fit parameters and n = 10 points per fit.
    double a0 = 1.0; double b0 = 0.0;
    Random rng = new Random(1001110000);
    ContinuousDistribution xDistribution = new UniformDistribution(Interval.FromEndpoints(-2.0, 4.0));
    ContinuousDistribution eDistribution = new NormalDistribution();
    Sample r2Sample = new Sample();
    for (int i = 0; i < 500; i++)
    {
        BivariateSample xySample = new BivariateSample();
        for (int k = 0; k < 10; k++)
        {
            double x = xDistribution.GetRandomValue(rng);
            double y = a0 + b0 * x + eDistribution.GetRandomValue(rng);
            xySample.Add(x, y);
        }
        LinearRegressionResult fit = xySample.LinearRegression();
        // FIX: removed dead code — a loop that accumulated residual sums of squares
        // (ss1, ss2) and the locals a/b feeding it; none of those values were ever used.
        r2Sample.Add(fit.RSquared);
    }
    Console.WriteLine("{0} {1} {2} {3} {4}", r2Sample.Count, r2Sample.PopulationMean, r2Sample.StandardDeviation, r2Sample.Minimum, r2Sample.Maximum);
    ContinuousDistribution r2Distribution = new BetaDistribution((2 - 1) / 2.0, (10 - 2) / 2.0);
    //Distribution r2Distribution = new BetaDistribution((10 - 2) / 2.0, (2 - 1) / 2.0);
    Console.WriteLine("{0} {1}", r2Distribution.Mean, r2Distribution.StandardDeviation);
    TestResult ks = r2Sample.KolmogorovSmirnovTest(r2Distribution);
    Console.WriteLine(ks.RightProbability);
    Console.WriteLine(ks.Probability);
}
public void BivariateLinearRegressionNullDistribution()
{
    // Create uncorrelated x and y values and do a linear fit.
    // The r-tests and F-test statistics returned by the linear fits
    // should agree and both test statistics should follow their claimed
    // distributions.
    Random rng = new Random(987654321);
    NormalDistribution xd = new NormalDistribution(1.0, 2.0);
    NormalDistribution yd = new NormalDistribution(-3.0, 4.0);
    // Collect each fit's r and F statistics together with their claimed null distributions.
    Sample rSample = new Sample();
    ContinuousDistribution rDistribution = null;
    Sample fSample = new Sample();
    ContinuousDistribution fDistribution = null;
    for (int i = 0; i < 127; i++)
    {
        // 7 points of independent x and y per fit.
        BivariateSample sample = new BivariateSample();
        for (int j = 0; j < 7; j++)
        {
            sample.Add(xd.GetRandomValue(rng), yd.GetRandomValue(rng));
        }
        LinearRegressionResult result = sample.LinearRegression();
        rSample.Add(result.R.Statistic.Value);
        // The claimed distributions are the same for every fit; keeping the last is enough.
        rDistribution = result.R.Statistic.Distribution;
        fSample.Add(result.F.Statistic.Value);
        fDistribution = result.F.Statistic.Distribution;
        // The F statistic from the fit must equal the one from the associated ANOVA.
        Assert.IsTrue(result.F.Statistic.Value == result.Anova.Result.Statistic.Value);
        // r-test and F-test must yield (nearly) the same p-value.
        Assert.IsTrue(TestUtilities.IsNearlyEqual(
            result.R.Probability, result.F.Probability,
            new EvaluationSettings() { RelativePrecision = 1.0E-13, AbsolutePrecision = 1.0E-16 }
        ));
    }
    // The collected statistics should be compatible with their claimed null distributions.
    Assert.IsTrue(rSample.KuiperTest(rDistribution).Probability > 0.05);
    Assert.IsTrue(fSample.KuiperTest(fDistribution).Probability > 0.05);
}
/// <summary>
/// Example: fits a line to six (x, y) points and prints the fitted model,
/// its ANOVA decomposition, a confidence interval on the slope, and a prediction.
/// </summary>
public static void LinearRegression()
{
    List<double> xs = new List<double> { -1.1, 2.2, 1.4, 0.5, 3.7, 2.8 };
    List<double> ys = new List<double> { -2.9, 3.4, 0.9, 0.1, 6.8, 5.7 };

    LinearRegressionResult fit = ys.LinearRegression(xs);
    Console.WriteLine($"y = ({fit.Intercept}) + ({fit.Slope}) x");
    Console.WriteLine($"Fit explains {fit.RSquared * 100.0}% of the variance");
    Console.WriteLine($"Probability of no dependence {fit.R.Probability}.");

    // The ANOVA breaks total variation into fit-explained and residual parts.
    OneWayAnovaResult anova = fit.Anova;
    Console.WriteLine("Fit dof = {0} SS = {1}", anova.Factor.DegreesOfFreedom, anova.Factor.SumOfSquares);
    Console.WriteLine("Residual dof = {0} SS = {1}", anova.Residual.DegreesOfFreedom, anova.Residual.SumOfSquares);
    Console.WriteLine("Total dof = {0} SS = {1}", anova.Total.DegreesOfFreedom, anova.Total.SumOfSquares);
    Console.WriteLine($"Probability of no dependence {anova.Result.Probability}.");

    // Print a 95% confidence interval on the slope
    Console.WriteLine($"slope is in {fit.Slope.ConfidenceInterval(0.95)} with 95% confidence");

    // Additional result accessors demonstrated (values not printed).
    IReadOnlyList<double> residuals = fit.Residuals;
    ColumnVector parameters = fit.Parameters.ValuesVector;
    SymmetricMatrix covariance = fit.Parameters.CovarianceMatrix;
    fit.Parameters.CovarianceOf("Intercept", "Slope");

    // Predict y at a new x, with uncertainty.
    double xNew = 3.0;
    UncertainValue yPredicted = fit.Predict(xNew);
    Console.WriteLine($"Predicted y({xNew}) = {yPredicted}.");
}
/// <summary>
/// Default constructor.
/// </summary>
/// <param name="matches">Feature matches found between observed and target features.</param>
/// <param name="alignmentFunction">Linear regression result describing the drift-time alignment.</param>
public DriftTimeAlignmentResults(List<FeatureMatch<TTarget, TObserved>> matches, LinearRegressionResult alignmentFunction)
{
    Matches = matches;
    AlignmentFunction = alignmentFunction;
}
// Fits a line to synthetic data (y = a + b x + normal noise) and verifies that the
// LinearRegressionResult's reported parameters, uncertainties, residuals, R/R^2,
// F-test, and ANOVA sums of squares are all internally consistent.
public void LinearRegressionSimple()
{
    double a = -1.0; double b = 2.0;
    ContinuousDistribution xDistribution = new CauchyDistribution();
    ContinuousDistribution eDistribution = new NormalDistribution();
    int n = 16;
    Random rng = new Random(1);
    double[] x = new double[n];
    double[] y = new double[n];
    for (int i = 0; i < 16; i++)
    {
        x[i] = xDistribution.GetRandomValue(rng);
        y[i] = a + b * x[i] + eDistribution.GetRandomValue(rng);
    }
    LinearRegressionResult result = y.LinearRegression(x);
    // Parameters should be right
    Assert.IsTrue(result.Intercept.ConfidenceInterval(0.95).ClosedContains(a));
    Assert.IsTrue(result.Slope.ConfidenceInterval(0.95).ClosedContains(b));
    // Reported values should be consistent: named accessors vs. the parameter collection.
    Assert.IsTrue(result.Intercept == result.Parameters["Intercept"].Estimate);
    Assert.IsTrue(result.Intercept.Value == result.Parameters.ValuesVector[result.Parameters.IndexOf("Intercept")]);
    Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Intercept.Uncertainty, Math.Sqrt(result.Parameters.VarianceOf("Intercept"))));
    Assert.IsTrue(result.Slope == result.Parameters["Slope"].Estimate);
    Assert.IsTrue(result.Slope.Value == result.Parameters.ValuesVector[result.Parameters.IndexOf("Slope")]);
    Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Slope.Uncertainty, Math.Sqrt(result.Parameters.VarianceOf("Slope"))));
    // Residuals should agree with definition: residual = y - predicted y.
    for (int i = 0; i < x.Length; i++)
    {
        double yp = result.Predict(x[i]).Value;
        Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Residuals[i], y[i] - yp));
    }
    // R and R-squared agree
    Assert.IsTrue(TestUtilities.IsNearlyEqual(result.RSquared, MoreMath.Sqr(result.R.Statistic.Value)));
    // F-test and R-test agree
    Assert.IsTrue(TestUtilities.IsNearlyEqual(result.F.Probability, result.R.Probability));
    // ANOVA's sums of squares are correct
    double SST = y.Variance() * y.Length;
    Assert.IsTrue(TestUtilities.IsNearlyEqual(SST, result.Anova.Total.SumOfSquares));
    double SSR = 0.0;
    foreach (double z in result.Residuals)
    {
        SSR += z * z;
    }
    Assert.IsTrue(TestUtilities.IsNearlyEqual(SSR, result.Anova.Residual.SumOfSquares));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(SSR, result.SumOfSquaredResiduals));
    // R is same as correlation coefficient
    Assert.IsTrue(TestUtilities.IsNearlyEqual(x.CorrelationCoefficient(y), result.R.Statistic.Value));
}
/// <summary>
/// Does a zero mean drift time correction.
/// </summary>
/// <param name="observedEnumerable">All observed features to shift that should already be drift time aligned.</param>
/// <param name="targetEnumerable">Expected features</param>
/// <param name="massTolerance">PPM Mass Tolerance</param>
/// <param name="netTolerance">Normalized Elution Time tolerance.</param>
/// <param name="driftTimeTolerance">Drift time tolerance to use.</param>
/// <returns>The matched features together with the constant-offset alignment function.</returns>
public static DriftTimeAlignmentResults<TTarget, TObserved> CorrectForOffset(IEnumerable<TTarget> observedEnumerable, IEnumerable<TObserved> targetEnumerable, double massTolerance, double netTolerance, double driftTimeTolerance)
{
    // FIX: materialize the inputs once; the original enumerated each sequence
    // several times (two ToList calls plus the final update loop).
    var observedList = observedEnumerable.ToList();
    var targetList = targetEnumerable.ToList();

    // Setup Tolerance for Feature Matching
    var featureMatcherParameters = new FeatureMatcherParameters();
    featureMatcherParameters.SetTolerances(massTolerance, netTolerance, (float)driftTimeTolerance);
    featureMatcherParameters.UseDriftTime = true;

    // Find all matches based on defined tolerances
    var featureMatcher = new FeatureMatcher<TTarget, TObserved>(observedList, targetList, featureMatcherParameters);
    var matchList = featureMatcher.FindMatches(observedList, targetList, featureMatcherParameters.UserTolerances, 0);

    // Create List of Drift Time differences
    var differenceList = new List<double>(matchList.Count);
    foreach (var featureMatch in matchList)
    {
        var observedFeature = featureMatch.ObservedFeature;
        var targetFeature = featureMatch.TargetFeature;

        // BUG FIX: the original condition was (DriftTimeAligned != double.NaN),
        // which is ALWAYS true because NaN compares unequal to everything,
        // including itself. Use double.IsNaN, matching the target branch below.
        double observedDriftTime;
        if (!double.IsNaN(observedFeature.DriftTimeAligned) && observedFeature.DriftTimeAligned > 0.0)
        {
            observedDriftTime = observedFeature.DriftTimeAligned;
        }
        else
        {
            observedDriftTime = observedFeature.DriftTime;
        }

        double targetDriftTime;
        if (!double.IsNaN(targetFeature.DriftTimeAligned) && targetFeature.DriftTimeAligned > 0.0)
        {
            targetDriftTime = targetFeature.DriftTimeAligned;
        }
        else
        {
            targetDriftTime = targetFeature.DriftTime;
        }

        differenceList.Add(observedDriftTime - targetDriftTime);
    }

    // Create bins for histogram: [-tolerance, +tolerance] in steps of 1% of the tolerance.
    var bins = new List<double>();
    for (var i = -driftTimeTolerance; i <= driftTimeTolerance; i += (driftTimeTolerance / 100.0))
    {
        bins.Add(i);
    }
    bins.Add(driftTimeTolerance);

    // Group drift time differences into the bins
    var groupings = differenceList.GroupBy(difference => bins.First(bin => bin >= difference));

    // Order the groupings by their count, so the group with the highest count will be first
    var orderGroupingsByCount = from singleGroup in groupings orderby singleGroup.Count() descending select singleGroup;

    // Grab the drift time from the group with the most counts
    var driftTimeOffset = orderGroupingsByCount.First().Key;

    // Update all of the observed features with the new drift time
    foreach (var observedFeature in observedList)
    {
        if (!double.IsNaN(observedFeature.DriftTimeAligned) && observedFeature.DriftTimeAligned > 0.0)
        {
            observedFeature.DriftTimeAligned -= driftTimeOffset;
        }
        else
        {
            observedFeature.DriftTime -= (float)driftTimeOffset;
        }
    }

    // The alignment is a pure offset, so the function has zero slope.
    var linearEquation = new LinearRegressionResult { Slope = 0, Intercept = driftTimeOffset };

    var results = new DriftTimeAlignmentResults<TTarget, TObserved>(matchList, linearEquation);
    return results;
}
// Example walkthrough: downloads a CSV data set, demonstrates frame/column access,
// summary statistics, hypothesis tests, regressions, contingency tables, and a
// multi-variable logistic regression. Requires network access.
public static void AnalyzingData()
{
    FrameTable table;
    Uri url = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");
    WebRequest request = WebRequest.Create(url);
    using (WebResponse response = request.GetResponse())
    {
        using (StreamReader reader = new StreamReader(response.GetResponseStream()))
        {
            table = FrameTable.FromCsv(reader);
        }
    }
    FrameView view = table.WhereNotNull();
    // Get the column with (zero-based) index 4.
    FrameColumn column4 = view.Columns[4];
    // Get the column named "Height".
    FrameColumn heightsColumn = view.Columns["Height"];
    // Even easier way to get the column named "Height".
    FrameColumn alsoHeightsColumn = view["Height"];
    IReadOnlyList<double> heights = view["Height"].As<double>();
    SummaryStatistics summary = new SummaryStatistics(view["Height"].As<double>());
    Console.WriteLine($"Count = {summary.Count}");
    Console.WriteLine($"Mean = {summary.Mean}");
    Console.WriteLine($"Standard Deviation = {summary.StandardDeviation}");
    Console.WriteLine($"Skewness = {summary.Skewness}");
    Console.WriteLine($"Estimated population mean = {summary.PopulationMean}");
    Console.WriteLine($"Estimated population standard deviation = {summary.PopulationStandardDeviation}");
    // Split heights by sex and compare the two groups with a t-test.
    IReadOnlyList<double> maleHeights = view.Where<string>("Sex", s => s == "M").Columns["Height"].As<double>();
    IReadOnlyList<double> femaleHeights = view.Where<string>("Sex", s => s == "F").Columns["Height"].As<double>();
    TestResult test = Univariate.StudentTTest(maleHeights, femaleHeights);
    Console.WriteLine($"{test.Statistic.Name} = {test.Statistic.Value}");
    Console.WriteLine($"P = {test.Probability}");
    // Normality and distribution-compatibility checks (results not printed).
    TestResult maleHeightNormality = maleHeights.ShapiroFranciaTest();
    TestResult totalHeightNormality = view["Height"].As<double>().ShapiroFranciaTest();
    TestResult heightCompatibility = Univariate.KolmogorovSmirnovTest(maleHeights, femaleHeights);
    // Linear model of weight as a function of height.
    LinearRegressionResult fit = view["Weight"].As<double>().LinearRegression(view["Height"].As<double>());
    Console.WriteLine($"Model weight = ({fit.Slope}) * height + ({fit.Intercept}).");
    Console.WriteLine($"Model explains {fit.RSquared * 100.0}% of variation.");
    // Cross-tabulate sex against the boolean result column.
    ContingencyTable<string, bool> contingency = Bivariate.Crosstabs(view["Sex"].As<string>(), view["Result"].As<bool>());
    Console.WriteLine($"Male incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "M")}");
    Console.WriteLine($"Female incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "F")}");
    Console.WriteLine($"Log odds ratio = {contingency.Binary.LogOddsRatio}");
    // Derived columns: BMI (height in cm -> m) and age in years from birthdate.
    view.AddComputedColumn("Bmi", r => ((double)r["Weight"]) / MoreMath.Sqr((double)r["Height"] / 100.0));
    view.AddComputedColumn("Age", r => (DateTime.Now - (DateTime)r["Birthdate"]).TotalDays / 365.24);
    // Logistic regression of the result on BMI and sex (encoded as 1.0 for male).
    MultiLinearLogisticRegressionResult result = view["Result"].As<bool>().MultiLinearLogisticRegression(
        view["Bmi"].As<double>(), view["Sex"].As<string, double>(s => s == "M" ? 1.0 : 0.0)
    );
    foreach (Parameter parameter in result.Parameters)
    {
        Console.WriteLine($"{parameter.Name} = {parameter.Estimate}");
    }
    TestResult spearman = Bivariate.SpearmanRhoTest(view["Age"].As<double>(), view["Result"].As<double>());
    Console.WriteLine($"{spearman.Statistic.Name} = {spearman.Statistic.Value} P = {spearman.Probability}");
}
public void BivariateLinearRegression()
{
    // do a set of linear regression fits
    // make sure not only that the fit parameters are what they should be, but that their variances/covariances are as returned
    Random rng = new Random(314159);
    // define line parameters
    double a0 = 2.0; double b0 = -1.0;
    // keep track of sample of returned a and b fit parameters
    BivariateSample pSample = new BivariateSample();
    // also keep track of returned covariance estimates
    // since these vary slightly from fit to fit, we will average them
    double caa = 0.0;
    double cbb = 0.0;
    double cab = 0.0;
    // Record predictions for a new point
    double x0 = 12.0;
    Sample ySample = new Sample();
    double ySigma = 0.0;
    // do 128 fits
    for (int k = 0; k < 128; k++)
    {
        // we should be able to draw x's from any distribution; noise should be drawn from a normal distribution
        ContinuousDistribution xd = new LogisticDistribution();
        ContinuousDistribution nd = new NormalDistribution(0.0, 2.0);
        // generate a synthetic data set
        BivariateSample sample = new BivariateSample();
        for (int i = 0; i < 16; i++)
        {
            double x = xd.GetRandomValue(rng);
            double y = a0 + b0 * x + nd.GetRandomValue(rng);
            sample.Add(x, y);
        }
        // do the regression
        LinearRegressionResult result = sample.LinearRegression();
        // test consistency of named accessors against the parameter collection
        Assert.IsTrue(result.Intercept == result.Parameters[0].Estimate);
        Assert.IsTrue(result.Intercept.Value == result.Parameters.Best[0]);
        Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Intercept.Uncertainty, Math.Sqrt(result.Parameters.Covariance[0, 0])));
        Assert.IsTrue(result.Slope == result.Parameters[1].Estimate);
        Assert.IsTrue(result.Slope.Value == result.Parameters.Best[1]);
        Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Slope.Uncertainty, Math.Sqrt(result.Parameters.Covariance[1, 1])));
        Assert.IsTrue(TestUtilities.IsNearlyEqual(result.R.Statistic, sample.CorrelationCoefficient));
        // record best fit parameters
        double a = result.Parameters.Best[0];
        double b = result.Parameters.Best[1];
        pSample.Add(a, b);
        // record estimated covariances
        caa += result.Parameters.Covariance[0, 0];
        cbb += result.Parameters.Covariance[1, 1];
        cab += result.Parameters.Covariance[0, 1];
        // record the prediction at x0 and its claimed uncertainty
        UncertainValue yPredict = result.Predict(x0);
        ySample.Add(yPredict.Value);
        ySigma += yPredict.Uncertainty;
        // verify ANOVA total and residual sums of squares against direct computation
        double SST = 0.0;
        foreach (double y in sample.Y)
        {
            SST += MoreMath.Sqr(y - sample.Y.Mean);
        }
        Assert.IsTrue(TestUtilities.IsNearlyEqual(SST, result.Anova.Total.SumOfSquares));
        double SSR = 0.0;
        foreach (double z in result.Residuals)
        {
            SSR += z * z;
        }
        Assert.IsTrue(TestUtilities.IsNearlyEqual(SSR, result.Anova.Residual.SumOfSquares));
    }
    // average the accumulated covariance estimates and prediction uncertainties
    caa /= pSample.Count;
    cbb /= pSample.Count;
    cab /= pSample.Count;
    ySigma /= pSample.Count;
    // check that mean parameter estimates are what they should be: the underlying population parameters
    Assert.IsTrue(pSample.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(a0));
    Assert.IsTrue(pSample.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(b0));
    Console.WriteLine("{0} {1}", caa, pSample.X.PopulationVariance);
    Console.WriteLine("{0} {1}", cbb, pSample.Y.PopulationVariance);
    // check that parameter covariances are what they should be: the reported covariance estimates
    Assert.IsTrue(pSample.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(caa));
    Assert.IsTrue(pSample.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(cbb));
    Assert.IsTrue(pSample.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(cab));
    // Check that the predicted ys conform to the model and the asserted uncertainty.
    Assert.IsTrue(ySample.PopulationMean.ConfidenceInterval(0.95).ClosedContains(a0 + x0 * b0));
    //Assert.IsTrue(ySample.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(ySigma));
}