public void BivariateNonlinearFitSimple() {

    // True parameter values for the sinusoidal model.
    double t0 = 3.0;
    double s0 = 1.0;

    // Generate x-values from a Cauchy distribution and y-values from the
    // sinusoid plus Gaussian noise.
    ContinuousDistribution xDistribution = new CauchyDistribution(0.0, 2.0);
    ContinuousDistribution eDistribution = new NormalDistribution(0.0, 0.5);

    Random rng = new Random(5);
    List<double> x = TestUtilities.CreateDataSample(rng, xDistribution, 48).ToList();
    List<double> y = x.Select(z => Math.Sin(2.0 * Math.PI * z / t0 + s0) + eDistribution.GetRandomValue(rng)).ToList();

    // Define the fit function in terms of named parameters.
    Func<IReadOnlyDictionary<string, double>, double, double> fitFunction = (d, z) => {
        double t = d["Period"];
        double s = d["Phase"];
        return Math.Sin(2.0 * Math.PI * z / t + s);
    };

    // Supply starting guesses for the named parameters.
    Dictionary<string, double> start = new Dictionary<string, double>() {
        { "Period", 2.5 }, { "Phase", 1.5 }
    };

    NonlinearRegressionResult result = y.NonlinearRegression(x, fitFunction, start);

    // The fitted parameters should agree with the values used to generate the data.
    Assert.IsTrue(result.Parameters["Period"].Estimate.ConfidenceInterval(0.99).ClosedContains(t0));
    Assert.IsTrue(result.Parameters["Phase"].Estimate.ConfidenceInterval(0.99).ClosedContains(s0));

    // Residuals should agree with the differences between observed and predicted values.
    for (int i = 0; i < x.Count; i++) {
        double yp = result.Predict(x[i]);
        Assert.IsTrue(TestUtilities.IsNearlyEqual(result.Residuals[i], y[i] - yp));
    }

}
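// A minimal usage sketch, not part of the test suite: reading best-fit values
// and their uncertainties back out of a NonlinearRegressionResult. It assumes
// the Parameters collection enumerates Parameter objects exposing Name and
// Estimate, consistent with the indexer usage in the test above.
public void ReportNonlinearFit(NonlinearRegressionResult result) {
    foreach (Parameter parameter in result.Parameters) {
        UncertainValue estimate = parameter.Estimate;
        Console.WriteLine($"{parameter.Name} = {estimate.Value} ± {estimate.Uncertainty}");
    }
}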
public void BivariatePolynomialRegressionSimple() {

    // Pick a simple polynomial
    Polynomial p = Polynomial.FromCoefficients(3.0, -2.0, 1.0);

    // Use it to generate a data set
    Random rng = new Random(1);
    ContinuousDistribution xDistribution = new CauchyDistribution(1.0, 2.0);
    ContinuousDistribution errorDistribution = new NormalDistribution(0.0, 3.0);
    List<double> xs = new List<double>(TestUtilities.CreateDataSample(rng, xDistribution, 10));
    List<double> ys = new List<double>(xs.Select(x => p.Evaluate(x) + errorDistribution.GetRandomValue(rng)));

    PolynomialRegressionResult fit = Bivariate.PolynomialRegression(ys, xs, p.Degree);

    // Parameters should agree
    Assert.IsTrue(fit.Parameters.Count == p.Degree + 1);
    for (int k = 0; k <= p.Degree; k++) {
        Assert.IsTrue(fit.Coefficient(k).ConfidenceInterval(0.99).ClosedContains(p.Coefficient(k)));
    }

    // Residuals should agree
    Assert.IsTrue(fit.Residuals.Count == xs.Count);
    for (int i = 0; i < xs.Count; i++) {
        double z = ys[i] - fit.Predict(xs[i]).Value;
        Assert.IsTrue(TestUtilities.IsNearlyEqual(z, fit.Residuals[i]));
    }

    // Intercept is same as coefficient of x^0
    Assert.IsTrue(fit.Intercept == fit.Coefficient(0));

}
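// A minimal prediction sketch, not part of the test suite: Predict returns an
// uncertain value (the test above reads its .Value), and ConfidenceInterval is
// applied to it the same way the parameter assertions above apply it.
public void ReportPolynomialPrediction(PolynomialRegressionResult fit, double x) {
    UncertainValue prediction = fit.Predict(x);
    Interval ci = prediction.ConfidenceInterval(0.95);
    Console.WriteLine($"f({x}) = {prediction.Value} with 95% CI [{ci.LeftEndpoint}, {ci.RightEndpoint}]");
}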
public void StreamSampleSummaryAgreement() {

    // Streaming properties should give same answers as list methods.
    Random rng = new Random(2);
    List<double> sample = new List<double>(TestUtilities.CreateDataSample(rng, new UniformDistribution(Interval.FromEndpoints(-4.0, 3.0)), 32));

    SummaryStatistics summary = new SummaryStatistics(sample);

    Assert.IsTrue(summary.Count == sample.Count);
    Assert.IsTrue(TestUtilities.IsNearlyEqual(sample.Mean(), summary.Mean));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(sample.Variance(), summary.Variance));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(sample.PopulationMean(), summary.PopulationMean));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(sample.PopulationVariance(), summary.PopulationVariance));
    Assert.IsTrue(sample.Minimum() == summary.Minimum);
    Assert.IsTrue(sample.Maximum() == summary.Maximum);

}
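// A minimal streaming sketch, not part of the test suite: the same summary can
// be accumulated value-by-value without holding the whole sample in memory,
// which is the point of the streaming API. The Add method is assumed to agree
// with the sequence constructor used in the test above.
public SummaryStatistics SummarizeStream(IEnumerable<double> stream) {
    SummaryStatistics summary = new SummaryStatistics();
    foreach (double value in stream) {
        summary.Add(value);
    }
    return summary;
}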
public void StreamingSampleSummaryCombination() {

    // Combining partial summaries should give same answer as full summary
    Random rng = new Random(1);
    List<double> sample = new List<double>(TestUtilities.CreateDataSample(rng, new UniformDistribution(Interval.FromEndpoints(-4.0, 3.0)), 64));
    SummaryStatistics summary = new SummaryStatistics(sample);
    Assert.IsTrue(summary.Count == sample.Count);

    for (int i = 0; i < 4; i++) {

        // Pick a split point in the data
        int m = rng.Next(0, sample.Count);

        // Create a summary of the first part.
        SummaryStatistics summary1 = new SummaryStatistics(sample.Take(m));
        Assert.IsTrue(summary1.Count == m);

        // Create a summary of the second part.
        SummaryStatistics summary2 = new SummaryStatistics(sample.Skip(m));
        Assert.IsTrue(summary2.Count == sample.Count - m);

        // Combine them. Their summary statistics should agree with the original summary.
        SummaryStatistics combined = SummaryStatistics.Combine(summary1, summary2);
        Assert.IsTrue(combined.Count == summary.Count);
        Assert.IsTrue(TestUtilities.IsNearlyEqual(combined.Mean, summary.Mean));
        Assert.IsTrue(TestUtilities.IsNearlyEqual(combined.Variance, summary.Variance));
        Assert.IsTrue(TestUtilities.IsNearlyEqual(combined.StandardDeviation, summary.StandardDeviation));
        Assert.IsTrue(TestUtilities.IsNearlyEqual(combined.Skewness, summary.Skewness));
        Assert.IsTrue(TestUtilities.IsNearlyEqual(combined.PopulationMean, summary.PopulationMean));
        Assert.IsTrue(TestUtilities.IsNearlyEqual(combined.PopulationVariance, summary.PopulationVariance));
        Assert.IsTrue(combined.Minimum == summary.Minimum);
        Assert.IsTrue(combined.Maximum == summary.Maximum);

    }

}
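// A minimal map-reduce sketch, not part of the test suite, of the pattern the
// combination test verifies: summarize each chunk independently (possibly on
// different threads or machines), then fold the partial summaries together
// with Combine. Only the constructor and Combine members exercised above are used.
public SummaryStatistics SummarizeChunks(IEnumerable<IReadOnlyList<double>> chunks) {
    SummaryStatistics total = null;
    foreach (IReadOnlyList<double> chunk in chunks) {
        SummaryStatistics partial = new SummaryStatistics(chunk);
        total = (total == null) ? partial : SummaryStatistics.Combine(total, partial);
    }
    return total;
}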
public void TwoSampleKolmogorovNullDistributionTest() {

    Random rng = new Random(4);
    ContinuousDistribution population = new ExponentialDistribution();

    int[] sizes = new int[] { 23, 30, 175 };
    foreach (int na in sizes) {
        foreach (int nb in sizes) {

            // Generate many two-sample KS statistics from the same population,
            // recording the claimed null distribution of the statistic.
            Sample d = new Sample();
            ContinuousDistribution nullDistribution = null;
            for (int i = 0; i < 128; i++) {
                List<double> a = TestUtilities.CreateDataSample(rng, population, na).ToList();
                List<double> b = TestUtilities.CreateDataSample(rng, population, nb).ToList();

                TestResult r = Univariate.KolmogorovSmirnovTest(a, b);
                d.Add(r.Statistic.Value);
                nullDistribution = r.Statistic.Distribution;
            }

            // Only do the full KS comparison when the number of distinct statistic values,
            // LCM(na, nb), exceeds the number of collected statistics; otherwise the test fails
            // because it detects the granularity of the discrete null distribution. Even then the
            // comparison is a bit sensitive, because the discreteness of the underlying distribution
            // and the inaccuracy of the asymptotic approximation at intermediate sample sizes make
            // strict comparisons iffy.
            TestResult mr = d.KolmogorovSmirnovTest(nullDistribution);
            if (AdvancedIntegerMath.LCM(na, nb) > d.Count) {
                Assert.IsTrue(mr.Probability > 0.01);
            }

            // But always test that the mean and standard deviation are as expected.
            Assert.IsTrue(d.PopulationMean.ConfidenceInterval(0.99).ClosedContains(nullDistribution.Mean));
            Assert.IsTrue(d.PopulationStandardDeviation.ConfidenceInterval(0.99).ClosedContains(nullDistribution.StandardDeviation));

        }
    }

}
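// A minimal single-comparison sketch, not part of the test suite: one
// two-sample KS test, reading the statistic, its null distribution, and the
// p-value from the TestResult members the test above already uses
// (Statistic.Value, Statistic.Distribution, Probability).
public void ReportKolmogorovSmirnov(IReadOnlyList<double> a, IReadOnlyList<double> b) {
    TestResult r = Univariate.KolmogorovSmirnovTest(a, b);
    Console.WriteLine($"D = {r.Statistic.Value}, null mean = {r.Statistic.Distribution.Mean}, P = {r.Probability}");
}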