public void KendallNullDistributionTest() {
    // Checks that the Kendall tau test statistic, computed on independent (uncorrelated)
    // data, follows the null distribution claimed by the returned TestResult.
    // Fixed: misspelled local "xDistrubtion", and a vestigial scope block left over from
    // a commented-out "int n = 64;" experiment.

    // Pick independent distributions for x and y, which needn't be normal and needn't be related.
    Distribution xDistribution = new LogisticDistribution();
    Distribution yDistribution = new ExponentialDistribution();
    Random rng = new Random(314159265);

    // Generate bivariate samples of various sizes.
    foreach (int n in TestUtilities.GenerateIntegerValues(4, 64, 8)) {

        Sample testStatistics = new Sample();
        Distribution testDistribution = null;

        for (int i = 0; i < 128; i++) {
            BivariateSample sample = new BivariateSample();
            for (int j = 0; j < n; j++) {
                sample.Add(xDistribution.GetRandomValue(rng), yDistribution.GetRandomValue(rng));
            }
            TestResult result = sample.KendallTauTest();
            testStatistics.Add(result.Statistic);
            testDistribution = result.Distribution;
        }

        // The observed statistics should have the mean and variance of the claimed null distribution.
        Console.WriteLine("{0} {1}", testStatistics.PopulationVariance, testDistribution.Variance);
        Assert.IsTrue(testStatistics.PopulationMean.ConfidenceInterval(0.95).ClosedContains(testDistribution.Mean));
        Assert.IsTrue(testStatistics.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(testDistribution.Variance));
    }
}
public void ContingencyTableProbabilitiesAndUncertainties() {
    // Underlying population cell probabilities (they sum to one).
    double[,] population = new double[,] {
        { 1.0 / 45.0, 2.0 / 45.0, 3.0 / 45.0 },
        { 4.0 / 45.0, 5.0 / 45.0, 6.0 / 45.0 },
        { 7.0 / 45.0, 8.0 / 45.0, 9.0 / 45.0 }
    };

    // Form 50 contingency tables, each with N = 50 entries.
    Random rng = new Random(314159);
    BivariateSample p22s = new BivariateSample();
    BivariateSample pr0s = new BivariateSample();
    BivariateSample pc1s = new BivariateSample();
    BivariateSample pr2c0s = new BivariateSample();
    BivariateSample pc1r2s = new BivariateSample();
    for (int tableIndex = 0; tableIndex < 50; tableIndex++) {
        ContingencyTable table = new ContingencyTable(3, 3);
        for (int entry = 0; entry < 50; entry++) {
            int r, c;
            ChooseRandomCell(population, rng.NextDouble(), out r, out c);
            table.Increment(r, c);
        }
        Assert.IsTrue(table.Total == 50);

        // For each contingency table, compute estimates of various population quantities,
        // recording each estimate (X) together with its claimed uncertainty (Y).
        UncertainValue p22 = table.Probability(2, 2);
        UncertainValue pr0 = table.ProbabilityOfRow(0);
        UncertainValue pc1 = table.ProbabilityOfColumn(1);
        UncertainValue pr2c0 = table.ProbabilityOfRowConditionalOnColumn(2, 0);
        UncertainValue pc1r2 = table.ProbabilityOfColumnConditionalOnRow(1, 2);
        p22s.Add(p22.Value, p22.Uncertainty);
        pr0s.Add(pr0.Value, pr0.Uncertainty);
        pc1s.Add(pc1.Value, pc1.Uncertainty);
        pr2c0s.Add(pr2c0.Value, pr2c0.Uncertainty);
        pc1r2s.Add(pc1r2.Value, pc1r2.Uncertainty);
    }

    // The estimated population mean of each probability should include the
    // corresponding probability of the underlying population.
    Assert.IsTrue(p22s.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(9.0 / 45.0));
    Assert.IsTrue(pr0s.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(6.0 / 45.0));
    Assert.IsTrue(pc1s.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(15.0 / 45.0));
    Assert.IsTrue(pr2c0s.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(7.0 / 12.0));
    Assert.IsTrue(pc1r2s.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(8.0 / 24.0));

    // The estimated uncertainty for each population parameter should be the standard
    // deviation of the estimates across independent measurements; since the reported
    // uncertainty changes from table to table, we compare against its mean.
    Assert.IsTrue(p22s.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(p22s.Y.Mean));
    Assert.IsTrue(pr0s.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(pr0s.Y.Mean));
    Assert.IsTrue(pc1s.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(pc1s.Y.Mean));
    Assert.IsTrue(pr2c0s.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(pr2c0s.Y.Mean));
    Assert.IsTrue(pc1r2s.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(pc1r2s.Y.Mean));
}
public void BivariateLinearPolynomialRegressionAgreement() {
    // A degree-1 polynomial fit should give the same answer as a linear fit.
    BivariateSample data = new BivariateSample();
    double[][] points = {
        new[] { 0.0, 5.0 }, new[] { 3.0, 6.0 }, new[] { 1.0, 7.0 }, new[] { 4.0, 8.0 }, new[] { 2.0, 9.0 }
    };
    foreach (double[] point in points) {
        data.Add(point[0], point[1]);
    }

    FitResult polynomialFit = data.PolynomialRegression(1);
    FitResult linearFit = data.LinearRegression();

    // Parameters, covariances, and goodness-of-fit should all agree.
    Assert.IsTrue(TestUtilities.IsNearlyEqual(polynomialFit.Parameters, linearFit.Parameters));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(polynomialFit.CovarianceMatrix, linearFit.CovarianceMatrix));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(polynomialFit.GoodnessOfFit.Statistic, linearFit.GoodnessOfFit.Statistic));
}
public void BivariateNullAssociation() {
    Random rng = new Random(314159265);

    // Accumulate the statistics of our three association tests.
    Sample pearsonStats = new Sample();
    Sample spearmanStats = new Sample();
    Sample kendallStats = new Sample();

    // Hold the claimed null distribution of each test statistic.
    Distribution pearsonDist = null;
    Distribution spearmanDist = null;
    Distribution kendallDist = null;

    // Generate a large number of uncorrelated bivariate samples and run all three tests on each.
    for (int j = 0; j < 100; j++) {

        // The sample size should be large so that asymptotic assumptions are justified.
        BivariateSample sample = new BivariateSample();
        for (int i = 0; i < 100; i++) {
            sample.Add(rng.NextDouble(), rng.NextDouble());
        }

        TestResult pearson = sample.PearsonRTest();
        pearsonStats.Add(pearson.Statistic);
        pearsonDist = pearson.Distribution;

        TestResult spearman = sample.SpearmanRhoTest();
        spearmanStats.Add(spearman.Statistic);
        spearmanDist = spearman.Distribution;

        TestResult kendall = sample.KendallTauTest();
        kendallStats.Add(kendall.Statistic);
        kendallDist = kendall.Distribution;
    }

    // KS tests to check whether the statistics follow their claimed null distributions.
    Assert.IsTrue(pearsonStats.KolmogorovSmirnovTest(pearsonDist).LeftProbability < 0.95);
    Assert.IsTrue(spearmanStats.KolmogorovSmirnovTest(spearmanDist).LeftProbability < 0.95);
    Assert.IsTrue(kendallStats.KolmogorovSmirnovTest(kendallDist).LeftProbability < 0.95);
}
public void BivariatePolynomialRegression() {
    // Do a set of polynomial regression fits, checking not only that the fit parameters
    // are what they should be, but also that their variances/covariances are as claimed.
    Random rng = new Random(271828);

    // The polynomial coefficients of the underlying model.
    double[] a = new double[] { 0.0, -1.0, 2.0, -3.0 };

    // Track the fitted parameters from each fit.
    MultivariateSample parameterSample = new MultivariateSample(a.Length);
    // Also track the returned covariance estimates; since these vary slightly
    // from fit to fit, we average them.
    SymmetricMatrix covarianceSum = new SymmetricMatrix(a.Length);
    // Also track the goodness-of-fit statistics.
    Sample fitStatistics = new Sample();

    // Do 100 fits.
    for (int k = 0; k < 100; k++) {

        // The x's can be drawn from any distribution; the noise must be normal.
        Distribution xd = new CauchyDistribution();
        Distribution nd = new NormalDistribution(0.0, 4.0);

        // Generate a synthetic data set.
        BivariateSample s = new BivariateSample();
        for (int j = 0; j < 20; j++) {
            double x = xd.GetRandomValue(rng);
            double y = nd.GetRandomValue(rng);
            for (int i = 0; i < a.Length; i++) {
                y += a[i] * MoreMath.Pow(x, i);
            }
            s.Add(x, y);
        }

        // Do the regression and record parameters, covariances, and the fit statistic.
        FitResult r = s.PolynomialRegression(a.Length - 1);
        parameterSample.Add(r.Parameters);
        covarianceSum += r.CovarianceMatrix;
        fitStatistics.Add(r.GoodnessOfFit.Statistic);
    }
    SymmetricMatrix meanCovariance = (1.0 / parameterSample.Count) * covarianceSum;

    // Check that mean parameter estimates match the underlying population parameters.
    for (int i = 0; i < parameterSample.Dimension; i++) {
        Console.WriteLine("{0} {1}", parameterSample.Column(i).PopulationMean, a[i]);
        Assert.IsTrue(parameterSample.Column(i).PopulationMean.ConfidenceInterval(0.95).ClosedContains(a[i]));
    }

    // Check that parameter covariances match the reported covariance estimates.
    for (int i = 0; i < parameterSample.Dimension; i++) {
        for (int j = i; j < parameterSample.Dimension; j++) {
            Console.WriteLine("{0} {1} {2} {3}", i, j, meanCovariance[i, j], parameterSample.TwoColumns(i, j).PopulationCovariance);
            Assert.IsTrue(parameterSample.TwoColumns(i, j).PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(meanCovariance[i, j]));
        }
    }
}
public void BivariateLinearRegressionGoodnessOfFitDistribution() {
    // For uncorrelated x and y values, the F-test statistics returned by linear fits
    // should follow the expected F-distribution.
    Random rng = new Random(987654321);
    NormalDistribution xd = new NormalDistribution(1.0, 2.0);
    NormalDistribution yd = new NormalDistribution(-3.0, 4.0);

    Sample fs = new Sample();
    for (int i = 0; i < 127; i++) {
        BivariateSample xys = new BivariateSample();
        for (int j = 0; j < 7; j++) {
            xys.Add(xd.GetRandomValue(rng), yd.GetRandomValue(rng));
        }
        fs.Add(xys.LinearRegression().GoodnessOfFit.Statistic);
    }

    Distribution fd = new FisherDistribution(1, 5);
    Console.WriteLine("{0} v. {1}", fs.PopulationMean, fd.Mean);
    TestResult t = fs.KolmogorovSmirnovTest(fd);
    Console.WriteLine(t.LeftProbability);
    Assert.IsTrue(t.LeftProbability < 0.95);
}
public void NormalFitUncertainties() {
    NormalDistribution N = new NormalDistribution(-1.0, 2.0);

    // A bivariate sample holds the fitted (mu, sigma) pairs, so we can determine
    // their covariance as well as their means and variances.
    BivariateSample fits = new BivariateSample();
    double cmm = 0.0, css = 0.0, cms = 0.0;

    // Repeatedly create a normal sample and fit it.
    for (int i = 0; i < 64; i++) {
        // Use small samples so that the variation in mu and sigma is more substantial.
        Sample s = TestUtilities.CreateSample(N, 16, i);
        FitResult fit = NormalDistribution.FitToSample(s);
        // Record the fitted mu and sigma, and the claimed covariances among them.
        fits.Add(fit.Parameter(0).Value, fit.Parameter(1).Value);
        cmm += fit.Covariance(0, 0);
        css += fit.Covariance(1, 1);
        cms += fit.Covariance(0, 1);
    }
    cmm /= fits.Count;
    css /= fits.Count;
    cms /= fits.Count;

    // The mean fit values should agree with the population distribution.
    Console.WriteLine("{0} {1}", fits.X.PopulationMean, N.Mean);
    Assert.IsTrue(fits.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(N.Mean));
    Console.WriteLine("{0} {1}", fits.Y.PopulationMean, N.StandardDeviation);
    Assert.IsTrue(fits.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(N.StandardDeviation));

    // The covariances of the fit values should agree with the claimed covariances.
    Console.WriteLine("{0} {1}", fits.X.PopulationVariance, cmm);
    Assert.IsTrue(fits.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(cmm));
    Console.WriteLine("{0} {1}", fits.Y.PopulationVariance, css);
    Assert.IsTrue(fits.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(css));
    Console.WriteLine("{0} {1}", fits.PopulationCovariance, cms);
    Assert.IsTrue(fits.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(cms));
}
public void PearsonRDistribution() {
    Random rng = new Random(1);

    // Pick underlying distributions for the sample variables; these must be normal
    // but can have any parameters.
    NormalDistribution xDistribution = new NormalDistribution(1, 2);
    NormalDistribution yDistribution = new NormalDistribution(3, 4);

    // Try several small sample sizes, where the difference from the normal distribution shows:
    // n = 3 maxima at ends; n = 4 uniform; n = 5 semi-circular "mound"; n = 6 parabolic "mound".
    foreach (int n in new int[] { 3, 4, 5, 6, 8 }) {
        Console.WriteLine("n={0}", n);

        // Collect r values from uncorrelated bivariate samples of the given size.
        Sample rSample = new Sample();
        for (int i = 0; i < 100; i++) {
            BivariateSample xySample = new BivariateSample();
            for (int j = 0; j < n; j++) {
                xySample.Add(xDistribution.GetRandomValue(rng), yDistribution.GetRandomValue(rng));
            }
            rSample.Add(xySample.PearsonRTest().Statistic);
        }

        // Check whether r is distributed as expected.
        TestResult result = rSample.KolmogorovSmirnovTest(new PearsonRDistribution(n));
        Console.WriteLine("P={0}", result.LeftProbability);
        Assert.IsTrue(result.LeftProbability < 0.95);
    }
}
public void BivariateSampleManipulations() {
    // Exercise Add, Remove, Contains, and Clear.
    BivariateSample sample = new BivariateSample();
    sample.Add(1.0, 3.0);
    sample.Add(2.0, 2.0);
    sample.Add(3.0, 1.0);
    Assert.IsTrue(sample.Count == 3);

    // Removing a present point succeeds once, then fails on the second attempt.
    Assert.IsTrue(sample.Remove(2.0, 2.0));
    Assert.IsTrue(sample.Count == 2);
    Assert.IsFalse(sample.Remove(2.0, 2.0));

    // Containment checks for present and absent points.
    Assert.IsTrue(sample.Contains(1.0, 3.0));
    Assert.IsFalse(sample.Contains(3.0, 3.0));

    sample.Clear();
    Assert.IsTrue(sample.Count == 0);
}
public void WaldFitUncertainties() {
    WaldDistribution wald = new WaldDistribution(3.5, 2.5);
    Random rng = new Random(314159);

    // Fit many Wald samples, recording the fitted parameter pairs and the claimed covariances.
    BivariateSample fits = new BivariateSample();
    double cmm = 0.0;
    double css = 0.0;
    double cms = 0.0;
    for (int i = 0; i < 50; i++) {
        Sample sample = new Sample();
        for (int j = 0; j < 50; j++) {
            sample.Add(wald.GetRandomValue(rng));
        }
        FitResult fit = WaldDistribution.FitToSample(sample);
        fits.Add(fit.Parameter(0).Value, fit.Parameter(1).Value);
        cmm += fit.Covariance(0, 0);
        css += fit.Covariance(1, 1);
        cms += fit.Covariance(0, 1);
    }
    cmm /= fits.Count;
    css /= fits.Count;
    cms /= fits.Count;

    Console.WriteLine("{0} {1}", fits.X.PopulationMean, fits.Y.PopulationMean);
    Assert.IsTrue(fits.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(wald.Mean));
    Assert.IsTrue(fits.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(wald.ShapeParameter));

    // The ML shape-parameter estimate appears to be asymptotically unbiased, as it must be
    // according to ML fit theory, but is detectably upward biased for small n; we now correct for this.
    Console.WriteLine("{0} {1} {2}", fits.X.PopulationVariance, fits.Y.PopulationVariance, fits.PopulationCovariance);
    Console.WriteLine("{0} {1} {2}", cmm, css, cms);
    Assert.IsTrue(fits.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(cmm));
    Assert.IsTrue(fits.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(css));
    Assert.IsTrue(fits.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(cms));
}
/// <summary>
/// Beta calculation: estimates the company's beta by regressing stock returns against
/// index returns, then derives the cost of equity (CAPM) and WACC into <paramref name="dcfInput"/>.
/// </summary>
/// <param name="finDataAdapter">Adapter for retrieving all financial data.</param>
/// <param name="dcfInput">Holder of the DCF calculation assumptions.</param>
public static void CalculateBeta(FinDataAdapter finDataAdapter, DcfInput dcfInput) {
    BivariateSample bivariate = new BivariateSample(); // NOTE(review): unused; the regression below uses mv instead
    MultivariateSample mv = new MultivariateSample(2);
    decimal prevPrice = 0;
    double? prevIndex = null;
    decimal curPrice = 0;
    double? curIndex = null;
    int k = 0;
    // Step through the price history 22 entries at a time, taking at most 36 observations
    // (presumably ~monthly samples over ~3 years — TODO confirm the data cadence).
    for (int i = 0; i < finDataAdapter.PriceDataDao.PriceDatas.Count; i = i + 22) {
        if (k < 36) {
            PriceData pd = finDataAdapter.PriceDataDao.PriceDatas[i];
            curPrice = pd.AdjClose;
            curIndex = finDataAdapter.PriceDataDao.GetClosePrice(pd.PriceDate, finDataAdapter.PriceDataDao.IndexDatas)[0];
            // Only form a return pair once both the current and previous observations are valid.
            if (curPrice != 0 && curIndex != null && prevPrice != 0 && prevIndex != null) {
                //MessageBox.Show("s:" + ((double)(prevPrice / curPrice) - 1));
                //MessageBox.Show("i:" + ((double)(prevIndex / curIndex) - 1));
                ////bivariate.Add((double) (prevPrice/curPrice)-1,(double) (prevIndex/curIndex)-1);
                // NOTE(review): returns are computed as prev/cur - 1 rather than the usual
                // cur/prev - 1; since the same convention is applied to both series, the
                // regression slope is essentially unaffected — confirm intent.
                double[] db = new double[2];
                db[0] = ((double)(prevPrice / curPrice) - 1); // stock return
                db[1] = ((double)(prevIndex / curIndex) - 1); // index return
                mv.Add(db);
            }
            prevPrice = curPrice;
            prevIndex = curIndex;
            //DateTime dt = finDataAdapter.PriceDataDao.PriceDatas[i].PriceDate;
            //MessageBox.Show(finDataAdapter.PriceDataDao.PriceDatas[i].AdjClose + " " +
            //                dt.ToShortDateString());
            //MessageBox.Show(finDataAdapter.PriceDataDao.GetClosePrice(dt, finDataAdapter.PriceDataDao.IndexDatas)[0].ToString());
        }
        k++;
    }
    // Require a minimal number of observations before trusting the regression.
    if (mv.Count > 10) {
        //FitResult fitResult = bivariate.LinearRegression();
        // Regress using column 0 as the dependent variable; Parameter(1) is taken as beta —
        // presumably the slope on the index-return column; verify against Meta.Numerics docs.
        FitResult fitResult = mv.LinearRegression(0);
        dcfInput.Beta = fitResult.Parameter(1).Value;
        List<FinData> finDatas = finDataAdapter.FinDataDao.FinDatas;
        // CAPM: cost of equity = risk-free rate + beta * market risk premium.
        dcfInput.CostOfEquity = dcfInput.RiskFreeRate + dcfInput.Beta * dcfInput.MarketRiskPremium;
        // Total debt = current portion of long-term debt + total long-term debt,
        // taken from the latest available balance-sheet entry.
        double debt = 0;
        if (finDatas[finDatas.Count - 1].BsCurrentPortionOfLongTermDebt != null) {
            debt += (double)finDatas[finDatas.Count - 1].BsCurrentPortionOfLongTermDebt;
        }
        if (finDatas[finDatas.Count - 1].BsTotalLongTermDebt != null) {
            debt += (double)finDatas[finDatas.Count - 1].BsTotalLongTermDebt;
        }
        // Total capital = shareholders' equity + debt.
        double total = 0.0;
        if (finDatas[finDatas.Count - 1].BsShareholdersEquity1 != null) {
            total += (double)(finDatas[finDatas.Count - 1].BsShareholdersEquity1);
        }
        total += debt;
        try {
            // WACC = E/V * costOfEquity + D/V * costOfDebt * (1 - taxRate).
            // The catch swallows the InvalidOperationException that the nullable cast likely
            // throws when BsShareholdersEquity1 is null, leaving Wacc unset.
            // NOTE(review): total == 0 would produce division by zero (NaN/Infinity),
            // not an exception — that case is not handled here.
            dcfInput.Wacc = dcfInput.CostOfEquity * (double)(finDatas[finDatas.Count - 1].BsShareholdersEquity1 / total) + dcfInput.CostOfDebt * (double)(debt / total) * (1 - dcfInput.TaxRate);
        } catch (InvalidOperationException) { }
        //MessageBox.Show("beta: "+fitResult.Parameter(1).Value.ToString());
        //double[] pars = fitResult.Parameters();
        //foreach (var par in pars)
        //{
        //    MessageBox.Show(par.ToString());
        //}
        //MessageBox.Show(fitResult.CorrelationCoefficient(0,1).ToString());
        //double[] gfit = fitResult.
        //MessageBox.Show(fitResult.);
        //MessageBox.Show(fitResult.Parameter(2).ToString());
    }
}
public void Bug6162() {
    // When UncertainMeasurementSample.FitToPolynomial used Cholesky inversion of (A^T A),
    // the inversion would fail when roundoff errors made the matrix non-positive-definite.
    // We have since changed to QR decomposition, which is more robust.

    // Real data.
    double[] xs = new double[] {
        40270.65625, 40270.6569444444, 40270.6576388888, 40270.6583333332, 40270.6590277776,
        40270.659722222, 40270.6604166669, 40270.6611111113, 40270.6618055557, 40270.6625000001
    };
    double[] ys = new double[] {
        246.824996948242, 246.850006103516, 245.875, 246.225006103516, 246.975006103516,
        247.024993896484, 246.949996948242, 246.875, 247.5, 247.100006103516
    };

    UncertainMeasurementSample dataSet = new UncertainMeasurementSample();
    for (int i = 0; i < 10; i++) {
        dataSet.Add(xs[i], ys[i], 1);
    }
    FitResult dataFit = dataSet.FitToPolynomial(3);
    for (int i = 0; i < dataFit.Dimension; i++) {
        Console.WriteLine($"a{i} = {dataFit.Parameter(i).Value}");
    }

    // The same cubic fit via BivariateSample.PolynomialRegression should also succeed.
    BivariateSample bs = new BivariateSample();
    for (int i = 0; i < 10; i++) {
        bs.Add(xs[i], ys[i]);
    }
    FitResult bsFit = bs.PolynomialRegression(3);
    for (int i = 0; i < bsFit.Dimension; i++) {
        Console.WriteLine(bsFit.Parameter(i));
    }
}
// Not fixing this bug; use polynomial interpolation for this scenario instead.
//[TestMethod]
public void Bug6392() {
    // The bug requests that we support regression with the number of points equal to
    // the number of fit parameters, i.e. an exact polynomial fit.
    BivariateSample biSample = new BivariateSample();
    biSample.Add(0, 1);
    biSample.Add(1, -1);
    FitResult fitResult = biSample.LinearRegression();
}
public void SpearmanNullDistributionTest() {
    // Pick independent distributions for x and y, which needn't be normal
    // and needn't be related.
    Distribution xDistribution = new UniformDistribution();
    Distribution yDistribution = new CauchyDistribution();
    Random rng = new Random(1);

    // Generate bivariate samples of various sizes.
    foreach (int n in TestUtilities.GenerateIntegerValues(4, 64, 8)) {

        Sample testStatistics = new Sample();
        Distribution testDistribution = null;

        for (int i = 0; i < 128; i++) {
            BivariateSample sample = new BivariateSample();
            for (int j = 0; j < n; j++) {
                sample.Add(xDistribution.GetRandomValue(rng), yDistribution.GetRandomValue(rng));
            }
            TestResult result = sample.SpearmanRhoTest();
            testStatistics.Add(result.Statistic);
            testDistribution = result.Distribution;
        }

        // The observed statistics should follow the claimed null distribution.
        TestResult r2 = testStatistics.KuiperTest(testDistribution);
        Console.WriteLine("n={0} P={1}", n, r2.LeftProbability);
        Assert.IsTrue(r2.RightProbability > 0.05);
        Assert.IsTrue(testStatistics.PopulationMean.ConfidenceInterval(0.99).ClosedContains(testDistribution.Mean));
        Assert.IsTrue(testStatistics.PopulationVariance.ConfidenceInterval(0.99).ClosedContains(testDistribution.Variance));
    }
}
public void PairedStudentTTest() {
    // A small paired data set.
    BivariateSample sample = new BivariateSample();
    double[,] pairs = { { 3, 5 }, { 0, 1 }, { 6, 5 }, { 7, 7 }, { 4, 10 }, { 3, 9 }, { 2, 7 }, { 1, 11 }, { 4, 8 } };
    for (int i = 0; i < pairs.GetLength(0); i++) {
        sample.Add(pairs[i, 0], pairs[i, 1]);
    }
    Console.WriteLine(sample.Count);

    TestResult result = sample.PairedStudentTTest();
    Console.WriteLine(result.Statistic);
    Console.WriteLine(result.LeftProbability);
}
public void BivariateSampleCopy() {
    // A copy must be independent of the original.
    BivariateSample original = new BivariateSample();
    original.Add(1.0, 2.0);

    BivariateSample copy = original.Copy();
    copy.Add(3.0, 4.0);

    // Adding to the copy must not affect the original.
    Assert.IsTrue(original.Count == 1);
    Assert.IsTrue(copy.Count == 2);
}
public void BivariateSampleEnumerations() {
    // Populate a sample from a list of points.
    List<XY> points = new List<XY>() { new XY(1.0, 2.0), new XY(2.0, 3.0), new XY(3.0, 4.0) };
    BivariateSample sample = new BivariateSample();
    sample.Add(points);
    Assert.IsTrue(sample.Count == points.Count);

    // Every enumerated point should appear in the source list exactly once.
    foreach (XY point in sample) {
        Assert.IsTrue(points.Remove(point));
    }
    Assert.IsTrue(points.Count == 0);
}
public void WeibullFitUncertainties() {
    // Check that the reported uncertainty in fit parameters is actually meaningful:
    // it should be the standard deviation of the parameter values over many fits.

    // Define a population distribution.
    Distribution distribution = new WeibullDistribution(2.5, 1.5);

    // Draw many samples from it; fit each sample and record
    // the reported parameter values and uncertainties.
    BivariateSample values = new BivariateSample();
    MultivariateSample uncertainties = new MultivariateSample(3);
    for (int i = 0; i < 50; i++) {
        Sample sample = CreateSample(distribution, 10, i);
        FitResult fit = WeibullDistribution.FitToSample(sample);
        UncertainValue p0 = fit.Parameter(0);
        UncertainValue p1 = fit.Parameter(1);
        values.Add(p0.Value, p1.Value);
        uncertainties.Add(p0.Uncertainty, p1.Uncertainty, fit.Covariance(0, 1));
    }

    // The reported errors should agree with the standard deviations of the reported parameters.
    Assert.IsTrue(values.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(uncertainties.Column(0).Mean));
    Assert.IsTrue(values.Y.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(uncertainties.Column(1).Mean));
}
public void LinearLogisticRegression() {
    // Do a set of logistic regression fits, checking not only that the fit parameters are
    // what they should be, but also that their variances/covariances are as returned.
    Random rng = new Random(314159);

    // The underlying logistic parameters.
    double a0 = 1.0;
    double b0 = -1.0 / 2.0;

    // Track the sample of returned a and b fit parameters.
    BivariateSample ps = new BivariateSample();
    // Also track the returned covariance estimates; since these vary slightly
    // from fit to fit, we average them.
    double caa = 0.0;
    double cbb = 0.0;
    double cab = 0.0;

    // Do 50 fits.
    for (int k = 0; k < 50; k++) {
        Console.WriteLine("k={0}", k);

        // Generate a synthetic data set: Bernoulli outcomes with logistic probability.
        BivariateSample s = new BivariateSample();
        for (int i = 0; i < 50; i++) {
            double x = 2.0 * rng.NextDouble() - 1.0;
            double ez = Math.Exp(a0 + b0 * x);
            double P = ez / (1.0 + ez);
            s.Add(x, (rng.NextDouble() < P) ? 1.0 : 0.0);
        }

        // Do the regression and record the best-fit parameters.
        FitResult r = s.LinearLogisticRegression();
        double a = r.Parameter(0).Value;
        double b = r.Parameter(1).Value;
        ps.Add(a, b);
        Console.WriteLine("{0}, {1}", a, b);

        // Record the estimated covariances.
        caa += r.Covariance(0, 0);
        cbb += r.Covariance(1, 1);
        cab += r.Covariance(0, 1);
    }
    caa /= ps.Count;
    cbb /= ps.Count;
    cab /= ps.Count;

    // Mean parameter estimates should match the underlying population parameters.
    Assert.IsTrue(ps.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(a0));
    Assert.IsTrue(ps.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(b0));

    // Parameter covariances should match the reported covariance estimates.
    Assert.IsTrue(ps.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(caa));
    Assert.IsTrue(ps.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(cbb));
    Assert.IsTrue(ps.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(cab));
}
public void BivariateLogisticRegression() {
    double[] c = new double[] { -0.1, 1.0 };
    Random rng = new Random(1);
    UniformDistribution pointDistribution = new UniformDistribution(Interval.FromEndpoints(-4.0, 4.0));

    // Fill a bivariate sample and an equivalent two-column multivariate sample
    // with the same synthetic logistic data.
    BivariateSample sample1 = new BivariateSample();
    MultivariateSample sample2 = new MultivariateSample(2);
    for (int trial = 0; trial < 1000; trial++) {
        double x = pointDistribution.GetRandomValue(rng);
        double z = c[0] * x + c[1];
        double ez = Math.Exp(z);
        double p = ez / (1.0 + ez);
        double y = (rng.NextDouble() < p) ? 1.0 : 0.0;
        sample1.Add(x, y);
        sample2.Add(x, y);
    }

    Console.WriteLine(sample1.Covariance / sample1.X.Variance / sample1.Y.Mean / (1.0 - sample1.Y.Mean));
    Console.WriteLine(sample1.Covariance / sample1.X.Variance / sample1.Y.Variance);

    // The three regression entry points should give consistent results.
    FitResult result1 = sample1.LinearLogisticRegression();
    FitResult result2 = sample2.TwoColumns(0, 1).LinearLogisticRegression();
    FitResult result3 = sample2.LogisticLinearRegression(1);
    for (int i = 0; i < result1.Dimension; i++) {
        Console.WriteLine("{0} {1} {2}", i, result1.Parameter(i), result3.Parameter(i));
    }
}
public void BivariateLinearRegression() {
    // Do a set of linear regression fits, checking not only that the fit parameters are
    // what they should be, but also that their variances/covariances are as returned.
    Random rng = new Random(314159);

    // The underlying linear parameters.
    double a0 = 2.0;
    double b0 = -1.0;

    // Track the sample of returned a and b fit parameters.
    BivariateSample ps = new BivariateSample();
    // Also track the returned covariance estimates; since these vary slightly
    // from fit to fit, we average them.
    double caa = 0.0;
    double cbb = 0.0;
    double cab = 0.0;
    // Also track the goodness-of-fit statistics.
    Sample fs = new Sample();

    // Do 100 fits.
    for (int k = 0; k < 100; k++) {

        // The x's can be drawn from any distribution; the noise must be normal.
        Distribution xd = new LogisticDistribution();
        Distribution nd = new NormalDistribution(0.0, 2.0);

        // Generate a synthetic data set.
        BivariateSample s = new BivariateSample();
        for (int i = 0; i < 25; i++) {
            double x = xd.GetRandomValue(rng);
            s.Add(x, a0 + b0 * x + nd.GetRandomValue(rng));
        }

        // Do the regression and record the best-fit parameters.
        FitResult r = s.LinearRegression();
        double a = r.Parameter(0).Value;
        double b = r.Parameter(1).Value;
        ps.Add(a, b);

        // Record the estimated covariances and the fit statistic.
        caa += r.Covariance(0, 0);
        cbb += r.Covariance(1, 1);
        cab += r.Covariance(0, 1);
        fs.Add(r.GoodnessOfFit.Statistic);
        Console.WriteLine("F={0}", r.GoodnessOfFit.Statistic);
    }
    caa /= ps.Count;
    cbb /= ps.Count;
    cab /= ps.Count;

    // Mean parameter estimates should match the underlying population parameters.
    Assert.IsTrue(ps.X.PopulationMean.ConfidenceInterval(0.95).ClosedContains(a0));
    Assert.IsTrue(ps.Y.PopulationMean.ConfidenceInterval(0.95).ClosedContains(b0));
    Console.WriteLine("{0} {1}", caa, ps.X.PopulationVariance);
    Console.WriteLine("{0} {1}", cbb, ps.Y.PopulationVariance);

    // Parameter covariances should match the reported covariance estimates.
    Assert.IsTrue(ps.X.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(caa));
    Assert.IsTrue(ps.Y.PopulationVariance.ConfidenceInterval(0.95).ClosedContains(cbb));
    Assert.IsTrue(ps.PopulationCovariance.ConfidenceInterval(0.95).ClosedContains(cab));

    // Check that F is distributed as it should be.
    Console.WriteLine(fs.KolmogorovSmirnovTest(new FisherDistribution(2, 48)).LeftProbability);
}
public void GammaFitUncertainty() {
    // Check that the reported uncertainty in fit parameters is actually meaningful:
    // it should be the standard deviation of the parameter values over many fits.

    // Define a population distribution.
    Distribution distribution = new GammaDistribution(1.5, 2.0);

    // Draw many samples from it; fit each sample and record
    // the reported parameter values and errors.
    BivariateSample values = new BivariateSample();
    BivariateSample uncertainties = new BivariateSample();
    for (int i = 0; i < 100; i++) {
        Sample sample = CreateSample(distribution, 50, i);
        FitResult fit = GammaDistribution.FitToSample(sample);
        UncertainValue p0 = fit.Parameter(0);
        UncertainValue p1 = fit.Parameter(1);
        values.Add(p0.Value, p1.Value);
        uncertainties.Add(p0.Uncertainty, p1.Uncertainty);
    }

    // The reported errors should agree with the standard deviations of the reported parameters.
    Assert.IsTrue(values.X.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(uncertainties.X.Mean));
    Assert.IsTrue(values.Y.PopulationStandardDeviation.ConfidenceInterval(0.95).ClosedContains(uncertainties.Y.Mean));
}