/// <summary>
/// Builds a human-readable summary (containing HTML &lt;b&gt; markup) of an experiment's outcome:
/// an optional caution when the p-value calculation reports that its assumptions were not upheld,
/// the figures for the best alternative, and a statement about statistical significance.
/// </summary>
/// <param name="test">The experiment to describe.</param>
/// <returns>
/// The description text. If computing the p-value throws, the exception's message is returned instead.
/// </returns>
public string GetResultDescription(Experiment test)
{
    double p;
    bool testAssumptionsUpheld;
    try
    {
        p = GetPValue(test, out testAssumptionsUpheld);
    }
    catch (Exception e)
    {
        // Deliberate best-effort: surface the failure text to the caller rather than propagating.
        return e.Message;
    }

    StringBuilder builder = new StringBuilder();
    if (!testAssumptionsUpheld)
    {
        builder.Append("Caution: the sample did not conform to the expected cell frequency condition! ");
    }

    ABAlternative best = test.GetBestAlternative();
    builder.AppendFormat(
        @" The best alternative you have is: [{0}], which had {1} conversions from {2} participants ({3}). ",
        best.Content,
        best.Conversions,
        best.Participants,
        best.PrettyConversionRate);

    // A p-value of exactly 1 is reported as "not significant".
    if (p == 1)
    {
        builder.Append("However, this result is not statistically significant.");
    }
    else
    {
        // {0} = percentage text for p, {1} = wording looked up in Descriptions for p, {2} = the raw p-value.
        builder.AppendFormat(
            @" This difference is <b>{0} likely to be statistically significant (p <= {2})</b>, which means you can be {1} that it is the result of your alternatives actually mattering, rather than being due to random chance. However, this statistical test can't measure how likely the currently observed magnitude of the difference is to be accurate or not. It only says ""better"", not ""better by so much"". ",
            ToPercentageString(p),
            Descriptions[p],
            p);
    }

    return builder.ToString();
}
/// <summary>
/// Looks up a p-value for the experiment in the ZScores table, using the absolute value of the
/// z-score returned by GetZScore.
/// </summary>
/// <param name="test">The experiment to evaluate.</param>
/// <returns>
/// Column 0 of the first row, scanning from the last row backwards, whose column 1 value is at or
/// below |z|; 1 when no row qualifies or when |z| is positive infinity.
/// </returns>
public double GetPValue(Experiment test)
{
    double magnitude = Math.Abs(GetZScore(test));
    if (double.IsPositiveInfinity(magnitude))
    {
        return 1;
    }

    // Rows are scanned from the end of the table back to the start. The original author's note
    // records that a forward scan returned p = 0.1 for any z above 1.29 (e.g. z = 10.4), which
    // is why the direction is reversed.
    for (int row = ZScores.GetLength(0) - 1; row >= 0; row--)
    {
        if (magnitude >= ZScores[row, 1])
        {
            return ZScores[row, 0];
        }
    }

    return 1;
}
/// <summary>
/// Builds a human-readable summary (containing HTML &lt;b&gt; markup) of a two-alternative experiment:
/// an optional caveat when any alternative's sample fails its assumption check, the figures for
/// the best and the other alternative, and a statement about statistical significance.
/// </summary>
/// <param name="test">The experiment to describe.</param>
/// <returns>
/// The description text. If computing the p-value throws, the exception's message is returned instead.
/// </returns>
public string GetResultDescription(Experiment test)
{
    double p;
    try
    {
        p = GetPValue(test);
    }
    catch (Exception e)
    {
        // Deliberate best-effort: surface the failure text to the caller rather than propagating.
        return e.Message;
    }

    StringBuilder builder = new StringBuilder();

    // Per the original author's note, a two-proportion z-test needs at least 10 successes AND
    // 10 failures in each sample, so a bare participant-count check is not sufficient; the check
    // is delegated to each alternative's own flag.
    if (test.Alternatives.Any(x => !x.SampleMeetsTestAssumtions))
    {
        builder.Append("Take these results with a grain of salt since your samples do not meet the required assumptions: ");
    }

    ABAlternative best = test.GetBestAlternative();
    ABAlternative worst = test.GetWorstAlternative();
    builder.AppendFormat(
        @" The best alternative you have is: [{0}], which had {1} conversions from {2} participants ({3}). The other alternative was [{4}], which had {5} conversions from {6} participants ({7}). ",
        best.Content,
        best.Conversions,
        best.Participants,
        best.PrettyConversionRate,
        worst.Content,
        worst.Conversions,
        worst.Participants,
        worst.PrettyConversionRate);

    // A p-value of exactly 1 is reported as "not significant".
    if (p == 1)
    {
        builder.Append("However, this difference is not statistically significant.");
    }
    else
    {
        // {0} = percentage text for p, {1} = wording looked up in Descriptions for p, {2} = the raw p-value.
        builder.AppendFormat(
            @" This difference is <b>{0} likely to be statistically significant (p <= {2})</b>, which means you can be {1} that it is the result of your alternatives actually mattering, rather than being due to random chance. However, this statistical test can't measure how likely the currently observed magnitude of the difference is to be accurate or not. It only says ""better"", not ""better by so much"". ",
            ToPercentageString(p),
            Descriptions[p],
            p);
    }

    return builder.ToString();
}
/// <summary>
/// Computes the z-score of a two-proportion z-test between the experiment's two alternatives,
/// using the pooled proportion for the standard error.
/// Reference cited by the original author: "Intro Stats", De Veaux, Velleman and Bock, p. 566.
/// </summary>
/// <param name="test">The experiment to evaluate.</param>
/// <returns>
/// (rate of alternative 0 - rate of alternative 1) / pooled standard error, or
/// <see cref="double.PositiveInfinity"/> when the experiment does not have exactly two alternatives
/// or when <c>AllAlternativesHaveParticipants</c> is false.
/// </returns>
private double GetZScore(Experiment test)
{
    // Unsupported inputs yield +infinity instead of throwing.
    if (test.Alternatives.Count != 2 || !test.AllAlternativesHaveParticipants)
    {
        return double.PositiveInfinity;
    }

    var first = test.Alternatives[0];
    var second = test.Alternatives[1];

    double rateFirst = first.ConversionRate;
    double rateSecond = second.ConversionRate;

    // Success counts are widened to double so the pooled proportion is not an integer division.
    double successesFirst = first.Successes;
    double successesSecond = second.Successes;

    int sizeFirst = first.Participants;
    int sizeSecond = second.Participants;
    int sizeTotal = sizeFirst + sizeSecond;

    // Pooled estimate of the success proportion across both samples.
    double pooledProportion = (successesFirst + successesSecond) / sizeTotal;

    // Each sample contributes pooled * (1 - pooled) / n to the variance of the difference.
    double varianceFirst = pooledProportion * (1 - pooledProportion) / sizeFirst;
    double varianceSecond = pooledProportion * (1 - pooledProportion) / sizeSecond;
    double standardError = Math.Sqrt(varianceFirst + varianceSecond);

    return (rateFirst - rateSecond) / standardError;
}
/// <summary>
/// Reports whether the experiment's p-value is at or below the supplied threshold.
/// </summary>
/// <param name="test">The experiment to evaluate.</param>
/// <param name="pValue">The significance threshold to compare against.</param>
/// <returns>True when GetPValue(test) is less than or equal to <paramref name="pValue"/>.</returns>
public bool IsStatisticallySignificant(Experiment test, double pValue)
{
    double observed = GetPValue(test);
    return observed <= pValue;
}
/// <summary>
/// Reports whether the experiment is statistically significant at the 0.05 threshold.
/// </summary>
/// <param name="test">The experiment to evaluate.</param>
/// <returns>The result of the two-argument overload called with a threshold of 0.05.</returns>
public bool IsStatisticallySignificant(Experiment test)
{
    const double defaultThreshold = 0.05;
    return IsStatisticallySignificant(test, defaultThreshold);
}
/// <summary>
/// Returns the stored experiment with the given name, or creates, stores and saves a new one
/// when no experiment with that name exists yet.
/// </summary>
/// <param name="testName">Name used as the key in the stored set of tests.</param>
/// <param name="alternatives">
/// Controls describing the alternatives; each element is cast to <c>Alternative</c>. Only read when
/// a new experiment has to be created.
/// </param>
/// <returns>The existing or newly created experiment.</returns>
public Experiment GetOrCreateTest(string testName, ControlCollection alternatives)
{
    SerializableDictionary<string, Experiment> known = GetTests();
    if (known.ContainsKey(testName))
    {
        return known[testName];
    }

    // Collect one label per alternative; unnamed ones get a 1-based placeholder.
    string[] labels = new string[alternatives.Count];
    for (int index = 0; index < alternatives.Count; index++)
    {
        Alternative current = (Alternative)alternatives[index];
        labels[index] = !String.IsNullOrEmpty(current.Name)
            ? current.Name
            : "Alternative " + (index + 1);
    }

    Experiment created = new Experiment(testName, labels);
    known.Add(testName, created);
    SaveTests(known);
    return created;
}
/// <summary>
/// Picks the alternative that the current user should be shown for the given test, recording the
/// user's participation the first time they are seen for that test.
/// </summary>
/// <param name="test">The experiment to pick an alternative from.</param>
/// <returns>
/// The experiment's best alternative when <c>test.IsComplete</c> is true; otherwise the
/// alternative the experiment assigns to the identified user's ID.
/// </returns>
public ABAlternative GetUserAlternative(Experiment test)
{
    // Original note: an experiment is completed as soon as the required sample size is reached;
    // from then on everyone gets the best alternative and nothing more is scored.
    if (test.IsComplete)
    {
        return test.GetBestAlternative();
    }

    ABUser visitor = IdentifyUser();
    ABAlternative assigned = test.GetUserAlternative(visitor.ID);

    // Participation is scored at most once per identified user, and never for bot requests.
    if (visitor.Tests.Contains(test.TestName) || IsBotRequest())
    {
        return assigned;
    }

    assigned.ScoreParticipation();
    visitor.Tests.Add(test.TestName);
    visitor.SaveToCookie();

    // Persist the new participant count to the file store.
    ScoreParticipation(test.TestName, assigned);

    return assigned;
}
/// <summary>
/// Computes a chi-square statistic over the success and failure counts of all alternatives and
/// converts it to a p-value through LookupPValue.
/// </summary>
/// <param name="test">The experiment to evaluate.</param>
/// <param name="testAssumptionsUpheld">
/// Set to false when any expected success or failure count is below 5; true otherwise, including
/// when the method returns early.
/// </param>
/// <returns>
/// 1 when there are no participants, or when the overall success proportion is exactly 0 or 1;
/// otherwise the value of LookupPValue(chiSquare, number of alternatives).
/// </returns>
private double GetPValue(Experiment test, out bool testAssumptionsUpheld)
{
    testAssumptionsUpheld = true;

    int participants = test.Alternatives.Sum(x => x.Participants);
    if (participants <= 0)
    {
        return 1;
    }

    int successes = test.Alternatives.Sum(x => x.Successes);

    // pHat: estimated overall proportion of successes across all alternatives combined.
    double pHat = (double)successes / participants;
    // qHat: its complement, the estimated overall proportion of failures.
    double qHat = 1.0 - pHat;
    if (pHat == 0 || qHat == 0)
    {
        // Every expected count in one of the two columns would be zero; report "not significant".
        return 1;
    }

    int alternativeCount = test.Alternatives.Count;

    // chi^2 = sum over all cells of (observed - expected)^2 / expected.
    // The success cells are summed first, then the failure cells.
    double chiSquare = 0;
    for (int i = 0; i < alternativeCount; i++)
    {
        double expected = test.Alternatives[i].Participants * pHat;
        if (expected < 5)
        {
            testAssumptionsUpheld = false;
        }

        double observed = (double)test.Alternatives[i].Successes;
        chiSquare += Math.Pow(observed - expected, 2) / expected;
    }

    for (int i = 0; i < alternativeCount; i++)
    {
        double expected = test.Alternatives[i].Participants * qHat;
        if (expected < 5)
        {
            testAssumptionsUpheld = false;
        }

        double observed = (double)test.Alternatives[i].Failures;
        chiSquare += Math.Pow(observed - expected, 2) / expected;
    }

    return LookupPValue(chiSquare, alternativeCount);
}
/// <summary>
/// Returns the experiment's p-value, discarding the assumptions flag reported by the
/// two-argument overload.
/// </summary>
/// <param name="test">The experiment to evaluate.</param>
/// <returns>The p-value produced by the two-argument GetPValue overload.</returns>
public double GetPValue(Experiment test)
{
    bool ignoredAssumptionsFlag;
    double result = GetPValue(test, out ignoredAssumptionsFlag);
    return result;
}
/// <summary>
/// Reports whether the experiment is statistically significant at the 0.05 threshold.
/// </summary>
/// <param name="test">The experiment to evaluate.</param>
/// <returns>The result of the two-argument overload called with a threshold of 0.05.</returns>
public bool IsStatisticallySignificant(Experiment test)
{
    const double defaultThreshold = 0.05;
    bool significant = IsStatisticallySignificant(test, defaultThreshold);
    return significant;
}
/// <summary>
/// Builds a human-readable summary (containing HTML &lt;b&gt; markup) of a two-alternative experiment:
/// an optional caveat when any alternative's sample fails its assumption check, the figures for
/// the best and the other alternative, and a statement about statistical significance.
/// </summary>
/// <param name="test">The experiment to describe.</param>
/// <returns>
/// The description text. If computing the p-value throws, the exception's message is returned instead.
/// </returns>
public string GetResultDescription(Experiment test)
{
    double p;
    try
    {
        p = GetPValue(test);
    }
    catch (Exception e)
    {
        // Deliberate best-effort: surface the failure text to the caller rather than propagating.
        return e.Message;
    }

    StringBuilder builder = new StringBuilder();

    // Per the original author's note, a two-proportion z-test needs at least 10 successes AND
    // 10 failures in each sample, so a bare participant-count check is not sufficient; the check
    // is delegated to each alternative's own flag.
    if (test.Alternatives.Any(x => !x.SampleMeetsTestAssumtions))
    {
        builder.Append("Take these results with a grain of salt since your samples do not meet the required assumptions: ");
    }

    ABAlternative best = test.GetBestAlternative();
    ABAlternative worst = test.GetWorstAlternative();
    builder.AppendFormat(
        @" The best alternative you have is: [{0}], which had {1} conversions from {2} participants ({3}). The other alternative was [{4}], which had {5} conversions from {6} participants ({7}). ",
        best.Content,
        best.Conversions,
        best.Participants,
        best.PrettyConversionRate,
        worst.Content,
        worst.Conversions,
        worst.Participants,
        worst.PrettyConversionRate);

    // A p-value of exactly 1 is reported as "not significant".
    if (p == 1)
    {
        builder.Append("However, this difference is not statistically significant.");
    }
    else
    {
        // {0} = percentage text for p, {1} = wording looked up in Descriptions for p, {2} = the raw p-value.
        builder.AppendFormat(
            @" This difference is <b>{0} likely to be statistically significant (p <= {2})</b>, which means you can be {1} that it is the result of your alternatives actually mattering, rather than being due to random chance. However, this statistical test can't measure how likely the currently observed magnitude of the difference is to be accurate or not. It only says ""better"", not ""better by so much"". ",
            ToPercentageString(p),
            Descriptions[p],
            p);
    }

    return builder.ToString();
}
/// <summary>
/// Reports whether the experiment's p-value is at or below the supplied threshold.
/// </summary>
/// <param name="test">The experiment to evaluate.</param>
/// <param name="pValue">The significance threshold to compare against.</param>
/// <returns>True when GetPValue(test) is less than or equal to <paramref name="pValue"/>.</returns>
public bool IsStatisticallySignificant(Experiment test, double pValue)
{
    double computed = GetPValue(test);
    bool withinThreshold = computed <= pValue;
    return withinThreshold;
}
/// <summary>
/// Computes a chi-square statistic over the success and failure counts of all alternatives and
/// converts it to a p-value through LookupPValue.
/// </summary>
/// <param name="test">The experiment to evaluate.</param>
/// <param name="testAssumptionsUpheld">
/// Set to false when any expected success or failure count is below 5; true otherwise, including
/// when the method returns early.
/// </param>
/// <returns>
/// 1 when there are no participants, or when the overall success proportion is exactly 0 or 1;
/// otherwise the value of LookupPValue(chiSquare, number of alternatives).
/// </returns>
private double GetPValue(Experiment test, out bool testAssumptionsUpheld)
{
    testAssumptionsUpheld = true;

    int participants = test.Alternatives.Sum(x => x.Participants);
    if (participants <= 0)
    {
        return 1;
    }

    int successes = test.Alternatives.Sum(x => x.Successes);

    // pHat: estimated overall proportion of successes across all alternatives combined.
    double pHat = (double)successes / participants;
    // qHat: its complement, the estimated overall proportion of failures.
    double qHat = 1.0 - pHat;
    if (pHat == 0 || qHat == 0)
    {
        // Every expected count in one of the two columns would be zero; report "not significant".
        return 1;
    }

    int alternativeCount = test.Alternatives.Count;

    // chi^2 = sum over all cells of (observed - expected)^2 / expected.
    // The success cells are summed first, then the failure cells.
    double chiSquare = 0;
    for (int i = 0; i < alternativeCount; i++)
    {
        double expected = test.Alternatives[i].Participants * pHat;
        if (expected < 5)
        {
            testAssumptionsUpheld = false;
        }

        double observed = (double)test.Alternatives[i].Successes;
        chiSquare += Math.Pow(observed - expected, 2) / expected;
    }

    for (int i = 0; i < alternativeCount; i++)
    {
        double expected = test.Alternatives[i].Participants * qHat;
        if (expected < 5)
        {
            testAssumptionsUpheld = false;
        }

        double observed = (double)test.Alternatives[i].Failures;
        chiSquare += Math.Pow(observed - expected, 2) / expected;
    }

    return LookupPValue(chiSquare, alternativeCount);
}
/// <summary>
/// Returns the experiment's p-value, discarding the assumptions flag reported by the
/// two-argument overload.
/// </summary>
/// <param name="test">The experiment to evaluate.</param>
/// <returns>The p-value produced by the two-argument GetPValue overload.</returns>
public double GetPValue(Experiment test)
{
    bool ignoredAssumptionsFlag;
    double result = GetPValue(test, out ignoredAssumptionsFlag);
    return result;
}