/// <summary>
/// Estimates how many samples each group needs, using a two-sample T-test power analysis
/// seeded with the variances observed in the preliminary results.
/// </summary>
/// <param name="basedOnPreliminaryResults">Preliminary baseline/treatment measurements supplying the variances.</param>
/// <returns>The required sample counts for both groups, each rounded up to a whole sample.</returns>
public SamplesRequirement GetSampleSizeRequirement(BenchmarkResults.BeforeAndAfter basedOnPreliminaryResults)
{
    var baselineStatistics = basedOnPreliminaryResults.Baseline.ResultStatistics;
    var treatmentStatistics = basedOnPreliminaryResults.Treatment.ResultStatistics;

    // Only a diagnostic hint: the T-test analysis below is still performed for large samples.
    var bothSamplesAreLarge = baselineStatistics.N >= 30 && treatmentStatistics.N >= 30;
    if (bothSamplesAreLarge)
    {
        Trace.WriteLine("In this scenario, you should use Z-test");
    }

    // Variances are determined by the preliminary results
    var requiredSize = TwoSampleTTestPowerAnalysis.GetSampleSize(
        variance1: baselineStatistics.Variance,
        variance2: treatmentStatistics.Variance,
        alpha: this.alpha,
        delta: this.minimumDetectableDifferenceDesired,
        power: this.testStatisticalPower);

    var baselineSamples = (int)Math.Ceiling(requiredSize.Samples1);
    var treatmentSamples = (int)Math.Ceiling(requiredSize.Samples2);

    return new SamplesRequirement(baselineSamples, treatmentSamples);
}
/// <summary>
/// Computes the sample size requirement by delegating to a freshly constructed
/// <see cref="AutoTOrZTestSampleSizeDeterminer"/> configured with this instance's alpha,
/// minimum detectable difference and statistical power.
/// </summary>
/// <param name="basedOnPreliminaryResults">Preliminary baseline/treatment measurements passed through to the delegate.</param>
/// <returns>Whatever requirement the delegate determiner reports.</returns>
public SamplesRequirement GetSampleSizeRequirement(BenchmarkResults.BeforeAndAfter basedOnPreliminaryResults)
{
    var determiner = new AutoTOrZTestSampleSizeDeterminer(
        this.alpha,
        this.minimumDetectableDifferenceDesired,
        this.testStatisticalPower);

    return determiner.GetSampleSizeRequirement(basedOnPreliminaryResults);
}
/// <summary>
/// Convenience overload that feeds a baseline/treatment measurement pair into
/// <paramref name="source"/>'s hypothesis test, using the average nanoseconds of each result run
/// as the two samples.
/// </summary>
/// <param name="source">The hypothesis test to run.</param>
/// <param name="resultMeasurement">The baseline and treatment measurements to compare.</param>
/// <param name="hypothesizedDifference">Passed through unchanged to the underlying test.</param>
/// <param name="alternateHypothesis">Passed through unchanged to the underlying test.</param>
/// <param name="alpha">Passed through unchanged to the underlying test.</param>
/// <returns>The result produced by the underlying test.</returns>
public static TwoSampleHypothesisTestResult TestHypothesis(
    this ITwoSampleNormalDistributionHypothesisTest source,
    BenchmarkResults.BeforeAndAfter resultMeasurement,
    double hypothesizedDifference,
    TwoSampleHypothesis alternateHypothesis,
    double alpha)
{
    // Both projections are lazy; they are only enumerated by the underlying test.
    var baselineAverages = resultMeasurement.Baseline
        .GetResultRuns()
        .Select(run => MeasurementExtensions.GetAverageNanoseconds(run));

    var treatmentAverages = resultMeasurement.Treatment
        .GetResultRuns()
        .Select(run => run.GetAverageNanoseconds());

    return source.TestHypothesis(
        baselineAverages,
        treatmentAverages,
        hypothesizedDifference,
        alternateHypothesis,
        alpha);
}
/// <summary>
/// Runs the benchmarks of <typeparamref name="TBenchmarkContainer"/> and pairs up the resulting
/// reports, per distinct parameter set, into one baseline and one treatment.
/// </summary>
/// <typeparam name="TBenchmarkContainer">The type handed to <c>BenchmarkRunner.Run</c>.</typeparam>
/// <param name="runParameters">Supplies the desired max latency used to build the run configuration.</param>
/// <returns>The baseline/treatment pairs keyed by their parameter instances.</returns>
/// <exception cref="InvalidOperationException">
/// A parameter set did not yield exactly one baseline report and exactly one non-baseline report.
/// </exception>
public BenchmarkResults RunBenchmark<TBenchmarkContainer>(BenchmarkRunParameters runParameters)
{
    var config = new Config(runParameters.DesiredMaxLatency, this.jobMutator);

    // TODO: P3 - Validate return values to catch invalid usage (e.g. Before throws and After returns - invalid Benchmark comparison because not doing the same thing)
    var summary = BenchmarkRunner.Run<TBenchmarkContainer>(config);
    var allReports = summary.Reports;

    var comparer = ParameterInstancesComparer.Default;

    IDictionary<ParameterInstances, BenchmarkResults.BeforeAndAfter> pairsByParameters =
        new Dictionary<ParameterInstances, BenchmarkResults.BeforeAndAfter>(comparer);

    var groupedByParameters = allReports.GroupBy(report => report.BenchmarkCase.Parameters, comparer);
    foreach (var group in groupedByParameters)
    {
        var baselines = group.Where(report => report.BenchmarkCase.IsBaseline()).ToList();
        var treatments = group.Where(report => !report.BenchmarkCase.IsBaseline()).ToList();

        // Equivalent to "exactly two reports, exactly one of which is the baseline".
        if (baselines.Count != 1 || treatments.Count != 1)
        {
            throw new InvalidOperationException("Expected exactly 1 baseline and 1 treatment");
        }

        pairsByParameters[group.Key] = new BenchmarkResults.BeforeAndAfter(baselines[0], treatments[0]);
    }

    return new BenchmarkResults(pairsByParameters);
}
/// <summary>
/// Tests whether the measured baseline/treatment pair supports this validator's alternate
/// hypothesis by at least the configured threshold, and packages the outcome with a
/// human-readable report.
/// </summary>
/// <param name="parameterInstances">The benchmark parameter set the measurement belongs to; passed through to the result.</param>
/// <param name="resultMeasurement">The baseline and treatment measurements to compare.</param>
/// <returns>
/// A <see cref="ValidationResult"/> whose <c>isViolation</c> argument is set to the test's
/// <c>IsSignificant</c> value (see the TODO at the return statement).
/// </returns>
/// <exception cref="InvalidOperationException">Neither a time-interval nor a percent threshold is configured.</exception>
/// <exception cref="ArgumentOutOfRangeException">The configured alternate hypothesis is not one of the two one-sided options.</exception>
private ValidationResult GetValidationResult(ParameterInstances parameterInstances, BenchmarkResults.BeforeAndAfter resultMeasurement)
{
    // Resolve the threshold value and its display text in a single place so the report can never
    // describe a different threshold than the one that was actually tested (previously the two were
    // resolved by separate chains with opposite precedence when both thresholds were configured).
    double hypothesizedDifference;
    string byAtLeastString;
    if (this.byAtLeastTimeInterval != null)
    {
        hypothesizedDifference = this.byAtLeastTimeInterval.Value.Nanoseconds;
        byAtLeastString = this.byAtLeastTimeInterval.Value.ToString();
    }
    else if (this.byAtLeastPercent != null)
    {
        // A percent threshold is relative to the observed baseline mean.
        var baselineMean = resultMeasurement.Baseline.ResultStatistics.Mean;
        hypothesizedDifference = baselineMean * this.byAtLeastPercent.Value.Multiplier;
        byAtLeastString = this.byAtLeastPercent.Value.Multiplier.ToString("P0");
    }
    else
    {
        throw new InvalidOperationException("This is why you use a library like OneOf");
    }

    switch (this.alternateHypothesis)
    {
        case TwoSampleHypothesis.FirstValueIsGreaterThanSecond:
            // observed: baseline - treatment -- we are saying First<Second so baseline - treatment should be negative
            // NOTE(review): the comment above talks about First<Second although this is the
            // FirstValueIsGreaterThanSecond case - confirm the sign flip sits on the intended branch.
            hypothesizedDifference *= -1;
            break;

        case TwoSampleHypothesis.FirstValueIsSmallerThanSecond:
            break;

        default:
            throw new ArgumentOutOfRangeException(
                nameof(this.alternateHypothesis),
                this.alternateHypothesis,
                "Only one-sided alternate hypotheses are supported.");
    }

    var testResult = new TwoSampleAutoTOrZTestHypothesisTest()
        .TestHypothesis(
            resultMeasurement,
            hypothesizedDifference,
            this.alternateHypothesis,
            this.alpha);

    var isMatch = testResult.IsSignificant;
    var confidenceInterval = testResult.ConfidenceInterval;
    var observedDifference = testResult.ObservedDifference;

    // The interval bounds are scaled by 1e-6 and reported with an "ms" suffix below.
    var confIntervalInMs = new DoubleRange(confidenceInterval.Min * 1e-6, confidenceInterval.Max * 1e-6);
    var confidenceLevel = 1 - this.alpha;

    var supportText = isMatch ? "do support" : "cannot support";
    var message =
        $"Support: {supportText}\r\n" +
        $"{this.alternateHypothesis.ToDescriptiveString("baseline duration", "treatment duration")} by {byAtLeastString}\r\n" +
        $"Alpha: {this.alpha}.\r\n" +
        $"HypothesizedDifference: {hypothesizedDifference}.\r\n" +
        $"ObservedDifference: {observedDifference}\r\n" +
        $"ConfidenceInterval: {confIntervalInMs} ms\r\n" +
        $"Baseline {resultMeasurement.Baseline.ResultStatistics.ToSummaryString(confidenceLevel)}" +
        $"Treatment {resultMeasurement.Treatment.ResultStatistics.ToSummaryString(confidenceLevel)}";

    return new ValidationResult(
        parameterInstances,
        this,
        message,
        // TODO: P3 - We are abusing this type here... isViolation != isMatch
        isViolation: isMatch);
}
/// <summary>
/// Estimates the required sample size with a two-sample Z-test power analysis, including a
/// self-validating work around for an apparent sample-size bug in the Accord library.
/// </summary>
/// <param name="basedOnPreliminaryResults">Preliminary baseline/treatment measurements; each side must have at least 30 samples.</param>
/// <returns>A requirement that uses the same sample count for both groups.</returns>
/// <exception cref="InvalidOperationException">
/// Either side has fewer than 30 samples, or the Accord work-around validation fails.
/// </exception>
public SamplesRequirement GetSampleSizeRequirement(BenchmarkResults.BeforeAndAfter basedOnPreliminaryResults)
{
    var baselineCount = basedOnPreliminaryResults.Baseline.ResultStatistics.N;
    var treatmentCount = basedOnPreliminaryResults.Treatment.ResultStatistics.N;
    if (baselineCount < 30 || treatmentCount < 30)
    {
        throw new InvalidOperationException(
            "Too few samples for Z test - please use T test");
    }

    var zTest = new TwoSampleZTest(
        basedOnPreliminaryResults.Baseline.GetAverageNanosecondsForResultRuns(),
        basedOnPreliminaryResults.Treatment.GetAverageNanosecondsForResultRuns(),
        // TODO: P1 - Doing the tests separately like this and doing one tailed is not correct
        // but achieving the call syntax we want with the semantics statistics needs is hard :(
        // The specific problem is that the desired significance might not be achieved based on how this is done
        alternate: TwoSampleHypothesis.ValuesAreDifferent);

    // Default reader; every non-throwing path through the work around below replaces it.
    Func<BaseTwoSamplePowerAnalysis, int> readSampleSize =
        analysis => (int)Math.Min(int.MaxValue, Math.Ceiling(analysis.Samples1));

    // WORK AROUND FOR BUG IN ACCORD
    {
        // This was a weirdness in the Accord library - looks like a bug. We are going to work around it
        // but validate it here in case it changes in the future.
        var untouchedAnalysis = zTest.Analysis.Clone() as TwoSampleZTestPowerAnalysis;
        var adjustedAnalysis = zTest.Analysis as TwoSampleZTestPowerAnalysis;

        // Note: this mutates the analysis object owned by the test itself.
        adjustedAnalysis.Power = 0.80;
        adjustedAnalysis.ComputeSamples();

        TwoSampleZTestPowerAnalysis lowerPowerAnalysis;
        TwoSampleZTestPowerAnalysis higherPowerAnalysis;
        if (untouchedAnalysis.Power < adjustedAnalysis.Power)
        {
            lowerPowerAnalysis = untouchedAnalysis;
            higherPowerAnalysis = adjustedAnalysis;
        }
        else
        {
            lowerPowerAnalysis = adjustedAnalysis;
            higherPowerAnalysis = untouchedAnalysis;
        }

        if (higherPowerAnalysis.Samples1 < lowerPowerAnalysis.Samples1)
        {
            // Not expected, but is the bug we are working around
            if (higherPowerAnalysis.TotalSamples <= lowerPowerAnalysis.Samples1)
            {
                throw new InvalidOperationException(
                    "Larger power resulted in smaller sample size needed? Impossible.");
            }

            // Bug validated, our work around is okay
            readSampleSize = analysis => (int)Math.Min(int.MaxValue, Math.Ceiling(analysis.TotalSamples));
        }
        else
        {
            readSampleSize = analysis => (int)Math.Min(int.MaxValue, Math.Ceiling(analysis.TotalSamples));

            var accordVersion = FileVersionInfo.GetVersionInfo(typeof(BaseTwoSamplePowerAnalysis).Assembly.Location);
            var isKnownVersion = accordVersion.FileMajorPart == 3 && accordVersion.FileMinorPart <= 8;
            if (!isKnownVersion)
            {
                throw new InvalidOperationException(
                    $"It's possible you just need a lot more samples, but it's also possible our work around for a bug in Accord is no longer needed. Gotta check this! {lowerPowerAnalysis.Samples1} {higherPowerAnalysis.Samples1}");
            }
        }
    }
    // WORK AROUND FOR BUG IN ACCORD

    // The difference standard deviation
    var differenceStandardDeviation = zTest.StandardError * Math.Sqrt(baselineCount);

    var requiredSize = TwoSampleZTestPowerAnalysis.GetSampleSize(
        // TODO: Does this delta need to be minimumDetectableDifferenceDesired, or do we use the observed difference?
        delta: zTest.ObservedDifference,
        power: this.testStatisticalPower,
        alpha: this.alpha,
        // TODO: P1 - Does the direction here matter?
        hypothesis: TwoSampleHypothesis.ValuesAreDifferent,
        standardDeviation: differenceStandardDeviation);

    // Already clamped to int.MaxValue by the reader, so it can be used for both groups as-is.
    var samplesPerGroup = readSampleSize(requiredSize);

    return new SamplesRequirement(samplesPerGroup, samplesPerGroup);
}
/// <summary>
/// Explicit <see cref="ISampleSizeDeterminer"/> implementation that forwards to this instance's
/// <c>getSampleSize</c> member.
/// </summary>
/// <param name="basedOnPreliminaryResults">Preliminary baseline/treatment measurements, forwarded unchanged.</param>
/// <returns>The requirement produced by <c>getSampleSize</c>.</returns>
SamplesRequirement ISampleSizeDeterminer.GetSampleSizeRequirement(BenchmarkResults.BeforeAndAfter basedOnPreliminaryResults)
    => this.getSampleSize(basedOnPreliminaryResults);