Example #1
        public SamplesRequirement GetSampleSizeRequirement(
            BenchmarkResults.BeforeAndAfter basedOnPreliminaryResults)
        {
            if (basedOnPreliminaryResults.Baseline.ResultStatistics.N >= 30 &&
                basedOnPreliminaryResults.Treatment.ResultStatistics.N >= 30)
            {
                Trace.WriteLine("Both sample sizes are >= 30; a Z-test would be appropriate here");
            }

            {
                // Variances are determined by the preliminary results
                var size = TwoSampleTTestPowerAnalysis.GetSampleSize(
                    variance1: basedOnPreliminaryResults.Baseline.ResultStatistics.Variance,
                    variance2: basedOnPreliminaryResults.Treatment.ResultStatistics.Variance,
                    alpha: this.alpha,
                    delta: this.minimumDetectableDifferenceDesired,
                    power: this.testStatisticalPower
                    );

                var n1 = (int)Math.Ceiling(size.Samples1);
                var n2 = (int)Math.Ceiling(size.Samples2);

                return(new SamplesRequirement(
                           n1,
                           n2));
            }
        }
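
For reference, the Accord.NET power-analysis call used above can be exercised on its own. The sketch below is a guess at a standalone usage, assuming the Accord.Statistics package and its Accord.Statistics.Testing.Power namespace; every numeric value is hypothetical and none comes from the snippet above.

 // A minimal, standalone sketch (top-level statements); all numbers are hypothetical.
 using System;
 using Accord.Statistics.Testing.Power;

 var size = TwoSampleTTestPowerAnalysis.GetSampleSize(
     variance1: 4.0e10,   // baseline variance (ns^2), hypothetical
     variance2: 6.0e10,   // treatment variance (ns^2), hypothetical
     alpha: 0.05,
     delta: 1.0e5,        // minimum detectable difference (ns), hypothetical
     power: 0.8);

 // Samples1 and Samples2 come back fractional; round up exactly as Example #1 does.
 Console.WriteLine((int)Math.Ceiling(size.Samples1));
 Console.WriteLine((int)Math.Ceiling(size.Samples2));
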
Example #2
 public SamplesRequirement GetSampleSizeRequirement(BenchmarkResults.BeforeAndAfter basedOnPreliminaryResults)
 {
     return(new AutoTOrZTestSampleSizeDeterminer(
                this.alpha,
                this.minimumDetectableDifferenceDesired,
                this.testStatisticalPower)
            .GetSampleSizeRequirement(basedOnPreliminaryResults));
 }
Example #3
 public static TwoSampleHypothesisTestResult TestHypothesis(
     this ITwoSampleNormalDistributionHypothesisTest source,
     BenchmarkResults.BeforeAndAfter resultMeasurement,
     double hypothesizedDifference,
     TwoSampleHypothesis alternateHypothesis,
     double alpha)
 {
     return(source.TestHypothesis(
                resultMeasurement.Baseline.GetResultRuns().Select(run => run.GetAverageNanoseconds()),
                resultMeasurement.Treatment.GetResultRuns().Select(run => run.GetAverageNanoseconds()),
                hypothesizedDifference,
                alternateHypothesis,
                alpha));
 }
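
The extension above simply flattens the two benchmark reports into per-run averages in nanoseconds before delegating. Below is a self-contained sketch of the same kind of comparison done directly against Accord.NET's TwoSampleZTest, which is one plausible backing for ITwoSampleNormalDistributionHypothesisTest but not necessarily the one used here; the measurement arrays are made up.

 // Standalone sketch, assuming the Accord.Statistics package; the arrays stand in for per-run
 // average nanoseconds and are far smaller than the N >= 30 a real Z-test would want.
 using System;
 using Accord.Statistics.Testing;

 var baselineNs  = new[] { 1.01e6, 0.99e6, 1.00e6, 1.02e6, 0.98e6 };
 var treatmentNs = new[] { 0.90e6, 0.91e6, 0.89e6, 0.92e6, 0.90e6 };

 var test = new TwoSampleZTest(
     baselineNs,
     treatmentNs,
     alternate: TwoSampleHypothesis.FirstValueIsGreaterThanSecond);

 Console.WriteLine(test.Significant);         // was the difference significant at the default alpha?
 Console.WriteLine(test.ObservedDifference);  // baseline mean minus treatment mean, in ns
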
Example #4
        public BenchmarkResults RunBenchmark<TBenchmarkContainer>(BenchmarkRunParameters runParameters)
        {
            var config = new Config(
                runParameters.DesiredMaxLatency,
                this.jobMutator);

            // TODO: P3 - Validate return values to catch invalid usage (e.g. Before throws and After returns - invalid Benchmark comparison because not doing the same thing)

            var reports = BenchmarkRunner.Run<TBenchmarkContainer>(config).Reports;

            var parameterInstancesComparer = ParameterInstancesComparer.Default;
            var reportsByArgs = reports.GroupBy(
                report => report.BenchmarkCase.Parameters,
                parameterInstancesComparer);

            IDictionary<ParameterInstances, BenchmarkResults.BeforeAndAfter> beforeAndAfters =
                new Dictionary<ParameterInstances, BenchmarkResults.BeforeAndAfter>(parameterInstancesComparer);

            foreach (var reportForArgs in reportsByArgs)
            {
                if (reportForArgs.Count() != 2 ||
                    reportForArgs.Count(report => report.BenchmarkCase.IsBaseline()) != 1)
                {
                    throw new InvalidOperationException("Expected exactly 1 baseline and 1 treatment");
                }

                var args      = reportForArgs.Key;
                var baseline  = reportForArgs.Single(report => report.BenchmarkCase.IsBaseline());
                var treatment = reportForArgs.Single(report => !report.BenchmarkCase.IsBaseline());

                beforeAndAfters[args] = new BenchmarkResults.BeforeAndAfter(
                    baseline,
                    treatment);
            }

            return(new BenchmarkResults(beforeAndAfters));
        }
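
RunBenchmark expects every parameter combination to yield exactly one baseline report and one treatment report. A container with that shape might look like the sketch below; the class, method, and parameter names are hypothetical, while the attributes are standard BenchmarkDotNet.

 // Hypothetical benchmark container; names are illustrative, attributes are standard BenchmarkDotNet.
 using System.Text;
 using BenchmarkDotNet.Attributes;

 public class ConcatVsStringBuilder
 {
     [Params(10, 1000)]            // each value becomes one ParameterInstances group
     public int Iterations;

     [Benchmark(Baseline = true)]  // the "before" side of the comparison
     public string Before()
     {
         var s = string.Empty;
         for (var i = 0; i < Iterations; i++) s += "x";
         return s;
     }

     [Benchmark]                   // the "after" side of the comparison
     public string After()
     {
         var sb = new StringBuilder();
         for (var i = 0; i < Iterations; i++) sb.Append("x");
         return sb.ToString();
     }
 }
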
Example #5
        private ValidationResult GetValidationResult(ParameterInstances parameterInstances, BenchmarkResults.BeforeAndAfter resultMeasurement)
        {
            double hypothesizedDifference;

            if (this.byAtLeastTimeInterval != null)
            {
                hypothesizedDifference = this.byAtLeastTimeInterval.Value.Nanoseconds;
            }
            else if (this.byAtLeastPercent != null)
            {
                var baselineMean = resultMeasurement.Baseline.ResultStatistics.Mean;
                hypothesizedDifference = baselineMean * this.byAtLeastPercent.Value.Multiplier;
            }
            else
            {
                throw new InvalidOperationException("This is why you use a library like OneOf");
            }

            switch (alternateHypothesis)
            {
            case TwoSampleHypothesis.FirstValueIsGreaterThanSecond:
                // observed: baseline - treatment -- we are saying First<Second so baseline - treatment should be negative
                hypothesizedDifference *= -1;
                break;

            case TwoSampleHypothesis.FirstValueIsSmallerThanSecond:
                break;

            default:
                throw new ArgumentOutOfRangeException();
            }

            var testResult = new TwoSampleAutoTOrZTestHypothesisTest().TestHypothesis(
                resultMeasurement,
                hypothesizedDifference,
                alternateHypothesis,
                this.alpha);

            var isMatch            = testResult.IsSignificant;
            var confidenceInterval = testResult.ConfidenceInterval;

            var observedDifference = testResult.ObservedDifference;

            var confIntervalInMs = new DoubleRange(confidenceInterval.Min * 1e-6, confidenceInterval.Max * 1e-6);

            var confidenceLevel = 1 - this.alpha;

            string byAtLeastString;

            if (byAtLeastPercent != null)
            {
                byAtLeastString = this.byAtLeastPercent.Value.Multiplier.ToString("P0");
            }
            else if (byAtLeastTimeInterval != null)
            {
                byAtLeastString = this.byAtLeastTimeInterval.Value.ToString();
            }
            else
            {
                throw new InvalidOperationException(
                          "Either byAtLeastPercent or byAtLeastTimeInterval must be set");
            }

            var message =
                $"Support: {(isMatch ? "do support" : "cannot support")}\r\n" +
                $"{this.alternateHypothesis.ToDescriptiveString("baseline duration", "treatment duration")} by {byAtLeastString}\r\n" +
                $"Alpha: {this.alpha}.\r\n" +
                $"HypothesizedDifference: {hypothesizedDifference}.\r\n" +
                $"ObservedDifference: {observedDifference}\r\n" +
                $"ConfidenceInterval: {confIntervalInMs} ms\r\n" +
                $"Baseline {resultMeasurement.Baseline.ResultStatistics.ToSummaryString(confidenceLevel)}" +
                $"Treatment {resultMeasurement.Treatment.ResultStatistics.ToSummaryString(confidenceLevel)}";

            return(new ValidationResult(
                       parameterInstances,
                       this,
                       message,
                       // TODO: P3 - We are abusing this type here... isViolation != isMatch
                       isViolation: isMatch));
        }
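
To make the hypothesizedDifference arithmetic above concrete, here is the same computation with hypothetical numbers (a 2 ms baseline mean and a 10% threshold); the sign flip mirrors the switch on alternateHypothesis.

 // Worked example of the hypothesizedDifference computation in GetValidationResult; the numbers are hypothetical.
 var baselineMean = 2_000_000.0;                            // ns (2 ms), hypothetical preliminary mean
 var multiplier   = 0.10;                                   // "by at least 10%"
 var hypothesizedDifference = baselineMean * multiplier;    // 200,000 ns

 // As in the switch above: negate for TwoSampleHypothesis.FirstValueIsGreaterThanSecond,
 // leave unchanged for FirstValueIsSmallerThanSecond.
 hypothesizedDifference *= -1;                              // -200,000 ns
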
Example #6
        public SamplesRequirement GetSampleSizeRequirement(BenchmarkResults.BeforeAndAfter basedOnPreliminaryResults)
        {
            if (basedOnPreliminaryResults.Baseline.ResultStatistics.N < 30 ||
                basedOnPreliminaryResults.Treatment.ResultStatistics.N < 30)
            {
                throw new InvalidOperationException(
                          "Too few samples for Z test - please use T test");
            }

            var test = new TwoSampleZTest(
                basedOnPreliminaryResults.Baseline.GetAverageNanosecondsForResultRuns(),
                basedOnPreliminaryResults.Treatment.GetAverageNanosecondsForResultRuns(),
                // TODO: P1 - Doing the tests separately like this and doing one tailed is not correct
                // but achieving the call syntax we want with the semantics the statistics need is hard :(
                // The specific problem is that the desired significance might not be achieved based on how this is done
                alternate: TwoSampleHypothesis.ValuesAreDifferent);

            Func<BaseTwoSamplePowerAnalysis, int> getSampleSizeForSample1 = analysis => (int)Math.Min(int.MaxValue, Math.Ceiling(analysis.Samples1));

            // WORKAROUND FOR BUG IN ACCORD
            {
                // This was a weirdness in the Accord library - looks like a bug. We are going to work around it but validate it here in case it changes in the future.
                var originalAnalysis = test.Analysis.Clone() as TwoSampleZTestPowerAnalysis;
                var newAnalysis      = test.Analysis as TwoSampleZTestPowerAnalysis;
                newAnalysis.Power = 0.80;
                newAnalysis.ComputeSamples();

                var smallerPower = originalAnalysis.Power < newAnalysis.Power ? originalAnalysis : newAnalysis;
                var largerPower  = smallerPower == newAnalysis ? originalAnalysis : newAnalysis;

                if (largerPower.Samples1 < smallerPower.Samples1)
                {
                    // Not expected, but is the bug we are working around
                    if (largerPower.TotalSamples > smallerPower.Samples1)
                    {
                        // Bug validated, our workaround is okay
                        getSampleSizeForSample1 = analysis => (int)Math.Min(int.MaxValue, Math.Ceiling(analysis.TotalSamples));
                    }
                    else
                    {
                        throw new InvalidOperationException(
                                  "Larger power resulted in smaller sample size needed? Impossible.");
                    }
                }
                else
                {
                    getSampleSizeForSample1 = analysis => (int)Math.Min(int.MaxValue, Math.Ceiling(analysis.TotalSamples));

                    var version = FileVersionInfo.GetVersionInfo(typeof(BaseTwoSamplePowerAnalysis).Assembly.Location);
                    if (version.FileMajorPart == 3 && version.FileMinorPart <= 8)
                    {
                        // Known version
                    }
                    else
                    {
                        throw new InvalidOperationException(
                                  $"It's possible you just need a lot more samples, but it's also possible our work around for a bug in Accord is no longer needed. Gotta check this! {smallerPower.Samples1} {largerPower.Samples1}");
                    }
                }
            }
            // WORKAROUND FOR BUG IN ACCORD

            // The standard deviation of the difference (recovered from the standard error)
            var standardDeviation = test.StandardError * Math.Sqrt(basedOnPreliminaryResults.Baseline.ResultStatistics.N);

            var size4 = TwoSampleZTestPowerAnalysis.GetSampleSize(
                // TODO: Does this delta need to be minimumDetectableDifferenceDesired, or do we use the observed difference?
                delta: test.ObservedDifference,
                power: this.testStatisticalPower,
                alpha: this.alpha,
                // TODO: P1 - Does the direction here matter?
                hypothesis: TwoSampleHypothesis.ValuesAreDifferent,
                standardDeviation: standardDeviation);

            var n1 = getSampleSizeForSample1(size4);

            return(new SamplesRequirement(
                       (int)Math.Min(int.MaxValue, n1),
                       (int)Math.Min(int.MaxValue, n1)));
        }
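
As a rough sanity check on the power analysis in Example #6, the usual two-sided two-sample Z sample-size formula can be computed by hand. The sketch below uses hard-coded standard-normal quantiles and hypothetical inputs; whether its per-group sigma corresponds to Accord's standardDeviation parameter is exactly the kind of question the TODOs above raise.

 // Back-of-the-envelope cross-check: n per group ~= 2 * (z_{1-alpha/2} + z_{power})^2 * sigma^2 / delta^2.
 using System;

 var sigma  = 300_000.0;   // hypothetical per-group standard deviation, ns
 var delta  = 100_000.0;   // hypothetical minimum detectable difference, ns
 var zAlpha = 1.959964;    // z_{0.975} for alpha = 0.05, two-sided
 var zPower = 0.841621;    // z_{0.80} for power = 0.80

 var nPerGroup = (int)Math.Ceiling(2 * Math.Pow(zAlpha + zPower, 2) * sigma * sigma / (delta * delta));
 Console.WriteLine(nPerGroup);  // about 142 samples per group with these inputs
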
Example #7
 SamplesRequirement ISampleSizeDeterminer.GetSampleSizeRequirement(BenchmarkResults.BeforeAndAfter basedOnPreliminaryResults)
 {
     return(this.getSampleSize(basedOnPreliminaryResults));
 }