コード例 #1
0
 /// <summary>
 /// Compares a baseline metric against a candidate metric and produces a decision.
 /// For now the decision is delegated entirely to a t-test, with <c>m_pvalue</c>
 /// passed as the p-value argument.
 /// </summary>
 /// <param name="baselineMetric">The metric measured on the baseline.</param>
 /// <param name="candidateMetric">The metric measured on the candidate.</param>
 /// <returns>The <see cref="ComparisonDecision"/> returned by <c>TTest.Run</c>.</returns>
 private ComparisonDecision MakeDecision(MetricValue baselineMetric, MetricValue candidateMetric)
     => TTest.Run(baselineMetric, candidateMetric, m_pvalue);
コード例 #2
0
ファイル: TTest.cs プロジェクト: shimingsg/CoreGCBench
        /// <summary>
        /// Runs an unpaired t-test on the sample data, making a decision on the data
        /// based on the results of the test. The null hypothesis (that the means are
        /// the same between the baseline and the candidate) is rejected when the absolute
        /// value of the test statistic is at least the critical value found in the
        /// t-distribution table for the given pvalue and degrees of freedom.
        /// </summary>
        /// <param name="baseline">The baseline metric. Its <c>Direction</c> decides whether
        /// a larger or a smaller value counts as an improvement.</param>
        /// <param name="candidate">The candidate metric that is compared against the baseline.</param>
        /// <param name="pvalue">The PValue to use for this test. Must be specified up-front
        /// in order to avoid bias.</param>
        /// <returns>
        /// <see cref="ComparisonDecision.Indeterminate"/> when there are too few samples,
        /// when the test statistic is infinite or NaN, or when the difference is not
        /// statistically significant; otherwise <see cref="ComparisonDecision.Improvement"/>
        /// or <see cref="ComparisonDecision.Regression"/>, according to the baseline's direction.
        /// </returns>
        /// <exception cref="ArgumentException"><paramref name="pvalue"/> is neither the
        /// standard p-value nor one of the allowed p-values.</exception>
        /// <exception cref="InvalidOperationException">The baseline's direction is neither
        /// <c>HigherIsBetter</c> nor <c>LowerIsBetter</c>.</exception>
        public static ComparisonDecision Run(MetricValue baseline, MetricValue candidate, double pvalue = StandardPValue)
        {
            // it is not required by this statistical test for the baseline and candidate
            // to have the same number of samples, but it /is/ required that their variances
            // be the same.
            //
            // TODO(segilles) - if, throughout the course of using this tool we find that its
            // false positive rate is high due to high variance differences, we should consider
            // switching to Welch's t-test, which is more robust in the case of unequal variances.

            // We use a table for the t-distribution, and we only have values for certain p-values.
            if (pvalue != StandardPValue && Array.IndexOf(AllowedPValues, pvalue) < 0)
            {
                throw new ArgumentException($"invalid pvalue: {pvalue}");
            }

            // First, we calculate the test statistic.
            double testStatistic    = CalculateStatistic(baseline, candidate);
            int    degreesOfFreedom = baseline.SampleSize + candidate.SampleSize - 2;

            if (degreesOfFreedom < 1)
            {
                // With two or fewer samples in total the t-distribution is undefined, so no
                // row of the table is meaningful. Report that explicitly instead of rounding
                // to an unrelated row.
                Logger.LogVerbose($"metric {baseline.Name} has too few samples for a t-test (DOF {degreesOfFreedom})");
                return(ComparisonDecision.Indeterminate);
            }

            if (!s_tdistTable.ContainsKey(degreesOfFreedom))
            {
                // TODO(segilles) if this happens a lot, we may consider adding more keys to the table.
                //
                // Pick the nearest key. Distances are computed as long so the subtraction can
                // never overflow. If the table has no keys at all, closest stays at the
                // missing DOF and the lookup below fails instead of using a bogus row.
                int  closest         = degreesOfFreedom;
                long closestDistance = long.MaxValue;
                foreach (var key in s_tdistTable.Keys)
                {
                    long distance = Math.Abs((long)key - degreesOfFreedom);

                    // On a tie prefer the smaller DOF: the result then does not depend on the
                    // enumeration order of the keys, and the smaller DOF is the conservative
                    // choice (its critical value is the larger one).
                    if (distance < closestDistance || (distance == closestDistance && key < closest))
                    {
                        closest         = key;
                        closestDistance = distance;
                    }
                }

                Logger.LogVerbose($"DOF {degreesOfFreedom} not found in table, rounding to closest DOF: {closest}");
                degreesOfFreedom = closest;
            }

            double targetStatistic = s_tdistTable[degreesOfFreedom][pvalue];

            Logger.LogVerbose($"metric {baseline.Name} has test statistic {testStatistic} vs. target {targetStatistic}");

            if (double.IsInfinity(testStatistic) || double.IsNaN(testStatistic))
            {
                // if this occurred, there was something funny about the data we used to calculate.
                return(ComparisonDecision.Indeterminate);
            }

            if (Math.Abs(testStatistic) < targetStatistic)
            {
                // there's not enough data to prove or disprove the null hypothesis.
                return(ComparisonDecision.Indeterminate);
            }


            // we've got a statistically significant difference!
            switch (baseline.Direction)
            {
            case Direction.HigherIsBetter:
                return(baseline.Value < candidate.Value ? ComparisonDecision.Improvement : ComparisonDecision.Regression);

            case Direction.LowerIsBetter:
                return(baseline.Value > candidate.Value ? ComparisonDecision.Improvement : ComparisonDecision.Regression);

            default:
                throw new InvalidOperationException("invalid value for Direction enum");
            }
        }