Пример #1
0
 /// <summary>
 /// Computes the variance of this set of numbers relative to its weighted mean,
 /// using the supplied per-element weights.
 /// See:
 /// http://en.wikipedia.org/wiki/Weighted_mean
 /// </summary>
 /// <param name="weights">The weights, read at the same indexes as this set's values</param>
 /// <returns>
 /// Variance from the weighted mean, or <c>double.NaN</c> if any exception is
 /// raised during the calculation
 /// </returns>
 public double Variance(Statistics weights)
 {
     try
     {
         // Weighted sum of squares: sum(w[i] * x[i]^2).
         double weightedSquares = 0;
         int index = 0;
         while (index < _list.Length)
         {
             weightedSquares += weights._list[index] * Math.Pow(_list[index], 2);
             index++;
         }

         double scaledSquares = weightedSquares / weights.Mean();
         double meanTerm = _list.Length * Math.Pow(Mean(weights), 2);
         return (scaledSquares - meanTerm) / (_list.Length - 1);
     }
     catch (Exception)
     {
         // Any failure is reported to the caller as NaN rather than propagated.
         return double.NaN;
     }
 }
Пример #2
0
        ///<summary>
        ///Determines the best fit line by minimizing the sum of the squares
        ///of the perpendicular distances of the points to the line.
        ///Initially reported by Kermack and Haldane (1950) Biometrika, 37, 30;
        ///the original author of this code cites York, D. (1966) Canadian Journal of Physics,
        ///vol 44, p 1079 as the source consulted.
        ///</summary>
        ///<param name="a">Values of the coordinate that the slope multiplies.</param>
        ///<param name="b">Values of the other coordinate, read at the same indexes as <paramref name="a"/>.</param>
        ///<returns>
        ///The fitted line. Correlation, slope, slope error and intercept are all set to 0 unless
        ///both sums of squared deviations and the sum of cross-deviations are strictly positive.
        ///</returns>
        public static LinearRegression LinearRegressionWithErrorsInBothCoordinates(Statistics a, Statistics b)
        {
            double centerA = a.Mean();
            double centerB = b.Mean();

            // Sums of squared deviations and of cross-deviations about the means.
            double sumAA = 0;
            double sumBB = 0;
            double sumAB = 0;
            int index = 0;
            while (index < a.Length)
            {
                double devA = a._list[index] - centerA;
                double devB = b._list[index] - centerB;

                sumAA += devA * devA;
                sumBB += devB * devB;
                sumAB += devA * devB;
                index++;
            }

            LinearRegression fit = new LinearRegression();

            // Written as a negated conjunction so that NaN sums also take the all-zero path.
            bool usable = sumAA > 0 && sumBB > 0 && sumAB > 0;
            if (!usable)
            {
                fit.Correlation = 0;
                fit.Slope = 0;
                fit.SlopeError = 0;
                fit.Intercept = 0;
                return fit;
            }

            fit.Correlation = sumAB / Math.Sqrt(sumAA * sumBB);
            double spread = sumBB - sumAA;
            fit.Slope = (spread + Math.Sqrt(spread * spread + 4 * (sumAB * sumAB))) / 2 / sumAB;
            fit.Intercept = centerB - fit.Slope * centerA;
            if (fit.Correlation < 1)
            {
                fit.SlopeError = (fit.Slope / fit.Correlation) * Math.Sqrt((1 - (fit.Correlation * fit.Correlation)) / a.Length);
            }
            else
            {
                // A correlation of 1 (or a rounding overshoot) leaves no residual to estimate an error from.
                fit.SlopeError = 0;
            }
            return fit;
        }
Пример #3
0
        /// <summary>
        /// Computes the sample covariance of two equally sized sets of numbers.
        /// </summary>
        /// <param name="s1">First set of numbers</param>
        /// <param name="s2">Second set of numbers</param>
        /// <returns>
        /// The covariance (divisor is length minus one), or <c>double.NaN</c> when the
        /// sets differ in length or an exception is raised during the calculation
        /// </returns>
        public static double Covariance(Statistics s1, Statistics s2)
        {
            try
            {
                int count = s1.Length;
                if (count != s2.Length)
                {
                    return double.NaN;
                }

                // Sum of element-wise products.
                double productSum = 0;
                int index = 0;
                while (index < count)
                {
                    productSum += (s1._list[index] * s2._list[index]);
                    index++;
                }

                double meanProduct = count * s1.Mean() * s2.Mean();
                return (productSum - meanProduct) / (count - 1);
            }
            catch (Exception)
            {
                // Any failure is reported to the caller as NaN rather than propagated.
                return double.NaN;
            }
        }
Пример #4
0
        /// <summary>
        /// Rebuilds the statistics grid with one row per distinct time point found in
        /// <c>Workspace.MsDataFiles</c>, in ascending order. Each row shows the time, whether
        /// the time point is included, and summary statistics of the y values whose x value
        /// equals that time point.
        /// </summary>
        /// <param name="xValues">Time value of each data point.</param>
        /// <param name="yValues">Value of each data point, read at the same indexes as <paramref name="xValues"/>.</param>
        private void UpdateStatsGrid(IList<double> xValues, IList<double> yValues)
        {
            // Group the y values by their x (time) value.
            var timeToValuesDict = new Dictionary<double, IList<double>>();
            for (var i = 0; i < xValues.Count; i++)
            {
                var time = xValues[i];
                var value = yValues[i];
                IList<double> values;
                if (!timeToValuesDict.TryGetValue(time, out values))
                {
                    values = new List<double>();
                    timeToValuesDict.Add(time, values);
                }
                values.Add(value);
            }

            // Distinct time points of all data files, sorted ascending.
            var allTimePoints = new HashSet<double>(Workspace.MsDataFiles
                                            .Where(d => d.TimePoint.HasValue)
            // ReSharper disable PossibleInvalidOperationException
                                            .Select(d => d.TimePoint.Value))
            // ReSharper restore PossibleInvalidOperationException
                                            .ToArray();
            Array.Sort(allTimePoints);

            gridViewStats.Rows.Clear();
            if (allTimePoints.Length == 0)
            {
                return;
            }

            gridViewStats.Rows.Add(allTimePoints.Length);
            for (int i = 0; i < allTimePoints.Length; i++)
            {
                var row = gridViewStats.Rows[i];
                var time = allTimePoints[i];
                row.Cells[colStatsTime.Index].Value = time;
                row.Cells[colStatsInclude.Index].Value = !IsTimePointExcluded(time);

                // Time points without any plotted values keep their statistic cells empty.
                IList<double> values;
                if (timeToValuesDict.TryGetValue(time, out values))
                {
                    var stats = new Statistics(values.ToArray());
                    row.Cells[colStatsMean.Index].Value = stats.Mean();
                    row.Cells[colStatsMedian.Index].Value = stats.Median();
                    row.Cells[colStatsStdDev.Index].Value = stats.StdDev();
                    row.Cells[colStatsPointCount.Index].Value = stats.Length;
                }
            }
        }
Пример #5
0
 /// <summary>
 /// Computes the Alpha coefficient (y-intercept) of the linear regression
 /// of Y on X: the mean of Y minus Beta times the mean of X.
 /// </summary>
 /// <param name="y">Y values</param>
 /// <param name="x">X values</param>
 /// <returns>The Alpha coefficient</returns>
 public static double Alpha(Statistics y, Statistics x)
 {
     double meanY = y.Mean();
     double slope = Beta(y, x);
     double meanX = x.Mean();
     return meanY - slope * meanX;
 }
Пример #6
0
 /// <summary>
 /// Writes the mean, standard deviation and standard error of <paramref name="stats"/>
 /// into the corresponding cells of a grid row. A cell is set to null when there are
 /// too few values: the mean needs at least one, the other two need at least two.
 /// </summary>
 /// <param name="dataColumns">Identifies the value, standard deviation and standard error columns.</param>
 /// <param name="row">The grid row to fill.</param>
 /// <param name="stats">The values to summarize.</param>
 private void SetColumnValues(DataColumns dataColumns, DataGridViewRow row, Statistics stats)
 {
     var valueCell = row.Cells[dataColumns.ValueColumn.Index];
     double? mean = null;
     if (stats.Length > 0)
     {
         mean = stats.Mean();
     }
     valueCell.Value = mean;

     var stdDevCell = row.Cells[dataColumns.StdDevColumn.Index];
     double? stdDev = null;
     if (stats.Length > 1)
     {
         stdDev = stats.StdDev();
     }
     stdDevCell.Value = stdDev;

     var stdErrCell = row.Cells[dataColumns.StdErrColumn.Index];
     double? stdErr = null;
     if (stats.Length > 1)
     {
         stdErr = stats.StdErr();
     }
     stdErrCell.Value = stdErr;
 }
Пример #7
0
        /// <summary>
        /// Computes the sample covariance between two sets of numbers of equal length.
        /// </summary>
        /// <param name="s1">First set of numbers</param>
        /// <param name="s2">Second set of numbers</param>
        /// <returns>
        /// The covariance, using length minus one as the divisor; <c>double.NaN</c> if the
        /// lengths differ or if an exception is raised during the calculation
        /// </returns>
        public static double Covariance(Statistics s1, Statistics s2)
        {
            try
            {
                if (s1.Length == s2.Length)
                {
                    int n = s1.Length;

                    // Accumulate the sum of pairwise products.
                    double crossSum = 0;
                    for (int k = 0; k < n; k++)
                    {
                        crossSum += (s1._list[k] * s2._list[k]);
                    }

                    double numerator = crossSum - n * s1.Mean() * s2.Mean();
                    return numerator / (n - 1);
                }

                // Sets of different sizes have no defined covariance here.
                return double.NaN;
            }
            catch (Exception)
            {
                return double.NaN;
            }
        }
Пример #8
0
 /// <summary>
 /// Calculates the variance of this set of numbers from its weighted mean.
 /// See:
 /// http://en.wikipedia.org/wiki/Weighted_mean
 /// </summary>
 /// <param name="weights">The weights, one per value and read at matching indexes</param>
 /// <returns>
 /// Variance from weighted mean; <c>double.NaN</c> if an exception is raised
 /// while calculating it
 /// </returns>
 public double Variance(Statistics weights)
 {
     try
     {
         // Sum over all values of weight * value squared.
         double sumOfWeightedSquares = 0;
         for (int k = 0; k < _list.Length; k++)
         {
             double square = Math.Pow(_list[k], 2);
             sumOfWeightedSquares += weights._list[k] * square;
         }

         double numerator = sumOfWeightedSquares / weights.Mean() - _list.Length * Math.Pow(Mean(weights), 2);
         double denominator = _list.Length - 1;
         return numerator / denominator;
     }
     catch (Exception)
     {
         // Failures surface as NaN instead of an exception.
         return double.NaN;
     }
 }
Пример #9
0
 /// <summary>
 /// Calculates the Alpha coefficient (y-intercept) of the linear regression
 /// function for the given Y and X values, as mean(Y) - Beta(Y, X) * mean(X).
 /// </summary>
 /// <param name="y">Y values</param>
 /// <param name="x">X values</param>
 /// <returns>The Alpha coefficient</returns>
 public static double Alpha(Statistics y, Statistics x)
 {
     // Start from the mean of Y, then remove the part explained by the slope.
     double intercept = y.Mean();
     intercept -= Beta(y, x) * x.Mean();
     return intercept;
 }
Пример #10
0
        ///<summary>
        ///Finds the best fit line by minimizing the sum of the squares
        ///of the perpendicular distances of the points to the line.
        ///This was initially reported by Kermack and Haldane (1950) Biometrika, 37, 30.
        ///The original author of this code found it in York, D. (1966) Canadian Journal of Physics, vol 44, p 1079.
        ///</summary>
        ///<param name="a">Values of the coordinate that the slope multiplies.</param>
        ///<param name="b">Values of the other coordinate, read at the same indexes as <paramref name="a"/>.</param>
        ///<returns>
        ///The fitted line; all four result values are 0 when any of the three deviation sums
        ///is not strictly positive.
        ///</returns>
        public static LinearRegression LinearRegressionWithErrorsInBothCoordinates(Statistics a, Statistics b)
        {
            double avgA = a.Mean();
            double avgB = b.Mean();

            // Deviation sums about the means: A squared, B squared, and the cross term.
            double ssA = 0;
            double ssB = 0;
            double spAB = 0;
            for (int k = 0; k < a.Length; k++)
            {
                double offA = a._list[k] - avgA;
                double offB = b._list[k] - avgB;

                ssA += offA * offA;
                ssB += offB * offB;
                spAB += offA * offB;
            }

            LinearRegression line = new LinearRegression();
            if (!(ssA > 0 && ssB > 0 && spAB > 0))
            {
                // Nothing to fit: report an all-zero result.
                line.Correlation = 0;
                line.Slope = 0;
                line.SlopeError = 0;
                line.Intercept = 0;
            }
            else
            {
                line.Correlation = spAB / Math.Sqrt(ssA * ssB);

                double delta = ssB - ssA;
                double root = Math.Sqrt(delta * delta + 4 * (spAB * spAB));
                line.Slope = (delta + root) / 2 / spAB;
                line.Intercept = avgB - line.Slope * avgA;

                // The error estimate is only computed for a correlation below 1.
                line.SlopeError = line.Correlation < 1
                    ? (line.Slope / line.Correlation) * Math.Sqrt((1 - (line.Correlation * line.Correlation)) / a.Length)
                    : 0;
            }
            return line;
        }
Пример #11
0
        /// <summary>
        /// Fits a rate constant to the turnover values of the given rows, after optionally
        /// rejecting per-time-point outliers with "Evvie's filter". Depending on
        /// <c>HalfLifeSettings.SimpleLinearRegression</c>, the fit is either a linear regression
        /// of log(1 - turnover) against time or a Nelder-Mead fit of an exponential to
        /// (time, 1 - turnover).
        /// </summary>
        /// <remarks>
        /// This method mutates the rows it is given: it sets <c>RejectReason</c> on rows it rejects,
        /// and sets <c>EvviesFilterMin</c>/<c>EvviesFilterMax</c> on rows examined by the outlier filter.
        /// </remarks>
        /// <param name="rowDatas">Rows to fit. Rows that already carry a reject reason are not used.</param>
        /// <returns>
        /// The fit results, together with all input rows and the rows remaining after the
        /// initial filtering step.
        /// </returns>
        private ResultData CalculateHalfLife(ICollection<ProcessedRowData> rowDatas)
        {
            IEnumerable<ProcessedRowData> filteredRowDatas;
            if (EvviesFilter != EvviesFilterEnum.None)
            {
                // Rows with a usable turnover and a time point; these are the filter candidates.
                var applicableRowDatas = new List<ProcessedRowData>();
                // Turnover values grouped by time point.
                var values = new Dictionary<double, List<double>>();
                var filteredRowDataList = new List<ProcessedRowData>();
                foreach (var rowData in rowDatas)
                {
                    Debug.Assert(RejectReason.EvviesFilter != rowData.RejectReason);
                    if (null != rowData.RejectReason)
                    {
                        continue;
                    }
                    var value = rowData.Turnover;
                    if (!value.HasValue || double.IsNaN(value.Value) || double.IsInfinity(value.Value))
                    {
                        continue;
                    }
                    var timePoint = GetTimePoint(rowData.RawRowData);
                    if (!timePoint.HasValue)
                    {
                        // Rows without a time point bypass the outlier filter entirely.
                        filteredRowDataList.Add(rowData);
                        continue;
                    }
                    List<double> list;
                    if (!values.TryGetValue(timePoint.Value, out list))
                    {
                        list = new List<double>();
                        values.Add(timePoint.Value, list);
                    }
                    list.Add(value.Value);
                    applicableRowDatas.Add(rowData);
                }
                if (EvviesFilter == EvviesFilterEnum.Oct2011)
                {
                    // Pre-trim each group to median +/- 3 SD before the cutoffs are computed.
                    // Iterate over a snapshot because the dictionary is updated inside the loop.
                    foreach (var entry in values.ToArray())
                    {
                        var statistics = new Statistics(entry.Value.ToArray());
                        var median = statistics.Median();
                        var stdDev = statistics.StdDev();
                        var min = median - 3*stdDev;
                        var max = median + 3*stdDev;
                        if (median + 2 * stdDev >= .99)
                        {
                            // Throw away any values of 100% or 99% if they are more than 2 SD above the median.
                            max = Math.Min(.99, max);
                        }
                        var newValues = entry.Value.Where(v => v >= min && v <= max).ToList();
                        if (newValues.Count != entry.Value.Count)
                        {
                            values[entry.Key] = newValues;
                        }
                    }
                }

                // Per time point: the accepted (min, max) range, stored as (Key, Value).
                var cutoffs = new Dictionary<double, KeyValuePair<double, double>>();
                foreach (var entry in values)
                {
                    var statistics = new Statistics(entry.Value.ToArray());
                    var mean = statistics.Mean();
                    var stdDev = statistics.StdDev();
                    double cutoff;
                    if (EvviesFilter == EvviesFilterEnum.TwoStdDev)
                    {
                        cutoff = 2*stdDev;
                    }
                    else
                    {
                        // Tight groups (SD/mean below 0.3) get a 2 SD window, noisy ones 1 SD.
                        if (stdDev / mean < .3)
                        {
                            cutoff = 2 * stdDev;
                        }
                        else
                        {
                            cutoff = stdDev;
                        }
                    }
                    cutoffs.Add(entry.Key, new KeyValuePair<double, double>(mean - cutoff, mean + cutoff));
                }
                foreach (var rowData in applicableRowDatas)
                {
                    var cutoff = cutoffs[GetTimePoint(rowData.RawRowData).Value];
                    var value = rowData.Turnover;
                    rowData.EvviesFilterMin = cutoff.Key;
                    rowData.EvviesFilterMax = cutoff.Value;
                    // Only apply Evvie's Filter to rows that have a time point.
                    if (GetTimePoint(rowData.RawRowData).HasValue)
                    {
                        if (value.Value < cutoff.Key || value.Value > cutoff.Value)
                        {
                            Debug.Assert(null == rowData.RejectReason);
                            rowData.RejectReason = RejectReason.EvviesFilter;
                            continue;
                        }
                    }
                    filteredRowDataList.Add(rowData);
                }
                filteredRowDatas = filteredRowDataList;
            }
            else
            {
                filteredRowDatas = rowDatas.Where(rowData=>null == rowData.RejectReason).ToArray();
            }
            if (HalfLifeSettings.SimpleLinearRegression)
            {
                var timePoints = new List<double>();
                var logValues = new List<double>();
                foreach (var rowData in filteredRowDatas)
                {
                    if (null != rowData.RejectReason)
                    {
                        continue;
                    }
                    // Turnover is nullable and the unfiltered path above does not screen out
                    // missing values, so check it before dereferencing instead of throwing.
                    var turnover = rowData.Turnover;
                    if (!turnover.HasValue)
                    {
                        rowData.RejectReason = RejectReason.ValueOutOfRange;
                        continue;
                    }
                    double logValue = Math.Log(1 - turnover.Value);
                    if (double.IsNaN(logValue) || double.IsInfinity(logValue))
                    {
                        rowData.RejectReason = RejectReason.ValueOutOfRange;
                        continue;
                    }
                    double? timePoint = GetTimePoint(rowData.RawRowData);
                    if (!timePoint.HasValue || ExcludedTimePoints.Contains(timePoint.Value))
                    {
                        rowData.RejectReason = RejectReason.NoTimePoint;
                        continue;
                    }
                    logValues.Add(logValue);
                    timePoints.Add(timePoint.Value);
                }
                var statsTimePoints = new Statistics(timePoints.ToArray());
                var statsLogValues = new Statistics(logValues.ToArray());
                double rateConstant, stDevRateConstant, rateConstantError, yIntercept;
                double? rSquared = null;
                if (FixedInitialPercent)
                {
                    // Line forced through the origin: one fitted parameter.
                    rateConstant = statsLogValues.SlopeWithoutIntercept(statsTimePoints);
                    stDevRateConstant = Statistics.StdDevSlopeWithoutIntercept(statsLogValues, statsTimePoints);
                    rateConstantError = stDevRateConstant * GetErrorFactor(timePoints.Count - 1);
                    yIntercept = 0;
                }
                else
                {
                    // Slope and intercept both fitted: two parameters.
                    rateConstant = statsLogValues.Slope(statsTimePoints);
                    stDevRateConstant = Statistics.StdDevB(statsLogValues, statsTimePoints);
                    rateConstantError = stDevRateConstant * GetErrorFactor(timePoints.Count - 2);
                    yIntercept = Statistics.Intercept(statsLogValues, statsTimePoints);
                    rSquared = Math.Pow(Statistics.R(statsLogValues, statsTimePoints), 2);
                }
                return new ResultData
                {
                    RateConstant = rateConstant,
                    RateConstantStdDev = stDevRateConstant,
                    RateConstantError = rateConstantError,
                    PointCount = timePoints.Count,
                    YIntercept = yIntercept,
                    RSquared = rSquared,
                    RowDatas = rowDatas.ToArray(),
                    FilteredRowDatas = filteredRowDatas.ToArray(),
                };
            }
            else
            {
                // (time, 1 - turnover) pairs for the non-linear fit.
                var dataPoints = new List<KeyValuePair<double, double>>();
                foreach (var rowData in filteredRowDatas)
                {
                    double? time = rowData.RawRowData.MsDataFile.TimePoint;
                    double? y;
                    y = 1-rowData.Turnover;
                    if (!y.HasValue || !time.HasValue)
                    {
                        continue;
                    }
                    dataPoints.Add(new KeyValuePair<double, double>(time.Value, y.Value));
                }
                var timePoints =
                    Workspace.MsDataFiles.Select(msDataFile => msDataFile.TimePoint)
                    .Where(timePoint => timePoint.HasValue).ToList();
                var resultData = new ResultData
                                     {
                                         PointCount = dataPoints.Count,
                                         FilteredRowDatas = filteredRowDatas.ToArray(),
                                         RowDatas = rowDatas.ToArray(),
                                     };
                if (resultData.RowDatas.Count == 0 || timePoints.Count == 0)
                {
                    resultData.RateConstant = double.NaN;
                    resultData.YIntercept = double.NaN;
                    return resultData;
                }
                NelderMeadSimplex.SimplexConstant[] initialParameters;
                double convergenceTolerance = 0;
                int maxEvaluations = 1000;
                // NOTE(review): if every time point is identical, timePointDifference below is 0 and the
                // initial simplex parameters become infinite - confirm how NelderMeadSimplex.Regress copes.
                if (FixedInitialPercent)
                {
                    // Include time zero in the span used to scale the initial guess.
                    timePoints.Add(0);
                    double timePointDifference = timePoints.Max().Value - timePoints.Min().Value;
                    initialParameters = new[] {new NelderMeadSimplex.SimplexConstant(1/timePointDifference, 1.0/10/timePointDifference)};
                    var regressionResult = NelderMeadSimplex.Regress(initialParameters, convergenceTolerance,
                                                                     maxEvaluations,
                                                                     constants =>
                                                                     SumOfResidualsSquared(
                                                                         x => Math.Exp(-constants[0]*x), dataPoints));
                    resultData.RateConstant = -regressionResult.Constants[0];
                }
                else
                {
                    double timePointDifference = timePoints.Max().Value - timePoints.Min().Value;
                    initialParameters = new[]
                                            {
                                                new NelderMeadSimplex.SimplexConstant(1/timePointDifference,
                                                                                      1.0/10/timePointDifference),
                                                new NelderMeadSimplex.SimplexConstant(0, .1),
                                            };
                    var regressionResult = NelderMeadSimplex.Regress(initialParameters, convergenceTolerance, maxEvaluations,
                        constants=>SumOfResidualsSquared(x=>Math.Exp(-constants[0] * x + constants[1]), dataPoints));
                    resultData.RateConstant = -regressionResult.Constants[0];
                    resultData.YIntercept = regressionResult.Constants[1];
                }
                return resultData;
            }
        }