Пример #1
0
 /// <summary>
 /// Computes the Beta coefficient (slope) of the linear regression of
 /// <paramref name="y"/> on <paramref name="x"/>: the covariance of the two
 /// series divided by the squared standard deviation of <paramref name="x"/>.
 /// </summary>
 /// <param name="y">Y values</param>
 /// <param name="x">X values</param>
 /// <returns>
 /// The Beta coefficient, or <see cref="double.NaN"/> if any exception is
 /// thrown while it is being computed.
 /// </returns>
 public static double Beta(Statistics y, Statistics x)
 {
     double slope;
     try
     {
         var covariance = Covariance(y, x);
         var varianceOfX = Math.Pow(x.StdDev(), 2);
         slope = covariance / varianceOfX;
     }
     catch (Exception)
     {
         // Best effort: any failure is reported to the caller as NaN.
         slope = double.NaN;
     }
     return slope;
 }
Пример #2
0
        /// <summary>
        /// Rebuilds the statistics grid. One row is created for every distinct time point
        /// found on the workspace's data files, in ascending order. Each row shows the time,
        /// whether the time point is included, and (when any of the supplied points fall on
        /// that time) the mean, median, standard deviation and count of their values.
        /// </summary>
        /// <param name="xValues">Time of each point; paired by index with <paramref name="yValues"/>.</param>
        /// <param name="yValues">Value of each point.</param>
        private void UpdateStatsGrid(IList<double> xValues, IList<double> yValues)
        {
            // Bucket the supplied values by the time they belong to.
            var valuesByTime = new Dictionary<double, IList<double>>();
            var pointCount = xValues.Count();
            for (var pointIndex = 0; pointIndex < pointCount; pointIndex++)
            {
                var pointTime = xValues[pointIndex];
                var pointValue = yValues[pointIndex];
                IList<double> bucket;
                if (!valuesByTime.TryGetValue(pointTime, out bucket))
                {
                    bucket = new List<double>();
                    valuesByTime.Add(pointTime, bucket);
                }
                bucket.Add(pointValue);
            }

            // Distinct time points of all data files that have one, sorted ascending.
            // ReSharper disable PossibleInvalidOperationException
            var distinctTimePoints = new HashSet<double>(
                Workspace.MsDataFiles
                    .Where(dataFile => dataFile.TimePoint.HasValue)
                    .Select(dataFile => dataFile.TimePoint.Value));
            // ReSharper restore PossibleInvalidOperationException
            var sortedTimePoints = distinctTimePoints.ToArray();
            Array.Sort(sortedTimePoints);

            gridViewStats.Rows.Clear();
            if (sortedTimePoints.Length == 0)
            {
                return;
            }

            gridViewStats.Rows.Add(sortedTimePoints.Length);
            var rowIndex = 0;
            foreach (var timePoint in sortedTimePoints)
            {
                var gridRow = gridViewStats.Rows[rowIndex];
                rowIndex++;
                gridRow.Cells[colStatsTime.Index].Value = timePoint;
                gridRow.Cells[colStatsInclude.Index].Value = !IsTimePointExcluded(timePoint);

                IList<double> observed;
                if (!valuesByTime.TryGetValue(timePoint, out observed))
                {
                    // No supplied points at this time: the statistics cells are left unset.
                    continue;
                }
                var summary = new Statistics(observed.ToArray());
                gridRow.Cells[colStatsMean.Index].Value = summary.Mean();
                gridRow.Cells[colStatsMedian.Index].Value = summary.Median();
                gridRow.Cells[colStatsStdDev.Index].Value = summary.StdDev();
                gridRow.Cells[colStatsPointCount.Index].Value = summary.Length;
            }
        }
Пример #3
0
 /// <summary>
 /// Writes the mean, standard deviation and standard error of <paramref name="stats"/>
 /// into the corresponding cells of <paramref name="row"/>. The mean is written only
 /// when there is at least one value, and the standard deviation and standard error
 /// only when there is more than one; otherwise the cell is set to null.
 /// </summary>
 /// <param name="dataColumns">Identifies the value, standard deviation and standard error columns.</param>
 /// <param name="row">Grid row whose cells are filled in.</param>
 /// <param name="stats">Statistics to display.</param>
 private void SetColumnValues(DataColumns dataColumns, DataGridViewRow row, Statistics stats)
 {
     var meanCell = row.Cells[dataColumns.ValueColumn.Index];
     if (stats.Length > 0)
     {
         meanCell.Value = stats.Mean();
     }
     else
     {
         meanCell.Value = null;
     }

     var stdDevCell = row.Cells[dataColumns.StdDevColumn.Index];
     if (stats.Length > 1)
     {
         stdDevCell.Value = stats.StdDev();
     }
     else
     {
         stdDevCell.Value = null;
     }

     var stdErrCell = row.Cells[dataColumns.StdErrColumn.Index];
     if (stats.Length > 1)
     {
         stdErrCell.Value = stats.StdErr();
     }
     else
     {
         stdErrCell.Value = null;
     }
 }
Пример #4
0
 /// <summary>
 /// Computes the correlation coefficient of two sets of numbers: their
 /// covariance divided by the product of their standard deviations.
 /// </summary>
 /// <param name="s1">First set of numbers</param>
 /// <param name="s2">Second set of numbers</param>
 /// <returns>
 /// The correlation coefficient, or <see cref="double.NaN"/> if any exception
 /// is thrown while it is being computed.
 /// </returns>
 public static double R(Statistics s1, Statistics s2)
 {
     double correlation;
     try
     {
         var covariance = Covariance(s1, s2);
         var stdDevProduct = s1.StdDev() * s2.StdDev();
         correlation = covariance / stdDevProduct;
     }
     catch (Exception)
     {
         // Best effort: any failure is reported to the caller as NaN.
         correlation = double.NaN;
     }
     return correlation;
 }
Пример #5
0
        /// <summary>
        /// Fits a rate constant to the turnover values of the given rows.
        /// When <c>EvviesFilter</c> is not <c>None</c>, rows that have a time point are first
        /// compared against per-time-point cutoffs, and rows outside the cutoffs are marked
        /// with <c>RejectReason.EvviesFilter</c>. The remaining rows are then fitted either by
        /// a linear regression of ln(1 - Turnover) against time point (when
        /// <c>HalfLifeSettings.SimpleLinearRegression</c> is set) or by passing the sum of squared
        /// residuals of an exponential curve to <c>NelderMeadSimplex.Regress</c>.
        /// </summary>
        /// <param name="rowDatas">Rows to fit. Their <c>RejectReason</c>, <c>EvviesFilterMin</c> and
        /// <c>EvviesFilterMax</c> properties may be modified by this method.</param>
        /// <returns>A <c>ResultData</c> holding the fitted values, all of the rows
        /// (<c>RowDatas</c>) and the rows that passed the filter (<c>FilteredRowDatas</c>).</returns>
        private ResultData CalculateHalfLife(ICollection<ProcessedRowData> rowDatas)
        {
            IEnumerable<ProcessedRowData> filteredRowDatas;
            if (EvviesFilter != EvviesFilterEnum.None)
            {
                // Rows that have a usable value and a time point; these are the rows the filter is applied to.
                var applicableRowDatas = new List<ProcessedRowData>();
                // Usable turnover values grouped by time point.
                var values = new Dictionary<double, List<double>>();
                // Rows that end up being passed on to the fit.
                var filteredRowDataList = new List<ProcessedRowData>();
                foreach (var rowData in rowDatas)
                {
                    // Rows are not expected to already carry an EvviesFilter rejection on entry.
                    Debug.Assert(RejectReason.EvviesFilter != rowData.RejectReason);
                    if (null != rowData.RejectReason)
                    {
                        // Already rejected for another reason: dropped from the filtered set.
                        continue;
                    }
                    var value = rowData.Turnover;
                    if (!value.HasValue || double.IsNaN(value.Value) || double.IsInfinity(value.Value))
                    {
                        // No usable turnover value: dropped from the filtered set without a reject reason.
                        continue;
                    }
                    var timePoint = GetTimePoint(rowData.RawRowData);
                    if (!timePoint.HasValue)
                    {
                        // Rows without a time point bypass the filter and are kept as they are.
                        filteredRowDataList.Add(rowData);
                        continue;
                    }
                    List<double> list;
                    if (!values.TryGetValue(timePoint.Value, out list))
                    {
                        list = new List<double>();
                        values.Add(timePoint.Value, list);
                    }
                    list.Add(value.Value);
                    applicableRowDatas.Add(rowData);
                }
                if (EvviesFilter == EvviesFilterEnum.Oct2011)
                {
                    // Pre-trim the values of each time point to median +/- 3 SD before the cutoffs are
                    // computed below. Only the "values" lists are trimmed; no row is rejected here.
                    // Iterate over a snapshot because entries of "values" are replaced inside the loop.
                    foreach (var entry in values.ToArray())
                    {
                        var statistics = new Statistics(entry.Value.ToArray());
                        var min = statistics.Median() - 3*statistics.StdDev();
                        var max = statistics.Median() + 3*statistics.StdDev();
                        if (statistics.Median() + 2 * statistics.StdDev() >= .99)
                        {
                            // Cap the upper bound at 99% whenever median + 2 SD reaches 99%, so that
                            // values above 99% are discarded.
                            max = Math.Min(.99, max);
                        }
                        var newValues = entry.Value.Where(v => v >= min && v <= max).ToList();
                        if (newValues.Count != entry.Value.Count)
                        {
                            values[entry.Key] = newValues;
                        }
                    }
                }

                // Per time point: the (lower, upper) bounds a value must lie within, stored as
                // KeyValuePair.Key = lower bound and KeyValuePair.Value = upper bound.
                var cutoffs = new Dictionary<double, KeyValuePair<double, double>>();
                foreach (var entry in values)
                {
                    var statistics = new Statistics(entry.Value.ToArray());
                    var mean = statistics.Mean();
                    var stdDev = statistics.StdDev();
                    double cutoff;
                    if (EvviesFilter == EvviesFilterEnum.TwoStdDev)
                    {
                        cutoff = 2*stdDev;
                    }
                    else
                    {
                        // Other filter modes: allow 2 SD when the spread relative to the mean is
                        // below 0.3, otherwise only 1 SD.
                        if (stdDev / mean < .3)
                        {
                            cutoff = 2 * stdDev;
                        }
                        else
                        {
                            cutoff = stdDev;
                        }
                    }
                    cutoffs.Add(entry.Key, new KeyValuePair<double, double>(mean - cutoff, mean + cutoff));
                }
                foreach (var rowData in applicableRowDatas)
                {
                    // Every row in applicableRowDatas was added with a time point, so the lookup key exists.
                    var cutoff = cutoffs[GetTimePoint(rowData.RawRowData).Value];
                    var value = rowData.Turnover;
                    // Record the bounds on the row, whether or not it passes.
                    rowData.EvviesFilterMin = cutoff.Key;
                    rowData.EvviesFilterMax = cutoff.Value;
                    // Only apply Evvie's Filter to rows that have a time point.
                    if (GetTimePoint(rowData.RawRowData).HasValue)
                    {
                        if (value.Value < cutoff.Key || value.Value > cutoff.Value)
                        {
                            Debug.Assert(null == rowData.RejectReason);
                            rowData.RejectReason = RejectReason.EvviesFilter;
                            continue;
                        }
                    }
                    filteredRowDataList.Add(rowData);
                }
                filteredRowDatas = filteredRowDataList;
            }
            else
            {
                // No filter: keep every row that has not been rejected. Unlike the filter path,
                // rows whose Turnover is null, NaN or infinite are not removed here.
                filteredRowDatas = rowDatas.Where(rowData=>null == rowData.RejectReason).ToArray();
            }
            if (HalfLifeSettings.SimpleLinearRegression)
            {
                // Linear regression of ln(1 - Turnover) against time point.
                var timePoints = new List<double>();
                var logValues = new List<double>();
                foreach (var rowData in filteredRowDatas)
                {
                    if (null != rowData.RejectReason)
                    {
                        continue;
                    }
                    // NOTE(review): Turnover.Value throws if Turnover is null; the no-filter path above
                    // does not exclude such rows. Confirm that Turnover is always set by this point.
                    double? logValue = Math.Log(1-rowData.Turnover.Value);
                    if (!logValue.HasValue || double.IsNaN(logValue.Value) || double.IsInfinity(logValue.Value))
                    {
                        rowData.RejectReason = RejectReason.ValueOutOfRange;
                        continue;
                    }
                    double? timePoint = GetTimePoint(rowData.RawRowData);
                    if (!timePoint.HasValue || ExcludedTimePoints.Contains(timePoint.Value))
                    {
                        // Excluded time points are reported with the same reason as missing ones.
                        rowData.RejectReason = RejectReason.NoTimePoint;
                        continue;
                    }
                    logValues.Add(logValue.Value);
                    timePoints.Add(timePoint.Value);
                }
                // Rows rejected in the loop above stay in filteredRowDatas (and therefore in
                // FilteredRowDatas below); they are only left out of the regression inputs.
                var statsTimePoints = new Statistics(timePoints.ToArray());
                var statsLogValues = new Statistics(logValues.ToArray());
                double rateConstant, stDevRateConstant, rateConstantError, yIntercept;
                double? rSquared = null;
                if (FixedInitialPercent)
                {
                    // Line forced through the origin: the intercept is fixed at 0 and no R squared is reported.
                    rateConstant = statsLogValues.SlopeWithoutIntercept(statsTimePoints);
                    stDevRateConstant = Statistics.StdDevSlopeWithoutIntercept(statsLogValues, statsTimePoints);
                    // presumably Count - 1 is the degrees of freedom with one fitted parameter; verify against GetErrorFactor
                    rateConstantError = stDevRateConstant * GetErrorFactor(timePoints.Count - 1);
                    yIntercept = 0;
                }
                else
                {
                    rateConstant = statsLogValues.Slope(statsTimePoints);
                    stDevRateConstant = Statistics.StdDevB(statsLogValues, statsTimePoints);
                    // presumably Count - 2 is the degrees of freedom with two fitted parameters; verify against GetErrorFactor
                    rateConstantError = stDevRateConstant * GetErrorFactor(timePoints.Count - 2);
                    yIntercept = Statistics.Intercept(statsLogValues, statsTimePoints);
                    rSquared = Math.Pow(Statistics.R(statsLogValues, statsTimePoints), 2);
                }
                return new ResultData
                {
                    RateConstant = rateConstant,
                    RateConstantStdDev = stDevRateConstant,
                    RateConstantError = rateConstantError,
                    PointCount = timePoints.Count,
                    YIntercept = yIntercept,
                    RSquared = rSquared,
                    RowDatas = rowDatas.ToArray(),
                    FilteredRowDatas = filteredRowDatas.ToArray(),
                };
            }
            else
            {
                // Non-linear fit: (time, 1 - Turnover) points are fitted to an exponential curve.
                // This path reads MsDataFile.TimePoint directly rather than calling GetTimePoint,
                // and does not consult ExcludedTimePoints.
                var dataPoints = new List<KeyValuePair<double, double>>();
                foreach (var rowData in filteredRowDatas)
                {
                    double? time = rowData.RawRowData.MsDataFile.TimePoint;
                    double? y;
                    // Lifted arithmetic: y is null when Turnover is null.
                    y = 1-rowData.Turnover;
                    if (!y.HasValue || !time.HasValue)
                    {
                        continue;
                    }
                    dataPoints.Add(new KeyValuePair<double, double>(time.Value, y.Value));
                }
                // Time points of all data files in the workspace; used only to size the initial guess.
                var timePoints =
                    Workspace.MsDataFiles.Select(msDataFile => msDataFile.TimePoint)
                    .Where(timePoint => timePoint.HasValue).ToList();
                var resultData = new ResultData
                                     {
                                         PointCount = dataPoints.Count,
                                         FilteredRowDatas = filteredRowDatas.ToArray(),
                                         RowDatas = rowDatas.ToArray(),
                                     };
                // NOTE(review): this guard tests the number of input rows, not dataPoints.Count, so
                // the regression below can run with zero data points - confirm that is intended.
                if (resultData.RowDatas.Count == 0 || timePoints.Count == 0)
                {
                    resultData.RateConstant = double.NaN;
                    resultData.YIntercept = double.NaN;
                    return resultData;
                }
                NelderMeadSimplex.SimplexConstant[] initialParameters;
                double convergenceTolerance = 0;
                int maxEvaluations = 1000;
                if (FixedInitialPercent)
                {
                    // Include time zero in the range used to size the initial guess.
                    timePoints.Add(0);
                    // NOTE(review): if all time points are equal the difference is 0 and the
                    // divisions below produce infinity - confirm this cannot happen.
                    double timePointDifference = timePoints.Max().Value - timePoints.Min().Value;
                    // presumably SimplexConstant takes (initial value, initial perturbation); verify against its definition
                    initialParameters = new[] {new NelderMeadSimplex.SimplexConstant(1/timePointDifference, 1.0/10/timePointDifference)};
                    // Single-parameter model: y = exp(-k * x).
                    var regressionResult = NelderMeadSimplex.Regress(initialParameters, convergenceTolerance,
                                                                     maxEvaluations,
                                                                     constants =>
                                                                     SumOfResidualsSquared(
                                                                         x => Math.Exp(-constants[0]*x), dataPoints));
                    // The fitted k is stored negated. YIntercept is not assigned in this branch.
                    resultData.RateConstant = -regressionResult.Constants[0];
                }
                else
                {
                    // NOTE(review): if all time points are equal the difference is 0 and the
                    // divisions below produce infinity - confirm this cannot happen.
                    double timePointDifference = timePoints.Max().Value - timePoints.Min().Value;
                    initialParameters = new[]
                                            {
                                                new NelderMeadSimplex.SimplexConstant(1/timePointDifference,
                                                                                      1.0/10/timePointDifference),
                                                new NelderMeadSimplex.SimplexConstant(0, .1),
                                            };
                    // Two-parameter model: y = exp(-k * x + b).
                    var regressionResult = NelderMeadSimplex.Regress(initialParameters, convergenceTolerance, maxEvaluations,
                        constants=>SumOfResidualsSquared(x=>Math.Exp(-constants[0] * x + constants[1]), dataPoints));
                    // The fitted k is stored negated; b is stored as the Y intercept.
                    resultData.RateConstant = -regressionResult.Constants[0];
                    resultData.YIntercept = regressionResult.Constants[1];
                }
                return resultData;
            }
        }