/// <summary>
/// Draws 1000 samples for <paramref name="statistics"/> and checks them against the expected
/// true-ratio of each statistic and the pairwise correlations in <paramref name="correlationMatrix"/>.
/// </summary>
/// <param name="statistics">Statistics to verify. Entries whose name has no entry in NameToColumnIndex are skipped.</param>
/// <param name="correlationMatrix">Source of the expected correlation for each pair of statistic names.</param>
/// <param name="meanConfidence">Tolerance passed to ThrowIfExceedsConfidence for the true-ratio check.</param>
/// <param name="correlationConfidence">Tolerance passed to ThrowIfExceedsConfidence for the correlation check.</param>
/// <param name="onlyWarn">Forwarded unchanged to ThrowIfExceedsConfidence (presumably downgrades a failure to a warning; confirm in that method).</param>
/// <param name="diffie">Optional accumulator. The value returned by each correlation check is added under the key innerName + outerName.</param>
public void Verify(List <BooleanStatistic> statistics, CorrelationMatrix correlationMatrix, double meanConfidence = 0.1, double correlationConfidence = 0.1, bool onlyWarn = false, Dictionary <string, double> diffie = null)
        {
            var samples = GetSamples(1000, statistics);

            // Pairs that have already been compared, stored as (outer, inner).
            // A pair type is used instead of concatenated names so that distinct pairs such as
            // ("ab", "c") and ("a", "bc") can never be mistaken for one another.
            var comparedPairs = new HashSet <System.Tuple <string, string> >();

            foreach (var outerStat in statistics)
            {
                if (!NameToColumnIndex.ContainsKey(outerStat.Name))
                {
                    continue;
                }

                var samplesForOuterStat = GetValue(NameToColumnIndex[outerStat.Name], samples);
                var meanForOuterStat    = TrueRatio(samplesForOuterStat);
                ThrowIfExceedsConfidence(meanForOuterStat, outerStat.TrueRatio, meanConfidence, "Mean for " + outerStat.Name, onlyWarn);

                // A statistic with a true-ratio of exactly 0 or 100 is excluded from every correlation
                // check, so there is no point entering the inner loop for it.
                if (outerStat.TrueRatio == 100 || outerStat.TrueRatio == 0)
                {
                    continue;
                }

                foreach (var innerStat in statistics)
                {
                    if (!NameToColumnIndex.ContainsKey(innerStat.Name))
                    {
                        continue;
                    }

                    if (innerStat.Name == outerStat.Name)
                    {
                        continue;
                    }

                    if (innerStat.TrueRatio == 100 || innerStat.TrueRatio == 0)
                    {
                        continue;
                    }

                    // Skip the pair if it was already handled with the roles swapped.
                    if (comparedPairs.Contains(System.Tuple.Create(innerStat.Name, outerStat.Name)))
                    {
                        continue;
                    }

                    var samplesForInnerStat = GetValue(NameToColumnIndex[innerStat.Name], samples);
                    var cor    = StatisticHelper.CalculateCorrelation(samplesForOuterStat, samplesForInnerStat);
                    var orgCor = correlationMatrix.GetValue(outerStat.Name, innerStat.Name).Value;

                    var realDiff = ThrowIfExceedsConfidence(cor, orgCor, correlationConfidence, "Correlation between " + innerStat.Name + " and " + outerStat.Name, onlyWarn);

                    if (diffie != null)
                    {
                        // The key format (inner name directly followed by outer name) is what callers
                        // already receive, so it is kept as-is.
                        var combo = innerStat.Name + outerStat.Name;
                        if (!diffie.ContainsKey(combo))
                        {
                            diffie.Add(combo, 0);
                        }

                        diffie[combo] += realDiff;
                    }

                    comparedPairs.Add(System.Tuple.Create(outerStat.Name, innerStat.Name));
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Builds a <see cref="CorrelationMatrix"/> over the statistics in <paramref name="statisticsDic"/>,
        /// computing the correlation between the sample arrays of each pair of entries.
        /// </summary>
        /// <remarks>
        /// A pair that has already been computed in either order is skipped. If either statistic has no
        /// samples or fewer than two, the cell is stored with a null value. If the two sample arrays differ
        /// in length, both are truncated to the shorter length and that length is passed to SetValue.
        /// </remarks>
        /// <param name="statisticsDic">Statistics keyed by name; the keys become the matrix labels.</param>
        /// <param name="correlationThreshold">Not read by this method; kept so existing callers continue to compile.</param>
        /// <returns>The populated matrix.</returns>
        public static CorrelationMatrix Create(Dictionary <string, Statistic> statisticsDic, double correlationThreshold = 0.3)
        {
            var matrix = new CorrelationMatrix(statisticsDic.Count, statisticsDic.Keys.ToArray());

            // Pairs whose correlation has been stored, as (outer, inner). Using a pair type rather than
            // concatenated names keeps ("ab", "c") and ("a", "bc") distinct, so no cell is skipped by mistake.
            var computedPairs = new HashSet <Tuple <string, string> >();

            foreach (var outerStat in statisticsDic.Keys)     // vertical
            {
                foreach (var innerStat in statisticsDic.Keys) // horizontal
                {
                    if (computedPairs.Contains(Tuple.Create(outerStat, innerStat)) || computedPairs.Contains(Tuple.Create(innerStat, outerStat)))
                    {
                        continue;
                    }

                    var valuesOuter = statisticsDic[outerStat].GetSamples();
                    var valuesInner = statisticsDic[innerStat].GetSamples();

                    if (valuesInner == null || valuesOuter == null || valuesOuter.Length < 2 || valuesInner.Length < 2)
                    {
                        // Not enough data for a correlation; record an empty cell. The pair is deliberately
                        // not marked as computed, matching the previous behaviour.
                        matrix.SetValue(outerStat, innerStat, null, null);
                        continue;
                    }

                    int? numberOfSamples = null;
                    if (valuesOuter.Length != valuesInner.Length)
                    {
                        // Correlation needs equally sized inputs: trim both to the shorter array.
                        numberOfSamples = Math.Min(valuesInner.Length, valuesOuter.Length);
                        valuesInner     = valuesInner.Take(numberOfSamples.Value).ToArray();
                        valuesOuter     = valuesOuter.Take(numberOfSamples.Value).ToArray();
                    }

                    matrix.SetValue(outerStat, innerStat, StatisticHelper.CalculateCorrelation(valuesOuter, valuesInner), numberOfSamples);
                    computedPairs.Add(Tuple.Create(outerStat, innerStat));
                }
            }

            return(matrix);
        }