Exemple #1
0
        /// <summary>
        /// Runs a one-way ANOVA of <paramref name="numerals"/> grouped by
        /// <paramref name="categories"/> and stores the outcome on this instance
        /// (<c>Means</c>, <c>Groups</c>, <c>FStat</c>, <c>CriticalValueAtAlphaZeroFive</c>,
        /// <c>SigAtPointZeroFive</c> and <c>SignificantResult</c>).
        /// </summary>
        /// <param name="categories">Group label for each row. Rows with a null or empty label are skipped.</param>
        /// <param name="numerals">
        /// Numeric value for each row, as text. Rows that are null, empty, or not parseable by
        /// <see cref="double.TryParse(string, out double)"/> are skipped.
        /// </param>
        /// <exception cref="ArgumentNullException">Either list is null.</exception>
        /// <remarks>
        /// Rows are paired by index; if the lists differ in length only the overlapping rows are used.
        /// When there are fewer than two groups, or no within-group degrees of freedom, the F ratio
        /// is undefined: <c>FStat</c> is set to NaN, both significance flags are set to false, and
        /// <c>CriticalValueAtAlphaZeroFive</c> is left untouched.
        /// </remarks>
        public void GetAnovaStats(List <string> categories, List <string> numerals)
        {
            if (categories == null)
            {
                throw new ArgumentNullException("categories");
            }

            if (numerals == null)
            {
                throw new ArgumentNullException("numerals");
            }

            Dictionary <string, List <double> > groups = new Dictionary <string, List <double> >();
            Dictionary <string, double>         means  = new Dictionary <string, double>();

            // Rows are paired by index, so only indices valid in both lists can be used.
            int rowCount = Math.Min(categories.Count, numerals.Count);

            for (var i = 0; i < rowCount; i++)
            {
                //need to clean numbers...exclude $ , and stuff like that
                if (string.IsNullOrEmpty(categories[i]) || string.IsNullOrEmpty(numerals[i]))
                {
                    continue;
                }

                double numeral;

                if (!double.TryParse(numerals[i], out numeral))
                {
                    continue;
                }

                // Single lookup: fetch the group's list, creating it on first sight of the label.
                List <double> members;

                if (!groups.TryGetValue(categories[i], out members))
                {
                    members = new List <double>();
                    groups.Add(categories[i], members);
                }

                members.Add(numeral);
            }

            double ssw            = 0; // within-group sum of squares
            double indTotalSquare = 0; // sum over groups of (group sum)^2 / group size
            double totalSum       = 0;
            int    totalCount     = 0;

            foreach (KeyValuePair <string, List <double> > kvp in groups)
            {
                // Every group holds at least one value, so groupCount is never zero here.
                double groupSum   = kvp.Value.Sum();
                int    groupCount = kvp.Value.Count;
                double mean       = groupSum / groupCount;

                means.Add(kvp.Key, mean);

                //for ssb
                indTotalSquare += Math.Pow(groupSum, 2) / groupCount;
                totalSum       += groupSum;
                totalCount     += groupCount;

                //for ssw
                foreach (double num in kvp.Value)
                {
                    ssw += Math.Pow(num - mean, 2);
                }
            }

            var keyValueMeans = new List <KeyValue>();

            foreach (KeyValuePair <string, double> kvp in means)
            {
                keyValueMeans.Add(new KeyValue {
                    Key = kvp.Key, Value = kvp.Value
                });
            }

            Means  = keyValueMeans;
            Groups = groups;

            // Degrees of freedom: between = #groups - 1, within = #observations - #groups.
            int dfB = groups.Count - 1;
            int dfW = totalCount - groups.Count;

            if (dfB < 1 || dfW < 1)
            {
                // The F ratio would divide by zero here. Floating-point residue in ssb could
                // otherwise turn into +Infinity and be reported as a significant result.
                FStat = double.NaN;
                SigAtPointZeroFive = false;
                SignificantResult  = false;
                return;
            }

            // Between-group sum of squares via the computational formula.
            double ssb = indTotalSquare - (Math.Pow(totalSum, 2) / totalCount);

            // F = mean square between / mean square within.
            double f = (ssb / dfB) / (ssw / dfW);

            FTable fTable = new FTable();

            CriticalValueAtAlphaZeroFive = fTable.sigAtZeroFive(dfB, dfW, f);

            bool significant = f > CriticalValueAtAlphaZeroFive;

            SigAtPointZeroFive = significant;
            SignificantResult  = significant;
            FStat = f;
        }
Exemple #2
0
        /// <summary>
        /// Builds a <see cref="StatSummaryVM"/> by comparing every unordered pair of columns in
        /// <paramref name="dictFile"/> and running the analysis selected by the two column types
        /// found in <c>vm.ColumnTypes</c>:
        /// date time + categorical, date time + numeral, categorical + categorical,
        /// categorical + numeral, and numeral + numeral.
        /// </summary>
        /// <param name="dictFile">Column name mapped to that column's raw cell values.</param>
        /// <param name="vm">Supplies the per-column type plus the summary's name, description, path and file name.</param>
        /// <returns>The populated summary.</returns>
        /// <exception cref="ArgumentNullException">Either argument is null.</exception>
        /// <remarks>
        /// Pairs are skipped when the two columns are the same, have different row counts, or either
        /// is typed "exclude". Each ANOVA result is added to the database context and saved
        /// immediately; the other result kinds are only returned in the summary.
        /// </remarks>
        public static StatSummaryVM GetSummaryVM(Dictionary <string, List <string> > dictFile, SelectTypeVM vm)
        {
            if (dictFile == null)
            {
                throw new ArgumentNullException("dictFile");
            }

            if (vm == null)
            {
                throw new ArgumentNullException("vm");
            }

            // Hash set gives constant-time "already compared" checks inside the nested loop.
            HashSet <string>       checkedColumns      = new HashSet <string>();
            List <ChiStats>        chiStatsList        = new List <ChiStats>();
            List <AnovaStats>      anovaStatsList      = new List <AnovaStats>();
            List <PearsonCorr>     pearsonCorrelations = new List <PearsonCorr>();
            List <DateAndCategory> dateAndCategories   = new List <DateAndCategory>();
            List <DateAndNumeral>  dateAndNumerals     = new List <DateAndNumeral>();

            StatSummaryVM statsSummary = new StatSummaryVM();

            // Dispose the context (and its connection) once all pairs have been processed.
            using (ApplicationDbContext _context = new ApplicationDbContext())
            {
                foreach (KeyValuePair <string, List <string> > entry in dictFile)
                {
                    foreach (KeyValuePair <string, List <string> > entryCompare in dictFile)
                    {
                        // Order-independent key for the pair, so (A, B) and (B, A) are processed once.
                        var checkedKey = Methods.ConcatAlph(entry.Key, entryCompare.Key);

                        if (entry.Key == entryCompare.Key ||
                            checkedColumns.Contains(checkedKey) ||
                            entry.Value.Count != entryCompare.Value.Count)
                        {
                            continue;
                        }

                        // Look each column type up once, in the same order the checks need them.
                        var entryType = vm.ColumnTypes[entry.Key];

                        if (entryType == "exclude")
                        {
                            continue;
                        }

                        var compareType = vm.ColumnTypes[entryCompare.Key];

                        if (compareType == "exclude")
                        {
                            continue;
                        }

                        bool entryIsDate          = entryType == "date time";
                        bool compareIsDate        = compareType == "date time";
                        bool entryIsCategorical   = entryType == "categorical";
                        bool compareIsCategorical = compareType == "categorical";
                        bool entryIsNumeral       = entryType == "numeral";
                        bool compareIsNumeral     = compareType == "numeral";

                        if ((entryIsDate || compareIsDate) && (entryIsCategorical || compareIsCategorical))
                        {
                            DateAndCategory dateCat = new DateAndCategory();

                            // The date column is always passed first.
                            if (entryIsDate)
                            {
                                dateCat.GetLinePlotData(entry.Value, entryCompare.Value);
                            }
                            else
                            {
                                dateCat.GetLinePlotData(entryCompare.Value, entry.Value);
                            }

                            // NOTE(review): Variable1/Variable2 follow iteration order, not the
                            // date-first order used for the data above - confirm this is intended.
                            dateCat.Variable1 = entry.Key;
                            dateCat.Variable2 = entryCompare.Key;

                            dateAndCategories.Add(dateCat);
                        }

                        if ((entryIsDate || compareIsDate) && (entryIsNumeral || compareIsNumeral))
                        {
                            DateAndNumeral dateNum = new DateAndNumeral();

                            // The date column is always passed first.
                            if (entryIsDate)
                            {
                                dateNum.MakeDataBlob(entry.Value, entryCompare.Value);
                                dateNum.DateName    = entry.Key;
                                dateNum.NumeralName = entryCompare.Key;
                            }
                            else
                            {
                                dateNum.MakeDataBlob(entryCompare.Value, entry.Value);
                                dateNum.DateName    = entryCompare.Key;
                                dateNum.NumeralName = entry.Key;
                            }

                            dateAndNumerals.Add(dateNum);
                        }

                        if (entryIsCategorical && compareIsCategorical)
                        {
                            ChiStats chiStats = new ChiStats();

                            chiStats.GetChiStat(entry.Value, entryCompare.Value);

                            // NOTE(review): the labels are assigned in the opposite order to the
                            // data passed to GetChiStat - confirm this is intended.
                            chiStats.Variable1 = entryCompare.Key.Replace("\"", "");
                            chiStats.Variable2 = entry.Key.Replace("\"", "");

                            chiStatsList.Add(chiStats);
                        }

                        if ((entryIsCategorical || compareIsCategorical) && (entryIsNumeral || compareIsNumeral))
                        {
                            AnovaStats anovaStats = new AnovaStats();

                            // GetAnovaStats takes (categories, numerals).
                            if (entryIsNumeral)
                            {
                                anovaStats.GetAnovaStats(entryCompare.Value, entry.Value);
                                anovaStats.CategoricalVariable = entryCompare.Key;
                                anovaStats.NumericalVariable   = entry.Key;
                            }
                            else
                            {
                                anovaStats.GetAnovaStats(entry.Value, entryCompare.Value);
                                anovaStats.CategoricalVariable = entry.Key;
                                anovaStats.NumericalVariable   = entryCompare.Key;
                            }

                            anovaStatsList.Add(anovaStats);

                            // ANOVA results are the only ones persisted; each is saved as it is produced.
                            _context.AnovaStats.Add(anovaStats);
                            _context.SaveChanges();
                        }

                        if (entryIsNumeral && compareIsNumeral)
                        {
                            PearsonCorr pearsonCorr = new PearsonCorr();
                            pearsonCorr.Variable1 = entry.Key;
                            pearsonCorr.Variable2 = entryCompare.Key;
                            pearsonCorr.ComputeCoeff(entry.Value, entryCompare.Value);

                            // Nothing is added to the context for Pearson results, so there is
                            // nothing to save here.
                            pearsonCorrelations.Add(pearsonCorr);
                        }

                        checkedColumns.Add(checkedKey);
                    }
                }
            }

            statsSummary.Name        = vm.Name ?? "Untitled";
            statsSummary.Description = vm.Description ?? "no description available";

            statsSummary.AnovaStats      = anovaStatsList;
            statsSummary.ChiStats        = chiStatsList;
            statsSummary.PearsonCorrs    = pearsonCorrelations;
            statsSummary.DateAndCatories = dateAndCategories;
            statsSummary.DateAndNumerals = dateAndNumerals;
            statsSummary.Path            = vm.Path;
            statsSummary.FileName        = vm.FileName;

            return(statsSummary);
        }