/// <summary>
/// Groups the numeric values by category and computes the between-group and
/// within-group sums of squares, their degrees of freedom and the resulting
/// F ratio. The results are stored on this instance (Means, Groups, FStat,
/// CriticalValueAtAlphaZeroFive, SigAtPointZeroFive, SignificantResult).
/// </summary>
/// <param name="categories">Category label for each row; null or empty entries are skipped.</param>
/// <param name="numerals">Raw numeric text for each row; null, empty or unparsable entries are skipped.</param>
public void GetAnovaStats(List<string> categories, List<string> numerals)
{
    var groups = new Dictionary<string, List<double>>();

    // Rows are paired by index, so only indices present in BOTH lists are usable.
    // Bounding the loop avoids an ArgumentOutOfRangeException on ragged input.
    int rowCount = Math.Min(categories.Count, numerals.Count);
    for (var i = 0; i < rowCount; i++)
    {
        string category = categories[i];
        string rawNumber = numerals[i];
        if (string.IsNullOrEmpty(category) || string.IsNullOrEmpty(rawNumber))
        {
            continue;
        }

        // TODO: clean numbers before parsing (strip "$", "," and similar);
        // for now anything double.TryParse rejects is silently dropped.
        double numeral;
        if (!double.TryParse(rawNumber, out numeral))
        {
            continue;
        }

        List<double> bucket;
        if (!groups.TryGetValue(category, out bucket))
        {
            bucket = new List<double>();
            groups.Add(category, bucket);
        }
        bucket.Add(numeral);
    }

    double ssw = 0;                  // within-group sum of squares
    double squaredGroupTotals = 0;   // sum over groups of (group total)^2 / group size
    double totalSum = 0;
    int totalCount = 0;
    var keyValueMeans = new List<KeyValue>();

    foreach (KeyValuePair<string, List<double>> kvp in groups)
    {
        List<double> values = kvp.Value;
        double groupSum = values.Sum();
        int groupCount = values.Count;
        double mean = groupSum / groupCount;

        keyValueMeans.Add(new KeyValue { Key = kvp.Key, Value = mean });

        squaredGroupTotals += Math.Pow(groupSum, 2) / groupCount;
        totalSum += groupSum;
        totalCount += groupCount;

        foreach (double num in values)
        {
            ssw += Math.Pow(num - mean, 2);
        }
    }

    // Between-group sum of squares: sum(T_g^2 / n_g) - (grand total)^2 / N.
    double ssb = squaredGroupTotals - (Math.Pow(totalSum, 2) / totalCount);

    int dfB = groups.Count - 1;          // between-group degrees of freedom
    int dfW = totalCount - groups.Count; // within-group degrees of freedom

    // NOTE(review): with fewer than two groups, or with one observation per group,
    // dfB and/or dfW is 0 (or dfB is -1 for empty input) and these floating-point
    // divisions yield NaN/Infinity rather than throwing. How FTable handles such
    // arguments is not visible here - confirm before relying on degenerate input.
    double f = (ssb / (double)dfB) / (ssw / (double)dfW);

    FTable fTable = new FTable();
    CriticalValueAtAlphaZeroFive = fTable.sigAtZeroFive(dfB, dfW, f);
    SigAtPointZeroFive = f > CriticalValueAtAlphaZeroFive;
    SignificantResult = f > CriticalValueAtAlphaZeroFive;
    Means = keyValueMeans;
    Groups = groups;
    FStat = f;
}
/// <summary>
/// Compares every unordered pair of equally sized, non-excluded columns in
/// <paramref name="dictFile"/> and, depending on the two column types recorded in
/// <c>vm.ColumnTypes</c>, builds the matching statistic or plot object
/// (date/category, date/numeral, chi, ANOVA, Pearson). ANOVA results are also
/// added to the database context and saved as they are produced.
/// </summary>
/// <param name="dictFile">Column name mapped to that column's raw string values.</param>
/// <param name="vm">Supplies the per-column type plus Name, Description, Path and FileName for the summary.</param>
/// <returns>A summary view model holding all collected results.</returns>
public static StatSummaryVM GetSummaryVM(Dictionary<string, List<string>> dictFile, SelectTypeVM vm)
{
    // Only used for membership tests inside the nested loop, so a set replaces the O(n) list scan.
    var checkedColumns = new HashSet<string>();
    var chiStatsList = new List<ChiStats>();
    var anovaStatsList = new List<AnovaStats>();
    var pearsonCorrelations = new List<PearsonCorr>();
    var dateAndCategories = new List<DateAndCategory>();
    var dateAndNumerals = new List<DateAndNumeral>();

    // The context was previously never disposed; scope it so it is released
    // even when a statistic computation throws.
    using (ApplicationDbContext context = new ApplicationDbContext())
    {
        foreach (KeyValuePair<string, List<string>> entry in dictFile)
        {
            foreach (KeyValuePair<string, List<string>> entryCompare in dictFile)
            {
                // Both (A, B) and (B, A) are checked against the same key so a pair
                // already handled is skipped below.
                var checkedKey = Methods.ConcatAlph(entry.Key, entryCompare.Key);
                if (entry.Key == entryCompare.Key
                    || checkedColumns.Contains(checkedKey)
                    || entry.Value.Count != entryCompare.Value.Count)
                {
                    continue;
                }

                // Look the types up once per pair. The second lookup is deliberately
                // deferred so an excluded first column never triggers it.
                var entryType = vm.ColumnTypes[entry.Key];
                if (entryType == "exclude")
                {
                    continue;
                }

                var compareType = vm.ColumnTypes[entryCompare.Key];
                if (compareType == "exclude")
                {
                    continue;
                }

                bool anyDate = entryType == "date time" || compareType == "date time";
                bool anyCategorical = entryType == "categorical" || compareType == "categorical";
                bool anyNumeral = entryType == "numeral" || compareType == "numeral";

                if (anyDate && anyCategorical)
                {
                    DateAndCategory dateCat = new DateAndCategory();
                    // The date column is always passed first.
                    if (entryType == "date time")
                    {
                        dateCat.GetLinePlotData(entry.Value, entryCompare.Value);
                    }
                    else
                    {
                        dateCat.GetLinePlotData(entryCompare.Value, entry.Value);
                    }
                    dateCat.Variable1 = entry.Key;
                    dateCat.Variable2 = entryCompare.Key;
                    dateAndCategories.Add(dateCat);
                }

                if (anyDate && anyNumeral)
                {
                    DateAndNumeral dateNum = new DateAndNumeral();
                    if (entryType == "date time")
                    {
                        dateNum.MakeDataBlob(entry.Value, entryCompare.Value);
                        dateNum.DateName = entry.Key;
                        dateNum.NumeralName = entryCompare.Key;
                    }
                    else
                    {
                        dateNum.MakeDataBlob(entryCompare.Value, entry.Value);
                        dateNum.DateName = entryCompare.Key;
                        dateNum.NumeralName = entry.Key;
                    }
                    dateAndNumerals.Add(dateNum);
                }

                if (entryType == "categorical" && compareType == "categorical")
                {
                    ChiStats chiStats = new ChiStats();
                    chiStats.GetChiStat(entry.Value, entryCompare.Value);
                    // NOTE(review): Variable1/Variable2 are assigned in the opposite order
                    // to the GetChiStat arguments - kept as-is; confirm this is intended.
                    chiStats.Variable1 = entryCompare.Key.Replace("\"", "");
                    chiStats.Variable2 = entry.Key.Replace("\"", "");
                    chiStatsList.Add(chiStats);
                }

                if (anyCategorical && anyNumeral)
                {
                    AnovaStats anovaStats = new AnovaStats();
                    // GetAnovaStats takes (categories, numerals) in that order.
                    if (entryType == "numeral")
                    {
                        anovaStats.GetAnovaStats(entryCompare.Value, entry.Value);
                        anovaStats.CategoricalVariable = entryCompare.Key;
                        anovaStats.NumericalVariable = entry.Key;
                    }
                    else
                    {
                        anovaStats.GetAnovaStats(entry.Value, entryCompare.Value);
                        anovaStats.CategoricalVariable = entry.Key;
                        anovaStats.NumericalVariable = entryCompare.Key;
                    }
                    anovaStatsList.Add(anovaStats);

                    context.AnovaStats.Add(anovaStats);
                    context.SaveChanges();
                }

                if (entryType == "numeral" && compareType == "numeral")
                {
                    PearsonCorr pearsonCorr = new PearsonCorr();
                    pearsonCorr.Variable1 = entry.Key;
                    pearsonCorr.Variable2 = entryCompare.Key;
                    pearsonCorr.ComputeCoeff(entry.Value, entryCompare.Value);
                    pearsonCorrelations.Add(pearsonCorr);
                    // NOTE(review): nothing is added to the context in this branch, so this
                    // save looks like a no-op; kept to preserve existing behavior.
                    context.SaveChanges();
                }

                checkedColumns.Add(checkedKey);
            }
        }
    }

    StatSummaryVM statsSummary = new StatSummaryVM();
    statsSummary.Name = vm.Name ?? "Untitled";
    statsSummary.Description = vm.Description ?? "no description available";
    statsSummary.AnovaStats = anovaStatsList;
    statsSummary.ChiStats = chiStatsList;
    statsSummary.PearsonCorrs = pearsonCorrelations;
    statsSummary.DateAndCatories = dateAndCategories;
    statsSummary.DateAndNumerals = dateAndNumerals;
    statsSummary.Path = vm.Path;
    statsSummary.FileName = vm.FileName;
    return statsSummary;
}