/// <summary>
/// Serializes the plugin's current settings into a string→string map for session saving.
/// </summary>
/// <param name="suppressWarnings">Unused here; present to satisfy the plugin settings interface.</param>
/// <returns>Dictionary mapping setting names to their string-serialized values.</returns>
public Dictionary<string, string> ExportSettings(bool suppressWarnings)
{
    Dictionary<string, string> SettingsDict = new Dictionary<string, string>();
    SettingsDict.Add("RawFreqs", RawFreqs.ToString());

    //these two options are independent of whether a dictionary is loaded, so they are
    //always exported. (previously they were only saved inside the dictionary branch
    //below, silently losing the user's choices when no dictionary was loaded.)
    SettingsDict.Add("IncludeStDev", IncludeStDevs.ToString());
    SettingsDict.Add("RoundLength", RoundValuesToNDecimals.ToString());

    //if we have a dictionary loaded, then we'll save it
    if (!String.IsNullOrEmpty(DictionaryLocation))
    {
        SettingsDict.Add("DictionaryLocation", DictionaryLocation.ToString());
        SettingsDict.Add("DictionaryContents", UserLoadedDictionary.DictionaryRawText);
    }

    return (SettingsDict);
}
/// <summary>
/// Finalizes the analysis. Emits two reliability rows per output — Cronbach's alpha
/// over raw counts, and KR-20 over one-hot (dichotomous) scoring — followed by one row
/// per dictionary entry holding its mean raw frequency (or mean percentage) across all
/// processed documents, optionally with a standard deviation. Resets the per-entry
/// frequency trackers before returning.
/// </summary>
/// <param name="Input">Unused here; present to satisfy the plugin pipeline interface.</param>
/// <returns>A Payload containing the reliability rows plus one row per dictionary entry.</returns>
public Payload FinishUp(Payload Input)
{
    Payload OutputData = new Payload();
    OutputData.FileID = "";
    string StringOutputFormatParameter = "N" + RoundValuesToNDecimals.ToString();

    #region Cronbach's Alphas

    //map each output category index to the list of terms that load on it, so that we
    //can calculate the sum of item variances (Σ[s2i]) for each category
    Dictionary<int, List<string>> CatWordMap = new Dictionary<int, List<string>>();
    for (int i = 0; i < UserLoadedDictionary.DictData.NumCats; i++)
    {
        CatWordMap.Add(i, new List<string>());
    }

    #region Figure out which words belong to which category
    //iterate over n-grams, starting with the largest possible n-gram (derived from the user's dictionary file)
    for (int NumberOfWords = UserLoadedDictionary.DictData.MaxWords; NumberOfWords > 0; NumberOfWords--)
    {
        if (UserLoadedDictionary.DictData.FullDictionary["Standards"].ContainsKey(NumberOfWords))
        {
            foreach (string term in UserLoadedDictionary.DictData.FullDictionary["Standards"][NumberOfWords].Keys)
            {
                for (int wordCatCount = 0; wordCatCount < UserLoadedDictionary.DictData.FullDictionary["Standards"][NumberOfWords][term].Length; wordCatCount++)
                {
                    //translate the dictionary-file category ID into our output column index
                    int outputCatMap = OutputDataMap[UserLoadedDictionary.DictData.FullDictionary["Standards"][NumberOfWords][term][wordCatCount]];
                    CatWordMap[outputCatMap].Add(term);
                }
            }
        }

        if (UserLoadedDictionary.DictData.FullDictionary["Wildcards"].ContainsKey(NumberOfWords))
        {
            foreach (string term in UserLoadedDictionary.DictData.FullDictionary["Wildcards"][NumberOfWords].Keys)
            {
                for (int wordCatCount = 0; wordCatCount < UserLoadedDictionary.DictData.FullDictionary["Wildcards"][NumberOfWords][term].Length; wordCatCount++)
                {
                    int outputCatMap = OutputDataMap[UserLoadedDictionary.DictData.FullDictionary["Wildcards"][NumberOfWords][term][wordCatCount]];
                    CatWordMap[outputCatMap].Add(term);
                }
            }
        }
    }
    #endregion

    //Cronbach's alpha calculated from the raw-count variance trackers
    OutputData.SegmentNumber.Add(0); //used as the entry number
    OutputData.SegmentID.Add("Cronbach's Alpha (Raw)");
    OutputData.StringArrayList.Add(CalculateReliabilityRow(CatWordMap,
                                                           term => TermVariancesRaw[term]["S"],
                                                           cat => CategoryVariancesRaw[cat]["S"],
                                                           StringOutputFormatParameter));

    //KR-20 is Cronbach's alpha applied to dichotomous (one-hot) item scoring, so the
    //same formula is reused with the one-hot variance trackers
    OutputData.SegmentNumber.Add(0); //used as the entry number
    OutputData.SegmentID.Add("Kuder–Richardson Formula 20 (KR-20)");
    OutputData.StringArrayList.Add(CalculateReliabilityRow(CatWordMap,
                                                           term => TermVariancesOneHot[term]["S"],
                                                           cat => CategoryVariancesOneHot[cat]["S"],
                                                           StringOutputFormatParameter));

    #endregion

    #region this is where we calculate the avg percentages for each word
    for (int i = 0; i < UserLoadedDictionary.DictData.AllEntries.Count; i++)
    {
        OutputData.SegmentNumber.Add((ulong)(i + 1)); //used as the entry number
        OutputData.SegmentID.Add(UserLoadedDictionary.DictData.AllEntries[i]); //the dictionary entry

        string[] OutputArray = new string[UserLoadedDictionary.DictData.NumCats];
        for (int j = 0; j < UserLoadedDictionary.DictData.NumCats; j++)
        {
            OutputArray[j] = "";
        }

        for (int j = 0; j < UserLoadedDictionary.DictData.NumCats; j++)
        {
            //if we know that the mean is zero, then we just skip on to the next one
            if (EntryFreqTracker_Long[UserLoadedDictionary.DictData.AllEntries[i]][j] == 0)
            {
                continue;
            }

            if (RawFreqs)
            {
                //mean raw count per document
                OutputArray[j] = Math.Round((EntryFreqTracker_Long[UserLoadedDictionary.DictData.AllEntries[i]][j] / (double)TotalNumberOfDocs["Docs"]),
                                            RoundValuesToNDecimals, MidpointRounding.AwayFromZero).ToString(StringOutputFormatParameter);
            }
            else
            {
                //mean percentage per document
                OutputArray[j] = Math.Round(((EntryFreqTracker_Double[UserLoadedDictionary.DictData.AllEntries[i]][j] / TotalNumberOfDocs["Docs"]) * 100),
                                            RoundValuesToNDecimals, MidpointRounding.AwayFromZero).ToString(StringOutputFormatParameter);
            }

            //append the sample standard deviation. "S" is the running sum of squared
            //deviations, so the sample SD is sqrt(S / (N - 1)) — consistent with how
            //variances are computed for the reliability coefficients above. (the old
            //code took sqrt(S) directly, which scaled with the number of documents.)
            //NOTE(review): in percentage mode this still reads TermVariancesRaw, i.e.
            //the SD of raw counts rather than of percentages — confirm intent upstream.
            if (IncludeStDevs)
            {
                OutputArray[j] += " (" + Math.Round(Math.Sqrt(TermVariancesRaw[UserLoadedDictionary.DictData.AllEntries[i]]["S"] / (TotalNumberOfDocs["Docs"] - 1)),
                                                    RoundValuesToNDecimals, MidpointRounding.AwayFromZero).ToString(StringOutputFormatParameter) + ")";
            }
        }

        OutputData.StringArrayList.Add(OutputArray);
    }
    #endregion

    //reset the accumulators so the plugin is ready for the next run
    EntryFreqTracker_Long = new ConcurrentDictionary<string, ulong[]>();
    EntryFreqTracker_Double = new ConcurrentDictionary<string, double[]>();

    return (OutputData);
}

/// <summary>
/// Computes one reliability coefficient (Cronbach's alpha; KR-20 when fed one-hot
/// variances) per output category. Variances come from running "S" accumulators
/// (sum of squared deviations), so each variance is S / (N - 1). Items with zero
/// variance contribute no reliability information and are dropped from both the
/// item-variance sum and the item count k.
/// https://data.library.virginia.edu/using-and-interpreting-cronbachs-alpha/
/// </summary>
/// <param name="CatWordMap">Output category index → constituent terms.</param>
/// <param name="TermSumOfSquares">Returns the "S" accumulator for a term.</param>
/// <param name="CategorySumOfSquares">Returns the "S" accumulator for a category.</param>
/// <param name="StringOutputFormatParameter">Numeric format string (e.g. "N4") for output.</param>
/// <returns>One formatted coefficient (or "N/A") per category.</returns>
private string[] CalculateReliabilityRow(Dictionary<int, List<string>> CatWordMap,
                                         Func<string, double> TermSumOfSquares,
                                         Func<int, double> CategorySumOfSquares,
                                         string StringOutputFormatParameter)
{
    string[] OutputArray_Cronbach = new string[UserLoadedDictionary.DictData.NumCats];

    for (int i = 0; i < UserLoadedDictionary.DictData.NumCats; i++)
    {
        //sum of variances for the category's constituent items
        double itemVarianceSum = 0;
        string[] catWordList = CatWordMap[i].ToArray();
        double k = (double)catWordList.Length;

        for (int j = 0; j < catWordList.Length; j++)
        {
            double itemVariance = TermSumOfSquares(catWordList[j]) / (TotalNumberOfDocs["Docs"] - 1);
            if (itemVariance > 0)
            {
                itemVarianceSum += itemVariance;
            }
            else
            {
                //zero-variance items are excluded from the item count
                k -= 1;
            }
        }

        double totalVariance = CategorySumOfSquares(i) / (TotalNumberOfDocs["Docs"] - 1);
        double alpha = (k / (k - 1)) * (1 - (itemVarianceSum / totalVariance));

        if (!Double.IsNaN(alpha) && !Double.IsInfinity(alpha))
        {
            OutputArray_Cronbach[i] = Math.Round(alpha, RoundValuesToNDecimals, MidpointRounding.AwayFromZero).ToString(StringOutputFormatParameter);
        }
        else
        {
            //undefined when k <= 1 or total variance is zero
            OutputArray_Cronbach[i] = "N/A";
        }
    }

    return OutputArray_Cronbach;
}