Exemplo n.º 1
0
        /// <summary>
        /// Collects this plugin's current settings into a string-to-string map.
        /// "RawFreqs" is always written; the dictionary location, the dictionary's raw text,
        /// the standard-deviation flag and the rounding length are written only when
        /// <c>DictionaryLocation</c> is non-empty.
        /// </summary>
        /// <param name="suppressWarnings">Not used by this method.</param>
        /// <returns>A new dictionary holding the exported settings.</returns>
        public Dictionary <string, string> ExportSettings(bool suppressWarnings)
        {
            var exported = new Dictionary <string, string>
            {
                { "RawFreqs", RawFreqs.ToString() }
            };

            // without a dictionary location there is nothing else to save
            if (String.IsNullOrEmpty(DictionaryLocation))
            {
                return exported;
            }

            exported.Add("DictionaryLocation", DictionaryLocation.ToString());
            exported.Add("DictionaryContents", UserLoadedDictionary.DictionaryRawText);
            exported.Add("IncludeStDev", IncludeStDevs.ToString());
            exported.Add("RoundLength", RoundValuesToNDecimals.ToString());

            return exported;
        }
Exemplo n.º 2
0
        //one of the few plugins thus far where I'm actually using a constructor
        //might not be the most efficient way to handle this (especially at runtime)
        //but I don't suspect that it'll be too bad.
        //public ExamineDictWords()
        //{
        //    DictionaryList = new List<DictionaryMetaObject>();
        //    ListOfBuiltInDictionaries = new HashSet<string>();


        //    foreach(DictionaryMetaObject dict in DictionaryList)
        //    {
        //        ListOfBuiltInDictionaries.Add(dict.DictionaryName);
        //    }

        //}



        /// <summary>
        /// Builds the summary payload for the loaded dictionary. The payload contains, in order:
        /// one row of raw Cronbach's alpha values per category, one row of KR-20 values per
        /// category (computed from the one-hot variances), and then one row per dictionary entry
        /// with its average frequency (or percentage) per category, optionally followed by the
        /// entry's standard deviation in parentheses. The frequency trackers are reset before returning.
        /// </summary>
        /// <param name="Input">Not read by this method.</param>
        /// <returns>A new <c>Payload</c> with the segment numbers, segment IDs and string rows described above.</returns>
        public Payload FinishUp(Payload Input)
        {
            Payload OutputData = new Payload();

            OutputData.FileID = "";
            string StringOutputFormatParameter = "N" + RoundValuesToNDecimals.ToString();

            var dictData  = UserLoadedDictionary.DictData;
            var totalDocs = TotalNumberOfDocs["Docs"];

            //sample variance = S / (n - 1); computed once because it is the same for every term and category
            double degreesOfFreedom = totalDocs - 1;


            #region Cronbach's Alphas
            //first thing's first: we have to figure out which words constitute each category so that we can calculate Σ[s2i]
            Dictionary <int, List <string> > CatWordMap = new Dictionary <int, List <String> >();
            for (int i = 0; i < dictData.NumCats; i++)
            {
                CatWordMap.Add(i, new List <string>());
            }

            #region Figure out which words belong to which category
            //for every n-gram size we visit the "Standards" terms first and then the "Wildcards" terms
            string[] dictionarySections = { "Standards", "Wildcards" };

            //iterate over n-grams, starting with the largest possible n-gram (derived from the user's dictionary file)
            for (int NumberOfWords = dictData.MaxWords; NumberOfWords > 0; NumberOfWords--)
            {
                foreach (string section in dictionarySections)
                {
                    var termsBySize = dictData.FullDictionary[section];
                    if (!termsBySize.ContainsKey(NumberOfWords))
                    {
                        continue;
                    }

                    var termsOfThisSize = termsBySize[NumberOfWords];
                    foreach (string term in termsOfThisSize.Keys)
                    {
                        //a term is added once for each category it is mapped to
                        foreach (var dictionaryCategory in termsOfThisSize[term])
                        {
                            int outputCatMap = OutputDataMap[dictionaryCategory];
                            CatWordMap[outputCatMap].Add(term);
                        }
                    }
                }
            }
            #endregion


            #region Raw Cronbach
            OutputData.SegmentNumber.Add(0);                    //used as the entry number
            OutputData.SegmentID.Add("Cronbach's Alpha (Raw)"); //the dictionary entry
            OutputData.StringArrayList.Add(BuildAlphaRow(CatWordMap,
                                                         term => TermVariancesRaw[term]["S"],
                                                         category => CategoryVariancesRaw[category]["S"],
                                                         degreesOfFreedom,
                                                         StringOutputFormatParameter));
            #endregion


            #region OneHot Cronbach
            //same formula as above, but fed with the one-hot variances (reported as KR-20)
            OutputData.SegmentNumber.Add(0);                                 //used as the entry number
            OutputData.SegmentID.Add("Kuder–Richardson Formula 20 (KR-20)"); //the dictionary entry
            OutputData.StringArrayList.Add(BuildAlphaRow(CatWordMap,
                                                         term => TermVariancesOneHot[term]["S"],
                                                         category => CategoryVariancesOneHot[category]["S"],
                                                         degreesOfFreedom,
                                                         StringOutputFormatParameter));
            #endregion

            #endregion


            #region this is where we calculate the avg percentages for each word
            var allEntries = dictData.AllEntries;
            for (int i = 0; i < allEntries.Count; i++)
            {
                var entry = allEntries[i];

                OutputData.SegmentNumber.Add((ulong)(i + 1)); //used as the entry number
                OutputData.SegmentID.Add(entry);              //the dictionary entry

                string[] OutputArray = new string[dictData.NumCats];

                //the standard deviation text is identical for every category of an entry, so it is
                //built at most once, and only when a non-empty cell actually needs it
                string stDevSuffix = null;

                for (int j = 0; j < dictData.NumCats; j++)
                {
                    OutputArray[j] = "";

                    //if we know that the mean is zero, then we just skip on to the next one
                    if (EntryFreqTracker_Long[entry][j] == 0)
                    {
                        continue;
                    }

                    if (RawFreqs)
                    {
                        OutputArray[j] = FormatRounded(EntryFreqTracker_Long[entry][j] / (double)totalDocs, StringOutputFormatParameter);
                    }
                    else
                    {
                        OutputArray[j] = FormatRounded((EntryFreqTracker_Double[entry][j] / totalDocs) * 100, StringOutputFormatParameter);
                    }

                    //calculate the standard deviation
                    if (IncludeStDevs)
                    {
                        if (stDevSuffix == null)
                        {
                            //"S" is treated as a sum of squared deviations by the alpha calculation
                            //(it is divided by n - 1 there), so the standard deviation is sqrt(S / (n - 1)),
                            //not sqrt(S)
                            double stDev = Math.Sqrt(TermVariancesRaw[entry]["S"] / degreesOfFreedom);
                            string stDevText = (Double.IsNaN(stDev) || Double.IsInfinity(stDev))
                                ? "N/A"
                                : FormatRounded(stDev, StringOutputFormatParameter);
                            stDevSuffix = " (" + stDevText + ")";
                        }

                        OutputArray[j] += stDevSuffix;
                    }
                }


                OutputData.StringArrayList.Add(OutputArray);
            }
            #endregion



            EntryFreqTracker_Long   = new ConcurrentDictionary <string, ulong[]>();
            EntryFreqTracker_Double = new ConcurrentDictionary <string, double[]>();

            return(OutputData);
        }

        /// <summary>
        /// Computes one alpha value per category using
        /// alpha = (k / (k - 1)) * (1 - (sum of item variances / total variance)),
        /// where k is the number of the category's terms whose variance is greater than zero.
        /// </summary>
        /// <param name="catWordMap">Terms of each category, keyed 0..Count-1.</param>
        /// <param name="termSumOfSquares">Returns the "S" value for a term.</param>
        /// <param name="categorySumOfSquares">Returns the "S" value for a category index.</param>
        /// <param name="degreesOfFreedom">Divisor that turns an "S" value into a variance (number of docs - 1).</param>
        /// <param name="numberFormat">Numeric format string used for the output text.</param>
        /// <returns>One formatted value per category; "N/A" where the result is NaN or infinite.</returns>
        private string[] BuildAlphaRow(Dictionary <int, List <string> > catWordMap,
                                       Func <string, double> termSumOfSquares,
                                       Func <int, double> categorySumOfSquares,
                                       double degreesOfFreedom,
                                       string numberFormat)
        {
            string[] alphaRow = new string[catWordMap.Count];

            for (int i = 0; i < alphaRow.Length; i++)
            {
                //this gets us to sum of variances for the category's constituent items
                List <string> catWordList     = catWordMap[i];
                double        itemVarianceSum = 0;
                double        k = (double)catWordList.Count;

                foreach (string term in catWordList)
                {
                    double itemVariance = termSumOfSquares(term) / degreesOfFreedom;
                    if (itemVariance > 0)
                    {
                        itemVarianceSum += itemVariance;
                    }
                    else
                    {
                        //terms without positive variance do not count as items
                        k -= 1;
                    }
                }

                double totalVariance = categorySumOfSquares(i) / degreesOfFreedom;

                //https://data.library.virginia.edu/using-and-interpreting-cronbachs-alpha/
                double alpha = (k / (k - 1)) * (1 - (itemVarianceSum / totalVariance));

                alphaRow[i] = (Double.IsNaN(alpha) || Double.IsInfinity(alpha))
                    ? "N/A"
                    : FormatRounded(alpha, numberFormat);
            }

            return alphaRow;
        }

        /// <summary>
        /// Rounds a value to <c>RoundValuesToNDecimals</c> places (midpoints away from zero)
        /// and formats it with the given numeric format string.
        /// </summary>
        private string FormatRounded(double value, string numberFormat)
        {
            return Math.Round(value, RoundValuesToNDecimals, MidpointRounding.AwayFromZero).ToString(numberFormat);
        }