ImputeEmotionalContentFromFile(string filename, uint column, uint repeats, string imputesave)
        {
            MemorySource <string, ThreeTuple <ContinuousDistribution, ContinuousDistribution, ContinuousDistribution> > imputed = new MemorySource <string, ThreeTuple <ContinuousDistribution, ContinuousDistribution, ContinuousDistribution> >();
            ComboSource <string, ThreeTuple <ContinuousDistribution, ContinuousDistribution, ContinuousDistribution> >  combo   = new ComboSource <string, ThreeTuple <ContinuousDistribution, ContinuousDistribution, ContinuousDistribution> >(source, imputed);

            // Check for existing imputed file
            DataReader imputereader = new DataReader(imputesave);
            uint       kk           = 0;

            for (string[] row = imputereader.ReadRow(); row != null; row = imputereader.ReadRow())
            {
                kk++;
                if (kk % 1000 == 0)
                {
                    Console.WriteLine("#" + kk);
                }

                double meanv = double.Parse(row[1]), varv = double.Parse(row[2]), meana = double.Parse(row[3]), vara = double.Parse(row[4]), meand = double.Parse(row[5]), vard = double.Parse(row[6]);

                ContinuousDistribution valence = new ClippedGaussianDistribution(meanv, varv, 0, 1);
                ContinuousDistribution arousal = new ClippedGaussianDistribution(meana, vara, 0, 1);
                ContinuousDistribution dominance = new ClippedGaussianDistribution(meand, vard, 0, 1);

                imputed[row[0]] = new ThreeTuple <ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>(valence, arousal, dominance);
            }
            imputereader.Close();

            for (uint ii = 0; ii < repeats; ii++)
            {
                Dictionary <string, List <KeyValuePair <double, double> > >
                sentencesV     = new Dictionary <string, List <KeyValuePair <double, double> > >(),
                    sentencesA = new Dictionary <string, List <KeyValuePair <double, double> > >(),
                    sentencesD = new Dictionary <string, List <KeyValuePair <double, double> > >();

                DataReader reader = new DataReader(filename);
                uint       jj     = 0;
                for (string[] row = reader.ReadRow(); row != null; row = reader.ReadRow())
                {
                    jj++;
                    if (jj % 1000 == 0)
                    {
                        Console.WriteLine("#" + jj + ": " + sentencesV.Count + ", " + imputed.Count);
                    }

                    List <string> words = TwitterUtilities.SplitWords(row[column].ToLower());
                    AnalyzeWords(words, combo, sentencesV, sentencesA, sentencesD);
                }
                reader.Close();

                AnalyzeSentences(imputed, sentencesV, sentencesA, sentencesD, imputesave);
            }

            source = combo;
            return(imputed);
        }
        /// <summary>
        /// For every stem in <paramref name="sentencesV"/>, builds a valence / arousal / dominance
        /// distribution triple from the collected samples, stores it in <paramref name="inputed"/>,
        /// and writes one comma-separated row for it to <paramref name="imputesave"/>.
        /// </summary>
        /// <remarks>
        /// The output file is opened with <c>File.CreateText</c>, so an existing file at that path is
        /// overwritten and afterwards holds only the stems present in <paramref name="sentencesV"/>.
        /// Each row is the stem followed by the <c>InternalMean</c> and <c>InternalVariance</c> of the
        /// valence, arousal and dominance distributions, in that order.
        /// Numbers are formatted with the invariant culture so that the decimal separator can never
        /// collide with the comma used as column delimiter.
        /// NOTE(review): code that reads this file back should parse with the invariant culture as well.
        /// NOTE(review): stems are written verbatim; a stem containing a comma would add columns - confirm
        /// whether the tokenizer can produce such stems.
        /// </remarks>
        /// <param name="inputed">Store that receives the distribution triple for each stem.</param>
        /// <param name="sentencesV">Valence samples per stem; its keys drive the iteration.
        /// NOTE(review): the pairs are presumably (value, weight) - confirm against WeightedStatistics.</param>
        /// <param name="sentencesA">Arousal samples per stem; must contain every key of <paramref name="sentencesV"/>.</param>
        /// <param name="sentencesD">Dominance samples per stem; must contain every key of <paramref name="sentencesV"/>.</param>
        /// <param name="imputesave">Path of the file to create or overwrite.</param>
        /// <exception cref="ArgumentNullException"><paramref name="sentencesV"/> is null.</exception>
        /// <exception cref="KeyNotFoundException">A stem of <paramref name="sentencesV"/> is missing from
        /// <paramref name="sentencesA"/> or <paramref name="sentencesD"/>.</exception>
        public void AnalyzeSentences(MemorySource <string, ThreeTuple <ContinuousDistribution, ContinuousDistribution, ContinuousDistribution> > inputed,
                                     Dictionary <string, List <KeyValuePair <double, double> > > sentencesV,
                                     Dictionary <string, List <KeyValuePair <double, double> > > sentencesA,
                                     Dictionary <string, List <KeyValuePair <double, double> > > sentencesD, string imputesave)
        {
            // Fail before File.CreateText truncates an existing save file.
            if (sentencesV == null)
            {
                throw new ArgumentNullException("sentencesV");
            }

            // Fully qualified so the block does not depend on a using directive that may be absent.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            using (var stream = File.CreateText(imputesave)) {
                foreach (KeyValuePair <string, List <KeyValuePair <double, double> > > entry in sentencesV)
                {
                    string stem = entry.Key;

                    // Look each sample list up once; the indexers throw KeyNotFoundException
                    // if the three dictionaries are out of step.
                    List <KeyValuePair <double, double> > vsamples = entry.Value;
                    List <KeyValuePair <double, double> > asamples = sentencesA[stem];
                    List <KeyValuePair <double, double> > dsamples = sentencesD[stem];

                    double vmean = WeightedStatistics.Mean(vsamples), amean = WeightedStatistics.Mean(asamples), dmean = WeightedStatistics.Mean(dsamples);

                    // The trailing (0, 1) arguments are presumably the clipping bounds - confirm
                    // against ClippedGaussianDistribution.
                    ClippedGaussianDistribution valence = new ClippedGaussianDistribution(vmean, WeightedStatistics.Variance(vsamples, vmean, true), 0, 1);
                    ClippedGaussianDistribution arousal = new ClippedGaussianDistribution(amean, WeightedStatistics.Variance(asamples, amean, true), 0, 1);
                    ClippedGaussianDistribution dominance = new ClippedGaussianDistribution(dmean, WeightedStatistics.Variance(dsamples, dmean, true), 0, 1);

                    inputed[stem] = new ThreeTuple <ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>(valence, arousal, dominance);

                    // Invariant culture: a culture-specific decimal comma would otherwise
                    // split one number into two columns.
                    stream.WriteLine(string.Format(invariant, "{0},{1},{2},{3},{4},{5},{6}",
                                                   stem,
                                                   valence.InternalMean, valence.InternalVariance,
                                                   arousal.InternalMean, arousal.InternalVariance,
                                                   dominance.InternalMean, dominance.InternalVariance));
                }
            }
        }