ImputeEmotionalContentFromFile(string filename, uint column, uint repeats, string imputesave) {
    // Builds per-stem valence/arousal/dominance (V/A/D) distributions for the text in
    // `filename` (taking the text from column index `column` of each row), running
    // `repeats` full passes over the file. Results are stored in a fresh MemorySource,
    // persisted to `imputesave` after each pass (via AnalyzeSentences), and the
    // instance's `source` is replaced with a ComboSource layering the imputed values
    // over the original source. Returns the imputed table.
    MemorySource <string, ThreeTuple <ContinuousDistribution, ContinuousDistribution, ContinuousDistribution> > imputed = new MemorySource <string, ThreeTuple <ContinuousDistribution, ContinuousDistribution, ContinuousDistribution> >();
    ComboSource <string, ThreeTuple <ContinuousDistribution, ContinuousDistribution, ContinuousDistribution> > combo = new ComboSource <string, ThreeTuple <ContinuousDistribution, ContinuousDistribution, ContinuousDistribution> >(source, imputed);
    // Check for existing imputed file
    // Seed `imputed` from any rows already saved by a previous run. Row layout matches
    // what AnalyzeSentences writes: stem, meanV, varV, meanA, varA, meanD, varD.
    // NOTE(review): assumes DataReader yields no rows (rather than throwing) when
    // `imputesave` does not exist yet — confirm.
    // NOTE(review): double.Parse here uses the current culture; round-trips with the
    // culture-sensitive writer in AnalyzeSentences, but output files are not portable
    // across locales — consider InvariantCulture at both ends.
    DataReader imputereader = new DataReader(imputesave);
    uint kk = 0;
    for (string[] row = imputereader.ReadRow(); row != null; row = imputereader.ReadRow()) {
        kk++;
        // Progress indicator every 1000 seeded rows.
        if (kk % 1000 == 0) { Console.WriteLine("#" + kk); }
        double meanv = double.Parse(row[1]), varv = double.Parse(row[2]), meana = double.Parse(row[3]), vara = double.Parse(row[4]), meand = double.Parse(row[5]), vard = double.Parse(row[6]);
        // Each dimension is modeled as a Gaussian clipped to [0, 1].
        ContinuousDistribution valence = new ClippedGaussianDistribution(meanv, varv, 0, 1);
        ContinuousDistribution arousal = new ClippedGaussianDistribution(meana, vara, 0, 1);
        ContinuousDistribution dominance = new ClippedGaussianDistribution(meand, vard, 0, 1);
        imputed[row[0]] = new ThreeTuple <ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>(valence, arousal, dominance);
    }
    imputereader.Close();
    for (uint ii = 0; ii < repeats; ii++) {
        // Per-stem accumulators, rebuilt each pass; values are (double, double) pairs
        // consumed by WeightedStatistics — presumably (value, weight); verify against
        // AnalyzeWords.
        Dictionary <string, List <KeyValuePair <double, double> > > sentencesV = new Dictionary <string, List <KeyValuePair <double, double> > >(), sentencesA = new Dictionary <string, List <KeyValuePair <double, double> > >(), sentencesD = new Dictionary <string, List <KeyValuePair <double, double> > >();
        DataReader reader = new DataReader(filename);
        uint jj = 0;
        for (string[] row = reader.ReadRow(); row != null; row = reader.ReadRow()) {
            jj++;
            // Progress: rows read, distinct stems seen this pass, imputed table size.
            if (jj % 1000 == 0) { Console.WriteLine("#" + jj + ": " + sentencesV.Count + ", " + imputed.Count); }
            List <string> words = TwitterUtilities.SplitWords(row[column].ToLower());
            // Scores each word against `combo` (original source + values imputed so far)
            // and accumulates the per-stem samples.
            AnalyzeWords(words, combo, sentencesV, sentencesA, sentencesD);
        }
        reader.Close();
        // Collapse this pass's samples into distributions, update `imputed`, and
        // overwrite the checkpoint file so later passes/runs start from it.
        AnalyzeSentences(imputed, sentencesV, sentencesA, sentencesD, imputesave);
    }
    // From here on, lookups against this instance go through the combo source —
    // presumably falling back from `source` to `imputed` (or vice versa); confirm
    // ComboSource's precedence.
    source = combo;
    return(imputed);
}
// Collapses the accumulated per-stem weighted samples into clipped-Gaussian
// V/A/D distributions, stores each triple into `inputed`, and rewrites the
// checkpoint file `imputesave` with one CSV row per stem:
// stem, meanV, varV, meanA, varA, meanD, varD.
// The three dictionaries are expected to share the same key set (keys are
// enumerated from sentencesV only).
public void AnalyzeSentences(MemorySource <string, ThreeTuple <ContinuousDistribution, ContinuousDistribution, ContinuousDistribution> > inputed, Dictionary <string, List <KeyValuePair <double, double> > > sentencesV, Dictionary <string, List <KeyValuePair <double, double> > > sentencesA, Dictionary <string, List <KeyValuePair <double, double> > > sentencesD, string imputesave) {
    using (var writer = File.CreateText(imputesave)) {
        foreach (string stem in sentencesV.Keys) {
            // Pull each dimension's sample list once, then derive mean and
            // (weighted) variance from it.
            List <KeyValuePair <double, double> > vSamples = sentencesV[stem];
            List <KeyValuePair <double, double> > aSamples = sentencesA[stem];
            List <KeyValuePair <double, double> > dSamples = sentencesD[stem];
            double vMean = WeightedStatistics.Mean(vSamples);
            double aMean = WeightedStatistics.Mean(aSamples);
            double dMean = WeightedStatistics.Mean(dSamples);
            // Each dimension is a Gaussian clipped to the unit interval.
            ClippedGaussianDistribution v = new ClippedGaussianDistribution(vMean, WeightedStatistics.Variance(vSamples, vMean, true), 0, 1);
            ClippedGaussianDistribution a = new ClippedGaussianDistribution(aMean, WeightedStatistics.Variance(aSamples, aMean, true), 0, 1);
            ClippedGaussianDistribution d = new ClippedGaussianDistribution(dMean, WeightedStatistics.Variance(dSamples, dMean, true), 0, 1);
            inputed[stem] = new ThreeTuple <ContinuousDistribution, ContinuousDistribution, ContinuousDistribution>(v, a, d);
            // Persist the underlying (pre-clipping) parameters so a later run can
            // rebuild the same distributions.
            writer.WriteLine(stem + "," + v.InternalMean + "," + v.InternalVariance + "," + a.InternalMean + "," + a.InternalVariance + "," + d.InternalMean + "," + d.InternalVariance);
        }
    }
}