/// <summary>
/// Entry point: runs the crowdsourcing experiments presented in Simpson et al. (WWW15)
/// on the weather tweets data set and prints the word probabilities to standard output.
/// </summary>
public static void Main()
{
    string tweetsPath = Path.Combine("Data", "weatherTweets.tsv.gz");
    var data = Datum.LoadData(tweetsPath);

    // Build the vocabulary from the loaded data, then set up the model and the results holder.
    var vocabulary = ResultsWords.BuildVocabularyOnSubdata((List<Datum>)data);
    var model = new BCCWords();
    var results = new ResultsWords(data, vocabulary);

    // Inference is only run when the results object carries a words-aware mapping.
    if (results.Mapping is DataMappingWords)
    {
        results = new ResultsWords(data, vocabulary);
        results.RunBCCWords("BCCwords", data, data, model, Results.RunMode.ClearResults, true);
    }

    // Only the per-class word probabilities are written (last flag); the other sections are off.
    using (var stdout = new StreamWriter(Console.OpenStandardOutput()))
    {
        results.WriteResults(stdout, false, false, false, true);
    }

    Console.WriteLine("Done. Press enter to exit.");
    Console.ReadLine();
}
/// <summary>
/// Writes various results to a StreamWriter: first the base-class results, then
/// (optionally) the highest-ranked words of every class with their log-probabilities.
/// </summary>
/// <param name="writer">A StreamWriter instance.</param>
/// <param name="writeCommunityParameters">Set true to write community parameters.</param>
/// <param name="writeWorkerParameters">Set true to write worker parameters.</param>
/// <param name="writeWorkerCommunities">Set true to write worker communities.</param>
/// <param name="writeProbWords">Set true to write word probabilities.</param>
/// <param name="topWords">Maximum number of words to write per class; capped at the vocabulary size.</param>
public void WriteResults(StreamWriter writer, bool writeCommunityParameters, bool writeWorkerParameters, bool writeWorkerCommunities, bool writeProbWords, int topWords = 30)
{
    // Forward every flag to its own slot so that writeWorkerParameters is honoured.
    // NOTE(review): assumes the base parameter order is (community parameters, worker parameters,
    // worker communities), as in this method's own signature - confirm against the base class.
    base.WriteResults(writer, writeCommunityParameters, writeWorkerParameters, writeWorkerCommunities);

    // Word indices can only be turned into words through a words-aware mapping,
    // so the word section is skipped when none is available.
    DataMappingWords mappingWords = Mapping as DataMappingWords;
    if (!writeProbWords || this.ProbWords == null || mappingWords == null)
    {
        return;
    }

    // The worker count is used as a heuristic to pick the label-name table.
    bool useCFLabels = mappingWords.WorkerCount > 300; // Assume it's CF

    for (int c = 0; c < ProbWords.Length; c++)
    {
        string className;
        if (useCFLabels)
        {
            className = mappingWords.CFLabelName[c];
        }
        else
        {
            className = mappingWords.SPLabelName[c];
        }

        writer.WriteLine("Class {0}", className);

        // Rank the vocabulary by the log of the mean word probability, highest first.
        Vector probs = ProbWords[c].GetMean();
        var rankedWords = probs
            .Select((value, index) => new KeyValuePair<string, double>(mappingWords.Vocabulary[index], Math.Log(value)))
            .OrderByDescending(x => x.Value)
            .ToArray();

        // Never read past the end of the ranking when the vocabulary is smaller than topWords.
        int wordsToWrite = Math.Min(topWords, rankedWords.Length);
        for (int w = 0; w < wordsToWrite; w++)
        {
            writer.WriteLine($"\t{rankedWords[w].Key}: \t{rankedWords[w].Value:0.000}");
        }
    }
}
/// <summary>
/// Writes various results to a StreamWriter: the base-class results, then (optionally)
/// the highest-ranked words of every class with their log-probabilities, followed by a
/// "Main classes" summary that lists each written word under the class where it scored highest.
/// </summary>
/// <param name="writer">A StreamWriter instance.</param>
/// <param name="writeCommunityParameters">Set true to write community parameters.</param>
/// <param name="writeWorkerParameters">Set true to write worker parameters.</param>
/// <param name="writeWorkerCommunities">Set true to write worker communities.</param>
/// <param name="writeProbWords">Set true to write word probabilities.</param>
/// <param name="topWords">Maximum number of words to write per class; capped at the vocabulary size.</param>
public void WriteResults(StreamWriter writer, bool writeCommunityParameters, bool writeWorkerParameters, bool writeWorkerCommunities, bool writeProbWords, int topWords = 30)
{
    // Forward every flag to its own slot so that writeWorkerParameters is honoured.
    // NOTE(review): assumes the base parameter order is (community parameters, worker parameters,
    // worker communities), as in this method's own signature - confirm against the base class.
    base.WriteResults(writer, writeCommunityParameters, writeWorkerParameters, writeWorkerCommunities);

    // Word indices can only be turned into words through a words-aware mapping,
    // so the word section is skipped when none is available.
    DataMappingWords mappingWords = Mapping as DataMappingWords;
    if (!writeProbWords || this.ProbWords == null || mappingWords == null)
    {
        return;
    }

    // The worker count is used as a heuristic to pick the label-name table.
    bool useCFLabels = mappingWords.WorkerCount > 100; // Assume it's CF

    // word -> (class name, log-probability) of the class in which the word scored highest so far.
    var classifiedWords = new Dictionary<string, KeyValuePair<string, double>>();

    for (int c = 0; c < ProbWords.Length; c++)
    {
        string className;
        if (useCFLabels)
        {
            className = mappingWords.CFLabelName[c];
        }
        else
        {
            className = mappingWords.SPLabelName[c];
        }

        writer.WriteLine($"Class {className}");

        // Rank the vocabulary by the log of the mean word probability, highest first.
        Vector probs = ProbWords[c].GetMean();
        var rankedWords = probs
            .Select((value, index) => new KeyValuePair<string, double>(mappingWords.Vocabulary[index], Math.Log(value)))
            .OrderByDescending(x => x.Value)
            .ToArray();

        // Use a local cap instead of overwriting the topWords parameter.
        int wordsToWrite = Math.Min(topWords, rankedWords.Length);
        for (int w = 0; w < wordsToWrite; w++)
        {
            string word = rankedWords[w].Key;
            double logProb = rankedWords[w].Value;
            writer.WriteLine($"\t{word}: \t{logProb:0.000}");

            // Words of an unnamed class cannot be attributed in the summary.
            if (string.IsNullOrEmpty(className))
            {
                continue;
            }

            KeyValuePair<string, double> best;
            if (!classifiedWords.TryGetValue(word, out best) || best.Value < logProb)
            {
                classifiedWords[word] = new KeyValuePair<string, double>(className, logProb);
            }
        }
    }

    writer.WriteLine();
    writer.WriteLine("Main classes:");
    foreach (var wordsByClass in classifiedWords.GroupBy(classified => classified.Value.Key))
    {
        writer.WriteLine($"Class {wordsByClass.Key}:");
        foreach (var word in wordsByClass.OrderByDescending(w => w.Value.Value))
        {
            writer.WriteLine($"\t{word.Key}");
        }
    }
}
/// <summary>
/// Runs the BCCWords model on the data and updates this results object with the inferred posteriors.
/// </summary>
/// <param name="modelName">Name of the model run (not used inside this method).</param>
/// <param name="data">The data the model is run on.</param>
/// <param name="fullData">The data used to build <c>FullMapping</c> when it has not been set yet.</param>
/// <param name="model">The BCCWords model instance to create and run inference with.</param>
/// <param name="mode">The run mode passed on to <c>UpdateResults</c>.</param>
/// <param name="calculateAccuracy">Compute the accuracy (true).</param>
/// <param name="useMajorityVote">Set true to replace the labels in <paramref name="data"/> by the majority vote per task and pass them to inference as true labels.</param>
/// <param name="useRandomLabel">Set true to replace the labels in <paramref name="data"/> by one random label per task.</param>
/// <exception cref="InvalidOperationException">Thrown when <c>Mapping</c> is not a <c>DataMappingWords</c>.</exception>
public void RunBCCWords(string modelName, IList<Datum> data, IList<Datum> fullData, BCCWords model, RunMode mode, bool calculateAccuracy, bool useMajorityVote = false, bool useRandomLabel = false)
{
    if (FullMapping == null)
    {
        FullMapping = new DataMapping(fullData);
    }

    if (Mapping == null)
    {
        // Build vocabulary
        Console.Write("Building vocabulary...");
        Stopwatch stopwatch = Stopwatch.StartNew();
        string[] corpus = data.Select(d => d.BodyText).Distinct().ToArray();
        Vocabulary = BuildVocabularyFromCorpus(corpus);
        Console.WriteLine("done. Elapsed time: {0}", stopwatch.Elapsed);

        // Build data mapping from the vocabulary that was just built. No words-aware mapping
        // exists yet at this point, so the vocabulary cannot be read from one.
        DataMappingWords builtMapping = new DataMappingWords(data, Vocabulary);
        this.Mapping = builtMapping;
        this.GoldLabels = builtMapping.GetGoldLabelsPerTaskId();
    }

    // Everything below needs the word-level mapping; fail with a clear message instead of a
    // NullReferenceException further down.
    DataMappingWords mappingWords = Mapping as DataMappingWords;
    if (mappingWords == null)
    {
        throw new InvalidOperationException("RunBCCWords requires Mapping to be a DataMappingWords instance.");
    }

    int[] trueLabels = null;
    if (useMajorityVote)
    {
        var majorityLabel = mappingWords.GetMajorityVotesPerTaskId(data);

        // Tasks without a majority vote get a label drawn with Rand.Int.
        // NOTE(review): the array is sized by FullMapping.TaskCount but indexed through
        // Mapping.TaskIndexToId - confirm both mappings cover the same tasks.
        trueLabels = Util.ArrayInit(
            FullMapping.TaskCount,
            i =>
            {
                var taskId = Mapping.TaskIndexToId[i];
                return majorityLabel.ContainsKey(taskId)
                    ? (int)majorityLabel[taskId]
                    : Rand.Int(Mapping.LabelMin, Mapping.LabelMax + 1);
            });
        data = mappingWords.BuildDataFromAssignedLabels(majorityLabel, data);
    }

    if (useRandomLabel)
    {
        var randomLabels = mappingWords.GetRandomLabelPerTaskId(data);
        data = mappingWords.BuildDataFromAssignedLabels(randomLabels, data);
    }

    var labelsPerWorkerIndex = mappingWords.GetLabelsPerWorkerIndex(data);
    var taskIndicesPerWorkerIndex = mappingWords.GetTaskIndicesPerWorkerIndex(data);

    // Create model (previous results are cleared first, regardless of the run mode)
    ClearResults();
    model.CreateModel(mappingWords.TaskCount, mappingWords.LabelCount, mappingWords.WordCount);

    // Run model inference
    BCCWordsPosteriors posteriors = model.InferPosteriors(
        labelsPerWorkerIndex,
        taskIndicesPerWorkerIndex,
        mappingWords.WordIndicesPerTaskIndex,
        mappingWords.WordCountsPerTaskIndex,
        trueLabels);

    // Update results
    UpdateResults(posteriors, mode);

    // Compute accuracy
    if (calculateAccuracy)
    {
        UpdateAccuracy();
    }
}
/// <summary>
/// Directory the word cloud images are written to when the caller does not supply one.
/// </summary>
private const string DefaultWordCloudDirectory = "C:/Users/athen/OneDrive/Documents/EPF/5A/IA/Sentimental-Analysis/WordClouds";

/// <summary>
/// Writes various results to a StreamWriter: the base-class results, then (optionally)
/// the highest-ranked words of every class with their log-probabilities, followed by a
/// "Main classes" summary that lists each written word under the class where it scored highest.
/// When word probabilities are written, one word cloud image per known class
/// (Positive, Negative, Neutral, Unknown, NotRelated) is also saved to disk.
/// </summary>
/// <param name="writer">A StreamWriter instance.</param>
/// <param name="writeCommunityParameters">Set true to write community parameters.</param>
/// <param name="writeWorkerParameters">Set true to write worker parameters.</param>
/// <param name="writeWorkerCommunities">Set true to write worker communities.</param>
/// <param name="writeProbWords">Set true to write word probabilities and save the word clouds.</param>
/// <param name="topWords">Maximum number of words to write per class; capped at the vocabulary size.</param>
/// <param name="wordCloudDirectory">Directory for the word cloud images; null selects the built-in default directory.</param>
public void WriteResults(StreamWriter writer, bool writeCommunityParameters, bool writeWorkerParameters, bool writeWorkerCommunities, bool writeProbWords, int topWords = 30, string wordCloudDirectory = null)
{
    // Forward every flag to its own slot so that writeWorkerParameters is honoured.
    // NOTE(review): assumes the base parameter order is (community parameters, worker parameters,
    // worker communities), as in this method's own signature - confirm against the base class.
    base.WriteResults(writer, writeCommunityParameters, writeWorkerParameters, writeWorkerCommunities);

    // Word indices can only be turned into words through a words-aware mapping,
    // so the word section is skipped when none is available.
    DataMappingWords mappingWords = Mapping as DataMappingWords;
    if (!writeProbWords || this.ProbWords == null || mappingWords == null)
    {
        return;
    }

    // The worker count is used as a heuristic to pick the label-name table.
    bool useCFLabels = mappingWords.WorkerCount > 100; // Assume it's CF

    // word -> (class name, log-probability) of the class in which the word scored highest so far.
    var classifiedWords = new Dictionary<string, KeyValuePair<string, double>>();

    for (int c = 0; c < ProbWords.Length; c++)
    {
        string className;
        if (useCFLabels)
        {
            className = mappingWords.CFLabelName[c];
        }
        else
        {
            className = mappingWords.SPLabelName[c];
        }

        writer.WriteLine($"Class {className}");

        // Rank the vocabulary by the log of the mean word probability, highest first.
        Vector probs = ProbWords[c].GetMean();
        var rankedWords = probs
            .Select((value, index) => new KeyValuePair<string, double>(mappingWords.Vocabulary[index], Math.Log(value)))
            .OrderByDescending(x => x.Value)
            .ToArray();

        // Use a local cap instead of overwriting the topWords parameter.
        int wordsToWrite = Math.Min(topWords, rankedWords.Length);
        for (int w = 0; w < wordsToWrite; w++)
        {
            string word = rankedWords[w].Key;
            double logProb = rankedWords[w].Value;
            writer.WriteLine($"\t{word}: \t{logProb:0.000}");

            // Words of an unnamed class cannot be attributed in the summary.
            if (string.IsNullOrEmpty(className))
            {
                continue;
            }

            KeyValuePair<string, double> best;
            if (!classifiedWords.TryGetValue(word, out best) || best.Value < logProb)
            {
                classifiedWords[word] = new KeyValuePair<string, double>(className, logProb);
            }
        }
    }

    writer.WriteLine();

    // Class label -> image file name. The array order is the order in which the images are saved.
    var cloudFiles = new[]
    {
        new KeyValuePair<string, string>("Positive", "Positive.jpg"),
        new KeyValuePair<string, string>("Negative", "Negative.jpg"),
        new KeyValuePair<string, string>("Neutral", "Neutral.jpg"),
        new KeyValuePair<string, string>("Unknown", "Unknown.jpg"),
        new KeyValuePair<string, string>("NotRelated", "notRelated.jpg"),
    };
    var cloudWords = cloudFiles.ToDictionary(entry => entry.Key, entry => new List<string>());

    writer.WriteLine("Main classes:");
    foreach (var wordsByClass in classifiedWords.GroupBy(classified => classified.Value.Key))
    {
        writer.WriteLine($"Class {wordsByClass.Key}:");

        // Classes without a word cloud are still listed in the text output.
        List<string> cloudBucket;
        cloudWords.TryGetValue(wordsByClass.Key, out cloudBucket);

        foreach (var word in wordsByClass.OrderByDescending(w => w.Value.Value))
        {
            writer.WriteLine($"\t{word.Key}");
            if (cloudBucket != null)
            {
                cloudBucket.Add(word.Key);
            }
        }
    }

    string outputDirectory = wordCloudDirectory ?? DefaultWordCloudDirectory;

    // Saving into a missing directory fails, so make sure it exists first.
    Directory.CreateDirectory(outputDirectory);
    foreach (var cloudFile in cloudFiles)
    {
        SaveWordCloud(cloudWords[cloudFile.Key], Path.Combine(outputDirectory, cloudFile.Value));
    }
}

/// <summary>
/// Draws a 500x500 word cloud in which every word has frequency 1 and saves it as a JPEG file.
/// </summary>
/// <param name="words">The words to draw.</param>
/// <param name="path">The path of the image file to write.</param>
private static void SaveWordCloud(List<string> words, string path)
{
    List<int> frequencies = Enumerable.Repeat(1, words.Count).ToList();
    var wordCloud = new WordCloud.WordCloud(500, 500, true);

    // Both the drawn image and its bitmap copy hold GDI+ resources and must be disposed.
    using (var rendered = wordCloud.Draw(words, frequencies))
    using (var bitmap = new Bitmap(rendered))
    {
        // The encoder is given explicitly: saving an in-memory bitmap by file name alone
        // falls back to the PNG encoder whatever the file extension says.
        bitmap.Save(path, System.Drawing.Imaging.ImageFormat.Jpeg);
    }
}