/// <summary>
        /// Entry point for the crowdsourcing experiments presented in Simpson et al. (WWW15):
        /// loads the weather tweets, runs BCCWords on them and writes the results to standard output.
        /// </summary>
        public static void Main()
        {
            string dataPath = Path.Combine("Data", "weatherTweets.tsv.gz");
            var    tweets   = Datum.LoadData(dataPath);

            // The vocabulary is built once and handed to every ResultsWords instance created below.
            var subDataVocabulary = ResultsWords.BuildVocabularyOnSubdata((List<Datum>)tweets);

            var bccWords = new BCCWords();
            var results  = new ResultsWords(tweets, subDataVocabulary);

            // Inference is only run when the results carry a word-aware data mapping.
            if (results.Mapping is DataMappingWords)
            {
                results = new ResultsWords(tweets, subDataVocabulary);
                results.RunBCCWords("BCCwords", tweets, tweets, bccWords, Results.RunMode.ClearResults, true);
            }

            // Only the word probabilities are requested; the other three sections are switched off.
            using (StreamWriter output = new StreamWriter(Console.OpenStandardOutput()))
            {
                results.WriteResults(output, false, false, false, true);
            }

            Console.WriteLine("Done.  Press enter to exit.");
            Console.ReadLine();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Writes the base results to a StreamWriter and, optionally, the most probable words of each class.
        /// </summary>
        /// <remarks>
        /// The word section is skipped when <paramref name="writeProbWords"/> is false, when
        /// <c>ProbWords</c> is null, or when <c>Mapping</c> is not a <c>DataMappingWords</c>
        /// (there is then no vocabulary to name the words with).
        /// Words are ranked by the natural logarithm of the mean of <c>ProbWords[c]</c>, highest first.
        /// </remarks>
        /// <param name="writer">A StreamWriter instance.</param>
        /// <param name="writeCommunityParameters">Set true to write community parameters.</param>
        /// <param name="writeWorkerParameters">Set true to write worker parameters.</param>
        /// <param name="writeWorkerCommunities">Set true to write worker communities.</param>
        /// <param name="writeProbWords">Set true to write word probabilities</param>
        /// <param name="topWords">Maximum number of words to write per class; capped at the number of available words.</param>
        public void WriteResults(StreamWriter writer, bool writeCommunityParameters, bool writeWorkerParameters, bool writeWorkerCommunities, bool writeProbWords, int topWords = 30)
        {
            // Each flag goes to its own argument; writeWorkerParameters was previously dropped
            // because writeWorkerCommunities was passed twice.
            base.WriteResults(writer, writeCommunityParameters, writeWorkerParameters, writeWorkerCommunities);

            if (!writeProbWords || this.ProbWords == null)
            {
                return;
            }

            DataMappingWords MappingWords = Mapping as DataMappingWords;
            if (MappingWords == null)
            {
                // Without a word mapping neither the class names nor the vocabulary are available.
                return;
            }

            int NumClasses = ProbWords.Length;
            for (int c = 0; c < NumClasses; c++)
            {
                if (MappingWords.WorkerCount > 300) // Assume it's CF
                {
                    writer.WriteLine("Class {0}", MappingWords.CFLabelName[c]);
                }
                else
                {
                    writer.WriteLine("Class {0}", MappingWords.SPLabelName[c]);
                }

                // Pair every vocabulary entry with its log mean probability, most probable first.
                Vector probs           = ProbWords[c].GetMean();
                var    probsDictionary = probs
                    .Select((value, index) => new KeyValuePair<string, double>(MappingWords.Vocabulary[index], Math.Log(value)))
                    .OrderByDescending(x => x.Value)
                    .ToArray();

                // Never read past the end when the vocabulary is smaller than topWords.
                int wordLimit = Math.Min(topWords, probsDictionary.Length);
                for (int w = 0; w < wordLimit; w++)
                {
                    writer.WriteLine($"\t{probsDictionary[w].Key}: \t{probsDictionary[w].Value:0.000}");
                }
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Writes the base results to a StreamWriter and, optionally, the most probable words of each class
        /// followed by a "Main classes" summary.
        /// </summary>
        /// <remarks>
        /// The word section is skipped when <paramref name="writeProbWords"/> is false, when
        /// <c>ProbWords</c> is null, or when <c>Mapping</c> is not a <c>DataMappingWords</c>
        /// (there is then no vocabulary to name the words with).
        /// Words are ranked by the natural logarithm of the mean of <c>ProbWords[c]</c>, highest first.
        /// In the summary, a word that appears in the top list of several classes is attributed to the
        /// class in which its log probability is highest.
        /// </remarks>
        /// <param name="writer">A StreamWriter instance.</param>
        /// <param name="writeCommunityParameters">Set true to write community parameters.</param>
        /// <param name="writeWorkerParameters">Set true to write worker parameters.</param>
        /// <param name="writeWorkerCommunities">Set true to write worker communities.</param>
        /// <param name="writeProbWords">Set true to write word probabilities</param>
        /// <param name="topWords">Maximum number of words to write per class; capped at the number of available words.</param>
        public void WriteResults(StreamWriter writer, bool writeCommunityParameters, bool writeWorkerParameters, bool writeWorkerCommunities, bool writeProbWords, int topWords = 30)
        {
            // Each flag goes to its own argument; writeWorkerParameters was previously dropped
            // because writeWorkerCommunities was passed twice.
            base.WriteResults(writer, writeCommunityParameters, writeWorkerParameters, writeWorkerCommunities);

            if (!writeProbWords || this.ProbWords == null)
            {
                return;
            }

            DataMappingWords MappingWords = Mapping as DataMappingWords;
            if (MappingWords == null)
            {
                // Without a word mapping neither the class names nor the vocabulary are available.
                return;
            }

            int NumClasses = ProbWords.Length;

            // word -> (class that currently owns the word, log probability of the word in that class)
            var classifiedWords = new Dictionary<string, KeyValuePair<string, double>>();
            for (int c = 0; c < NumClasses; c++)
            {
                string className;
                if (MappingWords.WorkerCount > 100) // Assume it's CF
                {
                    className = MappingWords.CFLabelName[c];
                }
                else
                {
                    className = MappingWords.SPLabelName[c];
                }
                writer.WriteLine($"Class {className}");

                // Pair every vocabulary entry with its log mean probability, most probable first.
                Vector probs           = ProbWords[c].GetMean();
                var    probsDictionary = probs
                    .Select((value, index) => new KeyValuePair<string, double>(MappingWords.Vocabulary[index], Math.Log(value)))
                    .OrderByDescending(x => x.Value)
                    .ToArray();

                // Cap per class in a local so the caller's topWords is not altered between classes.
                int wordLimit = Math.Min(topWords, probsDictionary.Length);
                for (int w = 0; w < wordLimit; w++)
                {
                    KeyValuePair<string, double> entry = probsDictionary[w];
                    writer.WriteLine($"\t{entry.Key}: \t{entry.Value:0.000}");

                    if (string.IsNullOrEmpty(className))
                    {
                        continue; // unnamed classes do not take part in the summary
                    }

                    // Keep the class in which this word scores highest.
                    KeyValuePair<string, double> classifiedWord;
                    if (!classifiedWords.TryGetValue(entry.Key, out classifiedWord) ||
                        classifiedWord.Value < entry.Value)
                    {
                        classifiedWords[entry.Key] = new KeyValuePair<string, double>(className, entry.Value);
                    }
                }
            }

            writer.WriteLine();
            writer.WriteLine("Main classes:");
            foreach (var wordByClass in classifiedWords.GroupBy(classified => classified.Value.Key))
            {
                writer.WriteLine($"Class {wordByClass.Key}:");
                foreach (var word in wordByClass.OrderByDescending(w => w.Value.Value))
                {
                    writer.WriteLine($"\t{word.Key}");
                }
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Runs the BCCWords model on the data and updates this object's results with the inferred posteriors.
        /// </summary>
        /// <remarks>
        /// When <c>Mapping</c> has not been set yet, a vocabulary is built from the distinct body texts of
        /// <paramref name="data"/> and a new <c>DataMappingWords</c> is created from it.
        /// </remarks>
        /// <param name="modelName">Name of the model. Not used by this method.</param>
        /// <param name="data">The data to run inference on.</param>
        /// <param name="fullData">The data used to create <c>FullMapping</c> when it has not been set yet.</param>
        /// <param name="model">The BCCWords model instance to create and run.</param>
        /// <param name="mode">The run mode passed on to <c>UpdateResults</c>.</param>
        /// <param name="calculateAccuracy">Compute the accuracy (true).</param>
        /// <param name="useMajorityVote">Set true to replace the labels in <paramref name="data"/> with the
        /// per-task majority votes and to pass these votes to the model as true labels.</param>
        /// <param name="useRandomLabel">Set true to replace the labels in <paramref name="data"/> with a
        /// random label per task.</param>
        /// <exception cref="InvalidOperationException">Thrown when <c>Mapping</c> is not a <c>DataMappingWords</c>.</exception>
        public void RunBCCWords(string modelName,
                                IList<Datum> data,
                                IList<Datum> fullData,
                                BCCWords model,
                                RunMode mode,
                                bool calculateAccuracy,
                                bool useMajorityVote = false,
                                bool useRandomLabel  = false)
        {
            if (FullMapping == null)
            {
                FullMapping = new DataMapping(fullData);
            }

            if (Mapping == null)
            {
                // Build vocabulary
                Console.Write("Building vocabulary...");
                Stopwatch stopwatch = new Stopwatch();
                stopwatch.Start();
                string[] corpus = data.Select(d => d.BodyText).Distinct().ToArray();
                Vocabulary = BuildVocabularyFromCorpus(corpus);
                Console.WriteLine("done. Elapsed time: {0}", stopwatch.Elapsed);

                // Build data mapping from the vocabulary that was just built. The previous code read the
                // vocabulary from a mapping variable that was still null here, so this path always threw.
                DataMappingWords builtMapping = new DataMappingWords(data, Vocabulary);
                this.Mapping    = builtMapping;
                this.GoldLabels = builtMapping.GetGoldLabelsPerTaskId();
            }

            // Everything below needs the word-specific mapping; fail with a clear message instead of a
            // NullReferenceException further down.
            DataMappingWords MappingWords = Mapping as DataMappingWords;
            if (MappingWords == null)
            {
                throw new InvalidOperationException("RunBCCWords requires Mapping to be a DataMappingWords instance.");
            }

            int[] trueLabels = null;
            if (useMajorityVote)
            {
                // Tasks without a majority vote get a random label in [LabelMin, LabelMax].
                var majorityLabel = MappingWords.GetMajorityVotesPerTaskId(data);
                trueLabels = Util.ArrayInit(FullMapping.TaskCount, i => majorityLabel.ContainsKey(Mapping.TaskIndexToId[i]) ? (int)majorityLabel[Mapping.TaskIndexToId[i]] : Rand.Int(Mapping.LabelMin, Mapping.LabelMax + 1));
                data       = MappingWords.BuildDataFromAssignedLabels(majorityLabel, data);
            }

            if (useRandomLabel)
            {
                var randomLabels = MappingWords.GetRandomLabelPerTaskId(data);
                data = MappingWords.BuildDataFromAssignedLabels(randomLabels, data);
            }

            var labelsPerWorkerIndex      = MappingWords.GetLabelsPerWorkerIndex(data);
            var taskIndicesPerWorkerIndex = MappingWords.GetTaskIndicesPerWorkerIndex(data);

            // Create model
            ClearResults();
            model.CreateModel(MappingWords.TaskCount, MappingWords.LabelCount, MappingWords.WordCount);

            // Run model inference
            BCCWordsPosteriors posteriors = model.InferPosteriors(labelsPerWorkerIndex, taskIndicesPerWorkerIndex, MappingWords.WordIndicesPerTaskIndex, MappingWords.WordCountsPerTaskIndex, trueLabels);

            // Update results
            UpdateResults(posteriors, mode);

            // Compute accuracy
            if (calculateAccuracy)
            {
                UpdateAccuracy();
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Writes the base results to a StreamWriter and, optionally, the most probable words of each class,
        /// a "Main classes" summary, and one word-cloud image per known class name.
        /// </summary>
        /// <remarks>
        /// The word section (including the images) is skipped when <paramref name="writeProbWords"/> is
        /// false, when <c>ProbWords</c> is null, or when <c>Mapping</c> is not a <c>DataMappingWords</c>
        /// (there is then no vocabulary to name the words with).
        /// Words are ranked by the natural logarithm of the mean of <c>ProbWords[c]</c>, highest first.
        /// In the summary, a word that appears in the top list of several classes is attributed to the
        /// class in which its log probability is highest. Words attributed to the classes "Positive",
        /// "Negative", "Neutral", "Unknown" and "NotRelated" are drawn into word clouds (every word with
        /// frequency 1) and saved to a fixed directory.
        /// </remarks>
        /// <param name="writer">A StreamWriter instance.</param>
        /// <param name="writeCommunityParameters">Set true to write community parameters.</param>
        /// <param name="writeWorkerParameters">Set true to write worker parameters.</param>
        /// <param name="writeWorkerCommunities">Set true to write worker communities.</param>
        /// <param name="writeProbWords">Set true to write word probabilities</param>
        /// <param name="topWords">Maximum number of words to write per class; capped at the number of available words.</param>
        public void WriteResults(StreamWriter writer, bool writeCommunityParameters, bool writeWorkerParameters, bool writeWorkerCommunities, bool writeProbWords, int topWords = 30)
        {
            // Each flag goes to its own argument; writeWorkerParameters was previously dropped
            // because writeWorkerCommunities was passed twice.
            base.WriteResults(writer, writeCommunityParameters, writeWorkerParameters, writeWorkerCommunities);

            if (!writeProbWords || this.ProbWords == null)
            {
                return;
            }

            DataMappingWords MappingWords = Mapping as DataMappingWords;
            if (MappingWords == null)
            {
                // Without a word mapping neither the class names nor the vocabulary are available.
                return;
            }

            int NumClasses = ProbWords.Length;

            // word -> (class that currently owns the word, log probability of the word in that class)
            var classifiedWords = new Dictionary<string, KeyValuePair<string, double>>();
            for (int c = 0; c < NumClasses; c++)
            {
                string className;
                if (MappingWords.WorkerCount > 100) // Assume it's CF
                {
                    className = MappingWords.CFLabelName[c];
                }
                else
                {
                    className = MappingWords.SPLabelName[c];
                }
                writer.WriteLine($"Class {className}");

                // Pair every vocabulary entry with its log mean probability, most probable first.
                Vector probs           = ProbWords[c].GetMean();
                var    probsDictionary = probs
                    .Select((value, index) => new KeyValuePair<string, double>(MappingWords.Vocabulary[index], Math.Log(value)))
                    .OrderByDescending(x => x.Value)
                    .ToArray();

                // Cap per class in a local so the caller's topWords is not altered between classes.
                int wordLimit = Math.Min(topWords, probsDictionary.Length);
                for (int w = 0; w < wordLimit; w++)
                {
                    KeyValuePair<string, double> entry = probsDictionary[w];
                    writer.WriteLine($"\t{entry.Key}: \t{entry.Value:0.000}");

                    if (string.IsNullOrEmpty(className))
                    {
                        continue; // unnamed classes do not take part in the summary
                    }

                    // Keep the class in which this word scores highest.
                    KeyValuePair<string, double> classifiedWord;
                    if (!classifiedWords.TryGetValue(entry.Key, out classifiedWord) ||
                        classifiedWord.Value < entry.Value)
                    {
                        classifiedWords[entry.Key] = new KeyValuePair<string, double>(className, entry.Value);
                    }
                }
            }
            writer.WriteLine();

            // Class names that get a word cloud, in the order their images are saved, and the matching
            // output file names.
            string[] cloudLabels = { "Positive", "Negative", "Neutral", "Unknown", "NotRelated" };
            string[] cloudFiles  = { "Positive.jpg", "Negative.jpg", "Neutral.jpg", "Unknown.jpg", "notRelated.jpg" };

            var cloudWords = new Dictionary<string, List<string>>();
            foreach (string label in cloudLabels)
            {
                cloudWords[label] = new List<string>();
            }

            writer.WriteLine("Main classes:");
            foreach (var wordByClass in classifiedWords.GroupBy(classified => classified.Value.Key))
            {
                writer.WriteLine($"Class {wordByClass.Key}:");

                // Classes outside the table above are still listed in the text output but get no image.
                List<string> bucket;
                bool hasCloud = cloudWords.TryGetValue(wordByClass.Key, out bucket);

                foreach (var word in wordByClass.OrderByDescending(w => w.Value.Value))
                {
                    writer.WriteLine($"\t{word.Key}");
                    if (hasCloud)
                    {
                        bucket.Add(word.Key);
                    }
                }
            }

            // NOTE(review): absolute, machine-specific output directory - should be made configurable.
            // NOTE(review): Save(string) is called without an ImageFormat; confirm the files are really
            // encoded as JPEG as their extension suggests.
            const string outputDirectory = "C:/Users/athen/OneDrive/Documents/EPF/5A/IA/Sentimental-Analysis/WordClouds/";
            for (int i = 0; i < cloudLabels.Length; i++)
            {
                List<string> words = cloudWords[cloudLabels[i]];

                // Every word is given the same weight of 1.
                List<int> frequencies = Enumerable.Repeat(1, words.Count).ToList();

                var wordCloud = new WordCloud.WordCloud(500, 500, true);

                // Dispose both the drawn image and its Bitmap copy once the file has been written.
                using (var drawn = wordCloud.Draw(words, frequencies))
                using (var bitmap = new Bitmap(drawn))
                {
                    bitmap.Save(outputDirectory + cloudFiles[i]);
                }
            }
        }