コード例 #1
0
        /// <summary>
        /// Loads the initial corpus data file, which contains Tweet Ids.
        /// </summary>
        /// <remarks>
        /// The file is read without a header record. Each record is consumed positionally:
        /// field 0 is the keyword, field 1 the sentiment name, field 2 the tweet id.
        /// </remarks>
        /// <param name="pathName">File path to the file to open.</param>
        /// <returns>List of CorpusDataRow</returns>
        private static List<CorpusDataRow> LoadCorpus(string pathName)
        {
            List<CorpusDataRow> corpus = new List<CorpusDataRow>();

            // Request read access only: the two-argument FileStream constructor asks for
            // read/write access, which fails on read-only files even though nothing is written.
            using (FileStream f = new FileStream(pathName, FileMode.Open, FileAccess.Read))
            using (StreamReader streamReader = new StreamReader(f))
            using (CsvReader csvReader = new CsvReader(streamReader))
            {
                csvReader.Configuration.HasHeaderRecord = false;

                while (csvReader.Read())
                {
                    CorpusDataRow row         = new CorpusDataRow();
                    int           columnIndex = 0;

                    row.Keyword = csvReader.GetField(columnIndex++);

                    // Case-insensitive parse accepts "positive", "Positive" and "POSITIVE" alike,
                    // and reports an empty field as an ArgumentException from Enum.Parse.
                    string sentiment = csvReader.GetField(columnIndex++);
                    row.Sentiment = (Sentiment)Enum.Parse(typeof(Sentiment), sentiment, true);

                    // Ids are machine-written, so parse them independently of the current culture.
                    row.Id = Int64.Parse(csvReader.GetField(columnIndex++), System.Globalization.CultureInfo.InvariantCulture);

                    corpus.Add(row);
                }
            }

            return corpus;
        }
コード例 #2
0
 /// <summary>
 /// Appends one CorpusDataRow record to the output file.
 /// </summary>
 /// <param name="row">CorpusDataRow (with Tweet DTO populated).</param>
 /// <param name="pathName">File path to output file to append to.</param>
 private static void SaveResult(CorpusDataRow row, string pathName)
 {
     // Stacked usings dispose innermost-first: CSV writer, then text writer, then file stream.
     using (FileStream outputStream = new FileStream(pathName, FileMode.Append))
     using (StreamWriter textWriter = new StreamWriter(outputStream))
     using (CsvWriter recordWriter = new CsvWriter(textWriter))
     {
         recordWriter.WriteRecord <CorpusDataRow>(row);
     }
 }
コード例 #3
0
        /// <summary>
        /// Searches Twitter for English tweets matching a keyword plus a sentiment emoticon,
        /// strips separators and emoticons from their text, and appends every accepted tweet
        /// to the output file.
        /// </summary>
        /// <remarks>
        /// Retweets (text starting with "RT"), tweets containing ":P", tweets containing both a
        /// positive and a negative emoticon, and tweets whose cleaned text was already collected
        /// are ignored. Paging stops when enough rows are collected, when a page comes back
        /// empty, or after the first non-rate-limit error.
        /// </remarks>
        /// <param name="keyword">Search keyword; " :)" or " :(" is appended depending on <paramref name="sentiment"/>.</param>
        /// <param name="sentiment">Sentiment assigned to every collected row.</param>
        /// <param name="count">Maximum number of rows to collect; also sent as the page size of each search.</param>
        /// <param name="service">TwitterService used to run the search.</param>
        /// <param name="outputPath">File path to output file to append to.</param>
        /// <returns>List of CorpusDataRow (with Tweet DTO populated), holding at most <paramref name="count"/> rows.</returns>
        private static List<CorpusDataRow> SearchTweets(string keyword, Sentiment sentiment, int count, TwitterService service, string outputPath)
        {
            // Replaced with a space, in this exact order: an earlier replacement can expose a later pattern.
            string[] strippedTokens = { ",", "\n", "\r", "\t", ":)", ":-)", ": )", ":D", "=)", ":(", ":-(", ": (" };

            List<CorpusDataRow> outputCorpus = new List<CorpusDataRow>();
            HashSet<string>     seenTexts    = new HashSet<string>();
            long? lastId    = null;
            int   skipCount = 0;

            keyword += sentiment == Sentiment.Positive ? " :)" : " :(";

            while (skipCount == 0 && outputCorpus.Count < count)
            {
                // Fetch one page of tweets.
                var statusList = service.Search(new SearchOptions()
                {
                    Q = keyword, Lang = "en", IncludeEntities = false, Count = count, MaxId = lastId
                });

                // Validate the response once per request, before touching the results.
                if (service.Response.StatusCode != System.Net.HttpStatusCode.OK ||
                    statusList == null || statusList.Statuses == null)
                {
                    // Check the rate limit.
                    TwitterRateLimitStatus rateSearch = service.Response.RateLimitStatus;
                    if (rateSearch != null && rateSearch.RemainingHits < 1)
                    {
                        DateTime resetTime = rateSearch.ResetTime + TimeSpan.FromMinutes(1);

                        Console.WriteLine("Rate Limit reached. Sleeping until " + resetTime);

                        // Thread.Sleep rejects negative intervals, so only wait if the reset is still ahead.
                        TimeSpan delay = resetTime - DateTime.Now;
                        if (delay > TimeSpan.Zero)
                        {
                            Thread.Sleep(delay);
                        }
                    }
                    else
                    {
                        // Some other error. Maybe 404. Give up (the loop condition stops on skipCount).
                        skipCount++;
                        Console.WriteLine("Skipped " + skipCount + " records. Got " + service.Response.StatusCode + ".");
                    }

                    // Either retry the same page (rate limit) or leave via the loop condition.
                    continue;
                }

                var statuses = statusList.Statuses.ToList();
                if (statuses.Count == 0)
                {
                    // No older tweets are available.
                    break;
                }

                // MaxId is inclusive, so step below the oldest id seen; otherwise the next
                // request returns the same tweet again and paging may never advance.
                lastId = statuses.Min(s => s.Id) - 1;

                foreach (var status in statuses)
                {
                    if (outputCorpus.Count >= count)
                    {
                        break;
                    }

                    string text = status.Text;
                    if (text == null || text.StartsWith("RT", StringComparison.Ordinal) || text.Contains(":P"))
                    {
                        continue;
                    }

                    bool hasPositive = text.Contains(":)") || text.Contains(":-)") || text.Contains(": )") || text.Contains(":D") || text.Contains("=)");
                    bool hasNegative = text.Contains(":(") || text.Contains(":-(") || text.Contains(": (");
                    if (hasPositive && hasNegative)
                    {
                        // Mixed emoticons make the sentiment ambiguous.
                        continue;
                    }

                    foreach (string token in strippedTokens)
                    {
                        text = text.Replace(token, " ");
                    }

                    // De-duplicate on the cleaned text, which is what ends up in the corpus.
                    if (!seenTexts.Add(text))
                    {
                        continue;
                    }

                    status.Text = text;

                    // Convert the TwitterStatus to a Tweet DTO.
                    CorpusDataRow row = new CorpusDataRow();
                    row.Id        = status.Id;
                    row.Keyword   = keyword;
                    row.Tweet     = Mapper.Map<TwitterStatus, Tweet>(status);
                    row.Sentiment = sentiment;

                    // Save the result to file.
                    SaveResult(row, outputPath);

                    outputCorpus.Add(row);

                    if (outputCorpus.Count % 50 == 0)
                    {
                        Console.WriteLine("Processed " + outputCorpus.Count + " tweets.");
                    }
                }
            }

            Console.WriteLine("Saved " + outputCorpus.Count + ", Skipped " + skipCount + ".");

            return outputCorpus;
        }
コード例 #4
0
        /// <summary>
        /// Loads the tweet text data for each id in the corpus.
        /// </summary>
        /// <remarks>
        /// Processing starts at the index returned by GetResumeIndex. Each successfully fetched
        /// tweet has commas, line breaks and tabs in its text replaced by spaces, is mapped onto
        /// the row, and is appended to the output file. When the rate limit is exhausted the
        /// method sleeps until one minute after the reported reset time and retries the same
        /// row; any other failure skips the row.
        /// </remarks>
        /// <param name="service">TwitterService</param>
        /// <param name="corpus">List of CorpusDataRow</param>
        /// <param name="outputPath">File path to output data file.</param>
        /// <returns>List of CorpusDataRow (with Tweet DTO populated).</returns>
        private static List<CorpusDataRow> LoadTweets(TwitterService service, List<CorpusDataRow> corpus, string outputPath)
        {
            List<CorpusDataRow> outputCorpus = new List<CorpusDataRow>();
            int skipCount = 0;

            for (int index = GetResumeIndex(corpus, outputPath); index < corpus.Count; index++)
            {
                CorpusDataRow row = corpus[index];

                // Fetch the tweet.
                var status = service.GetTweet(new GetTweetOptions()
                {
                    Id = row.Id
                });

                // Only touch the status after confirming the request succeeded; dereferencing
                // it first would throw before the error handling below could run.
                if (status != null && service.Response.StatusCode == System.Net.HttpStatusCode.OK)
                {
                    if (status.Text != null)
                    {
                        status.Text = status.Text.Replace(",", " ")
                                                 .Replace("\n", " ")
                                                 .Replace("\r", " ")
                                                 .Replace("\t", " ");
                    }

                    // Convert the TwitterStatus to a Tweet DTO.
                    row.Tweet = Mapper.Map<TwitterStatus, Tweet>(status);

                    // Save the result to file.
                    SaveResult(row, outputPath);

                    outputCorpus.Add(row);

                    if ((index + 1) % 50 == 0)
                    {
                        Console.WriteLine("Processed " + (index + 1) + " tweets.");
                    }
                }
                else
                {
                    // Check the rate limit.
                    TwitterRateLimitStatus rateSearch = service.Response.RateLimitStatus;
                    if (rateSearch != null && rateSearch.RemainingHits < 1)
                    {
                        DateTime resetTime = rateSearch.ResetTime + TimeSpan.FromMinutes(1);

                        Console.WriteLine("Rate Limit reached. Sleeping until " + resetTime);

                        // Thread.Sleep rejects negative intervals, so only wait if the reset is still ahead.
                        TimeSpan delay = resetTime - DateTime.Now;
                        if (delay > TimeSpan.Zero)
                        {
                            Thread.Sleep(delay);
                        }

                        // Try this record again.
                        index--;
                    }
                    else
                    {
                        // Some other error. Maybe 404. Skip this record.
                        skipCount++;
                        Console.WriteLine("Skipped " + skipCount + " records. Got " + service.Response.StatusCode + ".");
                    }
                }
            }

            Console.WriteLine("Saved " + outputCorpus.Count + ", Skipped " + skipCount + ".");

            return outputCorpus;
        }