/// <summary>
/// Walks every stored tweet whose sample reason equals <c>SampleReason.user_data</c>,
/// tokenizes its text, records each token that qualifies as an entity on the tweet,
/// and then pushes the tweet to a <see cref="PushToCouchDBReceiver"/>.
/// A progress line is written to the console after every 1000 tweets.
/// </summary>
public void Execute()
{
    PushToCouchDBReceiver couchReceiver = new PushToCouchDBReceiver();
    int processed = 0;

    using (XtractDataContext context = new XtractDataContext())
    {
        // Object tracking is switched off before the table is queried.
        context.ObjectTrackingEnabled = false;

        // Deferred query: rows are pulled as the loop below enumerates it.
        var userDataTweets = context.Tweets.Where(
            t => t.sample_reason.Equals(SampleReason.user_data.ToString()));

        foreach (Tweet current in userDataTweets)
        {
            string author = current.screen_name;
            long? tweetId = current.twitter_id;

            foreach (string token in _tokenizer.Tokenize(current.text))
            {
                Word candidate = new Word
                {
                    screen_name = author,
                    text = token,
                    twitter_id = tweetId
                };

                if (!candidate.IsEntity())
                {
                    continue;
                }

                current.AddEntity(candidate.text);
            }

            couchReceiver.Push(current);

            // Report progress once per thousand tweets handled.
            processed++;
            if (processed % 1000 == 0)
            {
                Console.Out.WriteLine("tokenized " + processed + " tweets");
            }
        }
    }
}
/// <summary>
/// Runs two hand-written sample tweets through the tokenizer, attaches every token
/// that qualifies as an entity to its tweet, pushes each tweet to a
/// <see cref="PushToCouchDBReceiver"/>, and prints the tweet's JSON form to the console.
/// </summary>
public void TestWordExtractPushToCouch()
{
    // Arrange: tokenizer (with its URL expander), receiver and the sample tweets.
    IUrlExpander urlExpander = new UrlExpander();
    Tokenizer wordTokenizer = new Tokenizer(urlExpander);
    PushToCouchDBReceiver couchReceiver = new PushToCouchDBReceiver();

    List<Tweet> samples = new List<Tweet>
    {
        new Tweet()
        {
            text = "@steelers_munoz why can't you sleep? i'm starving, I haven't had dinner and it's 10.25 haha",
            date_scanned = DateTime.Now.ToLongTimeString(),
            date_tweeted = DateTime.Now.ToLongTimeString(),
            english_similarity = 0.5,
            sample_reason = SampleReason.user_data.ToString(),
            screen_name = "utunga",
            twitter_id = 9128123123
        },
        new Tweet()
        {
            text = "RT @OMGTeenQuotez: the bad experiences i been through made me stronger.. #OMGTeenQuotez",
            date_scanned = DateTime.Now.ToLongTimeString(),
            date_tweeted = DateTime.Now.ToLongTimeString(),
            english_similarity = 0.5,
            sample_reason = SampleReason.user_data.ToString(),
            screen_name = "utunga",
            twitter_id = 9128123123
        }
    };

    // Act: extract entities from each tweet, push it, and dump its JSON.
    foreach (Tweet sample in samples)
    {
        string author = sample.screen_name;
        long? tweetId = sample.twitter_id;

        foreach (string token in wordTokenizer.Tokenize(sample.text))
        {
            Word candidate = new Word
            {
                screen_name = author,
                text = token,
                twitter_id = tweetId
            };

            if (!candidate.IsEntity())
            {
                continue;
            }

            sample.AddEntity(candidate.text);
        }

        couchReceiver.Push(sample);
        Console.Out.WriteLine("JSON:" + JSON.Serialize(sample));
    }
}