Esempio n. 1
0
        /// <summary>
        /// Tokenizes every stored tweet whose <c>sample_reason</c> equals the string form of
        /// <c>SampleReason.user_data</c>, records each token that <c>Word.IsEntity()</c> accepts
        /// as an entity on the tweet, and pushes the tweet to a <c>PushToCouchDBReceiver</c>.
        /// </summary>
        /// <remarks>
        /// A progress line is written to standard output after every 1000 tweets.
        /// </remarks>
        public void Execute()
        {
            var couchReceiver = new PushToCouchDBReceiver();
            int processed = 0;

            using (var context = new XtractDataContext())
            {
                // Tracking is switched off before the query is enumerated; nothing in this
                // method submits changes back through the context.
                context.ObjectTrackingEnabled = false;

                var userDataTweets = context.Tweets.Where(
                    tweet => tweet.sample_reason.Equals(SampleReason.user_data.ToString()));

                foreach (Tweet current in userDataTweets)
                {
                    // Read once per tweet so every Word built below carries the same values.
                    string author = current.screen_name;
                    long? tweetId = current.twitter_id;

                    foreach (string token in _tokenizer.Tokenize(current.text))
                    {
                        Word candidate = new Word { screen_name = author, text = token, twitter_id = tweetId };
                        if (!candidate.IsEntity())
                        {
                            continue;
                        }

                        current.AddEntity(candidate.text);
                    }

                    // The tweet is pushed whether or not any entity was added to it.
                    couchReceiver.Push(current);

                    processed++;
                    if (processed % 1000 == 0)
                    {
                        Console.Out.WriteLine("tokenized " + processed + " tweets");
                    }
                }
            }
        }
Esempio n. 2
0
    /// <summary>
    /// Runs two hand-written tweets through tokenization and entity tagging, pushes each one
    /// to a <c>PushToCouchDBReceiver</c>, and prints its JSON serialization.
    /// </summary>
    /// <remarks>
    /// The method contains no assertions; a problem would surface only as a thrown exception.
    /// </remarks>
    public void TestWordExtractPushToCouch()
    {
        // Setup: collaborators first, then the sample data.
        IUrlExpander urlExpander = new UrlExpander();
        var wordTokenizer = new Tokenizer(urlExpander);
        var couchReceiver = new PushToCouchDBReceiver();

        var samples = new List<Tweet>
        {
            new Tweet
            {
                text = "@steelers_munoz why can't you sleep? i'm starving, I haven't had dinner and it's 10.25 haha",
                date_scanned = DateTime.Now.ToLongTimeString(),
                date_tweeted = DateTime.Now.ToLongTimeString(),
                english_similarity = 0.5,
                sample_reason = SampleReason.user_data.ToString(),
                screen_name = "utunga",
                twitter_id = 9128123123
            },
            new Tweet
            {
                text = "RT @OMGTeenQuotez: the bad experiences i been through made me stronger..  #OMGTeenQuotez",
                date_scanned = DateTime.Now.ToLongTimeString(),
                date_tweeted = DateTime.Now.ToLongTimeString(),
                english_similarity = 0.5,
                sample_reason = SampleReason.user_data.ToString(),
                screen_name = "utunga",
                twitter_id = 9128123123
            }
        };

        foreach (Tweet sample in samples)
        {
            // Read once per tweet so every Word built below carries the same values.
            string author = sample.screen_name;
            long? tweetId = sample.twitter_id;

            foreach (string token in wordTokenizer.Tokenize(sample.text))
            {
                Word candidate = new Word { screen_name = author, text = token, twitter_id = tweetId };
                if (!candidate.IsEntity())
                {
                    continue;
                }

                sample.AddEntity(candidate.text);
            }

            // Push first, then dump the serialized tweet to the console.
            couchReceiver.Push(sample);
            Console.Out.WriteLine("JSON:" + JSON.Serialize(sample));
        }
    }