Exemplo n.º 1
0
        /// <summary>
        /// Queries the Twitter search endpoint for recent tweets matching <paramref name="keyword"/>
        /// and collects the tweet id, author id, author screen name and text of each result.
        /// </summary>
        /// <param name="keyword">
        /// Raw (not yet URL-encoded) search term. It is escaped here before being placed in the
        /// query string, so characters such as '#', '&amp;' or '+' stay inside the <c>q</c> parameter.
        /// </param>
        /// <returns>
        /// The list that is also stored in <c>lst_structTweetIDs</c>. When an exception occurs the error
        /// is written to the log files and whatever was collected up to that point is returned.
        /// </returns>
        public List <StructTweetIDs> GetTweetData(string keyword)
        {
            lst_structTweetIDs = new List <StructTweetIDs>();

            try
            {
                // Escaping is required: an unescaped '#' would turn the rest of the URL into a fragment
                // and an unescaped '&' would start a new query parameter.
                string escapedKeyword = Uri.EscapeDataString(keyword);
                string searchURL = "http://search.twitter.com/search.json?q=" + escapedKeyword + "&rpp=100&include_entities=true&result_type=recent";
                string res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");

                // Every result carries a "from_user" key; split on it and drop the text preceding the first one.
                string[] splitRes = Regex.Split(res_Get_searchURL, "\"from_user\"");

                foreach (string item in splitRes.Skip(1))
                {
                    // Put the key removed by the split back so the parse helpers can find it.
                    string modified_Item = "\"from_user\"" + item;

                    string from_user = Globussoft.GlobusHttpHelper.ParseJson(modified_Item, "from_user");
                    string from_user_id = Globussoft.GlobusHttpHelper.ParseEncodedJson(modified_Item, "from_user_id");
                    string id = Globussoft.GlobusHttpHelper.ParseEncodedJson(modified_Item, "id");
                    string text = Globussoft.GlobusHttpHelper.ParseJson(modified_Item, "text");

                    StructTweetIDs structTweetIDs = new StructTweetIDs();

                    structTweetIDs.ID_Tweet             = id;
                    structTweetIDs.ID_Tweet_User        = from_user_id;
                    structTweetIDs.username__Tweet_User = from_user;
                    structTweetIDs.wholeTweetMessage    = text;

                    lst_structTweetIDs.Add(structTweetIDs);
                }
            }
            catch (Exception ex)
            {
                // Best effort: record the failure in both log files and return what has been gathered so far.
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData() -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData() -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
            }

            return lst_structTweetIDs;
        }
        /// <summary>
        /// Downloads a user's timeline from the Twitter v1 <c>statuses/user_timeline.json</c> endpoint and
        /// extracts the tweet id, user id, screen name and text of each entry by string slicing.
        /// </summary>
        /// <param name="keyword">
        /// Raw (not yet URL-encoded) user id or screen name. When <c>NumberHelper.ValidateNumber</c> accepts
        /// it, it is sent as <c>user_id</c>; otherwise as <c>screen_name</c>.
        /// </param>
        /// <returns>
        /// The list that is also stored in <c>lst_structTweetIDs</c>. Any exception is swallowed and the
        /// entries collected up to that point are returned; fields that could not be located stay empty.
        /// </returns>
        public List<StructTweetIDs> GetTweetData_ByUserName(string keyword)
        {
            lst_structTweetIDs = new List<StructTweetIDs>();

            try
            {
                // The parameter name must not contain whitespace ("user_id =" is not recognised as user_id).
                string lookupParameter = NumberHelper.ValidateNumber(keyword) ? "user_id" : "screen_name";
                string searchURL = "https://api.twitter.com/1/statuses/user_timeline.json?include_entities=true&include_rts=true&" + lookupParameter + "=" + Uri.EscapeDataString(keyword) + "&count=" + TweetExtractCount;

                string res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");

                // Each tweet object starts with {"created_at; drop the text preceding the first one.
                string[] splitRes = Regex.Split(res_Get_searchURL, "{\"created_at");

                // The user id is read from the first "user" object of the WHOLE response, so it is the same
                // for every entry; compute it once instead of re-splitting the response per tweet.
                string TweeterUserId = string.Empty;
                string[] userSections = Regex.Split(res_Get_searchURL, "user\":");
                if (userSections.Length > 1)
                {
                    int idStart = userSections[1].IndexOf("{\"id\":");
                    if (idStart >= 0)
                    {
                        string idTail = userSections[1].Substring(idStart);
                        int idEnd = idTail.IndexOf(",\"id_str");
                        if (idEnd >= 0)
                        {
                            TweeterUserId = idTail.Substring(0, idEnd).Replace("{\"id\":", string.Empty).Trim();
                        }
                    }
                }

                foreach (string item in splitRes.Skip(1))
                {
                    // The parse helper is given the item prefixed with "from_user", as before.
                    string modified_Item = "\"from_user\"" + item;
                    string Tweetid = Globussoft.GlobusHttpHelper.ParseEncodedJson(modified_Item, "id");

                    // Tweet text: everything between "text": and the next ,"  (left empty when not found).
                    string text = string.Empty;
                    int textStart = item.IndexOf("\"text\":");
                    if (textStart >= 0)
                    {
                        string textTail = item.Substring(textStart).Replace("\"text\":", "");
                        int textEnd = textTail.IndexOf(",\"");
                        if (textEnd >= 0)
                        {
                            text = textTail.Substring(0, textEnd).Replace("\"", string.Empty).Replace("&#39;", "'").Trim();
                        }
                    }

                    // Screen name: first occurrence of screen_name up to the next ,"  (left empty when not found).
                    // NOTE(review): this also matches keys that merely contain "screen_name"
                    // (e.g. in_reply_to_screen_name) if they come first - confirm against real responses.
                    string TweeterUserScreanName = string.Empty;
                    int nameStart = item.IndexOf("screen_name");
                    if (nameStart >= 0)
                    {
                        string nameTail = item.Substring(nameStart);
                        int nameEnd = nameTail.IndexOf(",\"");
                        if (nameEnd >= 0)
                        {
                            TweeterUserScreanName = nameTail.Substring(0, nameEnd).Replace("screen_name\":\"", string.Empty).Replace("\"", string.Empty).Replace("&#39;", "'").Trim();
                        }
                    }

                    StructTweetIDs structTweetIDs = new StructTweetIDs();

                    structTweetIDs.ID_Tweet = Tweetid;
                    structTweetIDs.ID_Tweet_User = TweeterUserId;
                    structTweetIDs.username__Tweet_User = TweeterUserScreanName;
                    structTweetIDs.wholeTweetMessage = text;

                    lst_structTweetIDs.Add(structTweetIDs);
                }
            }
            catch (Exception)
            {
                // Deliberately best effort: failures (network, parsing) are not reported and the
                // entries gathered so far are returned to the caller.
            }

            return lst_structTweetIDs;
        }
        /// <summary>
        /// Queries the Twitter search endpoint for recent tweets matching <paramref name="keyword"/>
        /// and collects the tweet id, author id, author screen name and text of each result.
        /// </summary>
        /// <param name="keyword">
        /// Raw (not yet URL-encoded) search term. It is escaped here before being placed in the
        /// query string, so characters such as '#', '&amp;' or '+' stay inside the <c>q</c> parameter.
        /// </param>
        /// <returns>
        /// The list that is also stored in <c>lst_structTweetIDs</c>. When an exception occurs the error
        /// is written to the log files and whatever was collected up to that point is returned.
        /// </returns>
        public List<StructTweetIDs> GetTweetData(string keyword)
        {
            lst_structTweetIDs = new List<StructTweetIDs>();

            try
            {
                // Escaping is required: an unescaped '#' would turn the rest of the URL into a fragment
                // and an unescaped '&' would start a new query parameter.
                string escapedKeyword = Uri.EscapeDataString(keyword);
                string searchURL;

                // noOfRecords is used as the page size unless it is non-positive or exactly 20;
                // in those cases 100 results are requested.
                if (noOfRecords > 0 && noOfRecords != 20)
                {
                    searchURL = "http://search.twitter.com/search.json?q=" + escapedKeyword + "&rpp=" + noOfRecords + "&include_entities=true&result_type=recent";
                }
                else
                {
                    searchURL = "http://search.twitter.com/search.json?q=" + escapedKeyword + "&rpp=100&include_entities=true&result_type=recent";
                }

                string res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");

                // Every result carries a "from_user" key; split on it and drop the text preceding the first one.
                string[] splitRes = Regex.Split(res_Get_searchURL, "\"from_user\"");

                foreach (string item in splitRes.Skip(1))
                {
                    // Put the key removed by the split back so the parse helpers can find it.
                    string modified_Item = "\"from_user\"" + item;

                    string from_user = Globussoft.GlobusHttpHelper.ParseJson(modified_Item, "from_user");
                    string from_user_id = Globussoft.GlobusHttpHelper.ParseEncodedJson(modified_Item, "from_user_id");
                    string id = Globussoft.GlobusHttpHelper.ParseEncodedJson(modified_Item, "id");
                    string text = Globussoft.GlobusHttpHelper.ParseJson(modified_Item, "text");

                    StructTweetIDs structTweetIDs = new StructTweetIDs();

                    structTweetIDs.ID_Tweet = id;
                    structTweetIDs.ID_Tweet_User = from_user_id;
                    structTweetIDs.username__Tweet_User = from_user;
                    structTweetIDs.wholeTweetMessage = text;

                    lst_structTweetIDs.Add(structTweetIDs);
                }
            }
            catch (Exception ex)
            {
                // Best effort: record the failure in both log files and return what has been gathered so far.
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData() -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData() -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
            }

            return lst_structTweetIDs;
        }
        /// <summary>
        /// Requests the "timeline/with_replies" page for the profile named by <paramref name="keyword"/>
        /// from twitter.com, parses the response as JSON and slices the tweet id, screen name, user id and
        /// cleaned tweet text out of the contained markup.
        /// </summary>
        /// <param name="keyword">Profile name; it is URL-escaped before being inserted into the request path.</param>
        /// <returns>
        /// The list that is also stored in <c>lst_structTweetIDs</c>, holding at most <c>noOfRecords</c> entries.
        /// All exceptions are swallowed: on failure the entries collected so far are returned, and fields
        /// that could not be extracted stay empty.
        /// </returns>
        public List<StructTweetIDs> TweetExtractor_ByUserName_New(string keyword)
        {
            lst_structTweetIDs = new List<StructTweetIDs>();
            // NOTE(review): user_name and i are never read in this method.
            string user_name = string.Empty;
            int i = 0;
            try
            {
                string HomePagedata = string.Empty;
                // Only a single page is requested; pagination (max_id / has_more_items) is not implemented.
                {
                    HomePagedata = globushttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/i/profiles/show/" + Uri.EscapeDataString(keyword) + "/timeline/with_replies?composed_count=0&count=" + noOfRecords + "&include_available_features=1&include_entities=1"),"","");
                }

                if (!string.IsNullOrEmpty(HomePagedata))
                {
                    JObject Abc = JObject.Parse(HomePagedata);
                    string datahkj = string.Empty;
                    // Keeps the string form of the LAST top-level entry of the parsed JSON object.
                    // NOTE(review): presumably that entry is the HTML payload - confirm against a real response.
                    foreach (object data in Abc)
                    {
                        datahkj = data.ToString();
                    }

                    // One chunk per tweet container; the text before the first marker is discarded.
                    string[] splitRes = Regex.Split(datahkj, "ProfileTweet u-textBreak js-tweet js-stream-tweet js-actionable-tweet");
                    splitRes = splitRes.Skip(1).ToArray();

                    foreach (string item in splitRes)
                    {
                        // NOTE(review): modified_Item is never read.
                        string modified_Item = string.Empty;
                        string text = string.Empty;
                        string TweeterUserId = string.Empty;
                        string TweeterUserScreanName = string.Empty;
                        string Tweetid = string.Empty;

                        // Tweet id: value following the literal text data-item-id=\" (backslash included)
                        // up to the next \" ; stays empty when the marker is missing.
                        try
                        {
                            int startindex = item.IndexOf("data-item-id=\\\"");
                            string start = item.Substring(startindex).Replace("data-item-id=\\\"", "");
                            int endindex = start.IndexOf("\\\"");
                            string end = start.Substring(0, endindex);
                            Tweetid = end;
                        }
                        catch (Exception ex)
                        {
                            // Swallowed on purpose: a missing marker makes Substring throw and the field stays empty.
                        }

                        // Screen name of the tweet's author, extracted the same way from data-screen-name=\"
                        try
                        {
                            int startindex = item.IndexOf("data-screen-name=\\\"");
                            string start = item.Substring(startindex).Replace("data-screen-name=\\\"", "");
                            int endindex = start.IndexOf("\\\"");
                            string end = start.Substring(0, endindex);
                            TweeterUserScreanName = end;
                        }
                        catch (Exception ex)
                        {
                            // Swallowed on purpose: the field stays empty.
                        }

                        // User id of the tweet's author, extracted the same way from data-user-id=\"
                        try
                        {
                            int startindex = item.IndexOf("data-user-id=\\\"");
                            string start = item.Substring(startindex).Replace("data-user-id=\\\"", "");
                            int endindex = start.IndexOf("\\\"");
                            string end = start.Substring(0, endindex);
                            TweeterUserId = end;
                        }
                        catch (Exception ex)
                        {
                            // Swallowed on purpose: the field stays empty.
                        }

                        // Tweet text: locate the text paragraph, strip markup remnants, then rebuild the
                        // text piece by piece while dropping t.co links and repeated fragments.
                        try
                        {
                            #region code commented by PUJA
                            // (An earlier, disabled variant of the extraction below used to be kept here as commented-out code.)
                            #endregion
                GlobusRegex regx = new GlobusRegex();

                        // NOTE(review): tweetUserid is never read.
                        string tweetUserid = string.Empty;
                        // Prefer the profile-page class name; fall back to the generic tweet-text class.
                        int startindex = item.IndexOf("ProfileTweet-text js-tweet-text u-dir");
                        if (startindex == -1)
                        {
                            startindex = item.IndexOf("js-tweet-text tweet-text");
                        }

                        // If neither marker exists startindex is still -1, Substring throws and text stays empty.
                        string start = item.Substring(startindex).Replace("ProfileTweet-text js-tweet-text u-dir", "").Replace("js-tweet-text tweet-text tweet-text-rtl\\\"", "");
                        int endindex = start.IndexOf("</p>");

                        // No closing paragraph tag: cut at the tweet footer marker instead.
                        if (endindex == -1)
                        {
                            endindex = 0;
                            endindex = start.IndexOf("stream-item-footer");
                        }

                        string end = start.Substring(0, endindex);
                        // Project helper; presumably removes HTML tags - TODO confirm.
                        end = regx.StripTagsRegex(end);
                        // Remove leftover attribute/tag fragments, then quotes, backslashes and angle brackets,
                        // and finally decode a few HTML entities. The order of these replacements matters.
                        text = end.Replace("&nbsp;", "").Replace("a href=", "").Replace("/a", "").Replace("<span", "").Replace("</span", "").Replace("class=\\\"js-display-url\\\"", "").Replace("class=\\\"tco-ellipsis\\\"", "").Replace("class=\\\"invisible\\\"", "").Replace("<strong>", "").Replace("target=\\\"_blank\\\"", "").Replace("class=\\\"twitter-timeline-link\\\"", "").Replace("</strong>", "").Replace("rel=\\\"nofollow\\\" dir=\\\"ltr\\\" data-expanded-url=", "").Replace("dir=\"ltr\"", "");
                        text = text.Replace("&quot;", "").Replace("<", "").Replace(">", "").Replace("\"", "").Replace("\\", "").Replace("title=", "").Replace("&amp;", "&").Replace("&#39;", "'").Replace("&lt;", "<").Replace("&gt;", ">");

                        // Split on "http" so every URL starts its own piece, then rebuild the text from the pieces.
                        string[] array = Regex.Split(text, "http");
                        text = string.Empty;
                        foreach (string itemData in array)
                        {
                            // Pieces containing "t.co" (shortened links) are dropped entirely.
                            if (!itemData.Contains("t.co"))
                            {
                                string data = string.Empty;
                                if (itemData.Contains("//"))
                                {
                                    // Piece was a URL: restore the "http" prefix removed by the split.
                                    data = ("http" + itemData).Replace(" span ", string.Empty);
                                    // Append only if the space-stripped piece is not already part of the text.
                                    if (!text.Contains(itemData.Replace(" ", "")))
                                    {
                                        text += data.Replace("u003c", string.Empty).Replace("u003e", string.Empty);
                                    }
                                }
                                else
                                {
                                    // Plain text piece: same duplicate check, plus removal of newlines,
                                    // indentation runs and a known attribute remnant.
                                    if (!text.Contains(itemData.Replace(" ", "")))
                                    {
                                        text += itemData.Replace("u003c", string.Empty).Replace("u003e", string.Empty).Replace("\n", string.Empty).Replace("                 ", string.Empty).Replace("        lang=endata-aria-label-part=0", string.Empty);
                                    }
                                }
                            }

                            // A sentinel is appended so the project helper getBetween can be asked for
                            // the part between the attribute remnant and the sentinel.
                            // NOTE(review): presumably this keeps only what follows the remnant - confirm getBetween semantics.
                            if (text.Contains("data-aria-label-part=0"))
                            {
                                text = globushttpHelper.getBetween(text + ":&$#@", "data-aria-label-part=0", ":&$#@");
                            }
                        }
                 }
                        catch (Exception ex)
                        {
                            // Swallowed on purpose: text keeps whatever value it had when the failure occurred.
                        }

                        StructTweetIDs structTweetIDs = new StructTweetIDs();

                        structTweetIDs.ID_Tweet = Tweetid;
                        structTweetIDs.ID_Tweet_User = TweeterUserId;
                        structTweetIDs.username__Tweet_User = TweeterUserScreanName;
                        structTweetIDs.wholeTweetMessage = text;

                        // Stop as soon as noOfRecords entries have been collected.
                        if (lst_structTweetIDs.Count < noOfRecords)
                        {
                            lst_structTweetIDs.Add(structTweetIDs);
                        }
                        else
                        {
                            break;
                        }
                    }
                }
                return lst_structTweetIDs;
            }
            catch (Exception ex)
            {
                // Any failure (request, JSON parsing) is swallowed; return what has been collected so far.
                return lst_structTweetIDs;
            }
        }
        public List<StructTweetIDs> GetTweetData_New_ForCampaign(string keyword,int noOfReplies)
        {
            try
            {

                int counter = 0;
                int counterNoOfReplies = noOfReplies;
                lst_structTweetIDs = new List<StructTweetIDs>();
                string res_Get_searchURL = string.Empty;
                string searchURL = string.Empty;
                string maxid = string.Empty;

                string TweetId = string.Empty;

                if (keyword.Trim().Contains(" "))
                {
                    keyword = keyword.Replace(" ", "+");
                }
            startAgain:
                //if (counter == 0)
                //{
                //    searchURL = "https://twitter.com/i/search/timeline?type=recent&src=typd&include_available_features=1&include_entities=1&max_id=0&q=" + keyword + "&composed_count=0&count=" + noOfRecords + "";
                //    counter++;
                //}
                //else
                //{
                //    searchURL = "https://twitter.com/i/search/timeline?type=recent&src=typd&include_available_features=1&include_entities=1&max_id=0&q=" + keyword + "&composed_count=0&count=" + noOfRecords + "&scroll_cursor=" + TweetId;
                //}
                if (counter == 0)
                {
                    //searchURL = "https://twitter.com/i/search/timeline?type=recent&src=typd&include_available_features=1&include_entities=1&max_id=0&q=" + keyword + "&composed_count=0&count=" + noOfRecords + "";
                    searchURL = "https://twitter.com/i/search/timeline?q=" + Uri.EscapeDataString(keyword) + "&src=typd&f=realtime";
                    counter++;
                }
                else
                {
                    //searchURL = "https://twitter.com/i/search/timeline?type=recent&src=typd&include_available_features=1&include_entities=1&max_id=0&q=" + keyword + "&composed_count=0&count=" + noOfRecords + "&scroll_cursor=" + TweetId;

                    searchURL = "https://twitter.com/i/search/timeline?q=" + Uri.EscapeDataString(keyword) + "&src=typd&f=realtime&include_available_features=1&include_entities=1&last_note_ts=0&oldest_unread_id=0&scroll_cursor=" + TweetId + "";
                }

                try
                {
                    res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");

                    if (string.IsNullOrEmpty(res_Get_searchURL))
                    {
                        res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                    }
                    try
                    {
                        //string sjss = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                        string[] splitRes = Regex.Split(res_Get_searchURL, "refresh_cursor");
                        //splitRes = splitRes.Skip(1).ToArray();
                        foreach (string item in splitRes)
                        {

                            if (item.Contains("refresh_cursor"))
                            {
                                int startIndex = item.IndexOf("TWEET-");
                                string start = item.Substring(startIndex).Replace("data-user-id=\\\"", "");
                                int endIndex = start.IndexOf("\"");
                                string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                                TweetId = end;
                            }
                            if (item.Contains("scroll_cursor"))
                            {
                                int startIndex = item.IndexOf("TWEET-");
                                string start = item.Substring(startIndex).Replace("data-user-id=\\\"", "");
                                int endIndex = start.IndexOf("\"");
                                string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                                TweetId = end;
                            }
                        }
                    }
                    catch (Exception)
                    {
                    }
                }
                catch (Exception ex)
                {
                    System.Threading.Thread.Sleep(2000);
                    res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " --  res_Get_searchURL --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- " + keyword + " -- res_Get_searchURL --> " + ex.Message, Globals.Path_TwtErrorLogs);
                }

                if (!string.IsNullOrEmpty(res_Get_searchURL))
                {
                    object DEserizedData = Newtonsoft.Json.JsonConvert.DeserializeObject<dynamic>(res_Get_searchURL);
                    string DataHtml = (string)((JObject)DEserizedData)["items_html"];
                    string[] splitRes = Regex.Split(DataHtml, "data-item-id");

                    splitRes = splitRes.Skip(1).ToArray();

                    GlobusRegex regx = new GlobusRegex();
                    foreach (string item in splitRes)
                    {
                        if (item.Contains("data-screen-name=") && !item.Contains("follow-button") && !item.Contains("Following"))
                        {

                        }
                        else
                        {
                            continue;
                        }
                        string modified_Item = "\"from_user\"" + item;

                        string id = "";
                        try
                        {
                            int startIndex = item.IndexOf("data-user-id=");
                            string start = item.Substring(startIndex).Replace("data-user-id=", "");
                            int endIndex = start.IndexOf("data-is-reply-to");
                            if (endIndex == -1)
                            {
                                endIndex = start.IndexOf("data-expanded-footer=");
                            }
                            if (endIndex == -1)
                            {
                                endIndex = start.IndexOf(">");
                            }
                            string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "").Replace("\n", string.Empty);
                            if (end.Contains(" "))
                            {
                                end = end.Split(' ')[0];
                            }
                            id = end;
                        }
                        catch (Exception ex)
                        {
                            id = "null";
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
                        }

                        string from_user_id = "";
                        try
                        {
                            int startIndex = item.IndexOf("data-screen-name=");
                            string start = item.Substring(startIndex).Replace("data-screen-name=", "");
                            int endIndex = start.IndexOf("data-name");
                            if (endIndex > 100)
                            {
                                endIndex = 0;
                                endIndex = start.IndexOf("data-user");
                            }
                            string end = start.Substring(0, endIndex).Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", "");
                            from_user_id = end;
                        }
                        catch (Exception ex)
                        {
                            from_user_id = "null";
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwtErrorLogs);
                        }

                        string tweetUserid = string.Empty;
                        try
                        {
                            int startIndex = item.IndexOf("=\"");
                            string start = item.Substring(startIndex).Replace("=\"", "");
                            int endIndex = start.IndexOf("\"");
                            string end = start.Substring(0, endIndex).Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", "");
                            tweetUserid = end;
                        }
                        catch (Exception ex)
                        {
                            from_user_id = "null";
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwtErrorLogs);
                        }

                        ///Tweet Text
                        string text = string.Empty;
                        try
                        {

                            int startindex = item.IndexOf("js-tweet-text tweet-text\"");
                            if (startindex == -1)
                            {
                                startindex = 0;
                                startindex = item.IndexOf("js-tweet-text tweet-text");
                            }

                            string start = item.Substring(startindex).Replace("js-tweet-text tweet-text\"", "").Replace("js-tweet-text tweet-text tweet-text-rtl\"", "");
                            int endindex = start.IndexOf("</p>");

                            if (endindex == -1)
                            {
                                endindex = 0;
                                endindex = start.IndexOf("stream-item-footer");
                            }

                            string end = start.Substring(0, endindex);
                            end = regx.StripTagsRegex(end);
                            text = end.Replace("&nbsp;", "").Replace("a href=", "").Replace("/a", "").Replace("<span", "").Replace("</span", "").Replace("class=\\\"js-display-url\\\"", "").Replace("class=\\\"tco-ellipsis\\\"", "").Replace("class=\\\"invisible\\\"", "").Replace("<strong>", "").Replace("target=\\\"_blank\\\"", "").Replace("class=\\\"twitter-timeline-link\\\"", "").Replace("</strong>", "").Replace("rel=\\\"nofollow\\\" dir=\\\"ltr\\\" data-expanded-url=", "");
                            text = text.Replace("&quot;", "").Replace("<", "").Replace(">", "").Replace("\"", "").Replace("\\", "").Replace("title=", "");

                            string[] array = Regex.Split(text, "http");
                            text = string.Empty;
                            foreach (string itemData in array)
                            {
                                if (!itemData.Contains("t.co"))
                                {
                                    string data = string.Empty;
                                    if (itemData.Contains("//"))
                                    {
                                        data = ("http" + itemData).Replace(" span ", string.Empty);
                                        if (!text.Contains(itemData.Replace(" ", "")))// && !data.Contains("class") && !text.Contains(data))
                                        {
                                            text += data.Replace("u003c", string.Empty).Replace("u003e", string.Empty).Replace("lang=en data-aria-label-part=0",string.Empty);
                                        }
                                    }
                                    else
                                    {
                                        if (!text.Contains(itemData.Replace(" ", "")))
                                        {
                                            text += itemData.Replace("u003c", string.Empty).Replace("u003e", string.Empty).Replace("lang=en data-aria-label-part=0",string.Empty);
                                        }
                                    }
                                }
                            }
                            if (text.Contains("data-aria-label-part=0"))
                            {
                                text = globushttpHelper.getBetween(text + ":&$#@", "data-aria-label-part=0", ":&$#@");
                            }
                        }
                        catch { };

                        StructTweetIDs structTweetIDs = new StructTweetIDs();

                        if (id != "null")
                        {
                            structTweetIDs.ID_Tweet = tweetUserid;
                            structTweetIDs.ID_Tweet_User = id;
                            structTweetIDs.username__Tweet_User = from_user_id;
                            structTweetIDs.wholeTweetMessage = text;
                            lst_structTweetIDs.Add(structTweetIDs);
                        }
                        if (lst_structTweetIDs.Count >= counterNoOfReplies)
                        {
                            return lst_structTweetIDs;
                        }
                    }
                    lst_structTweetIDs = lst_structTweetIDs.Distinct().ToList();
                }

                if (lst_structTweetIDs.Count > counterNoOfReplies)
                {
                    if (res_Get_searchURL.Contains("has_more_items\":false"))
                    {
                        return lst_structTweetIDs;
                    }
                    else
                        goto startAgain;
                }
                else
                {
                    if (res_Get_searchURL.Contains("has_more_items\":false"))
                    {
                        return lst_structTweetIDs;
                    }
                    else
                        goto startAgain;
                }
            }
            catch (Exception ex)
            {
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
            }

            return lst_structTweetIDs;
        }
Exemplo n.º 6
0
        /// <summary>
        /// Downloads the profile timeline for <paramref name="keyword"/> and scrapes tweet id, author
        /// screen name, author user id and tweet text out of the returned markup.
        /// </summary>
        /// <param name="keyword">
        /// Value inserted into the <c>/i/profiles/show/{keyword}/timeline</c> URL path. It is used as-is
        /// (not URL-escaped) by this method.
        /// </param>
        /// <returns>
        /// The shared <c>lst_structTweetIDs</c> list, holding at most <c>noOfRecords</c> entries. Items that
        /// contain <c>data-retweet-id=</c> are skipped. If anything throws (download, JSON parsing, ...)
        /// the entries collected so far are returned; no exception escapes.
        /// </returns>
        public List<StructTweetIDs> TweetExtractor_ByUserName_New_New(string keyword)
        {
            lst_structTweetIDs = new List<StructTweetIDs>();

            try
            {
                string HomePagedata = globushttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/i/profiles/show/" + keyword + "/timeline?include_available_features=1&include_entities=1&composed_count=0&count=" + noOfRecords), "", "");
                if (string.IsNullOrEmpty(HomePagedata))
                {
                    return lst_structTweetIDs;
                }

                JObject Abc = JObject.Parse(HomePagedata);

                // Only the string form of the LAST entry enumerated from the JSON object is kept.
                // NOTE(review): presumably that entry carries the timeline markup - confirm against a live response.
                string datahkj = string.Empty;
                foreach (object data in Abc)
                {
                    datahkj = data.ToString();
                }

                // Try the older stream-item class list first, then the ProfileTweet one.
                string[] splitRes = Regex.Split(datahkj, "js-stream-item stream-item stream-item expanding-stream-item");
                if (splitRes.Length == 1)
                {
                    splitRes = Regex.Split(datahkj, "ProfileTweet u-textBreak js-tweet js-stream-tweet js-actionable-tweet");
                }

                // Element 0 is whatever precedes the first tweet container.
                splitRes = splitRes.Skip(1).ToArray();

                // Guarded: with no tweet containers the array is empty and indexing [0] would throw.
                if (splitRes.Length > 0 && splitRes[0].Contains("Pinned Tweet"))
                {
                    splitRes = splitRes.Skip(1).ToArray();
                }

                foreach (string item in splitRes)
                {
                    // Stop before parsing an item that could not be stored anyway.
                    if (lst_structTweetIDs.Count >= noOfRecords)
                    {
                        break;
                    }

                    if (item.Contains("data-retweet-id="))
                    {
                        continue;
                    }

                    StructTweetIDs structTweetIDs = new StructTweetIDs();
                    structTweetIDs.ID_Tweet = ExtractEscapedAttributeValue(item, "data-item-id=\\\"");
                    structTweetIDs.ID_Tweet_User = ExtractEscapedAttributeValue(item, "data-user-id=\\\"");
                    structTweetIDs.username__Tweet_User = ExtractEscapedAttributeValue(item, "data-screen-name=\\\"");
                    structTweetIDs.wholeTweetMessage = ExtractTimelineTweetText(item);

                    lst_structTweetIDs.Add(structTweetIDs);
                }
            }
            catch (Exception)
            {
                // Deliberate best effort: return whatever was collected before the failure.
            }

            return lst_structTweetIDs;
        }

        /// <summary>
        /// Returns the text that follows the first <paramref name="marker"/> in <paramref name="item"/>, up to
        /// the next backslash-quote pair; returns an empty string when the marker or the terminator is absent.
        /// </summary>
        private static string ExtractEscapedAttributeValue(string item, string marker)
        {
            // Ordinal search: these are markup tokens, not linguistic text.
            int startIndex = item.IndexOf(marker, StringComparison.Ordinal);
            if (startIndex < 0)
            {
                return string.Empty;
            }

            // Every occurrence of the marker is removed from the remainder before the terminator is located.
            string remainder = item.Substring(startIndex).Replace(marker, "");
            int endIndex = remainder.IndexOf("\\\"", StringComparison.Ordinal);
            if (endIndex < 0)
            {
                return string.Empty;
            }

            return remainder.Substring(0, endIndex);
        }

        /// <summary>
        /// Pulls the tweet text out of one timeline item, strips leftover markup fragments and drops
        /// every "http"-delimited piece that contains "t.co" or is already present in the result.
        /// </summary>
        private string ExtractTimelineTweetText(string item)
        {
            string text = string.Empty;

            try
            {
                string[] getText = Regex.Split(item, "TweetTextSize TweetTextSize--16px js-tweet-text tweet-text");
                if (getText.Length > 1)
                {
                    try
                    {
                        text = Utils.getBetween(getText[1], ">", "<");
                    }
                    catch (Exception)
                    {
                        // Utils.getBetween is defined elsewhere; a failure here leaves the text empty.
                    }
                }

                // The order of these removals is significant and kept exactly as before.
                text = text.Replace("class=\\\"invisible\\\"", "")
                           .Replace("<b", "")
                           .Replace("</b", "")
                           .Replace("<s", "")
                           .Replace("</s", "")
                           .Replace("class=\\\"twitter-atreply pretty-link\\\" dir=\\\"ltr\\\"", "")
                           .Replace(">", "")
                           .Replace("class=\\\"js-display-url\\\"", "")
                           .Replace("class=\\\"invisible\\\">", "")
                           .Replace("class=\\\"tco-ellipsis\\\"", "")
                           .Replace("class=\\\"invisible\\\"", "")
                           .Replace("&nbsp;", "")
                           .Replace("</a", "")
                           .Replace("</span", "")
                           .Replace("<span", "")
                           .Replace("<a href=", "")
                           .Replace("rel=nofollow dir=ltr data-expanded-url=", "")
                           .Replace("rel=\\\"nofollow\\\" dir=\\\"ltr\\\" data-expanded-url=\\\"", "")
                           .Replace("class=\\\"twitter-timeline-link\\\" target=\\\"_blank\\\" title=\\\"", "");
                text = text.Replace("\"", "").Replace("<", "").Replace("\\\"", "").Replace("\\", "");

                string[] pieces = Regex.Split(text, "http");
                text = string.Empty;
                foreach (string piece in pieces)
                {
                    // Skip t.co pieces and pieces whose space-less form is already in the result.
                    if (piece.Contains("t.co") || text.Contains(piece.Replace(" ", "")))
                    {
                        continue;
                    }

                    // A piece containing "//" was a URL: give it back the "http" consumed by the split.
                    text += piece.Contains("//") ? "http" + piece : piece;
                }
            }
            catch (Exception)
            {
                // Best effort: keep whatever value text held when the failure occurred.
            }

            return text;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Queries <c>http://search.twitter.com/search.json</c> for <paramref name="keyword"/> and slices tweet id,
        /// user id, screen name and text out of the raw response text, following the response's
        /// <c>next_page</c> value until <c>noOfRecords</c> items have been handled.
        /// </summary>
        /// <param name="keyword">
        /// Search phrase. Spaces are replaced with '+' and the result is then passed through
        /// <c>Uri.EscapeDataString</c> when the URL is built.
        /// </param>
        /// <returns>
        /// The shared <c>lst_structTweetIDs</c> list. Failures are written to the error logs instead of
        /// being thrown, so the list may be partial or empty.
        /// </returns>
        /// <remarks>
        /// Every stored tweet is also appended to <c>Globals.Path_keywordFollowerScrapedData</c> as
        /// <c>id:from_user_id</c>.
        /// NOTE(review): a page is parsed only when the response contains <c>"next_page":</c>, so a final
        /// (or only) page that lacks it is returned unparsed - confirm that is intended.
        /// NOTE(review): the log tag reads <c>GetPhotoFromUsername()</c> although this method is
        /// <c>KeywordStructData</c>; it is kept verbatim in case log consumers key on it.
        /// </remarks>
        public List<StructTweetIDs> KeywordStructData(string keyword)
        {
            try
            {
                int counter = 0;
                lst_structTweetIDs = new List<StructTweetIDs>();
                string res_Get_searchURL = string.Empty;

                if (keyword.Trim().Contains(" "))
                {
                    // NOTE(review): EscapeDataString below turns this '+' into %2B - confirm the service treats it as a space.
                    keyword = keyword.Replace(" ", "+");
                }

                // Both branches of the former noOfRecords check built this exact URL.
                string searchURL = "http://search.twitter.com/search.json?q=" + Uri.EscapeDataString(keyword) + "&result_type=mixed&count=" + noOfRecords;

                do
                {
                    try
                    {
                        res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                        if (string.IsNullOrEmpty(res_Get_searchURL))
                        {
                            // One immediate retry.
                            res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                        }

                        // A null response used to throw on Contains(), get retried in the catch below and
                        // loop without bound; it is now treated like an empty response.
                        if (string.IsNullOrEmpty(res_Get_searchURL))
                        {
                            return lst_structTweetIDs;
                        }

                        if (!res_Get_searchURL.Contains("\"next_page\":") || counter >= noOfRecords)
                        {
                            return lst_structTweetIDs;
                        }

                        // Point searchURL at the next page before parsing the current one.
                        int startindex = res_Get_searchURL.IndexOf("\"next_page\":", StringComparison.Ordinal);
                        if (startindex > 0)
                        {
                            string start = res_Get_searchURL.Substring(startindex).Replace("\"next_page\":\"", "");
                            int endIndex = start.IndexOf("\",", StringComparison.Ordinal);
                            if (endIndex >= 0)
                            {
                                searchURL = "http://search.twitter.com/search.json" + start.Substring(0, endIndex).Replace("from_user_id\":", "");
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        // Back off, fetch once more, then record the original failure.
                        System.Threading.Thread.Sleep(2000);
                        res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " --  res_Get_searchURL --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- " + keyword + " -- res_Get_searchURL --> " + ex.Message, Globals.Path_TwtErrorLogs);
                    }

                    if (!string.IsNullOrEmpty(res_Get_searchURL))
                    {
                        // Element 0 is the JSON that precedes the first tweet object.
                        string[] splitRes = Regex.Split(res_Get_searchURL, "{\"created_at\"").Skip(1).ToArray();

                        foreach (string item in splitRes)
                        {
                            if (counter >= noOfRecords)
                            {
                                break;
                            }
                            counter++;

                            // A missing marker makes Substring throw; the catch logs it and marks the id as "null".
                            string id = "";
                            try
                            {
                                int startIndex = item.IndexOf("\"id_str\"", StringComparison.Ordinal);
                                string start = item.Substring(startIndex);
                                int endIndex = start.IndexOf("\",", StringComparison.Ordinal);
                                id = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                            }
                            catch (Exception ex)
                            {
                                id = "null";
                                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
                            }

                            string from_user_id = "";
                            try
                            {
                                int startIndex = item.IndexOf("from_user_id\":", StringComparison.Ordinal);
                                string start = item.Substring(startIndex);
                                int endIndex = start.IndexOf(",\"from_user_id_str", StringComparison.Ordinal);
                                from_user_id = start.Substring(0, endIndex).Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", "");
                            }
                            catch (Exception ex)
                            {
                                from_user_id = "null";
                                LogKeywordStructDataFieldError(keyword, "from_user_id", ex);
                            }

                            string from_user = "";
                            try
                            {
                                if (item.Contains("\"screen_name\""))
                                {
                                    int startindex = item.IndexOf("\"screen_name\"", StringComparison.Ordinal);
                                    string start = item.Substring(startindex);
                                    int endIndex = start.IndexOf(",\"", StringComparison.Ordinal);
                                    from_user = start.Substring(0, endIndex).Replace("screen_name", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                                }
                                else
                                {
                                    int startindex = item.IndexOf("\"from_user\"", StringComparison.Ordinal);
                                    string start = item.Substring(startindex);
                                    int endIndex = start.IndexOf(",\"", StringComparison.Ordinal);
                                    from_user = start.Substring(0, endIndex).Replace("from_user", "").Replace("\"", "").Replace(",\"from", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                                }
                            }
                            catch (Exception ex)
                            {
                                // from_user simply stays empty; the already-parsed from_user_id is no longer overwritten here.
                                LogKeywordStructDataFieldError(keyword, "from_user", ex);
                            }

                            // Extracted once; the former second, identical pass produced the same value.
                            string text = string.Empty;
                            try
                            {
                                int startindex = item.IndexOf("\"text\":", StringComparison.Ordinal);
                                string start = item.Substring(startindex).Replace("\"text\":", "");
                                int endIndex = start.IndexOf(",\"", StringComparison.Ordinal);
                                if (endIndex == -1)
                                {
                                    endIndex = start.IndexOf("}", StringComparison.Ordinal);
                                }
                                text = start.Substring(0, endIndex).Replace("screen_name", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", string.Empty);
                            }
                            catch (Exception ex)
                            {
                                // text stays empty; from_user_id is left untouched and the log names the right field.
                                LogKeywordStructDataFieldError(keyword, "text", ex);
                            }

                            if (id != "null")
                            {
                                StructTweetIDs structTweetIDs = new StructTweetIDs();
                                structTweetIDs.ID_Tweet = id;
                                structTweetIDs.ID_Tweet_User = from_user_id;
                                structTweetIDs.username__Tweet_User = from_user;
                                structTweetIDs.wholeTweetMessage = text;

                                lst_structTweetIDs.Add(structTweetIDs);

                                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(id + ":" + from_user_id, Globals.Path_keywordFollowerScrapedData);
                            }
                        }
                    }
                }
                while (lst_structTweetIDs.Count < noOfRecords);
            }
            catch (Exception ex)
            {
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
            }

            return lst_structTweetIDs;
        }

        /// <summary>
        /// Writes one field-extraction failure of <c>KeywordStructData</c> to the scrapper log (timestamped) and to the error log.
        /// </summary>
        private void LogKeywordStructDataFieldError(string keyword, string field, Exception ex)
        {
            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " -- " + field + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- " + keyword + " -- " + field + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
        }
Exemplo n.º 8
0
        /// <summary>
        /// Scrapes Twitter's realtime search timeline for <paramref name="keyword"/> and collects the
        /// tweets whose cleaned text contains the keyword (used by the wait-and-reply feature).
        /// </summary>
        /// <remarks>
        /// Requests at most <c>noOfRecords</c> result pages, following the <c>scroll_cursor</c> value found
        /// in each response, and stops early once <c>noOfRecords</c> distinct tweets have been collected.
        /// Failures are written to the error logs instead of being rethrown; whatever was collected
        /// before the failure is returned.
        /// NOTE(review): several log tags below name StartKeywordExtracting()/GetTweetData_New() rather
        /// than this method. They are kept unchanged in case log consumers match on them.
        /// </remarks>
        /// <param name="keyword">Search term; it is URL-escaped before being placed in the request.</param>
        /// <returns>The shared <c>lst_structTweetIDs</c> list, which is re-created at the start of each call.</returns>
        public List<StructTweetIDs> GetTweetData_WaitReply(string keyword)
        {
            try
            {
                try
                {
                    lst_structTweetIDs = new List<StructTweetIDs>();
                    string scroll_cursor = "0";
                    GlobusHttpHelper HttpHelper = new GlobusHttpHelper();
                    for (int i = 0; i < noOfRecords; i++)
                    {
                        string pgsrcs = HttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/i/search/timeline?q=" + Uri.EscapeDataString(keyword) + "&src=typd&f=realtime&include_available_features=1&include_entities=1&last_note_ts=0&scroll_cursor=" + scroll_cursor), "", "");

                        // Pull the cursor for the next page out of the raw response text.
                        try
                        {
                            int startindex = pgsrcs.IndexOf("scroll_cursor");
                            string start = pgsrcs.Substring(startindex).Replace("scroll_cursor", string.Empty);
                            int endindex = start.IndexOf("refresh_cursor");
                            if (endindex < 0)
                            {
                                // No refresh_cursor follows, so the value runs up to the closing "} instead.
                                endindex = start.IndexOf("\"}");
                            }

                            // Strip the JSON punctuation in both cases. The fallback used to keep the raw
                            // slice (including the leading ":" and quotes), which corrupted the next request.
                            scroll_cursor = start.Substring(0, endindex).Replace("\\", string.Empty).Replace("\"", string.Empty).Replace(",", string.Empty).Replace(":", string.Empty).Trim();
                        }
                        catch (Exception ex)
                        {
                            // A missing marker makes Substring throw; the previous cursor is then reused.
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> StartKeywordExtracting() --> Getting Maxid --> " + ex.Message, Globals.Path_TweetCreatorErroLog);
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> StartKeywordExtracting() --> Getting Maxid  --> " + ex.Message, Globals.Path_TwtErrorLogs);
                        }

                        // The response is JSON; the tweets themselves are an HTML fragment under "items_html".
                        JObject Abc = JObject.Parse(pgsrcs);
                        string itemsHtml = (string)Abc["items_html"];

                        // Each tweet starts with this class list; the chunk before the first match is not a tweet.
                        string[] splitRes = Regex.Split(itemsHtml, "js-stream-item stream-item stream-item expanding-stream-item");
                        splitRes = splitRes.Skip(1).ToArray();
                        GlobusRegex regx = new GlobusRegex();
                        foreach (string item in splitRes)
                        {
                            string from_user_id = string.Empty;
                            string from_user_name = string.Empty;
                            string id = string.Empty;
                            string text = string.Empty;

                            // Tweet ID
                            try
                            {
                                int startindex = item.IndexOf("data-item-id=\"");
                                string start = item.Substring(startindex).Replace("data-item-id=\"", "");
                                int endindex = start.IndexOf("\"");
                                id = start.Substring(0, endindex);
                            }
                            catch (Exception ex)
                            {
                                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData_New() -- " + keyword + " --> userid --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData_New() -- " + keyword + " --> userid --> " + ex.Message, Globals.Path_TwtErrorLogs);
                            }

                            // Tweet author's screen name
                            try
                            {
                                int startindex = item.IndexOf("data-screen-name=\"");
                                string start = item.Substring(startindex).Replace("data-screen-name=\"", "");
                                int endindex = start.IndexOf("\"");
                                from_user_name = start.Substring(0, endindex);
                            }
                            catch (Exception ex)
                            {
                                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData_New() -- " + keyword + " --> from_user_name --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData_New() -- " + keyword + " --> from_user_name --> " + ex.Message, Globals.Path_TwtErrorLogs);
                            }

                            // Tweet author's user id
                            try
                            {
                                int startindex = item.IndexOf("data-user-id=\"");
                                string start = item.Substring(startindex).Replace("data-user-id=\"", "");
                                int endindex = start.IndexOf("\"");
                                from_user_id = start.Substring(0, endindex);
                            }
                            catch (Exception ex)
                            {
                                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData_New() -- " + keyword + " --> from_user_id --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData_New() -- " + keyword + " --> from_user_id --> " + ex.Message, Globals.Path_TwtErrorLogs);
                            }

                            // Tweet text: take the tweet paragraph, strip markup leftovers, then rebuild
                            // the message without t.co short links and without repeated fragments.
                            try
                            {
                                int startindex = item.IndexOf("js-tweet-text tweet-text\"");
                                string start = item.Substring(startindex).Replace("js-tweet-text tweet-text\"", "");
                                int endindex = start.IndexOf("</p>");
                                string end = start.Substring(0, endindex);
                                end = regx.StripTagsRegex(end);
                                text = end.Replace("&nbsp;", "").Replace("a href=", "").Replace("/a", "").Replace("<span", "").Replace("</span", "").Replace("class=\\\"js-display-url\\\"", "").Replace("class=\\\"tco-ellipsis\\\"", "").Replace("class=\\\"invisible\\\"", "").Replace("<strong>", "").Replace("target=\\\"_blank\\\"", "").Replace("class=\\\"twitter-timeline-link\\\"", "").Replace("</strong>", "").Replace("rel=\\\"nofollow\\\" dir=\\\"ltr\\\" data-expanded-url=", "");
                                text = text.Replace("&quot;", "").Replace("<", "").Replace(">", "").Replace("\"", "").Replace("\\", "").Replace("title=", "");

                                string[] array = Regex.Split(text, "http");
                                text = string.Empty;
                                foreach (string itemData in array)
                                {
                                    // Skip short links and fragments already present in the rebuilt text.
                                    if (itemData.Contains("t.co") || text.Contains(itemData.Replace(" ", "")))
                                    {
                                        continue;
                                    }

                                    // Splitting on "http" removed the scheme prefix; restore it for URL fragments.
                                    text += itemData.Contains("//") ? "http" + itemData : itemData;
                                }
                            }
                            catch (Exception ex)
                            {
                                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData_New() -- " + keyword + " --> text --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData_New() -- " + keyword + " --> text --> " + ex.Message, Globals.Path_TwtErrorLogs);
                            }

                            StructTweetIDs structTweetIDs = new StructTweetIDs();

                            structTweetIDs.ID_Tweet = id;
                            structTweetIDs.ID_Tweet_User = from_user_id;
                            structTweetIDs.username__Tweet_User = from_user_name;
                            structTweetIDs.wholeTweetMessage = text;
                            Log("[ " + DateTime.Now + " ] => [ " + id + " ]");
                            Log("[ " + DateTime.Now + " ] => [ " + from_user_id + " ]");
                            Log("[ " + DateTime.Now + " ] => [ " + from_user_name + " ]");
                            Log("[ " + DateTime.Now + " ] => [ " + text + " ]");
                            Log("---------------------------------------------------------------------------------------------------------------------------------------------------");

                            // Only keep tweets whose cleaned text actually contains the keyword (case-sensitive).
                            if (text.Contains(keyword))
                            {
                                lst_structTweetIDs.Add(structTweetIDs);
                                lst_structTweetIDs = lst_structTweetIDs.Distinct().ToList();
                            }
                            if (lst_structTweetIDs.Count == noOfRecords)
                            {
                                break;
                            }
                        }

                        // Enough tweets collected: stop requesting further pages.
                        if (lst_structTweetIDs.Count == noOfRecords)
                        {
                            break;
                        }
                    }
                }
                catch (Exception ex)
                {
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> StartKeywordExtracting() -->  " + ex.Message, Globals.Path_TweetCreatorErroLog);
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> StartKeywordExtracting() --> " + ex.Message, Globals.Path_TwtErrorLogs);
                }
            }
            catch (Exception ex)
            {
                // These two log calls used to sit after a return statement and could never run.
                try
                {
                    GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData_New() -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                    GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData_New() -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
                }
                catch (Exception)
                {
                    // Best effort only: this handler has always returned normally, so a failing
                    // log write must not turn into an exception for the caller.
                }
            }

            return lst_structTweetIDs;
        }
Exemplo n.º 9
0
        /// <summary>
        /// Fetches one page of Twitter's relevance-ordered search timeline for <paramref name="keyword"/>
        /// and extracts the tweet id, author id, author screen name and cleaned text of every tweet in it.
        /// </summary>
        /// <remarks>
        /// Only a single request is made; further result pages are not followed. Each extracted
        /// "tweetId:userId" pair is also appended to <c>Globals.Path_keywordFollowerScrapedData</c>.
        /// Failures are written to the error logs instead of being rethrown; whatever was collected
        /// before the failure is returned.
        /// </remarks>
        /// <param name="keyword">Search term; it is URL-escaped before being placed in the request.</param>
        /// <returns>The shared <c>lst_structTweetIDs</c> list, which is re-created at the start of each call.</returns>
        public List<StructTweetIDs> GetTweetData_New(string keyword)
        {
            lst_structTweetIDs = new List<StructTweetIDs>();
            try
            {
                // NOTE(review): max_id is filled from noOfRecords, which reads like a record count
                // rather than a tweet id; confirm this is intended.
                string searchURL = "https://twitter.com/i/search/timeline?type=relevance&src=typd&include_available_features=1&include_entities=1&max_id=" + noOfRecords + "&q=" + Uri.EscapeDataString(keyword);
                string res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), string.Empty, string.Empty);

                // The response is JSON; the tweets themselves are an HTML fragment under "items_html".
                JObject Abc = JObject.Parse(res_Get_searchURL);
                string itemsHtml = (string)Abc["items_html"];

                // Each tweet starts with this class list; the chunk before the first match is not a tweet.
                string[] splitRes = Regex.Split(itemsHtml, "js-stream-item stream-item stream-item expanding-stream-item");
                splitRes = splitRes.Skip(1).ToArray();
                GlobusRegex regx = new GlobusRegex();
                foreach (string item in splitRes)
                {
                    string from_user_id = string.Empty;
                    string from_user_name = string.Empty;
                    string id = string.Empty;
                    string text = string.Empty;

                    // Tweet ID
                    try
                    {
                        int startindex = item.IndexOf("data-item-id=\"");
                        string start = item.Substring(startindex).Replace("data-item-id=\"", "");
                        int endindex = start.IndexOf("\"");
                        id = start.Substring(0, endindex);
                    }
                    catch (Exception ex)
                    {
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData_New() -- " + keyword + " --> userid --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData_New() -- " + keyword + " --> userid --> " + ex.Message, Globals.Path_TwtErrorLogs);
                    }

                    // Tweet author's screen name
                    try
                    {
                        int startindex = item.IndexOf("data-screen-name=\"");
                        string start = item.Substring(startindex).Replace("data-screen-name=\"", "");
                        int endindex = start.IndexOf("\"");
                        from_user_name = start.Substring(0, endindex);
                    }
                    catch (Exception ex)
                    {
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData_New() -- " + keyword + " --> from_user_name --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData_New() -- " + keyword + " --> from_user_name --> " + ex.Message, Globals.Path_TwtErrorLogs);
                    }

                    // Tweet author's user id
                    try
                    {
                        int startindex = item.IndexOf("data-user-id=\"");
                        string start = item.Substring(startindex).Replace("data-user-id=\"", "");
                        int endindex = start.IndexOf("\"");
                        from_user_id = start.Substring(0, endindex);
                    }
                    catch (Exception ex)
                    {
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData_New() -- " + keyword + " --> from_user_id --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData_New() -- " + keyword + " --> from_user_id --> " + ex.Message, Globals.Path_TwtErrorLogs);
                    }

                    // Tweet text: take the tweet paragraph, strip markup leftovers, then rebuild
                    // the message without t.co short links and without repeated fragments.
                    try
                    {
                        int startindex = item.IndexOf("js-tweet-text tweet-text\"");
                        string start = item.Substring(startindex).Replace("js-tweet-text tweet-text\"", "");
                        int endindex = start.IndexOf("</p>");
                        string end = start.Substring(0, endindex);
                        end = regx.StripTagsRegex(end);
                        text = end.Replace("&nbsp;", "").Replace("a href=", "").Replace("/a", "").Replace("<span", "").Replace("</span", "").Replace("class=\\\"js-display-url\\\"", "").Replace("class=\\\"tco-ellipsis\\\"", "").Replace("class=\\\"invisible\\\"", "").Replace("<strong>", "").Replace("target=\\\"_blank\\\"", "").Replace("class=\\\"twitter-timeline-link\\\"", "").Replace("</strong>", "").Replace("rel=\\\"nofollow\\\" dir=\\\"ltr\\\" data-expanded-url=", "");
                        text = text.Replace("&quot;", "").Replace("<", "").Replace(">", "").Replace("\"", "").Replace("\\", "").Replace("title=", "");

                        string[] array = Regex.Split(text, "http");
                        text = string.Empty;
                        foreach (string itemData in array)
                        {
                            // Skip short links and fragments already present in the rebuilt text.
                            if (itemData.Contains("t.co") || text.Contains(itemData.Replace(" ", "")))
                            {
                                continue;
                            }

                            // Splitting on "http" removed the scheme prefix; restore it for URL fragments.
                            text += itemData.Contains("//") ? "http" + itemData : itemData;
                        }
                    }
                    catch (Exception ex)
                    {
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData_New() -- " + keyword + " --> text --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData_New() -- " + keyword + " --> text --> " + ex.Message, Globals.Path_TwtErrorLogs);
                    }

                    StructTweetIDs structTweetIDs = new StructTweetIDs();

                    structTweetIDs.ID_Tweet = id;
                    structTweetIDs.ID_Tweet_User = from_user_id;
                    structTweetIDs.username__Tweet_User = from_user_name;
                    structTweetIDs.wholeTweetMessage = text;
                    Log("[ " + DateTime.Now + " ] => [ " + id + " ]");
                    Log("[ " + DateTime.Now + " ] => [ " + from_user_id + " ]");
                    Log("[ " + DateTime.Now + " ] => [ " + from_user_name + " ]");
                    Log("[ " + DateTime.Now + " ] => [ " + text + " ]");
                    Log("---------------------------------------------------------------------------------------------------------------------------------------------------");
                    lst_structTweetIDs.Add(structTweetIDs);

                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(id + ":" + from_user_id, Globals.Path_keywordFollowerScrapedData);
                }
            }
            catch (Exception ex)
            {
                // These two log calls used to sit after a return statement and could never run,
                // so fetch and JSON-parse failures were swallowed without a trace.
                try
                {
                    GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData_New() -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                    GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData_New() -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
                }
                catch (Exception)
                {
                    // Best effort only: this handler has always returned normally, so a failing
                    // log write must not turn into an exception for the caller.
                }
            }

            return lst_structTweetIDs;
        }
Exemplo n.º 10
0
        public List<StructTweetIDs> NewKeywordStructDataForOnlyTweet(string keyword)
        {
            try
            {
                BaseLib.GlobusRegex regx = new GlobusRegex();
                int counter = 0;
                lst_structTweetIDs = new List<StructTweetIDs>();
                //lstTweetIds=new List<string>();
                string res_Get_searchURL = string.Empty;
                string searchURL = string.Empty;
                string maxid = string.Empty;
                string TweetId = string.Empty;
                string text = string.Empty;

                if (keyword.Trim().Contains(" "))
                {
                    keyword = keyword.Replace(" ", "+");
                }

            startAgain:

                if (!RetweetFromUserName)
                {
                    if (counter == 0)
                    {

                        searchURL = "https://twitter.com/i/search/timeline?q=" + Uri.EscapeDataString(keyword) + "&src=typd&f=realtime";
                        counter++;
                    }
                    else
                    {

                        searchURL = "https://twitter.com/i/search/timeline?q=" + Uri.EscapeDataString(keyword) + "&src=typd&f=realtime&include_available_features=1&include_entities=1&last_note_ts=0&oldest_unread_id=0&scroll_cursor=" + TweetId + "";
                    }
                }
                else
                {
                    searchURL = "https://twitter.com/i/profiles/show/" + Uri.EscapeDataString(keyword) + "/timeline/with_replies?composed_count=0&count=" + RetweetExtractcount + "&include_available_features=1&include_entities=1";
                }

                try
                {
                    res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");

                    if (string.IsNullOrEmpty(res_Get_searchURL))
                    {
                        res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                    }

                    try
                    {
                       
                        string[] splitRes = Regex.Split(res_Get_searchURL, "refresh_cursor");
                        
                        foreach (string item in splitRes)
                        {
                            if (item.Contains("refresh_cursor"))
                            {
                                int startIndex = item.IndexOf("TWEET-");
                                string start = item.Substring(startIndex).Replace("data-user-id=\\\"", "");
                                int endIndex = start.IndexOf("\"");
                                string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                                TweetId = end;
                            }
                            if (item.Contains("scroll_cursor"))
                            {
                                int startIndex = item.IndexOf("TWEET-");
                                string start = item.Substring(startIndex).Replace("data-user-id=\\\"", "");
                                int endIndex = start.IndexOf("\"");
                                string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                                TweetId = end;
                            }
                        }
                    }
                    catch (Exception)
                    {
                    }
                }

                catch (Exception ex)
                {
                    System.Threading.Thread.Sleep(2000);
                    res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " --  res_Get_searchURL --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- " + keyword + " -- res_Get_searchURL --> " + ex.Message, Globals.Path_TwtErrorLogs);
                }
                // && !res_Get_searchURL.Contains("has_more_items\":false")
                if (!string.IsNullOrEmpty(res_Get_searchURL))
                {
                    //string[] splitRes = Regex.Split(res_Get_searchURL, "data-item-id"); //Regex.Split(res_Get_searchURL, "\"in_reply_to_status_id_str\"");
                    string[] splitRes = Regex.Split(res_Get_searchURL, "data-item-id");

                    splitRes = splitRes.Skip(1).ToArray();


                    foreach (string item in splitRes)
                    {
                        if (item.Contains("data-screen-name=") && !item.Contains("js-actionable-user js-profile-popup-actionable"))
                        {
                           
                        }
                        else
                        {
                            continue;
                        }
                        string modified_Item = "\"from_user\"" + item;

                        string id = "";
                        try
                        {
                            int startIndex = item.IndexOf("data-user-id=");
                            string start = item.Substring(startIndex).Replace("data-user-id=\\\"", "");
                            int endIndex = start.IndexOf("\\\"");
                            string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                            id = end;
                        }
                        catch (Exception ex)
                        {
                            id = "null";
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
                        }

                        
                        string tweetUserid = string.Empty;
                        try
                        {
                            int startIndex = item.IndexOf("=\\\"");
                            string start = item.Substring(startIndex).Replace("=\\\"", "");
                            int endIndex = start.IndexOf("\\\"");
                            string end = start.Substring(0, endIndex).Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", "");
                            tweetUserid = end;
                        }
                        catch (Exception ex)
                        {
                            tweetUserid = "null";
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwtErrorLogs);
                        }

                        try
                        {
                            int startindex = item.IndexOf("js-tweet-text tweet-text");
                            string start = item.Substring(startindex).Replace("js-tweet-text tweet-text", "");
                            int endindex = start.IndexOf("</p>");
                            if (endindex == -1)
                            {
                                endindex = 0;
                                endindex = start.IndexOf("stream-item-footer");
                            }
                            string end = start.Substring(0, endindex);
                            end = regx.StripTagsRegex(end);
                           
                            text = end.Replace("&nbsp;", "").Replace("a href=", "").Replace("/a", "").Replace("<span", "").Replace("</span", "").Replace("class=\\\"js-display-url\\\"", "").Replace("class=\\\"tco-ellipsis\\\"", "").Replace("class=\\\"invisible\\\"", "").Replace("<strong>", "").Replace("target=\\\"_blank\\\"", "").Replace("class=\\\"twitter-timeline-link\\\"", "").Replace("</strong>", "").Replace("rel=\\\"nofollow\\\" dir=\\\"ltr\\\" data-expanded-url=", "").Replace("dir=\"ltr\"", "");
                            text = text.Replace("&quot;", "").Replace("<", "").Replace(">", "").Replace("\"", "").Replace("\\", "").Replace("title=", "").Replace("&amp;", "&").Replace("&#39;", "'").Replace("&lt;", "<").Replace("&gt;", ">");


                            //string[] array = Regex.Split(text, "http");
                            //text = string.Empty;
                            //foreach (string itemData in array)
                            //{
                            //    if (!itemData.Contains("t.co"))
                            //    {
                            //        string data = string.Empty;
                            //        if (itemData.Contains("//"))
                            //        {
                            //            data = ("http" + itemData).Replace(" span ", string.Empty);
                            //            if (!text.Contains(itemData.Replace(" ", "")))// && !data.Contains("class") && !text.Contains(data))
                            //            {
                            //                text += data.Replace("u003c", string.Empty).Replace("u003e", string.Empty);
                            //            }
                            //        }
                            //        else
                            //        {
                            //            if (!text.Contains(itemData.Replace(" ", "")))
                            //            {
                            //                text += itemData.Replace("u003c", string.Empty).Replace("u003e", string.Empty);
                            //            }
                            //        }
                            //    }
                            //}
                        }
                        catch (Exception ex)
                        {
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData_New() -- " + keyword + " --> text --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData_New() -- " + keyword + " --> text --> " + ex.Message, Globals.Path_TwtErrorLogs);
                        }

                        StructTweetIDs structTweetIDs = new StructTweetIDs();
                        if (!IsRetweetWithFovieteWithImages)
                        {
                            if (id != "null")
                            {
                                structTweetIDs.ID_Tweet = tweetUserid;
                                structTweetIDs.ID_Tweet_User = id;
                                lst_structTweetIDs.Add(structTweetIDs);
                                //Log("[ " + DateTime.Now + " ] => [ " + tweetUserid + " ]");
                                //Log("-------------------------------------------------------------------------------------------------------------------------------");
                            }

                            lst_structTweetIDs = lst_structTweetIDs.Distinct().ToList();

                            queTweetId.Enqueue(tweetUserid);
                        }
                        else
                        {
                            if (text.Contains("http://t.co"))
                            {
                                if (id != "null")
                                {
                                    structTweetIDs.ID_Tweet = tweetUserid;
                                    structTweetIDs.ID_Tweet_User = id;
                                    lst_structTweetIDs.Add(structTweetIDs);
                                    //Log("[ " + DateTime.Now + " ] => [ " + tweetUserid + " ]");
                                    //Log("-------------------------------------------------------------------------------------------------------------------------------");
                                }

                                lst_structTweetIDs = lst_structTweetIDs.Distinct().ToList();

                                queTweetId.Enqueue(tweetUserid);
                            }
                        }
                        //lstTweetIds.Add(tweetUserid);
                        //lstTweetIds = lstTweetIds.Distinct().ToList();
                        //if (lst_structTweetIDs.Count >= noOfRecords)
                        //{
                        //    return lst_structTweetIDs;
                        //}

                    }

                    if (lst_structTweetIDs.Count <= noOfRecords)
                    {
                        maxid = lst_structTweetIDs[lst_structTweetIDs.Count - 1].ID_Tweet;

                        if (res_Get_searchURL.Contains("has_moreitems\":false"))
                        {
                            return lst_structTweetIDs;
                        }
                        else
                        {
                            goto startAgain;
                        }
                    }
                    else
                    {
                        if (res_Get_searchURL.Contains("has_more_items\":false"))
                        {
                            return lst_structTweetIDs;
                        }
                        else
                            goto startAgain;
                    }
                }
            }
            catch (Exception ex)
            {
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
            }

            return lst_structTweetIDs;
        }
Exemplo n.º 11
0
        /// <summary>
        /// Pages through Twitter's user search timeline ("mode=users") for <paramref name="keyword"/>,
        /// or, when <c>RetweetFromUserName</c> is set, reads the "with_replies" timeline of the
        /// profile named by <paramref name="keyword"/>. Every result item that carries a user id is
        /// added to <c>lst_structTweetIDs</c>; for each usable item the user's profile page is
        /// fetched and a row is appended to
        /// <c>Globals.Path_KeywordScrapedListData + "-" + keyword + ".csv"</c> (details) and
        /// <c>".txt"</c> (screen name only).
        /// </summary>
        /// <param name="keyword">
        /// Search phrase or user name. Spaces are replaced by '+' before it is used in URLs and
        /// in the output file names.
        /// </param>
        /// <returns>
        /// The de-duplicated list collected so far. Paging stops when the list holds at least
        /// <c>noOfRecords</c> entries, when the response reports <c>"has_more_items":false</c>,
        /// when nothing was collected, when the response is empty, or when no new page cursor
        /// is available. Exceptions are logged and the partial list is returned.
        /// </returns>
        public List<StructTweetIDs> NewKeywordStructDataSearchByPeople(string keyword)
        {
            lst_structTweetIDs = new List<StructTweetIDs>();

            try
            {
                if (keyword.Trim().Contains(" "))
                {
                    keyword = keyword.Replace(" ", "+");
                }

                // NOTE(review): Uri.EscapeDataString turns the '+' inserted above into "%2B";
                // confirm the endpoint treats that as a word separator for multi-word keywords.
                string escapedKeyword = Uri.EscapeDataString(keyword);
                string csvPath = Globals.Path_KeywordScrapedListData + "-" + keyword + ".csv";
                string txtPath = Globals.Path_KeywordScrapedListData + "-" + keyword + ".txt";

                string scrollCursor = string.Empty;
                string previousUrl = null;
                bool firstSearchPage = true;

                while (true)
                {
                    string searchURL;
                    if (RetweetFromUserName)
                    {
                        searchURL = "https://twitter.com/i/profiles/show/" + escapedKeyword + "/timeline/with_replies?composed_count=0&count=" + RetweetExtractcount + "&include_available_features=1&include_entities=1";
                    }
                    else if (firstSearchPage)
                    {
                        //29-4-2014 only for client it has been changed
                        searchURL = "https://twitter.com/i/search/timeline?q=" + escapedKeyword + "&src=typd&f=realtime&mode=users";
                        firstSearchPage = false;
                    }
                    else
                    {
                        if (string.IsNullOrEmpty(scrollCursor))
                        {
                            // No cursor could be read from the previous page, so there is no next page to ask for.
                            return lst_structTweetIDs;
                        }

                        //29-4-2014 only for client it is changed
                        searchURL = "https://twitter.com/i/search/timeline?q=" + escapedKeyword + "&src=typd&f=realtime&mode=users&include_available_features=1&include_entities=1&last_note_ts=0&oldest_unread_id=0&scroll_cursor=" + scrollCursor;
                    }

                    if (searchURL == previousUrl)
                    {
                        // The cursor did not advance (or the URL has no cursor at all):
                        // requesting the same page again would only repeat work forever.
                        return lst_structTweetIDs;
                    }
                    previousUrl = searchURL;

                    string response;
                    try
                    {
                        response = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");

                        if (string.IsNullOrEmpty(response))
                        {
                            response = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                        }
                    }
                    catch (Exception ex)
                    {
                        // Log first so the original failure is recorded even if the retry throws as well.
                        LogPeopleSearchError(keyword + " -- res_Get_searchURL --> " + ex.Message);
                        System.Threading.Thread.Sleep(2000);
                        response = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                    }

                    if (string.IsNullOrEmpty(response))
                    {
                        return lst_structTweetIDs;
                    }

                    scrollCursor = ReadPeopleSearchCursor(response, scrollCursor);

                    // Everything before the first "data-item-id" is page preamble, not a result item.
                    string[] items = Regex.Split(response, "data-item-id").Skip(1).ToArray();

                    foreach (string item in items)
                    {
                        string id = ExtractPeopleSearchValue(item, "data-user-id=", "data-user-id=\\\"", "\\\"");
                        if (id == null)
                        {
                            id = "null";
                            LogPeopleSearchError("id -- " + keyword + " --> data-user-id attribute not found");
                        }
                        else
                        {
                            id = id.Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                        }

                        // The screen name is taken verbatim apart from stray JSON punctuation;
                        // removing substrings such as "user" would mangle legitimate names.
                        string from_user_id = ExtractPeopleSearchValue(item, "data-screen-name=\\\"", "data-screen-name=\\\"", "\\\"");
                        if (from_user_id == null)
                        {
                            from_user_id = "null";
                            LogPeopleSearchError(keyword + " -- from_user_id --> data-screen-name attribute not found");
                        }
                        else
                        {
                            from_user_id = from_user_id.Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                        }

                        // The item begins right after "data-item-id", so the first =\"...\" pair is the item id.
                        string tweetUserid = ExtractPeopleSearchValue(item, "=\\\"", "=\\\"", "\\\"");
                        if (tweetUserid == null)
                        {
                            tweetUserid = "null";
                            LogPeopleSearchError(keyword + " -- tweetUserid --> data-item-id value not found");
                        }
                        else
                        {
                            tweetUserid = tweetUserid.Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", "");
                        }

                        if (id != "null")
                        {
                            StructTweetIDs structTweetIDs = new StructTweetIDs();
                            structTweetIDs.ID_Tweet = tweetUserid;
                            structTweetIDs.ID_Tweet_User = id;
                            structTweetIDs.username__Tweet_User = from_user_id;
                            structTweetIDs.wholeTweetMessage = string.Empty;
                            lst_structTweetIDs.Add(structTweetIDs);

                            Log("[ " + DateTime.Now + " ] => [ " + tweetUserid + " ]");
                            Log("[ " + DateTime.Now + " ] => [ " + id + " ]");
                            Log("[ " + DateTime.Now + " ] => [ " + from_user_id + " ]");
                            Log("-------------------------------------------------------------------------------------------------------------------------------");

                            if (!File.Exists(csvPath))
                            {
                                GlobusFileHelper.AppendStringToTextfileNewLine("USERID , USERNAME , PROFILE NAME , BIO , LOCATION , WEBSITE , NO OF TWEETS , FOLLOWERS , FOLLOWINGS", csvPath);
                            }

                            // Only fetch the profile when a row can actually be written for it.
                            if (!string.IsNullOrEmpty(from_user_id) && from_user_id != "null" && tweetUserid != "null")
                            {
                                WritePeopleSearchProfileRow(id, from_user_id, tweetUserid, csvPath, txtPath);
                            }
                        }

                        lst_structTweetIDs = lst_structTweetIDs.Distinct().ToList();

                        if (lst_structTweetIDs.Count >= noOfRecords)
                        {
                            return lst_structTweetIDs;
                        }
                    }

                    if (lst_structTweetIDs.Count == 0 || response.Contains("has_more_items\":false"))
                    {
                        return lst_structTweetIDs;
                    }
                }
            }
            catch (Exception ex)
            {
                LogPeopleSearchError(keyword + " --> " + ex.Message);
            }

            return lst_structTweetIDs;
        }

        /// <summary>
        /// Fetches https://twitter.com/&lt;screenName&gt;, reads name, website, bio, location and the
        /// three profile counters from the page, then appends one CSV row and one TXT line and logs the row.
        /// All fields start empty for every call, so a value missing on this profile is written as empty.
        /// </summary>
        private void WritePeopleSearchProfileRow(string userId, string screenName, string itemId, string csvPath, string txtPath)
        {
            string profileName = string.Empty;
            string website = string.Empty;
            string bio = string.Empty;
            string location = string.Empty;
            string noOfTweets = string.Empty;
            string followers = string.Empty;
            string followings = string.Empty;

            ChilkatHttpHelpr objChilkat = new ChilkatHttpHelpr();
            GlobusHttpHelper HttpHelper = new GlobusHttpHelper();
            string profilePageSource = HttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/" + screenName), "", "");

            Chilkat.Xml xml = new Chilkat.Xml();
            xml.LoadXml(objChilkat.ConvertHtmlToXml(profilePageSource));

            // A null "search after" node makes SearchForAttribute start from the top of the document.
            Chilkat.Xml fromStart = null;

            Chilkat.Xml nameNode = xml.SearchForAttribute(fromStart, "h1", "class", "ProfileHeaderCard-name");
            if (nameNode != null)
            {
                profileName = nameNode.AccumulateTagContent("text", "script|style") ?? string.Empty;

                // The website is taken from the first user-coloured link that follows the name header.
                Chilkat.Xml websiteNode = xml.SearchForAttribute(nameNode, "a", "class", "u-textUserColor");
                if (websiteNode != null)
                {
                    website = websiteNode.AccumulateTagContent("text", "script|style") ?? string.Empty;
                }
            }

            Chilkat.Xml bioNode = xml.SearchForAttribute(fromStart, "p", "class", "ProfileHeaderCard-bio u-dir");
            if (bioNode != null)
            {
                bio = (bioNode.AccumulateTagContent("text", "script|style") ?? string.Empty).Replace("&#39;", "'").Replace("&#13;&#10;", string.Empty).Trim();
            }

            Chilkat.Xml locationNode = xml.SearchForAttribute(fromStart, "span", "class", "ProfileHeaderCard-locationText u-dir");
            if (locationNode != null)
            {
                location = locationNode.AccumulateTagContent("text", "script|style") ?? string.Empty;
            }

            // Counters are read in page order: tweets, following, followers.
            Chilkat.Xml statNode = xml.SearchForAttribute(fromStart, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-nav");
            for (int statIndex = 0; statNode != null && statIndex < 3; statIndex++)
            {
                string statText = statNode.AccumulateTagContent("text", "script|style") ?? string.Empty;

                if (statIndex == 0)
                {
                    noOfTweets = statText.Replace("Tweets", string.Empty).Replace(",", string.Empty).Replace("Tweet", string.Empty);
                }
                else if (statIndex == 1)
                {
                    followings = statText.Replace(" Following", string.Empty).Replace(",", string.Empty).Replace("Following", string.Empty);
                }
                else
                {
                    followers = statText.Replace("Followers", string.Empty).Replace(",", string.Empty).Replace("Follower", string.Empty);
                }

                statNode = xml.SearchForAttribute(statNode, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-openSignupDialog js-nonNavigable u-textUserColor");
            }

            string Id_user = itemId.Replace("}]", string.Empty).Trim();
            Globals.lstScrapedUserIDs.Add(Id_user);

            // NOTE(review): profile name and website are written without comma removal, so a
            // comma in either shifts the CSV columns — confirm whether consumers rely on this.
            GlobusFileHelper.AppendStringToTextfileNewLine(userId + "," + screenName + "," + profileName + "," + bio.Replace(",", "") + "," + location.Replace(",", "") + "," + website + "," + noOfTweets.Replace(",", "").Replace("Tweets", "") + "," + followers.Replace(",", "").Replace("Following", "") + "," + followings.Replace(",", "").Replace("Followers", "").Replace("Follower", ""), csvPath);

            GlobusFileHelper.AppendStringToTextfileNewLine(screenName, txtPath);
            Log("[ " + DateTime.Now + " ] => [ " + screenName + "," + Id_user + "," + profileName + "," + bio.Replace(",", "") + "," + location + "," + website + "," + noOfTweets + "," + followers + "," + followings + " ]");
        }

        /// <summary>
        /// Reads the next-page cursor from a search response. The response is split on
        /// "refresh_cursor"; for every piece that mentions "scroll_cursor" the text from the first
        /// "USER-" up to the next double quote is taken. The last readable value wins.
        /// </summary>
        /// <returns>The new cursor, or <paramref name="currentCursor"/> when none could be read.</returns>
        private static string ReadPeopleSearchCursor(string response, string currentCursor)
        {
            string cursor = currentCursor;

            foreach (string piece in Regex.Split(response, "refresh_cursor"))
            {
                if (!piece.Contains("scroll_cursor"))
                {
                    continue;
                }

                string raw = ExtractPeopleSearchValue(piece, "USER-", "data-user-id=\\\"", "\"");
                if (raw == null)
                {
                    // Stop at the first unreadable piece and keep whatever cursor was found before it.
                    break;
                }

                cursor = raw.Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
            }

            return cursor;
        }

        /// <summary>
        /// Returns the text of <paramref name="source"/> that starts at the first
        /// <paramref name="startMarker"/>, with every <paramref name="strippedText"/> occurrence
        /// removed, cut off before the first <paramref name="endMarker"/>.
        /// Returns <c>null</c> when either marker is missing.
        /// </summary>
        private static string ExtractPeopleSearchValue(string source, string startMarker, string strippedText, string endMarker)
        {
            int startIndex = source.IndexOf(startMarker, StringComparison.Ordinal);
            if (startIndex < 0)
            {
                return null;
            }

            string tail = source.Substring(startIndex).Replace(strippedText, "");
            int endIndex = tail.IndexOf(endMarker, StringComparison.Ordinal);
            if (endIndex < 0)
            {
                return null;
            }

            return tail.Substring(0, endIndex);
        }

        /// <summary>
        /// Appends one error entry for <c>NewKeywordStructDataSearchByPeople</c> to
        /// <c>Globals.Path_TwitterDataScrapper</c> (timestamped) and to <c>Globals.Path_TwtErrorLogs</c>.
        /// </summary>
        private static void LogPeopleSearchError(string detail)
        {
            const string label = "Error --> NewKeywordStructDataSearchByPeople() -- ";
            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> " + label + detail, Globals.Path_TwitterDataScrapper);
            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(label + detail, Globals.Path_TwtErrorLogs);
        }
Exemplo n.º 12
0
        public List<StructTweetIDs> NewKeywordStructData1(string keyword)
        {
            try
            {
                BaseLib.GlobusRegex regx = new GlobusRegex();
                int counter = 0;
                lst_structTweetIDs = new List<StructTweetIDs>();
                string res_Get_searchURL = string.Empty;
                string searchURL = string.Empty;
                string maxid = string.Empty;
                string TweetId = string.Empty;
                string text = string.Empty;

                if (keyword.Trim().Contains(" "))
                {
                    keyword = keyword.Replace(" ", "+");
                }

            startAgain:

                #region <Old Get request URl>>
                //if (counter == 0)
                //{
                //    //searchURL = "https://twitter.com/i/search/realtime?type=relevance&src=typd&include_available_features=1&include_entities=1&q=" + Uri.EscapeDataString(keyword);
                //    //searchURL = "https://twitter.com/i/search/realtime?type=relevance&src=typd&composed_count=0&count=" + noOfRecords + "&include_available_features=1&include_entities=1&max_id=" + maxid + "&q=" + keyword;
                //    //searchURL = "https://twitter.com/i/search/timeline?type=recent&src=typd&include_available_features=1&include_entities=1&max_id=0&q=" + keyword + "&composed_count=0&count=" + noOfRecords + "";
                //    //counter++;

                //    //searchURL = "https://twitter.com/search?q=ranbir&mode=relevance&src=typd";

                //    //searchURL = "https://twitter.com/search?q=" + keyword + "&mode=relevance&src=typd";
                //    searchURL = "https://twitter.com/i/search/timeline?q=" + keyword + "&src=typd&mode=relevance&composed_count=0&include_available_features=1&include_entities=1&include_new_items_bar=true&interval=30000";

                //    counter++;

                //}

                //else
                //{
                //    if (res_Get_searchURL.Contains("has_more_items\":false"))
                //    {
                //        return lst_structTweetIDs;
                //    }
                //    //searchURL = "https://twitter.com/i/search/timeline?type=relevance&src=typd&include_available_features=1&include_entities=1&max_id=" + maxid + "&q=" + keyword;
                //    //searchURL = "https://twitter.com/i/search/timeline?q=ranbir&src=typd&mode=relevance&composed_count=0&include_available_features=1&include_entities=1&include_new_items_bar=true&interval=30000&latent_count=25&refresh_cursor=TWEET-372646612473876480-374800735134687234";

                //    searchURL = "https://twitter.com/i/search/timeline?q=" + keyword + "&src=typd&mode=relevance&composed_count=0&include_available_features=1&include_entities=1&include_new_items_bar=true&interval=30000&latent_count=25&refresh_cursor=" + TweetId;

                //}
                #endregion

                if (!RetweetFromUserName)
                {
                    if (counter == 0)
                    {
                        //searchURL = "https://twitter.com/i/search/timeline?type=recent&src=typd&include_available_features=1&include_entities=1&max_id=0&q=" + keyword + "&composed_count=0&count=" + noOfRecords + "";
                        searchURL = "https://twitter.com/i/search/timeline?q=" + Uri.EscapeDataString(keyword) + "&src=typd&f=realtime";

                        
                        counter++;
                    }
                    else
                    {

                        searchURL = "https://twitter.com/i/search/timeline?q=" + Uri.EscapeDataString(keyword) + "&src=typd&f=realtime&include_available_features=1&include_entities=1&last_note_ts=0&oldest_unread_id=0&scroll_cursor=" + TweetId + "";

                        //29-4-2014 only for client it is changed
                        //searchURL = "https://twitter.com/i/search/timeline?q=" + keyword + "&src=typd&f=realtime&mode=users&include_available_features=1&include_entities=1&last_note_ts=0&oldest_unread_id=0&scroll_cursor=" + TweetId + "";
                    }
                }
                else 
                {
                        searchURL = "https://twitter.com/i/profiles/show/" + Uri.EscapeDataString(keyword) + "/timeline/with_replies?composed_count=0&count=" + RetweetExtractcount + "&include_available_features=1&include_entities=1";                   
                }

                try
                {
                    res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");

                    if (string.IsNullOrEmpty(res_Get_searchURL))
                    {
                        res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                    }

                    try
                    {
                        //string sjss = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                        string[] splitRes = Regex.Split(res_Get_searchURL, "refresh_cursor");
                        //splitRes = splitRes.Skip(1).ToArray();
                        foreach (string item in splitRes)
                        {
                            if (item.Contains("refresh_cursor"))
                            {
                                int startIndex = item.IndexOf("TWEET-");
                                string start = item.Substring(startIndex).Replace("data-user-id=\\\"", "");
                                int endIndex = start.IndexOf("\"");
                                string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                                TweetId = end;
                                

                                ////only for client 29/4
                                ////int startIndex = item.IndexOf("TWEET-");

                                //int startIndex = item.IndexOf("USER-");
                                //string start = item.Substring(startIndex).Replace("data-user-id=\\\"", "");
                                //int endIndex = start.IndexOf("\"");
                                //string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                                //TweetId = end;
                            }
                            if (item.Contains("scroll_cursor"))
                            {
                                int startIndex = item.IndexOf("TWEET-");
                                string start = item.Substring(startIndex).Replace("data-user-id=\\\"", "");
                                int endIndex = start.IndexOf("\"");
                                string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                                TweetId = end;
                            }
                        }
                    }
                    catch (Exception)
                    {
                    }
                }

                catch (Exception ex)
                {
                    System.Threading.Thread.Sleep(2000);
                    res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " --  res_Get_searchURL --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- " + keyword + " -- res_Get_searchURL --> " + ex.Message, Globals.Path_TwtErrorLogs);
                }
                // && !res_Get_searchURL.Contains("has_more_items\":false")
                if (!string.IsNullOrEmpty(res_Get_searchURL))
                {
                    //string[] splitRes = Regex.Split(res_Get_searchURL, "data-item-id"); //Regex.Split(res_Get_searchURL, "\"in_reply_to_status_id_str\"");
                    string[] splitRes = Regex.Split(res_Get_searchURL, "data-item-id");

                    splitRes = splitRes.Skip(1).ToArray();


                    foreach (string item in splitRes)
                    {
                        if (item.Contains("data-screen-name=") && !item.Contains("js-actionable-user js-profile-popup-actionable"))
                        {
                            //var avc = Newtonsoft.Json.JsonConvert.DeserializeObject<dynamic>(res_Get_searchURL);
                            //string DataHtml = (string)avc["items_html"];
                        }
                        else
                        {
                            continue;
                        }
                        string modified_Item = "\"from_user\"" + item;

                        string id = "";
                        try
                        {
                            int startIndex = item.IndexOf("data-user-id=");
                            string start = item.Substring(startIndex).Replace("data-user-id=\\\"", "");
                            int endIndex = start.IndexOf("\\\"");
                            string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                            id = end;
                        }
                        catch (Exception ex)
                        {
                            id = "null";
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
                        }

                        string from_user_id = "";
                        try
                        {
                            int startIndex = item.IndexOf("data-screen-name=\\\"");
                            string start = item.Substring(startIndex).Replace("data-screen-name=\\\"", "");
                            int endIndex = start.IndexOf("\\\"");
                            string end = start.Substring(0, endIndex).Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", "");
                            from_user_id = end;
                        }
                        catch (Exception ex)
                        {
                            from_user_id = "null";
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwtErrorLogs);
                        }

                        string tweetUserid = string.Empty;
                        try
                        {
                            int startIndex = item.IndexOf("=\\\"");
                            string start = item.Substring(startIndex).Replace("=\\\"", "");
                            int endIndex = start.IndexOf("\\\"");
                            string end = start.Substring(0, endIndex).Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", "");
                            tweetUserid = end;
                        }
                        catch (Exception ex)
                        {
                            from_user_id = "null";
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwtErrorLogs);
                        }

                        ///Tweet Text 
                        try
                        {

                            #region previous Code of Find Text
                            //int startindex = item.IndexOf("js-tweet-text tweet-text\""); //TweetTextSize  js-tweet-text tweet-text
                            //if (startindex == -1)
                            //{
                            //    startindex = 0;
                            //    startindex = item.IndexOf("js-tweet-text tweet-text");
                            //}

                            //string start = item.Substring(startindex).Replace("js-tweet-text tweet-text\"", "").Replace("js-tweet-text tweet-text tweet-text-rtl\"", "");
                            //int endindex = start.IndexOf("</p>");

                            //if (endindex == -1)
                            //{
                            //    endindex = 0;
                            //    endindex = start.IndexOf("stream-item-footer");
                            //}

                            //string end = start.Substring(0, endindex);
                            //end = regx.StripTagsRegex(end);
                            //text = end.Replace("&nbsp;", "").Replace("a href=", "").Replace("/a", "").Replace("<span", "").Replace("</span", "").Replace("class=\\\"js-display-url\\\"", "").Replace("class=\\\"tco-ellipsis\\\"", "").Replace("class=\\\"invisible\\\"", "").Replace("<strong>", "").Replace("target=\\\"_blank\\\"", "").Replace("class=\\\"twitter-timeline-link\\\"", "").Replace("</strong>", "").Replace("rel=\\\"nofollow\\\" dir=\\\"ltr\\\" data-expanded-url=", "");
                            //text = text.Replace("&quot;", "").Replace("<", "").Replace(">", "").Replace("\"", "").Replace("\\", "").Replace("title=", "");

                            //string[] array = Regex.Split(text, "http");
                            //text = string.Empty;
                            //foreach (string itemData in array)
                            //{
                            //    if (!itemData.Contains("t.co"))
                            //    {
                            //        string data = string.Empty;
                            //        if (itemData.Contains("//"))
                            //        {
                            //            data = ("http" + itemData).Replace(" span ", string.Empty);
                            //            if (!text.Contains(itemData.Replace(" ", "")))// && !data.Contains("class") && !text.Contains(data))
                            //            {
                            //                text += data.Replace("u003c", string.Empty).Replace("u003e", string.Empty);
                            //            }
                            //        }
                            //        else
                            //        {
                            //            if (!text.Contains(itemData.Replace(" ", "")))
                            //            {
                            //                text += itemData.Replace("u003c", string.Empty).Replace("u003e", string.Empty).Replace("js-tweet-text tweet-text", "");
                            //            }
                            //        }
                            //    }
                            //}
                            #endregion

                            string[] getTweetText = Regex.Split(item, "TweetTextSize  js-tweet-text tweet-text");
                            try
                            {
                                text = Utils.getBetween(getTweetText[1], "data-aria-label-part=", "href=");
                                text = text.Replace("\\\"0\\\"\\u003e", "").Replace("\\u003ca", "");
                            }
                            catch { };

                        }
                        catch { };




                        StructTweetIDs structTweetIDs = new StructTweetIDs();

                        if(text.Contains(""))
                        if (id != "null")
                        {
                            structTweetIDs.ID_Tweet = tweetUserid;
                            structTweetIDs.ID_Tweet_User = id;
                            structTweetIDs.username__Tweet_User = from_user_id;
                            structTweetIDs.wholeTweetMessage = text;
                            lst_structTweetIDs.Add(structTweetIDs);
                            Log("[ " + DateTime.Now + " ] => [ " + tweetUserid + " ]");
                            Log("[ " + DateTime.Now + " ] => [ " + id + " ]");
                            Log("[ " + DateTime.Now + " ] => [ " + from_user_id + " ]");
                            Log("-------------------------------------------------------------------------------------------------------------------------------");
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(id + ":" + from_user_id, Globals.Path_keywordFollowerScrapedData);
                        }
                       
                        lst_structTweetIDs = lst_structTweetIDs.Distinct().ToList();
                       
                        if (lst_structTweetIDs.Count >= noOfRecords)
                        {
                            return lst_structTweetIDs;
                        }
                    
                    }

                    if (lst_structTweetIDs.Count <= noOfRecords)
                    {
                        maxid = lst_structTweetIDs[lst_structTweetIDs.Count - 1].ID_Tweet;

                        if (res_Get_searchURL.Contains("has_moreitems\":false"))
                        {
                            return lst_structTweetIDs;
                        }
                        else
                        {
                            goto startAgain;
                        }
                    }
                    else
                    {
                        if (res_Get_searchURL.Contains("has_more_items\":false"))
                        {
                            return lst_structTweetIDs;
                        }
                        else
                            goto startAgain;
                    }
                }
            }
            catch (Exception ex)
            {
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
            }

            return lst_structTweetIDs;
        }
        /// <summary>
        /// Runs a keyword search against twitter.com's <c>phoenix_search.phoenix</c> endpoint and
        /// pulls tweet id, user id, screen name and tweet text out of the raw response by
        /// splitting it on <c>"in_reply_to_status_id_str"</c> and slicing each chunk.
        /// </summary>
        /// <param name="keyword">Search term; concatenated into the query string unchanged.</param>
        /// <returns>
        /// <c>lst_structTweetIDs</c>, re-created on every call. At most <c>noOfRecords</c> chunks are
        /// examined, and chunks whose tweet id cannot be extracted are left out.
        /// </returns>
        /// <remarks>
        /// Exceptions raised while fetching or parsing are caught and appended to the files at
        /// <c>Globals.Path_TwitterDataScrapper</c> and <c>Globals.Path_TwtErrorLogs</c>.
        /// </remarks>
        public List<StructTweetIDs> KeywordStructData(string keyword)
        {
            try
            {
                int counter = 0;
                lst_structTweetIDs = new List<StructTweetIDs>();
                string res_Get_searchURL = string.Empty;

                string searchURL = "https://twitter.com/phoenix_search.phoenix?q=" + keyword + "&count=" + noOfRecords + "&include_entities=1&include_available_features=1&contributor_details=true&page=null&mode=relevance&query_source=typed_query";

                try
                {
                    res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                }
                catch (Exception ex)
                {
                    // Log the first failure before retrying, so it is recorded even when the
                    // retry throws as well (a second failure is handled by the outer catch).
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> KeywordStructData() -- " + keyword + " --  res_Get_searchURL --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> KeywordStructData() -- " + keyword + " -- res_Get_searchURL --> " + ex.Message, Globals.Path_TwtErrorLogs);

                    System.Threading.Thread.Sleep(2000);
                    res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                }

                if (!string.IsNullOrEmpty(res_Get_searchURL))
                {
                    // Everything before the first marker is not a tweet chunk, so it is dropped.
                    string[] splitRes = Regex.Split(res_Get_searchURL, "\"in_reply_to_status_id_str\"");
                    splitRes = splitRes.Skip(1).ToArray();

                    foreach (string item in splitRes)
                    {
                        // Stop once noOfRecords chunks have been examined.
                        if (counter >= noOfRecords)
                        {
                            break;
                        }
                        counter++;

                        // Tweet id: the "id_str" value. "null" marks a chunk that must be skipped.
                        string id = "";
                        try
                        {
                            int startIndex = item.IndexOf("\"id_str\"");
                            string start = item.Substring(startIndex);
                            int endIndex = start.IndexOf("\",");
                            id = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                        }
                        catch (Exception ex)
                        {
                            id = "null";
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> KeywordStructData() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> KeywordStructData() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
                        }

                        // User id: the first "id" value found in the chunk.
                        string from_user_id = "";
                        try
                        {
                            int startIndex = item.IndexOf("\"id\"");
                            string start = item.Substring(startIndex);
                            int endIndex = start.IndexOf(",\"");
                            from_user_id = start.Substring(0, endIndex).Replace("id", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", "");
                        }
                        catch (Exception ex)
                        {
                            from_user_id = "null";
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> KeywordStructData() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> KeywordStructData() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwtErrorLogs);
                        }

                        // Screen name: the "screen_name" value.
                        string from_user = "";
                        try
                        {
                            int startIndex = item.IndexOf("\"screen_name\"");
                            string start = item.Substring(startIndex);
                            int endIndex = start.IndexOf(",\"");
                            from_user = start.Substring(0, endIndex).Replace("screen_name", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                        }
                        catch (Exception ex)
                        {
                            // from_user stays empty; the user id extracted above must not be overwritten here.
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> KeywordStructData() -- " + keyword + " -- from_user --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> KeywordStructData() -- " + keyword + " -- from_user --> " + ex.Message, Globals.Path_TwtErrorLogs);
                        }

                        // Tweet text: the "text" value, cut at the first following ,"
                        string text = string.Empty;
                        try
                        {
                            int startIndex = item.IndexOf("\"text\":");
                            string start = item.Substring(startIndex).Replace("\"text\":", "");
                            int endIndex = start.IndexOf(",\"");
                            text = start.Substring(0, endIndex).Replace("screen_name", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "");
                        }
                        catch (Exception ex)
                        {
                            // text stays empty; the user id extracted above must not be overwritten here.
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> KeywordStructData() -- " + keyword + " -- text --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> KeywordStructData() -- " + keyword + " -- text --> " + ex.Message, Globals.Path_TwtErrorLogs);
                        }

                        if (id != "null")
                        {
                            StructTweetIDs structTweetIDs = new StructTweetIDs();

                            structTweetIDs.ID_Tweet = id;
                            structTweetIDs.ID_Tweet_User = from_user_id;
                            structTweetIDs.username__Tweet_User = from_user;
                            structTweetIDs.wholeTweetMessage = text;

                            lst_structTweetIDs.Add(structTweetIDs);
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> KeywordStructData() -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> KeywordStructData() -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
            }

            return lst_structTweetIDs;
        }
Exemplo n.º 14
0
        /// <summary>
        /// Requests a user's timeline from <c>api.twitter.com/1/statuses/user_timeline.json</c> and
        /// scrapes tweet id, tweet text, user id and screen name out of the raw response, which is
        /// split on <c>{"created_at</c> into one chunk per entry.
        /// </summary>
        /// <param name="keyword">
        /// Account to look up: sent as <c>user_id</c> when <c>NumberHelper.ValidateNumber</c> accepts
        /// it, otherwise as <c>screen_name</c>. Concatenated into the query string unchanged.
        /// </param>
        /// <returns>
        /// <c>lst_structTweetIDs</c>, re-created on every call, with one entry per response chunk.
        /// Fields that could not be extracted are left empty.
        /// </returns>
        /// <remarks>
        /// Exceptions are caught and appended to the files at <c>Globals.Path_TwitterDataScrapper</c>
        /// and <c>Globals.Path_TwtErrorLogs</c>.
        /// </remarks>
        public List <StructTweetIDs> GetTweetData_ByUserName(string keyword)
        {
            lst_structTweetIDs = new List <StructTweetIDs>();

            try
            {
                string searchURL = string.Empty;

                if (NumberHelper.ValidateNumber(keyword))
                {
                    // No space before '=': with one, the parameter name sent would not be "user_id".
                    searchURL = "https://api.twitter.com/1/statuses/user_timeline.json?include_entities=true&include_rts=true&user_id=" + keyword + "&count=" + TweetExtractCount;
                }
                else
                {
                    searchURL = "https://api.twitter.com/1/statuses/user_timeline.json?include_entities=true&include_rts=true&screen_name=" + keyword + "&count=" + TweetExtractCount;
                }
                string res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");

                // Everything before the first marker is not a tweet chunk, so it is dropped.
                string[] splitRes = Regex.Split(res_Get_searchURL, "{\"created_at");
                splitRes = splitRes.Skip(1).ToArray();

                // The user id is read from the first user":{"id": object of the WHOLE response, so it
                // is identical for every entry; resolve it once instead of re-splitting per tweet.
                // NOTE(review): the screen name below is read per chunk - confirm whether a per-tweet
                // user id was intended here as well.
                string TweeterUserId = string.Empty;
                if (splitRes.Length > 0)
                {
                    try
                    {
                        string item1      = Regex.Split(res_Get_searchURL, "user\":")[1];
                        int    startIndex = item1.IndexOf("{\"id\":");
                        string start      = item1.Substring(startIndex);
                        int    endIndex   = start.IndexOf(",\"id_str");
                        string end        = start.Substring(0, endIndex);
                        TweeterUserId = end.Replace("{\"id\":", string.Empty).Trim();
                    }
                    catch (Exception ex)
                    {
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData_ByUserName() -->  TweeterUserId -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData_ByUserName() -->  TweeterUserId -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
                    }
                }

                foreach (string item in splitRes)
                {
                    string modified_Item         = "\"from_user\"" + item;
                    string text                  = string.Empty;
                    string TweeterUserScreanName = string.Empty;
                    string Tweetid               = Globussoft.GlobusHttpHelper.ParseEncodedJson(modified_Item, "id");

                    // Tweet text: the "text" value, cut at the first following ,"
                    try
                    {
                        int    startIndex = item.IndexOf("\"text\":");
                        string start      = item.Substring(startIndex).Replace("\"text\":", "");
                        int    endIndex   = start.IndexOf(",\"");
                        string end        = start.Substring(0, endIndex);
                        text = end.Replace("\"", string.Empty).Trim();
                    }
                    catch (Exception ex)
                    {
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData_ByUserName() --> text -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData_ByUserName() --> text -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
                    }

                    // Screen name: first "screen_name" occurrence inside this chunk.
                    try
                    {
                        int    startIndex = item.IndexOf("screen_name");
                        string start      = item.Substring(startIndex);
                        int    endIndex   = start.IndexOf(",\"");
                        string end        = start.Substring(0, endIndex);
                        TweeterUserScreanName = end.Replace("screen_name\":\"", string.Empty).Replace("\"", string.Empty).Trim();
                    }
                    catch (Exception ex)
                    {
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData_ByUserName() --> TweeterUserScreanName -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData_ByUserName() --> TweeterUserScreanName -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
                    }

                    StructTweetIDs structTweetIDs = new StructTweetIDs();

                    structTweetIDs.ID_Tweet             = Tweetid;
                    structTweetIDs.ID_Tweet_User        = TweeterUserId;
                    structTweetIDs.username__Tweet_User = TweeterUserScreanName;
                    structTweetIDs.wholeTweetMessage    = text;

                    lst_structTweetIDs.Add(structTweetIDs);
                }
            }
            catch (Exception ex)
            {
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData_ByUserName() -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData_ByUserName() -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
            }

            return(lst_structTweetIDs);
        }
Exemplo n.º 15
0
        /// <summary>
        /// Runs a keyword search against twitter.com's <c>phoenix_search.phoenix</c> endpoint and
        /// pulls tweet id, user id, screen name and tweet text out of the raw response by
        /// splitting it on <c>"in_reply_to_status_id_str"</c> and slicing each chunk.
        /// </summary>
        /// <param name="keyword">Search term; concatenated into the query string unchanged.</param>
        /// <returns>
        /// <c>lst_structTweetIDs</c>, re-created on every call. At most <c>noOfRecords</c> chunks are
        /// examined, and chunks whose tweet id cannot be extracted are left out.
        /// </returns>
        /// <remarks>
        /// Exceptions raised while fetching or parsing are caught and appended to the files at
        /// <c>Globals.Path_TwitterDataScrapper</c> and <c>Globals.Path_TwtErrorLogs</c>.
        /// </remarks>
        public List <StructTweetIDs> KeywordStructData(string keyword)
        {
            try
            {
                int counter = 0;
                lst_structTweetIDs = new List <StructTweetIDs>();
                string res_Get_searchURL = string.Empty;

                string searchURL = "https://twitter.com/phoenix_search.phoenix?q=" + keyword + "&count=" + noOfRecords + "&include_entities=1&include_available_features=1&contributor_details=true&page=null&mode=relevance&query_source=typed_query";

                try
                {
                    res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                }
                catch (Exception ex)
                {
                    // Log the first failure before retrying, so it is recorded even when the
                    // retry throws as well (a second failure is handled by the outer catch).
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> KeywordStructData() -- " + keyword + " --  res_Get_searchURL --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> KeywordStructData() -- " + keyword + " -- res_Get_searchURL --> " + ex.Message, Globals.Path_TwtErrorLogs);

                    System.Threading.Thread.Sleep(2000);
                    res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                }

                if (!string.IsNullOrEmpty(res_Get_searchURL))
                {
                    // Everything before the first marker is not a tweet chunk, so it is dropped.
                    string[] splitRes = Regex.Split(res_Get_searchURL, "\"in_reply_to_status_id_str\"");
                    splitRes = splitRes.Skip(1).ToArray();

                    foreach (string item in splitRes)
                    {
                        // Stop once noOfRecords chunks have been examined.
                        if (counter >= noOfRecords)
                        {
                            break;
                        }
                        counter++;

                        // Tweet id: the "id_str" value. "null" marks a chunk that must be skipped.
                        string id = "";
                        try
                        {
                            int    startIndex = item.IndexOf("\"id_str\"");
                            string start      = item.Substring(startIndex);
                            int    endIndex   = start.IndexOf("\",");
                            id = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                        }
                        catch (Exception ex)
                        {
                            id = "null";
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> KeywordStructData() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> KeywordStructData() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
                        }

                        // User id: the first "id" value found in the chunk.
                        string from_user_id = "";
                        try
                        {
                            int    startIndex = item.IndexOf("\"id\"");
                            string start      = item.Substring(startIndex);
                            int    endIndex   = start.IndexOf(",\"");
                            from_user_id = start.Substring(0, endIndex).Replace("id", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", "");
                        }
                        catch (Exception ex)
                        {
                            from_user_id = "null";
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> KeywordStructData() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> KeywordStructData() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwtErrorLogs);
                        }

                        // Screen name: the "screen_name" value.
                        string from_user = "";
                        try
                        {
                            int    startIndex = item.IndexOf("\"screen_name\"");
                            string start      = item.Substring(startIndex);
                            int    endIndex   = start.IndexOf(",\"");
                            from_user = start.Substring(0, endIndex).Replace("screen_name", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                        }
                        catch (Exception ex)
                        {
                            // from_user stays empty; the user id extracted above must not be overwritten here.
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> KeywordStructData() -- " + keyword + " -- from_user --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> KeywordStructData() -- " + keyword + " -- from_user --> " + ex.Message, Globals.Path_TwtErrorLogs);
                        }

                        // Tweet text: the "text" value, cut at the first following ,"
                        string text = string.Empty;
                        try
                        {
                            int    startIndex = item.IndexOf("\"text\":");
                            string start      = item.Substring(startIndex).Replace("\"text\":", "");
                            int    endIndex   = start.IndexOf(",\"");
                            text = start.Substring(0, endIndex).Replace("screen_name", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "");
                        }
                        catch (Exception ex)
                        {
                            // text stays empty; the user id extracted above must not be overwritten here.
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> KeywordStructData() -- " + keyword + " -- text --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> KeywordStructData() -- " + keyword + " -- text --> " + ex.Message, Globals.Path_TwtErrorLogs);
                        }

                        if (id != "null")
                        {
                            StructTweetIDs structTweetIDs = new StructTweetIDs();

                            structTweetIDs.ID_Tweet             = id;
                            structTweetIDs.ID_Tweet_User        = from_user_id;
                            structTweetIDs.username__Tweet_User = from_user;
                            structTweetIDs.wholeTweetMessage    = text;

                            lst_structTweetIDs.Add(structTweetIDs);
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> KeywordStructData() -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> KeywordStructData() -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
            }

            return(lst_structTweetIDs);
        }