/// <summary>
/// Queries the legacy search endpoint (search.twitter.com/search.json) for
/// <paramref name="keyword"/> and turns every result fragment into a <see cref="StructTweetIDs"/>.
/// </summary>
/// <param name="keyword">
/// Raw search term. It is percent-encoded here so that characters such as '#' or '&amp;'
/// stay inside the <c>q</c> parameter instead of starting a URI fragment or a new parameter.
/// A null keyword is sent as an empty term.
/// </param>
/// <returns>
/// The list that is also stored in <c>lst_structTweetIDs</c>. When an exception occurs it is written
/// to the two log files and the entries collected so far are returned.
/// </returns>
public List<StructTweetIDs> GetTweetData(string keyword)
{
    lst_structTweetIDs = new List<StructTweetIDs>();

    try
    {
        string encodedKeyword = Uri.EscapeDataString(keyword ?? string.Empty);
        string searchURL = "http://search.twitter.com/search.json?q=" + encodedKeyword + "&rpp=100&include_entities=true&result_type=recent";

        string res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");

        // Cut the response at every "from_user" key; the piece before the first key is dropped.
        string[] splitRes = Regex.Split(res_Get_searchURL, "\"from_user\"").Skip(1).ToArray();

        foreach (string item in splitRes)
        {
            // Regex.Split consumed the key itself, so put it back in front of the fragment.
            string modified_Item = "\"from_user\"" + item;

            string from_user = Globussoft.GlobusHttpHelper.ParseJson(modified_Item, "from_user");
            string from_user_id = Globussoft.GlobusHttpHelper.ParseEncodedJson(modified_Item, "from_user_id");
            string id = Globussoft.GlobusHttpHelper.ParseEncodedJson(modified_Item, "id");
            string text = Globussoft.GlobusHttpHelper.ParseJson(modified_Item, "text");

            StructTweetIDs structTweetIDs = new StructTweetIDs();
            structTweetIDs.ID_Tweet = id;
            structTweetIDs.ID_Tweet_User = from_user_id;
            structTweetIDs.username__Tweet_User = from_user;
            structTweetIDs.wholeTweetMessage = text;
            lst_structTweetIDs.Add(structTweetIDs);
        }
    }
    catch (Exception ex)
    {
        // Same message goes to the scraper log (with timestamp) and to the error log (without).
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData() -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData() -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
    }

    return lst_structTweetIDs;
}
/// <summary>
/// Downloads a user timeline from the v1 <c>statuses/user_timeline.json</c> endpoint and scrapes
/// tweet id, tweet text, user id and screen name out of the raw response text.
/// </summary>
/// <param name="keyword">
/// A numeric user id when <c>NumberHelper.ValidateNumber</c> accepts it, otherwise a screen name.
/// </param>
/// <returns>
/// The list that is also stored in <c>lst_structTweetIDs</c>. Fields that cannot be located stay empty;
/// if the download or the splitting throws, the entries collected so far are returned.
/// </returns>
public List<StructTweetIDs> GetTweetData_ByUserName(string keyword)
{
    lst_structTweetIDs = new List<StructTweetIDs>();

    try
    {
        // The numeric branch used to send "user_id =" (stray space before '='), which is not the
        // "user_id" parameter name; both branches now share one URL template.
        string userSelector = NumberHelper.ValidateNumber(keyword) ? "user_id=" : "screen_name=";
        string searchURL = "https://api.twitter.com/1/statuses/user_timeline.json?include_entities=true&include_rts=true&"
            + userSelector + Uri.EscapeDataString(keyword ?? string.Empty)
            + "&count=" + TweetExtractCount;

        string res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");

        // One fragment per occurrence of {"created_at; the piece before the first one is dropped.
        string[] splitRes = Regex.Split(res_Get_searchURL, "{\"created_at").Skip(1).ToArray();

        // The user id is read from the text following the FIRST  user":  in the whole response,
        // so it is identical for every tweet and only needs to be computed once.
        string TweeterUserId = string.Empty;
        string[] userSections = Regex.Split(res_Get_searchURL, "user\":");
        if (userSections.Length > 1)
        {
            string firstUser = userSections[1];
            int idStart = firstUser.IndexOf("{\"id\":");
            if (idStart >= 0)
            {
                string fromId = firstUser.Substring(idStart);
                int idEnd = fromId.IndexOf(",\"id_str");
                if (idEnd >= 0)
                {
                    TweeterUserId = fromId.Substring(0, idEnd).Replace("{\"id\":", string.Empty).Trim();
                }
            }
        }

        foreach (string item in splitRes)
        {
            string modified_Item = "\"from_user\"" + item;
            string Tweetid = Globussoft.GlobusHttpHelper.ParseEncodedJson(modified_Item, "id");

            // Tweet text: everything after "text": up to the next  ,"  with quotes removed.
            // NOTE(review): Replace("'", "'") is a no-op as written; it looks like an HTML entity
            // (e.g. an apostrophe entity) was decoded by a tool - confirm against version control.
            string text = string.Empty;
            int textStart = item.IndexOf("\"text\":");
            if (textStart >= 0)
            {
                string fromText = item.Substring(textStart).Replace("\"text\":", "");
                int textEnd = fromText.IndexOf(",\"");
                if (textEnd >= 0)
                {
                    text = fromText.Substring(0, textEnd).Replace("\"", string.Empty).Replace("'", "'").Trim();
                }
            }

            // Screen name: first "screen_name" occurrence in this fragment up to the next  ,"
            string TweeterUserScreanName = string.Empty;
            int nameStart = item.IndexOf("screen_name");
            if (nameStart >= 0)
            {
                string fromName = item.Substring(nameStart);
                int nameEnd = fromName.IndexOf(",\"");
                if (nameEnd >= 0)
                {
                    TweeterUserScreanName = fromName.Substring(0, nameEnd).Replace("screen_name\":\"", string.Empty).Replace("\"", string.Empty).Replace("'", "'").Trim();
                }
            }

            StructTweetIDs structTweetIDs = new StructTweetIDs();
            structTweetIDs.ID_Tweet = Tweetid;
            structTweetIDs.ID_Tweet_User = TweeterUserId;
            structTweetIDs.username__Tweet_User = TweeterUserScreanName;
            structTweetIDs.wholeTweetMessage = text;
            lst_structTweetIDs.Add(structTweetIDs);
        }
    }
    catch (Exception)
    {
        // Best effort: the file logging that used to sit here was commented out in this method,
        // so a failed download/parse just yields the entries gathered so far.
    }

    return lst_structTweetIDs;
}
/// <summary>
/// Queries the legacy search endpoint (search.twitter.com/search.json) for
/// <paramref name="keyword"/> and turns every result fragment into a <see cref="StructTweetIDs"/>.
/// The page size comes from <c>noOfRecords</c>.
/// </summary>
/// <param name="keyword">
/// Raw search term. It is percent-encoded here so that characters such as '#' or '&amp;'
/// stay inside the <c>q</c> parameter instead of starting a URI fragment or a new parameter.
/// A null keyword is sent as an empty term.
/// </param>
/// <returns>
/// The list that is also stored in <c>lst_structTweetIDs</c>. When an exception occurs it is written
/// to the two log files and the entries collected so far are returned.
/// </returns>
public List<StructTweetIDs> GetTweetData(string keyword)
{
    lst_structTweetIDs = new List<StructTweetIDs>();

    try
    {
        string encodedKeyword = Uri.EscapeDataString(keyword ?? string.Empty);

        // A positive noOfRecords other than 20 is sent as rpp; anything else requests 100.
        // NOTE(review): 20 is presumably the "not configured" default - confirm with the caller.
        string searchURL;
        if (noOfRecords > 0 && noOfRecords != 20)
        {
            searchURL = "http://search.twitter.com/search.json?q=" + encodedKeyword + "&rpp=" + noOfRecords + "&include_entities=true&result_type=recent";
        }
        else
        {
            searchURL = "http://search.twitter.com/search.json?q=" + encodedKeyword + "&rpp=100&include_entities=true&result_type=recent";
        }

        string res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");

        // Cut the response at every "from_user" key; the piece before the first key is dropped.
        string[] splitRes = Regex.Split(res_Get_searchURL, "\"from_user\"").Skip(1).ToArray();

        foreach (string item in splitRes)
        {
            // Regex.Split consumed the key itself, so put it back in front of the fragment.
            string modified_Item = "\"from_user\"" + item;

            string from_user = Globussoft.GlobusHttpHelper.ParseJson(modified_Item, "from_user");
            string from_user_id = Globussoft.GlobusHttpHelper.ParseEncodedJson(modified_Item, "from_user_id");
            string id = Globussoft.GlobusHttpHelper.ParseEncodedJson(modified_Item, "id");
            string text = Globussoft.GlobusHttpHelper.ParseJson(modified_Item, "text");

            StructTweetIDs structTweetIDs = new StructTweetIDs();
            structTweetIDs.ID_Tweet = id;
            structTweetIDs.ID_Tweet_User = from_user_id;
            structTweetIDs.username__Tweet_User = from_user;
            structTweetIDs.wholeTweetMessage = text;
            lst_structTweetIDs.Add(structTweetIDs);
        }
    }
    catch (Exception ex)
    {
        // Same message goes to the scraper log (with timestamp) and to the error log (without).
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData() -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData() -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
    }

    return lst_structTweetIDs;
}
/// <summary>
/// Requests the "with_replies" profile timeline for a user and scrapes tweet id, author screen name,
/// author user id and tweet text out of the returned markup by plain string searching.
/// </summary>
/// <param name="keyword">Profile name; it is URL-escaped and placed in the request path.</param>
/// <returns>
/// The list that is also stored in <c>lst_structTweetIDs</c>, holding at most <c>noOfRecords</c> entries.
/// If an exception escapes the scraping code, the entries collected so far are returned.
/// </returns>
public List<StructTweetIDs> TweetExtractor_ByUserName_New(string keyword)
{
    lst_structTweetIDs = new List<StructTweetIDs>();
    string user_name = string.Empty; // never read in this method
    int i = 0;                       // only referenced by the disabled paging code below
    try
    {
        string HomePagedata = string.Empty;

        // Paging is disabled: the "startAgain" label, its guard and the follow-up request that
        // used max_id are all commented out, so exactly one request is made.
        //startAgain:
        //if (i == 0)
        {
            // Earlier URL variants (plain /timeline, with and without composed_count/count) were
            // kept here as commented-out code; the with_replies form below is the live one.
            HomePagedata = globushttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/i/profiles/show/" + Uri.EscapeDataString(keyword) + "/timeline/with_replies?composed_count=0&count=" + noOfRecords + "&include_available_features=1&include_entities=1"),"","");
        }
        //else: (disabled) when the previous page contained "has_more_items":true, request the next
        //      page with max_id = ID_Tweet of the last collected entry, otherwise return the list.

        if (!string.IsNullOrEmpty(HomePagedata))
        {
            JObject Abc = JObject.Parse(HomePagedata);
            string datahkj = string.Empty;

            // Each pass overwrites datahkj, so only the string form of the LAST enumerated
            // entry of the parsed object survives the loop.
            foreach (object data in Abc)
            {
                datahkj = data.ToString();
            }

            // Cut the markup at each occurrence of the marker (looks like a tweet container's
            // class list); the piece before the first occurrence is dropped.
            string[] splitRes = Regex.Split(datahkj, "ProfileTweet u-textBreak js-tweet js-stream-tweet js-actionable-tweet");
            splitRes = splitRes.Skip(1).ToArray();

            foreach (string item in splitRes)
            {
                string modified_Item = string.Empty; // never read
                string text = string.Empty;
                string TweeterUserId = string.Empty;
                string TweeterUserScreanName = string.Empty;
                string Tweetid = string.Empty;

                // Tweet ID. The marker ends with a backslash followed by a quote, i.e. the quote is
                // expected to be backslash-escaped in the text. If either search fails, Substring
                // throws and the empty catch leaves Tweetid empty.
                try
                {
                    int startindex = item.IndexOf("data-item-id=\\\"");
                    string start = item.Substring(startindex).Replace("data-item-id=\\\"", "");
                    int endindex = start.IndexOf("\\\"");
                    string end = start.Substring(0, endindex);
                    Tweetid = end;
                }
                catch (Exception ex)
                {
                }

                // Tweet user screen name (same extraction pattern as the tweet ID).
                try
                {
                    int startindex = item.IndexOf("data-screen-name=\\\"");
                    string start = item.Substring(startindex).Replace("data-screen-name=\\\"", "");
                    int endindex = start.IndexOf("\\\"");
                    string end = start.Substring(0, endindex);
                    TweeterUserScreanName = end;
                }
                catch (Exception ex)
                {
                }

                // Tweet user id (same extraction pattern as the tweet ID).
                try
                {
                    int startindex = item.IndexOf("data-user-id=\\\"");
                    string start = item.Substring(startindex).Replace("data-user-id=\\\"", "");
                    int endindex = start.IndexOf("\\\"");
                    string end = start.Substring(0, endindex);
                    TweeterUserId = end;
                }
                catch (Exception ex)
                {
                }

                // Tweet text.
                try
                {
                    #region code commented by PUJA
                    // An earlier, disabled version of the text clean-up was kept in this region
                    // as commented-out code; the live implementation follows the region.
                    #endregion

                    GlobusRegex regx = new GlobusRegex();
                    // foreach (string item1 in splitRes)
                    string tweetUserid = string.Empty; // never read

                    // Locate the start of the text element, trying the profile marker first and
                    // the generic marker second.
                    int startindex = item.IndexOf("ProfileTweet-text js-tweet-text u-dir");
                    if (startindex == -1)
                    {
                        startindex = item.IndexOf("js-tweet-text tweet-text");
                    }
                    string start = item.Substring(startindex).Replace("ProfileTweet-text js-tweet-text u-dir", "").Replace("js-tweet-text tweet-text tweet-text-rtl\\\"", "");

                    // The text ends at the first </p>, or failing that at "stream-item-footer".
                    int endindex = start.IndexOf("</p>");
                    if (endindex == -1)
                    {
                        endindex = 0; // immediately overwritten by the next line
                        endindex = start.IndexOf("stream-item-footer");
                    }
                    string end = start.Substring(0, endindex);
                    end = regx.StripTagsRegex(end);

                    // Strip leftover attribute/tag fragments. The replacements run in order, so
                    // earlier ones can change what later ones match.
                    // NOTE(review): Replace(" ", "") as written removes every space, yet later
                    // code still searches for spaces - an HTML entity may have been decoded by a
                    // tool here; confirm against version control.
                    text = end.Replace(" ", "").Replace("a href=", "").Replace("/a", "").Replace("<span", "").Replace("</span", "").Replace("class=\\\"js-display-url\\\"", "").Replace("class=\\\"tco-ellipsis\\\"", "").Replace("class=\\\"invisible\\\"", "").Replace("<strong>", "").Replace("target=\\\"_blank\\\"", "").Replace("class=\\\"twitter-timeline-link\\\"", "").Replace("</strong>", "").Replace("rel=\\\"nofollow\\\" dir=\\\"ltr\\\" data-expanded-url=", "").Replace("dir=\"ltr\"", "");
                    // NOTE(review): the first literal below (three consecutive quotes) is not a valid
                    // C# string, and the trailing "&"->"&", "'"->"'", "<"->"<", ">"->">" pairs are
                    // no-ops as written. This looks like HTML entities that were decoded by a tool -
                    // restore the original literals from version control before building.
                    text = text.Replace(""", "").Replace("<", "").Replace(">", "").Replace("\"", "").Replace("\\", "").Replace("title=", "").Replace("&", "&").Replace("'", "'").Replace("<", "<").Replace(">", ">");

                    // Rebuild the text from the pieces between "http" occurrences: pieces containing
                    // "t.co" are dropped, pieces containing "//" get their "http" prefix back, and a
                    // piece is skipped when its space-free form is already part of the result.
                    string[] array = Regex.Split(text, "http");
                    text = string.Empty;
                    foreach (string itemData in array)
                    {
                        if (!itemData.Contains("t.co"))
                        {
                            string data = string.Empty;
                            if (itemData.Contains("//"))
                            {
                                data = ("http" + itemData).Replace(" span ", string.Empty);
                                if (!text.Contains(itemData.Replace(" ", "")))// && !data.Contains("class") && !text.Contains(data))
                                {
                                    text += data.Replace("u003c", string.Empty).Replace("u003e", string.Empty);
                                }
                            }
                            else
                            {
                                if (!text.Contains(itemData.Replace(" ", "")))
                                {
                                    text += itemData.Replace("u003c", string.Empty).Replace("u003e", string.Empty).Replace("\n", string.Empty).Replace(" ", string.Empty).Replace(" lang=endata-aria-label-part=0", string.Empty);
                                }
                            }
                        }

                        // Checked after every piece: keep only what follows the
                        // "data-aria-label-part=0" marker (the odd suffix is a sentinel end marker
                        // for getBetween).
                        if (text.Contains("data-aria-label-part=0"))
                        {
                            text = globushttpHelper.getBetween(text + ":&$#@", "data-aria-label-part=0", ":&$#@");
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Swallowed: text keeps whatever value it had when the exception occurred.
                }

                StructTweetIDs structTweetIDs = new StructTweetIDs();
                structTweetIDs.ID_Tweet = Tweetid;
                structTweetIDs.ID_Tweet_User = TweeterUserId;
                structTweetIDs.username__Tweet_User = TweeterUserScreanName;
                structTweetIDs.wholeTweetMessage = text;

                // Stop as soon as noOfRecords entries have been collected.
                //lst_structTweetIDs.Add(structTweetIDs);
                if (lst_structTweetIDs.Count < noOfRecords)
                {
                    lst_structTweetIDs.Add(structTweetIDs);
                }
                else
                {
                    break;
                }
            }
        }
        //i++;
        //goto startAgain;
        return lst_structTweetIDs;
    }
    catch (Exception ex)
    {
        // Not logged: download/parse failures simply return what has been collected.
        return lst_structTweetIDs;
    }
}
/// <summary>
/// Pages through the <c>/i/search/timeline</c> results for <paramref name="keyword"/> and scrapes one
/// <see cref="StructTweetIDs"/> per tweet fragment, re-requesting with the scraped scroll cursor until
/// <paramref name="noOfReplies"/> entries are collected or the response contains
/// <c>has_more_items":false</c>.
/// </summary>
/// <param name="keyword">
/// Search phrase. When the trimmed phrase contains a space, spaces are replaced with '+'; the result
/// is URL-escaped when the request URL is built.
/// </param>
/// <param name="noOfReplies">Number of collected entries at which the method returns.</param>
/// <returns>
/// The list that is also stored in <c>lst_structTweetIDs</c>. An exception that escapes the loop is
/// logged and the current list is returned.
/// </returns>
public List<StructTweetIDs> GetTweetData_New_ForCampaign(string keyword,int noOfReplies)
{
    try
    {
        int counter = 0; // 0 until the first request URL has been built
        int counterNoOfReplies = noOfReplies;
        lst_structTweetIDs = new List<StructTweetIDs>();
        string res_Get_searchURL = string.Empty;
        string searchURL = string.Empty;
        string maxid = string.Empty; // never read
        string TweetId = string.Empty; // holds the scraped cursor value between requests

        if (keyword.Trim().Contains(" "))
        {
            keyword = keyword.Replace(" ", "+");
        }

    // Re-entry point for the next page (see the goto statements at the end of the method).
    startAgain:

        // An older pair of URLs (type=recent ... count=noOfRecords, optionally with scroll_cursor)
        // was kept here as commented-out code; the realtime URLs below are the live ones.
        if (counter == 0)
        {
            // First request: no cursor.
            searchURL = "https://twitter.com/i/search/timeline?q=" + Uri.EscapeDataString(keyword) + "&src=typd&f=realtime";
            counter++;
        }
        else
        {
            // Follow-up request: continue from the cursor scraped out of the previous response.
            searchURL = "https://twitter.com/i/search/timeline?q=" + Uri.EscapeDataString(keyword) + "&src=typd&f=realtime&include_available_features=1&include_entities=1&last_note_ts=0&oldest_unread_id=0&scroll_cursor=" + TweetId + "";
        }

        try
        {
            res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
            if (string.IsNullOrEmpty(res_Get_searchURL))
            {
                // One immediate retry on an empty response.
                res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
            }

            // Scrape the cursor (the text starting at "TWEET-" up to the next quote).
            try
            {
                //string sjss = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                string[] splitRes = Regex.Split(res_Get_searchURL, "refresh_cursor");
                //splitRes = splitRes.Skip(1).ToArray();
                foreach (string item in splitRes)
                {
                    // NOTE(review): the pieces come from splitting on "refresh_cursor", so this
                    // condition appears to be unreachable - confirm and remove if so.
                    if (item.Contains("refresh_cursor"))
                    {
                        int startIndex = item.IndexOf("TWEET-");
                        string start = item.Substring(startIndex).Replace("data-user-id=\\\"", "");
                        int endIndex = start.IndexOf("\"");
                        string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                        TweetId = end;
                    }
                    if (item.Contains("scroll_cursor"))
                    {
                        int startIndex = item.IndexOf("TWEET-");
                        string start = item.Substring(startIndex).Replace("data-user-id=\\\"", "");
                        int endIndex = start.IndexOf("\"");
                        string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                        TweetId = end;
                    }
                }
            }
            catch (Exception)
            {
                // Swallowed: TweetId keeps its previous value when the cursor cannot be scraped.
            }
        }
        catch (Exception ex)
        {
            // Request failed: wait two seconds, retry once (unguarded), then log the first error.
            // NOTE(review): the log text names GetPhotoFromUsername() although this method is
            // GetTweetData_New_ForCampaign - likely copy/paste; check log consumers before changing.
            System.Threading.Thread.Sleep(2000);
            res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " -- res_Get_searchURL --> " + ex.Message, Globals.Path_TwitterDataScrapper);
            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- " + keyword + " -- res_Get_searchURL --> " + ex.Message, Globals.Path_TwtErrorLogs);
        }

        if (!string.IsNullOrEmpty(res_Get_searchURL))
        {
            // The response is JSON; the markup to scrape is its "items_html" member.
            object DEserizedData = Newtonsoft.Json.JsonConvert.DeserializeObject<dynamic>(res_Get_searchURL);
            string DataHtml = (string)((JObject)DEserizedData)["items_html"];

            // One fragment per "data-item-id" occurrence; each fragment therefore starts right
            // after that attribute name. The piece before the first occurrence is dropped.
            string[] splitRes = Regex.Split(DataHtml, "data-item-id");
            splitRes = splitRes.Skip(1).ToArray();
            GlobusRegex regx = new GlobusRegex();

            foreach (string item in splitRes)
            {
                // Only fragments that carry a screen name and contain neither "follow-button" nor
                // "Following" are processed.
                if (item.Contains("data-screen-name=") && !item.Contains("follow-button") && !item.Contains("Following"))
                {
                }
                else
                {
                    continue;
                }

                string modified_Item = "\"from_user\"" + item; // never read

                // Value of data-user-id. "null" (the string) marks a failed extraction and makes
                // the entry be skipped further down.
                string id = "";
                try
                {
                    int startIndex = item.IndexOf("data-user-id=");
                    string start = item.Substring(startIndex).Replace("data-user-id=", "");
                    // The value ends at the first of these terminators that can be found.
                    int endIndex = start.IndexOf("data-is-reply-to");
                    if (endIndex == -1)
                    {
                        endIndex = start.IndexOf("data-expanded-footer=");
                    }
                    if (endIndex == -1)
                    {
                        endIndex = start.IndexOf(">");
                    }
                    string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "").Replace("\n", string.Empty);
                    if (end.Contains(" "))
                    {
                        end = end.Split(' ')[0];
                    }
                    id = end;
                }
                catch (Exception ex)
                {
                    id = "null";
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
                }

                // Despite its name this holds the value of data-screen-name (it is stored in
                // username__Tweet_User below).
                string from_user_id = "";
                try
                {
                    int startIndex = item.IndexOf("data-screen-name=");
                    string start = item.Substring(startIndex).Replace("data-screen-name=", "");
                    int endIndex = start.IndexOf("data-name");
                    // If "data-name" is more than 100 characters away, end at "data-user" instead.
                    if (endIndex > 100)
                    {
                        endIndex = 0; // immediately overwritten by the next line
                        endIndex = start.IndexOf("data-user");
                    }
                    string end = start.Substring(0, endIndex).Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", "");
                    from_user_id = end;
                }
                catch (Exception ex)
                {
                    from_user_id = "null";
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwtErrorLogs);
                }

                // First quoted value of the fragment. Because fragments start right after
                // "data-item-id", this is presumably the tweet id (it is stored in ID_Tweet below)
                // even though the variable is called tweetUserid.
                string tweetUserid = string.Empty;
                try
                {
                    int startIndex = item.IndexOf("=\"");
                    string start = item.Substring(startIndex).Replace("=\"", "");
                    int endIndex = start.IndexOf("\"");
                    string end = start.Substring(0, endIndex).Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", "");
                    tweetUserid = end;
                }
                catch (Exception ex)
                {
                    // NOTE(review): this handler resets from_user_id (and logs "from_user_id"),
                    // not tweetUserid - looks like a copy/paste of the handler above; confirm.
                    from_user_id = "null";
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwtErrorLogs);
                }

                // Tweet text.
                string text = string.Empty;
                try
                {
                    // Start at the text marker (with closing quote first, without it second).
                    int startindex = item.IndexOf("js-tweet-text tweet-text\"");
                    if (startindex == -1)
                    {
                        startindex = 0; // immediately overwritten by the next line
                        startindex = item.IndexOf("js-tweet-text tweet-text");
                    }
                    string start = item.Substring(startindex).Replace("js-tweet-text tweet-text\"", "").Replace("js-tweet-text tweet-text tweet-text-rtl\"", "");

                    // The text ends at the first </p>, or failing that at "stream-item-footer".
                    int endindex = start.IndexOf("</p>");
                    if (endindex == -1)
                    {
                        endindex = 0; // immediately overwritten by the next line
                        endindex = start.IndexOf("stream-item-footer");
                    }
                    string end = start.Substring(0, endindex);
                    end = regx.StripTagsRegex(end);

                    // Strip leftover attribute/tag fragments; the replacements run in order.
                    // NOTE(review): Replace(" ", "") as written removes every space, yet later
                    // code still searches for spaces - an HTML entity may have been decoded by a
                    // tool here; confirm against version control.
                    text = end.Replace(" ", "").Replace("a href=", "").Replace("/a", "").Replace("<span", "").Replace("</span", "").Replace("class=\\\"js-display-url\\\"", "").Replace("class=\\\"tco-ellipsis\\\"", "").Replace("class=\\\"invisible\\\"", "").Replace("<strong>", "").Replace("target=\\\"_blank\\\"", "").Replace("class=\\\"twitter-timeline-link\\\"", "").Replace("</strong>", "").Replace("rel=\\\"nofollow\\\" dir=\\\"ltr\\\" data-expanded-url=", "");
                    // NOTE(review): the first literal below (three consecutive quotes) is not a
                    // valid C# string; it looks like an HTML entity that was decoded by a tool -
                    // restore the original literal from version control before building.
                    text = text.Replace(""", "").Replace("<", "").Replace(">", "").Replace("\"", "").Replace("\\", "").Replace("title=", "");

                    // Rebuild the text from the pieces between "http" occurrences: pieces containing
                    // "t.co" are dropped, pieces containing "//" get their "http" prefix back, and a
                    // piece is skipped when its space-free form is already part of the result.
                    string[] array = Regex.Split(text, "http");
                    text = string.Empty;
                    foreach (string itemData in array)
                    {
                        if (!itemData.Contains("t.co"))
                        {
                            string data = string.Empty;
                            if (itemData.Contains("//"))
                            {
                                data = ("http" + itemData).Replace(" span ", string.Empty);
                                if (!text.Contains(itemData.Replace(" ", "")))// && !data.Contains("class") && !text.Contains(data))
                                {
                                    text += data.Replace("u003c", string.Empty).Replace("u003e", string.Empty).Replace("lang=en data-aria-label-part=0",string.Empty);
                                }
                            }
                            else
                            {
                                if (!text.Contains(itemData.Replace(" ", "")))
                                {
                                    text += itemData.Replace("u003c", string.Empty).Replace("u003e", string.Empty).Replace("lang=en data-aria-label-part=0",string.Empty);
                                }
                            }
                        }
                    }

                    // Keep only what follows the "data-aria-label-part=0" marker (the odd suffix
                    // is a sentinel end marker for getBetween).
                    if (text.Contains("data-aria-label-part=0"))
                    {
                        text = globushttpHelper.getBetween(text + ":&$#@", "data-aria-label-part=0", ":&$#@");
                    }
                }
                catch { };

                StructTweetIDs structTweetIDs = new StructTweetIDs();

                // Entries whose data-user-id extraction failed are not added.
                if (id != "null")
                {
                    structTweetIDs.ID_Tweet = tweetUserid;
                    structTweetIDs.ID_Tweet_User = id;
                    structTweetIDs.username__Tweet_User = from_user_id;
                    structTweetIDs.wholeTweetMessage = text;
                    lst_structTweetIDs.Add(structTweetIDs);
                }

                // Enough entries collected: return immediately.
                if (lst_structTweetIDs.Count >= counterNoOfReplies)
                {
                    return lst_structTweetIDs;
                }
            }

            // NOTE(review): Distinct() uses the default equality of StructTweetIDs; whether that
            // removes duplicate tweets depends on how that type is declared - verify.
            lst_structTweetIDs = lst_structTweetIDs.Distinct().ToList();
        }

        // NOTE(review): both branches below are identical, so the count comparison has no effect.
        // The loop only ends here when the response contains has_more_items":false; confirm that
        // an empty or repeating response cannot keep it running.
        if (lst_structTweetIDs.Count > counterNoOfReplies)
        {
            if (res_Get_searchURL.Contains("has_more_items\":false"))
            {
                return lst_structTweetIDs;
            }
            else goto startAgain;
        }
        else
        {
            if (res_Get_searchURL.Contains("has_more_items\":false"))
            {
                return lst_structTweetIDs;
            }
            else goto startAgain;
        }
    }
    catch (Exception ex)
    {
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetPhotoFromUsername() -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
    }
    return lst_structTweetIDs;
}
/// <summary>
/// Requests the profile timeline for a user and scrapes tweet id, author screen name, author user id
/// and tweet text out of the returned markup. Fragments containing <c>data-retweet-id=</c> are skipped,
/// as is a leading fragment containing "Pinned Tweet".
/// </summary>
/// <param name="keyword">Profile name; it is URL-escaped and placed in the request path.</param>
/// <returns>
/// The list that is also stored in <c>lst_structTweetIDs</c>, holding at most <c>noOfRecords</c> entries.
/// If the download or the JSON parsing throws, the entries collected so far are returned.
/// </returns>
public List<StructTweetIDs> TweetExtractor_ByUserName_New_New(string keyword)
{
    lst_structTweetIDs = new List<StructTweetIDs>();

    try
    {
        // Paging (a commented-out "startAgain" loop using max_id) is disabled, so exactly one
        // request is made. The profile name is escaped like in TweetExtractor_ByUserName_New.
        string HomePagedata = globushttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/i/profiles/show/" + Uri.EscapeDataString(keyword) + "/timeline?include_available_features=1&include_entities=1&composed_count=0&count=" + noOfRecords + ""), "", "");

        if (string.IsNullOrEmpty(HomePagedata))
        {
            return lst_structTweetIDs;
        }

        JObject timeline = JObject.Parse(HomePagedata);

        // Each pass overwrites the variable, so only the string form of the LAST enumerated
        // entry of the parsed object is scraped.
        string datahkj = string.Empty;
        foreach (object data in timeline)
        {
            datahkj = data.ToString();
        }

        // Try the stream-item marker first; when it does not occur, fall back to the
        // ProfileTweet marker. The piece before the first marker is dropped.
        string[] splitRes = Regex.Split(datahkj, "js-stream-item stream-item stream-item expanding-stream-item");
        if (splitRes.Length == 1)
        {
            splitRes = Regex.Split(datahkj, "ProfileTweet u-textBreak js-tweet js-stream-tweet js-actionable-tweet");
        }
        splitRes = splitRes.Skip(1).ToArray();

        // Guarded: with no fragments the old code indexed [0] and relied on the outer catch.
        if (splitRes.Length > 0 && splitRes[0].Contains("Pinned Tweet"))
        {
            splitRes = splitRes.Skip(1).ToArray();
        }

        foreach (string item in splitRes)
        {
            if (item.Contains("data-retweet-id="))
            {
                continue;
            }

            string Tweetid = ReadBackslashQuotedAttribute(item, "data-item-id=\\\"");
            string TweeterUserScreanName = ReadBackslashQuotedAttribute(item, "data-screen-name=\\\"");
            string TweeterUserId = ReadBackslashQuotedAttribute(item, "data-user-id=\\\"");

            string text = string.Empty;
            try
            {
                // Text between the first '>' and the next '<' after the text-size marker.
                string[] getText = Regex.Split(item, "TweetTextSize TweetTextSize--16px js-tweet-text tweet-text");
                if (getText.Length > 1)
                {
                    try
                    {
                        text = Utils.getBetween(getText[1], ">", "<");
                    }
                    catch (Exception)
                    {
                        // Helper failed: continue with the empty text.
                    }
                }

                // Strip leftover attribute/tag fragments; the replacements run in order.
                // NOTE(review): Replace(" ", "") as written removes every space from the text -
                // confirm that is intended (an HTML entity may have been meant).
                text = text.Replace("class=\\\"invisible\\\"", "").Replace("<b", "").Replace("</b", "").Replace("<s", "").Replace("</s", "").Replace("class=\\\"twitter-atreply pretty-link\\\" dir=\\\"ltr\\\"", "").Replace(">", "").Replace("class=\\\"js-display-url\\\"", "").Replace("class=\\\"invisible\\\">", "").Replace("class=\\\"tco-ellipsis\\\"", "").Replace("class=\\\"invisible\\\"", "").Replace(" ", "").Replace("</a", "").Replace("</span", "").Replace("<span", "").Replace("<a href=", "").Replace("rel=nofollow dir=ltr data-expanded-url=", "").Replace("rel=\\\"nofollow\\\" dir=\\\"ltr\\\" data-expanded-url=\\\"", "").Replace("class=\\\"twitter-timeline-link\\\" target=\\\"_blank\\\" title=\\\"", "");
                text = text.Replace("\"", "").Replace("<", "").Replace("\\\"", "").Replace("\\", "");

                // Rebuild the text from the pieces between "http" occurrences: pieces containing
                // "t.co" are dropped, a piece is skipped when its space-free form is already part
                // of the result, and pieces containing "//" get their "http" prefix back.
                string[] pieces = Regex.Split(text, "http");
                text = string.Empty;
                foreach (string itemData in pieces)
                {
                    if (itemData.Contains("t.co") || text.Contains(itemData.Replace(" ", "")))
                    {
                        continue;
                    }
                    text += itemData.Contains("//") ? "http" + itemData : itemData;
                }
            }
            catch (Exception)
            {
                // Best effort, as before: text keeps the value it had when the failure occurred
                // (e.g. when the getBetween helper hands back null).
            }

            // Stop as soon as noOfRecords entries have been collected.
            if (lst_structTweetIDs.Count >= noOfRecords)
            {
                break;
            }

            StructTweetIDs structTweetIDs = new StructTweetIDs();
            structTweetIDs.ID_Tweet = Tweetid;
            structTweetIDs.ID_Tweet_User = TweeterUserId;
            structTweetIDs.username__Tweet_User = TweeterUserScreanName;
            structTweetIDs.wholeTweetMessage = text;
            lst_structTweetIDs.Add(structTweetIDs);
        }

        return lst_structTweetIDs;
    }
    catch (Exception)
    {
        // Not logged: download/parse failures simply return what has been collected.
        return lst_structTweetIDs;
    }
}

/// <summary>
/// Returns the text between the first occurrence of <paramref name="marker"/> and the next
/// backslash-quote sequence, or an empty string when either cannot be found.
/// </summary>
/// <param name="fragment">Markup fragment to search.</param>
/// <param name="marker">Attribute prefix including its opening backslash-quote.</param>
private static string ReadBackslashQuotedAttribute(string fragment, string marker)
{
    int markerAt = fragment.IndexOf(marker);
    if (markerAt < 0)
    {
        return string.Empty;
    }

    // All occurrences of the marker are removed from the remainder, as the inline code did.
    string tail = fragment.Substring(markerAt).Replace(marker, "");
    int closingAt = tail.IndexOf("\\\"");
    return closingAt < 0 ? string.Empty : tail.Substring(0, closingAt);
}
/// <summary>
/// Queries the search.twitter.com JSON search endpoint for <paramref name="keyword"/> and collects the
/// tweet id, author id, author screen name and text of the returned tweets. The "next_page" link of
/// each response is followed until <c>noOfRecords</c> tweets were processed or no further page is announced.
/// </summary>
/// <param name="keyword">Search phrase. Spaces are replaced by '+' before the phrase is URL-escaped.</param>
/// <returns>
/// The list stored in <c>lst_structTweetIDs</c>, which is re-created on every call. Download and parsing
/// failures are written to the scraper and error log files, so the list may be empty or partial.
/// </returns>
public List<StructTweetIDs> KeywordStructData(string keyword)
{
    const string searchEndpoint = "http://search.twitter.com/search.json";

    lst_structTweetIDs = new List<StructTweetIDs>();

    // Every failure goes to both log files; only the scraper log line carries a timestamp.
    Action<string> logError = message =>
    {
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> " + message, Globals.Path_TwitterDataScrapper);
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(message, Globals.Path_TwtErrorLogs);
    };

    try
    {
        // Number of tweets processed so far, including those skipped because their id was unreadable.
        int counter = 0;

        if (keyword.Trim().Contains(" "))
        {
            keyword = keyword.Replace(" ", "+");
        }

        string searchURL = searchEndpoint + "?q=" + Uri.EscapeDataString(keyword) + "&result_type=mixed&count=" + noOfRecords;

        while (true)
        {
            string response;
            try
            {
                response = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                if (string.IsNullOrEmpty(response))
                {
                    // One immediate retry when the first attempt came back empty.
                    response = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                }
            }
            catch (Exception ex)
            {
                logError("Error --> KeywordStructData() -- " + keyword + " -- res_Get_searchURL --> " + ex.Message);
                System.Threading.Thread.Sleep(2000);
                response = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
            }

            if (string.IsNullOrEmpty(response))
            {
                // Nothing to parse and no next page to follow; requesting the same URL again could loop forever.
                break;
            }

            // Work out the follow-up request first, but still parse the current page when there is none.
            string nextPageUrl = null;
            int nextPageIndex = response.IndexOf("\"next_page\":");
            if (nextPageIndex > 0)
            {
                string tail = response.Substring(nextPageIndex).Replace("\"next_page\":\"", "");
                int tailEnd = tail.IndexOf("\",");
                if (tailEnd >= 0)
                {
                    nextPageUrl = searchEndpoint + tail.Substring(0, tailEnd).Replace("from_user_id\":", "");
                }
            }

            // Every tweet object starts with {"created_at"; the chunk before the first one is the envelope.
            foreach (string item in Regex.Split(response, "{\"created_at\"").Skip(1))
            {
                if (counter >= noOfRecords)
                {
                    break;
                }
                counter++;

                // The extraction below relies on Substring throwing when a marker is missing.
                string id;
                try
                {
                    string start = item.Substring(item.IndexOf("\"id_str\""));
                    id = start.Substring(0, start.IndexOf("\","))
                        .Replace("id_str", "").Replace("\"", "").Replace(":", "")
                        .Replace("{", "").Replace("}", "").Replace("]", "");
                }
                catch (Exception ex)
                {
                    // A tweet without an id is never added to the result, so skip the remaining fields.
                    logError("Error --> KeywordStructData() -- id -- " + keyword + " --> " + ex.Message);
                    continue;
                }

                string from_user_id;
                try
                {
                    string start = item.Substring(item.IndexOf("from_user_id\":"));
                    from_user_id = start.Substring(0, start.IndexOf(",\"from_user_id_str"))
                        .Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "")
                        .Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", "");
                }
                catch (Exception ex)
                {
                    // "null" is the placeholder this method has always stored for an unreadable user id.
                    from_user_id = "null";
                    logError("Error --> KeywordStructData() -- " + keyword + " -- from_user_id --> " + ex.Message);
                }

                string from_user = "";
                try
                {
                    // Prefer "screen_name" when present, otherwise fall back to "from_user".
                    string fieldName = item.Contains("\"screen_name\"") ? "screen_name" : "from_user";
                    string start = item.Substring(item.IndexOf("\"" + fieldName + "\""));
                    from_user = start.Substring(0, start.IndexOf(",\""))
                        .Replace(fieldName, "").Replace("\"", "").Replace(":", "")
                        .Replace("{", "").Replace("}", "").Replace("]", "");
                }
                catch (Exception ex)
                {
                    // Only the screen name is lost here; the user id parsed above stays intact.
                    logError("Error --> KeywordStructData() -- " + keyword + " -- from_user --> " + ex.Message);
                }

                string text = string.Empty;
                try
                {
                    string start = item.Substring(item.IndexOf("\"text\":")).Replace("\"text\":", "");
                    int endIndex = start.IndexOf(",\"");
                    if (endIndex == -1)
                    {
                        endIndex = start.IndexOf("}");
                    }
                    text = start.Substring(0, endIndex)
                        .Replace("screen_name", "").Replace("\"", "").Replace(":", "")
                        .Replace("{", "").Replace("}", "").Replace("]", string.Empty);
                }
                catch (Exception ex)
                {
                    logError("Error --> KeywordStructData() -- " + keyword + " -- text --> " + ex.Message);
                }

                lst_structTweetIDs.Add(new StructTweetIDs
                {
                    ID_Tweet = id,
                    ID_Tweet_User = from_user_id,
                    username__Tweet_User = from_user,
                    wholeTweetMessage = text
                });
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(id + ":" + from_user_id, Globals.Path_keywordFollowerScrapedData);
            }

            if (nextPageUrl == null || counter >= noOfRecords)
            {
                break;
            }
            searchURL = nextPageUrl;
        }
    }
    catch (Exception ex)
    {
        logError("Error --> KeywordStructData() -- " + keyword + " --> " + ex.Message);
    }

    return lst_structTweetIDs;
}
/// <summary>
/// Returns tweet data for the wait-and-reply feature: pages through twitter.com's search timeline for
/// <paramref name="keyword"/> and keeps the tweets whose cleaned text contains the keyword.
/// </summary>
/// <param name="keyword">Search phrase; URL-escaped for the request and matched literally against the tweet text.</param>
/// <returns>
/// The list stored in <c>lst_structTweetIDs</c>, which is re-created on every call. At most
/// <c>noOfRecords</c> pages are requested and paging stops once the list holds <c>noOfRecords</c> entries.
/// Failures are logged rather than thrown, so the list may be empty or partial.
/// </returns>
public List<StructTweetIDs> GetTweetData_WaitReply(string keyword)
{
    const string itemSeparator = "js-stream-item stream-item stream-item expanding-stream-item";

    lst_structTweetIDs = new List<StructTweetIDs>();

    // Per-tweet parsing problems go to the scraper log, paging/overall problems to the tweet-creator log;
    // both are mirrored without timestamp into the common error log.
    Action<string> logScraperError = message =>
    {
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> " + message, Globals.Path_TwitterDataScrapper);
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(message, Globals.Path_TwtErrorLogs);
    };
    Action<string> logCreatorError = message =>
    {
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> " + message, Globals.Path_TweetCreatorErroLog);
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(message, Globals.Path_TwtErrorLogs);
    };

    // Returns the text between the first occurrence of marker and the next double quote.
    // Substring throws ArgumentOutOfRangeException when the marker or the closing quote is missing.
    Func<string, string, string> readAttribute = (html, marker) =>
    {
        string tail = html.Substring(html.IndexOf(marker)).Replace(marker, "");
        return tail.Substring(0, tail.IndexOf("\""));
    };

    try
    {
        string scroll_cursor = "0";
        GlobusHttpHelper HttpHelper = new GlobusHttpHelper();
        GlobusRegex regx = new GlobusRegex();

        for (int page = 0; page < noOfRecords; page++)
        {
            string pgsrcs = HttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/i/search/timeline?q=" + Uri.EscapeDataString(keyword) + "&src=typd&f=realtime&include_available_features=1&include_entities=1&last_note_ts=0&scroll_cursor=" + scroll_cursor), "", "");

            // Pick up the cursor for the next page; on failure the previous cursor is reused.
            try
            {
                string afterCursor = pgsrcs.Substring(pgsrcs.IndexOf("scroll_cursor")).Replace("scroll_cursor", string.Empty);
                int refreshIndex = afterCursor.IndexOf("refresh_cursor");
                if (refreshIndex >= 0)
                {
                    scroll_cursor = afterCursor.Substring(0, refreshIndex)
                        .Replace("\\", string.Empty).Replace("\"", string.Empty)
                        .Replace(",", string.Empty).Replace(":", string.Empty).Trim();
                }
                else
                {
                    scroll_cursor = afterCursor.Substring(0, afterCursor.IndexOf("\"}"));
                }
            }
            catch (Exception ex)
            {
                logCreatorError("Error --> GetTweetData_WaitReply() --> Getting Maxid --> " + ex.Message);
            }

            // The rendered tweets live in the "items_html" member of the JSON response.
            JObject timeline = JObject.Parse(pgsrcs);
            string[] tweetBlocks = Regex.Split((string)timeline["items_html"], itemSeparator).Skip(1).ToArray();

            foreach (string item in tweetBlocks)
            {
                string from_user_id = string.Empty;
                string from_user_name = string.Empty;
                string id = string.Empty;
                string text = string.Empty;

                // Tweet id
                try
                {
                    id = readAttribute(item, "data-item-id=\"");
                }
                catch (Exception ex)
                {
                    logScraperError("Error --> GetTweetData_WaitReply() -- " + keyword + " --> id --> " + ex.Message);
                }

                // Tweet author's screen name
                try
                {
                    from_user_name = readAttribute(item, "data-screen-name=\"");
                }
                catch (Exception ex)
                {
                    logScraperError("Error --> GetTweetData_WaitReply() -- " + keyword + " --> from_user_name --> " + ex.Message);
                }

                // Tweet author's user id
                try
                {
                    from_user_id = readAttribute(item, "data-user-id=\"");
                }
                catch (Exception ex)
                {
                    logScraperError("Error --> GetTweetData_WaitReply() -- " + keyword + " --> from_user_id --> " + ex.Message);
                }

                // Tweet text
                try
                {
                    string start = item.Substring(item.IndexOf("js-tweet-text tweet-text\"")).Replace("js-tweet-text tweet-text\"", "");
                    string stripped = regx.StripTagsRegex(start.Substring(0, start.IndexOf("</p>")));

                    // NOTE(review): the "&nbsp;" and "&quot;" literals were reconstructed; the reviewed copy of
                    // this file showed them entity-decoded (" " and an unterminated """). Confirm against
                    // version control.
                    text = stripped.Replace("&nbsp;", "").Replace("a href=", "").Replace("/a", "")
                        .Replace("<span", "").Replace("</span", "")
                        .Replace("class=\\\"js-display-url\\\"", "").Replace("class=\\\"tco-ellipsis\\\"", "")
                        .Replace("class=\\\"invisible\\\"", "").Replace("<strong>", "")
                        .Replace("target=\\\"_blank\\\"", "").Replace("class=\\\"twitter-timeline-link\\\"", "")
                        .Replace("</strong>", "")
                        .Replace("rel=\\\"nofollow\\\" dir=\\\"ltr\\\" data-expanded-url=", "");
                    text = text.Replace("&quot;", "").Replace("<", "").Replace(">", "")
                        .Replace("\"", "").Replace("\\", "").Replace("title=", "");

                    // Rebuild the text without t.co short links and without fragments already present.
                    string[] fragments = Regex.Split(text, "http");
                    text = string.Empty;
                    foreach (string fragment in fragments)
                    {
                        if (fragment.Contains("t.co") || text.Contains(fragment.Replace(" ", "")))
                        {
                            continue;
                        }
                        text += fragment.Contains("//") ? "http" + fragment : fragment;
                    }
                }
                catch (Exception ex)
                {
                    logScraperError("Error --> GetTweetData_WaitReply() -- " + keyword + " --> text --> " + ex.Message);
                }

                StructTweetIDs structTweetIDs = new StructTweetIDs
                {
                    ID_Tweet = id,
                    ID_Tweet_User = from_user_id,
                    username__Tweet_User = from_user_name,
                    wholeTweetMessage = text
                };

                Log("[ " + DateTime.Now + " ] => [ " + id + " ]");
                Log("[ " + DateTime.Now + " ] => [ " + from_user_id + " ]");
                Log("[ " + DateTime.Now + " ] => [ " + from_user_name + " ]");
                Log("[ " + DateTime.Now + " ] => [ " + text + " ]");
                Log("---------------------------------------------------------------------------------------------------------------------------------------------------");

                // Only tweets that literally mention the keyword are kept.
                if (text.Contains(keyword))
                {
                    lst_structTweetIDs.Add(structTweetIDs);
                    lst_structTweetIDs = lst_structTweetIDs.Distinct().ToList();
                }

                if (lst_structTweetIDs.Count == noOfRecords)
                {
                    break;
                }
            }

            if (lst_structTweetIDs.Count == noOfRecords)
            {
                break;
            }
        }
    }
    catch (Exception ex)
    {
        try
        {
            logCreatorError("Error --> GetTweetData_WaitReply() --> " + ex.Message);
        }
        catch (Exception)
        {
            // The log files themselves are unavailable; return what was collected, as this method
            // has always done when logging failed.
        }
    }

    return lst_structTweetIDs;
}
/// <summary>
/// Requests one page of twitter.com's search timeline (relevance order) for <paramref name="keyword"/>
/// and extracts tweet id, author id, author screen name and cleaned text of every tweet on it.
/// </summary>
/// <param name="keyword">Search phrase; it is URL-escaped before being placed in the request.</param>
/// <returns>
/// The list stored in <c>lst_structTweetIDs</c>, which is re-created on every call. Failures are logged
/// rather than thrown, so the list may be empty or partial.
/// </returns>
public List<StructTweetIDs> GetTweetData_New(string keyword)
{
    const string itemSeparator = "js-stream-item stream-item stream-item expanding-stream-item";

    lst_structTweetIDs = new List<StructTweetIDs>();

    // Every failure goes to both log files; only the scraper log line carries a timestamp.
    Action<string> logError = message =>
    {
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> " + message, Globals.Path_TwitterDataScrapper);
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(message, Globals.Path_TwtErrorLogs);
    };

    // Returns the text between the first occurrence of marker and the next double quote.
    // Substring throws ArgumentOutOfRangeException when the marker or the closing quote is missing.
    Func<string, string, string> readAttribute = (html, marker) =>
    {
        string tail = html.Substring(html.IndexOf(marker)).Replace(marker, "");
        return tail.Substring(0, tail.IndexOf("\""));
    };

    try
    {
        // NOTE(review): max_id is filled with noOfRecords (a record count, not a tweet id) - kept
        // unchanged, confirm this is intended.
        string searchURL = "https://twitter.com/i/search/timeline?type=relevance&src=typd&include_available_features=1&include_entities=1&max_id=" + noOfRecords + "&q=" + Uri.EscapeDataString(keyword);
        string res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), string.Empty, string.Empty);

        // The rendered tweets live in the "items_html" member of the JSON response.
        JObject timeline = JObject.Parse(res_Get_searchURL);
        string[] tweetBlocks = Regex.Split((string)timeline["items_html"], itemSeparator).Skip(1).ToArray();
        GlobusRegex regx = new GlobusRegex();

        foreach (string item in tweetBlocks)
        {
            string from_user_id = string.Empty;
            string from_user_name = string.Empty;
            string id = string.Empty;
            string text = string.Empty;

            // Tweet id
            try
            {
                id = readAttribute(item, "data-item-id=\"");
            }
            catch (Exception ex)
            {
                logError("Error --> GetTweetData_New() -- " + keyword + " --> id --> " + ex.Message);
            }

            // Tweet author's screen name
            try
            {
                from_user_name = readAttribute(item, "data-screen-name=\"");
            }
            catch (Exception ex)
            {
                logError("Error --> GetTweetData_New() -- " + keyword + " --> from_user_name --> " + ex.Message);
            }

            // Tweet author's user id
            try
            {
                from_user_id = readAttribute(item, "data-user-id=\"");
            }
            catch (Exception ex)
            {
                logError("Error --> GetTweetData_New() -- " + keyword + " --> from_user_id --> " + ex.Message);
            }

            // Tweet text
            try
            {
                string start = item.Substring(item.IndexOf("js-tweet-text tweet-text\"")).Replace("js-tweet-text tweet-text\"", "");
                string stripped = regx.StripTagsRegex(start.Substring(0, start.IndexOf("</p>")));

                // NOTE(review): the "&nbsp;" and "&quot;" literals were reconstructed; the reviewed copy of
                // this file showed them entity-decoded (" " and an unterminated """). Confirm against
                // version control.
                text = stripped.Replace("&nbsp;", "").Replace("a href=", "").Replace("/a", "")
                    .Replace("<span", "").Replace("</span", "")
                    .Replace("class=\\\"js-display-url\\\"", "").Replace("class=\\\"tco-ellipsis\\\"", "")
                    .Replace("class=\\\"invisible\\\"", "").Replace("<strong>", "")
                    .Replace("target=\\\"_blank\\\"", "").Replace("class=\\\"twitter-timeline-link\\\"", "")
                    .Replace("</strong>", "")
                    .Replace("rel=\\\"nofollow\\\" dir=\\\"ltr\\\" data-expanded-url=", "");
                text = text.Replace("&quot;", "").Replace("<", "").Replace(">", "")
                    .Replace("\"", "").Replace("\\", "").Replace("title=", "");

                // Rebuild the text without t.co short links and without fragments already present.
                string[] fragments = Regex.Split(text, "http");
                text = string.Empty;
                foreach (string fragment in fragments)
                {
                    if (fragment.Contains("t.co") || text.Contains(fragment.Replace(" ", "")))
                    {
                        continue;
                    }
                    text += fragment.Contains("//") ? "http" + fragment : fragment;
                }
            }
            catch (Exception ex)
            {
                logError("Error --> GetTweetData_New() -- " + keyword + " --> text --> " + ex.Message);
            }

            Log("[ " + DateTime.Now + " ] => [ " + id + " ]");
            Log("[ " + DateTime.Now + " ] => [ " + from_user_id + " ]");
            Log("[ " + DateTime.Now + " ] => [ " + from_user_name + " ]");
            Log("[ " + DateTime.Now + " ] => [ " + text + " ]");
            Log("---------------------------------------------------------------------------------------------------------------------------------------------------");

            lst_structTweetIDs.Add(new StructTweetIDs
            {
                ID_Tweet = id,
                ID_Tweet_User = from_user_id,
                username__Tweet_User = from_user_name,
                wholeTweetMessage = text
            });
            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(id + ":" + from_user_id, Globals.Path_keywordFollowerScrapedData);
        }
    }
    catch (Exception ex)
    {
        // Record the failure before returning; previously the logging sat after the return statement
        // and never ran.
        try
        {
            logError("Error --> GetTweetData_New() -- " + keyword + " --> " + ex.Message);
        }
        catch (Exception)
        {
            // The log files themselves are unavailable; keep the old contract of never throwing from here.
        }
    }

    return lst_structTweetIDs;
}
/// <summary>
/// Collects tweet id / author id pairs from twitter.com's timeline JSON. When <c>RetweetFromUserName</c>
/// is false, <paramref name="keyword"/> is used as a search phrase and the search timeline is paged via
/// its scroll cursor; otherwise it is used as the profile name in the profile timeline URL. Every
/// accepted tweet id is also enqueued into <c>queTweetId</c>.
/// </summary>
/// <param name="keyword">Search phrase or profile name. Spaces are replaced by '+' before URL-escaping.</param>
/// <returns>
/// The list stored in <c>lst_structTweetIDs</c>, which is re-created on every call. Only
/// <c>ID_Tweet</c> and <c>ID_Tweet_User</c> are filled. Failures are logged rather than thrown,
/// so the list may be empty or partial.
/// </returns>
public List<StructTweetIDs> NewKeywordStructDataForOnlyTweet(string keyword)
{
    lst_structTweetIDs = new List<StructTweetIDs>();

    // Every failure goes to both log files; only the scraper log line carries a timestamp.
    Action<string> logError = message =>
    {
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> " + message, Globals.Path_TwitterDataScrapper);
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(message, Globals.Path_TwtErrorLogs);
    };

    try
    {
        BaseLib.GlobusRegex regx = new GlobusRegex();
        string scrollCursor = string.Empty;
        string previousUrl = null;
        bool firstRequest = true;

        if (keyword.Trim().Contains(" "))
        {
            keyword = keyword.Replace(" ", "+");
        }

        // NOTE(review): as before, noOfRecords does not limit this loop; it runs until the response
        // reports has_more_items:false, nothing was collected, or the request cannot advance.
        while (true)
        {
            string searchURL;
            if (RetweetFromUserName)
            {
                searchURL = "https://twitter.com/i/profiles/show/" + Uri.EscapeDataString(keyword) + "/timeline/with_replies?composed_count=0&count=" + RetweetExtractcount + "&include_available_features=1&include_entities=1";
            }
            else if (firstRequest)
            {
                searchURL = "https://twitter.com/i/search/timeline?q=" + Uri.EscapeDataString(keyword) + "&src=typd&f=realtime";
            }
            else
            {
                searchURL = "https://twitter.com/i/search/timeline?q=" + Uri.EscapeDataString(keyword) + "&src=typd&f=realtime&include_available_features=1&include_entities=1&last_note_ts=0&oldest_unread_id=0&scroll_cursor=" + scrollCursor;
            }
            firstRequest = false;

            if (searchURL == previousUrl)
            {
                // The request would be identical to the previous one (the cursor did not advance, or the
                // profile URL carries no cursor), so it could only deliver the same page again.
                break;
            }
            previousUrl = searchURL;

            string response;
            try
            {
                response = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                if (string.IsNullOrEmpty(response))
                {
                    // One immediate retry when the first attempt came back empty.
                    response = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                }
            }
            catch (Exception ex)
            {
                logError("Error --> NewKeywordStructDataForOnlyTweet() -- " + keyword + " -- res_Get_searchURL --> " + ex.Message);
                System.Threading.Thread.Sleep(2000);
                response = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
            }

            if (string.IsNullOrEmpty(response))
            {
                break;
            }

            // Best effort: remember the cursor ("TWEET-...") that follows "scroll_cursor" for the next page.
            try
            {
                foreach (string part in Regex.Split(response, "refresh_cursor"))
                {
                    if (!part.Contains("scroll_cursor"))
                    {
                        continue;
                    }
                    string start = part.Substring(part.IndexOf("TWEET-")).Replace("data-user-id=\\\"", "");
                    scrollCursor = start.Substring(0, start.IndexOf("\""))
                        .Replace("id_str", "").Replace("\"", "").Replace(":", "")
                        .Replace("{", "").Replace("}", "").Replace("]", "");
                }
            }
            catch (Exception)
            {
                // No usable cursor in this response; the previous value is kept and the identical-URL
                // check above ends the paging if the request cannot advance.
            }

            // Each chunk after a "data-item-id" marker starts with =\"<tweet id>\" in the raw JSON text.
            foreach (string item in Regex.Split(response, "data-item-id").Skip(1))
            {
                if (!item.Contains("data-screen-name=") || item.Contains("js-actionable-user js-profile-popup-actionable"))
                {
                    continue;
                }

                // The extraction below relies on Substring throwing when a marker is missing.
                string userId;
                try
                {
                    string start = item.Substring(item.IndexOf("data-user-id=")).Replace("data-user-id=\\\"", "");
                    userId = start.Substring(0, start.IndexOf("\\\""))
                        .Replace("id_str", "").Replace("\"", "").Replace(":", "")
                        .Replace("{", "").Replace("}", "").Replace("]", "");
                }
                catch (Exception ex)
                {
                    userId = "null";
                    logError("Error --> NewKeywordStructDataForOnlyTweet() -- user id -- " + keyword + " --> " + ex.Message);
                }

                string tweetId;
                try
                {
                    string start = item.Substring(item.IndexOf("=\\\"")).Replace("=\\\"", "");
                    tweetId = start.Substring(0, start.IndexOf("\\\""))
                        .Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "")
                        .Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", "");
                }
                catch (Exception ex)
                {
                    tweetId = "null";
                    logError("Error --> NewKeywordStructDataForOnlyTweet() -- " + keyword + " -- tweet id --> " + ex.Message);
                }

                // Reset per tweet so a parsing failure cannot reuse the previous tweet's text.
                string text = string.Empty;
                try
                {
                    string start = item.Substring(item.IndexOf("js-tweet-text tweet-text")).Replace("js-tweet-text tweet-text", "");
                    int endindex = start.IndexOf("</p>");
                    if (endindex == -1)
                    {
                        endindex = start.IndexOf("stream-item-footer");
                    }
                    string stripped = regx.StripTagsRegex(start.Substring(0, endindex));

                    // NOTE(review): the entity literals ("&nbsp;", "&quot;", "&amp;", "&#39;", "&lt;", "&gt;")
                    // were reconstructed; the reviewed copy of this file showed them entity-decoded
                    // (e.g. Replace("&", "&")). Confirm against version control.
                    text = stripped.Replace("&nbsp;", "").Replace("a href=", "").Replace("/a", "")
                        .Replace("<span", "").Replace("</span", "")
                        .Replace("class=\\\"js-display-url\\\"", "").Replace("class=\\\"tco-ellipsis\\\"", "")
                        .Replace("class=\\\"invisible\\\"", "").Replace("<strong>", "")
                        .Replace("target=\\\"_blank\\\"", "").Replace("class=\\\"twitter-timeline-link\\\"", "")
                        .Replace("</strong>", "")
                        .Replace("rel=\\\"nofollow\\\" dir=\\\"ltr\\\" data-expanded-url=", "")
                        .Replace("dir=\"ltr\"", "");
                    text = text.Replace("&quot;", "").Replace("<", "").Replace(">", "")
                        .Replace("\"", "").Replace("\\", "").Replace("title=", "")
                        .Replace("&amp;", "&").Replace("&#39;", "'").Replace("&lt;", "<").Replace("&gt;", ">");
                }
                catch (Exception ex)
                {
                    logError("Error --> NewKeywordStructDataForOnlyTweet() -- " + keyword + " --> text --> " + ex.Message);
                }

                // In image mode only tweets whose text carries a t.co link are accepted.
                if (!IsRetweetWithFovieteWithImages || text.Contains("http://t.co"))
                {
                    if (userId != "null")
                    {
                        lst_structTweetIDs.Add(new StructTweetIDs { ID_Tweet = tweetId, ID_Tweet_User = userId });
                    }
                    lst_structTweetIDs = lst_structTweetIDs.Distinct().ToList();
                    queTweetId.Enqueue(tweetId);
                }
            }

            if (lst_structTweetIDs.Count == 0)
            {
                // Nothing collected so far: stop, as before (this used to happen through an
                // ArgumentOutOfRangeException on the empty list).
                break;
            }

            if (response.Contains("has_more_items\":false"))
            {
                break;
            }
        }
    }
    catch (Exception ex)
    {
        logError("Error --> NewKeywordStructDataForOnlyTweet() -- " + keyword + " --> " + ex.Message);
    }

    return lst_structTweetIDs;
}
/// <summary>
/// Scrapes Twitter's people-search timeline for <paramref name="keyword"/>, page by page,
/// and for every user found fetches the public profile page and appends a row to the
/// per-keyword ".csv" / ".txt" export files.
/// </summary>
/// <param name="keyword">
/// Search phrase, or the profile name used in the URL when <c>RetweetFromUserName</c> is set.
/// Spaces are replaced by '+' before the value is URL-escaped.
/// </param>
/// <returns>
/// The shared <c>lst_structTweetIDs</c> list. Paging stops when <c>noOfRecords</c> entries were
/// collected, when the response contains <c>has_more_items":false</c>, when a page adds no new
/// entry, or when an error was logged.
/// </returns>
public List<StructTweetIDs> NewKeywordStructDataSearchByPeople(string keyword)
{
    // Writes one failure to both log files (timestamped scraper log + error log).
    Action<string, string> logError = (detail, message) =>
    {
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> NewKeywordStructDataSearchByPeople() -- " + detail + " --> " + message, Globals.Path_TwitterDataScrapper);
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> NewKeywordStructDataSearchByPeople() -- " + detail + " --> " + message, Globals.Path_TwtErrorLogs);
    };

    // Returns the text between the first openMarker and the next closeMarker, or null when either is missing.
    Func<string, string, string, string> between = (source, openMarker, closeMarker) =>
    {
        int open = source.IndexOf(openMarker, StringComparison.Ordinal);
        if (open < 0)
        {
            return null;
        }

        int valueStart = open + openMarker.Length;
        int close = source.IndexOf(closeMarker, valueStart, StringComparison.Ordinal);
        return close < 0 ? null : source.Substring(valueStart, close - valueStart);
    };

    // Removes stray JSON punctuation from an extracted value.
    Func<string, string> stripJsonPunctuation = value =>
        value.Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");

    try
    {
        lst_structTweetIDs = new List<StructTweetIDs>();

        if (keyword.Trim().Contains(" "))
        {
            keyword = keyword.Replace(" ", "+");
        }

        bool firstSearchPage = true;
        string scrollCursor = string.Empty;

        while (true)
        {
            string escapedKeyword = Uri.EscapeDataString(keyword);
            string searchURL;
            if (RetweetFromUserName)
            {
                searchURL = "https://twitter.com/i/profiles/show/" + escapedKeyword + "/timeline/with_replies?composed_count=0&count=" + RetweetExtractcount + "&include_available_features=1&include_entities=1";
            }
            else if (firstSearchPage)
            {
                searchURL = "https://twitter.com/i/search/timeline?q=" + escapedKeyword + "&src=typd&f=realtime&mode=users";
                firstSearchPage = false;
            }
            else
            {
                searchURL = "https://twitter.com/i/search/timeline?q=" + escapedKeyword + "&src=typd&f=realtime&mode=users&include_available_features=1&include_entities=1&last_note_ts=0&oldest_unread_id=0&scroll_cursor=" + scrollCursor;
            }

            string response;
            try
            {
                response = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                if (string.IsNullOrEmpty(response))
                {
                    // One immediate retry on an empty body.
                    response = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                }
            }
            catch (Exception ex)
            {
                logError(keyword + " -- res_Get_searchURL", ex.Message);

                // One delayed retry; a second failure is handled by the outer catch.
                System.Threading.Thread.Sleep(2000);
                response = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
            }

            if (string.IsNullOrEmpty(response))
            {
                return lst_structTweetIDs;
            }

            // Pick up the cursor for the next page. Splitting on "refresh_cursor" removes that
            // token from every piece, so only pieces mentioning "scroll_cursor" are inspected.
            foreach (string piece in Regex.Split(response, "refresh_cursor"))
            {
                if (!piece.Contains("scroll_cursor"))
                {
                    continue;
                }

                string cursorTail = between(piece, "USER-", "\"");
                if (cursorTail == null)
                {
                    // Malformed cursor: keep the previous one and stop scanning.
                    break;
                }

                scrollCursor = stripJsonPunctuation("USER-" + cursorTail);
            }

            int countBeforePage = lst_structTweetIDs.Count;

            foreach (string entry in Regex.Split(response, "data-item-id").Skip(1))
            {
                string id = between(entry, "data-user-id=\\\"", "\\\"");
                if (id == null)
                {
                    id = "null";
                    logError("id -- " + keyword, "data-user-id attribute not found");
                }
                else
                {
                    id = stripJsonPunctuation(id);
                }

                // Screen name. Only punctuation is stripped: removing "user" or "_str"
                // would corrupt handles that contain those substrings.
                string from_user_id = between(entry, "data-screen-name=\\\"", "\\\"");
                if (from_user_id == null)
                {
                    from_user_id = "null";
                    logError(keyword + " -- from_user_id", "data-screen-name attribute not found");
                }
                else
                {
                    from_user_id = stripJsonPunctuation(from_user_id);
                }

                // The entry starts right after "data-item-id", so the first =\"...\" value is the item id.
                string tweetUserid = between(entry, "=\\\"", "\\\"");
                if (tweetUserid == null)
                {
                    tweetUserid = "null";
                    logError(keyword + " -- tweetUserid", "data-item-id value not found");
                }
                else
                {
                    tweetUserid = stripJsonPunctuation(tweetUserid);
                }

                if (id != "null")
                {
                    StructTweetIDs record = new StructTweetIDs();
                    record.ID_Tweet = tweetUserid;
                    record.ID_Tweet_User = id;
                    record.username__Tweet_User = from_user_id;
                    record.wholeTweetMessage = string.Empty;
                    lst_structTweetIDs.Add(record);

                    Log("[ " + DateTime.Now + " ] => [ " + tweetUserid + " ]");
                    Log("[ " + DateTime.Now + " ] => [ " + id + " ]");
                    Log("[ " + DateTime.Now + " ] => [ " + from_user_id + " ]");
                    Log("-------------------------------------------------------------------------------------------------------------------------------");

                    string csvPath = Globals.Path_KeywordScrapedListData + "-" + keyword + ".csv";
                    if (!File.Exists(csvPath))
                    {
                        GlobusFileHelper.AppendStringToTextfileNewLine("USERID , USERNAME , PROFILE NAME , BIO , LOCATION , WEBSITE , NO OF TWEETS , FOLLOWERS , FOLLOWINGS", csvPath);
                    }

                    // Profile fields are reset for every user so that a field missing on this
                    // profile is exported empty instead of repeating the previous user's value.
                    string ProfileName = string.Empty;
                    string Location = string.Empty;
                    string Bio = string.Empty;
                    string website = string.Empty;
                    string NoOfTweets = string.Empty;
                    string Followers = string.Empty;
                    string Followings = string.Empty;

                    ChilkatHttpHelpr objChilkat = new ChilkatHttpHelpr();
                    GlobusHttpHelper HttpHelper = new GlobusHttpHelper();
                    string profilePageSource = HttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/" + from_user_id), "", "");

                    // Convert the profile HTML to XML so it can be searched by tag and class.
                    string xHtml = objChilkat.ConvertHtmlToXml(profilePageSource);
                    Chilkat.Xml xml = new Chilkat.Xml();
                    xml.LoadXml(xHtml);

                    // Display name, then the first "u-textUserColor" link after it (exported as website).
                    Chilkat.Xml nameNode = xml.SearchForAttribute(null, "h1", "class", "ProfileHeaderCard-name");
                    if (nameNode != null)
                    {
                        ProfileName = nameNode.AccumulateTagContent("text", "script|style");
                        Chilkat.Xml websiteNode = xml.SearchForAttribute(nameNode, "a", "class", "u-textUserColor");
                        if (websiteNode != null)
                        {
                            website = websiteNode.AccumulateTagContent("text", "script|style");
                        }
                    }

                    Chilkat.Xml bioNode = xml.SearchForAttribute(null, "p", "class", "ProfileHeaderCard-bio u-dir");
                    if (bioNode != null)
                    {
                        // NOTE(review): this removes every space from the bio; it looks like an
                        // HTML-entity replacement whose pattern was lost - confirm the intended pattern.
                        Bio = bioNode.AccumulateTagContent("text", "script|style").Replace(" ", string.Empty).Trim();
                    }

                    Chilkat.Xml locationNode = xml.SearchForAttribute(null, "span", "class", "ProfileHeaderCard-locationText u-dir");
                    if (locationNode != null)
                    {
                        Location = locationNode.AccumulateTagContent("text", "script|style");
                    }

                    // Up to three stat links, read in order: tweets, following, followers.
                    Chilkat.Xml statNode = xml.SearchForAttribute(null, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-nav");
                    for (int statIndex = 0; statNode != null && statIndex < 3; statIndex++)
                    {
                        string statText = statNode.AccumulateTagContent("text", "script|style");
                        if (statIndex == 0)
                        {
                            NoOfTweets = statText.Replace("Tweets", string.Empty).Replace(",", string.Empty).Replace("Tweet", string.Empty);
                        }
                        else if (statIndex == 1)
                        {
                            Followings = statText.Replace(" Following", string.Empty).Replace(",", string.Empty).Replace("Following", string.Empty);
                        }
                        else
                        {
                            Followers = statText.Replace("Followers", string.Empty).Replace(",", string.Empty).Replace("Follower", string.Empty);
                        }

                        statNode = xml.SearchForAttribute(statNode, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-openSignupDialog js-nonNavigable u-textUserColor");
                    }

                    if (!string.IsNullOrEmpty(from_user_id) && tweetUserid != "null")
                    {
                        string Id_user = tweetUserid.Replace("}]", string.Empty).Trim();
                        Globals.lstScrapedUserIDs.Add(Id_user);
                        GlobusFileHelper.AppendStringToTextfileNewLine(id + "," + from_user_id + "," + ProfileName + "," + Bio.Replace(",", "") + "," + Location.Replace(",", "") + "," + website + "," + NoOfTweets.Replace(",", "").Replace("Tweets", "") + "," + Followers.Replace(",", "").Replace("Following", "") + "," + Followings.Replace(",", "").Replace("Followers", "").Replace("Follower", ""), csvPath);
                        GlobusFileHelper.AppendStringToTextfileNewLine(from_user_id, Globals.Path_KeywordScrapedListData + "-" + keyword + ".txt");
                        Log("[ " + DateTime.Now + " ] => [ " + from_user_id + "," + Id_user + "," + ProfileName + "," + Bio.Replace(",", "") + "," + Location + "," + website + "," + NoOfTweets + "," + Followers + "," + Followings + " ]");
                    }
                }

                lst_structTweetIDs = lst_structTweetIDs.Distinct().ToList();
                if (lst_structTweetIDs.Count >= noOfRecords)
                {
                    return lst_structTweetIDs;
                }
            }

            // Stop when the response reports the end of the results, or when this page added
            // nothing new: requesting again would only repeat the same work forever.
            if (response.Contains("has_more_items\":false") || lst_structTweetIDs.Count == countBeforePage)
            {
                return lst_structTweetIDs;
            }
        }
    }
    catch (Exception ex)
    {
        logError(keyword, ex.Message);
    }

    return lst_structTweetIDs;
}
/// <summary>
/// Scrapes Twitter's realtime search timeline for <paramref name="keyword"/>, page by page,
/// collecting the item id, user id, screen name and tweet text of every tweet entry and
/// appending "userId:screenName" to the keyword follower export file.
/// </summary>
/// <param name="keyword">
/// Search phrase, or the profile name used in the URL when <c>RetweetFromUserName</c> is set.
/// Spaces are replaced by '+' before the value is URL-escaped.
/// </param>
/// <returns>
/// The shared <c>lst_structTweetIDs</c> list. Paging stops when <c>noOfRecords</c> entries were
/// collected, when the response contains <c>has_more_items":false</c>, when a page adds no new
/// entry, or when an error was logged.
/// </returns>
public List<StructTweetIDs> NewKeywordStructData1(string keyword)
{
    // Writes one failure to both log files (timestamped scraper log + error log).
    Action<string, string> logError = (detail, message) =>
    {
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> NewKeywordStructData1() -- " + detail + " --> " + message, Globals.Path_TwitterDataScrapper);
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> NewKeywordStructData1() -- " + detail + " --> " + message, Globals.Path_TwtErrorLogs);
    };

    // Returns the text between the first openMarker and the next closeMarker, or null when either is missing.
    Func<string, string, string, string> between = (source, openMarker, closeMarker) =>
    {
        int open = source.IndexOf(openMarker, StringComparison.Ordinal);
        if (open < 0)
        {
            return null;
        }

        int valueStart = open + openMarker.Length;
        int close = source.IndexOf(closeMarker, valueStart, StringComparison.Ordinal);
        return close < 0 ? null : source.Substring(valueStart, close - valueStart);
    };

    // Removes stray JSON punctuation from an extracted value.
    Func<string, string> stripJsonPunctuation = value =>
        value.Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");

    try
    {
        lst_structTweetIDs = new List<StructTweetIDs>();

        if (keyword.Trim().Contains(" "))
        {
            keyword = keyword.Replace(" ", "+");
        }

        bool firstSearchPage = true;
        string scrollCursor = string.Empty;

        while (true)
        {
            string escapedKeyword = Uri.EscapeDataString(keyword);
            string searchURL;
            if (RetweetFromUserName)
            {
                searchURL = "https://twitter.com/i/profiles/show/" + escapedKeyword + "/timeline/with_replies?composed_count=0&count=" + RetweetExtractcount + "&include_available_features=1&include_entities=1";
            }
            else if (firstSearchPage)
            {
                searchURL = "https://twitter.com/i/search/timeline?q=" + escapedKeyword + "&src=typd&f=realtime";
                firstSearchPage = false;
            }
            else
            {
                searchURL = "https://twitter.com/i/search/timeline?q=" + escapedKeyword + "&src=typd&f=realtime&include_available_features=1&include_entities=1&last_note_ts=0&oldest_unread_id=0&scroll_cursor=" + scrollCursor;
            }

            string response;
            try
            {
                response = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                if (string.IsNullOrEmpty(response))
                {
                    // One immediate retry on an empty body.
                    response = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                }
            }
            catch (Exception ex)
            {
                logError(keyword + " -- res_Get_searchURL", ex.Message);

                // One delayed retry; a second failure is handled by the outer catch.
                System.Threading.Thread.Sleep(2000);
                response = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
            }

            if (string.IsNullOrEmpty(response))
            {
                return lst_structTweetIDs;
            }

            // Pick up the cursor for the next page. Splitting on "refresh_cursor" removes that
            // token from every piece, so only pieces mentioning "scroll_cursor" are inspected.
            foreach (string piece in Regex.Split(response, "refresh_cursor"))
            {
                if (!piece.Contains("scroll_cursor"))
                {
                    continue;
                }

                string cursorTail = between(piece, "TWEET-", "\"");
                if (cursorTail == null)
                {
                    // Malformed cursor: keep the previous one and stop scanning.
                    break;
                }

                scrollCursor = stripJsonPunctuation("TWEET-" + cursorTail);
            }

            int countBeforePage = lst_structTweetIDs.Count;

            foreach (string entry in Regex.Split(response, "data-item-id").Skip(1))
            {
                // Only entries that carry a screen name and are not user-card markup are processed.
                if (!entry.Contains("data-screen-name=") || entry.Contains("js-actionable-user js-profile-popup-actionable"))
                {
                    continue;
                }

                string id = between(entry, "data-user-id=\\\"", "\\\"");
                if (id == null)
                {
                    id = "null";
                    logError("id -- " + keyword, "data-user-id attribute not found");
                }
                else
                {
                    id = stripJsonPunctuation(id);
                }

                // Screen name. Only punctuation is stripped: removing "user" or "_str"
                // would corrupt handles that contain those substrings.
                string from_user_id = between(entry, "data-screen-name=\\\"", "\\\"");
                if (from_user_id == null)
                {
                    from_user_id = "null";
                    logError(keyword + " -- from_user_id", "data-screen-name attribute not found");
                }
                else
                {
                    from_user_id = stripJsonPunctuation(from_user_id);
                }

                // The entry starts right after "data-item-id", so the first =\"...\" value is the item id.
                string tweetUserid = between(entry, "=\\\"", "\\\"");
                if (tweetUserid == null)
                {
                    tweetUserid = "null";
                    logError(keyword + " -- tweetUserid", "data-item-id value not found");
                }
                else
                {
                    tweetUserid = stripJsonPunctuation(tweetUserid);
                }

                // Tweet text is reset for every entry so a failed extraction yields an empty
                // text instead of silently reusing the previous tweet's text.
                string text = string.Empty;
                string[] textParts = Regex.Split(entry, "TweetTextSize js-tweet-text tweet-text");
                if (textParts.Length > 1)
                {
                    try
                    {
                        string rawText = Utils.getBetween(textParts[1], "data-aria-label-part=", "href=");
                        if (rawText != null)
                        {
                            text = rawText.Replace("\\\"0\\\"\\u003e", "").Replace("\\u003ca", "");
                        }
                    }
                    catch (Exception ex)
                    {
                        // Text is best-effort: record the failure and keep the entry.
                        logError(keyword + " -- text", ex.Message);
                    }
                }

                if (id != "null")
                {
                    StructTweetIDs record = new StructTweetIDs();
                    record.ID_Tweet = tweetUserid;
                    record.ID_Tweet_User = id;
                    record.username__Tweet_User = from_user_id;
                    record.wholeTweetMessage = text;
                    lst_structTweetIDs.Add(record);

                    Log("[ " + DateTime.Now + " ] => [ " + tweetUserid + " ]");
                    Log("[ " + DateTime.Now + " ] => [ " + id + " ]");
                    Log("[ " + DateTime.Now + " ] => [ " + from_user_id + " ]");
                    Log("-------------------------------------------------------------------------------------------------------------------------------");
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(id + ":" + from_user_id, Globals.Path_keywordFollowerScrapedData);
                }

                lst_structTweetIDs = lst_structTweetIDs.Distinct().ToList();
                if (lst_structTweetIDs.Count >= noOfRecords)
                {
                    return lst_structTweetIDs;
                }
            }

            // Stop when the response reports the end of the results, or when this page added
            // nothing new: requesting again would only repeat the same work forever.
            if (response.Contains("has_more_items\":false") || lst_structTweetIDs.Count == countBeforePage)
            {
                return lst_structTweetIDs;
            }
        }
    }
    catch (Exception ex)
    {
        logError(keyword, ex.Message);
    }

    return lst_structTweetIDs;
}
/// <summary>
/// Queries the "phoenix_search" endpoint for <paramref name="keyword"/> and extracts, by plain
/// string slicing of the JSON response, the tweet id, user id, screen name and text of at most
/// <c>noOfRecords</c> entries.
/// </summary>
/// <param name="keyword">Search phrase; it is URL-escaped before being placed in the query string.</param>
/// <returns>
/// The shared <c>lst_structTweetIDs</c> list; entries whose tweet id cannot be read are skipped.
/// The list is returned as-is when an error was logged.
/// </returns>
public List<StructTweetIDs> KeywordStructData(string keyword)
{
    // Writes one failure to both log files (timestamped scraper log + error log).
    Action<string, string> logError = (detail, message) =>
    {
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> KeywordStructData() -- " + detail + " --> " + message, Globals.Path_TwitterDataScrapper);
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> KeywordStructData() -- " + detail + " --> " + message, Globals.Path_TwtErrorLogs);
    };

    // Returns the text from startMarker (inclusive) up to the next endMarker (exclusive),
    // or null when either marker is missing.
    Func<string, string, string, string> sliceFrom = (source, startMarker, endMarker) =>
    {
        int start = source.IndexOf(startMarker, StringComparison.Ordinal);
        if (start < 0)
        {
            return null;
        }

        int end = source.IndexOf(endMarker, start, StringComparison.Ordinal);
        return end < 0 ? null : source.Substring(start, end - start);
    };

    try
    {
        int counter = 0;
        lst_structTweetIDs = new List<StructTweetIDs>();

        // Escaped like the sibling search methods, so '&', '#' and similar characters
        // in the keyword cannot alter the query string.
        string searchURL = "https://twitter.com/phoenix_search.phoenix?q=" + Uri.EscapeDataString(keyword ?? string.Empty) + "&count=" + noOfRecords + "&include_entities=1&include_available_features=1&contributor_details=true&page=null&mode=relevance&query_source=typed_query";

        string response;
        try
        {
            response = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
        }
        catch (Exception ex)
        {
            logError(keyword + " -- res_Get_searchURL", ex.Message);

            // One delayed retry; a second failure is handled by the outer catch.
            System.Threading.Thread.Sleep(2000);
            response = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
        }

        if (string.IsNullOrEmpty(response))
        {
            return lst_structTweetIDs;
        }

        foreach (string item in Regex.Split(response, "\"in_reply_to_status_id_str\"").Skip(1))
        {
            // Process at most noOfRecords entries.
            if (noOfRecords > counter)
            {
                counter++;
            }
            else
            {
                break;
            }

            string id = sliceFrom(item, "\"id_str\"", "\",");
            if (id == null)
            {
                id = "null";
                logError("id -- " + keyword, "\"id_str\" field not found");
            }
            else
            {
                id = id.Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
            }

            string from_user_id = sliceFrom(item, "\"id\"", ",\"");
            if (from_user_id == null)
            {
                from_user_id = "null";
                logError(keyword + " -- from_user_id", "\"id\" field not found");
            }
            else
            {
                from_user_id = from_user_id.Replace("id", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", "");
            }

            // A missing screen name or text must not discard the user id parsed above,
            // so these failures only log and leave their own field empty.
            string from_user = sliceFrom(item, "\"screen_name\"", ",\"");
            if (from_user == null)
            {
                from_user = "";
                logError(keyword + " -- from_user", "\"screen_name\" field not found");
            }
            else
            {
                from_user = from_user.Replace("screen_name", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
            }

            // The text is cut at the first ,\" sequence and has ':' and braces removed.
            string text = sliceFrom(item, "\"text\":", ",\"");
            if (text == null)
            {
                text = string.Empty;
                logError(keyword + " -- text", "\"text\" field not found");
            }
            else
            {
                text = text.Replace("\"text\":", "").Replace("screen_name", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "");
            }

            if (id != "null")
            {
                StructTweetIDs record = new StructTweetIDs();
                record.ID_Tweet = id;
                record.ID_Tweet_User = from_user_id;
                record.username__Tweet_User = from_user;
                record.wholeTweetMessage = text;
                lst_structTweetIDs.Add(record);
            }
        }
    }
    catch (Exception ex)
    {
        logError(keyword, ex.Message);
    }

    return lst_structTweetIDs;
}
/// <summary>
/// Downloads a user timeline from the Twitter v1 <c>statuses/user_timeline.json</c> endpoint and
/// scrapes tweet id, tweet text, user id and screen name out of the raw response text.
/// </summary>
/// <param name="keyword">
/// Sent as <c>user_id</c> when <c>NumberHelper.ValidateNumber</c> accepts it, otherwise as <c>screen_name</c>.
/// The value is concatenated into the URL as-is.
/// </param>
/// <returns>
/// The <c>lst_structTweetIDs</c> field, which is re-created on every call. One entry is added per
/// response chunk; a field whose extraction fails is logged and left empty. Exceptions raised inside
/// the outer try block are logged and swallowed, so the list may be empty or partial.
/// </returns>
public List<StructTweetIDs> GetTweetData_ByUserName(string keyword)
{
    lst_structTweetIDs = new List<StructTweetIDs>();

    try
    {
        string searchURL = string.Empty;

        if (NumberHelper.ValidateNumber(keyword))
        {
            // Fixed: the parameter used to be written as "user_id =" (with a space), so the
            // request never carried a parameter actually named "user_id".
            searchURL = "https://api.twitter.com/1/statuses/user_timeline.json?include_entities=true&include_rts=true&user_id=" + keyword + "&count=" + TweetExtractCount;
        }
        else
        {
            searchURL = "https://api.twitter.com/1/statuses/user_timeline.json?include_entities=true&include_rts=true&screen_name=" + keyword + "&count=" + TweetExtractCount;
        }

        string res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");

        // Every tweet object starts with {"created_at ; the text before the first match is dropped.
        string[] splitRes = Regex.Split(res_Get_searchURL, "{\"created_at");
        splitRes = splitRes.Skip(1).ToArray();

        foreach (string item in splitRes)
        {
            string modified_Item = "\"from_user\"" + item;
            string text = string.Empty;
            string TweeterUserId = string.Empty;
            string TweeterUserScreanName = string.Empty;

            string Tweetid = Globussoft.GlobusHttpHelper.ParseEncodedJson(modified_Item, "id");

            // Tweet text: everything between "text": and the next ," sequence, quotes removed.
            // A missing marker makes Substring throw; that is caught and logged below.
            try
            {
                int startIndex = item.IndexOf("\"text\":");
                string start = item.Substring(startIndex).Replace("\"text\":", "");
                int endIndex = start.IndexOf(",\"");
                string end = start.Substring(0, endIndex);
                text = end.Replace("\"", string.Empty).Trim();
            }
            catch (Exception ex)
            {
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData_ByUserName() --> text -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData_ByUserName() --> text -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
            }

            // Tweet user id.
            // NOTE(review): this reads the whole response rather than the current item, so every
            // entry receives the id of the first "user" object in the response - confirm intended.
            try
            {
                string item1 = Regex.Split(res_Get_searchURL, "user\":")[1];
                int startIndex = item1.IndexOf("{\"id\":");
                string start = item1.Substring(startIndex);
                int endIndex = start.IndexOf(",\"id_str");
                string end = start.Substring(0, endIndex);
                TweeterUserId = end.Replace("{\"id\":", string.Empty).Trim();
            }
            catch (Exception ex)
            {
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData_ByUserName() --> TweeterUserId -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData_ByUserName() --> TweeterUserId -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
            }

            // Tweet user screen name: first "screen_name" occurrence in the item up to the next ," sequence.
            try
            {
                int startIndex = item.IndexOf("screen_name");
                string start = item.Substring(startIndex);
                int endIndex = start.IndexOf(",\"");
                string end = start.Substring(0, endIndex);
                TweeterUserScreanName = end.Replace("screen_name\":\"", string.Empty).Replace("\"", string.Empty).Trim();
            }
            catch (Exception ex)
            {
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData_ByUserName() --> TweeterUserScreanName -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData_ByUserName() --> TweeterUserScreanName -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
            }

            StructTweetIDs structTweetIDs = new StructTweetIDs();
            structTweetIDs.ID_Tweet = Tweetid;
            structTweetIDs.ID_Tweet_User = TweeterUserId;
            structTweetIDs.username__Tweet_User = TweeterUserScreanName;
            structTweetIDs.wholeTweetMessage = text;
            lst_structTweetIDs.Add(structTweetIDs);
        }
    }
    catch (Exception ex)
    {
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetTweetData_ByUserName() -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> GetTweetData_ByUserName() -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
    }

    return lst_structTweetIDs;
}
/// <summary>
/// Queries <c>https://twitter.com/phoenix_search.phoenix</c> for <paramref name="keyword"/> and scrapes
/// tweet id, user id, screen name and tweet text out of the raw response text.
/// </summary>
/// <param name="keyword">
/// Search text; it is concatenated into the query string as-is.
/// NOTE(review): no URL-encoding is applied here - confirm callers pass an already encoded value.
/// </param>
/// <returns>
/// The <c>lst_structTweetIDs</c> field, re-created on every call, with at most <c>noOfRecords</c>
/// chunks processed. Chunks whose tweet id cannot be extracted are skipped. Exceptions raised inside
/// the outer try block are logged and swallowed, so the list may be empty or partial.
/// </returns>
public List<StructTweetIDs> KeywordStructData(string keyword)
{
    try
    {
        int counter = 0;
        lst_structTweetIDs = new List<StructTweetIDs>();
        string res_Get_searchURL = string.Empty;
        string searchURL = "https://twitter.com/phoenix_search.phoenix?q=" + keyword + "&count=" + noOfRecords + "&include_entities=1&include_available_features=1&contributor_details=true&page=null&mode=relevance&query_source=typed_query";

        try
        {
            res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
        }
        catch (Exception ex)
        {
            // Log the first failure before retrying, so it is recorded even when the retry
            // throws as well (that second exception is handled by the outer catch).
            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> KeywordStructData() -- " + keyword + " -- res_Get_searchURL --> " + ex.Message, Globals.Path_TwitterDataScrapper);
            Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> KeywordStructData() -- " + keyword + " -- res_Get_searchURL --> " + ex.Message, Globals.Path_TwtErrorLogs);

            // Single retry after a two second pause.
            System.Threading.Thread.Sleep(2000);
            res_Get_searchURL = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
        }

        if (!string.IsNullOrEmpty(res_Get_searchURL))
        {
            // The response is cut at every "in_reply_to_status_id_str" key; the text before the
            // first match is dropped.
            string[] splitRes = Regex.Split(res_Get_searchURL, "\"in_reply_to_status_id_str\"");
            splitRes = splitRes.Skip(1).ToArray();

            foreach (string item in splitRes)
            {
                // Stop once noOfRecords chunks have been processed.
                if (noOfRecords > counter)
                {
                    counter++;
                }
                else
                {
                    break;
                }

                // Tweet id: from "id_str" up to the next ", sequence, JSON punctuation stripped.
                // "null" marks a failed extraction; such chunks are not added to the result.
                string id = "";
                try
                {
                    int startIndex = item.IndexOf("\"id_str\"");
                    string start = item.Substring(startIndex);
                    int endIndex = start.IndexOf("\",");
                    string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                    id = end;
                }
                catch (Exception ex)
                {
                    id = "null";
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> KeywordStructData() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> KeywordStructData() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
                }

                // User id: first "id" key in the chunk up to the next ," sequence.
                string from_user_id = "";
                try
                {
                    int startIndex = item.IndexOf("\"id\"");
                    string start = item.Substring(startIndex);
                    int endIndex = start.IndexOf(",\"");
                    string end = start.Substring(0, endIndex).Replace("id", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", "");
                    from_user_id = end;
                }
                catch (Exception ex)
                {
                    from_user_id = "null";
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> KeywordStructData() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> KeywordStructData() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwtErrorLogs);
                }

                // Screen name: first "screen_name" key in the chunk up to the next ," sequence.
                string from_user = "";
                try
                {
                    int startindex = item.IndexOf("\"screen_name\"");
                    string start = item.Substring(startindex);
                    int endIndex = start.IndexOf(",\"");
                    string end = start.Substring(0, endIndex).Replace("screen_name", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                    from_user = end;
                }
                catch (Exception ex)
                {
                    // Fixed: this handler used to overwrite from_user_id with "null", discarding a
                    // user id that had already been parsed. from_user simply stays empty.
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> KeywordStructData() -- " + keyword + " -- from_user --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> KeywordStructData() -- " + keyword + " -- from_user --> " + ex.Message, Globals.Path_TwtErrorLogs);
                }

                // Tweet text: between "text": and the next ," sequence.
                // NOTE(review): the Replace chain also strips ':' '{' '}' from the message itself
                // (e.g. the colon of a URL scheme) - kept as-is, confirm whether that is wanted.
                string text = string.Empty;
                try
                {
                    int startindex = item.IndexOf("\"text\":");
                    string start = item.Substring(startindex).Replace("\"text\":", "");
                    int endIndex = start.IndexOf(",\"");
                    string end = start.Substring(0, endIndex).Replace("screen_name", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "");
                    text = end;
                }
                catch (Exception ex)
                {
                    // Fixed: same copy-paste slip as above (from_user_id was reset here), and the
                    // log entry named the wrong stage ("from_user"). text stays empty.
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> KeywordStructData() -- " + keyword + " -- text --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> KeywordStructData() -- " + keyword + " -- text --> " + ex.Message, Globals.Path_TwtErrorLogs);
                }

                StructTweetIDs structTweetIDs = new StructTweetIDs();
                if (id != "null")
                {
                    structTweetIDs.ID_Tweet = id;
                    structTweetIDs.ID_Tweet_User = from_user_id;
                    structTweetIDs.username__Tweet_User = from_user;
                    structTweetIDs.wholeTweetMessage = text;
                    lst_structTweetIDs.Add(structTweetIDs);
                }
            }
        }
    }
    catch (Exception ex)
    {
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> KeywordStructData() -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> KeywordStructData() -- " + keyword + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
    }

    return lst_structTweetIDs;
}