/// <summary>
/// Collects screen names found in the conversation pages of a tweet, following the
/// pagination cursor until <c>noOfRecords</c> names are collected or no new page is offered.
/// </summary>
/// <param name="TweetUrl">Tweet URL; the text after "status/" is used as the status id.</param>
/// <param name="HttpHelper">HTTP helper used to download every conversation page.</param>
/// <param name="ReturnStatus">
/// "No Error" when scraping finished normally; "Error" when the page could not be downloaded,
/// was rejected without a recognised message, or an exception was caught; otherwise one of the
/// fixed suspended / page-not-found / rate-limit messages.
/// </param>
/// <returns>
/// The distinct screen names collected so far (possibly partial on failure). A new empty list
/// is returned for the "401 Unauthorized" case.
/// </returns>
public List<string> GetMembers(string TweetUrl, ref GlobusHttpHelper HttpHelper, out string ReturnStatus)
{
    const string pageNotFoundJson = "{\"errors\":[{\"message\":\"Sorry, that page does not exist\",\"code\":34}]}";

    string cursor = "-1";
    string FollowingUrl = string.Empty;
    List<string> lstIds = new List<string>();
    int counter = 0;

    // Every max_position value already requested. A page that hands back a position we have
    // already asked for cannot make progress, so it ends pagination instead of looping forever.
    HashSet<string> requestedPositions = new HashSet<string>();

    try
    {
        // The "@@@" sentinel gives getBetween an end marker for the status id.
        // NOTE(review): TweetUrl keeps the sentinel afterwards, and that suffixed value is what
        // is written to the members file below - confirm whether consumers rely on it.
        TweetUrl = TweetUrl + "@@@";
        string numResult = getBetween(TweetUrl, "status/", "@@@");

        while (true)
        {
            // NOTE(review): the account segment of the path is hard-coded; presumably the
            // endpoint resolves the conversation from the status id alone - confirm.
            if (counter == 0)
            {
                FollowingUrl = "https://twitter.com/i/katyperry/conversation/" + numResult + "?include_available_features=1&include_entities=1&max_position=0";
                requestedPositions.Add("0");
                counter++;
            }
            else
            {
                FollowingUrl = "https://twitter.com/i/katyperry/conversation/" + numResult + "?include_available_features=1&include_entities=1&max_position=" + cursor.Trim();
            }

            string Data = HttpHelper.getHtmlfromUrl(new Uri(FollowingUrl), "", "");
            if (string.IsNullOrEmpty(Data))
            {
                // One retry before giving up on this page.
                Data = HttpHelper.getHtmlfromUrl(new Uri(FollowingUrl), "", "");
            }

            if (string.IsNullOrEmpty(Data))
            {
                AddToLog_ScrapMember("Either Url is Invalid or PageSource is getting Null or Empty.");
                ReturnStatus = "Error";
                return lstIds;
            }

            // Responses carrying a rate-limit or page-not-found marker are never parsed;
            // they are only classified into a status message.
            if (Data.Contains("Rate limit exceeded") || Data.Contains(pageNotFoundJson))
            {
                if (Data.Contains("401 Unauthorized"))
                {
                    ReturnStatus = "Account is Suspended. ";
                    return new List<string>();
                }

                if (Data.Contains(pageNotFoundJson))
                {
                    ReturnStatus = "Sorry, that page does not exist :";
                    return lstIds;
                }

                if (Data.Contains("Rate limit exceeded. Clients may not make more than 150 requests per hour."))
                {
                    ReturnStatus = "Rate limit exceeded. Clients may not make more than 150 requests per hour.:-";
                    return lstIds;
                }

                ReturnStatus = "Error";
                return lstIds;
            }

            string[] fragments = Data.Contains("js-stream-tweet js-actionable-tweet")
                ? Regex.Split(Data, "js-stream-tweet js-actionable-tweet")
                : Regex.Split(Data, "simple-tweet tweet");

            foreach (string fragment in fragments)
            {
                // The document head and the fragment holding the cursor carry no tweet markup.
                if (fragment.Contains("<!DOCTYPE html>") || fragment.Contains("min_position"))
                {
                    continue;
                }

                if (!fragment.Contains("data-screen-name"))
                {
                    continue;
                }

                string screenName = ExtractConversationScreenName(fragment);
                if (screenName == null)
                {
                    // Malformed fragment: skip it rather than abort the whole scrape.
                    continue;
                }

                if (noOfRecords > lstIds.Count)
                {
                    // Keep the list distinct without rebuilding it on every add.
                    if (!lstIds.Contains(screenName))
                    {
                        lstIds.Add(screenName);
                    }

                    AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [" + screenName + " ]");

                    if (!File.Exists(Globals.Path_ScrapedMembersList))
                    {
                        GlobusFileHelper.AppendStringToTextfileNewLine(" UserName , Url", Globals.Path_ScrapedMembersList);
                    }

                    GlobusFileHelper.AppendStringToTextfileNewLine(screenName + "," + TweetUrl, Globals.Path_ScrapedMembersList);
                }
            }

            if (noOfRecords != lstIds.Count)
            {
                if (Data.Contains("min_position"))
                {
                    string minPosition = ParseConversationMinPosition(Data);
                    if (minPosition != null)
                    {
                        cursor = minPosition;

                        // A null min_position means there is nothing further to page through.
                        if (cursor.Contains("null") || cursor.Contains("Null"))
                        {
                            ReturnStatus = "No Error";
                            return lstIds;
                        }

                        if (cursor != "0" && requestedPositions.Add(cursor))
                        {
                            continue;
                        }
                    }
                }

                if (Data.Contains("\"has_more_items\":true"))
                {
                    string fallbackCursor = ParseConversationFallbackCursor(Data);
                    if (fallbackCursor != null)
                    {
                        cursor = fallbackCursor;

                        if (cursor != "0" && requestedPositions.Add(cursor))
                        {
                            continue;
                        }
                    }
                }
            }

            ReturnStatus = "No Error";
            return lstIds;
        }
    }
    catch (Exception ex)
    {
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> getMembers() -- " + "" + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
        ReturnStatus = "Error";
        return lstIds;
    }
}

/// <summary>
/// Returns the first token of the text between "data-screen-name=" and the next "data-user-id"
/// in a conversation fragment, or null when either marker is missing.
/// </summary>
private static string ExtractConversationScreenName(string fragment)
{
    int nameStart = fragment.IndexOf("data-screen-name=");
    if (nameStart < 0)
    {
        return null;
    }

    int nameEnd = fragment.IndexOf("data-user-id", nameStart);
    if (nameEnd < 0)
    {
        return null;
    }

    // Substring takes (start, length), so the span length is computed explicitly.
    string screenName = fragment.Substring(nameStart, nameEnd - nameStart)
        .Replace("data-screen-name=", string.Empty)
        .Replace("=", string.Empty)
        .Replace("\"", "")
        .Replace("\\\\n", string.Empty)
        .Replace("\\", "")
        .Trim();

    // Anything after the first space belongs to the following attribute, not the name.
    if (screenName.Contains(" "))
    {
        screenName = screenName.Split(' ')[0];
    }

    return screenName;
}

/// <summary>
/// Returns the cleaned value that follows the "min_position" key up to the next comma,
/// or null when the key or the delimiter cannot be found.
/// </summary>
private static string ParseConversationMinPosition(string data)
{
    int keyStart = data.IndexOf("min_position\":");
    if (keyStart < 0)
    {
        return null;
    }

    int valueEnd = data.IndexOf(",", keyStart);
    if (valueEnd < 0)
    {
        return null;
    }

    return data.Substring(keyStart, valueEnd - keyStart)
        .Replace(">", string.Empty)
        .Replace("\n", string.Empty)
        .Replace("\"", string.Empty)
        .Replace("min_position", string.Empty)
        .Replace(":", string.Empty)
        .Replace(",", string.Empty)
        .Trim();
}

/// <summary>
/// Returns the cleaned text that follows the first "cursor" occurrence up to the next comma
/// (or the next newline when the comma is more than 40 characters away), or null when no
/// delimiter is found.
/// </summary>
private static string ParseConversationFallbackCursor(string data)
{
    int keyStart = data.IndexOf("cursor");
    if (keyStart < 0)
    {
        return null;
    }

    string tail = data.Substring(keyStart).Replace("cursor", "");

    int valueEnd = tail.IndexOf(",");
    if (valueEnd > 40)
    {
        valueEnd = tail.IndexOf("\n");
    }

    if (valueEnd < 0)
    {
        return null;
    }

    return tail.Substring(0, valueEnd)
        .Replace("\"", "")
        .Replace("\n", string.Empty)
        .Replace("=", string.Empty)
        .Replace(":", string.Empty)
        .Trim();
}
/// <summary>
/// Downloads the "retweeted_popup" activity page for a tweet and records every screen name
/// found in it to the scrape log and to the scraped-members file.
/// </summary>
/// <param name="Url">Tweet URL; the text after "status/" is used as the status id.</param>
/// <param name="HttpHelper">HTTP helper used to download the page.</param>
/// <remarks>
/// Failures never propagate from the page handling itself: an unusable page ends the call
/// quietly and a caught exception is written to the error log file.
/// </remarks>
public void getTweetUsers(string Url, ref GlobusHttpHelper HttpHelper)
{
    try
    {
        // The "@@@" sentinel gives getBetween an end marker for the status id.
        // NOTE(review): Url keeps the sentinel afterwards, and that suffixed value is what is
        // written to the members file below - confirm whether consumers rely on it.
        Url = Url + "@@@";
        string numResult = getBetween(Url, "status/", "@@@");
        string FollowingUrl = "https://twitter.com/i/activity/retweeted_popup?id=" + numResult;

        string Data = HttpHelper.getHtmlfromUrl(new Uri(FollowingUrl), "", "");
        if (string.IsNullOrEmpty(Data))
        {
            // One retry before giving up.
            Data = HttpHelper.getHtmlfromUrl(new Uri(FollowingUrl), "", "");
        }

        if (string.IsNullOrEmpty(Data))
        {
            AddToLog_ScrapMember("Either Url is Invalid or PageSource is getting Null or Empty.");
            return;
        }

        // Without the stream-item marker there is nothing to split; return instead of
        // iterating a null array and relying on the catch block to hide the failure.
        if (!Data.Contains("js-stream-item stream-item stream-item"))
        {
            return;
        }

        string[] fragments = Regex.Split(Data, "js-stream-item stream-item stream-item");

        foreach (string fragment in fragments)
        {
            if (!fragment.Contains("data-screen-name"))
            {
                continue;
            }

            string screenName = ExtractRetweeterScreenName(fragment);
            if (screenName == null)
            {
                // Malformed fragment: skip it so the remaining fragments are still processed.
                continue;
            }

            AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [" + screenName + " ]");

            if (!File.Exists(Globals.Path_ScrapedMembersList))
            {
                GlobusFileHelper.AppendStringToTextfileNewLine(" UserName , Url", Globals.Path_ScrapedMembersList);
            }

            GlobusFileHelper.AppendStringToTextfileNewLine(screenName + "," + Url, Globals.Path_ScrapedMembersList);
        }
    }
    catch (Exception ex)
    {
        // Record the failure the same way the GetMembers overloads do instead of swallowing it.
        GlobusFileHelper.AppendStringToTextfileNewLine("Error --> getTweetUsers() --> " + ex.Message, Globals.Path_TwtErrorLogs);
    }
}

/// <summary>
/// Returns the first token of the text between "data-screen-name=" and the next "data-user-id"
/// in a retweeter fragment, or null when either marker is missing.
/// </summary>
private static string ExtractRetweeterScreenName(string fragment)
{
    int nameStart = fragment.IndexOf("data-screen-name=");
    if (nameStart < 0)
    {
        return null;
    }

    int nameEnd = fragment.IndexOf("data-user-id", nameStart);
    if (nameEnd < 0)
    {
        return null;
    }

    // Substring takes (start, length), so the span length is computed explicitly.
    string screenName = fragment.Substring(nameStart, nameEnd - nameStart)
        .Replace("data-screen-name=", string.Empty)
        .Replace("=", string.Empty)
        .Replace("\"", "")
        .Replace("\\\\n", string.Empty)
        .Replace("\\", "")
        .Trim();

    // Anything after the first space belongs to the following attribute, not the name.
    if (screenName.Contains(" "))
    {
        screenName = screenName.Split(' ')[0];
    }

    return screenName;
}
/// <summary>
/// Collects "screenName:userId" entries from the page at <paramref name="keyword"/> and from
/// its "/timeline?cursor=..." continuation pages, until <c>noOfRecords</c> entries are
/// collected or no new cursor is offered.
/// </summary>
/// <param name="keyword">
/// URL of the first page to download; continuation pages are requested by appending
/// "/timeline?cursor=..." to it.
/// </param>
/// <param name="ReturnStatus">
/// "No Error" when scraping finished normally; "Error" when the page could not be downloaded,
/// was rejected without a recognised message, or an exception was caught; otherwise one of the
/// fixed suspended / page-not-found / rate-limit messages.
/// </param>
/// <returns>
/// The entries collected so far (possibly partial on failure). A new empty list is returned
/// for the "401 Unauthorized" case.
/// </returns>
public List<string> GetMembers(string keyword, out string ReturnStatus)
{
    const string pageNotFoundJson = "{\"errors\":[{\"message\":\"Sorry, that page does not exist\",\"code\":34}]}";

    string cursor = "-1";
    string FollowingUrl = string.Empty;
    List<string> lstIds = new List<string>();
    int counter = 0;

    // Every cursor already requested. A page that hands back a cursor we have already asked
    // for cannot make progress, so it ends pagination instead of looping forever.
    HashSet<string> requestedCursors = new HashSet<string>();

    try
    {
        Globussoft.GlobusHttpHelper HttpHelper = new Globussoft.GlobusHttpHelper();

        while (true)
        {
            if (counter == 0)
            {
                FollowingUrl = keyword;
                counter++;
            }
            else
            {
                FollowingUrl = keyword + "/timeline?cursor=" + cursor + "&cursor_index=&cursor_offset=&include_available_features=1&include_entities=1&is_forward=true";
            }

            string Data = HttpHelper.getHtmlfromUrl(new Uri(FollowingUrl), "", "");
            if (string.IsNullOrEmpty(Data))
            {
                // One retry before giving up on this page.
                Data = HttpHelper.getHtmlfromUrl(new Uri(FollowingUrl), "", "");
            }

            if (string.IsNullOrEmpty(Data))
            {
                AddToLog_ScrapMember("Either Url in Invalid or PageSource is getting Null or Empty.");
                ReturnStatus = "Error";
                return lstIds;
            }

            // Responses carrying a rate-limit or page-not-found marker are never parsed;
            // they are only classified into a status message.
            if (Data.Contains("Rate limit exceeded") || Data.Contains(pageNotFoundJson))
            {
                if (Data.Contains("401 Unauthorized"))
                {
                    ReturnStatus = "Account is Suspended. ";
                    return new List<string>();
                }

                if (Data.Contains(pageNotFoundJson))
                {
                    ReturnStatus = "Sorry, that page does not exist :";
                    return lstIds;
                }

                if (Data.Contains("Rate limit exceeded. Clients may not make more than 150 requests per hour."))
                {
                    ReturnStatus = "Rate limit exceeded. Clients may not make more than 150 requests per hour.:-";
                    return lstIds;
                }

                ReturnStatus = "Error";
                return lstIds;
            }

            string[] fragments = Data.Contains("js-stream-item stream-item stream-item")
                ? Regex.Split(Data, "js-stream-item stream-item stream-item")
                : Regex.Split(Data, "js-stream-item");

            foreach (string fragment in fragments)
            {
                // The document head and any fragment holding a cursor carry no user markup.
                if (fragment.Contains("<!DOCTYPE html>") || fragment.Contains("cursor"))
                {
                    continue;
                }

                if (!fragment.Contains("data-screen-name") || !fragment.Contains(" data-user-id"))
                {
                    continue;
                }

                string userId;
                string screenName;
                if (!TryExtractTimelineUser(fragment, out userId, out screenName))
                {
                    // Malformed fragment: skip it rather than abort the whole scrape.
                    continue;
                }

                if (noOfRecords > lstIds.Count)
                {
                    lstIds.Add(screenName + ":" + userId);
                    AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [" + userId + " :: " + screenName + " ]");

                    if (!File.Exists(Globals.Path_ScrapedMembersList))
                    {
                        GlobusFileHelper.AppendStringToTextfileNewLine("UserID , UserName , Url", Globals.Path_ScrapedMembersList);
                    }

                    GlobusFileHelper.AppendStringToTextfileNewLine(userId + "," + screenName + "," + keyword, Globals.Path_ScrapedMembersList);
                }
            }

            if (noOfRecords != lstIds.Count)
            {
                if (Data.Contains("data-cursor"))
                {
                    string attributeCursor = ParseTimelineAttributeCursor(Data);
                    if (attributeCursor != null)
                    {
                        cursor = attributeCursor;

                        if (cursor != "0" && requestedCursors.Add(cursor))
                        {
                            continue;
                        }
                    }
                }

                if (Data.Contains("cursor"))
                {
                    string fallbackCursor = ParseTimelineFallbackCursor(Data);
                    if (fallbackCursor != null)
                    {
                        cursor = fallbackCursor;

                        if (cursor != "0" && requestedCursors.Add(cursor))
                        {
                            continue;
                        }
                    }
                }
            }

            ReturnStatus = "No Error";
            return lstIds;
        }
    }
    catch (Exception ex)
    {
        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> getMembers() -- " + "" + " --> " + ex.Message, Globals.Path_TwtErrorLogs);
        ReturnStatus = "Error";
        return lstIds;
    }
}

/// <summary>
/// Reads the user id (text between "data-user-id" and the next "data-feedback-token") and the
/// screen name (text between "data-screen-name=" and the next "data-user-id") from a timeline
/// fragment. Returns false when any of the markers is missing.
/// </summary>
private static bool TryExtractTimelineUser(string fragment, out string userId, out string screenName)
{
    userId = null;
    screenName = null;

    int attributeStart = fragment.IndexOf("data-screen-name");
    if (attributeStart < 0)
    {
        return false;
    }

    int idStart = fragment.IndexOf("data-user-id", attributeStart);
    if (idStart < 0)
    {
        return false;
    }

    int idEnd = fragment.IndexOf("data-feedback-token", idStart);
    if (idEnd < 0)
    {
        return false;
    }

    int nameStart = fragment.IndexOf("data-screen-name=", attributeStart);
    if (nameStart < 0)
    {
        return false;
    }

    int nameEnd = fragment.IndexOf("data-user-id", nameStart);
    if (nameEnd < 0)
    {
        return false;
    }

    // Substring takes (start, length), so each span length is computed explicitly.
    userId = fragment.Substring(idStart, idEnd - idStart)
        .Replace("data-user-id", string.Empty)
        .Replace("=", string.Empty)
        .Replace("\"", "")
        .Replace("\\\\n", string.Empty)
        .Replace("\\", "")
        .Trim();

    screenName = fragment.Substring(nameStart, nameEnd - nameStart)
        .Replace("data-screen-name=", string.Empty)
        .Replace("=", string.Empty)
        .Replace("\"", "")
        .Replace("\\\\n", string.Empty)
        .Replace("\\", "")
        .Trim();

    return true;
}

/// <summary>
/// Returns the cleaned text that follows the first "data-cursor" occurrence up to the
/// profile-stream div (or the next newline when that div is absent), or null when no
/// delimiter is found.
/// </summary>
private static string ParseTimelineAttributeCursor(string data)
{
    int keyStart = data.IndexOf("data-cursor");
    if (keyStart < 0)
    {
        return null;
    }

    string tail = data.Substring(keyStart).Replace("data-cursor", "");

    int valueEnd = tail.IndexOf("<div class=\"stream profile-stream\">");
    if (valueEnd == -1)
    {
        valueEnd = tail.IndexOf("\n");
    }

    if (valueEnd < 0)
    {
        return null;
    }

    return tail.Substring(0, valueEnd)
        .Replace("\"", "")
        .Replace("\n", string.Empty)
        .Replace("=", string.Empty)
        .Replace(">", string.Empty)
        .Trim();
}

/// <summary>
/// Returns the cleaned text that follows the first "cursor" occurrence up to the next comma
/// (or the next newline when the comma is more than 40 characters away), or null when no
/// delimiter is found.
/// </summary>
private static string ParseTimelineFallbackCursor(string data)
{
    int keyStart = data.IndexOf("cursor");
    if (keyStart < 0)
    {
        return null;
    }

    string tail = data.Substring(keyStart).Replace("cursor", "");

    int valueEnd = tail.IndexOf(",");
    if (valueEnd > 40)
    {
        valueEnd = tail.IndexOf("\n");
    }

    if (valueEnd < 0)
    {
        return null;
    }

    return tail.Substring(0, valueEnd)
        .Replace("\"", "")
        .Replace("\n", string.Empty)
        .Replace("=", string.Empty)
        .Replace(":", string.Empty)
        .Trim();
}