/// <summary>
/// Converts <paramref name="pageSrcHtml"/> to XML with Chilkat and collects the accumulated text of every
/// <paramref name="TagName"/> element whose <c>class</c> attribute matches <paramref name="AttributeName"/>.
/// </summary>
/// <param name="pageSrcHtml">HTML source handed to the Chilkat HTML-to-XML converter.</param>
/// <param name="TagName">Tag name passed to <c>SearchForAttribute</c>.</param>
/// <param name="AttributeName">Value pattern for the <c>class</c> attribute passed to <c>SearchForAttribute</c>.</param>
/// <returns>
/// The text of each match, in the order the search yields them (empty when nothing matches).
/// <c>null</c> when the Chilkat component cannot be unlocked or when any exception is raised; callers
/// have always had to handle <c>null</c>, so that contract is kept.
/// </returns>
public List<string> GetTextDataByTagAndAttributeName(string pageSrcHtml, string TagName, string AttributeName)
{
    try
    {
        Chilkat.HtmlToXml htmlToXml = new Chilkat.HtmlToXml();

        // The converter refuses to work until it has been unlocked.
        if (!htmlToXml.UnlockComponent("THEBACHtmlToXml_7WY3A57sZH3O"))
        {
            Console.WriteLine(htmlToXml.LastErrorText);
            return null;
        }

        // Convert the HTML into XML so it can be searched by tag/attribute.
        htmlToXml.Html = pageSrcHtml;
        string xHtml = htmlToXml.ToXml();

        Chilkat.Xml xml = new Chilkat.Xml();
        xml.LoadXml(xHtml);

        List<string> lstData = new List<string>();

        // Each search resumes after the previous hit; a null start means "from the beginning".
        Chilkat.Xml xBeginSearchAfter = null;
        Chilkat.Xml xNode = xml.SearchForAttribute(xBeginSearchAfter, TagName, "class", AttributeName);
        while (xNode != null)
        {
            // Text only; content under script/style tags is excluded.
            lstData.Add(xNode.AccumulateTagContent("text", "script|style"));

            xBeginSearchAfter = xNode;
            xNode = xml.SearchForAttribute(xBeginSearchAfter, TagName, "class", AttributeName);
        }

        return lstData;
    }
    catch (Exception ex)
    {
        // Previously swallowed silently; report it the same way the unlock failure is reported.
        Console.WriteLine("GetTextDataByTagAndAttributeName failed: " + ex);
        return null;
    }
}
/// <summary>
/// Runs <paramref name="pageSrcHtml"/> through the Chilkat HTML-to-XML converter and gathers the
/// accumulated text of every element named <paramref name="TagName"/>.
/// </summary>
/// <param name="pageSrcHtml">HTML source to convert.</param>
/// <param name="TagName">Tag name passed to <c>SearchForTag</c>.</param>
/// <returns>
/// One entry per matching element, in the order the search yields them;
/// <c>null</c> when the Chilkat component cannot be unlocked.
/// </returns>
public List<string> GetDataTag(string pageSrcHtml, string TagName)
{
    List<string> tagTexts = new List<string>();

    Chilkat.HtmlToXml converter = new Chilkat.HtmlToXml();
    bool unlocked = converter.UnlockComponent("THEBACHtmlToXml_7WY3A57sZH3O");
    if (!unlocked)
    {
        Console.WriteLine(converter.LastErrorText);
        return null;
    }

    // Convert the page to XML so it can be searched by tag.
    converter.Html = pageSrcHtml;
    string convertedXml = converter.ToXml();

    Chilkat.Xml document = new Chilkat.Xml();
    document.LoadXml(convertedXml);

    // previousMatch stays null for the first search, then trails one hit behind.
    Chilkat.Xml previousMatch = null;
    for (Chilkat.Xml match = document.SearchForTag(previousMatch, TagName);
         match != null;
         match = document.SearchForTag(previousMatch, TagName))
    {
        // Text only; content under script/style tags is excluded.
        tagTexts.Add(match.AccumulateTagContent("text", "script|style"));
        previousMatch = match;
    }

    return tagTexts;
}
// StartAcceptInvitations: walks the pending-invitation inbox and issues an "invitationAccept" request per entry.
//
// Parameter:
//   httpHelper - helper used for every HTTP GET in this method; passed by ref but never reassigned here.
//
// Flow, as written:
//   1. Downloads a page via httpHelper.getHtmlfromUrl1 and hands it to FindTheUserName. The text between
//      "alt=" and "height" in that result is split on ' ' into userFirstName / UserLastName (failures ignored).
//   2. If the page contains "csrfToken", takes the 100 characters starting there, splits them on '"' and keeps
//      the first piece containing "csrfToken=" (cut at the next '&' when one exists, otherwise the whole piece
//      with the "csrfToken=" prefix removed).
//   3. Converts the page to XML through ChilkatHttpHelpr.ConvertHtmlToXml and logs the text of the first span
//      whose class is "invitation-count count " (the loop breaks after one match).
//   4. do/while: downloads a listing URL built from startRow (advanced by 10 per pass). When the page contains
//      "inbox-list", the "inbox-list " ol is split on "<li"; for every chunk carrying data-gid="..." the sender
//      name is read from between alt= and height, an accept URL containing data_gid and csrfToken is requested,
//      and when the response contains " are now connected" the confirmation text is appended to
//      Globals.path_AcceptInvitationEmail and logged. If the list holds no "<li", a "No more invitations left"
//      message is logged. The loop repeats while the listing page contains "is now a connection.".
//
// Notes:
//   - Every catch block is empty, so any failure (including a missing marker making Substring throw) is silent.
//   - isTrue is assigned but only read inside commented-out code.
//   - "Invitation accepted from ..." is logged when the response does NOT contain SenderName.
//   - NOTE(review): in this copy every URL literal is empty (new Uri("") / "" + ...) and two expressions contain
//     a "******" run where code appears to have been masked; confirm against the original source.
public void StartAcceptInvitations(ref GlobusHttpHelper httpHelper) { try { string csrfToken = string.Empty; string userFirstName = string.Empty; string UserLastName = string.Empty; string SenderName = string.Empty; string newPagesource = string.Empty; bool isTrue = false; int startRow = 1; string pageSource = httpHelper.getHtmlfromUrl1(new Uri("")); var resultForUserDetails = FindTheUserName(pageSource); try { resultForUserDetails = resultForUserDetails.Substring(resultForUserDetails.IndexOf("alt="), resultForUserDetails.IndexOf("height") - resultForUserDetails.IndexOf("alt=")).Replace("alt=", string.Empty).Replace("/", string.Empty).Trim(); userFirstName = resultForUserDetails.Split(' ')[0].Replace("\"", string.Empty); UserLastName = resultForUserDetails.Split(' ')[1].Replace("\"", string.Empty); } catch { } if (pageSource.Contains("csrfToken")) { csrfToken = pageSource.Substring(pageSource.IndexOf("csrfToken"), 100); string[] Arr = csrfToken.Split('"'); try { foreach (string item in Arr) { try { if (item.Contains("csrfToken=")) { csrfToken = item.Substring(item.IndexOf("csrfToken="), item.IndexOf("&", item.IndexOf("csrfToken=")) - item.IndexOf("csrfToken=")).Replace("csrfToken=", string.Empty).Replace("\"", string.Empty).Replace("\\", string.Empty).Trim();//Arr[2].Replace(@"\", string.Empty).Replace("//", string.Empty); break; } } catch (Exception ex) { } if (item.Contains("csrfToken=")) { csrfToken = item.Replace("csrfToken=", string.Empty).Trim(); break; } } } catch (Exception ex) { //csrfToken = Arr[0].Replace("csrfToken=", "").Replace("\\", ""); } } // For Show More //string postData1 = "pkey=inbox-invitations-pending&tcode=%5Bobject%20Arguments%5D&plist="; //string response1 = httpHelper.postFormData(new Uri(""+csrfToken+""),postData1); //string pageSource2=httpHelper.getHtmlfromUrl(new Uri("")); //*** Conver HTML to XML *******************************// #region Convert HTML to XML ChilkatHttpHelpr objhelper = new ChilkatHttpHelpr(); //xHtml contain xml 
data string xHtml = objhelper.ConvertHtmlToXml(pageSource); Chilkat.Xml xml = new Chilkat.Xml(); xml.LoadXml(xHtml); //xHtml. //// Iterate over all h1 tags: Chilkat.Xml xNode = default(Chilkat.Xml); Chilkat.Xml xBeginSearchAfter = default(Chilkat.Xml); #endregion #region Invitatin count xBeginSearchAfter = null; xNode = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "invitation-count count "); try { while ((xNode != null)) { string strvalue = xNode.AccumulateTagContent("text", "script|style"); string Invitatincount = strvalue; Log("[ " + DateTime.Now + " ] => [ Invitation Count = " + Invitatincount + " UserName = "******" ]"); Log("-----------------------------------------------------------------------------------------------------------------------------------"); break; } } catch (Exception ex) { } #endregion do { newPagesource = httpHelper.getHtmlfromUrl1(new Uri("" + startRow + "&subFilter=&trk=&showBlocked=false")); if (newPagesource.Contains("inbox-list")) { string inbox_list = httpHelper.GetDataWithTagValueByTagAndAttributeNameWithClass(newPagesource, "ol", "inbox-list "); if (inbox_list.Contains("<li")) { isTrue = true; string[] srrLi = Regex.Split(inbox_list, "<li"); foreach (string item in srrLi) { try { if (item.Contains("data-gid=\"")) { string data_gid = item.Substring(item.IndexOf("data-gid=\"") + 10, item.IndexOf("\"", item.IndexOf("data-gid=\"") + 10) - (item.IndexOf("data-gid=\"") + 10)).Replace("\"", string.Empty).Replace("data-gid=\"", string.Empty).Trim(); int startindex1 = item.IndexOf("alt="); string start1 = item.Substring(startindex1).Replace("alt=", string.Empty); int endindex1 = start1.IndexOf("height"); string end1 = start1.Substring(0, endindex1).Replace("\"", string.Empty).Trim(); SenderName = end1; string response2 = httpHelper.getHtmlfromUrl1(new Uri("" + data_gid + "&actionType=invitationAccept&csrfToken=" + csrfToken + "&goback=%2Epiv_*1_*1_*1_*1_*1&trk=inbox-invitations-inv-accept&ctx=inbox&rnd=1366352095313")); if 
(response2.Contains(" are now connected")) { string SuccessMsg = string.Empty; int startindex = response2.IndexOf("<div class=\"confirmation\">"); if (startindex > 0) { try { string start = response2.Substring(startindex).Replace("<div class=\"confirmation\">", string.Empty); int endindex = start.IndexOf("<ul>"); string end = start.Substring(0, endindex); //SuccessMsg = end.Replace("<h4>", string.Empty).Replace("\"", string.Empty).Replace("</h4>", string.Empty).Replace("\n", string.Empty).Replace("\t", string.Empty).Replace("\"u002", "-").Replace("You", "User: "******"<h4>", string.Empty).Replace("\"", string.Empty).Replace("</h4>", string.Empty).Replace("\n", string.Empty).Replace("\t", string.Empty).Replace("\"u002", "-").Replace("You", "User: "******"( Name:" + userFirstName + " " + UserLastName + ") ").Trim(); } catch { } } GlobusFileHelper.AppendStringToTextfileNewLine(SuccessMsg, Globals.path_AcceptInvitationEmail); Log("[ " + DateTime.Now + " ] => [ " + SuccessMsg + " ]"); } if (!(response2.Contains(SenderName))) { Log("[ " + DateTime.Now + " ] => [ Invitation accepted from " + SenderName + " ]"); } else { //Log("There is some error !"); } } } catch (Exception ex) { } } } else { //Log("[ " + DateTime.Now + " ] => [ There is no invitation ! ]"); Log("[ " + DateTime.Now + " ] => [ No more invitations left to accept ! ]"); } } startRow = startRow + 10; } while (newPagesource.Contains("is now a connection.")); //else //{ // Log("[ " + DateTime.Now + " ] => [ There is no invitation ! ]"); //} //if (isTrue) //{ // StartAcceptInvitations(ref httpHelper); //} } catch (Exception ex) { } }
// btnStart_Searching_Click: click handler that runs an advanced keyword search and scrapes the matching users.
//
// Flow, as written:
//   1. Copies the text of nine text boxes into AllOfTheseWords, ThisExtractPhrase, AnyOfTheseWords, TheseHashTags,
//      NoneOfTheseWords, FromTheseAccounts, ToTheseAccounts, MentionTheseAccounts and NearThisPlace, then logs
//      "Process Started" through AddToLog_AdvancedSearch.
//   2. Each of the eight optional values is left as "" when empty, otherwise prefixed with a URL-encoded
//      operator ("%20%22", "%22%20", "%20%23", "%20-", "%20from%3A", "%20to%3A", "%20%40", "%20near%3A%22").
//   3. Only when txtAllofTheseKeywords is non-empty: parses txtNoOfRecords into noOfRecords (stays 0 when parsing
//      fails), then at label startAgain builds searchURL from those parts. The first pass ends in "&f=realtime";
//      later passes also append "...&scroll_cursor=" + TweetId.
//   4. Fetches the URL with _GlobusHttpHelper.getHtmlfromUrl (one retry when the response is empty), splits the
//      response on "refresh_cursor" and tries to read a cursor into TweetId from the text following "TWEET-".
//      If that outer try throws, it sleeps 2 seconds and fetches once more.
//   5. Splits the response on "data-item-id", skips the first piece, and for every piece that has
//      "data-screen-name=" and lacks "js-actionable-user js-profile-popup-actionable" extracts:
//        id            - value after data-user-id=
//        from_user_id  - value after data-screen-name= (logged as "User ScreenName")
//        tweetUserid   - value after the first =\" in the piece (logged as "Tweet Id")
//      A StructTweetIDs is added to lst_structTweetIDs unless id is "null". The profile page for from_user_id is
//      then downloaded and searched (Chilkat XML) for profile name, website, bio, location and the three
//      ProfileNav stats; tweetUserid is added to Globals.lstScrapedUserIDs; the list is de-duplicated.
//   6. After the loop it jumps back to startAgain unless the response contains a "has more items: false" marker.
//
// Notes:
//   - Exceptions are swallowed by empty catch blocks throughout.
//   - The two end-of-pass checks look for differently spelled markers ("has_moreitems\":false" versus
//     "has_more_items\":false"), and the first branch indexes lst_structTweetIDs[Count - 1] without an empty check.
//   - In the tweet-id extraction the catch assigns from_user_id = "null", not tweetUserid.
//   - NOTE(review): this copy looks damaged - a method signature (NewKeywordStructDataForSearchByKeyword) sits
//     inside the "Commented" region, URL literals are empty, and some literals in commented code appear
//     entity-decoded (e.g. Replace(""", "")). Confirm against the original source before changing logic.
private void btnStart_Searching_Click(object sender, EventArgs e) { AllOfTheseWords = (txtAllofTheseKeywords.Text).ToString(); ThisExtractPhrase = (txtThisExactPhrase.Text).ToString(); AnyOfTheseWords = (txtAnyOfTheseWords.Text).ToString(); TheseHashTags = (txtTheseHashTags.Text).ToString(); NoneOfTheseWords = (txtNoneofTheseWords.Text).ToString(); FromTheseAccounts = (txtFromTheseAccounts.Text).ToString(); ToTheseAccounts = (txtToTheseAccounts.Text).ToString(); MentionTheseAccounts = (txtMentioningTheseAccounts.Text).ToString(); NearThisPlace = (txtNearThisPlace.Text).ToString(); AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => Process Started"); try { if (string.IsNullOrEmpty(ThisExtractPhrase)) { ThisExtractPhrase = ""; } else { ThisExtractPhrase = "%20%22" + ThisExtractPhrase; } } catch { } try { if (string.IsNullOrEmpty(AnyOfTheseWords)) { AnyOfTheseWords = ""; } else { AnyOfTheseWords = "%22%20" + AnyOfTheseWords; } } catch { } try { if (string.IsNullOrEmpty(TheseHashTags)) { TheseHashTags = ""; } else { TheseHashTags = "%20%23" + TheseHashTags; } } catch { } try { if (string.IsNullOrEmpty(NoneOfTheseWords)) { NoneOfTheseWords = ""; } else { NoneOfTheseWords = "%20-" + NoneOfTheseWords; } } catch { } try { if (string.IsNullOrEmpty(FromTheseAccounts)) { FromTheseAccounts = ""; } else { FromTheseAccounts = "%20from%3A" + FromTheseAccounts; } } catch { } try { if (string.IsNullOrEmpty(ToTheseAccounts)) { ToTheseAccounts = ""; } else { ToTheseAccounts = "%20to%3A" + ToTheseAccounts; } } catch { } try { if (string.IsNullOrEmpty(MentionTheseAccounts)) { MentionTheseAccounts = ""; } else { MentionTheseAccounts = "%20%40" + MentionTheseAccounts; } } catch { } try { if (string.IsNullOrEmpty(NearThisPlace)) { NearThisPlace = ""; } else { NearThisPlace = "%20near%3A%22" + NearThisPlace; } } catch { } try { if (!string.IsNullOrEmpty(txtAllofTheseKeywords.Text)) { #region Commented //try //{ // string Url = "" + AllOfTheseWords + ThisExtractPhrase + AnyOfTheseWords + 
NoneOfTheseWords + TheseHashTags + _selectedLanguage + FromTheseAccounts + ToTheseAccounts + MentionTheseAccounts + NearThisPlace + "%22%20within%3A15mi&src=typd"; // string response = _GlobusHttpHelper.getHtmlfromUrl(new Uri(Url), "", ""); //} //catch { } public List<StructTweetIDs> NewKeywordStructDataForSearchByKeyword(string keyword) #endregion { try { BaseLib.GlobusRegex regx = new GlobusRegex(); int counter = 0; string res_Get_searchURL = string.Empty; string searchURL = string.Empty; string maxid = string.Empty; string TweetId = string.Empty; string text = string.Empty; string ProfileName = string.Empty; string Location = string.Empty; string Bio = string.Empty; string website = string.Empty; string NoOfTweets = string.Empty; string Followers = string.Empty; string Followings = string.Empty; int noOfRecords = 0; try { noOfRecords = int.Parse(txtNoOfRecords.Text); } catch { } startAgain: if (counter == 0) { searchURL = "" + AllOfTheseWords + ThisExtractPhrase + AnyOfTheseWords + NoneOfTheseWords + TheseHashTags + _selectedLanguage + FromTheseAccounts + ToTheseAccounts + MentionTheseAccounts + NearThisPlace + "%22%20within%3A15mi&src=typd" + "&f=realtime"; counter++; } else { searchURL = "" + AllOfTheseWords + ThisExtractPhrase + AnyOfTheseWords + NoneOfTheseWords + TheseHashTags + _selectedLanguage + FromTheseAccounts + ToTheseAccounts + MentionTheseAccounts + NearThisPlace + "%22%20within%3A15mi&src=typd" + "&f=realtime&include_available_features=1&include_entities=1&last_note_ts=0&oldest_unread_id=0&scroll_cursor=" + TweetId + ""; } try { res_Get_searchURL = _GlobusHttpHelper.getHtmlfromUrl(new Uri(searchURL), "", ""); AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => Finding results for entered details "); if (string.IsNullOrEmpty(res_Get_searchURL)) { res_Get_searchURL = _GlobusHttpHelper.getHtmlfromUrl(new Uri(searchURL), "", ""); } try { //string sjss = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", ""); string[] splitRes = 
Regex.Split(res_Get_searchURL, "refresh_cursor"); //splitRes = splitRes.Skip(1).ToArray(); foreach (string item in splitRes) { if (item.Contains("refresh_cursor")) { int startIndex = item.IndexOf("TWEET-"); string start = item.Substring(startIndex).Replace("data-user-id=\\\"", ""); int endIndex = start.IndexOf("\""); string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", ""); TweetId = end; } if (item.Contains("scroll_cursor")) { int startIndex = item.IndexOf("TWEET-"); string start = item.Substring(startIndex).Replace("data-user-id=\\\"", ""); int endIndex = start.IndexOf("\""); string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", ""); TweetId = end; } } } catch (Exception) { } } catch (Exception ex) { System.Threading.Thread.Sleep(2000); res_Get_searchURL = _GlobusHttpHelper.getHtmlfromUrl(new Uri(searchURL), "", ""); } // && !res_Get_searchURL.Contains("has_more_items\":false") if (!string.IsNullOrEmpty(res_Get_searchURL)) { //string[] splitRes = Regex.Split(res_Get_searchURL, "data-item-id"); //Regex.Split(res_Get_searchURL, "\"in_reply_to_status_id_str\""); string[] splitRes = Regex.Split(res_Get_searchURL, "data-item-id"); splitRes = splitRes.Skip(1).ToArray(); foreach (string item in splitRes) { if (item.Contains("data-screen-name=") && !item.Contains("js-actionable-user js-profile-popup-actionable")) { //var avc = Newtonsoft.Json.JsonConvert.DeserializeObject<dynamic>(res_Get_searchURL); //string DataHtml = (string)avc["items_html"]; } else { continue; } string modified_Item = "\"from_user\"" + item; string id = ""; try { int startIndex = item.IndexOf("data-user-id="); string start = item.Substring(startIndex).Replace("data-user-id=\\\"", ""); int endIndex = start.IndexOf("\\\""); string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", 
"").Replace("{", "").Replace("}", "").Replace("]", ""); id = end; //lst_structTweetIDs.Add(id); AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => User Id " + id); } catch (Exception ex) { id = "null"; //Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper); } string from_user_id = ""; try { int startIndex = item.IndexOf("data-screen-name=\\\""); string start = item.Substring(startIndex).Replace("data-screen-name=\\\"", ""); int endIndex = start.IndexOf("\\\""); string end = start.Substring(0, endIndex).Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", ""); from_user_id = end; AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => User ScreenName " + from_user_id); } catch (Exception ex) { from_user_id = "null"; // Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwitterDataScrapper); } string tweetUserid = string.Empty; try { int startIndex = item.IndexOf("=\\\""); string start = item.Substring(startIndex).Replace("=\\\"", ""); int endIndex = start.IndexOf("\\\""); string end = start.Substring(0, endIndex).Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", ""); tweetUserid = end; AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => Tweet Id " + tweetUserid); } catch (Exception ex) { from_user_id = "null"; } ///Tweet Text #region Commented //try //{ // int startindex = item.IndexOf("js-tweet-text tweet-text\""); // if (startindex == -1) // { // startindex = 0; // startindex = item.IndexOf("js-tweet-text tweet-text"); // } // string start = item.Substring(startindex).Replace("js-tweet-text 
tweet-text\"", "").Replace("js-tweet-text tweet-text tweet-text-rtl\"", ""); // int endindex = start.IndexOf("</p>"); // if (endindex == -1) // { // endindex = 0; // endindex = start.IndexOf("stream-item-footer"); // } // string end = start.Substring(0, endindex); // end = regx.StripTagsRegex(end); // text = end.Replace(" ", "").Replace("a href=", "").Replace("/a", "").Replace("<span", "").Replace("</span", "").Replace("class=\\\"js-display-url\\\"", "").Replace("class=\\\"tco-ellipsis\\\"", "").Replace("class=\\\"invisible\\\"", "").Replace("<strong>", "").Replace("target=\\\"_blank\\\"", "").Replace("class=\\\"twitter-timeline-link\\\"", "").Replace("</strong>", "").Replace("rel=\\\"nofollow\\\" dir=\\\"ltr\\\" data-expanded-url=", ""); // text = text.Replace(""", "").Replace("<", "").Replace(">", "").Replace("\"", "").Replace("\\", "").Replace("title=", ""); // string[] array = Regex.Split(text, "http"); // text = string.Empty; // foreach (string itemData in array) // { // if (!itemData.Contains("")) // { // string data = string.Empty; // if (itemData.Contains("//")) // { // data = ("http" + itemData).Replace(" span ", string.Empty); // if (!text.Contains(itemData.Replace(" ", "")))// && !data.Contains("class") && !text.Contains(data)) // { // text += data.Replace("u003c", string.Empty).Replace("u003e", string.Empty); // } // } // else // { // if (!text.Contains(itemData.Replace(" ", ""))) // { // text += itemData.Replace("u003c", string.Empty).Replace("u003e", string.Empty).Replace("js-tweet-text tweet-text", ""); // } // } // } // } //} //catch { }; #endregion twtboardpro.TwitterDataScrapper.StructTweetIDs structTweetIDs = new twtboardpro.TwitterDataScrapper.StructTweetIDs(); if (id != "null") { structTweetIDs.ID_Tweet = tweetUserid; structTweetIDs.ID_Tweet_User = id; structTweetIDs.username__Tweet_User = from_user_id; structTweetIDs.wholeTweetMessage = text; lst_structTweetIDs.Add(structTweetIDs); } //if (!File.Exists(Globals.Path_KeywordScrapedListData + "-" 
+ keyword + ".csv")) //{ // GlobusFileHelper.AppendStringToTextfileNewLine("USERID , USERNAME , PROFILE NAME , BIO , LOCATION , WEBSITE , NO OF TWEETS , FOLLOWERS , FOLLOWINGS", Globals.Path_KeywordScrapedListData + "-" + keyword + ".csv"); //} { ChilkatHttpHelpr objChilkat = new ChilkatHttpHelpr(); GlobusHttpHelper HttpHelper = new GlobusHttpHelper(); string ProfilePageSource = HttpHelper.getHtmlfromUrl(new Uri("" + from_user_id), "", ""); string Responce = ProfilePageSource; #region Convert HTML to XML string xHtml = objChilkat.ConvertHtmlToXml(Responce); Chilkat.Xml xml = new Chilkat.Xml(); xml.LoadXml(xHtml); Chilkat.Xml xNode = default(Chilkat.Xml); Chilkat.Xml xBeginSearchAfter = default(Chilkat.Xml); #endregion int counterdata = 0; xBeginSearchAfter = null; string dataDescription = string.Empty; xNode = xml.SearchForAttribute(xBeginSearchAfter, "h1", "class", "ProfileHeaderCard-name"); while ((xNode != null)) { xBeginSearchAfter = xNode; if (counterdata == 0) { ProfileName = xNode.AccumulateTagContent("text", "script|style"); counterdata++; } else if (counterdata == 1) { website = xNode.AccumulateTagContent("text", "script|style"); counterdata++; } else { break; } xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "u-textUserColor"); } xBeginSearchAfter = null; dataDescription = string.Empty; xNode = xml.SearchForAttribute(xBeginSearchAfter, "p", "class", "ProfileHeaderCard-bio u-dir");//bio profile-field"); while ((xNode != null)) { xBeginSearchAfter = xNode; Bio = xNode.AccumulateTagContent("text", "script|style").Replace("'", "'").Replace(" ", string.Empty).Trim(); break; } xBeginSearchAfter = null; dataDescription = string.Empty; xNode = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "ProfileHeaderCard-locationText u-dir");//location profile-field"); while ((xNode != null)) { xBeginSearchAfter = xNode; Location = xNode.AccumulateTagContent("text", "script|style"); break; } int counterData = 0; xBeginSearchAfter = null; 
dataDescription = string.Empty; xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-nav");//location profile-field"); while ((xNode != null)) { xBeginSearchAfter = xNode; if (counterData == 0) { // NoOfTweets = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "ProfileNav-value"); NoOfTweets = xNode.AccumulateTagContent("text", "script|style").Replace("Tweets", string.Empty).Replace(",", string.Empty).Replace("Tweet", string.Empty); counterData++; } else if (counterData == 1) { Followings = xNode.AccumulateTagContent("text", "script|style").Replace(" Following", string.Empty).Replace(",", string.Empty).Replace("Following", string.Empty); counterData++; } else if (counterData == 2) { Followers = xNode.AccumulateTagContent("text", "script|style").Replace("Followers", string.Empty).Replace(",", string.Empty).Replace("Follower", string.Empty); counterData++; } else { break; } //xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "js-nav"); xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-openSignupDialog js-nonNavigable u-textUserColor"); } if (!string.IsNullOrEmpty(from_user_id) && tweetUserid != "null") { string Id_user = tweetUserid.Replace("}]", string.Empty).Trim(); Globals.lstScrapedUserIDs.Add(Id_user); // GlobusFileHelper.AppendStringToTextfileNewLine(id + "," + from_user_id + "," + ProfileName + "," + Bio.Replace(",", "") + "," + Location.Replace(",", "") + "," + website + "," + NoOfTweets.Replace(",", "").Replace("Tweets", "") + "," + Followers.Replace(",", "").Replace("Following", "") + "," + Followings.Replace(",", "").Replace("Followers", "").Replace("Follower", ""), Globals.Path_KeywordScrapedListData + "-" + keyword + ".csv"); // Log("[ " + DateTime.Now + " ] => [ " + from_user_id + "," + Id_user + "," + ProfileName + "," + Bio.Replace(",", 
"") + "," + Location + "," + website + "," + NoOfTweets + "," + Followers + "," + Followings + " ]"); } } lst_structTweetIDs = lst_structTweetIDs.Distinct().ToList(); if (lst_structTweetIDs.Count >= noOfRecords) { // return lst_structTweetIDs; } } if (lst_structTweetIDs.Count <= noOfRecords) { maxid = lst_structTweetIDs[lst_structTweetIDs.Count - 1].ID_Tweet; if (res_Get_searchURL.Contains("has_moreitems\":false")) { } else { goto startAgain; } } else { if (res_Get_searchURL.Contains("has_more_items\":false")) { } else { goto startAgain; } } } } catch (Exception ex) { } } } } catch { } }
/// <summary>
/// Downloads one user's profile page, extracts profile name, website, bio, location, tweet/following/follower
/// counts, the numeric profile id and whether a non-default avatar is present, then appends the result as a
/// CSV row to <c>Globals.Path_UserListInfoData</c> and logs it through <c>AddToLog_ScrapMember</c>.
/// </summary>
/// <param name="param">An <see cref="Array"/> whose first element is the user name (a string) to scrape.</param>
/// <remarks>
/// Nothing is written when the argument is not a usable array/string, when the page cannot be downloaded
/// (one retry is made), or when the page contains "Account suspended". No exception escapes this method;
/// unexpected failures are reported on the console instead of being silently discarded.
/// </remarks>
public void scrapUserInfo(object param)
{
    try
    {
        // The caller packs the user name into the first slot of an array.
        Array paramsArray = param as Array;
        if (paramsArray == null || paramsArray.Length == 0)
        {
            return;
        }

        string UserName = paramsArray.GetValue(0) as string;
        if (UserName == null)
        {
            return;
        }

        string userId = string.Empty;
        string profileName = string.Empty;
        string location = string.Empty;
        string bio = string.Empty;
        string website = string.Empty;
        string noOfTweets = string.Empty;
        string followers = string.Empty;
        string followings = string.Empty;

        ChilkatHttpHelpr objChilkat = new ChilkatHttpHelpr();
        GlobusHttpHelper httpHelper = new GlobusHttpHelper();

        // NOTE(review): the base URL literal is empty in this copy of the file, so new Uri(...) will throw for
        // a bare user name. Restore the real profile base URL here; it is kept as-is because it cannot be
        // recovered from this source.
        const string profileBaseUrl = "";
        Uri profileUri = new Uri(profileBaseUrl + UserName.Trim());

        string profilePageSource = httpHelper.getHtmlfromUrl(profileUri, "", "");
        if (string.IsNullOrEmpty(profilePageSource))
        {
            // One retry before giving up.
            profilePageSource = httpHelper.getHtmlfromUrl(profileUri, "", "");
        }

        if (string.IsNullOrEmpty(profilePageSource))
        {
            AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [ User " + UserName + " is not exist or page source getting null.]");
            return;
        }

        if (profilePageSource.Contains("Account suspended"))
        {
            AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [ User " + UserName + " is suspended ]");
            return;
        }

        // Convert the page to XML so it can be searched by tag/class.
        Chilkat.Xml xml = new Chilkat.Xml();
        xml.LoadXml(objChilkat.ConvertHtmlToXml(profilePageSource));

        // --- Profile name (h1), then website (first "u-textUserColor" link after it) ---
        Chilkat.Xml searchAfter = null;
        Chilkat.Xml node = xml.SearchForAttribute(searchAfter, "h1", "class", "ProfileHeaderCard-name");
        int headerField = 0;
        while (node != null)
        {
            searchAfter = node;
            if (headerField == 0)
            {
                profileName = node.AccumulateTagContent("text", "script|style").Replace("Verified account", " ");
                headerField++;
            }
            else if (headerField == 1)
            {
                website = node.AccumulateTagContent("text", "script|style");
                if (website.Contains("Twitter Status"))
                {
                    website = "";
                }
                headerField++;
            }
            else
            {
                break;
            }

            node = xml.SearchForAttribute(searchAfter, "a", "class", "u-textUserColor");
        }

        // --- Bio: first match only ---
        searchAfter = null;
        node = xml.SearchForAttribute(searchAfter, "p", "class", "ProfileHeaderCard-bio u-dir");
        if (node != null)
        {
            // NOTE(review): the literals here had been HTML-entity-decoded in the source ("'" -> "'" was a
            // no-op and " " stripped every space from the bio); "&#39;" / "&nbsp;" are the reconstructed
            // originals - confirm against version control.
            bio = node.AccumulateTagContent("text", "script|style")
                .Replace("&#39;", "'")
                .Replace("&nbsp;", string.Empty)
                .Trim();
        }

        // --- Location: first match only ---
        searchAfter = null;
        node = xml.SearchForAttribute(searchAfter, "span", "class", "ProfileHeaderCard-locationText u-dir");
        if (node != null)
        {
            location = node.AccumulateTagContent("text", "script|style");
        }

        // --- Stats: tweets, following, followers, in that order ---
        searchAfter = null;
        node = xml.SearchForAttribute(searchAfter, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-nav");
        int statField = 0;
        while (node != null)
        {
            searchAfter = node;
            if (statField == 0)
            {
                noOfTweets = node.AccumulateTagContent("text", "script|style").Replace("Tweets", string.Empty).Replace(",", string.Empty).Replace("Tweet", string.Empty);
                statField++;
            }
            else if (statField == 1)
            {
                followings = node.AccumulateTagContent("text", "script|style").Replace(" Following", string.Empty).Replace(",", string.Empty).Replace("Following", string.Empty);
                statField++;
            }
            else if (statField == 2)
            {
                followers = node.AccumulateTagContent("text", "script|style").Replace("Followers", string.Empty).Replace(",", string.Empty).Replace("Follower", string.Empty);
                statField++;
            }
            else
            {
                break;
            }

            // Subsequent stats use a different class list than the first one.
            node = xml.SearchForAttribute(searchAfter, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-openSignupDialog js-nonNavigable u-textUserColor");
        }

        // --- Numeric profile id: text between "profile_id" and the next comma ---
        // Index checks replace the former try/catch-all; a missing marker simply leaves userId unchanged.
        int idStart = profilePageSource.IndexOf("profile_id");
        if (idStart >= 0)
        {
            string tail = profilePageSource.Substring(idStart).Replace("profile_id", "");
            int idEnd = tail.IndexOf(",");
            if (idEnd >= 0)
            {
                // "&quot;" restores a literal that had been entity-decoded into an unparseable """.
                userId = tail.Substring(0, idEnd).Replace("&quot;", "").Replace("\"", "").Replace(":", "").Trim();

                if (userId.Length > 15)
                {
                    // Too long to be an id: the first hit was not the entity-quoted form, so look for that one.
                    idStart = profilePageSource.IndexOf("profile_id&quot;");
                    if (idStart >= 0)
                    {
                        tail = profilePageSource.Substring(idStart).Replace("profile_id&quot;", "");
                        idEnd = tail.IndexOf(",");
                        if (idEnd >= 0)
                        {
                            userId = tail.Substring(0, idEnd).Replace("&quot;", "").Replace("\"", "").Replace(":", "").Replace(";", "").Trim();
                        }
                    }
                }
            }
        }

        // --- Avatar: any of default_profile_0..6 means no custom picture ---
        bool hasDefaultAvatar = false;
        for (int i = 0; i <= 6 && !hasDefaultAvatar; i++)
        {
            hasDefaultAvatar = profilePageSource.Contains("default_profile_" + i + "_400x400");
        }
        string isProfilePic = hasDefaultAvatar ? "No" : "Yes";

        // Write the CSV header once, when the output file does not exist yet.
        if (!File.Exists(Globals.Path_UserListInfoData))
        {
            GlobusFileHelper.AppendStringToTextfileNewLine("USERID , USERNAME , PROFILE NAME , BIO , LOCATION , WEBSITE , NO OF TWEETS , FOLLOWERS , FOLLOWINGS, ProfilePic", Globals.Path_UserListInfoData);
        }

        if (!string.IsNullOrEmpty(UserName))
        {
            // Commas are stripped from free-text fields so they cannot break the CSV columns.
            string csvRow = userId + "," + UserName + "," + profileName + "," + bio.Replace(",", "") + "," + location.Replace(",", "") + "," + website + "," + noOfTweets.Replace(",", "").Replace("Tweets", "") + "," + followers.Replace(",", "").Replace("Following", "") + "," + followings.Replace(",", "").Replace("Followers", "").Replace("Follower", "") + "," + isProfilePic;
            GlobusFileHelper.AppendStringToTextfileNewLine(csvRow, Globals.Path_UserListInfoData);

            AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [ " + userId + "," + UserName + "," + profileName + "," + bio.Replace(",", "") + "," + location + "," + website + "," + noOfTweets + "," + followers + "," + followings + " ," + isProfilePic + "]");
        }
    }
    catch (Exception ex)
    {
        // Still best-effort (never throws to the caller), but no longer silent.
        Console.WriteLine("scrapUserInfo failed: " + ex);
    }
}