/// <summary>
/// Collects the hrefs found inside every element of <paramref name="pageSrcHtml"/> whose tag is
/// <paramref name="TagName"/> and whose <c>class</c> attribute matches <paramref name="className"/>.
/// </summary>
/// <param name="pageSrcHtml">HTML source; it is converted to XML with Chilkat before being searched.</param>
/// <param name="TagName">Tag name handed to <c>Chilkat.Xml.SearchForAttribute</c>.</param>
/// <param name="className">Value pattern for the <c>class</c> attribute handed to the same search.</param>
/// <returns>
/// The hrefs that <c>GetHrefFromString</c> extracts from the XML of each matching element, in search
/// order; <c>null</c> when the Chilkat component cannot be unlocked or when any exception occurs.
/// </returns>
public List<string> GetHrefsByTagAndAttributeName(string pageSrcHtml, string TagName, string className)
{
    try
    {
        Chilkat.HtmlToXml htmlToXml = new Chilkat.HtmlToXml();

        // NOTE(review): the unlock code is a licence secret embedded in source; consider loading it from configuration.
        if (!htmlToXml.UnlockComponent("THEBACHtmlToXml_7WY3A57sZH3O"))
        {
            Console.WriteLine(htmlToXml.LastErrorText);
            return null;
        }

        // Convert the HTML to XML so the attribute search below can walk it.
        htmlToXml.Html = pageSrcHtml;
        Chilkat.Xml xml = new Chilkat.Xml();
        xml.LoadXml(htmlToXml.ToXml());

        List<string> lstData = new List<string>();

        // Each search resumes after the previously returned node; null starts from the beginning.
        Chilkat.Xml xNode = xml.SearchForAttribute(null, TagName, "class", className);
        while (xNode != null)
        {
            lstData.AddRange(GetHrefFromString(xNode.GetXml()));
            xNode = xml.SearchForAttribute(xNode, TagName, "class", className);
        }

        return lstData;
    }
    catch (Exception ex)
    {
        // Callers still receive null on failure, but the reason is no longer discarded.
        Console.WriteLine(ex.Message);
        return null;
    }
}
/// <summary>
/// Fetches the profile page at <paramref name="Url"/>, scrapes profile fields out of its HTML with
/// marker-based string slicing and Chilkat XML searches, logs the assembled record and passes a CSV
/// header plus row to <c>CSVUtilities.ExportDataCSVFile</c>.
/// </summary>
/// <remarks>
/// Every field is extracted on a best-effort basis: a failed extraction is reported on the console and
/// the field keeps its default value. A failure outside those guarded extractions aborts the whole
/// method; it is reported on the console and not rethrown.
/// </remarks>
/// <param name="Url">Address of the profile page; it is also stored in the record as the profile link.</param>
/// <param name="HttpHelper">Helper whose <c>getHtmlfromUrl1</c> downloads the page. It is not reassigned here.</param>
public void CrawlingPageDataSource(string Url, ref GlobusHttpHelper HttpHelper)
{
    try
    {
        Log("[ " + DateTime.Now + " ] => [ Start Parsing Process ]");

        // ---- Chilkat setup -------------------------------------------------------------------------
        // NOTE(review): both unlock codes are licence secrets embedded in source; consider configuration.
        Chilkat.Http http = new Chilkat.Http();
        if (!http.UnlockComponent("THEBACHttp_b3C9o9QvZQ06"))
        {
            Console.WriteLine(http.LastErrorText);
            return;
        }

        Chilkat.HtmlUtil htmlUtil = new Chilkat.HtmlUtil();
        string html = htmlUtil.EntityDecode(HttpHelper.getHtmlfromUrl1(new Uri(Url)));

        Chilkat.HtmlToXml htmlToXml = new Chilkat.HtmlToXml();
        if (!htmlToXml.UnlockComponent("THEBACHtmlToXml_7WY3A57sZH3O"))
        {
            Console.WriteLine(htmlToXml.LastErrorText);
            return;
        }

        htmlToXml.Html = html;
        Chilkat.Xml xml = new Chilkat.Xml();
        xml.LoadXml(htmlToXml.ToXml());

        // Second converter, used only for the <li> fragments of the "past" and "current" summaries.
        Chilkat.HtmlToXml fragmentConverter = new Chilkat.HtmlToXml();

        // The result of this call was never used. NOTE(review): the call is kept only in case the helper
        // has side effects or is relied on to throw; confirm and remove.
        objChilkat.GetDataTagAttributewithId(html, "div", "name-container");

        // ---- Groups --------------------------------------------------------------------------------
        // Fix: every group text containing "Join" is kept. Previously each later match overwrote the
        // result with ";" + item because the accumulator variable was never assigned.
        List<string> joinableGroups = new List<string>();
        Chilkat.Xml xNode = xml.SearchForAttribute(null, "div", "class", "group-data");
        while (xNode != null)
        {
            string groupText = xNode.AccumulateTagContent("text", "/text");
            if (groupText.Contains("Join"))
            {
                joinableGroups.Add(groupText);
            }
            xNode = xml.SearchForAttribute(xNode, "div", "class", "group-data");
        }
        string groups = string.Join(";", joinableGroups.ToArray());

        // ---- Skills --------------------------------------------------------------------------------
        string skills = string.Empty;
        xNode = xml.SearchForAttribute(null, "div", "id", "profile-skills");
        while (xNode != null)
        {
            string skillText = xNode.AccumulateTagContent("text", "/text");
            if (skillText.Contains("extlib: _toggleclass"))
            {
                // In this variant the fifth ';'-separated piece is taken as the skills text.
                string[] pieces = skillText.Split(';');
                if (pieces.Length > 4)
                {
                    skills = pieces[4];
                }
            }
            else
            {
                skills = skillText.Replace("Skills & Expertise", " ");
            }
            xNode = xml.SearchForAttribute(xNode, "div", "id", "profile-skills");
        }
        if (skills.Contains(" Endorsements LI.i18n.register('section_skills_person_endorsed_tmpl"))
        {
            skills = string.Empty;
        }

        // ---- Profile link --------------------------------------------------------------------------
        // The link scraped from "webProfileURL" and the "&authType"-stripped URL were both immediately
        // overwritten by the requested URL, so only that final value is kept.
        string profileLink = Url;

        // ---- Connections ---------------------------------------------------------------------------
        string connection = string.Empty;
        if (html.Contains("overview-connections"))
        {
            try
            {
                string block = html.Substring(html.IndexOf("leo-module mod-util connections"), 500);
                string count = block.Split('>')[5].Replace("</strong", "").Replace(")</h3", "").Replace("(", "");
                connection = count.Length < 8 ? count + "Connection" : string.Empty;
            }
            catch (Exception)
            {
                try
                {
                    // If the parsing below fails, the raw 50-character snippet stays in the field,
                    // exactly as before.
                    connection = html.Substring(html.IndexOf("overview-connections"), 50);

                    // Fix: split the snippet just extracted; the old code split a stale variable
                    // left over from the failed first attempt.
                    string[] pieces = connection.Split('>');
                    connection = pieces[3].Replace("</strong", "").Replace(")</h3", "").Replace("(", "") + "Connection";
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex.Message);
                }
            }
        }

        // ---- Recommendations -----------------------------------------------------------------------
        string recommendations = string.Empty;
        if (html.Contains("Recommendations"))
        {
            try
            {
                string[] sections = Regex.Split(html, "Recommendations");
                string candidate = sections[2].Split('\n')[4].Replace("</strong>", "").Replace("<strong>", "");
                recommendations = candidate.Contains("recommended") ? candidate : "";
            }
            catch (Exception)
            {
                recommendations = string.Empty;
            }
        }

        // ---- Websites ------------------------------------------------------------------------------
        string websites = string.Empty;
        if (html.Contains("websites"))
        {
            try
            {
                string block = html.Substring(html.IndexOf("websites"), 500);
                foreach (string chunk in Regex.Split(block, "href"))
                {
                    if (chunk.Contains("redir/redirect?url"))
                    {
                        // The last matching link wins.
                        websites = chunk.Substring(chunk.IndexOf("name="), 50).Split('\n')[1];
                    }
                }
            }
            catch (Exception)
            {
                websites = string.Empty;
            }
        }

        // ---- Industry ------------------------------------------------------------------------------
        string industry = string.Empty;
        try
        {
            string block = html.Substring(html.IndexOf("Find users in this industry"), 100);
            industry = block.Split('>')[1].Replace("</strong", "").Replace("</a", "");
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }

        // ---- First name ----------------------------------------------------------------------------
        string firstName = string.Empty;
        try
        {
            if (html.Contains("given-name"))
            {
                firstName = SliceAfterAnchor(html, "given-name", ">", "</span>").Replace(">", string.Empty);
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }

        // ---- Last name (anything after a comma is dropped) -----------------------------------------
        string lastName = string.Empty;
        try
        {
            if (html.Contains("family-name"))
            {
                string raw = SliceAfterAnchor(html, "family-name", ">", "</span>").Replace(">", string.Empty);
                lastName = raw.Contains(",") ? raw.Split(',')[0] : raw;
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }

        // ---- Headline and current company ----------------------------------------------------------
        string headline = string.Empty;
        string currentCompany = string.Empty;
        try
        {
            if (html.Contains("phonetic-full-name"))
            {
                string title = SliceAfterAnchor(html, "phonetic-full-name", "display:block", "</p>")
                    .Replace("\">", "")
                    .Replace("display:block", string.Empty)
                    .Replace("<strong class=\"highlight\"", string.Empty)
                    .Replace("</strong", string.Empty)
                    .Trim();
                headline = title.Replace(",", ";");

                // Throws (reported below) when the headline has no " at " part; the headline is kept.
                currentCompany = Regex.Split(title, " at ")[1].Replace(",", ";");
            }
            else if (html.Contains("<p class=\"title\""))
            {
                // If the split below fails, the raw 150-character snippet stays in the field.
                headline = html.Substring(html.IndexOf("<p class=\"title\""), 150);
                headline = headline.Split('>')[1].Replace("\n", "").Replace(")</h3", "").Replace("</p", "");

                string[] pieces = Regex.Split(headline, " at ");
                headline = pieces[0];
                if (pieces.Length > 1)
                {
                    currentCompany = pieces[1];
                }
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }

        // ---- Education, first pass: first <li> only ------------------------------------------------
        string education1 = string.Empty;
        string education2 = string.Empty;
        try
        {
            if (html.Contains("summary-education"))
            {
                education1 = SliceAfterAnchor(html, "summary-education", "<li>", "</li>")
                    .Replace("<li>", string.Empty).Replace(",", string.Empty).Trim();
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }

        // ---- Location and country ------------------------------------------------------------------
        string location = string.Empty;
        string country = string.Empty;
        try
        {
            if (html.Contains("locality"))
            {
                string[] place = SliceAfterAnchor(html, "locality", "location", "</a>")
                    .Replace("location", string.Empty).Replace(">", string.Empty).Replace('"', ' ')
                    .Split(',');
                location = place[0].Replace("<strong class= highlight", string.Empty).Replace("</strong", string.Empty);
                country = place[1].Replace("<strong class= highlight", string.Empty).Replace("</strong", string.Empty);
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }

        // ---- Email ---------------------------------------------------------------------------------
        string email = string.Empty;
        try
        {
            if (html.Contains("Email & Phone:"))
            {
                email = SliceAfterAnchor(html, "abook-email", "<a", "</a>").Split('>')[1];
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }

        // ---- Profile type --------------------------------------------------------------------------
        // NOTE(review): any parsed value other than the literal "Public" is turned into "Private".
        // This value goes only into the logged record; the CSV row uses the constant "Public1" below.
        // Confirm both are intended.
        string profileType = "Public";
        try
        {
            if (html.Contains("profile-header"))
            {
                profileType = SliceAfterAnchor(html, "profile-header", "class=\"n fn\"", "</span>").Split('>')[1];
            }
            else if (html.Contains(" class=\"n fn\""))
            {
                try
                {
                    profileType = html.Substring(html.IndexOf("class=\"n fn\""), 20)
                        .Split('>')[1].Replace("</strong", "").Replace("</a", "");
                }
                catch (Exception inner)
                {
                    Console.WriteLine(inner.Message);
                }
            }

            if (profileType != "Public")
            {
                profileType = "Private";
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }

        // ---- Phone number --------------------------------------------------------------------------
        string phone = string.Empty;
        try
        {
            if (html.Contains("<dt>Phone:</dt>"))
            {
                phone = SliceAfterAnchor(html, "profile-personal", "<p>", "<span").Replace("<p>", string.Empty);
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }

        // ---- Past positions ------------------------------------------------------------------------
        // Fix: the old loop searched before advancing its cursor, so the first <li> was visited twice
        // and the record used entries 1 and 3 to compensate. Items are now read once each and the
        // first two are used. The output is the same whenever every item contains " at ".
        List<string> pastItems = new List<string>();
        try
        {
            if (html.Contains("summary-past"))
            {
                pastItems = ReadListItemTexts(
                    htmlUtil, fragmentConverter, SliceAfterAnchor(html, "summary-past", "<li>", "summary-education"));
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }

        // The company is whatever follows the first " at " in an item; items without it add nothing.
        List<string> pastCompanyNames = new List<string>();
        foreach (string item in pastItems)
        {
            if (item == null)
            {
                continue;
            }
            string[] pieces = Regex.Split(item, " at ");
            if (pieces.Length > 1)
            {
                pastCompanyNames.Add(pieces[1]);
            }
        }

        // ---- Current position: only the first list item is used ------------------------------------
        string currentTitle = string.Empty;
        try
        {
            if (html.Contains("summary-current"))
            {
                List<string> currentItems = ReadListItemTexts(
                    htmlUtil, fragmentConverter, SliceAfterAnchor(html, "summary-current", "<li>", "summary-past"));
                currentTitle = ElementAtOrEmpty(currentItems, 0);
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }

        // ---- Education, second pass: whole list, split on '/' --------------------------------------
        // Replaces the first-pass value when it succeeds.
        try
        {
            if (html.Contains("summary-education"))
            {
                string[] entries = SliceAfterAnchor(html, "summary-education", "<li>", "</ul>")
                    .Replace("<li>", "").Split('/');
                education1 = entries[0].Replace("\n", string.Empty).Replace("\t", string.Empty)
                    .Replace("<", string.Empty).Replace("span>", string.Empty).Replace(",", string.Empty).Trim();
                education2 = entries[1].Replace("\n", string.Empty).Replace("\t", string.Empty)
                    .Replace("li>", string.Empty).Replace("<", string.Empty).Replace("span>", string.Empty)
                    .Replace(",", string.Empty).Trim();
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
        }

        // ---- Assemble the record -------------------------------------------------------------------
        string pastTitles = ElementAtOrEmpty(pastItems, 0) + ";" + ElementAtOrEmpty(pastItems, 1);
        string pastCompanies = ElementAtOrEmpty(pastCompanyNames, 0) + ";" + ElementAtOrEmpty(pastCompanyNames, 1);
        string education = education1 + ";" + education2;
        string experience = string.Empty; // never populated by this method; the column is kept for layout

        // Fix: a null login id used to throw while the record was built, silently dropping the profile.
        string loginId = _UserName ?? string.Empty;

        // Columns shared by the logged record and the CSV row, in header order after "ProfileType".
        string[] sharedColumns =
        {
            profileLink, firstName, lastName, headline, currentTitle, currentCompany, connection,
            recommendations, skills, experience, education, groups, email, phone, pastTitles,
            pastCompanies, location, country, industry, websites, loginId
        };
        for (int i = 0; i < sharedColumns.Length; i++)
        {
            // Commas inside a value would break the comma-separated layout.
            sharedColumns[i] = sharedColumns[i].Replace(",", ";");
        }
        string sharedRow = string.Join(",", sharedColumns);

        string logRecord = profileType.Replace(",", ";") + "," + sharedRow + ",";
        if (logRecord.Contains("<strong class=\"highlight\"")
            || logRecord.Contains("<span class=\"full-name\"")
            || logRecord.Contains("overview-connections\">"))
        {
            logRecord = logRecord.Replace("<span class=\"full-name\"", "").Replace("\n", "")
                .Replace("<strong class=\"highlight\"", "").Replace("overview-connections\">", "")
                .Replace("</strong>", "").Replace("<strong>", "");
        }

        // Fix: the second operand repeated the first-name check; it now tests the last name.
        if (!string.IsNullOrEmpty(firstName) || !string.IsNullOrEmpty(lastName))
        {
            Log(logRecord);
        }

        // Whatever is left after removing separators, the link, "Public" and the login id is scraped data.
        string scraped = logRecord.Replace(";", "").Replace(profileLink, "").Replace("Public", "").Replace(",", "");
        if (loginId.Length > 0)
        {
            // Fix: string.Replace throws ArgumentException for an empty search string, which used to
            // abort the export whenever the login id was empty.
            scraped = scraped.Replace(loginId, "");
        }
        scraped = scraped.Trim();

        if (!string.IsNullOrEmpty(scraped))
        {
            string CSVHeader = "ProfileType,UserProfileLink,FirstName,LastName,HeadLineTitle,Current Title ,Current Company,Connection,Recommendations ,SkillAndExpertise ,Experience , Education,Groups,UserEmail,UserContactNumber,PastTitles,PastCompany,Location,Country,Industry,WebSites,LinkedInLoginID,";

            // The CSV row always starts with this constant rather than the parsed profile type.
            string TypeOfProfile = "Public1";
            string CSV_Content = TypeOfProfile.Replace(",", ";") + "," + sharedRow;

            CSVUtilities.ExportDataCSVFile(CSVHeader, CSV_Content, Globals.path_ScrappedMembersFromGroup);
            Log("[ " + DateTime.Now + " ] => [ Data Saved In CSV File With URL >>> " + profileLink + " ]");
        }
    }
    catch (Exception ex)
    {
        // Still best effort (nothing is rethrown), but the failure is no longer invisible.
        Console.WriteLine(ex.Message);
    }
}

/// <summary>
/// Returns the part of <paramref name="text"/> that lies after the first <paramref name="anchor"/>,
/// from the first <paramref name="startToken"/> (inclusive) up to the first <paramref name="endToken"/>
/// (exclusive); both tokens are searched from the anchor onwards.
/// </summary>
/// <exception cref="ArgumentOutOfRangeException">
/// A marker is missing or the end token precedes the start token. Callers treat this as "field not found".
/// </exception>
private static string SliceAfterAnchor(string text, string anchor, string startToken, string endToken)
{
    string tail = text.Substring(text.IndexOf(anchor));
    int start = tail.IndexOf(startToken);
    int end = tail.IndexOf(endToken);
    return tail.Substring(start, end - start);
}

/// <summary>
/// Entity-decodes <paramref name="htmlFragment"/>, converts it to XML with <paramref name="converter"/>
/// and returns the accumulated "text" content of every <c>li</c> element, each element visited once.
/// </summary>
private static List<string> ReadListItemTexts(Chilkat.HtmlUtil htmlUtil, Chilkat.HtmlToXml converter, string htmlFragment)
{
    converter.Html = htmlUtil.EntityDecode(htmlFragment);
    Chilkat.Xml fragmentXml = new Chilkat.Xml();
    fragmentXml.LoadXml(converter.ToXml());

    List<string> texts = new List<string>();
    Chilkat.Xml node = fragmentXml.SearchForTag(null, "li");
    while (node != null)
    {
        texts.Add(node.AccumulateTagContent("text", "/text"));

        // Resume after the node just handled so no element is returned twice.
        node = fragmentXml.SearchForTag(node, "li");
    }
    return texts;
}

/// <summary>
/// Returns <paramref name="items"/>[<paramref name="index"/>], or an empty string when the index is
/// past the end of the list or the entry is null.
/// </summary>
private static string ElementAtOrEmpty(List<string> items, int index)
{
    return index < items.Count && items[index] != null ? items[index] : string.Empty;
}
// NOTE(review): this method is stored with its original line breaks collapsed, so every "//"
// comment below also swallows the statements that follow it on the same physical line.
// Restore the line breaks before relying on this text.
// NOTE(review): two spots contain a literal "******" run (the "Invitation Count" Log call and
// the SuccessMsg assignment). That is not valid C# and looks like masked or lost text - recover
// the original expressions from version control rather than guessing.
// NOTE(review): a second, textually equivalent StartAcceptInvitations(ref GlobusHttpHelper)
// definition exists in this file; confirm which copy is live.
// NOTE(review): pageSource.Substring(pageSource.IndexOf("csrfToken"), 100) throws when fewer
// than 100 characters follow the marker; the outer catch then ends the whole method silently.
// NOTE(review): "Invitation accepted from ..." is logged when the accept response does NOT
// contain SenderName - confirm that this condition is intended and not inverted.
// NOTE(review): isTrue is assigned but only read in commented-out code.
/// <summary>
/// Walks the LinkedIn invitation inbox and issues the accept request for each invitation found.
/// </summary>
/// <remarks>
/// Steps visible in the body:
/// 1. Loads the pending-invitations page and takes the text between "alt=" and "height" from
///    FindTheUserName(pageSource); the first two space-separated words become the first and last name.
/// 2. Pulls the csrfToken value out of the page: up to the next ampersand when there is one,
///    otherwise the whole quote-delimited fragment with the "csrfToken=" prefix removed.
/// 3. Converts the page to XML and logs the text of the first span whose class is
///    "invitation-count count ".
/// 4. Repeatedly loads the inbox listing at the current startRow. Each fragment of the
///    "inbox-list " element (split on "&lt;li") that carries a data-gid gets an invitationAccept
///    request built from that gid and the csrf token. When the response contains
///    " are now connected", SuccessMsg is appended to Globals.path_AcceptInvitationEmail and logged.
/// 5. startRow advances by 10 and the loop repeats while the listing page contains
///    "is now a connection.".
/// Every exception is caught and discarded, so failures leave no trace in the log.
/// </remarks>
/// <param name="httpHelper">HTTP helper used for every request; passed by ref but not reassigned here.</param>
public void StartAcceptInvitations(ref GlobusHttpHelper httpHelper) { try { string csrfToken = string.Empty; string userFirstName = string.Empty; string UserLastName = string.Empty; string SenderName = string.Empty; string newPagesource = string.Empty; bool isTrue = false; int startRow = 1; string pageSource = httpHelper.getHtmlfromUrl1(new Uri("http://www.linkedin.com/inbox/invitations/pending")); var resultForUserDetails = FindTheUserName(pageSource); try { resultForUserDetails = resultForUserDetails.Substring(resultForUserDetails.IndexOf("alt="), resultForUserDetails.IndexOf("height") - resultForUserDetails.IndexOf("alt=")).Replace("alt=", string.Empty).Replace("/", string.Empty).Trim(); userFirstName = resultForUserDetails.Split(' ')[0].Replace("\"", string.Empty); UserLastName = resultForUserDetails.Split(' ')[1].Replace("\"", string.Empty); } catch { } if (pageSource.Contains("csrfToken")) { csrfToken = pageSource.Substring(pageSource.IndexOf("csrfToken"), 100); string[] Arr = csrfToken.Split('"'); try { foreach (string item in Arr) { try { if (item.Contains("csrfToken=")) { csrfToken = item.Substring(item.IndexOf("csrfToken="), item.IndexOf("&", item.IndexOf("csrfToken=")) - item.IndexOf("csrfToken=")).Replace("csrfToken=", string.Empty).Replace("\"", string.Empty).Replace("\\", string.Empty).Trim();//Arr[2].Replace(@"\", string.Empty).Replace("//", string.Empty); break; } } catch (Exception ex) { } if (item.Contains("csrfToken=")) { csrfToken = item.Replace("csrfToken=", string.Empty).Trim(); break; } } } catch (Exception ex) { //csrfToken = Arr[0].Replace("csrfToken=", "").Replace("\\", ""); } } // For Show More //string postData1 = "pkey=inbox-invitations-pending&tcode=%5Bobject%20Arguments%5D&plist="; //string response1 = httpHelper.postFormData(new Uri("http://www.linkedin.com/lite/web-action-track?csrfToken="+csrfToken+""),postData1); //string pageSource2=httpHelper.getHtmlfromUrl(new 
Uri("http://www.linkedin.com/inbox/invitations/pending/more?sinceDate=1366351490125&startRow=6&count=20&showBlocked=false&ctx=inbox&rnd=1366353236172")); //*** Conver HTML to XML *******************************// #region Convert HTML to XML ChilkatHttpHelpr objhelper = new ChilkatHttpHelpr(); //xHtml contain xml data string xHtml = objhelper.ConvertHtmlToXml(pageSource); Chilkat.Xml xml = new Chilkat.Xml(); xml.LoadXml(xHtml); //xHtml. //// Iterate over all h1 tags: Chilkat.Xml xNode = default(Chilkat.Xml); Chilkat.Xml xBeginSearchAfter = default(Chilkat.Xml); #endregion #region Invitatin count xBeginSearchAfter = null; xNode = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "invitation-count count "); try { while ((xNode != null)) { string strvalue = xNode.AccumulateTagContent("text", "script|style"); string Invitatincount = strvalue; Log("[ " + DateTime.Now + " ] => [ Invitation Count = " + Invitatincount + " UserName = "******" ]"); Log("-----------------------------------------------------------------------------------------------------------------------------------"); break; } } catch (Exception ex) { } #endregion do { newPagesource = httpHelper.getHtmlfromUrl1(new Uri("https://www.linkedin.com/inbox/invitations?keywords=&sortBy=&startRow=" + startRow + "&subFilter=&trk=&showBlocked=false")); if (newPagesource.Contains("inbox-list")) { string inbox_list = httpHelper.GetDataWithTagValueByTagAndAttributeNameWithClass(newPagesource, "ol", "inbox-list "); if (inbox_list.Contains("<li")) { isTrue = true; string[] srrLi = Regex.Split(inbox_list, "<li"); foreach (string item in srrLi) { try { if (item.Contains("data-gid=\"")) { string data_gid = item.Substring(item.IndexOf("data-gid=\"") + 10, item.IndexOf("\"", item.IndexOf("data-gid=\"") + 10) - (item.IndexOf("data-gid=\"") + 10)).Replace("\"", string.Empty).Replace("data-gid=\"", string.Empty).Trim(); int startindex1 = item.IndexOf("alt="); string start1 = item.Substring(startindex1).Replace("alt=", 
string.Empty); int endindex1 = start1.IndexOf("height"); string end1 = start1.Substring(0, endindex1).Replace("\"", string.Empty).Trim(); SenderName = end1; string response2 = httpHelper.getHtmlfromUrl1(new Uri("http://www.linkedin.com/inbox/action?mboxItemGID=" + data_gid + "&actionType=invitationAccept&csrfToken=" + csrfToken + "&goback=%2Epiv_*1_*1_*1_*1_*1&trk=inbox-invitations-inv-accept&ctx=inbox&rnd=1366352095313")); if (response2.Contains(" are now connected")) { string SuccessMsg = string.Empty; int startindex = response2.IndexOf("<div class=\"confirmation\">"); if (startindex > 0) { try { string start = response2.Substring(startindex).Replace("<div class=\"confirmation\">", string.Empty); int endindex = start.IndexOf("<ul>"); string end = start.Substring(0, endindex); //SuccessMsg = end.Replace("<h4>", string.Empty).Replace("\"", string.Empty).Replace("</h4>", string.Empty).Replace("\n", string.Empty).Replace("\t", string.Empty).Replace("\"u002", "-").Replace("You", "User: "******"<h4>", string.Empty).Replace("\"", string.Empty).Replace("</h4>", string.Empty).Replace("\n", string.Empty).Replace("\t", string.Empty).Replace("\"u002", "-").Replace("You", "User: "******"( Name:" + userFirstName + " " + UserLastName + ") ").Trim(); } catch { } } GlobusFileHelper.AppendStringToTextfileNewLine(SuccessMsg, Globals.path_AcceptInvitationEmail); Log("[ " + DateTime.Now + " ] => [ " + SuccessMsg + " ]"); } if (!(response2.Contains(SenderName))) { Log("[ " + DateTime.Now + " ] => [ Invitation accepted from " + SenderName + " ]"); } else { //Log("There is some error !"); } } } catch (Exception ex) { } } } else { //Log("[ " + DateTime.Now + " ] => [ There is no invitation ! ]"); Log("[ " + DateTime.Now + " ] => [ No more invitations left to accept ! ]"); } } startRow = startRow + 10; } while (newPagesource.Contains("is now a connection.")); //else //{ // Log("[ " + DateTime.Now + " ] => [ There is no invitation ! 
]"); //} //if (isTrue) //{ // StartAcceptInvitations(ref httpHelper); //} } catch (Exception ex) { } }
// NOTE(review): this method is stored with its original line breaks collapsed, so every "//"
// comment below also swallows the statements that follow it on the same physical line.
// Restore the line breaks before relying on this text.
// NOTE(review): two spots contain a literal "******" run (the "Invitation Count" Log call and
// the SuccessMsg assignment). That is not valid C# and looks like masked or lost text - recover
// the original expressions from version control rather than guessing.
// NOTE(review): another, textually equivalent StartAcceptInvitations(ref GlobusHttpHelper)
// definition exists in this file; confirm which copy is live.
// NOTE(review): pageSource.Substring(pageSource.IndexOf("csrfToken"), 100) throws when fewer
// than 100 characters follow the marker; the outer catch then ends the whole method silently.
// NOTE(review): "Invitation accepted from ..." is logged when the accept response does NOT
// contain SenderName - confirm that this condition is intended and not inverted.
// NOTE(review): isTrue is assigned but only read in commented-out code.
/// <summary>
/// Walks the LinkedIn invitation inbox and issues the accept request for each invitation found.
/// </summary>
/// <remarks>
/// Steps visible in the body:
/// 1. Loads the pending-invitations page and takes the text between "alt=" and "height" from
///    FindTheUserName(pageSource); the first two space-separated words become the first and last name.
/// 2. Pulls the csrfToken value out of the page: up to the next ampersand when there is one,
///    otherwise the whole quote-delimited fragment with the "csrfToken=" prefix removed.
/// 3. Converts the page to XML and logs the text of the first span whose class is
///    "invitation-count count ".
/// 4. Repeatedly loads the inbox listing at the current startRow. Each fragment of the
///    "inbox-list " element (split on "&lt;li") that carries a data-gid gets an invitationAccept
///    request built from that gid and the csrf token. When the response contains
///    " are now connected", SuccessMsg is appended to Globals.path_AcceptInvitationEmail and logged.
/// 5. startRow advances by 10 and the loop repeats while the listing page contains
///    "is now a connection.".
/// Every exception is caught and discarded, so failures leave no trace in the log.
/// </remarks>
/// <param name="httpHelper">HTTP helper used for every request; passed by ref but not reassigned here.</param>
public void StartAcceptInvitations(ref GlobusHttpHelper httpHelper) { try { string csrfToken = string.Empty; string userFirstName = string.Empty; string UserLastName = string.Empty; string SenderName = string.Empty; string newPagesource = string.Empty; bool isTrue = false; int startRow = 1; string pageSource = httpHelper.getHtmlfromUrl1(new Uri("http://www.linkedin.com/inbox/invitations/pending")); var resultForUserDetails = FindTheUserName(pageSource); try { resultForUserDetails = resultForUserDetails.Substring(resultForUserDetails.IndexOf("alt="), resultForUserDetails.IndexOf("height") - resultForUserDetails.IndexOf("alt=")).Replace("alt=", string.Empty).Replace("/", string.Empty).Trim(); userFirstName = resultForUserDetails.Split(' ')[0].Replace("\"", string.Empty); UserLastName = resultForUserDetails.Split(' ')[1].Replace("\"", string.Empty); } catch { } if (pageSource.Contains("csrfToken")) { csrfToken = pageSource.Substring(pageSource.IndexOf("csrfToken"), 100); string[] Arr = csrfToken.Split('"'); try { foreach (string item in Arr) { try { if (item.Contains("csrfToken=")) { csrfToken = item.Substring(item.IndexOf("csrfToken="), item.IndexOf("&", item.IndexOf("csrfToken=")) - item.IndexOf("csrfToken=")).Replace("csrfToken=", string.Empty).Replace("\"", string.Empty).Replace("\\", string.Empty).Trim();//Arr[2].Replace(@"\", string.Empty).Replace("//", string.Empty); break; } } catch (Exception ex) { } if (item.Contains("csrfToken=")) { csrfToken = item.Replace("csrfToken=", string.Empty).Trim(); break; } } } catch (Exception ex) { //csrfToken = Arr[0].Replace("csrfToken=", "").Replace("\\", ""); } } // For Show More //string postData1 = "pkey=inbox-invitations-pending&tcode=%5Bobject%20Arguments%5D&plist="; //string response1 = httpHelper.postFormData(new Uri("http://www.linkedin.com/lite/web-action-track?csrfToken="+csrfToken+""),postData1); //string pageSource2=httpHelper.getHtmlfromUrl(new 
Uri("http://www.linkedin.com/inbox/invitations/pending/more?sinceDate=1366351490125&startRow=6&count=20&showBlocked=false&ctx=inbox&rnd=1366353236172")); //*** Conver HTML to XML *******************************// #region Convert HTML to XML ChilkatHttpHelpr objhelper = new ChilkatHttpHelpr(); //xHtml contain xml data string xHtml = objhelper.ConvertHtmlToXml(pageSource); Chilkat.Xml xml = new Chilkat.Xml(); xml.LoadXml(xHtml); //xHtml. //// Iterate over all h1 tags: Chilkat.Xml xNode = default(Chilkat.Xml); Chilkat.Xml xBeginSearchAfter = default(Chilkat.Xml); #endregion #region Invitatin count xBeginSearchAfter = null; xNode = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "invitation-count count "); try { while ((xNode != null)) { string strvalue = xNode.AccumulateTagContent("text", "script|style"); string Invitatincount = strvalue; Log("[ " + DateTime.Now + " ] => [ Invitation Count = " + Invitatincount + " UserName = "******" ]"); Log("-----------------------------------------------------------------------------------------------------------------------------------"); break; } } catch (Exception ex) { } #endregion do { newPagesource = httpHelper.getHtmlfromUrl1(new Uri("https://www.linkedin.com/inbox/invitations?keywords=&sortBy=&startRow=" + startRow + "&subFilter=&trk=&showBlocked=false")); if (newPagesource.Contains("inbox-list")) { string inbox_list = httpHelper.GetDataWithTagValueByTagAndAttributeNameWithClass(newPagesource, "ol", "inbox-list "); if (inbox_list.Contains("<li")) { isTrue = true; string[] srrLi = Regex.Split(inbox_list, "<li"); foreach (string item in srrLi) { try { if (item.Contains("data-gid=\"")) { string data_gid = item.Substring(item.IndexOf("data-gid=\"") + 10, item.IndexOf("\"", item.IndexOf("data-gid=\"") + 10) - (item.IndexOf("data-gid=\"") + 10)).Replace("\"", string.Empty).Replace("data-gid=\"", string.Empty).Trim(); int startindex1 = item.IndexOf("alt="); string start1 = 
item.Substring(startindex1).Replace("alt=",string.Empty); int endindex1 = start1.IndexOf("height"); string end1 = start1.Substring(0, endindex1).Replace("\"", string.Empty).Trim(); SenderName = end1; string response2 = httpHelper.getHtmlfromUrl1(new Uri("http://www.linkedin.com/inbox/action?mboxItemGID=" + data_gid + "&actionType=invitationAccept&csrfToken=" + csrfToken + "&goback=%2Epiv_*1_*1_*1_*1_*1&trk=inbox-invitations-inv-accept&ctx=inbox&rnd=1366352095313")); if (response2.Contains(" are now connected")) { string SuccessMsg = string.Empty; int startindex = response2.IndexOf("<div class=\"confirmation\">"); if (startindex > 0) { try { string start = response2.Substring(startindex).Replace("<div class=\"confirmation\">", string.Empty); int endindex = start.IndexOf("<ul>"); string end = start.Substring(0, endindex); //SuccessMsg = end.Replace("<h4>", string.Empty).Replace("\"", string.Empty).Replace("</h4>", string.Empty).Replace("\n", string.Empty).Replace("\t", string.Empty).Replace("\"u002", "-").Replace("You", "User: "******"<h4>", string.Empty).Replace("\"", string.Empty).Replace("</h4>", string.Empty).Replace("\n", string.Empty).Replace("\t", string.Empty).Replace("\"u002", "-").Replace("You", "User: "******"( Name:" + userFirstName + " " + UserLastName + ") ").Trim(); } catch { } } GlobusFileHelper.AppendStringToTextfileNewLine(SuccessMsg, Globals.path_AcceptInvitationEmail); Log("[ " + DateTime.Now + " ] => [ " + SuccessMsg + " ]"); } if (!(response2.Contains(SenderName))) { Log("[ " + DateTime.Now + " ] => [ Invitation accepted from " + SenderName + " ]"); } else { //Log("There is some error !"); } } } catch (Exception ex) { } } } else { //Log("[ " + DateTime.Now + " ] => [ There is no invitation ! ]"); Log("[ " + DateTime.Now + " ] => [ No more invitations left to accept ! ]"); } } startRow = startRow + 10; } while (newPagesource.Contains("is now a connection.")); //else //{ // Log("[ " + DateTime.Now + " ] => [ There is no invitation ! 
]"); //} //if (isTrue) //{ // StartAcceptInvitations(ref httpHelper); //} } catch (Exception ex) { } }
/// <summary>
/// Collects the hrefs found inside every element of <paramref name="pageSrcHtml"/> whose tag is
/// <paramref name="TagName"/> and whose class attribute matches <paramref name="className"/>.
/// </summary>
/// <param name="pageSrcHtml">Raw HTML; it is converted to XML before searching.</param>
/// <param name="TagName">Tag name handed to the XML attribute search.</param>
/// <param name="className">Value the "class" attribute is matched against.</param>
/// <returns>
/// The hrefs that GetHrefFromString extracted from each matching node, in search order.
/// Returns null when the converter cannot be unlocked or when any exception is thrown.
/// </returns>
public List<string> GetHrefsByTagAndAttributeName(string pageSrcHtml, string TagName, string className)
{
    List<string> hrefs = new List<string>();
    try
    {
        // The converter refuses to work until it has been unlocked.
        Chilkat.HtmlToXml converter = new Chilkat.HtmlToXml();
        if (!converter.UnlockComponent("THEBACHtmlToXml_7WY3A57sZH3O"))
        {
            Console.WriteLine(converter.LastErrorText);
            return null;
        }

        converter.Html = pageSrcHtml;
        Chilkat.Xml document = new Chilkat.Xml();
        document.LoadXml(converter.ToXml());

        // Each search resumes after the previous hit; a null cursor starts from the beginning.
        Chilkat.Xml match = null;
        for (match = document.SearchForAttribute(match, TagName, "class", className);
             match != null;
             match = document.SearchForAttribute(match, TagName, "class", className))
        {
            // GetXml keeps the markup of the node, which is what the href extraction needs.
            hrefs.AddRange(GetHrefFromString(match.GetXml()));
        }

        return hrefs;
    }
    catch (Exception)
    {
        // Best effort: callers receive null instead of an exception.
        return null;
    }
}
// NOTE(review): this handler is stored with its original line breaks collapsed, so every "//"
// comment below also swallows the statements that follow it on the same physical line.
// Restore the line breaks before relying on this text.
// NOTE(review): right after "#region Commented" an uncommented
// "public List<StructTweetIDs> NewKeywordStructDataForSearchByKeyword(string keyword)" signature
// sits inside the handler body. It looks like a commented-out line that lost its "//"; confirm
// against version control.
// NOTE(review): the cursor parser splits the response on "refresh_cursor" and then tests each
// piece for "refresh_cursor"; pieces produced by that split cannot contain the delimiter, so only
// the "scroll_cursor" branch can ever assign TweetId.
// NOTE(review): the end-of-data marker is spelled "has_moreitems\":false" in one branch and
// "has_more_items\":false" in the other - presumably the first is a typo; verify.
// NOTE(review): both branches of the final Count/noOfRecords test jump back to startAgain, and the
// only early exit ("return lst_structTweetIDs") is commented out, so noOfRecords does not bound the
// loop; it ends on the end-of-data marker or on an exception caught by the enclosing try.
// NOTE(review): lst_structTweetIDs[lst_structTweetIDs.Count - 1] throws when the list is empty.
// NOTE(review): "text" is only assigned inside the commented-out region, so wholeTweetMessage is
// always the empty string; "maxid" is assigned but never read.
// NOTE(review): the catch around the tweet-id extraction resets from_user_id, not tweetUserid -
// looks like a copy/paste slip; confirm.
// NOTE(review): Replace("'", "'") on Bio is a no-op and the following Replace strips a
// whitespace character; presumably these literals were HTML entities before this copy was made.
// NOTE(review): all HTTP calls and Thread.Sleep(2000) run synchronously inside the click handler;
// presumably that is the UI thread - confirm.
/// <summary>
/// Click handler that builds a Twitter search query from the form's text boxes and scrapes the
/// users found in the search results.
/// </summary>
/// <remarks>
/// Steps visible in the body:
/// 1. Copies nine text boxes into members declared outside this method and logs "Process Started".
/// 2. Prefixes each non-empty optional term with a URL-encoded operator (for example "%20%23" for
///    hashtags, "%20from%3A" for senders, "%20near%3A%22" for the place).
/// 3. Only when txtAllofTheseKeywords is non-empty: builds the twitter.com/i/search/timeline URL,
///    appending scroll_cursor=TweetId on every pass after the first, and downloads it. An empty
///    response is requested once more; an exception triggers a 2 second sleep and one more request.
/// 4. Splits the response on "data-item-id" and, for each piece that has a data-screen-name and
///    is not a "js-actionable-user js-profile-popup-actionable" entry, extracts the user id, the
///    screen name and a tweet id, and adds a StructTweetIDs entry to lst_structTweetIDs unless the
///    user id could not be parsed.
/// 5. For each such piece it also downloads https://twitter.com/ followed by the screen name,
///    reads profile name, website, bio, location and the tweet/following/follower counters from
///    the converted XML, and adds the tweet id to Globals.lstScrapedUserIDs.
/// 6. De-duplicates lst_structTweetIDs and jumps back to step 3 until the end-of-data marker appears.
/// Exceptions are caught and discarded throughout.
/// </remarks>
/// <param name="sender">Event source; not used in the body.</param>
/// <param name="e">Event data; not used in the body.</param>
private void btnStart_Searching_Click(object sender, EventArgs e) { AllOfTheseWords = (txtAllofTheseKeywords.Text).ToString(); ThisExtractPhrase = (txtThisExactPhrase.Text).ToString(); AnyOfTheseWords = (txtAnyOfTheseWords.Text).ToString(); TheseHashTags = (txtTheseHashTags.Text).ToString(); NoneOfTheseWords = (txtNoneofTheseWords.Text).ToString(); FromTheseAccounts = (txtFromTheseAccounts.Text).ToString(); ToTheseAccounts = (txtToTheseAccounts.Text).ToString(); MentionTheseAccounts = (txtMentioningTheseAccounts.Text).ToString(); NearThisPlace = (txtNearThisPlace.Text).ToString(); AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => Process Started"); try { if (string.IsNullOrEmpty(ThisExtractPhrase)) { ThisExtractPhrase = ""; } else { ThisExtractPhrase = "%20%22" + ThisExtractPhrase; } } catch { } try { if (string.IsNullOrEmpty(AnyOfTheseWords)) { AnyOfTheseWords = ""; } else { AnyOfTheseWords = "%22%20" + AnyOfTheseWords; } } catch { } try { if (string.IsNullOrEmpty(TheseHashTags)) { TheseHashTags = ""; } else { TheseHashTags = "%20%23" + TheseHashTags; } } catch { } try { if (string.IsNullOrEmpty(NoneOfTheseWords)) { NoneOfTheseWords = ""; } else { NoneOfTheseWords = "%20-" + NoneOfTheseWords; } } catch { } try { if (string.IsNullOrEmpty(FromTheseAccounts)) { FromTheseAccounts = ""; } else { FromTheseAccounts = "%20from%3A" + FromTheseAccounts; } } catch { } try { if (string.IsNullOrEmpty(ToTheseAccounts)) { ToTheseAccounts = ""; } else { ToTheseAccounts = "%20to%3A" + ToTheseAccounts; } } catch { } try { if (string.IsNullOrEmpty(MentionTheseAccounts)) { MentionTheseAccounts = ""; } else { MentionTheseAccounts = "%20%40" + MentionTheseAccounts; } } catch { } try { if (string.IsNullOrEmpty(NearThisPlace)) { NearThisPlace = ""; } else { NearThisPlace = "%20near%3A%22" + NearThisPlace; } } catch { } try { if (!string.IsNullOrEmpty(txtAllofTheseKeywords.Text)) { #region Commented //try //{ // string Url = "https://twitter.com/search?f=realtime&q=" + AllOfTheseWords 
+ ThisExtractPhrase + AnyOfTheseWords + NoneOfTheseWords + TheseHashTags + _selectedLanguage + FromTheseAccounts + ToTheseAccounts + MentionTheseAccounts + NearThisPlace + "%22%20within%3A15mi&src=typd"; // string response = _GlobusHttpHelper.getHtmlfromUrl(new Uri(Url), "", ""); //} //catch { } public List<StructTweetIDs> NewKeywordStructDataForSearchByKeyword(string keyword) #endregion { try { BaseLib.GlobusRegex regx = new GlobusRegex(); int counter = 0; string res_Get_searchURL = string.Empty; string searchURL = string.Empty; string maxid = string.Empty; string TweetId = string.Empty; string text = string.Empty; string ProfileName = string.Empty; string Location = string.Empty; string Bio = string.Empty; string website = string.Empty; string NoOfTweets = string.Empty; string Followers = string.Empty; string Followings = string.Empty; int noOfRecords = 0; try { noOfRecords = int.Parse(txtNoOfRecords.Text); } catch { } startAgain: if (counter == 0) { searchURL = "https://twitter.com/i/search/timeline?q=" + AllOfTheseWords + ThisExtractPhrase + AnyOfTheseWords + NoneOfTheseWords + TheseHashTags + _selectedLanguage + FromTheseAccounts + ToTheseAccounts + MentionTheseAccounts + NearThisPlace + "%22%20within%3A15mi&src=typd" + "&f=realtime"; counter++; } else { searchURL = "https://twitter.com/i/search/timeline?q=" + AllOfTheseWords + ThisExtractPhrase + AnyOfTheseWords + NoneOfTheseWords + TheseHashTags + _selectedLanguage + FromTheseAccounts + ToTheseAccounts + MentionTheseAccounts + NearThisPlace + "%22%20within%3A15mi&src=typd" + "&f=realtime&include_available_features=1&include_entities=1&last_note_ts=0&oldest_unread_id=0&scroll_cursor=" + TweetId + ""; } try { res_Get_searchURL = _GlobusHttpHelper.getHtmlfromUrl(new Uri(searchURL), "", ""); AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => Finding results for entered details "); if (string.IsNullOrEmpty(res_Get_searchURL)) { res_Get_searchURL = _GlobusHttpHelper.getHtmlfromUrl(new Uri(searchURL), "", ""); } 
try { //string sjss = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", ""); string[] splitRes = Regex.Split(res_Get_searchURL, "refresh_cursor"); //splitRes = splitRes.Skip(1).ToArray(); foreach (string item in splitRes) { if (item.Contains("refresh_cursor")) { int startIndex = item.IndexOf("TWEET-"); string start = item.Substring(startIndex).Replace("data-user-id=\\\"", ""); int endIndex = start.IndexOf("\""); string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", ""); TweetId = end; } if (item.Contains("scroll_cursor")) { int startIndex = item.IndexOf("TWEET-"); string start = item.Substring(startIndex).Replace("data-user-id=\\\"", ""); int endIndex = start.IndexOf("\""); string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", ""); TweetId = end; } } } catch (Exception) { } } catch (Exception ex) { System.Threading.Thread.Sleep(2000); res_Get_searchURL = _GlobusHttpHelper.getHtmlfromUrl(new Uri(searchURL), "", ""); } // && !res_Get_searchURL.Contains("has_more_items\":false") if (!string.IsNullOrEmpty(res_Get_searchURL)) { //string[] splitRes = Regex.Split(res_Get_searchURL, "data-item-id"); //Regex.Split(res_Get_searchURL, "\"in_reply_to_status_id_str\""); string[] splitRes = Regex.Split(res_Get_searchURL, "data-item-id"); splitRes = splitRes.Skip(1).ToArray(); foreach (string item in splitRes) { if (item.Contains("data-screen-name=") && !item.Contains("js-actionable-user js-profile-popup-actionable")) { //var avc = Newtonsoft.Json.JsonConvert.DeserializeObject<dynamic>(res_Get_searchURL); //string DataHtml = (string)avc["items_html"]; } else { continue; } string modified_Item = "\"from_user\"" + item; string id = ""; try { int startIndex = item.IndexOf("data-user-id="); string start = item.Substring(startIndex).Replace("data-user-id=\\\"", ""); int endIndex = 
start.IndexOf("\\\""); string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", ""); id = end; //lst_structTweetIDs.Add(id); AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => User Id " + id); } catch (Exception ex) { id = "null"; //Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper); } string from_user_id = ""; try { int startIndex = item.IndexOf("data-screen-name=\\\""); string start = item.Substring(startIndex).Replace("data-screen-name=\\\"", ""); int endIndex = start.IndexOf("\\\""); string end = start.Substring(0, endIndex).Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", ""); from_user_id = end; AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => User ScreenName " + from_user_id); } catch (Exception ex) { from_user_id = "null"; // Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwitterDataScrapper); } string tweetUserid = string.Empty; try { int startIndex = item.IndexOf("=\\\""); string start = item.Substring(startIndex).Replace("=\\\"", ""); int endIndex = start.IndexOf("\\\""); string end = start.Substring(0, endIndex).Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", ""); tweetUserid = end; AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => Tweet Id " + tweetUserid); } catch (Exception ex) { from_user_id = "null"; } ///Tweet Text #region Commented //try //{ // int startindex = item.IndexOf("js-tweet-text tweet-text\""); // if (startindex == -1) // { // startindex = 0; // startindex = 
item.IndexOf("js-tweet-text tweet-text"); // } // string start = item.Substring(startindex).Replace("js-tweet-text tweet-text\"", "").Replace("js-tweet-text tweet-text tweet-text-rtl\"", ""); // int endindex = start.IndexOf("</p>"); // if (endindex == -1) // { // endindex = 0; // endindex = start.IndexOf("stream-item-footer"); // } // string end = start.Substring(0, endindex); // end = regx.StripTagsRegex(end); // text = end.Replace(" ", "").Replace("a href=", "").Replace("/a", "").Replace("<span", "").Replace("</span", "").Replace("class=\\\"js-display-url\\\"", "").Replace("class=\\\"tco-ellipsis\\\"", "").Replace("class=\\\"invisible\\\"", "").Replace("<strong>", "").Replace("target=\\\"_blank\\\"", "").Replace("class=\\\"twitter-timeline-link\\\"", "").Replace("</strong>", "").Replace("rel=\\\"nofollow\\\" dir=\\\"ltr\\\" data-expanded-url=", ""); // text = text.Replace(""", "").Replace("<", "").Replace(">", "").Replace("\"", "").Replace("\\", "").Replace("title=", ""); // string[] array = Regex.Split(text, "http"); // text = string.Empty; // foreach (string itemData in array) // { // if (!itemData.Contains("t.co")) // { // string data = string.Empty; // if (itemData.Contains("//")) // { // data = ("http" + itemData).Replace(" span ", string.Empty); // if (!text.Contains(itemData.Replace(" ", "")))// && !data.Contains("class") && !text.Contains(data)) // { // text += data.Replace("u003c", string.Empty).Replace("u003e", string.Empty); // } // } // else // { // if (!text.Contains(itemData.Replace(" ", ""))) // { // text += itemData.Replace("u003c", string.Empty).Replace("u003e", string.Empty).Replace("js-tweet-text tweet-text", ""); // } // } // } // } //} //catch { }; #endregion twtboardpro.TwitterDataScrapper.StructTweetIDs structTweetIDs = new twtboardpro.TwitterDataScrapper.StructTweetIDs(); if (id != "null") { structTweetIDs.ID_Tweet = tweetUserid; structTweetIDs.ID_Tweet_User = id; structTweetIDs.username__Tweet_User = from_user_id; 
structTweetIDs.wholeTweetMessage = text; lst_structTweetIDs.Add(structTweetIDs); } //if (!File.Exists(Globals.Path_KeywordScrapedListData + "-" + keyword + ".csv")) //{ // GlobusFileHelper.AppendStringToTextfileNewLine("USERID , USERNAME , PROFILE NAME , BIO , LOCATION , WEBSITE , NO OF TWEETS , FOLLOWERS , FOLLOWINGS", Globals.Path_KeywordScrapedListData + "-" + keyword + ".csv"); //} { ChilkatHttpHelpr objChilkat = new ChilkatHttpHelpr(); GlobusHttpHelper HttpHelper = new GlobusHttpHelper(); string ProfilePageSource = HttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/" + from_user_id), "", ""); string Responce = ProfilePageSource; #region Convert HTML to XML string xHtml = objChilkat.ConvertHtmlToXml(Responce); Chilkat.Xml xml = new Chilkat.Xml(); xml.LoadXml(xHtml); Chilkat.Xml xNode = default(Chilkat.Xml); Chilkat.Xml xBeginSearchAfter = default(Chilkat.Xml); #endregion int counterdata = 0; xBeginSearchAfter = null; string dataDescription = string.Empty; xNode = xml.SearchForAttribute(xBeginSearchAfter, "h1", "class", "ProfileHeaderCard-name"); while ((xNode != null)) { xBeginSearchAfter = xNode; if (counterdata == 0) { ProfileName = xNode.AccumulateTagContent("text", "script|style"); counterdata++; } else if (counterdata == 1) { website = xNode.AccumulateTagContent("text", "script|style"); counterdata++; } else { break; } xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "u-textUserColor"); } xBeginSearchAfter = null; dataDescription = string.Empty; xNode = xml.SearchForAttribute(xBeginSearchAfter, "p", "class", "ProfileHeaderCard-bio u-dir");//bio profile-field"); while ((xNode != null)) { xBeginSearchAfter = xNode; Bio = xNode.AccumulateTagContent("text", "script|style").Replace("'", "'").Replace(" ", string.Empty).Trim(); break; } xBeginSearchAfter = null; dataDescription = string.Empty; xNode = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "ProfileHeaderCard-locationText u-dir");//location profile-field"); while ((xNode != 
null)) { xBeginSearchAfter = xNode; Location = xNode.AccumulateTagContent("text", "script|style"); break; } int counterData = 0; xBeginSearchAfter = null; dataDescription = string.Empty; xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-nav");//location profile-field"); while ((xNode != null)) { xBeginSearchAfter = xNode; if (counterData == 0) { // NoOfTweets = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "ProfileNav-value"); NoOfTweets = xNode.AccumulateTagContent("text", "script|style").Replace("Tweets", string.Empty).Replace(",", string.Empty).Replace("Tweet", string.Empty); counterData++; } else if (counterData == 1) { Followings = xNode.AccumulateTagContent("text", "script|style").Replace(" Following", string.Empty).Replace(",", string.Empty).Replace("Following", string.Empty); counterData++; } else if (counterData == 2) { Followers = xNode.AccumulateTagContent("text", "script|style").Replace("Followers", string.Empty).Replace(",", string.Empty).Replace("Follower", string.Empty); counterData++; } else { break; } //xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "js-nav"); xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-openSignupDialog js-nonNavigable u-textUserColor"); } if (!string.IsNullOrEmpty(from_user_id) && tweetUserid != "null") { string Id_user = tweetUserid.Replace("}]", string.Empty).Trim(); Globals.lstScrapedUserIDs.Add(Id_user); // GlobusFileHelper.AppendStringToTextfileNewLine(id + "," + from_user_id + "," + ProfileName + "," + Bio.Replace(",", "") + "," + Location.Replace(",", "") + "," + website + "," + NoOfTweets.Replace(",", "").Replace("Tweets", "") + "," + Followers.Replace(",", "").Replace("Following", "") + "," + Followings.Replace(",", "").Replace("Followers", "").Replace("Follower", ""), 
Globals.Path_KeywordScrapedListData + "-" + keyword + ".csv"); // Log("[ " + DateTime.Now + " ] => [ " + from_user_id + "," + Id_user + "," + ProfileName + "," + Bio.Replace(",", "") + "," + Location + "," + website + "," + NoOfTweets + "," + Followers + "," + Followings + " ]"); } } lst_structTweetIDs = lst_structTweetIDs.Distinct().ToList(); if (lst_structTweetIDs.Count >= noOfRecords) { // return lst_structTweetIDs; } } if (lst_structTweetIDs.Count <= noOfRecords) { maxid = lst_structTweetIDs[lst_structTweetIDs.Count - 1].ID_Tweet; if (res_Get_searchURL.Contains("has_moreitems\":false")) { } else { goto startAgain; } } else { if (res_Get_searchURL.Contains("has_more_items\":false")) { } else goto startAgain; } } } catch (Exception ex) { } } } } catch { } }
/// <summary>
/// Returns the concatenated XML of every element of <paramref name="pageSrcHtml"/> whose tag is
/// <paramref name="TagName"/> and whose id attribute matches <paramref name="AttributeName"/>.
/// </summary>
/// <param name="pageSrcHtml">Raw HTML; it is converted to XML before searching.</param>
/// <param name="TagName">Tag name handed to the XML attribute search.</param>
/// <param name="AttributeName">Value the "id" attribute is matched against.</param>
/// <returns>
/// The GetXml() output of all matching nodes joined in search order (an empty string when nothing
/// matches). Returns null when the converter cannot be unlocked or when any exception is thrown.
/// </returns>
public string GetDataWithTagValueByTagAndAttributeNameWithId(string pageSrcHtml, string TagName, string AttributeName)
{
    try
    {
        // The converter refuses to work until it has been unlocked.
        Chilkat.HtmlToXml converter = new Chilkat.HtmlToXml();
        bool unlocked = converter.UnlockComponent("THEBACHtmlToXml_7WY3A57sZH3O");
        if (!unlocked)
        {
            Console.WriteLine(converter.LastErrorText);
            return null;
        }

        converter.Html = pageSrcHtml;
        Chilkat.Xml document = new Chilkat.Xml();
        document.LoadXml(converter.ToXml());

        string combinedXml = string.Empty;
        Chilkat.Xml current = null;
        while (true)
        {
            // A null cursor starts at the beginning; afterwards the search resumes past the last hit.
            current = document.SearchForAttribute(current, TagName, "id", AttributeName);
            if (current == null)
            {
                break;
            }

            // Markup is kept on purpose: GetXml returns the node with its tags.
            combinedXml = combinedXml + current.GetXml();
        }

        return combinedXml;
    }
    catch (Exception)
    {
        // Best effort: callers receive null instead of an exception.
        return null;
    }
}
// btnStart_Searching_Click -- click handler that runs the "advanced search" scrape.
//
// What the visible code does, in order:
//   1. Copies the text of the nine search text boxes into the fields AllOfTheseWords, ThisExtractPhrase,
//      AnyOfTheseWords, TheseHashTags, NoneOfTheseWords, FromTheseAccounts, ToTheseAccounts,
//      MentionTheseAccounts and NearThisPlace, then logs "Process Started" through AddToLog_AdvancedSearch.
//   2. Prefixes every non-empty field (except AllOfTheseWords) with a pre-encoded search operator:
//      "%20%22" (exact phrase), "%22%20" (any words), "%20%23" (hashtags), "%20-" (none of these words),
//      "%20from%3A", "%20to%3A", "%20%40" (mentions) and "%20near%3A%22" (near place).
//   3. Only when txtAllofTheseKeywords is non-empty: builds
//      "https://twitter.com/i/search/timeline?q=" + the fields + _selectedLanguage + "%22%20within%3A15mi&src=typd&f=realtime"
//      (later passes append "...&scroll_cursor=" + TweetId), downloads it with _GlobusHttpHelper.getHtmlfromUrl
//      (one immediate retry when the response is empty, one retry after a 2 second sleep on exception).
//   4. Splits the response on "data-item-id", skips the first piece, and keeps only pieces that contain
//      "data-screen-name=" and do not contain "js-actionable-user js-profile-popup-actionable".
//      From each kept piece it cuts out the user id (data-user-id), the screen name (data-screen-name) and a
//      tweet id (text after the first =\" in the piece), and appends a StructTweetIDs to lst_structTweetIDs
//      unless the user id could not be parsed (id == "null").
//   5. For each kept piece it also downloads https://twitter.com/<screen name> with a new GlobusHttpHelper and
//      reads profile name, website, bio, location and the tweet/following/follower counters through Chilkat;
//      the tweet id is added to Globals.lstScrapedUserIDs.
//   6. De-duplicates lst_structTweetIDs and jumps back to the startAgain label until the response contains
//      the "no more items" marker.
//
// NOTE(review): the text "public List<StructTweetIDs> NewKeywordStructDataForSearchByKeyword(string keyword)"
//   followed by "#endregion" appears in the middle of this handler's body. That looks like a paste/merge
//   artifact rather than intended code -- confirm against version history before touching this method.
// NOTE(review): the first end-of-results check looks for "has_moreitems\":false" (no underscore between
//   "more" and "items") while the else branch looks for "has_more_items\":false"; the first spelling
//   presumably never matches, which would make that branch loop until an exception ends it -- verify.
// NOTE(review): both branches of the Count <= noOfRecords test jump back to startAgain, and the
//   "Count >= noOfRecords" block only contains a commented-out return, so noOfRecords does not appear to
//   limit the number of requests -- confirm intended behavior.
// NOTE(review): the cursor loop splits on "refresh_cursor" and then tests item.Contains("refresh_cursor");
//   pieces produced by that split cannot contain the delimiter, so only the "scroll_cursor" branch can run.
// NOTE(review): the catch of the tweet-id extraction assigns from_user_id = "null", not tweetUserid.
// NOTE(review): "text" is only assigned inside commented-out code, so wholeTweetMessage is always empty;
//   "maxid" is assigned but not read in the visible code; the scraped profile values are only consumed by
//   commented-out CSV/log statements.
// NOTE(review): the "%22%20within%3A15mi" suffix is appended unconditionally, even when NearThisPlace is empty.
private void btnStart_Searching_Click(object sender, EventArgs e) { AllOfTheseWords = (txtAllofTheseKeywords.Text).ToString(); ThisExtractPhrase = (txtThisExactPhrase.Text).ToString(); AnyOfTheseWords = (txtAnyOfTheseWords.Text).ToString(); TheseHashTags = (txtTheseHashTags.Text).ToString(); NoneOfTheseWords = (txtNoneofTheseWords.Text).ToString(); FromTheseAccounts = (txtFromTheseAccounts.Text).ToString(); ToTheseAccounts = (txtToTheseAccounts.Text).ToString(); MentionTheseAccounts = (txtMentioningTheseAccounts.Text).ToString(); NearThisPlace = (txtNearThisPlace.Text).ToString(); AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => Process Started"); try { if (string.IsNullOrEmpty(ThisExtractPhrase)) { ThisExtractPhrase = ""; } else { ThisExtractPhrase = "%20%22" + ThisExtractPhrase; } } catch { } try { if (string.IsNullOrEmpty(AnyOfTheseWords)) { AnyOfTheseWords = ""; } else { AnyOfTheseWords = "%22%20" + AnyOfTheseWords; } } catch { } try { if (string.IsNullOrEmpty(TheseHashTags)) { TheseHashTags = ""; } else { TheseHashTags = "%20%23" + TheseHashTags; } } catch { } try { if (string.IsNullOrEmpty(NoneOfTheseWords)) { NoneOfTheseWords = ""; } else { NoneOfTheseWords = "%20-" + NoneOfTheseWords; } } catch { } try { if (string.IsNullOrEmpty(FromTheseAccounts)) { FromTheseAccounts = ""; } else { FromTheseAccounts = "%20from%3A" + FromTheseAccounts; } } catch { } try { if (string.IsNullOrEmpty(ToTheseAccounts)) { ToTheseAccounts = ""; } else { ToTheseAccounts = "%20to%3A" + ToTheseAccounts; } } catch { } try { if (string.IsNullOrEmpty(MentionTheseAccounts)) { MentionTheseAccounts = ""; } else { MentionTheseAccounts = "%20%40" + MentionTheseAccounts; } } catch { } try { if (string.IsNullOrEmpty(NearThisPlace)) { NearThisPlace = ""; } else { NearThisPlace = "%20near%3A%22" + NearThisPlace; } } catch { } try { if (!string.IsNullOrEmpty(txtAllofTheseKeywords.Text)) { #region Commented //try //{ // string Url = "https://twitter.com/search?f=realtime&q=" + AllOfTheseWords 
+ ThisExtractPhrase + AnyOfTheseWords + NoneOfTheseWords + TheseHashTags + _selectedLanguage + FromTheseAccounts + ToTheseAccounts + MentionTheseAccounts + NearThisPlace + "%22%20within%3A15mi&src=typd"; // string response = _GlobusHttpHelper.getHtmlfromUrl(new Uri(Url), "", ""); //} //catch { } public List<StructTweetIDs> NewKeywordStructDataForSearchByKeyword(string keyword) #endregion { try { BaseLib.GlobusRegex regx = new GlobusRegex(); int counter = 0; string res_Get_searchURL = string.Empty; string searchURL = string.Empty; string maxid = string.Empty; string TweetId = string.Empty; string text = string.Empty; string ProfileName = string.Empty; string Location = string.Empty; string Bio = string.Empty; string website = string.Empty; string NoOfTweets = string.Empty; string Followers = string.Empty; string Followings = string.Empty; int noOfRecords = 0; try { noOfRecords = int.Parse(txtNoOfRecords.Text); } catch { } startAgain: if (counter == 0) { searchURL = "https://twitter.com/i/search/timeline?q=" + AllOfTheseWords + ThisExtractPhrase + AnyOfTheseWords + NoneOfTheseWords + TheseHashTags + _selectedLanguage + FromTheseAccounts + ToTheseAccounts + MentionTheseAccounts + NearThisPlace + "%22%20within%3A15mi&src=typd" + "&f=realtime"; counter++; } else { searchURL = "https://twitter.com/i/search/timeline?q=" + AllOfTheseWords + ThisExtractPhrase + AnyOfTheseWords + NoneOfTheseWords + TheseHashTags + _selectedLanguage + FromTheseAccounts + ToTheseAccounts + MentionTheseAccounts + NearThisPlace + "%22%20within%3A15mi&src=typd" + "&f=realtime&include_available_features=1&include_entities=1&last_note_ts=0&oldest_unread_id=0&scroll_cursor=" + TweetId + ""; } try { res_Get_searchURL = _GlobusHttpHelper.getHtmlfromUrl(new Uri(searchURL), "", ""); AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => Finding results for entered details "); if (string.IsNullOrEmpty(res_Get_searchURL)) { res_Get_searchURL = _GlobusHttpHelper.getHtmlfromUrl(new Uri(searchURL), "", ""); } 
try { //string sjss = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", ""); string[] splitRes = Regex.Split(res_Get_searchURL, "refresh_cursor"); //splitRes = splitRes.Skip(1).ToArray(); foreach (string item in splitRes) { if (item.Contains("refresh_cursor")) { int startIndex = item.IndexOf("TWEET-"); string start = item.Substring(startIndex).Replace("data-user-id=\\\"", ""); int endIndex = start.IndexOf("\""); string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", ""); TweetId = end; } if (item.Contains("scroll_cursor")) { int startIndex = item.IndexOf("TWEET-"); string start = item.Substring(startIndex).Replace("data-user-id=\\\"", ""); int endIndex = start.IndexOf("\""); string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", ""); TweetId = end; } } } catch (Exception) { } } catch (Exception ex) { System.Threading.Thread.Sleep(2000); res_Get_searchURL = _GlobusHttpHelper.getHtmlfromUrl(new Uri(searchURL), "", ""); } // && !res_Get_searchURL.Contains("has_more_items\":false") if (!string.IsNullOrEmpty(res_Get_searchURL)) { //string[] splitRes = Regex.Split(res_Get_searchURL, "data-item-id"); //Regex.Split(res_Get_searchURL, "\"in_reply_to_status_id_str\""); string[] splitRes = Regex.Split(res_Get_searchURL, "data-item-id"); splitRes = splitRes.Skip(1).ToArray(); foreach (string item in splitRes) { if (item.Contains("data-screen-name=") && !item.Contains("js-actionable-user js-profile-popup-actionable")) { //var avc = Newtonsoft.Json.JsonConvert.DeserializeObject<dynamic>(res_Get_searchURL); //string DataHtml = (string)avc["items_html"]; } else { continue; } string modified_Item = "\"from_user\"" + item; string id = ""; try { int startIndex = item.IndexOf("data-user-id="); string start = item.Substring(startIndex).Replace("data-user-id=\\\"", ""); int endIndex = 
start.IndexOf("\\\""); string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", ""); id = end; //lst_structTweetIDs.Add(id); AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => User Id " + id); } catch (Exception ex) { id = "null"; //Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper); } string from_user_id = ""; try { int startIndex = item.IndexOf("data-screen-name=\\\""); string start = item.Substring(startIndex).Replace("data-screen-name=\\\"", ""); int endIndex = start.IndexOf("\\\""); string end = start.Substring(0, endIndex).Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", ""); from_user_id = end; AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => User ScreenName " + from_user_id); } catch (Exception ex) { from_user_id = "null"; // Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwitterDataScrapper); } string tweetUserid = string.Empty; try { int startIndex = item.IndexOf("=\\\""); string start = item.Substring(startIndex).Replace("=\\\"", ""); int endIndex = start.IndexOf("\\\""); string end = start.Substring(0, endIndex).Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", ""); tweetUserid = end; AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => Tweet Id " + tweetUserid); } catch (Exception ex) { from_user_id = "null"; } ///Tweet Text #region Commented //try //{ // int startindex = item.IndexOf("js-tweet-text tweet-text\""); // if (startindex == -1) // { // startindex = 0; // startindex = 
item.IndexOf("js-tweet-text tweet-text"); // } // string start = item.Substring(startindex).Replace("js-tweet-text tweet-text\"", "").Replace("js-tweet-text tweet-text tweet-text-rtl\"", ""); // int endindex = start.IndexOf("</p>"); // if (endindex == -1) // { // endindex = 0; // endindex = start.IndexOf("stream-item-footer"); // } // string end = start.Substring(0, endindex); // end = regx.StripTagsRegex(end); // text = end.Replace(" ", "").Replace("a href=", "").Replace("/a", "").Replace("<span", "").Replace("</span", "").Replace("class=\\\"js-display-url\\\"", "").Replace("class=\\\"tco-ellipsis\\\"", "").Replace("class=\\\"invisible\\\"", "").Replace("<strong>", "").Replace("target=\\\"_blank\\\"", "").Replace("class=\\\"twitter-timeline-link\\\"", "").Replace("</strong>", "").Replace("rel=\\\"nofollow\\\" dir=\\\"ltr\\\" data-expanded-url=", ""); // text = text.Replace(""", "").Replace("<", "").Replace(">", "").Replace("\"", "").Replace("\\", "").Replace("title=", ""); // string[] array = Regex.Split(text, "http"); // text = string.Empty; // foreach (string itemData in array) // { // if (!itemData.Contains("t.co")) // { // string data = string.Empty; // if (itemData.Contains("//")) // { // data = ("http" + itemData).Replace(" span ", string.Empty); // if (!text.Contains(itemData.Replace(" ", "")))// && !data.Contains("class") && !text.Contains(data)) // { // text += data.Replace("u003c", string.Empty).Replace("u003e", string.Empty); // } // } // else // { // if (!text.Contains(itemData.Replace(" ", ""))) // { // text += itemData.Replace("u003c", string.Empty).Replace("u003e", string.Empty).Replace("js-tweet-text tweet-text", ""); // } // } // } // } //} //catch { }; #endregion twtboardpro.TwitterDataScrapper.StructTweetIDs structTweetIDs = new twtboardpro.TwitterDataScrapper.StructTweetIDs(); if (id != "null") { structTweetIDs.ID_Tweet = tweetUserid; structTweetIDs.ID_Tweet_User = id; structTweetIDs.username__Tweet_User = from_user_id; 
structTweetIDs.wholeTweetMessage = text; lst_structTweetIDs.Add(structTweetIDs); } //if (!File.Exists(Globals.Path_KeywordScrapedListData + "-" + keyword + ".csv")) //{ // GlobusFileHelper.AppendStringToTextfileNewLine("USERID , USERNAME , PROFILE NAME , BIO , LOCATION , WEBSITE , NO OF TWEETS , FOLLOWERS , FOLLOWINGS", Globals.Path_KeywordScrapedListData + "-" + keyword + ".csv"); //} { ChilkatHttpHelpr objChilkat = new ChilkatHttpHelpr(); GlobusHttpHelper HttpHelper = new GlobusHttpHelper(); string ProfilePageSource = HttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/" + from_user_id), "", ""); string Responce = ProfilePageSource; #region Convert HTML to XML string xHtml = objChilkat.ConvertHtmlToXml(Responce); Chilkat.Xml xml = new Chilkat.Xml(); xml.LoadXml(xHtml); Chilkat.Xml xNode = default(Chilkat.Xml); Chilkat.Xml xBeginSearchAfter = default(Chilkat.Xml); #endregion int counterdata = 0; xBeginSearchAfter = null; string dataDescription = string.Empty; xNode = xml.SearchForAttribute(xBeginSearchAfter, "h1", "class", "ProfileHeaderCard-name"); while ((xNode != null)) { xBeginSearchAfter = xNode; if (counterdata == 0) { ProfileName = xNode.AccumulateTagContent("text", "script|style"); counterdata++; } else if (counterdata == 1) { website = xNode.AccumulateTagContent("text", "script|style"); counterdata++; } else { break; } xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "u-textUserColor"); } xBeginSearchAfter = null; dataDescription = string.Empty; xNode = xml.SearchForAttribute(xBeginSearchAfter, "p", "class", "ProfileHeaderCard-bio u-dir");//bio profile-field"); while ((xNode != null)) { xBeginSearchAfter = xNode; Bio = xNode.AccumulateTagContent("text", "script|style").Replace("'", "'").Replace(" ", string.Empty).Trim(); break; } xBeginSearchAfter = null; dataDescription = string.Empty; xNode = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "ProfileHeaderCard-locationText u-dir");//location profile-field"); while ((xNode != 
null)) { xBeginSearchAfter = xNode; Location = xNode.AccumulateTagContent("text", "script|style"); break; } int counterData = 0; xBeginSearchAfter = null; dataDescription = string.Empty; xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-nav");//location profile-field"); while ((xNode != null)) { xBeginSearchAfter = xNode; if (counterData == 0) { // NoOfTweets = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "ProfileNav-value"); NoOfTweets = xNode.AccumulateTagContent("text", "script|style").Replace("Tweets", string.Empty).Replace(",", string.Empty).Replace("Tweet", string.Empty); counterData++; } else if (counterData == 1) { Followings = xNode.AccumulateTagContent("text", "script|style").Replace(" Following", string.Empty).Replace(",", string.Empty).Replace("Following", string.Empty); counterData++; } else if (counterData == 2) { Followers = xNode.AccumulateTagContent("text", "script|style").Replace("Followers", string.Empty).Replace(",", string.Empty).Replace("Follower", string.Empty); counterData++; } else { break; } //xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "js-nav"); xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-openSignupDialog js-nonNavigable u-textUserColor"); } if (!string.IsNullOrEmpty(from_user_id) && tweetUserid != "null") { string Id_user = tweetUserid.Replace("}]", string.Empty).Trim(); Globals.lstScrapedUserIDs.Add(Id_user); // GlobusFileHelper.AppendStringToTextfileNewLine(id + "," + from_user_id + "," + ProfileName + "," + Bio.Replace(",", "") + "," + Location.Replace(",", "") + "," + website + "," + NoOfTweets.Replace(",", "").Replace("Tweets", "") + "," + Followers.Replace(",", "").Replace("Following", "") + "," + Followings.Replace(",", "").Replace("Followers", "").Replace("Follower", ""), 
Globals.Path_KeywordScrapedListData + "-" + keyword + ".csv"); // Log("[ " + DateTime.Now + " ] => [ " + from_user_id + "," + Id_user + "," + ProfileName + "," + Bio.Replace(",", "") + "," + Location + "," + website + "," + NoOfTweets + "," + Followers + "," + Followings + " ]"); } } lst_structTweetIDs = lst_structTweetIDs.Distinct().ToList(); if (lst_structTweetIDs.Count >= noOfRecords) { // return lst_structTweetIDs; } } if (lst_structTweetIDs.Count <= noOfRecords) { maxid = lst_structTweetIDs[lst_structTweetIDs.Count - 1].ID_Tweet; if (res_Get_searchURL.Contains("has_moreitems\":false")) { } else { goto startAgain; } } else { if (res_Get_searchURL.Contains("has_more_items\":false")) { } else { goto startAgain; } } } } catch (Exception ex) { } } } } catch { } }
/// <summary>
/// Runs the page through Chilkat's HTML-to-XML converter and gathers the XML of every
/// <paramref name="TagName"/> element whose <c>id</c> attribute matches <paramref name="AttributeName"/>.
/// </summary>
/// <param name="pageSrcHtml">HTML source to convert and search.</param>
/// <param name="TagName">Element name handed to the attribute search.</param>
/// <param name="AttributeName">The <c>id</c> value to search for (the parameter holds a value, not an attribute name).</param>
/// <returns>
/// All matching elements' XML appended one after another (empty string when there is no match);
/// <c>null</c> if the converter could not be unlocked or an exception occurred.
/// </returns>
public string GetDataWithTagValueByTagAndAttributeNameWithId(string pageSrcHtml, string TagName, string AttributeName)
{
    string combinedXml = null;

    try
    {
        Chilkat.HtmlToXml htmlConverter = new Chilkat.HtmlToXml();
        bool unlocked = htmlConverter.UnlockComponent("THEBACHtmlToXml_7WY3A57sZH3O");

        if (unlocked)
        {
            htmlConverter.Html = pageSrcHtml;
            string pageAsXml = htmlConverter.ToXml();

            Chilkat.Xml root = new Chilkat.Xml();
            root.LoadXml(pageAsXml);

            string gathered = string.Empty;
            Chilkat.Xml previousHit = null;

            while (true)
            {
                // Continue the search right after the element found on the previous pass.
                Chilkat.Xml hit = root.SearchForAttribute(previousHit, TagName, "id", AttributeName);
                if (hit == null)
                {
                    break;
                }

                gathered = string.Concat(gathered, hit.GetXml());
                previousHit = hit;
            }

            // Publish the result only once the whole walk has succeeded.
            combinedXml = gathered;
        }
        else
        {
            Console.WriteLine(htmlConverter.LastErrorText);
        }
    }
    catch (Exception)
    {
        combinedXml = null;
    }

    return combinedXml;
}
/// <summary>
/// Scrapes the tweets shown on each of this account's status pages and stores one row per tweet
/// in the <c>tb_ReplyCampaign</c> table.
/// </summary>
/// <remarks>
/// Calls <c>Login()</c> first when <c>IsLoggedIn</c> is false and does nothing when
/// <c>IsNotSuspended</c> is false. The account's profile page is split on its own status links;
/// every status page is then downloaded and each "simple-tweet tweet js-stream-tweet" fragment on
/// it yields the tweet text, the author's screen name and the author's user id.
/// The method stops completely if the Chilkat HTML-to-XML component cannot be unlocked.
/// </remarks>
public void getmentions()
{
    if (!IsLoggedIn)
    {
        Login();
    }

    if (!IsNotSuspended)
    {
        return;
    }

    string pageSource = globusHttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/" + Screen_name), "", "");
    if (string.IsNullOrEmpty(pageSource))
    {
        // Nothing was downloaded; Regex.Split would throw on a null input.
        return;
    }

    // The screen name is escaped so it is matched literally inside the split pattern.
    string statusLinkPattern = "href=\"/" + Regex.Escape(Screen_name ?? string.Empty) + "/status/";
    string[] statusChunks = Regex.Split(pageSource, statusLinkPattern).Skip(1).ToArray();

    foreach (string chunk in statusChunks)
    {
        if (!chunk.Contains("tweet-timestamp js-permalink js-nav") || !chunk.Contains("js-tweet-text"))
        {
            continue;
        }

        // The status id is everything up to the quote that closes the href attribute.
        int idEnd = chunk.IndexOf("\"", StringComparison.Ordinal);
        if (idEnd <= 0)
        {
            // Without an id there is no status page to request.
            continue;
        }
        string statusid = chunk.Substring(0, idEnd);

        string StatusPageSource = globusHttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/" + Screen_name + "/status/" + statusid), "", "");
        if (string.IsNullOrEmpty(StatusPageSource))
        {
            continue;
        }

        string[] tweetFragments = Regex.Split(StatusPageSource, "simple-tweet tweet js-stream-tweet").Skip(1).ToArray();
        foreach (string tweets in tweetFragments)
        {
            #region Convert HTML to XML
            Chilkat.HtmlToXml htmlToXml = new Chilkat.HtmlToXml();
            if (!htmlToXml.UnlockComponent("THEBACHtmlToXml_7WY3A57sZH3O"))
            {
                Console.WriteLine(htmlToXml.LastErrorText);
                return;
            }

            htmlToXml.Html = tweets;
            Chilkat.Xml xml = new Chilkat.Xml();
            xml.LoadXml(htmlToXml.ToXml());
            #endregion

            // Only the first js-tweet-text paragraph of the fragment is used.
            string TweetText = string.Empty;
            Chilkat.Xml searchStart = null;
            Chilkat.Xml textNode = xml.SearchForAttribute(searchStart, "p", "class", "js-tweet-text");
            if (textNode != null)
            {
                TweetText = textNode.AccumulateTagContent("text", "script|style");
            }

            string From_user_Screen_name = ExtractQuotedAttribute(tweets, "data-screen-name");
            string From_user_id = ExtractQuotedAttribute(tweets, "data-user-id");

            // Scraped text is untrusted: single quotes are doubled so a tweet cannot terminate the
            // SQL string literal. NOTE(review): DataBaseHandler.InsertQuery only accepts a finished
            // statement here; switch to real parameters if it offers a parameterized overload.
            string strQuery = "INSERT INTO tb_ReplyCampaign (TweetId , Username , ReplyUserName , ReplyUserId , TweetText , Reply) VALUES ('"
                + EscapeSqlLiteral(statusid) + "' , '"
                + EscapeSqlLiteral(Username) + "' , '"
                + EscapeSqlLiteral(From_user_Screen_name) + "' , '"
                + EscapeSqlLiteral(From_user_id) + "' , '"
                + EscapeSqlLiteral(TweetText) + "' , '"
                + EscapeSqlLiteral(TweetText) + "')";
            DataBaseHandler.InsertQuery(strQuery, "tb_ReplyCampaign");
        }
    }
}

/// <summary>
/// Returns the text between <c>attributeName="</c> and the next double quote in <paramref name="html"/>,
/// or an empty string when the attribute or its closing quote is missing.
/// </summary>
private static string ExtractQuotedAttribute(string html, string attributeName)
{
    int nameIndex = html.IndexOf(attributeName, StringComparison.Ordinal);
    if (nameIndex < 0)
    {
        return string.Empty;
    }

    string tail = html.Substring(nameIndex).Replace(attributeName + "=\"", "");
    int closingQuote = tail.IndexOf("\"", StringComparison.Ordinal);
    if (closingQuote < 0)
    {
        return string.Empty;
    }

    // Same clean-up the value has always received: JSON-ish residue is stripped.
    return tail.Substring(0, closingQuote)
        .Replace("screen_name", "")
        .Replace("\"", "")
        .Replace(":", "")
        .Replace("{", "")
        .Replace("}", "")
        .Replace("]", "");
}

/// <summary>Doubles single quotes so the value can sit inside a quoted SQL string literal; null becomes empty.</summary>
private static string EscapeSqlLiteral(string value)
{
    return value == null ? string.Empty : value.Replace("'", "''");
}
/// <summary>
/// Downloads one user's public Twitter profile page, extracts the profile fields and appends them
/// as a CSV row to <c>Globals.Path_UserListInfoData</c> (writing the header row first when the file
/// does not exist yet). The same values are echoed through <c>AddToLog_ScrapMember</c>.
/// </summary>
/// <param name="param">An <see cref="Array"/> whose element 0 is the user's screen name as a string.</param>
/// <remarks>
/// The method never throws: any failure is reported through <c>AddToLog_ScrapMember</c> and the user is skipped.
/// It returns early, after logging, when the page cannot be downloaded (one retry) or contains "Account suspended".
/// </remarks>
public void scrapUserInfo(object param)
{
    try
    {
        Array paramsArray = (Array)param;
        string UserName = (string)paramsArray.GetValue(0);

        string userId = string.Empty;
        string ProfileName = string.Empty;
        string Location = string.Empty;
        string Bio = string.Empty;
        string website = string.Empty;
        string NoOfTweets = string.Empty;
        string Followers = string.Empty;
        string Followings = string.Empty;
        string IsProfilePIc = string.Empty;

        ChilkatHttpHelpr objChilkat = new ChilkatHttpHelpr();
        GlobusHttpHelper HttpHelper = new GlobusHttpHelper();

        string ProfilePageSource = HttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/" + UserName.Trim()), "", "");
        if (string.IsNullOrEmpty(ProfilePageSource))
        {
            // One retry before giving up on this user.
            ProfilePageSource = HttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/" + UserName.Trim()), "", "");
        }

        if (string.IsNullOrEmpty(ProfilePageSource))
        {
            AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [ User " + UserName + " is not exist or page source getting null.]");
            return;
        }

        if (ProfilePageSource.Contains("Account suspended"))
        {
            AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [ User " + UserName + " is suspended ]");
            return;
        }

        #region Convert HTML to XML
        string xHtml = objChilkat.ConvertHtmlToXml(ProfilePageSource);
        Chilkat.Xml xml = new Chilkat.Xml();
        xml.LoadXml(xHtml);
        Chilkat.Xml fromStart = null;
        #endregion

        // Profile name, then the first "u-textUserColor" link that follows it (the website).
        Chilkat.Xml nameNode = xml.SearchForAttribute(fromStart, "h1", "class", "ProfileHeaderCard-name");
        if (nameNode != null)
        {
            ProfileName = nameNode.AccumulateTagContent("text", "script|style");
            if (ProfileName.Contains("Verified account"))
            {
                ProfileName = ProfileName.Replace("Verified account", " ");
            }

            Chilkat.Xml websiteNode = xml.SearchForAttribute(nameNode, "a", "class", "u-textUserColor");
            if (websiteNode != null)
            {
                website = websiteNode.AccumulateTagContent("text", "script|style");
                if (website.Contains("Twitter Status"))
                {
                    website = "";
                }
            }
        }

        Chilkat.Xml bioNode = xml.SearchForAttribute(fromStart, "p", "class", "ProfileHeaderCard-bio u-dir");
        if (bioNode != null)
        {
            // NOTE(review): the entity names below are restored on the assumption that the literals were
            // HTML-decoded by a tool (the code read Replace("'", "'") and Replace(" ", "")), which made the
            // first call a no-op and the second strip every space from the bio. Confirm against history.
            Bio = bioNode.AccumulateTagContent("text", "script|style")
                .Replace("&#39;", "'")
                .Replace("&nbsp;", string.Empty)
                .Trim();
        }

        Chilkat.Xml locationNode = xml.SearchForAttribute(fromStart, "span", "class", "ProfileHeaderCard-locationText u-dir");
        if (locationNode != null)
        {
            Location = locationNode.AccumulateTagContent("text", "script|style");
        }

        // Counters: the first stat link carries the "js-nav" class list, the following ones the
        // "js-openSignupDialog" class list. Order on the page is tweets, following, followers.
        const string firstStatClass = "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-nav";
        const string nextStatClass = "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-openSignupDialog js-nonNavigable u-textUserColor";

        Chilkat.Xml statNode = xml.SearchForAttribute(fromStart, "a", "class", firstStatClass);
        if (statNode != null)
        {
            NoOfTweets = statNode.AccumulateTagContent("text", "script|style").Replace("Tweets", string.Empty).Replace(",", string.Empty).Replace("Tweet", string.Empty);

            statNode = xml.SearchForAttribute(statNode, "a", "class", nextStatClass);
            if (statNode != null)
            {
                Followings = statNode.AccumulateTagContent("text", "script|style").Replace(" Following", string.Empty).Replace(",", string.Empty).Replace("Following", string.Empty);

                statNode = xml.SearchForAttribute(statNode, "a", "class", nextStatClass);
                if (statNode != null)
                {
                    Followers = statNode.AccumulateTagContent("text", "script|style").Replace("Followers", string.Empty).Replace(",", string.Empty).Replace("Follower", string.Empty);
                }
            }
        }

        // User id: first try the plain "profile_id" marker; when the result is implausibly long
        // (more than 15 characters) fall back to the HTML-escaped form "profile_id&quot;".
        string parsedId = ExtractProfileId(ProfilePageSource, "profile_id", false);
        if (parsedId != null)
        {
            userId = parsedId;
            if (userId.Length > 15)
            {
                string escapedId = ExtractProfileId(ProfilePageSource, "profile_id&quot;", true);
                if (escapedId != null)
                {
                    userId = escapedId;
                }
            }
        }

        // Twitter's placeholder avatars are named default_profile_0 … default_profile_6.
        IsProfilePIc = "Yes";
        for (int avatar = 0; avatar <= 6; avatar++)
        {
            if (ProfilePageSource.Contains("default_profile_" + avatar + "_400x400"))
            {
                IsProfilePIc = "No";
                break;
            }
        }

        if (!File.Exists(Globals.Path_UserListInfoData))
        {
            GlobusFileHelper.AppendStringToTextfileNewLine("USERID , USERNAME , PROFILE NAME , BIO , LOCATION , WEBSITE , NO OF TWEETS , FOLLOWERS , FOLLOWINGS, ProfilePic", Globals.Path_UserListInfoData);
        }

        if (!string.IsNullOrEmpty(UserName))
        {
            GlobusFileHelper.AppendStringToTextfileNewLine(
                userId + "," + UserName + "," + ProfileName + "," + Bio.Replace(",", "") + "," + Location.Replace(",", "") + "," + website + ","
                + NoOfTweets.Replace(",", "").Replace("Tweets", "") + ","
                + Followers.Replace(",", "").Replace("Following", "") + ","
                + Followings.Replace(",", "").Replace("Followers", "").Replace("Follower", "") + ","
                + IsProfilePIc,
                Globals.Path_UserListInfoData);

            AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [ " + userId + "," + UserName + "," + ProfileName + "," + Bio.Replace(",", "") + "," + Location + "," + website + "," + NoOfTweets + "," + Followers + "," + Followings + " ," + IsProfilePIc + "]");
        }
    }
    catch (Exception ex)
    {
        // Still best-effort (the caller is never interrupted), but the failure is no longer invisible.
        AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [ Error while scraping user info : " + ex.Message + " ]");
    }
}

/// <summary>
/// Returns the cleaned text between <paramref name="marker"/> and the next comma in the page source,
/// or <c>null</c> when the marker or the comma is not found.
/// </summary>
private static string ExtractProfileId(string pageSource, string marker, bool stripSemicolons)
{
    int markerIndex = pageSource.IndexOf(marker, StringComparison.Ordinal);
    if (markerIndex < 0)
    {
        return null;
    }

    string tail = pageSource.Substring(markerIndex).Replace(marker, "");
    int commaIndex = tail.IndexOf(",", StringComparison.Ordinal);
    if (commaIndex < 0)
    {
        return null;
    }

    string value = tail.Substring(0, commaIndex).Replace("&quot;", "").Replace("\"", "").Replace(":", "");
    if (stripSemicolons)
    {
        value = value.Replace(";", "");
    }
    return value.Trim();
}
/// <summary>
/// Reports whether the page contains at least one <paramref name="TagName"/> element whose
/// <c>class</c> attribute matches <paramref name="AttributeName"/>.
/// </summary>
/// <param name="pageSrcHtml">HTML source to convert to XML and search.</param>
/// <param name="TagName">Element name handed to the attribute search.</param>
/// <param name="AttributeName">The <c>class</c> value to search for (a value, not an attribute name).</param>
/// <returns>
/// <c>true</c> when a matching element is found; <c>false</c> when none is found, when the Chilkat
/// component cannot be unlocked, or when any exception is thrown.
/// </returns>
public bool CheckAttributeexsist(string pageSrcHtml, string TagName, string AttributeName)
{
    try
    {
        Chilkat.HtmlToXml converter = new Chilkat.HtmlToXml();

        // The converter has to be unlocked before it will convert anything.
        if (!converter.UnlockComponent("THEBACHtmlToXml_7WY3A57sZH3O"))
        {
            Console.WriteLine(converter.LastErrorText);
            return false;
        }

        converter.Html = pageSrcHtml;
        string pageXml = converter.ToXml();

        Chilkat.Xml document = new Chilkat.Xml();
        document.LoadXml(pageXml);

        // A single search from the top of the document is enough: one hit answers the question.
        Chilkat.Xml searchStart = null;
        Chilkat.Xml firstMatch = document.SearchForAttribute(searchStart, TagName, "class", AttributeName);
        return firstMatch != null;
    }
    catch (Exception)
    {
        return false;
    }
}
/// <summary>
/// Collects screen names from Twitter's search timeline for <paramref name="Keyword"/>,
/// stores each (username, user id) pair in <c>tb_UsernameDetails</c>, then fetches every
/// collected profile page and appends its details to the keyword CSV file
/// (<c>Globals.Path_KeywordScrapedListData + ".csv"</c>).
/// </summary>
/// <param name="tweetAccountManager">Account whose HTTP helper is used for the search requests.</param>
/// <param name="Keyword">Search term; it is concatenated into the request URL as-is.</param>
/// <param name="NoOfPages">Maximum number of result pages to process.</param>
/// <remarks>
/// Exceptions never escape this method: every failure is written to the error log files.
/// </remarks>
private void ScrapeWhotoFollow(TweetAccountManager tweetAccountManager, string Keyword, int NoOfPages)
{
    try
    {
        AddThreadToDictionary(strModule(Module.WhoToScrap), tweetAccountManager.Username);

        int counter = 0;
        int PageCount = 1;
        List<string> username = new List<string>();

        AddToScrapeLogs("[ " + DateTime.Now + " ] => [ Getting Users To Scrape Data For ]");

        // ---- Phase 1: page through the search results and collect screen names ----
        while (counter < NoOfPages)
        {
            string requestUrl;
            if (PageCount == 1)
            {
                requestUrl = "https://twitter.com/i/search/timeline?q=" + Keyword + "&cursor=" + PageCount + "&include_available_features=1&include_entities=1&is_forward=true";
            }
            else
            {
                requestUrl = "https://twitter.com/i/search/timeline?q=" + Keyword + "&mode=users&include_available_features=1&include_entities=1&last_note_ts=555&scroll_cursor=USER-0-" + PageCount * 20;
            }

            string pagsource = tweetAccountManager.globusHttpHelper.getHtmlfromUrl(new Uri(requestUrl), "", "");

            if (!pagsource.Contains("has_more_items"))
            {
                AddToScrapeLogs("[ " + DateTime.Now + " ] => [ No More Pages To Scrape For Keyword : " + Keyword + " ]");
                break;
            }

            PageCount++;

            // Everything before the first marker is page chrome, so the first fragment is dropped.
            string[] streamItems = Regex.Split(pagsource, "js-stream-item stream-item").Skip(1).ToArray();

            foreach (string item in streamItems)
            {
                string Userid = string.Empty;
                string Username = string.Empty;

                try
                {
                    int userIdIndex = item.IndexOf("data-user-id=\"");
                    int itemIdIndex = item.IndexOf("data-item-id=\\\"");

                    // PageCount was already incremented above, so 2 means "response of the first request".
                    if (userIdIndex >= 0 && PageCount == 2)
                    {
                        Userid = ExtractScrapedValue(item, userIdIndex, "data-user-id=\"", "\"");
                    }
                    else if (itemIdIndex >= 0)
                    {
                        Userid = ExtractScrapedValue(item, itemIdIndex, "data-item-id=\\\"", "\\\"");
                    }
                }
                catch (Exception ex)
                {
                    LogScrapeWhotoFollowError("1", ex);
                }

                try
                {
                    int plainNameIndex = item.IndexOf("data-screen-name=\"");
                    int escapedNameIndex = item.IndexOf("data-screen-name=\\\"");

                    if (plainNameIndex >= 0)
                    {
                        string name = ExtractScrapedValue(item, plainNameIndex, "data-screen-name=\"", "\"");
                        Username = name;
                        username.Add(name);
                    }

                    // Not an else: when both forms are present the backslash-escaped one wins for Username.
                    if (escapedNameIndex >= 0)
                    {
                        string name = ExtractScrapedValue(item, escapedNameIndex, "data-screen-name=\\\"", "\\\"");
                        Username = name;
                        username.Add(name);
                    }
                }
                catch (Exception ex)
                {
                    LogScrapeWhotoFollowError("1", ex);
                }

                try
                {
                    if (!string.IsNullOrEmpty(Username) && !string.IsNullOrEmpty(Userid))
                    {
                        // Values come from scraped markup, so single quotes are doubled before
                        // being embedded in the SQL text.
                        string query = "INSERT INTO tb_UsernameDetails (Username , Userid) VALUES ('" + Username.Replace("'", "''") + "' ,'" + Userid.Replace("'", "''") + "') ";
                        DataBaseHandler.InsertQuery(query, "tb_UsernameDetails");
                    }
                }
                catch (Exception ex)
                {
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> ScrapeWhotoFollow() --> Database --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> ScrapeWhotoFollow() --> DataBase --> " + ex.Message, Globals.Path_TwtErrorLogs);
                }

                AddToScrapeLogs("[ " + DateTime.Now + " ] => [ " + Username + " :: " + Userid + " ]");
            }

            username = username.Distinct().ToList();
            counter++;
        }

        // ---- Phase 2: visit every collected profile and append its details to the CSV ----
        if (!File.Exists(Globals.Path_KeywordScrapedListData + ".csv"))
        {
            // Column order matches the rows written below: followers first, then followings.
            GlobusFileHelper.AppendStringToTextfileNewLine("USERID , USERNAME , PROFILE NAME , BIO , LOCATION , WEBSITE , NOOFTWEETS , FOLLOWERS , FOLLOWING ", Globals.Path_KeywordScrapedListData + ".csv");
        }

        foreach (string UserIds in username)
        {
            try
            {
                string ProfileName = string.Empty;
                string Location = string.Empty;
                string Bio = string.Empty;
                string website = string.Empty;
                string NoOfTweets = string.Empty;
                string Followers = string.Empty;
                string Followings = string.Empty;
                string userids = string.Empty;
                string TweetUsername = string.Empty;

                ChilkatHttpHelpr objChilkat = new ChilkatHttpHelpr();
                GlobusHttpHelper HttpHelper = new GlobusHttpHelper();
                string Responce = HttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/" + UserIds), "", "");

                string xHtml = objChilkat.ConvertHtmlToXml(Responce);
                Chilkat.Xml xml = new Chilkat.Xml();
                xml.LoadXml(xHtml);

                // A null start node makes SearchForAttribute begin at the top of the document.
                Chilkat.Xml searchStart = null;

                // The user id is only read when the page carries the "has-more-items" marker;
                // profiles without it keep an empty id and are therefore not written out below.
                if (Responce.Contains("has-more-items"))
                {
                    try
                    {
                        int profileIdIndex = Responce.IndexOf("profile_id");
                        string rawId = ExtractScrapedValue(Responce, profileIdIndex, "profile_id", ",");

                        // Strip both HTML-encoded and literal quotes plus the key/value colon.
                        userids = rawId.Replace("&quot;", "").Replace("\"", "").Replace(":", "").Trim();
                        TweetUsername = UserIds;
                    }
                    catch (Exception ex)
                    {
                        LogScrapeWhotoFollowError("1", ex);
                    }
                }

                // Profile name, then the first "u-textUserColor" link after it (used as the website).
                Chilkat.Xml nameNode = xml.SearchForAttribute(searchStart, "h1", "class", "ProfileHeaderCard-name");
                if (nameNode != null)
                {
                    ProfileName = nameNode.AccumulateTagContent("text", "script|style").Replace("Verified account", "");

                    Chilkat.Xml websiteNode = xml.SearchForAttribute(nameNode, "a", "class", "u-textUserColor");
                    if (websiteNode != null)
                    {
                        website = websiteNode.AccumulateTagContent("text", "script|style");
                    }
                }

                Chilkat.Xml bioNode = xml.SearchForAttribute(searchStart, "p", "class", "ProfileHeaderCard-bio u-dir");
                if (bioNode != null)
                {
                    // NOTE(review): these two literals look like HTML entities that were decoded
                    // somewhere in transit (possibly "&#39;" and "&nbsp;"). As written, the first
                    // call is a no-op and the second removes every space - confirm against
                    // version control before changing.
                    Bio = bioNode.AccumulateTagContent("text", "script|style").Replace("'", "'").Replace(" ", string.Empty).Trim();
                }

                Chilkat.Xml locationNode = xml.SearchForAttribute(searchStart, "span", "class", "ProfileHeaderCard-locationText u-dir");
                if (locationNode != null)
                {
                    Location = locationNode.AccumulateTagContent("text", "script|style");
                }

                // Stats: the first node is found with the "js-nav" class list, every following one
                // with the "js-openSignupDialog" class list; they are read as tweets, followings, followers.
                int statIndex = 0;
                Chilkat.Xml statNode = xml.SearchForAttribute(searchStart, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-nav");
                while (statNode != null && statIndex < 3)
                {
                    string statText = statNode.AccumulateTagContent("text", "script|style");

                    if (statIndex == 0)
                    {
                        NoOfTweets = statText.Replace("Tweets", string.Empty).Replace(",", string.Empty).Replace("Tweet", string.Empty);
                    }
                    else if (statIndex == 1)
                    {
                        Followings = statText.Replace(" Following", string.Empty).Replace(",", string.Empty).Replace("Following", string.Empty);
                    }
                    else
                    {
                        Followers = statText.Replace("Followers", string.Empty).Replace(",", string.Empty).Replace("Follower", string.Empty);
                    }

                    statIndex++;
                    statNode = xml.SearchForAttribute(statNode, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-openSignupDialog js-nonNavigable u-textUserColor");
                }

                if (!string.IsNullOrEmpty(userids))
                {
                    // Serialise writers so rows appended to the shared CSV are not interleaved.
                    lock (WhoTofollowThreadLock)
                    {
                        GlobusFileHelper.AppendStringToTextfileNewLine(userids + "," + TweetUsername + "," + ProfileName + "," + Bio.Replace(",", "") + "," + Location.Replace(",", "") + "," + website + "," + NoOfTweets.Replace(",", "").Replace("Tweets", "") + "," + Followers.Replace(",", "").Replace("Following", "") + "," + Followings.Replace(",", "").Replace("Followers", "").Replace("Follower", ""), Globals.Path_KeywordScrapedListData + ".csv");
                    }

                    AddToScrapeLogs("[ " + DateTime.Now + " ] => [ " + userids + "," + TweetUsername + "," + ProfileName + "," + Bio.Replace(",", "") + "," + Location + "," + website + "," + NoOfTweets + "," + Followers + "," + Followings + " ]");
                }
            }
            catch (Exception ex)
            {
                LogScrapeWhotoFollowError("3", ex);
            }
        }

        AddToScrapeLogs("[ " + DateTime.Now + " ] => [ Finished Scraping For " + tweetAccountManager.Username + " ]");
        AddToProxysLogs("-----------------------------------------------------------------------------------------------------------------------");
    }
    catch (Exception ex)
    {
        LogScrapeWhotoFollowError("2", ex);
    }
}

/// <summary>
/// Returns the text that follows <paramref name="marker"/> (located at
/// <paramref name="markerIndex"/>) up to, but not including, the next
/// <paramref name="terminator"/>.
/// </summary>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown when <paramref name="markerIndex"/> is negative or the terminator is not found.
/// </exception>
private static string ExtractScrapedValue(string text, int markerIndex, string marker, string terminator)
{
    // Removing the marker leaves the wanted value at the very start of the remaining text.
    string tail = text.Substring(markerIndex).Replace(marker, "");
    return tail.Substring(0, tail.IndexOf(terminator));
}

/// <summary>
/// Writes a ScrapeWhotoFollow failure to the scrape-users error log (timestamped) and to the
/// general Twitter error log, tagging the message with the numbered <paramref name="step"/>.
/// </summary>
private void LogScrapeWhotoFollowError(string step, Exception ex)
{
    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> ScrapeWhotoFollow() " + step + "--> " + ex.Message, Globals.Path_ScrapeUsersErroLog);
    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> ScrapeWhotoFollow() " + step + "--> " + ex.Message, Globals.Path_TwtErrorLogs);
}