/// <summary>
/// Searches the page for every <paramref name="TagName"/> element whose <c>class</c>
/// attribute equals <paramref name="className"/> and gathers whatever
/// <c>GetHrefFromString</c> extracts from each matching element's XML.
/// </summary>
/// <param name="pageSrcHtml">HTML source of the page; it is converted to XML before searching.</param>
/// <param name="TagName">Tag name passed to the attribute search.</param>
/// <param name="className">Value the <c>class</c> attribute must have.</param>
/// <returns>
/// The combined results for all matching elements, in search order (empty when nothing matched);
/// <c>null</c> when the converter cannot be unlocked or an exception is thrown.
/// </returns>
public List <string> GetHrefsByTagAndAttributeName(string pageSrcHtml, string TagName, string className)
        {
            List <string> lstData = new List <string>();

            try
            {
                Chilkat.HtmlToXml htmlToXml = new Chilkat.HtmlToXml();

                // The converter has to be unlocked before use; report the reason and bail out if that fails.
                // NOTE(review): the unlock code is hard-coded in source - consider moving it to configuration.
                bool success = htmlToXml.UnlockComponent("THEBACHtmlToXml_7WY3A57sZH3O");
                if (!success)
                {
                    Console.WriteLine(htmlToXml.LastErrorText);
                    return(null);
                }

                // Convert the HTML to XML so it can be searched by tag and attribute.
                htmlToXml.Html = pageSrcHtml;
                string xHtml = htmlToXml.ToXml();

                Chilkat.Xml xml = new Chilkat.Xml();
                xml.LoadXml(xHtml);

                // Each search resumes after the previous hit; a null result ends the walk.
                Chilkat.Xml xBeginSearchAfter = null;
                Chilkat.Xml xNode = xml.SearchForAttribute(xBeginSearchAfter, TagName, "class", className);
                while (xNode != null)
                {
                    // Hand the matching node's full XML (markup included) to the href extractor.
                    string nodeXml = xNode.GetXml();
                    lstData.AddRange(GetHrefFromString(nodeXml));

                    xBeginSearchAfter = xNode;
                    xNode = xml.SearchForAttribute(xBeginSearchAfter, TagName, "class", className);
                }

                return(lstData);
            }
            catch (Exception ex)
            {
                // Callers still get null on failure, but the cause is reported instead of being
                // silently discarded (same Console reporting as the unlock-failure path above).
                Console.WriteLine(ex.Message);
                return(null);
            }
        }
예제 #2
0
        public void CrawlingPageDataSource(string Url, ref GlobusHttpHelper HttpHelper)
        {
            // if (SearchCriteria.starter)
            {
                // if (SearchCriteria.starter)
                {
                    try
                    {
                        Log("[ " + DateTime.Now + " ] => [ Start Parsing Process ]");

                        #region Data Initialization

                        string Industry = string.Empty;
                        string URLprofile = string.Empty;
                        string firstname = string.Empty;
                        string lastname = string.Empty;
                        string location = string.Empty;
                        string country = string.Empty;
                        string postal = string.Empty;
                        string phone = string.Empty;
                        string USERemail = string.Empty;
                        string code = string.Empty;
                        string education1 = string.Empty;
                        string education2 = string.Empty;
                        string titlecurrent = string.Empty;
                        string companycurrent = string.Empty;
                        string titlepast1 = string.Empty;
                        string companypast1 = string.Empty;
                        string titlepast2 = string.Empty;
                        string html = string.Empty;
                        string companypast2 = string.Empty;
                        string titlepast3 = string.Empty;
                        string companypast3 = string.Empty;
                        string titlepast4 = string.Empty;
                        string companypast4 = string.Empty;
                        string Recommendations = string.Empty;
                        string Connection = string.Empty;
                        string Designation = string.Empty;
                        string Website = string.Empty;
                        string Contactsettings = string.Empty;
                        string recomandation = string.Empty;

                        string titleCurrenttitle = string.Empty;
                        string titleCurrenttitle2 = string.Empty;
                        string titleCurrenttitle3 = string.Empty;
                        string titleCurrenttitle4 = string.Empty;
                        string Skill = string.Empty;
                        string TypeOfProfile = "Public1";

                        string Finaldata = string.Empty;
                        #endregion

                        #region LDS_DataInitialization
                        string LDS_FirstName = string.Empty;
                        string LDS_LastName = string.Empty;
                        string LDS_UserProfileLink = string.Empty;
                        string LDS_HeadLineTitle = string.Empty;
                        string LDS_CurrentTitle = string.Empty;
                        string LDS_PastTitles = string.Empty;
                        string LDS_Loction = string.Empty;
                        string LDS_Country = string.Empty;
                        string LDS_Connection = string.Empty;
                        string LDS_Recommendations = string.Empty;
                        string LDS_SkillAndExpertise = string.Empty;
                        string LDS_Education = string.Empty;
                        string LDS_Experience = string.Empty;
                        string LDS_ProfileType = "Public";
                        string LDS_Groups = string.Empty;
                        string LDS_UserEmail = string.Empty;
                        string LDS_UserContactNumber = string.Empty;
                        string LDS_CurrentCompany = string.Empty;
                        string LDS_PastCompany = string.Empty;
                        string LDS_LoginID = string.Empty;
                        string LDS_Websites = string.Empty;
                        string LDS_Industry = string.Empty;
                        #endregion

                        #region Chilkat Initialization

                        Chilkat.Http http = new Chilkat.Http();

                        ///Chilkat Http Request to be used in Http Post...
                        Chilkat.HttpRequest req = new Chilkat.HttpRequest();
                        Chilkat.HtmlUtil htmlUtil = new Chilkat.HtmlUtil();

                        // Any string unlocks the component for the 1st 30-days.
                        bool success = http.UnlockComponent("THEBACHttp_b3C9o9QvZQ06");
                        if (success != true)
                        {
                            Console.WriteLine(http.LastErrorText);
                            return;
                        }

                        http.CookieDir = "memory";
                        http.SendCookies = true;
                        http.SaveCookies = true;

                        html = HttpHelper.getHtmlfromUrl1(new Uri(Url));

                        html = htmlUtil.EntityDecode(html);

                        ////  Convert the HTML to XML:
                        Chilkat.HtmlToXml htmlToXml = new Chilkat.HtmlToXml();
                        Chilkat.HtmlToXml htmlToXml1 = new Chilkat.HtmlToXml();
                        Chilkat.HtmlToXml htmlToXml2 = new Chilkat.HtmlToXml();
                        success = htmlToXml.UnlockComponent("THEBACHtmlToXml_7WY3A57sZH3O");
                        if ((success != true))
                        {
                            Console.WriteLine(htmlToXml.LastErrorText);
                            return;
                        }

                        string xHtml = null;
                        string xHtml1 = null;
                        //string xHtml2 = null;

                        htmlToXml.Html = html;
                        xHtml = htmlToXml.ToXml();

                        Chilkat.Xml xml = new Chilkat.Xml();
                        xml.LoadXml(xHtml);

                        ////  Iterate over all h1 tags:
                        Chilkat.Xml xNode = default(Chilkat.Xml);
                        Chilkat.Xml xBeginSearchAfter = default(Chilkat.Xml);

                        #endregion

                        #region for paRSING
                        List<string> list = new List<string>();
                        List<string> Grouplist = new List<string>();
                        List<string> listtitle = new List<string>();
                        List<string> Currentlist = new List<string>();
                        List<string> Skilllst = new List<string>();
                        list.Clear();

                        //new parshing code

                        List<string> TempFirstName = objChilkat.GetDataTagAttributewithId(html, "div", "name-container");

                        xBeginSearchAfter = null;

                        xNode = xml.SearchForTag(xBeginSearchAfter, "dt");

                        Grouplist.Clear();
                        xBeginSearchAfter = null;
                        #region parsergroup
                        xNode = xml.SearchForAttribute(xBeginSearchAfter, "div", "class", "group-data");

                        while ((xNode != null))
                        {
                            Finaldata = xNode.AccumulateTagContent("text", "/text");

                            Grouplist.Add(Finaldata);

                            string[] tempC1 = Regex.Split(Finaldata, " at ");

                            xBeginSearchAfter = xNode;
                            xNode = xml.SearchForAttribute(xBeginSearchAfter, "div", "class", "group-data");

                        }

                        int groupcounter = 0;
                        string AllGRoup = string.Empty;
                        foreach (string item in Grouplist)
                        {
                            if (item.Contains("Join"))
                            {
                                if (groupcounter == 0)
                                {
                                    LDS_Groups = item;
                                    groupcounter++;
                                }
                                else
                                {
                                    LDS_Groups = AllGRoup + ";" + item;
                                }

                            }

                        }
                        #endregion

                        #region parserSkill
                        xNode = xml.SearchForTag(xBeginSearchAfter, "dt");

                        Skilllst.Clear();
                        xBeginSearchAfter = null;

                        xNode = xml.SearchForAttribute(xBeginSearchAfter, "div", "id", "profile-skills");

                        while ((xNode != null))
                        {
                            Finaldata = xNode.AccumulateTagContent("text", "/text");
                            if (Finaldata.Contains("extlib: _toggleclass"))
                            {
                                try
                                {
                                    string[] Temp = Finaldata.Split(';');
                                    LDS_SkillAndExpertise = Temp[4];
                                }
                                catch { }

                            }
                            else
                            {
                                try
                                {
                                    LDS_SkillAndExpertise = Finaldata.Replace("Skills & Expertise", " ");
                                    Skilllst.Add(Finaldata);
                                }
                                catch { }
                            }

                            xBeginSearchAfter = xNode;
                            xNode = xml.SearchForAttribute(xBeginSearchAfter, "div", "id", "profile-skills");

                        }

                        if (LDS_SkillAndExpertise.Contains(" Endorsements LI.i18n.register('section_skills_person_endorsed_tmpl"))
                        {
                            LDS_SkillAndExpertise = string.Empty;
                        }

                        Skilllst.Distinct();
                        #endregion

                        #region UrlProfile
                        try
                        {
                            if (html.Contains("webProfileURL"))
                            {
                                int FirstPointForProfileURL = html.IndexOf("webProfileURL");
                                string FirstSubStringForProfileURL = html.Substring(FirstPointForProfileURL);
                                int SecondPointForProfileURL = FirstSubStringForProfileURL.IndexOf(">");
                                int ThirdPointForProfileURL = FirstSubStringForProfileURL.IndexOf("</a>");

                                string SecondSubStringForProfileURL = FirstSubStringForProfileURL.Substring(SecondPointForProfileURL, ThirdPointForProfileURL - SecondPointForProfileURL);
                                LDS_UserProfileLink = SecondSubStringForProfileURL.Replace(">", string.Empty);
                                //qm.AddProfileUrl(URLprofile, DateTime.Now.ToString(), "0");
                            }
                        }
                        catch (Exception ex)
                        {
                            Console.WriteLine(ex.Message);
                        }

                        try
                        {
                            string[] UrlFull = System.Text.RegularExpressions.Regex.Split(Url, "&authType");
                            LDS_UserProfileLink = UrlFull[0];

                            LDS_UserProfileLink = Url;
                        }
                        catch { }
                        #endregion

                        #region Connection
                        if (html.Contains("overview-connections"))
                        {
                            try
                            {
                                Connection = html.Substring(html.IndexOf("leo-module mod-util connections"), 500);
                                string[] Arr = Connection.Split('>');
                                string tempConnection = Arr[5].Replace("</strong", "").Replace(")</h3", "").Replace("(", "");
                                if (tempConnection.Length < 8)
                                {
                                    LDS_Connection = tempConnection + "Connection";
                                }
                                else
                                {
                                    LDS_Connection = string.Empty;
                                }

                            }
                            catch (Exception ex)
                            {
                                //overview-connections
                                try
                                {
                                    LDS_Connection = html.Substring(html.IndexOf("overview-connections"), 50);
                                    string[] Arr = Connection.Split('>');
                                    string tempConnection = Arr[3].Replace("</strong", "").Replace(")</h3", "").Replace("(", "");
                                    LDS_Connection = tempConnection + "Connection";
                                }
                                catch { }
                            }
                        }
                        #endregion

                        #region Recommendation
                        if (html.Contains("Recommendations"))
                        {

                            try
                            {
                                string[] rList = System.Text.RegularExpressions.Regex.Split(html, "Recommendations");
                                string[] R3List = rList[2].Split('\n');
                                string temprecomandation = R3List[4].Replace("</strong>", "").Replace("<strong>", "");
                                if (temprecomandation.Contains("recommended"))
                                {
                                    LDS_Recommendations = temprecomandation;
                                }
                                else
                                {
                                    LDS_Recommendations = "";
                                }

                            }
                            catch (Exception ex)
                            {
                                LDS_Recommendations = string.Empty;
                            }
                        }
                        #endregion

                        #region Websites
                        if (html.Contains("websites"))
                        {
                            try
                            {
                                string websitedem = html.Substring(html.IndexOf("websites"), 500);

                                string[] Arr = Regex.Split(websitedem, "href");
                                foreach (string item in Arr)
                                {
                                    if (item.Contains("redir/redirect?url"))
                                    {
                                        string tempArr = item.Substring(item.IndexOf("name="), 50);
                                        string[] temarr = tempArr.Split('\n');
                                        LDS_Websites = temarr[1];
                                    }
                                }

                            }
                            catch (Exception ex)
                            {
                                LDS_Websites = string.Empty;
                            }
                        }
                        #endregion

                        #region Getting Industry
                        try
                        {

                            string Industrytemp = html.Substring(html.IndexOf("Find users in this industry"), 100);
                            string[] TempIndustery = Industrytemp.Split('>');
                            LDS_Industry = TempIndustery[1].Replace("</strong", "").Replace("</a", "");

                        }
                        catch (Exception ex)
                        {
                            Console.WriteLine(ex.Message);
                        }
                        #endregion

                        #region Getting First Name
                        try
                        {
                            if (html.Contains("given-name"))
                            {
                                int FirstPointForProfilename = html.IndexOf("given-name");
                                string FirstSubStringForProfilename = html.Substring(FirstPointForProfilename);
                                int SecondPointForProfilename = FirstSubStringForProfilename.IndexOf(">");
                                int ThirdPointForProfilename = FirstSubStringForProfilename.IndexOf("</span>");

                                string SecondSubStringForProfilename = FirstSubStringForProfilename.Substring(SecondPointForProfilename, ThirdPointForProfilename - SecondPointForProfilename);
                                LDS_FirstName = SecondSubStringForProfilename.Replace(">", string.Empty);
                            }
                        }
                        catch (Exception ex)
                        {
                            Console.WriteLine(ex.Message);
                        }
                        #endregion

                        #region LastName
                        try
                        {
                            if (html.Contains("family-name"))
                            {
                                int FirstPointForProfilelastname = html.IndexOf("family-name");
                                string FirstSubStringForProfilelastname = html.Substring(FirstPointForProfilelastname);
                                int SecondPointForProfilelastname = FirstSubStringForProfilelastname.IndexOf(">");
                                int ThirdPointForProfilelastname = FirstSubStringForProfilelastname.IndexOf("</span>");

                                string SecondSubStringForProfilelastname = FirstSubStringForProfilelastname.Substring(SecondPointForProfilelastname, ThirdPointForProfilelastname - SecondPointForProfilelastname);
                                string templastname = SecondSubStringForProfilelastname.Replace(">", string.Empty);
                                if (templastname.Contains(","))
                                {
                                    string[] arrylastname = templastname.Split(',');
                                    LDS_LastName = arrylastname[0];
                                }
                                else
                                {
                                    LDS_LastName = templastname;
                                }
                            }
                        }
                        catch (Exception ex)
                        {
                            Console.WriteLine(ex.Message);
                        }

                        #endregion

                        #region Designation Company Current
                        try
                        {
                            if (html.Contains("phonetic-full-name"))
                            {
                                int FirstPointForProfileCurrent = html.IndexOf("phonetic-full-name");
                                string FirstSubStringForProfileCurrent = html.Substring(FirstPointForProfileCurrent);
                                int SecondPointForProfileCurrent = FirstSubStringForProfileCurrent.IndexOf("display:block");
                                int ThirdPointForProfileCurrent = FirstSubStringForProfileCurrent.IndexOf("</p>");

                                string SecondSubStringForProfileCurrent = FirstSubStringForProfileCurrent.Substring(SecondPointForProfileCurrent, ThirdPointForProfileCurrent - SecondPointForProfileCurrent);
                                titlecurrent = SecondSubStringForProfileCurrent.Replace("\">", "").Replace("display:block", string.Empty).Replace("<strong class=\"highlight\"", string.Empty).Replace("</strong", string.Empty).Trim();
                                string[] tempCCurent = Regex.Split(titlecurrent, " at ");
                                LDS_HeadLineTitle = titlecurrent.Replace(",", ";");
                                LDS_CurrentCompany = tempCCurent[1].Replace(",", ";");

                            }

                            else if (html.Contains("<p class=\"title\""))
                            {
                                LDS_HeadLineTitle = html.Substring(html.IndexOf("<p class=\"title\""), 150);
                                string[] HeadLineTitle = LDS_HeadLineTitle.Split('>');
                                string tempHeadLineTitle = HeadLineTitle[1].Replace("\n", "").Replace(")</h3", "").Replace("</p", "");
                                LDS_HeadLineTitle = tempHeadLineTitle;
                                try
                                {
                                    string[] tempCCurent = Regex.Split(tempHeadLineTitle, " at ");
                                    LDS_HeadLineTitle = tempCCurent[0];
                                    LDS_CurrentCompany = tempCCurent[1];
                                }
                                catch { }
                            }

                        }
                        catch (Exception ex)
                        {
                            Console.WriteLine(ex.Message);
                        }
                        #endregion

                        #region Education
                        try
                        {
                            if (html.Contains("summary-education"))
                            {
                                int FirstPointForProfileeducation1 = html.IndexOf("summary-education");
                                string FirstSubStringForProfileeducation1 = html.Substring(FirstPointForProfileeducation1);
                                int SecondPointForProfileeducation1 = FirstSubStringForProfileeducation1.IndexOf("<li>");
                                int ThirdPointForProfileeducation1 = FirstSubStringForProfileeducation1.IndexOf("</li>");

                                string SecondSubStringForProfileeducation1 = FirstSubStringForProfileeducation1.Substring(SecondPointForProfileeducation1, ThirdPointForProfileeducation1 - SecondPointForProfileeducation1);
                                education1 = SecondSubStringForProfileeducation1.Replace("<li>", string.Empty).Replace(",", string.Empty).Trim();
                            }

                        }
                        catch (Exception ex)
                        {
                            Console.WriteLine(ex.Message);
                        }

                        #endregion

                        #region Country
                        try
                        {
                            if (html.Contains("locality"))
                            {
                                int FirstPointForlocality = html.IndexOf("locality");
                                string FirstSubStringForlocality = html.Substring(FirstPointForlocality);
                                int SecondPointForlocality = FirstSubStringForlocality.IndexOf("location");
                                int ThirdPointForlocality = FirstSubStringForlocality.IndexOf("</a>");

                                string SecondSubStringForlocality = FirstSubStringForlocality.Substring(SecondPointForlocality, ThirdPointForlocality - SecondPointForlocality);
                                string temlocation = SecondSubStringForlocality.Replace("location", string.Empty).Replace(">", string.Empty).Replace('"', ' ');
                                string[] temp = temlocation.Split(',');
                                LDS_Loction = temp[0].Replace("<strong class= highlight", string.Empty).Replace("</strong", string.Empty);
                                LDS_Country = temp[1].Replace("<strong class= highlight", string.Empty).Replace("</strong", string.Empty);
                                // country = temp[1].Replace("</strong", string.Empty);
                            }

                        }
                        catch (Exception ex)
                        {
                            Console.WriteLine(ex.Message);
                        }
                        #endregion

                        #region User Email
                        try
                        {
                            if (html.Contains("Email & Phone:"))
                            {
                                int FirstPointFortitlepast1 = html.IndexOf("abook-email");
                                string FirstSubStringFortitlepast1 = html.Substring(FirstPointFortitlepast1);
                                int SecondPointFortitlepast1 = FirstSubStringFortitlepast1.IndexOf("<a");
                                int ThirdPointFortitlepast1 = FirstSubStringFortitlepast1.IndexOf("</a>");

                                string SecondSubStringFortitlepast1 = FirstSubStringFortitlepast1.Substring(SecondPointFortitlepast1, ThirdPointFortitlepast1 - SecondPointFortitlepast1);
                                string[] tempEmail = SecondSubStringFortitlepast1.Split('>');
                                LDS_UserEmail = tempEmail[1];

                            }

                        }
                        catch (Exception ex)
                        {
                            Console.WriteLine(ex.Message);
                        }

                        #endregion

                        #region Type Of profile
                        try
                        {
                            if (html.Contains("profile-header"))
                            {
                                int FirstPointForProfileType = html.IndexOf("profile-header");
                                string FirstSubStringForProfileType = html.Substring(FirstPointForProfileType);
                                int SecondPointForProfileType = FirstSubStringForProfileType.IndexOf("class=\"n fn\"");
                                int ThirdPointForProfileType = FirstSubStringForProfileType.IndexOf("</span>");

                                string SecondSubStringForProfileType = FirstSubStringForProfileType.Substring(SecondPointForProfileType, ThirdPointForProfileType - SecondPointForProfileType);
                                string[] tempProfileType = SecondSubStringForProfileType.Split('>');
                                string ProfileType = tempProfileType[1];
                                LDS_ProfileType = ProfileType;
                            }
                            //<h1><span id="name" class="n fn">Private</span>
                            else if (html.Contains(" class=\"n fn\""))
                            {
                                try
                                {
                                    string ProfileTypetemp = html.Substring(html.IndexOf("class=\"n fn\""), 20);
                                    string[] TempProfileType = ProfileTypetemp.Split('>');
                                    LDS_ProfileType = TempProfileType[1].Replace("</strong", "").Replace("</a", "");
                                }
                                catch { }
                            }

                            if (LDS_ProfileType != "Public")
                            {
                                LDS_ProfileType = "Private";
                            }

                        }
                        catch (Exception ex)
                        {
                            Console.WriteLine(ex.Message);
                        }
                        #endregion

                        #region PhonNumber
                        try
                        {
                            if (html.Contains("<dt>Phone:</dt>"))
                            {
                                int FirstPointFortitlepast1 = html.IndexOf("profile-personal");
                                string FirstSubStringFortitlepast1 = html.Substring(FirstPointFortitlepast1);
                                int SecondPointFortitlepast1 = FirstSubStringFortitlepast1.IndexOf("<p>");
                                int ThirdPointFortitlepast1 = FirstSubStringFortitlepast1.IndexOf("<span");

                                string SecondSubStringFortitlepast1 = FirstSubStringFortitlepast1.Substring(SecondPointFortitlepast1, ThirdPointFortitlepast1 - SecondPointFortitlepast1);
                                LDS_UserContactNumber = SecondSubStringFortitlepast1.Replace("<p>", string.Empty);
                            }

                        }
                        catch (Exception ex)
                        {
                            Console.WriteLine(ex.Message);
                        }
                        #endregion

                        xNode = xml.SearchForTag(xBeginSearchAfter, "dt");
                        xBeginSearchAfter = xNode;

                        list.Clear();

                        #endregion

                        #region Regionfor PastCompney
                        try
                        {
                            if (html.Contains("summary-past"))
                            {

                                int FirstPointForPasttitle = html.IndexOf("summary-past");
                                string FirstSubStringForPasttitle = html.Substring(FirstPointForPasttitle);
                                int SecondPointForPasttitle = FirstSubStringForPasttitle.IndexOf("<li>");
                                int ThirdPointForPasttitle = FirstSubStringForPasttitle.IndexOf("summary-education");
                                string SecondSubStringForPasttitle = FirstSubStringForPasttitle.Substring(SecondPointForPasttitle, ThirdPointForPasttitle - SecondPointForPasttitle);
                                string FirstSubStringForPasttitlelast = htmlUtil.EntityDecode(SecondSubStringForPasttitle);

                                htmlToXml1.Html = FirstSubStringForPasttitlelast;
                                xHtml1 = htmlToXml1.ToXml();

                                Chilkat.Xml xml1 = new Chilkat.Xml();
                                xml1.LoadXml(xHtml1);

                                ////  Iterate over all h1 tags:
                                Chilkat.Xml xNode1 = default(Chilkat.Xml);
                                Chilkat.Xml xBeginSearchAfter1 = default(Chilkat.Xml);

                                list.Clear();
                                string[] tempC1 = null;
                                xNode1 = xml1.SearchForTag(xBeginSearchAfter1, "li");

                                while ((xNode1 != null))
                                {
                                    Finaldata = xNode1.AccumulateTagContent("text", "/text");
                                    listtitle.Add(Finaldata);
                                    // list.Add(Finaldata);

                                    try
                                    {
                                        tempC1 = Regex.Split(Finaldata, " at ");
                                    }
                                    catch { }
                                    if (tempC1 != null)
                                    {
                                        try
                                        {
                                            list.Add(tempC1[1]);
                                        }
                                        catch { }

                                    }

                                    xNode1 = xml1.SearchForTag(xBeginSearchAfter1, "li");
                                    xBeginSearchAfter1 = xNode1;
                                }

                                if (listtitle.Count > 0 || list.Count > 0)
                                {
                                    try
                                    {
                                        titlepast1 = listtitle[0] != null ? listtitle[0] : string.Empty;
                                        titlepast2 = listtitle[1] != null ? listtitle[1] : string.Empty;
                                        titlepast3 = listtitle[2] != null ? listtitle[2] : string.Empty;
                                        titlepast4 = listtitle[3] != null ? listtitle[3] : string.Empty;
                                    }
                                    catch { }

                                    try
                                    {
                                        companypast1 = list[0] != null ? list[0] : string.Empty;

                                        companypast2 = list[1] != null ? list[1] : string.Empty;

                                        companypast3 = list[2] != null ? list[2] : string.Empty;

                                        companypast4 = list[3] != null ? list[3] : string.Empty;
                                    }
                                    catch { }
                                }

                            }
                        }
                        catch { };

                        list.Clear();
                        #endregion

                        string companyCurrenttitle1 = string.Empty;

                        string companyCurrenttitle2 = string.Empty;

                        string companyCurrenttitle3 = string.Empty;

                        string companyCurrenttitle4 = string.Empty;

                        #region Regionfor summary-current
                        try
                        {
                            if (html.Contains("summary-current"))
                            {

                                int FirstPointForCurrenttitle = html.IndexOf("summary-current");
                                string FirstSubStringForCurrenttitle = html.Substring(FirstPointForCurrenttitle);
                                int SecondPointForCurrenttitle = FirstSubStringForCurrenttitle.IndexOf("<li>");
                                int ThirdPointForCurrenttitle = FirstSubStringForCurrenttitle.IndexOf("summary-past");
                                string SecondSubStringForCurrenttitle = FirstSubStringForCurrenttitle.Substring(SecondPointForCurrenttitle, ThirdPointForCurrenttitle - SecondPointForCurrenttitle);
                                string FirstSubStringForCurrenttitlelast = htmlUtil.EntityDecode(SecondSubStringForCurrenttitle);

                                htmlToXml1.Html = FirstSubStringForCurrenttitlelast;
                                xHtml1 = htmlToXml1.ToXml();

                                Chilkat.Xml xml1 = new Chilkat.Xml();
                                xml1.LoadXml(xHtml1);

                                ////  Iterate over all h1 tags:
                                Chilkat.Xml xNode1 = default(Chilkat.Xml);
                                Chilkat.Xml xBeginSearchAfter1 = default(Chilkat.Xml);

                                Currentlist.Clear();
                                list.Clear();
                                string[] tempC1 = null;
                                xNode1 = xml1.SearchForTag(xBeginSearchAfter1, "li");

                                while ((xNode1 != null))
                                {
                                    Finaldata = xNode1.AccumulateTagContent("text", "/text");
                                    Currentlist.Add(Finaldata);
                                    // list.Add(Finaldata);

                                    try
                                    {
                                        tempC1 = Regex.Split(Finaldata, " at ");
                                    }
                                    catch { }
                                    if (tempC1 != null)
                                    {
                                        try
                                        {
                                            list.Add(tempC1[1]);
                                        }
                                        catch { }

                                    }

                                    xNode1 = xml1.SearchForTag(xBeginSearchAfter1, "li");
                                    xBeginSearchAfter1 = xNode1;
                                }

                                if (Currentlist.Count > 0 || list.Count > 0)
                                {
                                    try
                                    {
                                        titleCurrenttitle = Currentlist[0] != null ? Currentlist[0] : string.Empty;
                                        titleCurrenttitle2 = Currentlist[1] != null ? Currentlist[1] : string.Empty;
                                        titleCurrenttitle3 = Currentlist[2] != null ? Currentlist[2] : string.Empty;
                                        titleCurrenttitle4 = Currentlist[3] != null ? Currentlist[3] : string.Empty;
                                    }
                                    catch { }

                                    try
                                    {
                                        companyCurrenttitle1 = list[0] != null ? list[0] : string.Empty;

                                        companyCurrenttitle2 = list[1] != null ? list[1] : string.Empty;

                                        companyCurrenttitle3 = list[2] != null ? list[2] : string.Empty;

                                        companyCurrenttitle4 = list[3] != null ? list[3] : string.Empty;
                                    }
                                    catch { }
                                }

                            }
                        }
                        catch { };

                        list.Clear();
                        #endregion

                        #region RegionForEDUCATION
                        try
                        {
                            if (html.Contains("summary-education"))
                            {

                                int FirstPointForEDUCATION = html.IndexOf("summary-education");
                                string FirstSubStringForEDUCATION = html.Substring(FirstPointForEDUCATION);
                                int SecondPointForEDUCATION = FirstSubStringForEDUCATION.IndexOf("<li>");
                                int ThirdPointForEDUCATION = FirstSubStringForEDUCATION.IndexOf("</ul>");
                                string SecondSubStringForEDUCATION = FirstSubStringForEDUCATION.Substring(SecondPointForEDUCATION, ThirdPointForEDUCATION - SecondPointForEDUCATION);
                                //string tempEDu = SecondSubStringForEDUCATION.Replace("<li>", string.Empty).Replace("</li>", string.Empty).Replace("  ", string.Empty).Replace("\n", string.Empty).Replace("\t", string.Empty).Trim();
                                string temptg = SecondSubStringForEDUCATION.Replace("<li>", "");

                                string[] templis6t = temptg.Split('/');
                                education1 = templis6t[0].Replace("\n", string.Empty).Replace("\t", string.Empty).Replace("<", string.Empty).Replace("span>", string.Empty).Replace(",", string.Empty).Trim();
                                education2 = templis6t[1].Replace("\n", string.Empty).Replace("\t", string.Empty).Replace("li>", string.Empty).Replace("<", string.Empty).Replace("span>", string.Empty).Replace(",", string.Empty).Trim();
                            }
                        }

                        catch { };

                        list.Clear();
                        #endregion

                        string GroupPastJob = string.Empty;
                        string GroupEduction = string.Empty;
                        LDS_PastTitles = titlepast1 + ";" + titlepast3;
                        LDS_PastCompany = companypast1 + ";" + companypast3;
                        LDS_Education = education1 + ";" + education2;
                        LDS_CurrentTitle = titleCurrenttitle;
                        LDS_LoginID = _UserName;//SearchCriteria.LoginID;                                                                                                                       //"ProfileType" + "," + "UserProfileLink" + "," + "FirstName" + "," + "LastName" + "," + "HeadLineTitle" + "," + "CurrentTitle " + "," + "Company" + "," + "Connection" + "," + "Recommendations " + "," + "SkillAndExpertise " + "," + "Experience " + "," + " Education" + "," + "Groups" + "," + "UserEmail" + "," + "UserContactNumbe" + "," + "PastTitles" + "," + "PastCompany" + "," + "Loction" + "," + "Country" + "," + "titlepast3" + "," + "companypast3" + "," + "titlepast4" + "," + "companypast4" + ",";
                        string LDS_FinalData = LDS_ProfileType.Replace(",", ";") + "," + LDS_UserProfileLink.Replace(",", ";") + "," + LDS_FirstName.Replace(",", ";") + "," + LDS_LastName.Replace(",", ";") + "," + LDS_HeadLineTitle.Replace(",", ";") + "," + LDS_CurrentTitle.Replace(",", ";") + "," + LDS_CurrentCompany.Replace(",", ";") + "," + LDS_Connection.Replace(",", ";") + "," + LDS_Recommendations.Replace(",", ";") + "," + LDS_SkillAndExpertise.Replace(",", ";") + "," + LDS_Experience.Replace(",", ";") + "," + LDS_Education.Replace(",", ";") + "," + LDS_Groups.Replace(",", ";") + "," + LDS_UserEmail.Replace(",", ";") + "," + LDS_UserContactNumber.Replace(",", ";") + "," + LDS_PastTitles.Replace(",", ";") + "," + LDS_PastCompany.Replace(",", ";") + "," + LDS_Loction.Replace(",", ";") + "," + LDS_Country.Replace(",", ";") + "," + LDS_Industry.Replace(",", ";") + "," + LDS_Websites.Replace(",", ";") + "," + LDS_LoginID.Replace(",", ";") + ",";

                        if (LDS_FinalData.Contains("<strong class=\"highlight\"") || LDS_FinalData.Contains("<span class=\"full-name\"") || LDS_FinalData.Contains("<strong class=\"highlight\"") || LDS_FinalData.Contains("overview-connections\">"))
                        {
                            LDS_FinalData = LDS_FinalData.Replace("<span class=\"full-name\"", "").Replace("\n", "").Replace("<strong class=\"highlight\"", "").Replace("overview-connections\">", "").Replace("</strong>", "").Replace("<strong>", "");
                        }
                        if (!string.IsNullOrEmpty(LDS_FirstName) || !string.IsNullOrEmpty(LDS_FirstName))
                        {
                            Log(LDS_FinalData);
                        }
                        // if (SearchCriteria.starter)
                        {

                            string tempFinalData = LDS_FinalData.Replace(";", "").Replace(LDS_UserProfileLink, "").Replace("Public", "").Replace(",", "").Replace(LDS_LoginID, "").Trim();

                            if (!string.IsNullOrEmpty(tempFinalData))
                            {
                                //AppFileHelper.AddingLinkedInDataToCSVFile(LDS_FinalData, Globals.path_ScrappedMembersFromGroup);

                                string CSVHeader = "ProfileType" + "," + "UserProfileLink" + "," + "FirstName" + "," + "LastName" + "," + "HeadLineTitle" + "," + "Current Title " + "," + "Current Company" + "," + "Connection" + "," + "Recommendations " + "," + "SkillAndExpertise " + "," + "Experience " + "," + " Education" + "," + "Groups" + "," + "UserEmail" + "," + "UserContactNumber" + "," + "PastTitles" + "," + "PastCompany" + "," + "Location" + "," + "Country" + "," + "Industry" + "," + "WebSites" + "," + "LinkedInLoginID" + ",";
                                string CSV_Content = TypeOfProfile.Replace(",", ";") + "," + LDS_UserProfileLink.Replace(",", ";") + "," + LDS_FirstName.Replace(",", ";") + "," + LDS_LastName.Replace(",", ";") + "," + LDS_HeadLineTitle.Replace(",", ";") + "," + LDS_CurrentTitle.Replace(",", ";") + "," + LDS_CurrentCompany.Replace(",", ";") + "," + LDS_Connection.Replace(",", ";") + "," + LDS_Recommendations.Replace(",", ";") + "," + LDS_SkillAndExpertise.Replace(",", ";") + "," + LDS_Experience.Replace(",", ";") + "," + LDS_Education.Replace(",", ";") + "," + LDS_Groups.Replace(",", ";") + "," + LDS_UserEmail.Replace(",", ";") + "," + LDS_UserContactNumber.Replace(",", ";") + "," + LDS_PastTitles.Replace(",", ";") + "," + LDS_PastCompany.Replace(",", ";") + "," + LDS_Loction.Replace(",", ";") + "," + LDS_Country.Replace(",", ";") + "," + LDS_Industry.Replace(",", ";") + "," + LDS_Websites.Replace(",", ";") + "," + LDS_LoginID.Replace(",", ";");

                                //string CSV_Content = TypeOfProfile + "," + LDS_UserProfileLink + "," + firstname + "," + lastname + "," + Company.Replace(",", ";") + "," + titlecurrent.Replace(",", ";") + "," + companycurrent.Replace(",", ";") + "," + Connection.Replace(",", ";") + "," + recomandation.Replace(",", string.Empty) + "," + Skill.Replace(",", ";") + "," + LDS_Experience.Replace(",", string.Empty) + "," + EducationCollection.Replace(",", ";") + "," + groupscollectin.Replace(",", ";") + "," + USERemail.Replace(",", ";") + "," + LDS_UserContact.Replace(",", ";") + "," + LDS_PastTitles + "," + AllComapny.Replace(",", ";") + "," + country.Replace(",", ";") + "," + location.Replace(",", ";") + "," + Industry.Replace(",", ";") + "," + Website.Replace(",", ";") + "," + LDS_LoginID + ",";// +TypeOfProfile + ",";

                                CSVUtilities.ExportDataCSVFile(CSVHeader, CSV_Content, Globals.path_ScrappedMembersFromGroup);
                                Log("[ " + DateTime.Now + " ] => [ Data Saved In CSV File With URL >>> " + LDS_UserProfileLink + " ]");
                            }

                            //if (!string.IsNullOrEmpty(LDS_FirstName) || !string.IsNullOrEmpty(LDS_FirstName))
                            //{
                            //    AppFileHelper.AddingLinkedInDataToCSVFile(LDS_FinalData, SearchCriteria.FileName);
                            //}
                        }

                    }
                    catch (Exception ex) { };

                }

            }
        }
        /// <summary>
        /// Accepts the LinkedIn invitations found in the inbox reachable through <paramref name="httpHelper"/>.
        /// </summary>
        /// <remarks>
        /// Steps: download the pending-invitations page, scrape a CSRF token and the account holder's
        /// first/last name from it, log the invitation count, then walk the invitations inbox in steps
        /// of 10 rows and request the "invitationAccept" action for every list item that carries a
        /// <c>data-gid</c> attribute. The walk repeats while the inbox page contains the text
        /// "is now a connection.".
        /// The method is best-effort: failures are written to the console and never propagate to the caller.
        /// </remarks>
        /// <param name="httpHelper">
        /// HTTP helper used for every request (presumably carries the logged-in session - confirm with caller).
        /// </param>
        public void StartAcceptInvitations(ref GlobusHttpHelper httpHelper)
        {
            try
            {
                string csrfToken     = string.Empty;
                string userFirstName = string.Empty;
                string UserLastName  = string.Empty;
                string SenderName    = string.Empty;
                string newPagesource = string.Empty;
                int    startRow      = 1;

                string pageSource = httpHelper.getHtmlfromUrl1(new Uri("http://www.linkedin.com/inbox/invitations/pending"));
                if (string.IsNullOrEmpty(pageSource))
                {
                    // Nothing to parse; the original code would have died on a NullReferenceException here.
                    return;
                }

                #region Account holder name
                // Best-effort: the name sits between alt= and height in the markup returned by FindTheUserName.
                try
                {
                    var userDetails = FindTheUserName(pageSource);
                    int altPos      = userDetails.IndexOf("alt=");
                    int heightPos   = userDetails.IndexOf("height");

                    userDetails = userDetails.Substring(altPos, heightPos - altPos).Replace("alt=", string.Empty).Replace("/", string.Empty).Trim();

                    string[] nameParts = userDetails.Split(' ');
                    userFirstName = nameParts[0].Replace("\"", string.Empty);
                    UserLastName  = nameParts[1].Replace("\"", string.Empty);
                }
                catch (Exception ex)
                {
                    // Missing markers or a single-word name: keep whatever was parsed so far.
                    Console.WriteLine(ex.Message);
                }
                #endregion

                #region CSRF token
                int tokenPos = pageSource.IndexOf("csrfToken", StringComparison.Ordinal);
                if (tokenPos >= 0)
                {
                    // Look at (at most) the next 100 characters; clamp so a token near the end of the page cannot throw.
                    int    windowLength = Math.Min(100, pageSource.Length - tokenPos);
                    string tokenWindow  = pageSource.Substring(tokenPos, windowLength);

                    foreach (string item in tokenWindow.Split('"'))
                    {
                        int keyPos = item.IndexOf("csrfToken=", StringComparison.Ordinal);
                        if (keyPos < 0)
                        {
                            continue;
                        }

                        int ampPos = item.IndexOf("&", keyPos, StringComparison.Ordinal);
                        if (ampPos >= 0)
                        {
                            // Usual case: the value ends at the next query-string separator.
                            csrfToken = item.Substring(keyPos, ampPos - keyPos).Replace("csrfToken=", string.Empty).Replace("\"", string.Empty).Replace("\\", string.Empty).Trim();
                        }
                        else
                        {
                            // No separator: take the whole fragment minus the key.
                            csrfToken = item.Replace("csrfToken=", string.Empty).Trim();
                        }

                        break;
                    }
                    // If no "csrfToken=" pair was found the token stays empty instead of holding raw HTML.
                }
                #endregion

                #region Convert HTML to XML
                ChilkatHttpHelpr objhelper = new ChilkatHttpHelpr();
                string xHtml = objhelper.ConvertHtmlToXml(pageSource);

                Chilkat.Xml xml = new Chilkat.Xml();
                xml.LoadXml(xHtml);
                #endregion

                #region Invitation count
                try
                {
                    Chilkat.Xml searchFromStart = null;
                    Chilkat.Xml countNode       = xml.SearchForAttribute(searchFromStart, "span", "class", "invitation-count count ");

                    if (countNode != null)
                    {
                        string Invitatincount = countNode.AccumulateTagContent("text", "script|style");

                        // NOTE(review): the original expression after "UserName = " was masked out ("******") in the
                        // source; rebuilt from the scraped first/last name - confirm the intended value.
                        Log("[ " + DateTime.Now + " ] => [ Invitation Count = " + Invitatincount + " UserName = " + userFirstName + " " + UserLastName + " ]");
                        Log("-----------------------------------------------------------------------------------------------------------------------------------");
                    }
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex.Message);
                }
                #endregion

                #region Accept invitations page by page
                do
                {
                    newPagesource = httpHelper.getHtmlfromUrl1(new Uri("https://www.linkedin.com/inbox/invitations?keywords=&sortBy=&startRow=" + startRow + "&subFilter=&trk=&showBlocked=false"));
                    if (string.IsNullOrEmpty(newPagesource))
                    {
                        break;
                    }

                    if (newPagesource.Contains("inbox-list"))
                    {
                        string inbox_list = httpHelper.GetDataWithTagValueByTagAndAttributeNameWithClass(newPagesource, "ol", "inbox-list ");

                        if (inbox_list != null && inbox_list.Contains("<li"))
                        {
                            foreach (string item in Regex.Split(inbox_list, "<li"))
                            {
                                try
                                {
                                    if (!item.Contains("data-gid=\""))
                                    {
                                        continue;
                                    }

                                    // Value of the data-gid attribute (10 == length of the 'data-gid="' prefix).
                                    int    gidStart = item.IndexOf("data-gid=\"") + 10;
                                    int    gidEnd   = item.IndexOf("\"", gidStart);
                                    string data_gid = item.Substring(gidStart, gidEnd - gidStart).Trim();

                                    // Sender name: text between alt= and height inside the list item.
                                    string afterAlt  = item.Substring(item.IndexOf("alt=")).Replace("alt=", string.Empty);
                                    int    heightPos = afterAlt.IndexOf("height");
                                    SenderName = afterAlt.Substring(0, heightPos).Replace("\"", string.Empty).Trim();

                                    string response2 = httpHelper.getHtmlfromUrl1(new Uri("http://www.linkedin.com/inbox/action?mboxItemGID=" + data_gid + "&actionType=invitationAccept&csrfToken=" + csrfToken + "&goback=%2Epiv_*1_*1_*1_*1_*1&trk=inbox-invitations-inv-accept&ctx=inbox&rnd=1366352095313"));

                                    if (response2.Contains(" are now connected"))
                                    {
                                        string SuccessMsg = string.Empty;
                                        int    startindex = response2.IndexOf("<div class=\"confirmation\">");

                                        if (startindex >= 0)
                                        {
                                            try
                                            {
                                                string start    = response2.Substring(startindex).Replace("<div class=\"confirmation\">", string.Empty);
                                                int    endindex = start.IndexOf("<ul>");
                                                string end      = start.Substring(0, endindex);

                                                // NOTE(review): this assignment had been swallowed into a comment by a "******"
                                                // masking artifact, leaving SuccessMsg always empty. Rebuilt from the visible
                                                // fragment; an operand between "User: " and "( Name:" was masked and is unknown.
                                                SuccessMsg = end.Replace("<h4>", string.Empty).Replace("\"", string.Empty).Replace("</h4>", string.Empty).Replace("\n", string.Empty).Replace("\t", string.Empty).Replace("\"u002", "-").Replace("You", "User: " + "( Name:" + userFirstName + "  " + UserLastName + ") ").Trim();
                                            }
                                            catch (Exception ex)
                                            {
                                                Console.WriteLine(ex.Message);
                                            }
                                        }

                                        // Do not append blank lines when the confirmation text could not be extracted.
                                        if (!string.IsNullOrEmpty(SuccessMsg))
                                        {
                                            GlobusFileHelper.AppendStringToTextfileNewLine(SuccessMsg, Globals.path_AcceptInvitationEmail);
                                            Log("[ " + DateTime.Now + " ] => [ " + SuccessMsg + " ]");
                                        }
                                    }

                                    if (!response2.Contains(SenderName))
                                    {
                                        Log("[ " + DateTime.Now + " ] => [ Invitation accepted from " + SenderName + " ]");
                                    }
                                }
                                catch (Exception ex)
                                {
                                    // A malformed list item must not stop the remaining invitations.
                                    Console.WriteLine(ex.Message);
                                }
                            }
                        }
                        else
                        {
                            Log("[ " + DateTime.Now + " ] => [ No more invitations left to accept ! ]");
                        }
                    }

                    startRow = startRow + 10;
                } while (newPagesource.Contains("is now a connection."));
                #endregion
            }
            catch (Exception ex)
            {
                // Best-effort operation: report and return rather than crash the caller.
                Console.WriteLine(ex.Message);
            }
        }
예제 #4
0
        public void StartAcceptInvitations(ref GlobusHttpHelper httpHelper)
        {
            try
            {
                string csrfToken = string.Empty;
                string userFirstName = string.Empty;
                string UserLastName = string.Empty;
                string SenderName = string.Empty;
                string newPagesource = string.Empty;
                bool isTrue = false;
                int startRow = 1;

                string pageSource = httpHelper.getHtmlfromUrl1(new Uri("http://www.linkedin.com/inbox/invitations/pending"));

                var resultForUserDetails = FindTheUserName(pageSource);
                try
                {
                    resultForUserDetails = resultForUserDetails.Substring(resultForUserDetails.IndexOf("alt="), resultForUserDetails.IndexOf("height") - resultForUserDetails.IndexOf("alt=")).Replace("alt=", string.Empty).Replace("/", string.Empty).Trim();
                    userFirstName = resultForUserDetails.Split(' ')[0].Replace("\"", string.Empty);
                    UserLastName = resultForUserDetails.Split(' ')[1].Replace("\"", string.Empty);
                }
                catch { }

                if (pageSource.Contains("csrfToken"))
                {
                    csrfToken = pageSource.Substring(pageSource.IndexOf("csrfToken"), 100);
                    string[] Arr = csrfToken.Split('"');
                    try
                    {
                        foreach (string item in Arr)
                        {
                            try
                            {
                                if (item.Contains("csrfToken="))
                                {
                                    csrfToken = item.Substring(item.IndexOf("csrfToken="), item.IndexOf("&", item.IndexOf("csrfToken=")) - item.IndexOf("csrfToken=")).Replace("csrfToken=", string.Empty).Replace("\"", string.Empty).Replace("\\", string.Empty).Trim();//Arr[2].Replace(@"\", string.Empty).Replace("//", string.Empty);

                                    break;
                                }
                            }
                            catch (Exception ex)
                            {
                            }

                            if (item.Contains("csrfToken="))
                            {
                                csrfToken = item.Replace("csrfToken=", string.Empty).Trim();

                                break;
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        //csrfToken = Arr[0].Replace("csrfToken=", "").Replace("\\", "");
                    }
                }

                // For Show More

                //string postData1 = "pkey=inbox-invitations-pending&tcode=%5Bobject%20Arguments%5D&plist=";
                //string response1 = httpHelper.postFormData(new Uri("http://www.linkedin.com/lite/web-action-track?csrfToken="+csrfToken+""),postData1);
                //string pageSource2=httpHelper.getHtmlfromUrl(new Uri("http://www.linkedin.com/inbox/invitations/pending/more?sinceDate=1366351490125&startRow=6&count=20&showBlocked=false&ctx=inbox&rnd=1366353236172"));
                //*** Conver HTML to XML *******************************//
                #region Convert HTML to XML
                ChilkatHttpHelpr objhelper = new ChilkatHttpHelpr();
                //xHtml contain xml data
                string xHtml = objhelper.ConvertHtmlToXml(pageSource);

                Chilkat.Xml xml = new Chilkat.Xml();
                xml.LoadXml(xHtml);
                //xHtml.

                ////  Iterate over all h1 tags:
                Chilkat.Xml xNode = default(Chilkat.Xml);
                Chilkat.Xml xBeginSearchAfter = default(Chilkat.Xml);
                #endregion

                #region Invitatin count
                xBeginSearchAfter = null;
                xNode = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "invitation-count count ");

                try
                {
                    while ((xNode != null))
                    {
                        string strvalue = xNode.AccumulateTagContent("text", "script|style");
                        string Invitatincount = strvalue;
                        Log("[ " + DateTime.Now + " ] => [ Invitation Count = " + Invitatincount + " UserName = "******" ]");
                        Log("-----------------------------------------------------------------------------------------------------------------------------------");
                        break;
                    }
                }
                catch (Exception ex)
                {

                }

                #endregion

                do
                {
                    newPagesource = httpHelper.getHtmlfromUrl1(new Uri("https://www.linkedin.com/inbox/invitations?keywords=&sortBy=&startRow=" + startRow + "&subFilter=&trk=&showBlocked=false"));
                if (newPagesource.Contains("inbox-list"))
                {
                    string inbox_list = httpHelper.GetDataWithTagValueByTagAndAttributeNameWithClass(newPagesource, "ol", "inbox-list ");

                    if (inbox_list.Contains("<li"))
                    {
                        isTrue = true;
                        string[] srrLi = Regex.Split(inbox_list, "<li");

                        foreach (string item in srrLi)
                        {
                            try
                            {
                                if (item.Contains("data-gid=\""))
                                {
                                    string data_gid = item.Substring(item.IndexOf("data-gid=\"") + 10, item.IndexOf("\"", item.IndexOf("data-gid=\"") + 10) - (item.IndexOf("data-gid=\"") + 10)).Replace("\"", string.Empty).Replace("data-gid=\"", string.Empty).Trim();
                                    int startindex1 = item.IndexOf("alt=");
                                    string start1 = item.Substring(startindex1).Replace("alt=",string.Empty);
                                    int endindex1 = start1.IndexOf("height");
                                    string end1 = start1.Substring(0, endindex1).Replace("\"", string.Empty).Trim();
                                    SenderName = end1;

                                    string response2 = httpHelper.getHtmlfromUrl1(new Uri("http://www.linkedin.com/inbox/action?mboxItemGID=" + data_gid + "&actionType=invitationAccept&csrfToken=" + csrfToken + "&goback=%2Epiv_*1_*1_*1_*1_*1&trk=inbox-invitations-inv-accept&ctx=inbox&rnd=1366352095313"));

                                    if (response2.Contains(" are now connected"))
                                    {
                                        string SuccessMsg = string.Empty;
                                        int startindex = response2.IndexOf("<div class=\"confirmation\">");

                                        if (startindex > 0)
                                        {
                                            try
                                            {
                                                string start = response2.Substring(startindex).Replace("<div class=\"confirmation\">", string.Empty);
                                                int endindex = start.IndexOf("<ul>");
                                                string end = start.Substring(0, endindex);
                                                //SuccessMsg = end.Replace("<h4>", string.Empty).Replace("\"", string.Empty).Replace("</h4>", string.Empty).Replace("\n", string.Empty).Replace("\t", string.Empty).Replace("\"u002", "-").Replace("You", "User: "******"<h4>", string.Empty).Replace("\"", string.Empty).Replace("</h4>", string.Empty).Replace("\n", string.Empty).Replace("\t", string.Empty).Replace("\"u002", "-").Replace("You", "User: "******"( Name:" + userFirstName + "  " + UserLastName + ") ").Trim();

                                            }
                                            catch { }
                                        }

                                        GlobusFileHelper.AppendStringToTextfileNewLine(SuccessMsg, Globals.path_AcceptInvitationEmail);
                                        Log("[ " + DateTime.Now + " ] => [ " + SuccessMsg + " ]");

                                    }
                                    if (!(response2.Contains(SenderName)))
                                    {
                                        Log("[ " + DateTime.Now + " ] => [ Invitation accepted from " + SenderName + " ]");
                                    }
                                    else
                                    {
                                        //Log("There is some error !");
                                    }
                                }
                            }
                            catch (Exception ex)
                            {
                            }
                        }

                    }
                    else
                    {
                        //Log("[ " + DateTime.Now + " ] => [ There is no invitation ! ]");
                        Log("[ " + DateTime.Now + " ] => [ No more invitations left to accept ! ]");

                    }
                }
                startRow = startRow + 10;
                } while (newPagesource.Contains("is now a connection."));
                //else
                //{
                //    Log("[ " + DateTime.Now + " ] => [ There is no invitation ! ]");
                //}

                //if (isTrue)
                //{
                //    StartAcceptInvitations(ref httpHelper);
                //}
            }
            catch (Exception ex)
            {
            }
        }
예제 #5
0
        /// <summary>
        /// Converts the supplied HTML to XML with Chilkat, walks every node found by
        /// <c>SearchForAttribute</c> for the given tag and <c>class</c> value, and gathers
        /// whatever <c>GetHrefFromString</c> returns for each node's XML.
        /// </summary>
        /// <param name="pageSrcHtml">HTML source to convert and search.</param>
        /// <param name="TagName">Tag name passed to the attribute search.</param>
        /// <param name="className">Value passed to the search for the <c>class</c> attribute.</param>
        /// <returns>
        /// The accumulated list (possibly empty) on success; <c>null</c> when the Chilkat
        /// component cannot be unlocked or when any exception is thrown along the way.
        /// </returns>
        public List<string> GetHrefsByTagAndAttributeName(string pageSrcHtml, string TagName, string className)
        {
            List<string> collectedHrefs = new List<string>();

            try
            {
                Chilkat.HtmlToXml converter = new Chilkat.HtmlToXml();

                // Without a successful unlock the converter is not used; report the reason and bail out.
                bool unlocked = converter.UnlockComponent("THEBACHtmlToXml_7WY3A57sZH3O");
                if (!unlocked)
                {
                    Console.WriteLine(converter.LastErrorText);
                    return null;
                }

                // HTML -> XML text, then load that text into a searchable document.
                converter.Html = pageSrcHtml;
                string convertedXml = converter.ToXml();

                Chilkat.Xml document = new Chilkat.Xml();
                document.LoadXml(convertedXml);

                // Each search resumes after the previously returned node; a null result ends the walk.
                Chilkat.Xml searchStart = null;
                for (Chilkat.Xml match = document.SearchForAttribute(searchStart, TagName, "class", className);
                     match != null;
                     match = document.SearchForAttribute(searchStart, TagName, "class", className))
                {
                    // The node's XML is handed to the href extractor and its results are appended.
                    string nodeXml = match.GetXml();
                    collectedHrefs.AddRange(GetHrefFromString(nodeXml));

                    searchStart = match;
                }

                return collectedHrefs;
            }
            catch (Exception)
            {
                // Any failure discards partial results: callers receive null, not a partial list.
                collectedHrefs = null;
                return collectedHrefs;
            }
        }
예제 #6
0
        private void btnStart_Searching_Click(object sender, EventArgs e)
        {


            AllOfTheseWords = (txtAllofTheseKeywords.Text).ToString();
            ThisExtractPhrase = (txtThisExactPhrase.Text).ToString();
            AnyOfTheseWords = (txtAnyOfTheseWords.Text).ToString();
            TheseHashTags = (txtTheseHashTags.Text).ToString();
            NoneOfTheseWords = (txtNoneofTheseWords.Text).ToString();
            FromTheseAccounts = (txtFromTheseAccounts.Text).ToString();
            ToTheseAccounts = (txtToTheseAccounts.Text).ToString();
            MentionTheseAccounts = (txtMentioningTheseAccounts.Text).ToString();
            NearThisPlace = (txtNearThisPlace.Text).ToString();

            
            AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => Process Started");

            try
            {
                if (string.IsNullOrEmpty(ThisExtractPhrase))
                {
                    ThisExtractPhrase = "";
                }
                else
                {
                    ThisExtractPhrase = "%20%22" + ThisExtractPhrase;
                }
            }
            catch { }

            try
            {

                if (string.IsNullOrEmpty(AnyOfTheseWords))
                {
                    AnyOfTheseWords = "";
                }
                else
                {
                    AnyOfTheseWords = "%22%20" + AnyOfTheseWords;
                }
            }
            catch
            { }


            try
            {
                if (string.IsNullOrEmpty(TheseHashTags))
                {
                    TheseHashTags = "";
                }
                else
                {
                    TheseHashTags = "%20%23" + TheseHashTags;
                }
            }
            catch
            { }


            try
            {
                if (string.IsNullOrEmpty(NoneOfTheseWords))
                {
                    NoneOfTheseWords = "";
                }
                else
                {
                    NoneOfTheseWords = "%20-" + NoneOfTheseWords;
                }
            }
            catch
            { }


            try
            {
                if (string.IsNullOrEmpty(FromTheseAccounts))
                {
                    FromTheseAccounts = "";
                }
                else
                {
                    FromTheseAccounts = "%20from%3A" + FromTheseAccounts;
                }
            }
            catch
            { }


            try
            {
                if (string.IsNullOrEmpty(ToTheseAccounts))
                {
                    ToTheseAccounts = "";
                }
                else
                {
                    ToTheseAccounts = "%20to%3A" + ToTheseAccounts;
                }
            }
            catch
            { }


            try
            {
                if (string.IsNullOrEmpty(MentionTheseAccounts))
                {
                    MentionTheseAccounts = "";
                }
                else
                {
                    MentionTheseAccounts = "%20%40" + MentionTheseAccounts;
                }
            }
            catch
            { }

            try
            {
                if (string.IsNullOrEmpty(NearThisPlace))
                {
                    NearThisPlace = "";
                }
                else
                {
                    NearThisPlace = "%20near%3A%22" + NearThisPlace;
                }
            }
            catch
            { }




            try
            {
                if (!string.IsNullOrEmpty(txtAllofTheseKeywords.Text))
                {
                    #region Commented
                    //try
                    //{
                    //    string Url = "https://twitter.com/search?f=realtime&q=" + AllOfTheseWords + ThisExtractPhrase + AnyOfTheseWords + NoneOfTheseWords + TheseHashTags + _selectedLanguage + FromTheseAccounts + ToTheseAccounts + MentionTheseAccounts + NearThisPlace + "%22%20within%3A15mi&src=typd";
                    //    string response = _GlobusHttpHelper.getHtmlfromUrl(new Uri(Url), "", "");
                    //}
                    //catch { } public List<StructTweetIDs> NewKeywordStructDataForSearchByKeyword(string keyword) 
                    #endregion
                    {
                        try
                        {
                            BaseLib.GlobusRegex regx = new GlobusRegex();
                           
                            int counter = 0;
                            string res_Get_searchURL = string.Empty;
                            string searchURL = string.Empty;
                            string maxid = string.Empty;
                            string TweetId = string.Empty;
                            string text = string.Empty;

                            string ProfileName = string.Empty;
                            string Location = string.Empty;
                            string Bio = string.Empty;
                            string website = string.Empty;
                            string NoOfTweets = string.Empty;
                            string Followers = string.Empty;
                            string Followings = string.Empty;
                            int noOfRecords = 0;
                            try
                            {
                                noOfRecords = int.Parse(txtNoOfRecords.Text);
                            }
                            catch { }


                        startAgain:


                            if (counter == 0)
                            {
                                searchURL = "https://twitter.com/i/search/timeline?q=" + AllOfTheseWords + ThisExtractPhrase + AnyOfTheseWords + NoneOfTheseWords + TheseHashTags + _selectedLanguage + FromTheseAccounts + ToTheseAccounts + MentionTheseAccounts + NearThisPlace + "%22%20within%3A15mi&src=typd" + "&f=realtime";
                                counter++;
                            }
                            else
                            {

                                searchURL = "https://twitter.com/i/search/timeline?q=" + AllOfTheseWords + ThisExtractPhrase + AnyOfTheseWords + NoneOfTheseWords + TheseHashTags + _selectedLanguage + FromTheseAccounts + ToTheseAccounts + MentionTheseAccounts + NearThisPlace + "%22%20within%3A15mi&src=typd" + "&f=realtime&include_available_features=1&include_entities=1&last_note_ts=0&oldest_unread_id=0&scroll_cursor=" + TweetId + "";
                            }


                            try
                            {
                                res_Get_searchURL = _GlobusHttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                                 AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => Finding results for entered details ");

                                if (string.IsNullOrEmpty(res_Get_searchURL))
                                {
                                    res_Get_searchURL = _GlobusHttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                                }

                                try
                                {
                                    //string sjss = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                                    string[] splitRes = Regex.Split(res_Get_searchURL, "refresh_cursor");
                                    //splitRes = splitRes.Skip(1).ToArray();
                                    foreach (string item in splitRes)
                                    {
                                        if (item.Contains("refresh_cursor"))
                                        {
                                            int startIndex = item.IndexOf("TWEET-");
                                            string start = item.Substring(startIndex).Replace("data-user-id=\\\"", "");
                                            int endIndex = start.IndexOf("\"");
                                            string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                                            TweetId = end;


                                        }
                                        if (item.Contains("scroll_cursor"))
                                        {
                                            int startIndex = item.IndexOf("TWEET-");
                                            string start = item.Substring(startIndex).Replace("data-user-id=\\\"", "");
                                            int endIndex = start.IndexOf("\"");
                                            string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                                            TweetId = end;
                                        }
                                    }
                                }
                                catch (Exception)
                                {
                                }
                            }

                            catch (Exception ex)
                            {
                                System.Threading.Thread.Sleep(2000);
                                res_Get_searchURL = _GlobusHttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");


                            }
                            // && !res_Get_searchURL.Contains("has_more_items\":false")
                            if (!string.IsNullOrEmpty(res_Get_searchURL))
                            {
                                //string[] splitRes = Regex.Split(res_Get_searchURL, "data-item-id"); //Regex.Split(res_Get_searchURL, "\"in_reply_to_status_id_str\"");
                                string[] splitRes = Regex.Split(res_Get_searchURL, "data-item-id");

                                splitRes = splitRes.Skip(1).ToArray();


                                foreach (string item in splitRes)
                                {
                                    if (item.Contains("data-screen-name=") && !item.Contains("js-actionable-user js-profile-popup-actionable"))
                                    {
                                        //var avc = Newtonsoft.Json.JsonConvert.DeserializeObject<dynamic>(res_Get_searchURL);
                                        //string DataHtml = (string)avc["items_html"];
                                    }
                                    else
                                    {
                                        continue;
                                    }
                                    string modified_Item = "\"from_user\"" + item;

                                    string id = "";
                                    try
                                    {
                                        int startIndex = item.IndexOf("data-user-id=");
                                        string start = item.Substring(startIndex).Replace("data-user-id=\\\"", "");
                                        int endIndex = start.IndexOf("\\\"");
                                        string end = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                                        id = end;
                                        //lst_structTweetIDs.Add(id);
                                        AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => User Id " + id);
                                    }
                                    catch (Exception ex)
                                    {
                                        id = "null";
                                        //Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);

                                    }

                                    string from_user_id = "";
                                    try
                                    {
                                        int startIndex = item.IndexOf("data-screen-name=\\\"");
                                        string start = item.Substring(startIndex).Replace("data-screen-name=\\\"", "");
                                        int endIndex = start.IndexOf("\\\"");
                                        string end = start.Substring(0, endIndex).Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", "");
                                        from_user_id = end;
                                        AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => User ScreenName " + from_user_id);
                                    }
                                    catch (Exception ex)
                                    {
                                        from_user_id = "null";
                                        // Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwitterDataScrapper);

                                    }

                                    string tweetUserid = string.Empty;
                                    try
                                    {
                                        int startIndex = item.IndexOf("=\\\"");
                                        string start = item.Substring(startIndex).Replace("=\\\"", "");
                                        int endIndex = start.IndexOf("\\\"");
                                        string end = start.Substring(0, endIndex).Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", "");
                                        tweetUserid = end;
                                        AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => Tweet Id " + tweetUserid);
                                    }
                                    catch (Exception ex)
                                    {
                                        from_user_id = "null";


                                    }

                                    ///Tweet Text 
                                    #region Commented
                                    //try
                                    //{


                                    //    int startindex = item.IndexOf("js-tweet-text tweet-text\"");
                                    //    if (startindex == -1)
                                    //    {
                                    //        startindex = 0;
                                    //        startindex = item.IndexOf("js-tweet-text tweet-text");
                                    //    }

                                    //    string start = item.Substring(startindex).Replace("js-tweet-text tweet-text\"", "").Replace("js-tweet-text tweet-text tweet-text-rtl\"", "");
                                    //    int endindex = start.IndexOf("</p>");

                                    //    if (endindex == -1)
                                    //    {
                                    //        endindex = 0;
                                    //        endindex = start.IndexOf("stream-item-footer");
                                    //    }

                                    //    string end = start.Substring(0, endindex);
                                    //    end = regx.StripTagsRegex(end);
                                    //    text = end.Replace("&nbsp;", "").Replace("a href=", "").Replace("/a", "").Replace("<span", "").Replace("</span", "").Replace("class=\\\"js-display-url\\\"", "").Replace("class=\\\"tco-ellipsis\\\"", "").Replace("class=\\\"invisible\\\"", "").Replace("<strong>", "").Replace("target=\\\"_blank\\\"", "").Replace("class=\\\"twitter-timeline-link\\\"", "").Replace("</strong>", "").Replace("rel=\\\"nofollow\\\" dir=\\\"ltr\\\" data-expanded-url=", "");
                                    //    text = text.Replace("&quot;", "").Replace("<", "").Replace(">", "").Replace("\"", "").Replace("\\", "").Replace("title=", "");

                                    //    string[] array = Regex.Split(text, "http");
                                    //    text = string.Empty;
                                    //    foreach (string itemData in array)
                                    //    {
                                    //        if (!itemData.Contains("t.co"))
                                    //        {
                                    //            string data = string.Empty;
                                    //            if (itemData.Contains("//"))
                                    //            {
                                    //                data = ("http" + itemData).Replace(" span ", string.Empty);
                                    //                if (!text.Contains(itemData.Replace(" ", "")))// && !data.Contains("class") && !text.Contains(data))
                                    //                {
                                    //                    text += data.Replace("u003c", string.Empty).Replace("u003e", string.Empty);
                                    //                }
                                    //            }
                                    //            else
                                    //            {
                                    //                if (!text.Contains(itemData.Replace(" ", "")))
                                    //                {
                                    //                    text += itemData.Replace("u003c", string.Empty).Replace("u003e", string.Empty).Replace("js-tweet-text tweet-text", "");
                                    //                }
                                    //            }
                                    //        }
                                    //    }
                                    //}
                                    //catch { };
                                    
                                    #endregion


                                    twtboardpro.TwitterDataScrapper.StructTweetIDs structTweetIDs = new twtboardpro.TwitterDataScrapper.StructTweetIDs();

                                    if (id != "null")
                                    {
                                        structTweetIDs.ID_Tweet = tweetUserid;
                                        structTweetIDs.ID_Tweet_User = id;
                                        structTweetIDs.username__Tweet_User = from_user_id;
                                        structTweetIDs.wholeTweetMessage = text;
                                        lst_structTweetIDs.Add(structTweetIDs);
                                    }


                                    //if (!File.Exists(Globals.Path_KeywordScrapedListData + "-" + keyword + ".csv"))
                                    //{
                                    //    GlobusFileHelper.AppendStringToTextfileNewLine("USERID , USERNAME , PROFILE NAME , BIO , LOCATION , WEBSITE , NO OF TWEETS , FOLLOWERS , FOLLOWINGS", Globals.Path_KeywordScrapedListData + "-" + keyword + ".csv");
                                    //}

                                    {

                                        ChilkatHttpHelpr objChilkat = new ChilkatHttpHelpr();
                                        GlobusHttpHelper HttpHelper = new GlobusHttpHelper();
                                        string ProfilePageSource = HttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/" + from_user_id), "", "");

                                        string Responce = ProfilePageSource;

                                        #region Convert HTML to XML

                                        string xHtml = objChilkat.ConvertHtmlToXml(Responce);
                                        Chilkat.Xml xml = new Chilkat.Xml();
                                        xml.LoadXml(xHtml);

                                        Chilkat.Xml xNode = default(Chilkat.Xml);
                                        Chilkat.Xml xBeginSearchAfter = default(Chilkat.Xml);
                                        #endregion

                                        int counterdata = 0;
                                        xBeginSearchAfter = null;
                                        string dataDescription = string.Empty;
                                        xNode = xml.SearchForAttribute(xBeginSearchAfter, "h1", "class", "ProfileHeaderCard-name");
                                        while ((xNode != null))
                                        {
                                            xBeginSearchAfter = xNode;
                                            if (counterdata == 0)
                                            {
                                                ProfileName = xNode.AccumulateTagContent("text", "script|style");
                                                counterdata++;
                                            }
                                            else if (counterdata == 1)
                                            {
                                                website = xNode.AccumulateTagContent("text", "script|style");
                                                counterdata++;
                                            }
                                            else
                                            {
                                                break;
                                            }
                                            
                                            xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "u-textUserColor");
                                        }

                                        xBeginSearchAfter = null;
                                        dataDescription = string.Empty;
                                        xNode = xml.SearchForAttribute(xBeginSearchAfter, "p", "class", "ProfileHeaderCard-bio u-dir");//bio profile-field");
                                        while ((xNode != null))
                                        {
                                            xBeginSearchAfter = xNode;
                                            Bio = xNode.AccumulateTagContent("text", "script|style").Replace("&#39;", "'").Replace("&#13;&#10;", string.Empty).Trim();
                                            break;
                                        }

                                        xBeginSearchAfter = null;
                                        dataDescription = string.Empty;
                                        xNode = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "ProfileHeaderCard-locationText u-dir");//location profile-field");
                                        while ((xNode != null))
                                        {
                                            xBeginSearchAfter = xNode;
                                            Location = xNode.AccumulateTagContent("text", "script|style");
                                            break;
                                        }

                                        int counterData = 0;
                                        xBeginSearchAfter = null;
                                        dataDescription = string.Empty;
                                        xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-nav");//location profile-field");
                                        while ((xNode != null))
                                        {
                                            xBeginSearchAfter = xNode;
                                            if (counterData == 0)
                                            {
                                                // NoOfTweets = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "ProfileNav-value");
                                                NoOfTweets = xNode.AccumulateTagContent("text", "script|style").Replace("Tweets", string.Empty).Replace(",", string.Empty).Replace("Tweet", string.Empty);
                                                counterData++;
                                            }
                                            else if (counterData == 1)
                                            {
                                                Followings = xNode.AccumulateTagContent("text", "script|style").Replace(" Following", string.Empty).Replace(",", string.Empty).Replace("Following", string.Empty);
                                                counterData++;
                                            }
                                            else if (counterData == 2)
                                            {
                                                Followers = xNode.AccumulateTagContent("text", "script|style").Replace("Followers", string.Empty).Replace(",", string.Empty).Replace("Follower", string.Empty);
                                                counterData++;
                                            }
                                            else
                                            {
                                                break;
                                            }
                                            //xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "js-nav");
                                            xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-openSignupDialog js-nonNavigable u-textUserColor");
                                        }


                                        if (!string.IsNullOrEmpty(from_user_id) && tweetUserid != "null")
                                        {
                                            string Id_user = tweetUserid.Replace("}]", string.Empty).Trim();
                                            Globals.lstScrapedUserIDs.Add(Id_user);
                                            // GlobusFileHelper.AppendStringToTextfileNewLine(id + "," + from_user_id + "," + ProfileName + "," + Bio.Replace(",", "") + "," + Location.Replace(",", "") + "," + website + "," + NoOfTweets.Replace(",", "").Replace("Tweets", "") + "," + Followers.Replace(",", "").Replace("Following", "") + "," + Followings.Replace(",", "").Replace("Followers", "").Replace("Follower", ""), Globals.Path_KeywordScrapedListData + "-" + keyword + ".csv");
                                            // Log("[ " + DateTime.Now + " ] => [ " + from_user_id + "," + Id_user + "," + ProfileName + "," + Bio.Replace(",", "") + "," + Location + "," + website + "," + NoOfTweets + "," + Followers + "," + Followings + " ]");
                                        }
                                    }


                                    
                                    lst_structTweetIDs = lst_structTweetIDs.Distinct().ToList();

                                    if (lst_structTweetIDs.Count >= noOfRecords)
                                    {
                                       // return lst_structTweetIDs;
                                    }

                                }

                                if (lst_structTweetIDs.Count <= noOfRecords)
                                {
                                    maxid = lst_structTweetIDs[lst_structTweetIDs.Count - 1].ID_Tweet;

                                    if (res_Get_searchURL.Contains("has_moreitems\":false"))
                                    {
                                       
                                    }
                                    else
                                    {
                                        goto startAgain;
                                    }
                                }
                                else
                                {
                                    if (res_Get_searchURL.Contains("has_more_items\":false"))
                                    {
                                        
                                    }
                                    else
                                        goto startAgain;
                                }
                            }
                        }
                        catch (Exception ex)
                        {
                        }
                    }

                   
                }
            }


            catch
            { }


        }
        /// <summary>
        /// Converts an HTML page to XML with Chilkat and returns the concatenated XML of
        /// every <paramref name="TagName"/> element found by searching on its <c>id</c>
        /// attribute for <paramref name="AttributeName"/>.
        /// </summary>
        /// <param name="pageSrcHtml">HTML source handed to the Chilkat HTML-to-XML converter.</param>
        /// <param name="TagName">Tag name passed to the attribute search.</param>
        /// <param name="AttributeName">
        /// Despite its name, this is the value the <c>id</c> attribute is searched for
        /// (the attribute name itself is fixed to <c>"id"</c>).
        /// </param>
        /// <returns>
        /// The XML of all matching nodes appended in search order; an empty string when
        /// nothing matches; <c>null</c> when the Chilkat component cannot be unlocked or
        /// an exception is thrown while converting or searching.
        /// </returns>
        public string GetDataWithTagValueByTagAndAttributeNameWithId(string pageSrcHtml, string TagName, string AttributeName)
        {
            try
            {
                Chilkat.HtmlToXml htmlToXml = new Chilkat.HtmlToXml();

                // Abort when the component cannot be unlocked, reporting Chilkat's error text.
                // NOTE(review): the unlock code is hard-coded here; consider moving it to configuration.
                if (!htmlToXml.UnlockComponent("THEBACHtmlToXml_7WY3A57sZH3O"))
                {
                    Console.WriteLine(htmlToXml.LastErrorText);
                    return null;
                }

                // Convert the HTML to XML so it can be searched by tag and attribute.
                htmlToXml.Html = pageSrcHtml;

                Chilkat.Xml xml = new Chilkat.Xml();
                xml.LoadXml(htmlToXml.ToXml());

                // Accumulate in a builder: the original concatenated strings inside the loop,
                // which re-copies the whole result on every match.
                System.Text.StringBuilder matchedXml = new System.Text.StringBuilder();

                // Each search resumes after the previously found node; the first starts from null.
                Chilkat.Xml searchAfter = null;
                Chilkat.Xml xNode = xml.SearchForAttribute(searchAfter, TagName, "id", AttributeName);
                while (xNode != null)
                {
                    // GetXml() keeps the markup of the node rather than only its text content.
                    matchedXml.Append(xNode.GetXml());

                    searchAfter = xNode;
                    xNode = xml.SearchForAttribute(searchAfter, TagName, "id", AttributeName);
                }

                return matchedXml.ToString();
            }
            catch (Exception ex)
            {
                // Best-effort contract: callers receive null on failure. The message is written
                // to the console (as on unlock failure) so the error is not lost entirely.
                Console.WriteLine(ex.Message);
                return null;
            }
        }
        // Example #8
        // 0
        private void btnStart_Searching_Click(object sender, EventArgs e)
        {
            AllOfTheseWords      = (txtAllofTheseKeywords.Text).ToString();
            ThisExtractPhrase    = (txtThisExactPhrase.Text).ToString();
            AnyOfTheseWords      = (txtAnyOfTheseWords.Text).ToString();
            TheseHashTags        = (txtTheseHashTags.Text).ToString();
            NoneOfTheseWords     = (txtNoneofTheseWords.Text).ToString();
            FromTheseAccounts    = (txtFromTheseAccounts.Text).ToString();
            ToTheseAccounts      = (txtToTheseAccounts.Text).ToString();
            MentionTheseAccounts = (txtMentioningTheseAccounts.Text).ToString();
            NearThisPlace        = (txtNearThisPlace.Text).ToString();


            AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => Process Started");

            try
            {
                if (string.IsNullOrEmpty(ThisExtractPhrase))
                {
                    ThisExtractPhrase = "";
                }
                else
                {
                    ThisExtractPhrase = "%20%22" + ThisExtractPhrase;
                }
            }
            catch { }

            try
            {
                if (string.IsNullOrEmpty(AnyOfTheseWords))
                {
                    AnyOfTheseWords = "";
                }
                else
                {
                    AnyOfTheseWords = "%22%20" + AnyOfTheseWords;
                }
            }
            catch
            { }


            try
            {
                if (string.IsNullOrEmpty(TheseHashTags))
                {
                    TheseHashTags = "";
                }
                else
                {
                    TheseHashTags = "%20%23" + TheseHashTags;
                }
            }
            catch
            { }


            try
            {
                if (string.IsNullOrEmpty(NoneOfTheseWords))
                {
                    NoneOfTheseWords = "";
                }
                else
                {
                    NoneOfTheseWords = "%20-" + NoneOfTheseWords;
                }
            }
            catch
            { }


            try
            {
                if (string.IsNullOrEmpty(FromTheseAccounts))
                {
                    FromTheseAccounts = "";
                }
                else
                {
                    FromTheseAccounts = "%20from%3A" + FromTheseAccounts;
                }
            }
            catch
            { }


            try
            {
                if (string.IsNullOrEmpty(ToTheseAccounts))
                {
                    ToTheseAccounts = "";
                }
                else
                {
                    ToTheseAccounts = "%20to%3A" + ToTheseAccounts;
                }
            }
            catch
            { }


            try
            {
                if (string.IsNullOrEmpty(MentionTheseAccounts))
                {
                    MentionTheseAccounts = "";
                }
                else
                {
                    MentionTheseAccounts = "%20%40" + MentionTheseAccounts;
                }
            }
            catch
            { }

            try
            {
                if (string.IsNullOrEmpty(NearThisPlace))
                {
                    NearThisPlace = "";
                }
                else
                {
                    NearThisPlace = "%20near%3A%22" + NearThisPlace;
                }
            }
            catch
            { }



            try
            {
                if (!string.IsNullOrEmpty(txtAllofTheseKeywords.Text))
                {
                    #region Commented
                    //try
                    //{
                    //    string Url = "https://twitter.com/search?f=realtime&q=" + AllOfTheseWords + ThisExtractPhrase + AnyOfTheseWords + NoneOfTheseWords + TheseHashTags + _selectedLanguage + FromTheseAccounts + ToTheseAccounts + MentionTheseAccounts + NearThisPlace + "%22%20within%3A15mi&src=typd";
                    //    string response = _GlobusHttpHelper.getHtmlfromUrl(new Uri(Url), "", "");
                    //}
                    //catch { } public List<StructTweetIDs> NewKeywordStructDataForSearchByKeyword(string keyword)
                    #endregion
                    {
                        try
                        {
                            BaseLib.GlobusRegex regx = new GlobusRegex();

                            int    counter           = 0;
                            string res_Get_searchURL = string.Empty;
                            string searchURL         = string.Empty;
                            string maxid             = string.Empty;
                            string TweetId           = string.Empty;
                            string text = string.Empty;

                            string ProfileName = string.Empty;
                            string Location    = string.Empty;
                            string Bio         = string.Empty;
                            string website     = string.Empty;
                            string NoOfTweets  = string.Empty;
                            string Followers   = string.Empty;
                            string Followings  = string.Empty;
                            int    noOfRecords = 0;
                            try
                            {
                                noOfRecords = int.Parse(txtNoOfRecords.Text);
                            }
                            catch { }


startAgain:


                            if (counter == 0)
                            {
                                searchURL = "https://twitter.com/i/search/timeline?q=" + AllOfTheseWords + ThisExtractPhrase + AnyOfTheseWords + NoneOfTheseWords + TheseHashTags + _selectedLanguage + FromTheseAccounts + ToTheseAccounts + MentionTheseAccounts + NearThisPlace + "%22%20within%3A15mi&src=typd" + "&f=realtime";
                                counter++;
                            }
                            else
                            {
                                searchURL = "https://twitter.com/i/search/timeline?q=" + AllOfTheseWords + ThisExtractPhrase + AnyOfTheseWords + NoneOfTheseWords + TheseHashTags + _selectedLanguage + FromTheseAccounts + ToTheseAccounts + MentionTheseAccounts + NearThisPlace + "%22%20within%3A15mi&src=typd" + "&f=realtime&include_available_features=1&include_entities=1&last_note_ts=0&oldest_unread_id=0&scroll_cursor=" + TweetId + "";
                            }


                            try
                            {
                                res_Get_searchURL = _GlobusHttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                                AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => Finding results for entered details ");

                                if (string.IsNullOrEmpty(res_Get_searchURL))
                                {
                                    res_Get_searchURL = _GlobusHttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                                }

                                try
                                {
                                    //string sjss = globushttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                                    string[] splitRes = Regex.Split(res_Get_searchURL, "refresh_cursor");
                                    //splitRes = splitRes.Skip(1).ToArray();
                                    foreach (string item in splitRes)
                                    {
                                        if (item.Contains("refresh_cursor"))
                                        {
                                            int    startIndex = item.IndexOf("TWEET-");
                                            string start      = item.Substring(startIndex).Replace("data-user-id=\\\"", "");
                                            int    endIndex   = start.IndexOf("\"");
                                            string end        = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                                            TweetId = end;
                                        }
                                        if (item.Contains("scroll_cursor"))
                                        {
                                            int    startIndex = item.IndexOf("TWEET-");
                                            string start      = item.Substring(startIndex).Replace("data-user-id=\\\"", "");
                                            int    endIndex   = start.IndexOf("\"");
                                            string end        = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                                            TweetId = end;
                                        }
                                    }
                                }
                                catch (Exception)
                                {
                                }
                            }

                            catch (Exception ex)
                            {
                                System.Threading.Thread.Sleep(2000);
                                res_Get_searchURL = _GlobusHttpHelper.getHtmlfromUrl(new Uri(searchURL), "", "");
                            }
                            // && !res_Get_searchURL.Contains("has_more_items\":false")
                            if (!string.IsNullOrEmpty(res_Get_searchURL))
                            {
                                //string[] splitRes = Regex.Split(res_Get_searchURL, "data-item-id"); //Regex.Split(res_Get_searchURL, "\"in_reply_to_status_id_str\"");
                                string[] splitRes = Regex.Split(res_Get_searchURL, "data-item-id");

                                splitRes = splitRes.Skip(1).ToArray();


                                foreach (string item in splitRes)
                                {
                                    if (item.Contains("data-screen-name=") && !item.Contains("js-actionable-user js-profile-popup-actionable"))
                                    {
                                        //var avc = Newtonsoft.Json.JsonConvert.DeserializeObject<dynamic>(res_Get_searchURL);
                                        //string DataHtml = (string)avc["items_html"];
                                    }
                                    else
                                    {
                                        continue;
                                    }
                                    string modified_Item = "\"from_user\"" + item;

                                    string id = "";
                                    try
                                    {
                                        int    startIndex = item.IndexOf("data-user-id=");
                                        string start      = item.Substring(startIndex).Replace("data-user-id=\\\"", "");
                                        int    endIndex   = start.IndexOf("\\\"");
                                        string end        = start.Substring(0, endIndex).Replace("id_str", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("}", "").Replace("]", "");
                                        id = end;
                                        //lst_structTweetIDs.Add(id);
                                        AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => User Id " + id);
                                    }
                                    catch (Exception ex)
                                    {
                                        id = "null";
                                        //Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- id -- " + keyword + " --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                                    }

                                    string from_user_id = "";
                                    try
                                    {
                                        int    startIndex = item.IndexOf("data-screen-name=\\\"");
                                        string start      = item.Substring(startIndex).Replace("data-screen-name=\\\"", "");
                                        int    endIndex   = start.IndexOf("\\\"");
                                        string end        = start.Substring(0, endIndex).Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", "");
                                        from_user_id = end;
                                        AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => User ScreenName " + from_user_id);
                                    }
                                    catch (Exception ex)
                                    {
                                        from_user_id = "null";
                                        // Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> GetPhotoFromUsername() -- " + keyword + " -- from_user_id --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                                    }

                                    string tweetUserid = string.Empty;
                                    try
                                    {
                                        int    startIndex = item.IndexOf("=\\\"");
                                        string start      = item.Substring(startIndex).Replace("=\\\"", "");
                                        int    endIndex   = start.IndexOf("\\\"");
                                        string end        = start.Substring(0, endIndex).Replace("from_user_id\":", "").Replace("\"", "").Replace(":", "").Replace("{", "").Replace("_str", "").Replace("user", "").Replace("}", "").Replace("]", "");
                                        tweetUserid = end;
                                        AddToLog_AdvancedSearch("[ " + DateTime.Now + " ] => Tweet Id " + tweetUserid);
                                    }
                                    catch (Exception ex)
                                    {
                                        from_user_id = "null";
                                    }

                                    ///Tweet Text
                                    #region Commented
                                    //try
                                    //{


                                    //    int startindex = item.IndexOf("js-tweet-text tweet-text\"");
                                    //    if (startindex == -1)
                                    //    {
                                    //        startindex = 0;
                                    //        startindex = item.IndexOf("js-tweet-text tweet-text");
                                    //    }

                                    //    string start = item.Substring(startindex).Replace("js-tweet-text tweet-text\"", "").Replace("js-tweet-text tweet-text tweet-text-rtl\"", "");
                                    //    int endindex = start.IndexOf("</p>");

                                    //    if (endindex == -1)
                                    //    {
                                    //        endindex = 0;
                                    //        endindex = start.IndexOf("stream-item-footer");
                                    //    }

                                    //    string end = start.Substring(0, endindex);
                                    //    end = regx.StripTagsRegex(end);
                                    //    text = end.Replace("&nbsp;", "").Replace("a href=", "").Replace("/a", "").Replace("<span", "").Replace("</span", "").Replace("class=\\\"js-display-url\\\"", "").Replace("class=\\\"tco-ellipsis\\\"", "").Replace("class=\\\"invisible\\\"", "").Replace("<strong>", "").Replace("target=\\\"_blank\\\"", "").Replace("class=\\\"twitter-timeline-link\\\"", "").Replace("</strong>", "").Replace("rel=\\\"nofollow\\\" dir=\\\"ltr\\\" data-expanded-url=", "");
                                    //    text = text.Replace("&quot;", "").Replace("<", "").Replace(">", "").Replace("\"", "").Replace("\\", "").Replace("title=", "");

                                    //    string[] array = Regex.Split(text, "http");
                                    //    text = string.Empty;
                                    //    foreach (string itemData in array)
                                    //    {
                                    //        if (!itemData.Contains("t.co"))
                                    //        {
                                    //            string data = string.Empty;
                                    //            if (itemData.Contains("//"))
                                    //            {
                                    //                data = ("http" + itemData).Replace(" span ", string.Empty);
                                    //                if (!text.Contains(itemData.Replace(" ", "")))// && !data.Contains("class") && !text.Contains(data))
                                    //                {
                                    //                    text += data.Replace("u003c", string.Empty).Replace("u003e", string.Empty);
                                    //                }
                                    //            }
                                    //            else
                                    //            {
                                    //                if (!text.Contains(itemData.Replace(" ", "")))
                                    //                {
                                    //                    text += itemData.Replace("u003c", string.Empty).Replace("u003e", string.Empty).Replace("js-tweet-text tweet-text", "");
                                    //                }
                                    //            }
                                    //        }
                                    //    }
                                    //}
                                    //catch { };

                                    #endregion


                                    twtboardpro.TwitterDataScrapper.StructTweetIDs structTweetIDs = new twtboardpro.TwitterDataScrapper.StructTweetIDs();

                                    if (id != "null")
                                    {
                                        structTweetIDs.ID_Tweet             = tweetUserid;
                                        structTweetIDs.ID_Tweet_User        = id;
                                        structTweetIDs.username__Tweet_User = from_user_id;
                                        structTweetIDs.wholeTweetMessage    = text;
                                        lst_structTweetIDs.Add(structTweetIDs);
                                    }


                                    //if (!File.Exists(Globals.Path_KeywordScrapedListData + "-" + keyword + ".csv"))
                                    //{
                                    //    GlobusFileHelper.AppendStringToTextfileNewLine("USERID , USERNAME , PROFILE NAME , BIO , LOCATION , WEBSITE , NO OF TWEETS , FOLLOWERS , FOLLOWINGS", Globals.Path_KeywordScrapedListData + "-" + keyword + ".csv");
                                    //}

                                    {
                                        ChilkatHttpHelpr objChilkat        = new ChilkatHttpHelpr();
                                        GlobusHttpHelper HttpHelper        = new GlobusHttpHelper();
                                        string           ProfilePageSource = HttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/" + from_user_id), "", "");

                                        string Responce = ProfilePageSource;

                                        #region Convert HTML to XML

                                        string      xHtml = objChilkat.ConvertHtmlToXml(Responce);
                                        Chilkat.Xml xml   = new Chilkat.Xml();
                                        xml.LoadXml(xHtml);

                                        Chilkat.Xml xNode             = default(Chilkat.Xml);
                                        Chilkat.Xml xBeginSearchAfter = default(Chilkat.Xml);
                                        #endregion

                                        int counterdata = 0;
                                        xBeginSearchAfter = null;
                                        string dataDescription = string.Empty;
                                        xNode = xml.SearchForAttribute(xBeginSearchAfter, "h1", "class", "ProfileHeaderCard-name");
                                        while ((xNode != null))
                                        {
                                            xBeginSearchAfter = xNode;
                                            if (counterdata == 0)
                                            {
                                                ProfileName = xNode.AccumulateTagContent("text", "script|style");
                                                counterdata++;
                                            }
                                            else if (counterdata == 1)
                                            {
                                                website = xNode.AccumulateTagContent("text", "script|style");
                                                counterdata++;
                                            }
                                            else
                                            {
                                                break;
                                            }

                                            xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "u-textUserColor");
                                        }

                                        xBeginSearchAfter = null;
                                        dataDescription   = string.Empty;
                                        xNode             = xml.SearchForAttribute(xBeginSearchAfter, "p", "class", "ProfileHeaderCard-bio u-dir");//bio profile-field");
                                        while ((xNode != null))
                                        {
                                            xBeginSearchAfter = xNode;
                                            Bio = xNode.AccumulateTagContent("text", "script|style").Replace("&#39;", "'").Replace("&#13;&#10;", string.Empty).Trim();
                                            break;
                                        }

                                        xBeginSearchAfter = null;
                                        dataDescription   = string.Empty;
                                        xNode             = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "ProfileHeaderCard-locationText u-dir");//location profile-field");
                                        while ((xNode != null))
                                        {
                                            xBeginSearchAfter = xNode;
                                            Location          = xNode.AccumulateTagContent("text", "script|style");
                                            break;
                                        }

                                        int counterData = 0;
                                        xBeginSearchAfter = null;
                                        dataDescription   = string.Empty;
                                        xNode             = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-nav");//location profile-field");
                                        while ((xNode != null))
                                        {
                                            xBeginSearchAfter = xNode;
                                            if (counterData == 0)
                                            {
                                                // NoOfTweets = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "ProfileNav-value");
                                                NoOfTweets = xNode.AccumulateTagContent("text", "script|style").Replace("Tweets", string.Empty).Replace(",", string.Empty).Replace("Tweet", string.Empty);
                                                counterData++;
                                            }
                                            else if (counterData == 1)
                                            {
                                                Followings = xNode.AccumulateTagContent("text", "script|style").Replace(" Following", string.Empty).Replace(",", string.Empty).Replace("Following", string.Empty);
                                                counterData++;
                                            }
                                            else if (counterData == 2)
                                            {
                                                Followers = xNode.AccumulateTagContent("text", "script|style").Replace("Followers", string.Empty).Replace(",", string.Empty).Replace("Follower", string.Empty);
                                                counterData++;
                                            }
                                            else
                                            {
                                                break;
                                            }
                                            //xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "js-nav");
                                            xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-openSignupDialog js-nonNavigable u-textUserColor");
                                        }


                                        if (!string.IsNullOrEmpty(from_user_id) && tweetUserid != "null")
                                        {
                                            string Id_user = tweetUserid.Replace("}]", string.Empty).Trim();
                                            Globals.lstScrapedUserIDs.Add(Id_user);
                                            // GlobusFileHelper.AppendStringToTextfileNewLine(id + "," + from_user_id + "," + ProfileName + "," + Bio.Replace(",", "") + "," + Location.Replace(",", "") + "," + website + "," + NoOfTweets.Replace(",", "").Replace("Tweets", "") + "," + Followers.Replace(",", "").Replace("Following", "") + "," + Followings.Replace(",", "").Replace("Followers", "").Replace("Follower", ""), Globals.Path_KeywordScrapedListData + "-" + keyword + ".csv");
                                            // Log("[ " + DateTime.Now + " ] => [ " + from_user_id + "," + Id_user + "," + ProfileName + "," + Bio.Replace(",", "") + "," + Location + "," + website + "," + NoOfTweets + "," + Followers + "," + Followings + " ]");
                                        }
                                    }



                                    lst_structTweetIDs = lst_structTweetIDs.Distinct().ToList();

                                    if (lst_structTweetIDs.Count >= noOfRecords)
                                    {
                                        // return lst_structTweetIDs;
                                    }
                                }

                                if (lst_structTweetIDs.Count <= noOfRecords)
                                {
                                    maxid = lst_structTweetIDs[lst_structTweetIDs.Count - 1].ID_Tweet;

                                    if (res_Get_searchURL.Contains("has_moreitems\":false"))
                                    {
                                    }
                                    else
                                    {
                                        goto startAgain;
                                    }
                                }
                                else
                                {
                                    if (res_Get_searchURL.Contains("has_more_items\":false"))
                                    {
                                    }
                                    else
                                    {
                                        goto startAgain;
                                    }
                                }
                            }
                        }
                        catch (Exception ex)
                        {
                        }
                    }
                }
            }


            catch
            { }
        }
예제 #9
0
        /// <summary>
        /// Converts an HTML page to XML with Chilkat and concatenates the XML of every
        /// <paramref name="TagName"/> element that <c>SearchForAttribute</c> finds for the
        /// <c>id</c> attribute value <paramref name="AttributeName"/>.
        /// </summary>
        /// <param name="pageSrcHtml">HTML source to search.</param>
        /// <param name="TagName">Tag name handed to the attribute search.</param>
        /// <param name="AttributeName">Value searched for in the <c>id</c> attribute (despite the parameter name).</param>
        /// <returns>
        /// The XML of all matches joined in search order (an empty string when nothing matches),
        /// or <c>null</c> when the Chilkat component cannot be unlocked or any exception is thrown.
        /// </returns>
        public string GetDataWithTagValueByTagAndAttributeNameWithId(string pageSrcHtml, string TagName, string AttributeName)
        {
            try
            {
                Chilkat.HtmlToXml converter = new Chilkat.HtmlToXml();

                // The converter has to be unlocked before use; report the reason and give up otherwise.
                if (!converter.UnlockComponent("THEBACHtmlToXml_7WY3A57sZH3O"))
                {
                    Console.WriteLine(converter.LastErrorText);
                    return null;
                }

                converter.Html = pageSrcHtml;

                Chilkat.Xml document = new Chilkat.Xml();
                document.LoadXml(converter.ToXml());

                string combinedXml = string.Empty;

                // Each search resumes after the previous match; null means "start from the beginning".
                Chilkat.Xml previousMatch = null;
                Chilkat.Xml match;
                while ((match = document.SearchForAttribute(previousMatch, TagName, "id", AttributeName)) != null)
                {
                    combinedXml += match.GetXml();
                    previousMatch = match;
                }

                return combinedXml;
            }
            catch (Exception)
            {
                // Any failure is reported to the caller as "no data".
                return null;
            }
        }
예제 #10
0
        /// <summary>
        /// Walks the status links found on this account's profile page, opens every status page and
        /// stores each tweet found there (status id, author screen name, author id, text) in
        /// <c>tb_ReplyCampaign</c>.
        /// </summary>
        /// <remarks>
        /// Calls <c>Login()</c> first when <c>IsLoggedIn</c> is false and does nothing unless
        /// <c>IsNotSuspended</c> is true. Pages that come back empty and links whose status id
        /// cannot be read are skipped. The method stops altogether if the Chilkat HtmlToXml
        /// component cannot be unlocked.
        /// </remarks>
        public void getmentions()
        {
            if (!IsLoggedIn)
            {
                Login();
            }
            if (!IsNotSuspended)
            {
                return;
            }

            string pageSource = globusHttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/" + Screen_name), "", "");
            if (string.IsNullOrEmpty(pageSource))
            {
                // Nothing to scan; Regex.Split would throw on a null input.
                return;
            }

            // Every chunk after the first one begins right after href="/<screen name>/status/,
            // i.e. with the status id followed by the closing quote of the href.
            string[] statusChunks = Regex.Split(pageSource, "href=\"/" + Screen_name + "/status/").Skip(1).ToArray();
            foreach (string statusChunk in statusChunks)
            {
                if (!statusChunk.Contains("tweet-timestamp js-permalink js-nav") || !statusChunk.Contains("js-tweet-text"))
                {
                    continue;
                }

                int idEnd = statusChunk.IndexOf("\"");
                if (idEnd <= 0)
                {
                    // No readable status id: requesting ".../status/" with an empty id is pointless.
                    continue;
                }
                string statusid = statusChunk.Substring(0, idEnd);

                string statusPageSource = globusHttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/" + Screen_name + "/status/" + statusid), "", "");
                if (string.IsNullOrEmpty(statusPageSource))
                {
                    continue;
                }

                string[] tweetChunks = Regex.Split(statusPageSource, "simple-tweet tweet js-stream-tweet").Skip(1).ToArray();
                foreach (string tweets in tweetChunks)
                {
                    // NOTE(review): the unlock key is hard-coded here; consider moving it to configuration.
                    Chilkat.HtmlToXml htmlToXml = new Chilkat.HtmlToXml();
                    if (!htmlToXml.UnlockComponent("THEBACHtmlToXml_7WY3A57sZH3O"))
                    {
                        Console.WriteLine(htmlToXml.LastErrorText);
                        return;
                    }

                    htmlToXml.Html = tweets;
                    Chilkat.Xml xml = new Chilkat.Xml();
                    xml.LoadXml(htmlToXml.ToXml());

                    // Only the first <p class="js-tweet-text"> of the chunk is used.
                    string TweetText = string.Empty;
                    Chilkat.Xml searchStart = null;
                    Chilkat.Xml tweetNode = xml.SearchForAttribute(searchStart, "p", "class", "js-tweet-text");
                    if (tweetNode != null)
                    {
                        TweetText = tweetNode.AccumulateTagContent("text", "script|style");
                    }

                    string From_user_Screen_name = ExtractTweetAttribute(tweets, "data-screen-name");
                    string From_user_id = ExtractTweetAttribute(tweets, "data-user-id");

                    // All values come from scraped HTML, so quotes are escaped before they are
                    // embedded in the statement. A parameterized insert would be preferable if
                    // DataBaseHandler offers one.
                    string strQuery = "INSERT INTO tb_ReplyCampaign (TweetId , Username , ReplyUserName , ReplyUserId , TweetText , Reply) VALUES ('"
                        + EscapeSqlLiteral(statusid) + "' , '"
                        + EscapeSqlLiteral(Username) + "' , '"
                        + EscapeSqlLiteral(From_user_Screen_name) + "' , '"
                        + EscapeSqlLiteral(From_user_id) + "' , '"
                        + EscapeSqlLiteral(TweetText) + "' , '"
                        + EscapeSqlLiteral(TweetText) + "')";
                    DataBaseHandler.InsertQuery(strQuery, "tb_ReplyCampaign");
                }
            }
        }

        /// <summary>
        /// Returns the value of the first <c>attributeName="..."</c> occurrence in
        /// <paramref name="html"/>, or an empty string when the attribute or its closing quote is missing.
        /// </summary>
        private static string ExtractTweetAttribute(string html, string attributeName)
        {
            string marker = attributeName + "=\"";
            int valueStart = html.IndexOf(marker, StringComparison.Ordinal);
            if (valueStart < 0)
            {
                return string.Empty;
            }
            valueStart += marker.Length;

            int valueEnd = html.IndexOf('"', valueStart);
            if (valueEnd < 0)
            {
                return string.Empty;
            }

            // Same clean-up the values have always received before being stored.
            return html.Substring(valueStart, valueEnd - valueStart)
                       .Replace("screen_name", "")
                       .Replace("\"", "")
                       .Replace(":", "")
                       .Replace("{", "")
                       .Replace("}", "")
                       .Replace("]", "");
        }

        /// <summary>
        /// Doubles single quotes so <paramref name="value"/> can sit inside a single-quoted SQL
        /// string literal; <c>null</c> becomes an empty string.
        /// </summary>
        private static string EscapeSqlLiteral(string value)
        {
            return string.IsNullOrEmpty(value) ? string.Empty : value.Replace("'", "''");
        }
        /// <summary>
        /// Scrapes the public Twitter profile page of one user (display name, website, bio,
        /// location, counters, user id, default-avatar flag), appends the result as a CSV row to
        /// <c>Globals.Path_UserListInfoData</c> and echoes it through <c>AddToLog_ScrapMember</c>.
        /// </summary>
        /// <param name="param">
        /// An <see cref="Array"/> whose first element is the user name (a string) to scrape.
        /// </param>
        /// <remarks>
        /// Best effort: no exception leaves this method. A blank user name is ignored, and a page
        /// that cannot be fetched (after one retry) or that reports a suspended account is only logged.
        /// </remarks>
        public void scrapUserInfo(object param)
        {
            try
            {
                Array paramsArray = (Array)param;
                string UserName = (string)paramsArray.GetValue(0);

                // Without a user name there is no profile to fetch and no row to write.
                if (UserName == null || UserName.Trim().Length == 0)
                {
                    return;
                }

                string userId       = string.Empty;
                string ProfileName  = string.Empty;
                string Location     = string.Empty;
                string Bio          = string.Empty;
                string website      = string.Empty;
                string NoOfTweets   = string.Empty;
                string Followers    = string.Empty;
                string Followings   = string.Empty;
                string IsProfilePIc = string.Empty;

                ChilkatHttpHelpr objChilkat = new ChilkatHttpHelpr();
                GlobusHttpHelper HttpHelper = new GlobusHttpHelper();

                Uri    profileUri        = new Uri("https://twitter.com/" + UserName.Trim());
                string ProfilePageSource = HttpHelper.getHtmlfromUrl(profileUri, "", "");
                if (string.IsNullOrEmpty(ProfilePageSource))
                {
                    // One retry before giving up.
                    ProfilePageSource = HttpHelper.getHtmlfromUrl(profileUri, "", "");
                }
                if (string.IsNullOrEmpty(ProfilePageSource))
                {
                    AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [ User  " + UserName + " is not exist or page source getting null.]");
                    return;
                }

                if (ProfilePageSource.Contains("Account suspended"))
                {
                    AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [ User  " + UserName + " is suspended ]");
                    return;
                }

                #region Convert HTML to XML
                Chilkat.Xml xml = new Chilkat.Xml();
                xml.LoadXml(objChilkat.ConvertHtmlToXml(ProfilePageSource));

                // A null start node makes SearchForAttribute begin at the top of the document.
                Chilkat.Xml searchStart = null;
                #endregion

                // Display name, then the first "u-textUserColor" link after it (used as the website).
                Chilkat.Xml nameNode = xml.SearchForAttribute(searchStart, "h1", "class", "ProfileHeaderCard-name");
                if (nameNode != null)
                {
                    ProfileName = nameNode.AccumulateTagContent("text", "script|style").Replace("Verified account", " ");

                    Chilkat.Xml websiteNode = xml.SearchForAttribute(nameNode, "a", "class", "u-textUserColor");
                    if (websiteNode != null)
                    {
                        website = websiteNode.AccumulateTagContent("text", "script|style");
                        if (website.Contains("Twitter Status"))
                        {
                            website = "";
                        }
                    }
                }

                Chilkat.Xml bioNode = xml.SearchForAttribute(searchStart, "p", "class", "ProfileHeaderCard-bio u-dir");
                if (bioNode != null)
                {
                    Bio = bioNode.AccumulateTagContent("text", "script|style").Replace("&#39;", "'").Replace("&#13;&#10;", string.Empty).Trim();
                }

                Chilkat.Xml locationNode = xml.SearchForAttribute(searchStart, "span", "class", "ProfileHeaderCard-locationText u-dir");
                if (locationNode != null)
                {
                    Location = locationNode.AccumulateTagContent("text", "script|style");
                }

                // Up to three counters are read and treated, in the order found, as tweets,
                // following and followers. The first is located by one class list and the
                // following ones by another.
                string[] stats = { string.Empty, string.Empty, string.Empty };
                Chilkat.Xml statNode = xml.SearchForAttribute(searchStart, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-nav");
                for (int i = 0; i < stats.Length && statNode != null; i++)
                {
                    stats[i] = statNode.AccumulateTagContent("text", "script|style");
                    statNode = xml.SearchForAttribute(statNode, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-openSignupDialog js-nonNavigable u-textUserColor");
                }
                NoOfTweets = stats[0].Replace("Tweets", string.Empty).Replace(",", string.Empty).Replace("Tweet", string.Empty);
                Followings = stats[1].Replace(" Following", string.Empty).Replace(",", string.Empty).Replace("Following", string.Empty);
                Followers  = stats[2].Replace("Followers", string.Empty).Replace(",", string.Empty).Replace("Follower", string.Empty);

                try
                {
                    // The id is the text between "profile_id" and the next comma, minus quoting.
                    int    startindex = ProfilePageSource.IndexOf("profile_id");
                    string start      = ProfilePageSource.Substring(startindex).Replace("profile_id", "");
                    int    endindex   = start.IndexOf(",");
                    string end        = start.Substring(0, endindex).Replace("&quot;", "").Replace("\"", "").Replace(":", "").Trim();
                    userId = end.Trim();
                    if (userId.Length > 15)
                    {
                        // Too long to be an id: retry against the HTML-encoded form of the key.
                        startindex = ProfilePageSource.IndexOf("profile_id&quot");
                        start      = ProfilePageSource.Substring(startindex).Replace("profile_id&quot", "");
                        endindex   = start.IndexOf(",");
                        end        = start.Substring(0, endindex).Replace("&quot;", "").Replace("\"", "").Replace(":", "").Replace(";", "").Trim();
                        userId     = end.Trim();
                    }
                }
                catch (ArgumentOutOfRangeException)
                {
                    // Marker or terminating comma not found (IndexOf returned -1): keep whatever id we have.
                }

                // Any of the stock "default_profile_N" images means no custom picture was uploaded.
                bool usesDefaultPicture = false;
                for (int i = 6; i >= 0 && !usesDefaultPicture; i--)
                {
                    usesDefaultPicture = ProfilePageSource.Contains("default_profile_" + i + "_400x400");
                }
                IsProfilePIc = usesDefaultPicture ? "No" : "Yes";

                if (!File.Exists(Globals.Path_UserListInfoData))
                {
                    GlobusFileHelper.AppendStringToTextfileNewLine("USERID , USERNAME , PROFILE NAME , BIO , LOCATION , WEBSITE , NO OF TWEETS , FOLLOWERS , FOLLOWINGS, ProfilePic", Globals.Path_UserListInfoData);
                }

                // Commas are removed from every free-text field so a value cannot shift the CSV
                // columns. The counters were already stripped of labels and commas above.
                string csvRow = userId + "," + UserName + "," + ProfileName.Replace(",", "") + "," + Bio.Replace(",", "") + "," + Location.Replace(",", "") + "," + website.Replace(",", "") + "," + NoOfTweets + "," + Followers + "," + Followings + "," + IsProfilePIc;
                GlobusFileHelper.AppendStringToTextfileNewLine(csvRow, Globals.Path_UserListInfoData);
                AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [ " + userId + "," + UserName + "," + ProfileName + "," + Bio.Replace(",", "") + "," + Location + "," + website + "," + NoOfTweets + "," + Followers + "," + Followings + " ," + IsProfilePIc + "]");
            }
            catch (Exception ex)
            {
                // Best-effort scrape: never let a failure escape, but leave a trace of it.
                Console.WriteLine("scrapUserInfo failed: " + ex.Message);
            }
        }
예제 #12
0
        /// <summary>
        /// Reports whether the HTML contains a <paramref name="TagName"/> element that
        /// <c>SearchForAttribute</c> finds for the <c>class</c> attribute value
        /// <paramref name="AttributeName"/>.
        /// </summary>
        /// <param name="pageSrcHtml">HTML source to search.</param>
        /// <param name="TagName">Tag name handed to the attribute search.</param>
        /// <param name="AttributeName">Value searched for in the <c>class</c> attribute (despite the parameter name).</param>
        /// <returns>
        /// <c>true</c> when a match is found; <c>false</c> when there is none, when the Chilkat
        /// component cannot be unlocked, or when any exception is thrown.
        /// </returns>
        public bool CheckAttributeexsist(string pageSrcHtml, string TagName, string AttributeName)
        {
            try
            {
                Chilkat.HtmlToXml converter = new Chilkat.HtmlToXml();

                // The converter has to be unlocked before use; report the reason and give up otherwise.
                if (!converter.UnlockComponent("THEBACHtmlToXml_7WY3A57sZH3O"))
                {
                    Console.WriteLine(converter.LastErrorText);
                    return false;
                }

                converter.Html = pageSrcHtml;

                Chilkat.Xml document = new Chilkat.Xml();
                document.LoadXml(converter.ToXml());

                // A null start node makes the search begin at the top of the document;
                // the first hit is all we need.
                Chilkat.Xml searchFrom = null;
                Chilkat.Xml firstMatch = document.SearchForAttribute(searchFrom, TagName, "class", AttributeName);
                return firstMatch != null;
            }
            catch (Exception)
            {
                // Any failure counts as "not present".
                return false;
            }
        }
예제 #13
0
        private void ScrapeWhotoFollow(TweetAccountManager tweetAccountManager, string Keyword, int NoOfPages)
        {
            try
            {
                AddThreadToDictionary(strModule(Module.WhoToScrap), tweetAccountManager.Username);
                int counter = 0;
                int PageCount = 1;
               // int NoOfData = NoOfPages * 20;
                List<string> username = new List<string>();
                AddToScrapeLogs("[ " + DateTime.Now + " ] => [ Getting Users To Scrape Data For ]");
                while (counter < NoOfPages)
                {
                   // string pagsource = tweetAccountManager.globusHttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/who_to_follow/suggestions/search/users?q=" + Keyword + "&cursor=" + PageCount + "&include_available_features=1&include_entities=1&is_forward=true"), "", "");
                    string pagsource = "";
                    if (PageCount == 1)
                    {
                         pagsource = tweetAccountManager.globusHttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/i/search/timeline?q=" + Keyword + "&cursor=" + PageCount + "&include_available_features=1&include_entities=1&is_forward=true"), "", ""); // https://twitter.com/who_to_follow/suggestions/search/users?q=software&cursor=1&include_available_features=1&include_entities=1&is_forward=true
                    }
                    else
                    {
                        //if (pagsource.Contains("\"has_more_items\":true"))
                        string uri = "https://twitter.com/i/search/timeline?q=" + Keyword + "&mode=users&include_available_features=1&include_entities=1&last_note_ts=555&scroll_cursor=USER-0-" + PageCount * 20;
                        pagsource = tweetAccountManager.globusHttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/i/search/timeline?q=" + Keyword + "&mode=users&include_available_features=1&include_entities=1&last_note_ts=555&scroll_cursor=USER-0-" + PageCount * 20), "", "");
                    }
                  //  if (pagsource.Contains("has-more-items"))
                    if (pagsource.Contains("has_more_items"))
                    {
                        PageCount++;
                         string[] Aray = Regex.Split(pagsource, "js-stream-item stream-item");
                        //string[] Aray = Regex.Split(pagsource, "js-stream-item stream-item stream-user-item");  //js-stream-item stream-item
                      //  string[] Aray = Regex.Split(pagsource, "fullname js-action-profile-name");
                        Aray = Aray.Skip(1).ToArray();
                        foreach (string item in Aray)
                        {
                            string Userid = string.Empty;
                            string Username = string.Empty;
                            try
                            {
                                //int startindex = item.IndexOf("=\\\"");
                                int startindex = item.IndexOf("data-user-id=\"");
                                int startindexForItem = item.IndexOf("data-item-id=\\\"");
                                if (startindex >= 0 && PageCount==2)
                                {
                                    string start = item.Substring(startindex).Replace("data-user-id=\"", "");
                                    //string start = item.Substring(startindex).Replace("=\\\"", "");
                                    //int endindex = start.IndexOf("\\\"");
                                    int endindex = start.IndexOf("\"");
                                    string end = start.Substring(0, endindex);
                                    Userid = end;
                                }
                                else if (startindexForItem >= 0)
                                {
                                    string start = item.Substring(startindexForItem).Replace("data-item-id=\\\"", "");
                                    //string start = item.Substring(startindex).Replace("=\\\"", "");
                                    //int endindex = start.IndexOf("\\\"");
                                    int endindex = start.IndexOf("\\\"");
                                    string end = start.Substring(0, endindex);
                                    Userid = end;

                                }
                            }
                            catch (Exception ex)
                            {
                                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> ScrapeWhotoFollow() 1--> " + ex.Message, Globals.Path_ScrapeUsersErroLog);
                                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> ScrapeWhotoFollow() 1--> " + ex.Message, Globals.Path_TwtErrorLogs);
                            }

                            try
                            {
                                //int startindex = item.IndexOf("data-screen-name=\\\"");
                                int startindex = item.IndexOf("data-screen-name=\"");
                                int startindex2 = item.IndexOf("data-screen-name=\\\"");
                                if (startindex >= 0)
                                {
                                    //string start = item.Substring(startindex).Replace("data-screen-name=\\\"", "");
                                    string start = item.Substring(startindex).Replace("data-screen-name=\"", "");
                                    //int endindex = start.IndexOf("\\\"");
                                    int endindex = start.IndexOf("\"");
                                    string end = start.Substring(0, endindex);
                                    Username = end;
                                    username.Add(end);
                                }

                                if (startindex2 >= 0)
                                {
                                    //string start = item.Substring(startindex).Replace("data-screen-name=\\\"", "");
                                    string start = item.Substring(startindex2).Replace("data-screen-name=\\\"", "");
                                    //int endindex = start.IndexOf("\\\"");
                                    int endindex = start.IndexOf("\\\"");
                                    string end = start.Substring(0, endindex);
                                    Username = end;
                                    username.Add(end);
                                }
                            }
                            catch (Exception ex)
                            {
                                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> ScrapeWhotoFollow() 1--> " + ex.Message, Globals.Path_ScrapeUsersErroLog);
                                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> ScrapeWhotoFollow() 1--> " + ex.Message, Globals.Path_TwtErrorLogs);
                            }

                            try
                            {
                                if (!string.IsNullOrEmpty(Username) && !string.IsNullOrEmpty(Userid))
                                {
                                    string query = "INSERT INTO tb_UsernameDetails (Username , Userid) VALUES ('" + Username + "' ,'" + Userid + "') ";
                                    DataBaseHandler.InsertQuery(query, "tb_UsernameDetails");
                                }
                            }
                            catch (Exception ex)
                            {
                                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> ScrapeWhotoFollow() --> Database --> " + ex.Message, Globals.Path_TwitterDataScrapper);
                                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> ScrapeWhotoFollow() --> DataBase --> " + ex.Message, Globals.Path_TwtErrorLogs);
                            }

                            AddToScrapeLogs("[ " + DateTime.Now + " ] => [ " + Username + " ::  " + Userid + " ]");
                        }
                        username = username.Distinct().ToList();
                        counter++;
                    }
                    else
                    {
                        AddToScrapeLogs("[ " + DateTime.Now + " ] => [ No More Pages To Scrape For Keyword : " + Keyword + " ]");
                        break;
                    }
                }

                if (!File.Exists(Globals.Path_KeywordScrapedListData + ".csv"))
                {
                    GlobusFileHelper.AppendStringToTextfileNewLine("USERID , USERNAME , PROFILE NAME , BIO  , LOCATION , WEBSITE , NOOFTWEETS , FOLLOWING , FOLLOWERS ", Globals.Path_KeywordScrapedListData + ".csv");
                }

                foreach (string UserIds in username)
                {
                    try
                    {
                        string ProfileName = string.Empty;
                        string Location = string.Empty;
                        string Bio = string.Empty;
                        string website = string.Empty;
                        string NoOfTweets = string.Empty;
                        string Followers = string.Empty;
                        string Followings = string.Empty;
                        string userids = string.Empty;
                        string TweetUsername = string.Empty;
                        string Userid = string.Empty;
                        string Username = string.Empty;
                        ChilkatHttpHelpr objChilkat = new ChilkatHttpHelpr();
                        GlobusHttpHelper HttpHelper = new GlobusHttpHelper();
                        string ProfilePageSource = HttpHelper.getHtmlfromUrl(new Uri("https://twitter.com/" + UserIds), "", "");

                        string Responce = ProfilePageSource;

                        #region Convert HTML to XML

                        string xHtml = objChilkat.ConvertHtmlToXml(Responce);
                        Chilkat.Xml xml = new Chilkat.Xml();
                        xml.LoadXml(xHtml);

                        Chilkat.Xml xNode = default(Chilkat.Xml);
                        Chilkat.Xml xBeginSearchAfter = default(Chilkat.Xml);
                        #endregion

                        //xNode = xml.SearchForAttribute(xBeginSearchAfter, "ul", "class", "stats js-mini-profile-stats");
                        //xNode = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "profile-field");
                        //userids = xNode.GetAttrValue("data-user-id");
                        //userids = xNode.AccumulateTagContent("text", "script|style");
                        if (Responce.Contains("has-more-items"))
                        {
                            PageCount++;
                            //string[] Aray = Regex.Split(Responce, "js-stream-item stream-item stream-item");
                            //Aray = Aray.Skip(1).ToArray();
                            //foreach (string item in Aray)
                            {

                                try
                                {
                                    #region commented
                                    //int startindex = item.IndexOf("=\\\"");
                                    //int startindex = item.IndexOf("data-user-id=");
                                    //if (startindex >= 0)
                                    //{
                                    //    string start = item.Substring(startindex).Replace("data-user-id=\"", "");
                                    //    //string start = item.Substring(startindex).Replace("=\\\"", "");
                                    //    //int endindex = start.IndexOf("\\\"");
                                    //    int endindex = start.IndexOf("\"");
                                    //    string end = start.Substring(0, endindex);
                                    //    userids = end;
                                    //    break;
                                    //}
                                    #endregion

                                    int startindex = Responce.IndexOf("profile_id");
                                    string start = Responce.Substring(startindex).Replace("profile_id", "");
                                    int endindex = start.IndexOf(",");
                                    string end = start.Substring(0, endindex).Replace("&quot;", "").Replace("\"", "").Replace(":", "").Trim();
                                    userids = end.Trim();
                                    TweetUsername = UserIds;

                                }

                                catch (Exception ex)
                                {
                                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> ScrapeWhotoFollow() 1--> " + ex.Message, Globals.Path_ScrapeUsersErroLog);
                                    Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> ScrapeWhotoFollow() 1--> " + ex.Message, Globals.Path_TwtErrorLogs);
                                }
                            }
                        }
                        int counterdata = 0;
                        xBeginSearchAfter = null;
                        string dataDescription = string.Empty;
                        xNode = xml.SearchForAttribute(xBeginSearchAfter, "h1", "class", "ProfileHeaderCard-name");
                        while ((xNode != null))
                        {
                            xBeginSearchAfter = xNode;
                            if (counterdata == 0)
                            {
                                ProfileName = xNode.AccumulateTagContent("text", "script|style").Replace("Verified account", "");
                                counterdata++;
                            }
                            else if (counterdata == 1)
                            {
                                website = xNode.AccumulateTagContent("text", "script|style");
                                counterdata++;
                            }
                            else
                            {
                                break;
                            }
                            //xNode = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "profile-field");
                            xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "u-textUserColor");
                        }

                        xBeginSearchAfter = null;
                        dataDescription = string.Empty;
                        xNode = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "screen-name");
                        //while ((xNode != null))
                        //{
                        //    xBeginSearchAfter = xNode;
                        //    TweetUsername = xNode.AccumulateTagContent("text", "script|style");
                        //    break;
                        //}

                        xBeginSearchAfter = null;
                        dataDescription = string.Empty;
                        xNode = xml.SearchForAttribute(xBeginSearchAfter, "p", "class", "ProfileHeaderCard-bio u-dir");//bio profile-field");
                        while ((xNode != null))
                        {
                            xBeginSearchAfter = xNode;
                            Bio = xNode.AccumulateTagContent("text", "script|style").Replace("&#39;", "'").Replace("&#13;&#10;", string.Empty).Trim();
                            break;
                        }

                        xBeginSearchAfter = null;
                        dataDescription = string.Empty;
                        //xNode = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "location profile-field");
                        xNode = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "ProfileHeaderCard-locationText u-dir");
                        while ((xNode != null))
                        {
                            xBeginSearchAfter = xNode;
                            Location = xNode.AccumulateTagContent("text", "script|style");
                            break;
                        }

                        int counterData = 0;
                        xBeginSearchAfter = null;
                        dataDescription = string.Empty;
                        xNode = xml.SearchForAttribute(xBeginSearchAfter, "p", "class", "ProfileHeaderCard-bio u-dir");//bio profile-field");
                        while ((xNode != null))
                        {
                            xBeginSearchAfter = xNode;
                            Bio = xNode.AccumulateTagContent("text", "script|style").Replace("&#39;", "'").Replace("&#13;&#10;", string.Empty).Trim();
                            break;
                        }

                        xBeginSearchAfter = null;
                        dataDescription = string.Empty;
                        //xNode = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "location profile-field");
                        xNode = xml.SearchForAttribute(xBeginSearchAfter, "span", "class", "ProfileHeaderCard-locationText u-dir");//location profile-field");
                        while ((xNode != null))
                        {
                            xBeginSearchAfter = xNode;
                            Location = xNode.AccumulateTagContent("text", "script|style");
                            break;
                        }

                        xBeginSearchAfter = null;
                        dataDescription = string.Empty;
                        //xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "data-element-term", "tweet_stats");
                        xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-nav");
                        while ((xNode != null))
                        {
                            xBeginSearchAfter = xNode;
                            if (counterData == 0)
                            {
                                NoOfTweets = xNode.AccumulateTagContent("text", "script|style").Replace("Tweets", string.Empty).Replace(",", string.Empty).Replace("Tweet", string.Empty);
                                counterData++;
                            }
                            else if (counterData == 1)
                            {
                                Followings = xNode.AccumulateTagContent("text", "script|style").Replace(" Following", string.Empty).Replace(",", string.Empty).Replace("Following", string.Empty);
                                counterData++;
                            }
                            else if (counterData == 2)
                            {
                                Followers = xNode.AccumulateTagContent("text", "script|style").Replace("Followers", string.Empty).Replace(",", string.Empty).Replace("Follower", string.Empty);
                                counterData++;
                            }
                            else
                            {
                                break;
                            }
                            //xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "js-nav");
                            xNode = xml.SearchForAttribute(xBeginSearchAfter, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-openSignupDialog js-nonNavigable u-textUserColor");
                        }

                        if (!string.IsNullOrEmpty(userids))
                        {
                            lock (WhoTofollowThreadLock)
                            {
                                GlobusFileHelper.AppendStringToTextfileNewLine(userids + "," + TweetUsername + "," + ProfileName + "," + Bio.Replace(",", "") + "," + Location.Replace(",", "") + "," + website + "," + NoOfTweets.Replace(",", "").Replace("Tweets", "") + "," + Followers.Replace(",", "").Replace("Following", "") + "," + Followings.Replace(",", "").Replace("Followers", "").Replace("Follower", ""), Globals.Path_KeywordScrapedListData + ".csv");
                            }
                            AddToScrapeLogs("[ " + DateTime.Now + " ] => [ " + userids + "," + TweetUsername + "," + ProfileName + "," + Bio.Replace(",", "") + "," + Location + "," + website + "," + NoOfTweets + "," + Followers + "," + Followings + " ]");
                        }
                    }
                    catch (Exception ex)
                    {
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> ScrapeWhotoFollow() 3--> " + ex.Message, Globals.Path_ScrapeUsersErroLog);
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> ScrapeWhotoFollow() 3--> " + ex.Message, Globals.Path_TwtErrorLogs);
                    }
                }
                AddToScrapeLogs("[ " + DateTime.Now + " ] => [ Finished Scraping For " + tweetAccountManager.Username + " ]");
                AddToProxysLogs("-----------------------------------------------------------------------------------------------------------------------");
            }
            catch (Exception ex)
            {
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> ScrapeWhotoFollow() 2--> " + ex.Message, Globals.Path_ScrapeUsersErroLog);
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> ScrapeWhotoFollow() 2--> " + ex.Message, Globals.Path_TwtErrorLogs);
            }
        }
예제 #14
0
        /// <summary>
        /// Downloads the public Twitter profile page of a single user, extracts the profile
        /// fields from it and appends them as one CSV row to <c>Globals.Path_UserListInfoData</c>
        /// (writing the header row first when the file does not exist yet).
        /// </summary>
        /// <param name="param">
        /// An <see cref="Array"/> whose first element is the screen name (a string) to scrape.
        /// The <c>object</c> signature suggests a thread-start style callback; callers are not
        /// visible from this method, so that is an assumption to confirm.
        /// </param>
        /// <remarks>
        /// The method is best-effort and never throws: when the argument does not carry a usable
        /// screen name it returns silently, and any failure while scraping is reported through
        /// <c>AddToLog_ScrapMember</c> instead of being swallowed without a trace.
        /// </remarks>
        public void scrapUserInfo(object param)
        {
            string UserName = string.Empty;

            try
            {
                // Validate the argument up front instead of relying on a swallowed cast exception.
                Array paramsArray = param as Array;
                if (paramsArray == null || paramsArray.Length == 0)
                {
                    return;
                }

                UserName = paramsArray.GetValue(0) as string;
                if (string.IsNullOrEmpty(UserName) || UserName.Trim().Length == 0)
                {
                    // Without a screen name the request below would hit the site root, not a profile.
                    return;
                }

                string userId = string.Empty;
                string ProfileName = string.Empty;
                string Location = string.Empty;
                string Bio = string.Empty;
                string website = string.Empty;
                string NoOfTweets = string.Empty;
                string Followers = string.Empty;
                string Followings = string.Empty;

                ChilkatHttpHelpr objChilkat = new ChilkatHttpHelpr();
                GlobusHttpHelper HttpHelper = new GlobusHttpHelper();
                Uri profileUri = new Uri("https://twitter.com/" + UserName.Trim());

                string ProfilePageSource = HttpHelper.getHtmlfromUrl(profileUri, "", "");
                if (string.IsNullOrEmpty(ProfilePageSource))
                {
                    // Single retry before giving up on this user.
                    ProfilePageSource = HttpHelper.getHtmlfromUrl(profileUri, "", "");
                }
                if (string.IsNullOrEmpty(ProfilePageSource))
                {
                    AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [ User  " + UserName + " is not exist or page source getting null.]");
                    return;
                }

                if (ProfilePageSource.Contains("Account suspended"))
                {
                    AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [ User  " + UserName + " is suspended ]");
                    return;
                }

                #region Convert HTML to XML

                string xHtml = objChilkat.ConvertHtmlToXml(ProfilePageSource);
                Chilkat.Xml xml = new Chilkat.Xml();
                xml.LoadXml(xHtml);

                // Typed null used as the "search from the beginning" argument of SearchForAttribute.
                Chilkat.Xml searchFromStart = null;
                #endregion

                // Profile name comes from the header card; the website is taken from the first
                // "u-textUserColor" link that follows it.
                Chilkat.Xml xNode = xml.SearchForAttribute(searchFromStart, "h1", "class", "ProfileHeaderCard-name");
                if (xNode != null)
                {
                    ProfileName = xNode.AccumulateTagContent("text", "script|style").Replace("Verified account", " ");

                    xNode = xml.SearchForAttribute(xNode, "a", "class", "u-textUserColor");
                    if (xNode != null)
                    {
                        website = xNode.AccumulateTagContent("text", "script|style");
                        if (website.Contains("Twitter Status"))
                        {
                            website = "";
                        }
                    }
                }

                xNode = xml.SearchForAttribute(searchFromStart, "p", "class", "ProfileHeaderCard-bio u-dir");
                if (xNode != null)
                {
                    Bio = xNode.AccumulateTagContent("text", "script|style").Replace("&#39;", "'").Replace("&#13;&#10;", string.Empty).Trim();
                }

                xNode = xml.SearchForAttribute(searchFromStart, "span", "class", "ProfileHeaderCard-locationText u-dir");
                if (xNode != null)
                {
                    Location = xNode.AccumulateTagContent("text", "script|style");
                }

                // Stats: the first match (class ending in "js-nav") is read as the tweet count; the
                // next two matches of the "js-openSignupDialog" variant that follow it are read as
                // following and followers, in that order.
                const string signupStatClass = "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-openSignupDialog js-nonNavigable u-textUserColor";

                xNode = xml.SearchForAttribute(searchFromStart, "a", "class", "ProfileNav-stat ProfileNav-stat--link u-borderUserColor u-textCenter js-tooltip js-nav");
                if (xNode != null)
                {
                    NoOfTweets = xNode.AccumulateTagContent("text", "script|style").Replace("Tweets", string.Empty).Replace(",", string.Empty).Replace("Tweet", string.Empty);

                    xNode = xml.SearchForAttribute(xNode, "a", "class", signupStatClass);
                    if (xNode != null)
                    {
                        Followings = xNode.AccumulateTagContent("text", "script|style").Replace(" Following", string.Empty).Replace(",", string.Empty).Replace("Following", string.Empty);

                        xNode = xml.SearchForAttribute(xNode, "a", "class", signupStatClass);
                        if (xNode != null)
                        {
                            Followers = xNode.AccumulateTagContent("text", "script|style").Replace("Followers", string.Empty).Replace(",", string.Empty).Replace("Follower", string.Empty);
                        }
                    }
                }

                // Missing markers yield an empty id rather than an exception.
                userId = ExtractProfileId(ProfilePageSource);

                // The page references one of the numbered default avatars (0..6) when the user
                // has not uploaded a picture.
                string IsProfilePIc = "Yes";
                for (int avatarIndex = 0; avatarIndex <= 6; avatarIndex++)
                {
                    if (ProfilePageSource.Contains("default_profile_" + avatarIndex + "_400x400"))
                    {
                        IsProfilePIc = "No";
                        break;
                    }
                }

                if (!File.Exists(Globals.Path_UserListInfoData))
                {
                    GlobusFileHelper.AppendStringToTextfileNewLine("USERID , USERNAME , PROFILE NAME , BIO , LOCATION , WEBSITE , NO OF TWEETS , FOLLOWERS , FOLLOWINGS, ProfilePic", Globals.Path_UserListInfoData);
                }

                // Every free-text field is stripped of commas so a value such as "Doe, Jane"
                // cannot shift the CSV columns. The counts already had labels and commas removed
                // when they were extracted above.
                string csvRow = userId + "," + UserName + "," + ProfileName.Replace(",", "") + "," + Bio.Replace(",", "") + "," + Location.Replace(",", "") + "," + website.Replace(",", "") + "," + NoOfTweets + "," + Followers + "," + Followings + "," + IsProfilePIc;
                GlobusFileHelper.AppendStringToTextfileNewLine(csvRow, Globals.Path_UserListInfoData);

                AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [ " + userId + "," + UserName + "," + ProfileName + "," + Bio.Replace(",", "") + "," + Location + "," + website + "," + NoOfTweets + "," + Followers + "," + Followings + " ," + IsProfilePIc + "]");
            }
            catch (Exception ex)
            {
                // Keep the method best-effort, but make the failure visible instead of
                // silently discarding it.
                try
                {
                    AddToLog_ScrapMember("[ " + DateTime.Now + " ] => [ Error while scraping user " + UserName + " : " + ex.Message + " ]");
                }
                catch (Exception)
                {
                    // Reporting itself failed; there is no other channel available here and the
                    // method must not throw, so the secondary failure is deliberately dropped.
                }
            }
        }

        /// <summary>
        /// Extracts the numeric profile id that follows the "profile_id" marker in the page source.
        /// </summary>
        /// <param name="pageSource">Raw HTML of the profile page; must not be null.</param>
        /// <returns>
        /// The extracted id, or an empty string when the marker (or the comma that ends the
        /// value) is not present.
        /// </returns>
        private static string ExtractProfileId(string pageSource)
        {
            string candidate = ReadValueAfterMarker(pageSource, "profile_id", false);
            if (candidate == null)
            {
                return string.Empty;
            }

            // An implausibly long value means the first marker hit was not the id field;
            // retry against the HTML-escaped form and keep the first result if that fails too.
            if (candidate.Length > 15)
            {
                string escapedCandidate = ReadValueAfterMarker(pageSource, "profile_id&quot", true);
                if (escapedCandidate != null)
                {
                    candidate = escapedCandidate;
                }
            }

            return candidate;
        }

        /// <summary>
        /// Returns the text between the first occurrence of <paramref name="marker"/> and the next
        /// comma, with the marker, quotes (literal and "&amp;quot;") and colons removed and the
        /// result trimmed.
        /// </summary>
        /// <param name="pageSource">Text to search; must not be null.</param>
        /// <param name="marker">Marker that precedes the value.</param>
        /// <param name="stripSemicolons">When true, semicolons are removed from the value as well.</param>
        /// <returns>The cleaned value, or null when the marker or the terminating comma is missing.</returns>
        private static string ReadValueAfterMarker(string pageSource, string marker, bool stripSemicolons)
        {
            int markerIndex = pageSource.IndexOf(marker, StringComparison.Ordinal);
            if (markerIndex < 0)
            {
                return null;
            }

            string remainder = pageSource.Substring(markerIndex).Replace(marker, "");
            int commaIndex = remainder.IndexOf(",", StringComparison.Ordinal);
            if (commaIndex < 0)
            {
                return null;
            }

            string value = remainder.Substring(0, commaIndex).Replace("&quot;", "").Replace("\"", "").Replace(":", "");
            if (stripSemicolons)
            {
                value = value.Replace(";", "");
            }

            return value.Trim();
        }