コード例 #1
0
ファイル: frmMain_NewUI.cs プロジェクト: ahmetDostr/twtboard
        /// <summary>
        /// Runs a keyword scrape: reads the requested record count from <c>txtRecords</c>,
        /// queries <c>TwitterDataScrapper</c> for every entry in <c>lstSearchByKeywords</c>,
        /// and starts <c>thread_AddingKeywordScrape</c> to store each scraped user through
        /// <c>clsDBQueryManager.InsertOrUpdateScrapeSetting</c>.
        /// </summary>
        /// <remarks>
        /// The method aborts (after logging) when <c>txtRecords</c> does not contain a valid number.
        /// A value of zero is replaced by the default of 100 records.
        /// Exceptions are not rethrown; their messages are appended to
        /// <c>Globals.Path_ScrapeUsersErroLog</c> and <c>Globals.Path_TwtErrorLogs</c>.
        /// </remarks>
        private void ScrapeKeywordSeacrh()
        {
            try
            {
                TwitterDataScrapper TweetData = new TwitterDataScrapper();

                // A `return` inside the delegate only leaves the delegate, so the validation
                // result is carried out through this flag and checked afterwards.
                bool isRecordCountValid = false;

                txtRecords.Invoke(new MethodInvoker(delegate
                    {
                        string recordsText = txtRecords.Text.Trim();

                        if (string.IsNullOrEmpty(recordsText) || !NumberHelper.ValidateNumber(recordsText))
                        {
                            AddToScrapeLogs("[ " + DateTime.Now + " ] => [ please enter value in number of users ]");
                            return;
                        }

                        TwitterDataScrapper.noOfRecords = Convert.ToInt32(recordsText);
                        if (TwitterDataScrapper.noOfRecords == 0)
                        {
                            AddToScrapeLogs("[ " + DateTime.Now + " ] => [ Do not put Zero value ]");
                            AddToScrapeLogs("[ " + DateTime.Now + " ] => [ Default number of records is 100 ]");
                            TwitterDataScrapper.noOfRecords = 100;
                        }

                        isRecordCountValid = true;
                    }));

                if (!isRecordCountValid)
                {
                    // NOTE(review): Globals.IsDirectedFromFollower is left untouched on this path —
                    // confirm callers do not rely on it being reset when the input is rejected.
                    return;
                }

                AddToScrapeLogs("[ " + DateTime.Now + " ] => [ Scrape by Keyword ]");

                List<TwitterDataScrapper.StructTweetIDs> data = new List<TwitterDataScrapper.StructTweetIDs>();

                // Keep one delegate instance so the finally block removes exactly what was added.
                EventHandler scraperLogHandler = new EventHandler(DataScraperlogger_addToLogger);
                TweetData.logEvents.addToLogger += scraperLogHandler;
                try
                {
                    foreach (string itemKeyword in lstSearchByKeywords)
                    {
                        List<TwitterDataScrapper.StructTweetIDs> keywordData;

                        if (!chkSearchByKeyWordByPeople.Checked)
                        {
                            keywordData = TweetData.NewKeywordStructDataForSearchByKeyword(itemKeyword.Trim());
                            if (keywordData != null)
                            {
                                keywordData = keywordData.Distinct().ToList();
                            }
                        }
                        else
                        {
                            keywordData = TweetData.NewKeywordStructDataSearchByPeople(itemKeyword.Trim());
                        }

                        // Accumulate instead of overwriting, so results from every keyword
                        // are kept rather than only those of the last one.
                        if (keywordData != null)
                        {
                            data.AddRange(keywordData);
                        }
                    }
                }
                finally
                {
                    // Detach even when a scrape call throws.
                    TweetData.logEvents.addToLogger -= scraperLogHandler;
                }

                data = DistinctDataList(data);

                if (data.Count == 0)
                {
                    // Nothing found for the keyword list: fall back to the keyword text box.
                    AddToScrapeLogs("[ " + DateTime.Now + " ] => [ Requesting For 100 User ids ]");
                    data = TweetData.NewKeywordStructData(txtScrapeKeyword.Text);
                }

                AddToScrapeLogs("[ " + DateTime.Now + " ] => [ Please Wait Till Data Is Retrieving ]");

                AddToScrapeLogs("[ " + DateTime.Now + " ] => [ Adding Data To DataBase ]");
                Globals.lstScrapedUserIDs = Globals.lstScrapedUserIDs.Distinct().ToList();

                thread_AddingKeywordScrape = new Thread(() =>
                {
                    // The enclosing try/catch does not cover this thread, and an unhandled
                    // exception here would terminate the process, so failures are logged locally.
                    try
                    {
                        foreach (TwitterDataScrapper.StructTweetIDs item in data)
                        {
                            if (!string.IsNullOrEmpty(item.username__Tweet_User) && item.ID_Tweet_User != "null")
                            {
                                clsDBQueryManager DataBase = new clsDBQueryManager();
                                DataBase.InsertOrUpdateScrapeSetting(item.ID_Tweet_User, item.username__Tweet_User, item.ID_Tweet);
                            }
                        }

                        AddToScrapeLogs("[ " + DateTime.Now + " ] => [ Exported location :- " + Globals.Path_KeywordScrapedList + " ]");
                        AddToScrapeLogs("[ " + DateTime.Now + " ] => [ PROCESS COMPLETED ]");
                        AddToScrapeLogs("------------------------------------------------------------------------------------------------------------------------------------------");
                    }
                    catch (Exception threadEx)
                    {
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> ScrapeKeywordSeacrh() --> thread_AddingKeywordScrape --> " + threadEx.Message, Globals.Path_ScrapeUsersErroLog);
                        Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> ScrapeKeywordSeacrh() --> thread_AddingKeywordScrape --> " + threadEx.Message, Globals.Path_TwtErrorLogs);
                    }
                });

                thread_AddingKeywordScrape.Start();

                if (Globals.IsDirectedFromFollower)
                {
                    Thread.Sleep(1000);
                    Globals.IsDirectedFromFollower = false;
                    AddToLog_Follower("[ " + DateTime.Now + " ] => [ " + data.Count + " User ids Scraped and Added To Follow List ]");

                    // Tab selection goes through Invoke, like the txtRecords access above.
                    Tb_AccountManager.Invoke(new MethodInvoker(delegate
                    {
                        Tb_AccountManager.SelectedIndex = 2;
                    }));
                }
            }
            catch (Exception ex)
            {
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine(DateTime.Now + " --> Error --> ScrapeKeywordSeacrh() --> " + ex.Message, Globals.Path_ScrapeUsersErroLog);
                Globussoft.GlobusFileHelper.AppendStringToTextfileNewLine("Error --> ScrapeKeywordSeacrh() --> " + ex.Message, Globals.Path_TwtErrorLogs);
            }
        }