// Shuts the Chrome driver down and records the outcome in log file 1.
// (The original author notes that quitting also drops the session cookies.)
// Returns true when driverChrome.Quit() completes, false when it throws.
public bool DriverQuit()
{
    bool quitSucceeded;

    try
    {
        driverChrome.Quit();
        LogHolder.AddLogFile("Operation is successful. Quitted from driver", 1);
        quitSucceeded = true;
    }
    catch (Exception failure)
    {
        // The failure is reported through the log and the return value, never rethrown.
        string report = "Cannot quit from driver. See error message for detail information: " + failure.Message;
        LogHolder.AddLogFile(report, 1);
        quitSucceeded = false;
    }

    return quitSucceeded;
}
// Reloads the page currently shown in the browser and logs the result to log file 1.
// Returns true when the refresh completes, false when the driver throws.
public bool RefreshDriver()
{
    try
    {
        driverChrome.Navigate().Refresh();

        // Log the address the driver actually reports. The url field is not
        // updated by SetURL (it navigates with its own parameter), so logging
        // the field could name a page other than the one that was refreshed.
        string currentUrl = driverChrome.Url;
        LogHolder.AddLogFile("Given address page is refreshed: " + currentUrl, 1);
        return true;
    }
    catch (Exception e)
    {
        // The driver may be unusable here, so fall back to the stored url field
        // instead of querying the driver again.
        LogHolder.AddLogFile("Given address page cannot be refreshed: " + url + e.Message, 1);
        return false;
    }
}
// Builds the Chrome driver, the four explicit waits and the JavaScript executor,
// then logs that the driver has started for the configured url.
private void DriverSettings()
{
    ChromeOptions options = new ChromeOptions();

    // Headless mode: remove this argument to watch the browser while it works.
    options.AddArgument("--headless");

    // Content setting value 2 turns image loading off so pages load faster.
    options.AddUserProfilePreference("profile.default_content_setting_values.images", 2);

    // The placeholder below must be replaced with the folder that holds the Chrome driver.
    driverChrome = new ChromeDriver(@"Write your test driver's path here", options);
    driverChrome.Manage().Window.Maximize();

    // Element wait budgets: 10 milliseconds, 3 seconds, 30 seconds and 1 minute.
    waitZ = new WebDriverWait(driverChrome, TimeSpan.FromMilliseconds(10));
    waitS = new WebDriverWait(driverChrome, TimeSpan.FromSeconds(3));
    waitM = new WebDriverWait(driverChrome, TimeSpan.FromSeconds(30));
    waitB = new WebDriverWait(driverChrome, TimeSpan.FromMinutes(1));

    js = (IJavaScriptExecutor)driverChrome;

    string startedMessage = "Given url " + url + " driver is started";
    LogHolder.AddLogFile(startedMessage, 1);
}
// Console entry point: opens both log files, shows the menu until the user
// picks "0", then closes the logs and waits for a final key press.
static void Main(string[] args)
{
    const string menuText =
        "\n\nSelect what you want:"
        + "\n1-> Just search for username, it won't work for protected accounts"
        + "\n2-> Enter twitter using username and password, then search for username you can access protected user's tweets if "
        + "you are following him/her"
        + "\n0-> Exit from the program";

    // Log 1 receives driver messages and errors, log 2 receives the scraped tweets.
    LogHolder.CreateLogFile(1);
    LogHolder.CreateLogFile(2);

    Console.WriteLine("Hello! Welcome to TScraper. TScraper keeps all the tweets that sent for given username into the text file.");

    bool keepRunning = true;
    do
    {
        Console.WriteLine(menuText);
        string choice = Console.ReadLine();

        if (choice == "1")
        {
            twitterWithoutLoggingIn();
        }
        else if (choice == "2")
        {
            twitterWithLoggingIn();
        }
        else if (choice == "0")
        {
            Console.WriteLine("Good Bye!\n");
            keepRunning = false;
        }
        else
        {
            Console.WriteLine("Please enter a valid input\n\n");
        }
    }
    while (keepRunning);

    LogHolder.CloseLogFile(1);
    LogHolder.CloseLogFile(2);

    // Keeps the console window open until the user presses Enter.
    Console.ReadLine();
}
// Navigates the browser to the given url.
// Up to three navigation attempts are made. After each failed attempt except
// the last, the page is refreshed; a successful refresh counts as success.
// Returns true when navigation or a follow-up refresh succeeds, false once
// all attempts are used up.
public bool SetURL(string url)
{
    const int maxAttempts = 3;

    // The counter must live outside the loop. Declaring it inside the loop
    // body reset it on every pass, so the limit was never reached and a
    // permanently failing navigation looped forever.
    int failedAttempts = 0;

    while (failedAttempts < maxAttempts)
    {
        try
        {
            driverChrome.Navigate().GoToUrl(url);
            return true;
        }
        catch (Exception e)
        {
            LogHolder.AddLogFile(url + " is cannot be loaded. It could be connection problem. Please check your connection." + e.Message, 1);
            failedAttempts++;

            // Try a refresh before the next navigation attempt; skip it once
            // the attempt budget is spent.
            if (failedAttempts < maxAttempts && RefreshDriver())
            {
                return true;
            }
        }
    }

    return false;
}
// Interactive scraping session.
// Asks which search to run (selection 1: a user's timeline, selection 2: a
// user's tweets between two dates; anything else returns immediately),
// navigates the driver there, asks for a wait time in seconds, then scrolls
// the page collecting "date --> text" entries and writes the distinct ones
// to log file 2.
private static void scrapTweets(Driver driver)
{
    // First make a selection to specify what you want
    int selection = selectFunction();
    if (selection == 1)
    {
        Console.WriteLine("Please enter a username:\t");
        string username = Console.ReadLine();
        driver.SetURL("https://www.twitter.com/" + username);
    }
    else if (selection == 2)
    {
        Console.WriteLine("Please enter a start date(YYYY-MM-DD format):\t");
        string startDate = Console.ReadLine();
        Console.WriteLine("Please enter an end date(YYYY-MM-DD format):\t");
        string endDate = Console.ReadLine();
        Console.WriteLine("Please enter a username:\t");
        string username = Console.ReadLine();
        driver.SetURL("https://twitter.com/search?q=(from%3A" + username + ")%20since%3A" + startDate + "%20until%3A" + endDate + "%20-filter%3Areplies&src=typed_query");
    }
    else
    {
        return;
    }

    Console.WriteLine("Write time in seconds for waiting. If you wait more, you will scrap more\nNumber must start with 1 (nearly 100 tweets)");

    // Largest value whose millisecond equivalent still fits in an int; the
    // wait below is computed as random * takenNum * 1000 and converted to int.
    const int maxWaitSeconds = int.MaxValue / 1000;

    // Validate instead of Convert.ToInt32: non-numeric text used to throw and
    // end the session, and a negative or huge value made the later
    // Thread.Sleep / Convert.ToInt32 call throw from inside the catch handler.
    int takenNum;
    while (true)
    {
        string rawSeconds = Console.ReadLine();
        if (rawSeconds == null)
        {
            // Input stream is closed, so asking again cannot succeed;
            // continue with the smallest allowed wait.
            takenNum = 1;
            break;
        }

        if (int.TryParse(rawSeconds, out takenNum) && takenNum >= 1 && takenNum <= maxWaitSeconds)
        {
            break;
        }

        Console.WriteLine("Please enter a valid input\n\n");
    }

    Console.WriteLine("Operation is started please wait now");
    System.Threading.Thread.Sleep(2000);

    // random: picks the length of the pause taken when reading a tweet fails
    // k: counter handed to and returned by TwitterScrollDown; the loop stops at 6
    Random random = new Random();
    int k = 0;
    List<string> allTweets = new List<string>();

    pageLoading(driver);
    if (usernameNotAvailable(driver))
    {
        Console.WriteLine("Username cannot found check again");
        driver.DriverQuit();
        return;
    }

    // NOTE(review): scraping runs only when both helpers return true, so they
    // presumably report "account is accessible" and "page is loaded" despite
    // their names - confirm against their definitions.
    if (protectedAccount(driver) && notLoaded(driver))
    {
        while (true)
        {
            // Collect the tweets and dates currently present on the page.
            var tweetList = driver.FindElements("TweetBoxList", "Twitter");
            var dateList = driver.FindElements("TweetDateList", "Twitter");
            for (int i = 0; i < tweetList.Count; i++)
            {
                try
                {
                    allTweets.Add(dateList.ElementAt(i).GetAttribute("title") + " --> " + tweetList.ElementAt(i).GetProperty("innerText"));
                }
                catch (Exception)
                {
                    // Deliberate best effort: an element that cannot be read
                    // is skipped after a random pause of up to takenNum seconds.
                    System.Threading.Thread.Sleep(Convert.ToInt32(random.NextDouble() * takenNum * 1000));
                }
            }

            // Scroll further; per the original notes, lowering the threshold
            // below shortens the wait at the end of the page.
            k = driver.TwitterScrollDown(k);
            if (k == 6)
            {
                break;
            }

            // If the "Reload" element is present, try to reload and stop when that fails.
            if (driver.CheckElementAvailable("Reload", -1, "Twitter"))
            {
                if (!tryToReload(driver))
                {
                    break;
                }
            }
        }

        // Every pass re-reads tweets that are still on the page, so the same
        // entry is collected several times; Distinct removes the duplicates.
        List<string> distinctTweet = allTweets.Distinct().ToList();
        foreach (var tweet in distinctTweet)
        {
            LogHolder.AddLogFile(tweet, 2);
        }
    }
}