/// <summary>
/// Scrapes the Chrome user-agent listing page on developers.whatismybrowser.com
/// and collects the user-agent strings found in its table.
/// </summary>
/// <returns>
/// One <see cref="UserAgentsDto"/> per table row that contains a non-empty
/// user-agent link; an empty list when no row matches.
/// </returns>
public static List<UserAgentsDto> GetUserAgents()
{
    List<UserAgentsDto> userAgents = new List<UserAgentsDto>();

    // Page that publishes a table of known user-agent strings.
    WebPage page = ScrapingHelperClass.GoToPage("https://developers.whatismybrowser.com/useragents/explore/software_name/chrome/2");

    var tableRows = page.Html.CssSelect("div.corset table tbody tr");

    foreach (var row in tableRows)
    {
        // FirstOrDefault rather than First: a row without the expected link
        // is skipped instead of throwing and discarding everything scraped so far.
        var link = row.CssSelect("td.useragent a").FirstOrDefault();
        if (link == null)
        {
            continue;
        }

        // Surrounding markup whitespace is not part of the user-agent string.
        string userAgent = link.InnerText.Trim();
        if (userAgent.Length == 0)
        {
            continue;
        }

        userAgents.Add(new UserAgentsDto { UserAgent = userAgent });
    }

    return userAgents;
}
/// <summary>
/// Program entry logic (treat this as the main method): scrapes words page by
/// page and inserts each one into the Words database, then reports how long
/// the run took and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static async Task MainAsync(string[] args)
{
    // Stopwatch measures real elapsed time. Subtracting DateTime.Minute values
    // only compares minute-of-hour and goes wrong (even negative) as soon as
    // the run crosses an hour boundary.
    System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();

    // Number of the last page to scrape.
    int pageNumber = ScrapingHelperClass.GetPageNumber();

    // 'using' guarantees the connection is closed and disposed even when
    // scraping or an insert throws part-way through.
    using (SqlConnection connection = new SqlConnection(@"Server=DESKTOP-6FF3SOR\SQLEXPRESS;Database=Words;Trusted_Connection=True"))
    {
        connection.Open();

        // NOTE(review): the loop starts at page 2, so page 1 is never fetched
        // here - confirm that this is intentional.
        for (int i = 2; i <= pageNumber; i++)
        {
            // Words scraped from page i.
            List<WordDto> words = await ScrapingHelperClass.GetDataFromPageAsync(i);

            foreach (var word in words)
            {
                DatabaseManipulation.Insert(word.Word, connection);
            }

            Console.WriteLine($"Done with {i} page.");
        }
    }

    stopwatch.Stop();

    // Total run time of the crawler, in minutes.
    Console.WriteLine($"Minute: {stopwatch.Elapsed.TotalMinutes:F2}\n");
    Console.WriteLine("Inserted all words!\nPress any key to close program . . .");
    Console.ReadKey();
}
/// <summary>
/// Scrapes the free proxy table on us-proxy.org and collects the IP address
/// and port of each listed proxy.
/// </summary>
/// <returns>
/// One <see cref="ProxiesDto"/> per table row whose first two cells hold a
/// non-empty IP address and a numeric port; an empty list when no row matches.
/// </returns>
public static List<ProxiesDto> GetProxies()
{
    List<ProxiesDto> proxies = new List<ProxiesDto>();

    // Page that publishes a table of free proxy IPs and ports.
    WebPage page = ScrapingHelperClass.GoToPage("https://www.us-proxy.org/");

    var tableRows = page.Html.CssSelect("div.table-responsive table tbody tr");

    foreach (var row in tableRows)
    {
        // Select the cells once per row; only the first two (IP, port) are needed.
        var cells = row.CssSelect("td").Take(2).ToList();
        if (cells.Count < 2)
        {
            continue;
        }

        string ipAddress = cells[0].InnerText.Trim();

        // TryParse rather than Parse: a row with a non-numeric port is skipped
        // instead of throwing and discarding everything scraped so far.
        int port;
        if (ipAddress.Length == 0 || !int.TryParse(cells[1].InnerText.Trim(), out port))
        {
            continue;
        }

        proxies.Add(new ProxiesDto
        {
            IpAddress = ipAddress,
            Port = port
        });
    }

    return proxies;
}