/// <summary>
/// Program entry point: passes the first page of the UN Global Compact participant
/// search to <c>UNGC_DB.UNGC_DB_entries</c>.
/// </summary>
/// <returns>0 once <c>UNGC_DB.UNGC_DB_entries</c> has returned.</returns>
public static int Main()
{
    // Query string asks for page 1, 50 results per page, ascending sort direction.
    const string firstPageUrl = "https://www.unglobalcompact.org/what-is-gc/participants/search?page=1&search%5Bkeywords%5D=&search%5Bper_page%5D=50&search%5Bsort_direction%5D=asc&search%5Bsort_field%5D=&utf8=%E2%9C%93";

    UNGC_DB.UNGC_DB_entries(firstPageUrl);
    return 0;
}
/// <summary>
/// Visits every page of the participant listing and passes each page's scraped rows
/// to <c>UNGC_DB.enter_data</c>.
/// </summary>
/// <param name="url">
/// Listing URL for the first page. The page number is substituted into its
/// <c>page=1</c> query parameter to address the remaining pages.
/// </param>
/// <param name="cmd">Command forwarded unchanged to <c>UNGC_DB.enter_data</c> for every page.</param>
public static void scrape_data(string url, NpgsqlCommand cmd)
{
    string[] url_pieces = split_around_page_number(url);
    UNGC_DB from_html = new UNGC_DB();
    int pg_count = find_page_count(url);

    for (int i = 0; i < pg_count; i++)
    {
        Console.WriteLine(i); // progress output: zero-based index of the page being scraped

        // Pages are numbered from 1, so page i + 1 goes between the two URL halves.
        string page_url = url_pieces[0] + (i + 1).ToString() + url_pieces[1];
        List<string[]> scraped = from_html.get_page(page_url);
        UNGC_DB.enter_data(scraped, cmd);
    }
}

// Returns { text up to and including "page=", text after the page number "1" }.
private static string[] split_around_page_number(string url)
{
    const string first_page = "page=1";

    // Only accept a real "page" query parameter whose value is exactly 1: it must follow
    // '?' or '&' and must not be followed by another digit. Splitting on every '1'
    // character instead would cut the URL in the wrong place whenever another '1'
    // appears in it (for example "per_page=100" or a port number).
    int at = url.IndexOf(first_page, StringComparison.Ordinal);
    while (at >= 0)
    {
        int value_end = at + first_page.Length;
        bool starts_param = at > 0 && (url[at - 1] == '?' || url[at - 1] == '&');
        bool ends_value = value_end == url.Length || !char.IsDigit(url[value_end]);
        if (starts_param && ends_value)
        {
            return new string[] { url.Substring(0, value_end - 1), url.Substring(value_end) };
        }

        at = url.IndexOf(first_page, at + 1, StringComparison.Ordinal);
    }

    // No such parameter: keep the historical behaviour of splitting on the character '1'.
    return url.Split(new char[] { '1' });
}
/// <summary>
/// Opens an Npgsql connection, creates the UNGC table if it does not exist yet, and
/// then passes the same command object to <c>UNGC_DB.scrape_data</c>.
/// </summary>
/// <param name="url">Listing URL forwarded unchanged to <c>UNGC_DB.scrape_data</c>.</param>
public static void UNGC_DB_entries(string url)
{
    // NOTE(review): host, user and password are embedded in source; consider moving
    // them to configuration.
    string connectionSettings = "Host=localhost;Username=Seth;Database=ungc_test;Password=1234";

    // Column names of the UNGC table, in the order the CREATE TABLE template consumes them.
    string[] columns =
    {
        "NAME", "DATE_JOINED", "DATE_DUE",
        "COUNTRY", "ORG_TYPE", "SECTOR", "STATUS", "EMPLOYEES", "OWNERSHIP"
    };
    string createTableSql = string.Format("CREATE TABLE IF NOT EXISTS UNGC({0} varchar(250), {1} date, {2} date, {3} varchar(150), {4} varchar(150), {5} varchar(150), {6} varchar(150), {7} int, {8} varchar(150));", columns);

    using (NpgsqlConnection connection = new NpgsqlConnection(connectionSettings))
    {
        connection.Open();

        using (NpgsqlCommand command = new NpgsqlCommand())
        {
            command.Connection = connection;

            // Make sure the target table exists before any rows are scraped.
            command.CommandText = createTableSql;
            command.ExecuteNonQuery();

            // The scraper receives this command, which is bound to the open connection.
            UNGC_DB.scrape_data(url, command);
        }
    }
}
/// <summary>
/// Loads one participant list page and scrapes the detail page linked from every table row.
/// </summary>
/// <param name="url">URL of a participant list page.</param>
/// <returns>
/// One string array per participant row, as returned by <c>link_info(...).return_all()</c>.
/// Tables without body rows, and rows without a detail link, contribute nothing.
/// </returns>
public List <string[]> get_page(string url)
{
    ScrapingBrowser Browser = new ScrapingBrowser();
    WebPage PageResult = Browser.NavigateToPage(new Uri(url), 0, "", null);
    string base_url = "https://www.unglobalcompact.org"; // row links are appended to this site root
    List<string[]> page_data = new List<string[]>();
    UNGC_DB li = new UNGC_DB(); // separate instance used only to call link_info

    foreach (HtmlNode table in PageResult.Html.CssSelect(".participants-table"))
    {
        // SelectNodes yields null (not an empty collection) when nothing matches,
        // so an empty table must be skipped explicitly.
        var rows = table.SelectNodes("tbody/tr");
        if (rows == null)
        {
            continue;
        }

        foreach (HtmlNode row in rows) // one row per member
        {
            // A row without a link to a member page cannot be followed; skip it
            // instead of failing the whole page.
            HtmlNode anchor = row.SelectSingleNode("th/a");
            if (anchor == null)
            {
                continue;
            }

            var href = anchor.Attributes["href"];
            if (href == null)
            {
                continue;
            }

            string[] scrape = li.link_info(base_url + href.Value).return_all();
            page_data.Add(scrape);
        }
    }

    return page_data;
}