Beispiel #1
0
        /// <summary>
        /// Program entry point. Hands the first page of the UN Global Compact
        /// participant search (50 results per page, ascending sort) to
        /// <c>UNGC_DB.UNGC_DB_entries</c>.
        /// </summary>
        /// <returns>Always 0.</returns>
        public static int Main()
        {
            // Page 1 of the participant search; the "page=1" value is the part
            // that later gets swapped out to reach the other pages.
            const string first_page_url = "https://www.unglobalcompact.org/what-is-gc/participants/search?page=1&search%5Bkeywords%5D=&search%5Bper_page%5D=50&search%5Bsort_direction%5D=asc&search%5Bsort_field%5D=&utf8=%E2%9C%93";

            UNGC_DB.UNGC_DB_entries(first_page_url);

            return 0;
        }
Beispiel #2
0
        /// <summary>
        /// Scrapes every page of the participant listing and passes each page's
        /// rows to <c>UNGC_DB.enter_data</c>.
        /// </summary>
        /// <param name="url">
        /// Listing URL for the first page. The page number is taken from its
        /// <c>page=N</c> query parameter; if there is none, the first '1' in
        /// the URL is treated as the page-number placeholder.
        /// </param>
        /// <param name="cmd">Command forwarded unchanged to <c>UNGC_DB.enter_data</c>.</param>
        /// <exception cref="ArgumentException">
        /// <paramref name="url"/> contains neither a <c>page=N</c> parameter nor a '1'.
        /// </exception>
        public static void scrape_data(string url, NpgsqlCommand cmd)
        {
            string before_page;
            string after_page;
            split_at_page_number(url, out before_page, out after_page);

            UNGC_DB from_html = new UNGC_DB();
            int     pg_count  = find_page_count(url);

            for (int i = 0; i < pg_count; i++)
            {
                Console.WriteLine(i);   // progress output: zero-based index of the page being scraped

                // Pages are 1-based in the URL, hence i + 1.
                string page_url = before_page + (i + 1).ToString() + after_page;
                List <string[]> scraped = from_html.get_page(page_url);
                UNGC_DB.enter_data(scraped, cmd);
            }
        }

        /// <summary>
        /// Splits <paramref name="url"/> around its page number so a different
        /// number can be inserted between the two halves.
        /// </summary>
        /// <remarks>
        /// Splitting on every '1' (the previous approach) dropped or corrupted
        /// the tail of any URL that contained a second '1', so only the digits of
        /// the <c>page=</c> query parameter are removed here.
        /// </remarks>
        private static void split_at_page_number(string url, out string prefix, out string suffix)
        {
            const string page_key = "page=";

            int search_from = 0;
            while (search_from < url.Length)
            {
                int key_at = url.IndexOf(page_key, search_from, StringComparison.Ordinal);
                if (key_at < 0)
                {
                    break;
                }

                int digits_from = key_at + page_key.Length;
                int digits_to   = digits_from;
                while (digits_to < url.Length && char.IsDigit(url[digits_to]))
                {
                    digits_to++;
                }

                // Only accept a real query parameter ("?page=" / "&page="), not a
                // longer key that merely ends in "page=".
                bool starts_parameter = key_at > 0 && (url[key_at - 1] == '?' || url[key_at - 1] == '&');
                if (starts_parameter && digits_to > digits_from)
                {
                    prefix = url.Substring(0, digits_from);
                    suffix = url.Substring(digits_to);
                    return;
                }

                search_from = digits_from;
            }

            // Legacy fallback: treat the first '1' as the page-number placeholder.
            int one_at = url.IndexOf('1');
            if (one_at < 0)
            {
                throw new ArgumentException("URL contains no page number to substitute.", "url");
            }

            prefix = url.Substring(0, one_at);
            suffix = url.Substring(one_at + 1);
        }
Beispiel #3
0
        /// <summary>
        /// Opens the PostgreSQL connection, creates the <c>UNGC</c> table if it
        /// does not exist yet, and then runs <c>UNGC_DB.scrape_data</c> with the
        /// same command object.
        /// </summary>
        /// <param name="url">Listing URL forwarded unchanged to <c>UNGC_DB.scrape_data</c>.</param>
        public static void UNGC_DB_entries(string url)
        {
            // One "<column> <sql type>" entry per field of the UNGC data, in table order.
            string[] column_defs =
            {
                "NAME varchar(250)",
                "DATE_JOINED date",
                "DATE_DUE date",
                "COUNTRY varchar(150)",
                "ORG_TYPE varchar(150)",
                "SECTOR varchar(150)",
                "STATUS varchar(150)",
                "EMPLOYEES int",
                "OWNERSHIP varchar(150)"
            };
            string create_table_sql = "CREATE TABLE IF NOT EXISTS UNGC(" + string.Join(", ", column_defs) + ");";

            // NOTE(review): credentials are hard-coded here; consider moving them to configuration.
            using (NpgsqlConnection conn = new NpgsqlConnection("Host=localhost;Username=Seth;Database=ungc_test;Password=1234"))
            {
                conn.Open();

                using (NpgsqlCommand cmd = new NpgsqlCommand { Connection = conn, CommandText = create_table_sql })
                {
                    cmd.ExecuteNonQuery();          // make sure the table exists before any scraping starts
                    UNGC_DB.scrape_data(url, cmd);  // the same command object is reused downstream
                }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Downloads one participant-list page and collects the data of every
        /// member linked from it.
        /// </summary>
        /// <param name="url">Absolute URL of a participant-list page.</param>
        /// <returns>
        /// One <c>string[]</c> per member row, as produced by
        /// <c>link_info(...).return_all()</c>. Empty when the page has no
        /// participant rows.
        /// </returns>
        /// <remarks>
        /// Tables without body rows, and rows without a member link, are skipped
        /// rather than raising a <see cref="NullReferenceException"/>.
        /// </remarks>
        public List <string[]> get_page(string url)
        {
            const string base_url = "https://www.unglobalcompact.org";   // member links on the page are site-relative

            ScrapingBrowser Browser    = new ScrapingBrowser();
            WebPage         PageResult = Browser.NavigateToPage(new Uri(url), 0, "", null);
            List <string[]> page_data  = new List <string[]>();
            UNGC_DB         li         = new UNGC_DB();                  // instance used to call link_info

            foreach (HtmlNode table in PageResult.Html.CssSelect(".participants-table"))
            {
                // SelectNodes returns null (not an empty collection) when nothing matches.
                HtmlNodeCollection rows = table.SelectNodes("tbody/tr");
                if (rows == null)
                {
                    continue;
                }

                foreach (HtmlNode row in rows)
                {
                    HtmlNode anchor = row.SelectSingleNode("th/a");
                    if (anchor == null)
                    {
                        continue;   // row has no member link
                    }

                    HtmlAttribute href = anchor.Attributes["href"];
                    if (href == null || string.IsNullOrEmpty(href.Value))
                    {
                        continue;   // anchor without a usable target
                    }

                    // Follow the link to the member's own page and keep its fields.
                    string[] scrape = li.link_info(base_url + href.Value).return_all();
                    page_data.Add(scrape);
                }
            }
            return(page_data);
        }