Exemplo n.º 1
0
        public void Post(List <JobPosting> jobs)
        {
            using (SqlConnection con = new SqlConnection(connectionString))
            {
                con.Open();

                for (int i = 0; i < jobs.Count; i++)
                {
                    JobPosting job = jobs[i];
                    try
                    {
                        // Post new job into the database.
                        SqlCommand cmd = ConnectionWrapper("Jobs_insert", con);
                        cmd.Parameters.AddWithValue("@link", job.Url);
                        cmd.Parameters.AddWithValue("@company", job.Company);
                        cmd.Parameters.AddWithValue("@title", job.JobTitle);
                        cmd.Parameters.AddWithValue("@description", job.JobDescription);
                        cmd.Parameters.AddWithValue("@location", job.Location);
                        cmd.Parameters.AddWithValue("@post_date", job.PostDate);
                        cmd.Parameters.AddWithValue("@archived", job.Archived);
                        cmd.Parameters.AddWithValue("@date_applied", job.DateApplied ?? (object)DBNull.Value);
                        cmd.Parameters.Add("@id", SqlDbType.Int).Direction = ParameterDirection.Output;
                        cmd.ExecuteNonQuery();
                    }
                    //if job already exists in the database, update the job's post date.
                    catch (SqlException exp) when(exp.Number == 2601)
                    {
                        SqlCommand c = ConnectionWrapper("Jobs_updatepostdate", con);

                        c.Parameters.AddWithValue("@link", job.Url);
                        c.Parameters.AddWithValue("@post_date", job.PostDate);
                        c.Parameters.AddWithValue("@location", job.Location);
                        c.ExecuteNonQuery();
                        continue;
                        // ignore and continue
                    }
                }
            }
        }
Exemplo n.º 2
0
        /*
         * The scraper will scrape every job listing on linkedin that contains my desired job titles.
         * Job titles include: .NET Developer, C# Developer, Software Engineer, S Developer, Web Developer,
         *                   Full Stack Developer, Back End Engineer, Front End Engineer, B and F Developer.
         *
         */

        public void LIScraper()
        {
            List <JobPosting> jobs = new List <JobPosting>();
            // The URL uses LinkedIn's built-in filters, enabling us to input job titles, distance in miles from a specific location, location, and job type.
            // EX: "keywords=Job+Title distance=15 locationId=PLACES%2Eus%2E7-1-0-19-99&f_TP=1%2C2&f_E=3%2C2&f_  JT=FULL_TIME
            string initialUrl =
                "https://www.linkedin.com/jobs/search?keywords=Software+Developer&distance=15&locationId=PLACES%2Eus%2E7-1-0-19-99&f_TP=1%2C2&f_E=3%2C2&f_JT=FULL_TIME&orig=FCTD&trk=jobs_jserp_facet_exp";

            ChromeOptions options = new ChromeOptions();

            options.AddArgument("--headless");
            options.AddArgument("--incognito");
            options.AddArgument("--ignore-certificate-errors");
            //options.DebuggerAddress = "{http://localhost:49647|2605:e000:840e:f700:d5b4:8734:4391:4048}";
            Trace.WriteLine("Hello");
            Trace.WriteLine(options);

            IWebDriver chromeDriver = new ChromeDriver(options);
            int        start        = 1;
            string     pageRange    = "&start=" + start + "&count=50";
            string     initialRange = initialUrl + pageRange;

            chromeDriver.Url = initialRange;
            var    html          = chromeDriver.PageSource;
            var    parser        = new HtmlParser();
            var    doc           = parser.Parse(html);
            var    listings      = doc.QuerySelectorAll("li.job-listing");
            string findListings  = doc.QuerySelector("div.results-context > div > strong").TextContent;
            int    totalListings = 0;

            if (findListings != null)
            {
                totalListings = Convert.ToInt32(findListings);
            }
            // LinkedIn allows up to 50 job listings on a single page.
            int pages = 1;

            addJobs(initialRange);
            if (totalListings > 50)
            {
                int extraPage = 0;
                if (totalListings % 50 > 0)
                {
                    extraPage = 1;
                }

                pages = (int)Math.Floor((decimal)totalListings / 50) + extraPage;

                for (int j = 1; j < pages; j++)
                {
                    start     = j * 50 + 1;
                    pageRange = "&start=" + start.ToString() + "&count=50";
                    addJobs(initialUrl + pageRange);
                }
            }

            void addJobs(string url)
            {
                if (pages > 1)
                {
                    options = new ChromeOptions();
                    options.AddArgument("--headless");
                    options.AddArgument("--incognito");
                    options.AddArgument("--ignore-certificate-errors");
                    chromeDriver     = new ChromeDriver(options);
                    chromeDriver.Url = url;
                    html             = chromeDriver.PageSource;
                    parser           = new HtmlParser();
                    doc      = parser.Parse(html);
                    listings = doc.QuerySelectorAll("li.job-listing");
                }

                for (int i = 0; i < listings.Length; i++)
                {
                    JobPosting job     = new JobPosting();
                    var        listing = listings[i]
                                         .QuerySelector("div.job-details");

                    var checkTitle = listing.QuerySelector("span.job-title-text").TextContent;
                    // Ignore job that contain these words in the job title.
                    if (!checkTitle.Contains("Senior") &&
                        !checkTitle.Contains("Sr") &&
                        !checkTitle.Contains("Lead") &&
                        !checkTitle.Contains("Principal") &&
                        !checkTitle.Contains("Java") &&
                        !checkTitle.Contains("Clearance") &&
                        !checkTitle.Contains("Graphics") &&
                        !checkTitle.Contains("Android") &&
                        !checkTitle.Contains("iOS") &&
                        !checkTitle.Contains("Wordpress") &&
                        !checkTitle.Contains("WordPress") &&
                        !checkTitle.Contains("PHP") &&
                        !checkTitle.Contains("Architect") &&
                        !checkTitle.Contains("Ruby") &&
                        !checkTitle.Contains("Manager") &&
                        !checkTitle.Contains("Design") &&
                        !checkTitle.Contains("UI") &&
                        !checkTitle.Contains("Python") &&
                        !checkTitle.Contains("HTML") &&
                        !checkTitle.Contains("CSS") &&
                        !checkTitle.Contains("Salesforce") &&
                        !checkTitle.Contains("SENIOR") &&
                        !checkTitle.Contains("Analyst") &&
                        checkTitle.Contains("Software Developer")    //this needs to be changed with each search
                        )
                    {
                        job.JobTitle = checkTitle;
                        job.PostDate = listing.QuerySelector("span.date-posted-or-new").TextContent;
                        job.Company  = listing.QuerySelector("span.company-name-text").TextContent;
                        string checkLocation = listing.QuerySelector("span.job-location > span").TextContent;
                        if (checkLocation.Contains(", US"))
                        {
                            job.Location = checkLocation.Replace(", US", "");
                        }
                        else
                        {
                            job.Location = checkLocation;
                        }

                        job.JobDescription = listing.QuerySelector("div.job-description").TextContent;
                        //Job Link
                        XmlDocument xml = new XmlDocument();
                        xml.LoadXml(listing.QuerySelector("a.job-title-link").OuterHtml);
                        XmlElement elem = xml.DocumentElement;
                        if (elem.HasAttribute("href"))
                        {
                            String attr = elem.GetAttribute("href");
                            var    uri  = attr.Split('?')[0];

                            job.Url = uri;
                        }
                        jobs.Add(job);
                    }
                }
            }

            ScraperService scraperService = new ScraperService(ConfigurationManager.ConnectionStrings["LIConnection"].ConnectionString);

            scraperService.Post(jobs);
        }