Example #1
0
        /// <summary>
        /// Collects job-ad URLs from the seek.com.au result pages for the title of
        /// <paramref name="jobAnalysis"/>, moving to the next page for as long as the
        /// loaded page contains an anchor marked <c>data-automation="page-next"</c>.
        /// </summary>
        /// <param name="jobAnalysis">Analysis whose <c>Title</c> is used as the search keywords.</param>
        /// <returns>
        /// The absolute URLs built from every <c>jobTitle</c> anchor found. If a page cannot be
        /// loaded within <c>_searchOptions.MaxRetry</c> attempts, paging stops and the URLs
        /// gathered so far are returned.
        /// </returns>
        public List<string> SearchJob(JobAnalysis jobAnalysis)
        {
            _logger.Information($"Search Job: {jobAnalysis.Title}");

            List<string> urls = new List<string>();

            string keyWords = jobAnalysis.Title;

            HtmlDocument document = null;

            // Evaluated only after a page has loaded successfully, so "document" is non-null here.
            Func<bool> hasNext = () =>
                document.DocumentNode.DescendantsAndSelf().Any(n =>
                    string.Equals(n.Name, "a", StringComparison.OrdinalIgnoreCase) &&
                    n.GetAttributeValue("data-automation", null) == "page-next");

            int index = 1;

            do
            {
                _logger.Information($"  Search Job: {jobAnalysis.Title} - Page {index}");

                // The URL only depends on the page index, so build it once per page.
                var url = $"https://www.seek.com.au/{keyWords.Replace(" ", "-")}-jobs?page={index}";

                List<HtmlNode> urlNodes = null;
                Exception lastError = null;

                // The attempt counter lives outside the retry loop so that it actually
                // accumulates; previously it was reset on every attempt and the loop
                // could spin forever on a page that never loads.
                int chromeRetry = 0;
                do
                {
                    chromeRetry++;
                    try
                    {
                        document = WebExtensions.LoadPage(url);
                        urlNodes = document.DocumentNode.DescendantsAndSelf()
                                   .Where(n =>
                                          string.Equals(n.Name, "a", StringComparison.OrdinalIgnoreCase) &&
                                          n.GetAttributeValue("data-automation", "") == "jobTitle")
                                   .ToList();
                    }
                    catch (Exception ex)
                    {
                        lastError = ex;
                        _logger.Error(ex, $"failed to get job list elements from url {url}. {chromeRetry} of {_searchOptions.MaxRetry} attempts.");
                    }
                } while (urlNodes == null && chromeRetry < _searchOptions.MaxRetry);

                if (urlNodes == null)
                {
                    // Retries exhausted: keep what has been collected instead of looping forever.
                    _logger.Error(lastError, $"giving up on url {url} after {chromeRetry} attempts; returning {urls.Count} urls collected so far.");
                    break;
                }

                foreach (var urlNode in urlNodes)
                {
                    var href = urlNode.GetAttributeValue("href", null);
                    if (href != null)
                    {
                        urls.Add($@"https://www.seek.com.au{href}");
                    }
                }

                index++;
            } while (hasNext());

            return urls;
        }
Example #2
0
        /// <summary>
        /// Fills the keyword, city and work-type statistics of <paramref name="jobAnalysisEntry"/>
        /// from the supplied <paramref name="jobs"/>.
        /// </summary>
        /// <param name="jobAnalysis">
        /// Analysis definition; its <c>Keywords</c> (when not null) are counted as whole-word,
        /// case-insensitive, literal matches against each job's <c>Description</c>.
        /// </param>
        /// <param name="jobAnalysisEntry">Entry whose statistics dictionaries are replaced.</param>
        /// <param name="jobs">Jobs to analyse; only the dictionary values are read.</param>
        public void AnalyzeJobs(JobAnalysis jobAnalysis, JobAnalysisEntry jobAnalysisEntry, Dictionary<string, Job> jobs)
        {
            _logger.Information($"Analyzing Jobs for {jobAnalysis.Title} Entry: {jobAnalysisEntry._key}");

            if (jobAnalysis.Keywords != null)
            {
                // De-duplicate keywords using the comparer the original code selected.
                HashSet<string> words = new HashSet<string>(new StringComparer());

                foreach (string keyword in jobAnalysis.Keywords)
                {
                    words.Add(keyword);
                }

                var keywordStatistics = new Dictionary<string, int>();
                jobAnalysisEntry.KeywordStatistics = keywordStatistics;

                foreach (string keyword in words)
                {
                    // Escape the keyword so that terms such as "C++", "C#" or ".NET" are matched
                    // literally instead of being parsed as regex syntax (which throws or mis-matches).
                    // The regex is built once per keyword rather than once per job.
                    var wholeWord = new Regex($@"(^|\W){Regex.Escape(keyword)}(\W|$)", RegexOptions.IgnoreCase);

                    // A job without a description cannot contain the keyword.
                    int matches = jobs.Values.Count(j => j.Description != null && wholeWord.IsMatch(j.Description));

                    keywordStatistics.Add(keyword, matches);
                }
            }

            // City and work-type histograms, built in a single pass over the jobs.
            var cityStatistics     = new Dictionary<string, int>();
            var workTypeStatistics = new Dictionary<string, int>();
            jobAnalysisEntry.CityStatistics     = cityStatistics;
            jobAnalysisEntry.WorkTypeStatistics = workTypeStatistics;

            foreach (var job in jobs.Values)
            {
                int count;

                // TryGetValue leaves count at 0 for a key that has not been seen yet.
                cityStatistics.TryGetValue(job.City, out count);
                cityStatistics[job.City] = count + 1;

                workTypeStatistics.TryGetValue(job.WorkType, out count);
                workTypeStatistics[job.WorkType] = count + 1;
            }
        }
Example #3
0
        /// <summary>
        /// Runs one search pass for <paramref name="jobAnalysis"/>: gathers the job URLs via
        /// <c>SearchJob</c>, calls <c>DownloadJob</c> for each of them in order, computes the
        /// statistics with <c>AnalyzeJobs</c>, and then upserts the resulting entry and its edges
        /// through <paramref name="client"/>.
        /// </summary>
        /// <param name="jobAnalysis">Analysis being executed; also the target of the <c>EntryOf</c> edge.</param>
        /// <param name="client">Database handle used for the upserts and passed on to <c>DownloadJob</c>.</param>
        /// <param name="timeStamp">Suffix appended to the analysis key to form the entry key.</param>
        public async Task Search(JobAnalysis jobAnalysis, IArangoDatabase client, string timeStamp)
        {
            var jobUrls        = SearchJob(jobAnalysis);
            var downloadedJobs = new Dictionary<string, Job>();

            var entry = new JobAnalysisEntry
            {
                _key              = $"{jobAnalysis._key}__{timeStamp}",
                AnalysisTime      = DateTime.Now,
                KeywordStatistics = new Dictionary<string, int>(),
                TotalJobs         = jobUrls.Count,
            };

            // Process the URLs one after another; "ordinal" is the 1-based position used in the log line.
            for (int ordinal = 1; ordinal <= jobUrls.Count; ordinal++)
            {
                var jobUrl = jobUrls[ordinal - 1];

                _logger.Information($"Download Job Url ({ordinal} of {jobUrls.Count}): {jobUrl}");
                await DownloadJob(jobUrl, entry, client, downloadedJobs);
            }

            AnalyzeJobs(jobAnalysis, entry, downloadedJobs);

            // Store the entry first, then link it to its analysis.
            client.UpsertIgnoreNull(entry);
            client.UpsertEdge<EntryOf, JobAnalysisEntry, JobAnalysis>(entry, jobAnalysis);

            // Finally link the entry to every job held in the dictionary.
            foreach (KeyValuePair<string, Job> pair in downloadedJobs)
            {
                client.UpsertEdge<JobAnalysisOf, JobAnalysisEntry, Job>(entry, pair.Value);
            }
        }