예제 #1
0
        /// <summary>
        /// Builds a <see cref="Job"/> from one job element of the XML feed.
        /// </summary>
        /// <param name="jobXml">The XML element holding the data for a single job.</param>
        /// <returns>The populated job.</returns>
        /// <remarks>
        /// The <c>job_id</c>, <c>no_of_positions</c>, <c>job_title</c>, <c>business_unit</c>, <c>region</c>,
        /// <c>open_date</c>, <c>expiry_date</c> and <c>location_city</c> elements are dereferenced without a
        /// null check, so a missing one causes an exception. All other elements are treated as optional.
        /// </remarks>
        private async Task <Job> ParseJob(XElement jobXml)
        {
            // Serialise once: both the salary and the work pattern parser take the raw XML.
            var rawXml = jobXml.ToString();

            // Dates are read positionally as yyyy?MM?dd; anything after the day is ignored.
            var openDate   = jobXml.Element("open_date").Value;
            var expiryDate = jobXml.Element("expiry_date").Value;

            var job = new Job()
            {
                Id                = Int32.Parse(jobXml.Element("job_id").Value.Trim(), CultureInfo.InvariantCulture),
                // HtmlDecode returns null for a missing element, so Trim must be null-conditional.
                Reference         = HttpUtility.HtmlDecode(jobXml.Element("reference")?.Value)?.Trim(),
                NumberOfPositions = Int32.Parse(jobXml.Element("no_of_positions").Value, CultureInfo.InvariantCulture),
                JobTitle          = HttpUtility.HtmlDecode(jobXml.Element("job_title").Value).Trim(),
                Department        = HttpUtility.HtmlDecode(jobXml.Element("business_unit").Value).Trim().Replace("ESCC: ", string.Empty),
                Organisation      = HttpUtility.HtmlDecode(jobXml.Element("region").Value).Trim(),
                DatePublished     = new DateTime(Int32.Parse(openDate.Substring(0, 4), CultureInfo.InvariantCulture),
                                                 Int32.Parse(openDate.Substring(5, 2), CultureInfo.InvariantCulture),
                                                 Int32.Parse(openDate.Substring(8, 2), CultureInfo.InvariantCulture)),
                ClosingDate       = new DateTime(Int32.Parse(expiryDate.Substring(0, 4), CultureInfo.InvariantCulture),
                                                 Int32.Parse(expiryDate.Substring(5, 2), CultureInfo.InvariantCulture),
                                                 Int32.Parse(expiryDate.Substring(8, 2), CultureInfo.InvariantCulture))
            };

            if (String.Equals(job.Department, "PARTNERSHIP", StringComparison.OrdinalIgnoreCase))
            {
                // Partnership jobs may carry the organisation name in trailing brackets on the job title,
                // eg "Job title (Organisation)". Move it to Organisation and strip it from the title.
                var orgStarts = job.JobTitle.LastIndexOf('(');
                if (orgStarts >= 0 && job.JobTitle.EndsWith(")", StringComparison.Ordinal))
                {
                    job.Organisation = job.JobTitle.Substring(orgStarts + 1).TrimEnd(')');
                    job.JobTitle     = job.JobTitle.Substring(0, orgStarts).TrimEnd();
                }
                else
                {
                    job.Organisation = job.Department;
                }
                job.Department = string.Empty;
            }

            job.Locations.Add(HttpUtility.HtmlDecode(jobXml.Element("location_city").Value).Trim());

            var logo = jobXml.Element("media")?.Element("logo")?.Element("url")?.Value;

            if (!String.IsNullOrEmpty(logo))
            {
                // Ignore width and height from TribePad as they can be wrong!
                job.LogoUrl = new Uri(logo);
            }

            // The advert body is the concatenation of three optional HTML fragments.
            var advertHtml = new StringBuilder();

            advertHtml.Append(HttpUtility.HtmlDecode(jobXml.Element("package_description")?.Value));
            advertHtml.Append(HttpUtility.HtmlDecode(jobXml.Element("summary_external")?.Value));
            advertHtml.Append(HttpUtility.HtmlDecode(jobXml.Element("main_responsibilities")?.Value));
            job.AdvertHtml = new HtmlString(advertHtml.ToString());

            job.AdditionalInformationHtml = new HtmlString(HttpUtility.HtmlDecode(jobXml.Element("ideal_candidate")?.Value));
            job.EqualOpportunitiesHtml    = new HtmlString(HttpUtility.HtmlDecode(jobXml.Element("about_company")?.Value));

            // category_name is optional, so guard the Replace against a null decode result.
            job.JobType = HttpUtility.HtmlDecode(jobXml.Element("category_name")?.Value)?.Replace(" & ", " and ");

            // An apply link is only offered when the feed explicitly sets no_apply to "0".
            if (jobXml.Element("no_apply")?.Value == "0")
            {
                job.ApplyUrl = new Uri(String.Format(CultureInfo.InvariantCulture, _applyUrl.ToString(), job.Id), UriKind.RelativeOrAbsolute);
            }

            job.Salary = await _salaryParser.ParseSalary(rawXml).ConfigureAwait(false);

            job.WorkPattern = await _workPatternParser.ParseWorkPattern(rawXml).ConfigureAwait(false);

            var contractTypeId = jobXml.Element("job_type")?.Value;

            if (!String.IsNullOrEmpty(contractTypeId))
            {
                // Translate the feed's lookup id into its display text; stays null when there is no match.
                job.ContractType = _contractTypes?.SingleOrDefault(x => x.LookupValueId == contractTypeId)?.Text;
            }

            return(job);
        }
        /// <summary>
        /// Builds a <see cref="Job"/> from one job element of the XML feed.
        /// </summary>
        /// <param name="jobXml">The XML element holding the data for a single job.</param>
        /// <returns>The populated job.</returns>
        /// <remarks>
        /// The <c>job_id</c>, <c>no_of_positions</c>, <c>job_title</c>, <c>business_unit</c>, <c>region</c> and
        /// <c>open_date</c> elements are dereferenced without a null check, as is <c>expiry_date</c> unless
        /// <c>evergreen</c> is "1". All other elements are treated as optional.
        /// </remarks>
        private async Task <Job> ParseJob(XElement jobXml)
        {
            // Serialise once: the location, salary and work pattern parsers all take the raw XML.
            var rawXml = jobXml.ToString();

            // Dates are read positionally as yyyy?MM?dd; anything after the day is ignored.
            var openDate = jobXml.Element("open_date").Value;

            var job = new Job()
            {
                Id                = Int32.Parse(jobXml.Element("job_id").Value.Trim(), CultureInfo.InvariantCulture),
                // HtmlDecode returns null for a missing element, so Trim must be null-conditional.
                Reference         = HttpUtility.HtmlDecode(jobXml.Element("reference")?.Value)?.Trim(),
                NumberOfPositions = Int32.Parse(jobXml.Element("no_of_positions").Value, CultureInfo.InvariantCulture),
                JobTitle          = HttpUtility.HtmlDecode(jobXml.Element("job_title").Value).Trim(),
                Department        = HttpUtility.HtmlDecode(jobXml.Element("business_unit").Value).Trim().Replace("ESCC: ", string.Empty),
                Organisation      = HttpUtility.HtmlDecode(jobXml.Element("region").Value).Trim(),
                DatePublished     = new DateTime(Int32.Parse(openDate.Substring(0, 4), CultureInfo.InvariantCulture),
                                                 Int32.Parse(openDate.Substring(5, 2), CultureInfo.InvariantCulture),
                                                 Int32.Parse(openDate.Substring(8, 2), CultureInfo.InvariantCulture))
            };

            // Jobs flagged evergreen="1" have no closing date, so ClosingDate is left unset for them.
            var noClosingDate = jobXml.Element("evergreen")?.Value == "1";

            if (!noClosingDate)
            {
                var expiryDate = jobXml.Element("expiry_date").Value;
                job.ClosingDate = new DateTime(Int32.Parse(expiryDate.Substring(0, 4), CultureInfo.InvariantCulture),
                                               Int32.Parse(expiryDate.Substring(5, 2), CultureInfo.InvariantCulture),
                                               Int32.Parse(expiryDate.Substring(8, 2), CultureInfo.InvariantCulture));
            }

            var canApplyForThisJob   = true;
            var comparableDepartment = job.Department.ToUpperInvariant();

            if (comparableDepartment == "PARTNERSHIP")
            {
                canApplyForThisJob = false;
                if (!ParseAndMoveOrganisationFromJobTitle(job))
                {
                    job.Organisation = job.Department;
                }
                job.Department = string.Empty;
            }
            else if (comparableDepartment == "ESCC SCHOOLS" || comparableDepartment == "ESCC ACADEMIES")
            {
                if (ParseAndMoveOrganisationFromJobTitle(job))
                {
                    job.Department = string.Empty;
                }
                else
                {
                    // If it's a school job but the school name is not in the job title, it's unknown
                    job.Organisation = string.Empty;
                    job.Department   = string.Empty;
                }
                canApplyForThisJob = false;
            }
            else if (comparableDepartment == "CHILDREN SERVICES")
            {
                // Normalise the feed's spelling of the department name.
                job.Department = "Children's Services";
            }

            var locations = _locationParser.ParseLocations(rawXml);

            if (locations != null)
            {
                foreach (var location in locations)
                {
                    job.Locations.Add(location);
                }
            }

            var logo = jobXml.Element("media")?.Element("logo")?.Element("url")?.Value;

            if (!String.IsNullOrEmpty(logo))
            {
                // Ignore width and height from TribePad as they can be wrong!
                job.LogoUrl = new Uri(logo);
            }

            // The advert body is the concatenation of three optional HTML fragments, followed by document links.
            var advertHtml = new StringBuilder();

            advertHtml.Append(HttpUtility.HtmlDecode(jobXml.Element("package_description")?.Value));
            advertHtml.Append(HttpUtility.HtmlDecode(jobXml.Element("summary_external")?.Value));
            advertHtml.Append(HttpUtility.HtmlDecode(jobXml.Element("main_responsibilities")?.Value));

            // Materialise once so the sequence is not re-enumerated, and skip the section entirely when
            // <files> is present but empty - otherwise a "Documents" heading with an empty list is emitted.
            var files = jobXml.Element("files")?.Elements("file").ToList();

            if (files != null && files.Count > 0)
            {
                if (files.Count == 1)
                {
                    // A single document is shown as a paragraph rather than a one-item list.
                    advertHtml.Append("<h2>Documents</h2><p>").AppendLinkToFile(files[0], job.Id).Append("</p>");
                }
                else
                {
                    advertHtml.Append("<h2>Documents</h2><ul>");
                    foreach (var file in files)
                    {
                        advertHtml.Append("<li>").AppendLinkToFile(file, job.Id).Append("</li>");
                    }
                    advertHtml.Append("</ul>");
                }
            }

            job.AdvertHtml = new HtmlString(advertHtml.ToString());

            job.AdditionalInformationHtml = new HtmlString(HttpUtility.HtmlDecode(jobXml.Element("ideal_candidate")?.Value));
            job.EqualOpportunitiesHtml    = new HtmlString(HttpUtility.HtmlDecode(jobXml.Element("about_company")?.Value));

            job.JobType = HttpUtility.HtmlDecode(jobXml.Element("category_name")?.Value)?.Replace(" & ", " and ");

            // An apply link needs no_apply="0" from the feed and a department that did not disable applying above.
            if (jobXml.Element("no_apply")?.Value == "0" && canApplyForThisJob)
            {
                job.ApplyUrl = new Uri(String.Format(CultureInfo.InvariantCulture, _applyUrl.ToString(), job.Id), UriKind.RelativeOrAbsolute);
            }

            job.Salary = await _salaryParser.ParseSalary(rawXml).ConfigureAwait(false);

            job.WorkPattern = await _workPatternParser.ParseWorkPattern(rawXml).ConfigureAwait(false);

            var contractTypeId = jobXml.Element("job_type")?.Value;

            if (!String.IsNullOrEmpty(contractTypeId))
            {
                // Translate the feed's lookup id into its display text; stays null when there is no match.
                job.ContractType = _contractTypes?.SingleOrDefault(x => x.LookupValueId == contractTypeId)?.Text;
            }

            return(job);
        }
예제 #3
0
        /// <summary>
        /// Parses a job from the HTML of a TalentLink job advert.
        /// </summary>
        /// <param name="sourceData">The source HTML for the job.</param>
        /// <param name="jobId">The id of the job, attached to any error report that is submitted.</param>
        /// <returns>
        /// The parsed job, or <c>null</c> if the HTML has parse errors, has no document node,
        /// or an exception is thrown while parsing (the exception is reported, not rethrown).
        /// </returns>
        public async Task <Job> ParseJob(string sourceData, string jobId)
        {
            try
            {
                HtmlAgilityPack.HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();
                htmlDocument.OptionFixNestedTags = true;
                htmlDocument.LoadHtml(sourceData);

                // ParseErrors contains any errors from the Load statement. Report each one, then give up on this job.
                if (htmlDocument.ParseErrors != null && htmlDocument.ParseErrors.Any())
                {
                    foreach (var error in htmlDocument.ParseErrors)
                    {
                        var exception = new HttpParseException("Unable to parse job HTML from TalentLink");
                        exception.Data.Add("Job ID", jobId);
                        exception.Data.Add("Type of error", error.Code);
                        exception.Data.Add("Reason", error.Reason);
                        exception.Data.Add("Line", error.Line);
                        exception.Data.Add("Position on line", error.LinePosition);
                        exception.Data.Add("Source HTML", error.SourceText);
                        exception.ToExceptionless().Submit();
                    }
                    return(null);
                }

                if (htmlDocument.DocumentNode == null)
                {
                    return(null);
                }

                var job = new Job();
                job.JobTitle  = ParseValueFromElementById(htmlDocument, "h3", "JDText-Title");
                job.Reference = ParseValueFromElementById(htmlDocument, "span", "JDText-Param2");
                if (!String.IsNullOrEmpty(job.Reference))
                {
                    // The title repeats the reference in brackets; strip it.
                    // NOTE(review): null-conditional in case the title element is missing - confirm whether
                    // ParseValueFromElementById can return null.
                    job.JobTitle = job.JobTitle?.Replace(" (" + job.Reference + ")", String.Empty);
                }
                job.Locations.Add(ParseValueFromElementById(htmlDocument, "span", "JDText-Param3"));
                job.Organisation = ParseValueFromElementById(htmlDocument, "span", "JDText-Param4");
                job.Department   = ParseValueFromElementById(htmlDocument, "span", "JDText-Param5");
                job.ContractType = ParseValueFromElementById(htmlDocument, "span", "JDText-Param6");
                job.JobType      = ParseValueFromElementById(htmlDocument, "span", "JDText-Param7");
                job.Salary       = await _salaryParser.ParseSalary(htmlDocument.DocumentNode.OuterHtml);

                // Only record a closing date that actually parsed; a failed TryParse leaves DateTime.MinValue
                // in the out parameter, which must not be stored as if it were a real date.
                DateTime closingDate;
                if (DateTime.TryParse(ParseValueFromElementById(htmlDocument, "span", "JDText-Param9"), new CultureInfo("en-GB"), DateTimeStyles.AssumeLocal, out closingDate))
                {
                    job.ClosingDate = closingDate;
                }
                job.DatePublished = DateTime.UtcNow;

                // Clean up the document in place before the remaining HTML fragments are read from it.
                var agilityPackFormatters = new IHtmlAgilityPackHtmlFormatter[]
                {
                    new RemoveUnwantedAttributesFormatter(new string[] { "style" }),
                    new ReplaceElementNameFormatter("h5", "h2"),
                    new RemoveUnwantedNodesFormatter(new[] { "u" }),
                    new RemoveElementByNameAndContentFormatter("h2", "Job Details"),
                    new RemoveElementsWithNoContentFormatter(new[] { "strong", "p" }),
                    new TruncateLongLinksFormatter(new HtmlLinkFormatter()),
                    new EmbeddedYouTubeVideosFormatter()
                };
                foreach (var formatter in agilityPackFormatters)
                {
                    formatter.FormatHtml(htmlDocument);
                }

                // JDText-Field4 is additional info (ie small print) and JDText-Field6 is information for redeployees (ie more small print)
                var additionalInfo = ParseValueFromElementById(htmlDocument, "span", "JDText-Field4") + Environment.NewLine +
                                     ParseValueFromElementById(htmlDocument, "span", "JDText-Field6");

                // The joined string always contains the newline, so test for whitespace rather than emptiness.
                if (!String.IsNullOrWhiteSpace(additionalInfo))
                {
                    additionalInfo = ApplyStringFormatters(additionalInfo);
                    job.AdditionalInformationHtml = new HtmlString(additionalInfo);
                }

                var equalOpportunities = ParseValueFromElementById(htmlDocument, "span", "JDText-Field5");
                if (!String.IsNullOrEmpty(equalOpportunities))
                {
                    equalOpportunities         = ApplyStringFormatters(equalOpportunities);
                    job.EqualOpportunitiesHtml = new HtmlString(equalOpportunities);
                }

                var parsedHtml = ParseValueFromElementById(htmlDocument, "div", "JD-Field1") + Environment.NewLine +
                                 ParseValueFromElementById(htmlDocument, "div", "JD-Field2") + Environment.NewLine +
                                 ParseValueFromElementById(htmlDocument, "div", "JD-Documents");

                parsedHtml = ApplyStringFormatters(parsedHtml);

                // Remove text from the advert body that repeats values already captured as job properties.
                if (job.ClosingDate.HasValue)
                {
                    parsedHtml = new RemoveDuplicateTextFormatter("Closing date: " + job.ClosingDate.Value.ToBritishDate()).FormatHtml(parsedHtml);
                    parsedHtml = new RemoveDuplicateTextFormatter("Closing date: " + job.ClosingDate.Value.ToBritishDateWithDay()).FormatHtml(parsedHtml);
                }
                parsedHtml = new RemoveDuplicateTextFormatter("Salary: " + job.Salary.SalaryRange).FormatHtml(parsedHtml);
                parsedHtml = new RemoveDuplicateTextFormatter("Salary: " + job.Salary.SalaryRange?.Replace(" to ", " - ")).FormatHtml(parsedHtml);
                parsedHtml = new RemoveDuplicateTextFormatter("Contract type: " + job.ContractType).FormatHtml(parsedHtml);

                job.AdvertHtml  = new HtmlString(parsedHtml);
                job.WorkPattern = await _workPatternParser.ParseWorkPattern(parsedHtml);

                // A link without an href is ignored rather than failing the whole job.
                var applyLink = htmlDocument.DocumentNode.SelectSingleNode("//div[@id='JD-ActApplyDirect']/a");
                var applyHref = applyLink?.Attributes["href"]?.Value;
                if (applyHref != null)
                {
                    job.ApplyUrl = new Uri(HttpUtility.HtmlDecode(applyHref), UriKind.RelativeOrAbsolute);
                }

                return(job);
            }
            catch (Exception exception)
            {
                // Deliberate best-effort: report the failure and skip this job rather than failing the caller.
                // The indexer is used because Data.Add throws if the key is already present.
                exception.Data["Job ID"] = jobId;
                exception.ToExceptionless().Submit();
                return(null);
            }
        }