예제 #1
0
        private SimpleDataSet CreateIndexItemFromJob(int fakeNodeId, Job job, string indexType)
        {
            LogHelper.Info <BaseJobsIndexer>($"Building Examine index item for job '{job.Id}'");

            var simpleDataSet = new SimpleDataSet {
                NodeDefinition = new IndexedNode(), RowData = new Dictionary <string, string>()
            };

            simpleDataSet.NodeDefinition.NodeId = fakeNodeId;
            simpleDataSet.NodeDefinition.Type   = indexType;
            simpleDataSet.RowData.Add("id", job.Id);
            simpleDataSet.RowData.Add("reference", job.Reference);
            simpleDataSet.RowData.Add("title", _stopWordsRemover.Filter(job.JobTitle));
            simpleDataSet.RowData.Add("titleDisplay", job.JobTitle);
            simpleDataSet.RowData.Add("organisation", _stopWordsRemover.Filter(job.Organisation));
            simpleDataSet.RowData.Add("organisationDisplay", job.Organisation);
            simpleDataSet.RowData.Add("location", _stopWordsRemover.Filter(job.Location));
            simpleDataSet.RowData.Add("locationDisplay", job.Location); // because Somewhere-on-Sea needs to lose the "on" for searching but keep it for display
            simpleDataSet.RowData.Add("salary", _tagSanitiser.StripTags(_stopWordsRemover.Filter(job.Salary.SalaryRange)));
            simpleDataSet.RowData.Add("salaryRange", _stopWordsRemover.Filter(job.Salary.SearchRange));
            simpleDataSet.RowData.Add("salaryMin", job.Salary.MinimumSalary?.ToString("D7") ?? String.Empty);
            simpleDataSet.RowData.Add("salaryMax", job.Salary.MaximumSalary?.ToString("D7") ?? String.Empty);
            simpleDataSet.RowData.Add("salarySort", (job.Salary.MinimumSalary?.ToString("D7") ?? String.Empty) + " " + (job.Salary.MaximumSalary?.ToString("D7") ?? String.Empty) + " " + _stopWordsRemover.Filter(job.Salary.SalaryRange));
            simpleDataSet.RowData.Add("closingDate", job.ClosingDate.Value.ToIso8601DateTime());
            simpleDataSet.RowData.Add("closingDateDisplay", job.ClosingDate.Value.ToIso8601DateTime());
            simpleDataSet.RowData.Add("jobType", _stopWordsRemover.Filter(job.JobType));
            simpleDataSet.RowData.Add("jobTypeDisplay", job.JobType);
            simpleDataSet.RowData.Add("contractType", _stopWordsRemover.Filter(job.ContractType));
            simpleDataSet.RowData.Add("department", _stopWordsRemover.Filter(job.Department));
            simpleDataSet.RowData.Add("departmentDisplay", job.Department);
            simpleDataSet.RowData.Add("fullTime", job.WorkPattern.IsFullTime.ToString());
            simpleDataSet.RowData.Add("partTime", job.WorkPattern.IsPartTime.ToString());
            simpleDataSet.RowData.Add("workPattern", job.WorkPattern.ToString());
            if (job.AdvertHtml != null)
            {
                simpleDataSet.RowData.Add("fullText", _tagSanitiser.StripTags(job.AdvertHtml.ToHtmlString()));
                simpleDataSet.RowData.Add("fullHtml", job.AdvertHtml.ToHtmlString());
            }
            if (job.AdditionalInformationHtml != null)
            {
                simpleDataSet.RowData.Add("additionalInfo", job.AdditionalInformationHtml.ToHtmlString());
            }
            if (job.EqualOpportunitiesHtml != null)
            {
                simpleDataSet.RowData.Add("equalOpportunities", job.EqualOpportunitiesHtml.ToHtmlString());
            }
            simpleDataSet.RowData.Add("applyUrl", job.ApplyUrl?.ToString());

            return(simpleDataSet);
        }
예제 #2
0
        /// <summary>
        /// Get the text content of an HTML string, but without text used for links
        /// </summary>
        /// <param name="text">The text.</param>
        /// <param name="tagSanitiser">The tag sanitiser.</param>
        /// <returns></returns>
        public string TextOutsideLinks(string text, IHtmlTagSanitiser tagSanitiser)
        {
            if (String.IsNullOrEmpty(text))
            {
                return(text);
            }
            if (tagSanitiser == null)
            {
                throw new ArgumentNullException(nameof(tagSanitiser));
            }

            // Remove any links including the link text
            const string anythingExceptEndAnchor = "((?!</a>).)*";

            text = Regex.Replace(text, "<a [^>]*>" + anythingExceptEndAnchor + "</a>", String.Empty);

            // Remove any other HTML, and what's left is text outside links
            text = HttpUtility.HtmlDecode(tagSanitiser.StripTags(text));

            // Any remaining text is invalid
            return(text.Trim());
        }