/// <summary>
/// Builds the Examine index entry for a single job.
/// </summary>
/// <param name="fakeNodeId">The value assigned to the index node's <c>NodeId</c>.</param>
/// <param name="job">The job whose properties are copied into the index row data.</param>
/// <param name="indexType">The value assigned to the index node's <c>Type</c>.</param>
/// <returns>A <see cref="SimpleDataSet"/> whose node definition and row data are populated from <paramref name="job"/>.</returns>
private SimpleDataSet CreateIndexItemFromJob(int fakeNodeId, Job job, string indexType)
{
    LogHelper.Info<BaseJobsIndexer>($"Building Examine index item for job '{job.Id}'");

    var dataSet = new SimpleDataSet
    {
        NodeDefinition = new IndexedNode { NodeId = fakeNodeId, Type = indexType },
        RowData = new Dictionary<string, string>()
    };
    var fields = dataSet.RowData;

    // Most searchable fields are stored twice: once passed through the stop words filter
    // for searching, and once untouched (the "...Display" key) for showing to the user.
    fields.Add("id", job.Id);
    fields.Add("reference", job.Reference);
    fields.Add("title", _stopWordsRemover.Filter(job.JobTitle));
    fields.Add("titleDisplay", job.JobTitle);
    fields.Add("organisation", _stopWordsRemover.Filter(job.Organisation));
    fields.Add("organisationDisplay", job.Organisation);
    fields.Add("location", _stopWordsRemover.Filter(job.Location));
    // because Somewhere-on-Sea needs to lose the "on" for searching but keep it for display
    fields.Add("locationDisplay", job.Location);

    // Salary bounds are formatted with "D7" (zero-padded to seven digits);
    // a missing bound is stored as an empty string.
    var salary = job.Salary;
    fields.Add("salary", _tagSanitiser.StripTags(_stopWordsRemover.Filter(salary.SalaryRange)));
    fields.Add("salaryRange", _stopWordsRemover.Filter(salary.SearchRange));
    var minimumSalary = salary.MinimumSalary?.ToString("D7") ?? String.Empty;
    var maximumSalary = salary.MaximumSalary?.ToString("D7") ?? String.Empty;
    fields.Add("salaryMin", minimumSalary);
    fields.Add("salaryMax", maximumSalary);
    fields.Add("salarySort", $"{minimumSalary} {maximumSalary} {_stopWordsRemover.Filter(salary.SalaryRange)}");

    // The same ISO 8601 value is stored under both closing date keys
    var closingDate = job.ClosingDate.Value.ToIso8601DateTime();
    fields.Add("closingDate", closingDate);
    fields.Add("closingDateDisplay", closingDate);

    fields.Add("jobType", _stopWordsRemover.Filter(job.JobType));
    fields.Add("jobTypeDisplay", job.JobType);
    fields.Add("contractType", _stopWordsRemover.Filter(job.ContractType));
    fields.Add("department", _stopWordsRemover.Filter(job.Department));
    fields.Add("departmentDisplay", job.Department);

    var workPattern = job.WorkPattern;
    fields.Add("fullTime", workPattern.IsFullTime.ToString());
    fields.Add("partTime", workPattern.IsPartTime.ToString());
    fields.Add("workPattern", workPattern.ToString());

    // The HTML fields are optional, and are only added to the index when present
    if (job.AdvertHtml != null)
    {
        var advertHtml = job.AdvertHtml.ToHtmlString();
        fields.Add("fullText", _tagSanitiser.StripTags(advertHtml));
        fields.Add("fullHtml", advertHtml);
    }

    if (job.AdditionalInformationHtml != null)
    {
        fields.Add("additionalInfo", job.AdditionalInformationHtml.ToHtmlString());
    }

    if (job.EqualOpportunitiesHtml != null)
    {
        fields.Add("equalOpportunities", job.EqualOpportunitiesHtml.ToHtmlString());
    }

    fields.Add("applyUrl", job.ApplyUrl?.ToString());

    return dataSet;
}
/// <summary>
/// Get the text content of an HTML string, but without text used for links
/// </summary>
/// <param name="text">The HTML to extract text from.</param>
/// <param name="tagSanitiser">The tag sanitiser used to strip the HTML tags that remain once links have been removed.</param>
/// <returns>
/// <paramref name="text"/> unchanged when it is <c>null</c> or empty; otherwise the HTML-decoded, trimmed text
/// left after removing link elements (including their link text) and stripping any other tags.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="tagSanitiser"/> is <c>null</c> and <paramref name="text"/> is not <c>null</c> or empty.</exception>
public string TextOutsideLinks(string text, IHtmlTagSanitiser tagSanitiser)
{
    if (String.IsNullOrEmpty(text))
    {
        return text;
    }

    if (tagSanitiser == null)
    {
        throw new ArgumentNullException(nameof(tagSanitiser));
    }

    // Remove any links including the link text.
    // - Singleline lets '.' match newlines, so a link whose text wraps across lines is still removed.
    // - IgnoreCase catches upper-case markup such as <A HREF="...">...</A>.
    // - \s (rather than a literal space) accepts a tab or newline between "<a" and the first attribute.
    const string anythingExceptEndAnchor = "((?!</a>).)*";
    text = Regex.Replace(
        text,
        @"<a\s[^>]*>" + anythingExceptEndAnchor + "</a>",
        String.Empty,
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Remove any other HTML, and what's left is text outside links
    text = HttpUtility.HtmlDecode(tagSanitiser.StripTags(text));

    return text.Trim();
}