public void StopWordsAreRemovedFromIndexedValues()
{
    // The phrase contains the word "and"; the expected output is the same phrase
    // without it, with the remaining words separated by a single space.
    const string phrase = "Administration and Clerical";
    const string expected = "Administration Clerical";

    var filtered = new LuceneStopWordsRemover().Filter(phrase);

    Assert.AreEqual(expected, filtered);
}
public void ReturnsEmptyStringFromEmptyStringInput()
{
    // Filtering an empty string is expected to give back an empty string.
    var remover = new LuceneStopWordsRemover();

    var filtered = remover.Filter(string.Empty);

    Assert.AreEqual(string.Empty, filtered);
}
/// <summary>
/// Creates a new <see cref="RedeploymentJobsIndexer"/>, configured with a
/// <c>JobsDataFromTalentLink</c> jobs provider, a Lucene stop words remover, an HTML tag
/// sanitiser and the location transformers for Joint Community Rehabilitation jobs.
/// </summary>
public RedeploymentJobsIndexer()
{
    // One description-based salary parser instance is shared by the results parser
    // and (via the HTML salary parser) by the advert parser.
    var descriptionSalaryParser = new TalentLinkSalaryFromDescriptionParser();
    var htmlSalaryParser = new TalentLinkSalaryFromHtmlParser(descriptionSalaryParser);

    JobsProvider = new JobsDataFromTalentLink(
        ResultsUrl,
        AdvertUrl,
        new TalentLinkJobResultsHtmlParser(descriptionSalaryParser),
        new TalentLinkJobAdvertHtmlParser(htmlSalaryParser, new TalentLinkWorkPatternParser()),
        new ConfigurationProxyProvider(),
        true);

    StopWordsRemover = new LuceneStopWordsRemover();
    TagSanitiser = new HtmlTagSanitiser();

    // Each entry pairs a set of matchers with the transformers registered for that set.
    // The keys are arrays, so every entry is a distinct key (reference equality).
    var transformersByMatchers = new Dictionary<IEnumerable<IJobMatcher>, IEnumerable<IJobTransformer>>();

    transformersByMatchers.Add(
        new IJobMatcher[] { new JointCommunityRehabilitationMatcher(), new LocationMatcher("Lewes") },
        new IJobTransformer[] { new SetJobLocationTransformer(new[] { "Crowborough", "Lewes", "Peacehaven", "Wadhurst" }) });

    transformersByMatchers.Add(
        new IJobMatcher[] { new JointCommunityRehabilitationMatcher(), new LocationMatcher("Eastbourne") },
        new IJobTransformer[] { new SetJobLocationTransformer(new[] { "Eastbourne", "Hailsham", "Polegate", "Seaford" }) });

    transformersByMatchers.Add(
        new IJobMatcher[] { new JointCommunityRehabilitationMatcher(), new LocationMatcher("Bexhill-on-Sea") },
        new IJobTransformer[] { new SetJobLocationTransformer(new[] { "Bexhill-on-Sea", "Hastings", "Rural Rother" }) });

    JobTransformers = transformersByMatchers;
}
/// <summary>
/// Initializes a new instance of the <see cref="RedeploymentJobsIndexer"/> class, configured with a
/// <c>JobsDataFromTribePad</c> jobs provider whose URLs are read from the application settings.
/// </summary>
/// <remarks>
/// Reads the <c>TribePadRedeploymentJobsResultsUrls</c> (comma-separated), <c>TribePadAdvertUrl</c>,
/// <c>TribePadLookupValuesUrl</c>, <c>TribePadApplyUrl</c> and <c>DoNotRemoveMediaDomainInJobAdverts</c>
/// app settings. The three single-URL settings are passed straight to <see cref="Uri"/>, so a missing
/// or malformed value makes this constructor throw.
/// </remarks>
public RedeploymentJobsIndexer()
{
    // Materialise the results URLs once so each Uri is parsed a single time here, rather than
    // on every enumeration of a deferred query; a malformed URL therefore fails in this constructor,
    // like the other URL settings below. When the setting is missing the null-conditional operator
    // short-circuits the whole chain and resultsUrls stays null, as before.
    IEnumerable<Uri> resultsUrls = ConfigurationManager.AppSettings["TribePadRedeploymentJobsResultsUrls"]
        ?.Split(',')
        .Select(x => new Uri(x))
        .ToArray();
    var advertUrl = new Uri(ConfigurationManager.AppSettings["TribePadAdvertUrl"]);
    var lookupValuesApiUrl = new Uri(ConfigurationManager.AppSettings["TribePadLookupValuesUrl"]);
    var applyUrl = new Uri(ConfigurationManager.AppSettings["TribePadApplyUrl"]);

    // This setting is useful in a test environment where images are not present on the test domain.
    // A missing setting compares as false.
    var disableMediaDomainTransformer = string.Equals(
        ConfigurationManager.AppSettings["DoNotRemoveMediaDomainInJobAdverts"],
        "TRUE",
        StringComparison.OrdinalIgnoreCase);

    // The same proxy provider and lookup values provider instances are shared by everything below.
    var proxyProvider = new ConfigurationProxyProvider();
    var lookupValuesProvider = new JobsLookupValuesFromTribePad(
        lookupValuesApiUrl,
        new LookupValuesFromTribePadBuiltInFieldParser(),
        new LookupValuesFromTribePadCustomFieldParser(),
        null, // NOTE(review): meaning of this argument is not visible here - confirm against JobsLookupValuesFromTribePad
        proxyProvider);
    var jobParser = new TribePadJobParser(
        lookupValuesProvider,
        new TribePadSalaryParser(lookupValuesProvider),
        new TribePadWorkPatternParser(lookupValuesProvider, new TribePadWorkPatternSplitter()),
        new TribePadLocationParser(),
        applyUrl);

    // The one parser instance is passed for both parser arguments.
    JobsProvider = new JobsDataFromTribePad(resultsUrls, advertUrl, jobParser, jobParser, proxyProvider);
    StopWordsRemover = new LuceneStopWordsRemover();
    TagSanitiser = new HtmlTagSanitiser();

    // Only include the media domain transformer when it is enabled, instead of
    // leaving a null element in the array handed to HtmlStringFormatterAdapter.
    var htmlStringFormatters = new List<IHtmlStringFormatter>
    {
        new CloseEmptyElementsFormatter(),
        new HouseStyleDateFormatter()
    };
    if (!disableMediaDomainTransformer)
    {
        htmlStringFormatters.Add(new RemoveMediaDomainUrlTransformer());
    }

    // Each entry pairs a set of matchers with the transformers registered for that set.
    JobTransformers = new Dictionary<IEnumerable<IJobMatcher>, IEnumerable<IJobTransformer>>()
    {
        {
            new IJobMatcher[] { new JointCommunityRehabilitationMatcher(), new LocationMatcher("Lewes") },
            new IJobTransformer[] { new SetJobLocationTransformer(new[] { "Crowborough", "Lewes", "Peacehaven", "Wadhurst" }) }
        },
        {
            new IJobMatcher[] { new JointCommunityRehabilitationMatcher(), new LocationMatcher("Eastbourne") },
            new IJobTransformer[] { new SetJobLocationTransformer(new[] { "Eastbourne", "Hailsham", "Polegate", "Seaford" }) }
        },
        {
            new IJobMatcher[] { new JointCommunityRehabilitationMatcher(), new LocationMatcher("Bexhill-on-Sea") },
            new IJobTransformer[] { new SetJobLocationTransformer(new[] { "Bexhill-on-Sea", "Hastings", "Rural Rother" }) }
        },
        {
            new IJobMatcher[] { /* No matcher - apply to all jobs */ },
            new IJobTransformer[]
            {
                new HtmlAgilityPackFormatterAdapter(new IHtmlAgilityPackHtmlFormatter[]
                {
                    new RemoveUnwantedAttributesFormatter(new string[] { "style", "data-mce-style" }),
                    new RemoveUnwantedNodesFormatter(new[] { "font", "u", "comment()" }, false),
                    new RemoveUnwantedNodesFormatter(new[] { "style" }, true),
                    new RemoveElementsWithNoContentFormatter(new[] { "strong", "p" }),
                    new TruncateLongLinksFormatter(new HtmlLinkFormatter()),
                    new EmbeddedYouTubeVideosFormatter(),
                    new FakeListFormatter()
                }),
                new HtmlStringFormatterAdapter(htmlStringFormatters.ToArray())
            }
        }
    };
}