public void StopWordsAreRemovedFromIndexedValues()
        {
            // Arrange: a phrase containing the word "and", which the assertion below expects to be dropped
            const string input = "Administration and Clerical";
            const string expected = "Administration Clerical";
            var remover = new LuceneStopWordsRemover();

            // Act
            var actual = remover.Filter(input);

            // Assert
            Assert.AreEqual(expected, actual);
        }
        public void ReturnsEmptyStringFromEmptyStringInput()
        {
            // Arrange
            var remover = new LuceneStopWordsRemover();

            // Act: filter an empty string
            var actual = remover.Filter(string.Empty);

            // Assert: the output is expected to be empty as well
            Assert.AreEqual(string.Empty, actual);
        }
예제 #3
0
        /// <summary>
        /// Creates a <see cref="RedeploymentJobsIndexer"/> whose <c>JobsProvider</c> is a
        /// <see cref="JobsDataFromTalentLink"/> built from <c>ResultsUrl</c> and <c>AdvertUrl</c>,
        /// and registers the location transformers applied to Joint Community Rehabilitation jobs.
        /// </summary>
        public RedeploymentJobsIndexer()
        {
            // The description-based salary parser is shared by the results parser and the HTML salary parser.
            var descriptionSalaryParser = new TalentLinkSalaryFromDescriptionParser();
            var htmlSalaryParser = new TalentLinkSalaryFromHtmlParser(descriptionSalaryParser);
            var resultsParser = new TalentLinkJobResultsHtmlParser(descriptionSalaryParser);
            var advertParser = new TalentLinkJobAdvertHtmlParser(htmlSalaryParser, new TalentLinkWorkPatternParser());

            JobsProvider = new JobsDataFromTalentLink(ResultsUrl, AdvertUrl, resultsParser, advertParser, new ConfigurationProxyProvider(), true);
            StopWordsRemover = new LuceneStopWordsRemover();
            TagSanitiser = new HtmlTagSanitiser();

            // Each entry pairs a set of matchers with the transformers registered for them.
            var transformersByMatchers = new Dictionary<IEnumerable<IJobMatcher>, IEnumerable<IJobTransformer>>();

            transformersByMatchers.Add(
                new IJobMatcher[] { new JointCommunityRehabilitationMatcher(), new LocationMatcher("Lewes") },
                new IJobTransformer[] { new SetJobLocationTransformer(new[] { "Crowborough", "Lewes", "Peacehaven", "Wadhurst" }) });

            transformersByMatchers.Add(
                new IJobMatcher[] { new JointCommunityRehabilitationMatcher(), new LocationMatcher("Eastbourne") },
                new IJobTransformer[] { new SetJobLocationTransformer(new[] { "Eastbourne", "Hailsham", "Polegate", "Seaford" }) });

            transformersByMatchers.Add(
                new IJobMatcher[] { new JointCommunityRehabilitationMatcher(), new LocationMatcher("Bexhill-on-Sea") },
                new IJobTransformer[] { new SetJobLocationTransformer(new[] { "Bexhill-on-Sea", "Hastings", "Rural Rother" }) });

            JobTransformers = transformersByMatchers;
        }
        /// <summary>
        /// Initializes a new instance of the <see cref="RedeploymentJobsIndexer"/> class, configured to read
        /// jobs through a <see cref="JobsDataFromTribePad"/> provider using URLs from the application settings.
        /// </summary>
        /// <remarks>
        /// Reads these app settings: <c>TribePadRedeploymentJobsResultsUrls</c> (comma-separated list of URLs),
        /// <c>TribePadAdvertUrl</c>, <c>TribePadLookupValuesUrl</c>, <c>TribePadApplyUrl</c> and the optional
        /// <c>DoNotRemoveMediaDomainInJobAdverts</c> flag. The three single-URL settings are passed straight to
        /// <see cref="Uri"/>'s constructor, so a missing or malformed value throws from this constructor.
        /// </remarks>
        public RedeploymentJobsIndexer()
        {
            // Materialise the list once: a deferred Select would re-create every Uri on each enumeration and
            // would only report a malformed URL when the provider first enumerates it. The value stays null
            // when the setting is absent, exactly as before.
            IEnumerable<Uri> resultsUrls = ConfigurationManager.AppSettings["TribePadRedeploymentJobsResultsUrls"]?.Split(',').Select(x => new Uri(x)).ToList();
            var advertUrl          = new Uri(ConfigurationManager.AppSettings["TribePadAdvertUrl"]);
            var lookupValuesApiUrl = new Uri(ConfigurationManager.AppSettings["TribePadLookupValuesUrl"]);
            var applyUrl           = new Uri(ConfigurationManager.AppSettings["TribePadApplyUrl"]);

            // This setting is useful in a test environment where images are not present on the test domain.
            // A missing setting compares as false.
            var disableMediaDomainTransformer = string.Equals(ConfigurationManager.AppSettings["DoNotRemoveMediaDomainInJobAdverts"], "TRUE", StringComparison.OrdinalIgnoreCase);

            var proxyProvider        = new ConfigurationProxyProvider();
            var lookupValuesProvider = new JobsLookupValuesFromTribePad(lookupValuesApiUrl, new LookupValuesFromTribePadBuiltInFieldParser(), new LookupValuesFromTribePadCustomFieldParser(), null, proxyProvider);
            var jobParser            = new TribePadJobParser(lookupValuesProvider, new TribePadSalaryParser(lookupValuesProvider), new TribePadWorkPatternParser(lookupValuesProvider, new TribePadWorkPatternSplitter()), new TribePadLocationParser(), applyUrl);

            // Build the string formatters up front so the media-domain formatter is simply left out when it is
            // disabled. Whether HtmlStringFormatterAdapter tolerates a null entry is not visible from here, so
            // no null is ever handed to it.
            var htmlStringFormatters = new List<IHtmlStringFormatter>
            {
                new CloseEmptyElementsFormatter(),
                new HouseStyleDateFormatter()
            };
            if (!disableMediaDomainTransformer)
            {
                htmlStringFormatters.Add(new RemoveMediaDomainUrlTransformer());
            }

            // The same parser instance is passed for both parser arguments of the provider.
            JobsProvider     = new JobsDataFromTribePad(resultsUrls, advertUrl, jobParser, jobParser, proxyProvider);
            StopWordsRemover = new LuceneStopWordsRemover();
            TagSanitiser     = new HtmlTagSanitiser();
            JobTransformers  = new Dictionary <IEnumerable <IJobMatcher>, IEnumerable <IJobTransformer> >()
            {
                { new IJobMatcher[] { new JointCommunityRehabilitationMatcher(), new LocationMatcher("Lewes") }, new IJobTransformer[] { new SetJobLocationTransformer(new[] { "Crowborough", "Lewes", "Peacehaven", "Wadhurst" }) } },
                { new IJobMatcher[] { new JointCommunityRehabilitationMatcher(), new LocationMatcher("Eastbourne") }, new IJobTransformer[] { new SetJobLocationTransformer(new[] { "Eastbourne", "Hailsham", "Polegate", "Seaford" }) } },
                { new IJobMatcher[] { new JointCommunityRehabilitationMatcher(), new LocationMatcher("Bexhill-on-Sea") }, new IJobTransformer[] { new SetJobLocationTransformer(new[] { "Bexhill-on-Sea", "Hastings", "Rural Rother" }) } },
                { new IJobMatcher[] { /* No matcher - apply to all jobs */ }, new IJobTransformer[] {
                      new HtmlAgilityPackFormatterAdapter(new IHtmlAgilityPackHtmlFormatter[] {
                            new RemoveUnwantedAttributesFormatter(new string[] { "style", "data-mce-style" }),
                            new RemoveUnwantedNodesFormatter(new[] { "font", "u", "comment()" }, false),
                            new RemoveUnwantedNodesFormatter(new[] { "style" }, true),
                            new RemoveElementsWithNoContentFormatter(new[] { "strong", "p" }),
                            new TruncateLongLinksFormatter(new HtmlLinkFormatter()),
                            new EmbeddedYouTubeVideosFormatter(),
                            new FakeListFormatter()
                        }),
                      new HtmlStringFormatterAdapter(htmlStringFormatters.ToArray())
                  } }
            };
        }