Example #1
0
 /// <summary>
 /// Initializes a new instance of the <see cref="BaseJobsIndexer" /> class and stores the supplied dependencies.
 /// </summary>
 /// <param name="jobsProvider">The jobs provider; must not be <c>null</c>.</param>
 /// <param name="stopWordsRemover">The search filter kept as the stop words remover; must not be <c>null</c>.</param>
 /// <param name="tagSanitiser">The HTML tag sanitiser; must not be <c>null</c>.</param>
 /// <exception cref="System.ArgumentNullException">
 /// <paramref name="jobsProvider" />, <paramref name="stopWordsRemover" /> or <paramref name="tagSanitiser" /> is <c>null</c>.
 /// </exception>
 protected BaseJobsIndexer(IJobsDataProvider jobsProvider, ISearchFilter stopWordsRemover, IHtmlTagSanitiser tagSanitiser)
 {
     // Each dependency is checked as it is stored; the checks run in parameter order,
     // so the first null argument is the one named in the exception.
     _jobsProvider = jobsProvider ?? throw new ArgumentNullException(nameof(jobsProvider));
     _stopWordsRemover = stopWordsRemover ?? throw new ArgumentNullException(nameof(stopWordsRemover));
     _tagSanitiser = tagSanitiser ?? throw new ArgumentNullException(nameof(tagSanitiser));
 }
Example #2
0
        /// <summary>
        /// Gets the text content of an HTML string, excluding any text that is used as link text.
        /// </summary>
        /// <param name="text">The HTML to examine. A <c>null</c> or empty value is returned unchanged.</param>
        /// <param name="tagSanitiser">The tag sanitiser used to strip the markup left once links have been removed.</param>
        /// <returns>
        /// <paramref name="text" /> itself when it is <c>null</c> or empty; otherwise the HTML-decoded, trimmed text
        /// that remains after anchor elements (including their content) have been removed and the sanitiser has
        /// stripped the remaining tags.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="tagSanitiser" /> is <c>null</c> and <paramref name="text" /> is not <c>null</c> or empty.
        /// </exception>
        public string TextOutsideLinks(string text, IHtmlTagSanitiser tagSanitiser)
        {
            // Nothing to process. This early return deliberately stays ahead of the sanitiser check,
            // so a null sanitiser is still tolerated for null/empty input, as it always has been.
            if (String.IsNullOrEmpty(text))
            {
                return text;
            }
            if (tagSanitiser == null)
            {
                throw new ArgumentNullException(nameof(tagSanitiser));
            }

            // Remove any links including the link text.
            // - Singleline lets '.' match line breaks, so link text spanning several lines is removed too.
            // - IgnoreCase handles upper/mixed-case tags such as <A HREF="...">...</A>.
            // - \s (rather than a literal space) accepts a tab or newline between the tag name and its attributes.
            const string anythingExceptEndAnchor = "(?:(?!</a>).)*";
            const RegexOptions linkOptions = RegexOptions.IgnoreCase | RegexOptions.Singleline;

            text = Regex.Replace(text, @"<a\s[^>]*>" + anythingExceptEndAnchor + "</a>", String.Empty, linkOptions);

            // Strip the remaining markup via the sanitiser and decode entities; what is left is the text outside links.
            text = HttpUtility.HtmlDecode(tagSanitiser.StripTags(text));

            // Surrounding whitespace is not meaningful content, so trim it before returning.
            return text.Trim();
        }