A very loose HTML parser.
See also the twintsam project for an alternative, more capable implementation of HtmlReader that is compatible with the .NET Framework XML readers.
Inheritance: IDisposable
示例#1
0
 /// <summary>
 /// Handles a CDATA node by forwarding the reader's current value to
 /// <c>WriteCData</c>. Nothing is written while <c>_enabled</c> is false.
 /// </summary>
 /// <param name="htmlReader">Reader whose <c>Value</c> is passed to <c>WriteCData</c>.</param>
 /// <param name="result">Writer handed on to <c>WriteCData</c>.</param>
 protected virtual void CaseCData(HtmlReader htmlReader, TextWriter result)
 {
     if (!_enabled)
     {
         return;
     }

     WriteCData(result, htmlReader.Value);
 }
示例#2
0
        /// <summary>
        /// Handles a start-element node: optionally auto-closes a previous element of the
        /// same name, pushes the new tag, writes the element when output is enabled and the
        /// tag is not ignored, and pops the tag again straight away when it is empty.
        /// </summary>
        /// <param name="htmlReader">Reader supplying the element's name, attributes and empty-element flag.</param>
        /// <param name="result">Writer that receives the generated markup.</param>
        protected virtual void CaseElement(HtmlReader htmlReader, TextWriter result)
        {
            // Tag names are handled in lower case throughout this method.
            string tagName = htmlReader.Name.ToLowerInvariant();

            // automatically close unclosed tag, if any
            if (AutoClosedTags.Contains(tagName) && FuzzyPopTag(tagName))
            {
                // The start tag is only ever written when the name is not in IgnoreTags
                // (see below), so an ignored tag must not get a closing tag either;
                // otherwise the output would contain an unmatched end tag.
                // NOTE(review): _enabled is deliberately not consulted here because its
                // value may still reflect tags that FuzzyPopTag just removed - confirm.
                if (!IgnoreTags.Contains(tagName))
                {
                    WriteEndElement(result, RewriteTag(tagName));
                }
            }

            // reevaluate tags stack
            PushTag(tagName);
            CheckStack();
            if (_enabled && !IgnoreTags.Contains(tagName))
            {
                WriteElement(result, RewriteTag(tagName), htmlReader.Attributes, htmlReader.IsEmptyElement);
            }

            // pop empty tags immediately
            if (htmlReader.IsEmptyElement || EmptyTags.Contains(tagName))
            {
                if (FuzzyPopTag(tagName))
                {
                    CheckStack();
                }
            }
        }
示例#3
0
        /// <summary>
        /// Handles an end-element node. The closing tag is only considered when
        /// <c>FuzzyPopTag</c> reports success for the lower-cased name; it is then written
        /// if output is enabled and the tag is not ignored, and the stack is re-checked.
        /// </summary>
        /// <param name="htmlReader">Reader supplying the name of the element being closed.</param>
        /// <param name="result">Writer that receives the closing tag.</param>
        protected virtual void CaseEndElement(HtmlReader htmlReader, TextWriter result)
        {
            var closingName = htmlReader.Name.ToLowerInvariant();

            // Nothing to do unless the tag could be popped, i.e. it was previously open.
            if (!FuzzyPopTag(closingName))
            {
                return;
            }

            // Evaluated before CheckStack() so the decision uses the current _enabled value.
            bool shouldWrite = _enabled && !IgnoreTags.Contains(closingName);
            if (shouldWrite)
            {
                WriteEndElement(result, RewriteTag(closingName));
            }

            CheckStack();
        }
示例#4
0
        /// <summary>
        /// Reads every node from <paramref name="htmlReader"/>, dispatches it to the matching
        /// <c>Case*</c> handler, closes any tags still left on the stack and finally writes
        /// the trailing entity / partial tag before flushing <paramref name="result"/>.
        /// </summary>
        /// <param name="htmlReader">Reader supplying the HTML nodes to process.</param>
        /// <param name="result">Writer that receives the sanitized output; it is flushed at the end.</param>
        protected virtual void Sanitize(HtmlReader htmlReader, TextWriter result)
        {
            BeforeDocument(result);

            // Holds the Entity value seen on the most recent successful Read().
            var entity = string.Empty;

            while (htmlReader.Read())
            {
                entity = htmlReader.Entity;

                // Node types other than the four below are skipped.
                switch (htmlReader.NodeType)
                {
                case HtmlNodeType.Element:
                    CaseElement(htmlReader, result);
                    break;

                case HtmlNodeType.EndElement:
                    CaseEndElement(htmlReader, result);
                    break;

                case HtmlNodeType.Text:
                    CaseText(htmlReader, result);
                    break;

                case HtmlNodeType.CDATA:
                    CaseCData(htmlReader, result);
                    break;
                }
            }

            // close unclosed tags
            while (_tags.Count > 0)
            {
                string tagName = PopTag();
                CheckStack();
                if (_enabled && !IgnoreTags.Contains(tagName))
                {
                    // Start tags are written through RewriteTag, so the synthesized end tag
                    // must use the same rewritten name to keep the output balanced.
                    WriteEndElement(result, RewriteTag(tagName));
                }
            }

            result.Write(entity);

            // NOTE(review): the reader's remaining Name is written verbatim, without the
            // IgnoreTags / RewriteTag handling applied elsewhere - presumably to preserve a
            // truncated trailing tag; confirm this is intended.
            if (!string.IsNullOrEmpty(htmlReader.Name))
            {
                result.Write("<{0}", htmlReader.Name);
            }

            AfterDocument(result);
            result.Flush();
        }
示例#5
0
 /// <summary>
 /// Sanitizes the HTML read from <paramref name="input"/> and writes the outcome to
 /// <paramref name="result"/>.
 /// </summary>
 /// <param name="input">Source of the HTML text; wrapped in an <c>HtmlReader</c> that is disposed before returning.</param>
 /// <param name="result">Writer that receives the sanitized output.</param>
 /// <exception cref="ArgumentNullException">
 /// <paramref name="input"/> or <paramref name="result"/> is <c>null</c>.
 /// </exception>
 public void Sanitize(TextReader input, TextWriter result)
 {
     if (input == null)
     {
         throw new ArgumentNullException(nameof(input));
     }
     if (result == null)
     {
         throw new ArgumentNullException(nameof(result));
     }

     try
     {
         using (var htmlReader = new HtmlReader(input))
         {
             Sanitize(htmlReader, result);
         }
     }
     finally
     {
         // Reset() runs even when sanitizing throws, so a failed run cannot leave
         // state behind for the next call (assumes Reset clears per-document state
         // - confirm against its definition).
         Reset();
     }
 }
		/// <summary>
		/// Handles a CDATA node by forwarding the reader's current value to
		/// <c>WriteCData</c>. Nothing is written while <c>_enabled</c> is false.
		/// </summary>
		/// <param name="htmlReader">Reader whose <c>Value</c> is passed to <c>WriteCData</c>.</param>
		/// <param name="result">Writer handed on to <c>WriteCData</c>.</param>
		protected virtual void CaseCData(HtmlReader htmlReader, TextWriter result)
		{
			if (!_enabled)
			{
				return;
			}

			WriteCData(result, htmlReader.Value);
		}
		/// <summary>
		/// Handles an end-element node. The closing tag is only considered when
		/// <c>FuzzyPopTag</c> reports success for the lower-cased name; it is then written
		/// if output is enabled and the tag is not ignored, and the stack is re-checked.
		/// </summary>
		/// <param name="htmlReader">Reader supplying the name of the element being closed.</param>
		/// <param name="result">Writer that receives the closing tag.</param>
		protected virtual void CaseEndElement(HtmlReader htmlReader, TextWriter result)
		{
			var closingName = htmlReader.Name.ToLowerInvariant();

			// Nothing to do unless the tag could be popped, i.e. it was previously open.
			if (!FuzzyPopTag(closingName))
			{
				return;
			}

			// Evaluated before CheckStack() so the decision uses the current _enabled value.
			bool shouldWrite = _enabled && !IgnoreTags.Contains(closingName);
			if (shouldWrite)
			{
				WriteEndElement(result, RewriteTag(closingName));
			}

			CheckStack();
		}
		/// <summary>
		/// Handles a start-element node: optionally auto-closes a previous element of the
		/// same name, pushes the new tag, writes the element when output is enabled and the
		/// tag is not ignored, and pops the tag again straight away when it is empty.
		/// </summary>
		/// <param name="htmlReader">Reader supplying the element's name, attributes and empty-element flag.</param>
		/// <param name="result">Writer that receives the generated markup.</param>
		protected virtual void CaseElement(HtmlReader htmlReader, TextWriter result)
		{
			// Tag names are handled in lower case throughout this method.
			string tagName = htmlReader.Name.ToLowerInvariant();

			// automatically close unclosed tag, if any
			if (AutoClosedTags.Contains(tagName) && FuzzyPopTag(tagName))
			{
				// The start tag is only ever written when the name is not in IgnoreTags
				// (see below), so an ignored tag must not get a closing tag either;
				// otherwise the output would contain an unmatched end tag.
				// NOTE(review): _enabled is deliberately not consulted here because its
				// value may still reflect tags that FuzzyPopTag just removed - confirm.
				if (!IgnoreTags.Contains(tagName))
				{
					WriteEndElement(result, RewriteTag(tagName));
				}
			}

			// reevaluate tags stack
			PushTag(tagName);
			CheckStack();
			if (_enabled && !IgnoreTags.Contains(tagName))
			{
				WriteElement(result, RewriteTag(tagName), htmlReader.Attributes, htmlReader.IsEmptyElement);
			}

			// pop empty tags immediately
			if (htmlReader.IsEmptyElement || EmptyTags.Contains(tagName))
			{
				if (FuzzyPopTag(tagName))
				{
					CheckStack();
				}
			}
		}
		/// <summary>
		/// Reads every node from <paramref name="htmlReader"/>, dispatches it to the matching
		/// <c>Case*</c> handler, closes any tags still left on the stack and finally writes
		/// the trailing entity / partial tag before flushing <paramref name="result"/>.
		/// </summary>
		/// <param name="htmlReader">Reader supplying the HTML nodes to process.</param>
		/// <param name="result">Writer that receives the sanitized output; it is flushed at the end.</param>
		protected virtual void Sanitize(HtmlReader htmlReader, TextWriter result)
		{
			BeforeDocument(result);

			// Holds the Entity value seen on the most recent successful Read().
			var entity = string.Empty;

			while (htmlReader.Read())
			{
				entity = htmlReader.Entity;

				// Node types other than the four below are skipped.
				switch (htmlReader.NodeType)
				{
					case HtmlNodeType.Element:
						CaseElement(htmlReader, result);
						break;

					case HtmlNodeType.EndElement:
						CaseEndElement(htmlReader, result);
						break;

					case HtmlNodeType.Text:
						CaseText(htmlReader, result);
						break;

					case HtmlNodeType.CDATA:
						CaseCData(htmlReader, result);
						break;
				}
			}

			// close unclosed tags
			while (_tags.Count > 0)
			{
				string tagName = PopTag();
				CheckStack();
				if (_enabled && !IgnoreTags.Contains(tagName))
				{
					// Start tags are written through RewriteTag, so the synthesized end tag
					// must use the same rewritten name to keep the output balanced.
					WriteEndElement(result, RewriteTag(tagName));
				}
			}

			result.Write(entity);

			// NOTE(review): the reader's remaining Name is written verbatim, without the
			// IgnoreTags / RewriteTag handling applied elsewhere - presumably to preserve a
			// truncated trailing tag; confirm this is intended.
			if (!string.IsNullOrEmpty(htmlReader.Name))
			{
				result.Write("<{0}", htmlReader.Name);
			}

			AfterDocument(result);
			result.Flush();
		}
		/// <summary>
		/// Sanitizes the HTML read from <paramref name="input"/> and writes the outcome to
		/// <paramref name="result"/>.
		/// </summary>
		/// <param name="input">Source of the HTML text; wrapped in an <c>HtmlReader</c> that is disposed before returning.</param>
		/// <param name="result">Writer that receives the sanitized output.</param>
		/// <exception cref="ArgumentNullException">
		/// <paramref name="input"/> or <paramref name="result"/> is <c>null</c>.
		/// </exception>
		public void Sanitize(TextReader input, TextWriter result)
		{
			if (input == null)
			{
				throw new ArgumentNullException(nameof(input));
			}
			if (result == null)
			{
				throw new ArgumentNullException(nameof(result));
			}

			try
			{
				using (var htmlReader = new HtmlReader(input))
				{
					Sanitize(htmlReader, result);
				}
			}
			finally
			{
				// Reset() runs even when sanitizing throws, so a failed run cannot leave
				// state behind for the next call (assumes Reset clears per-document state
				// - confirm against its definition).
				Reset();
			}
		}