示例#1
0
		/// <summary>
		/// Parses the HTML text supplied by a TextReader into this document.
		/// </summary>
		/// <param name="reader">The TextReader that supplies the HTML text. Must not be null.</param>
		/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
		public void Load(TextReader reader)
		{
			// every other Load overload funnels into this one
			if (reader == null)
			{
				throw new ArgumentNullException("reader");
			}

			_onlyDetectEncoding = false;

			// each bookkeeping table is only allocated when its option is switched on
			_openednodes = OptionCheckSyntax ? new Hashtable() : null;
			_nodesid = OptionUseIdAttribute ? new Hashtable() : null;

			StreamReader streamReader = reader as StreamReader;
			if (streamReader == null)
			{
				_streamencoding = null;
				// Costly, but unavoidable: a plain TextReader does not expose the length of its underlying data
				_text = new DummyStreamAsArray(reader.ReadToEnd());
			}
			else
			{
				try
				{
					// peeking makes the reader look at the byte order mark if it has not done so yet
					streamReader.Peek();
				}
				catch
				{
					// ignored on purpose
				}
				_streamencoding = streamReader.CurrentEncoding;
				_text = new ImplStreamAsArray(streamReader);
			}
			_declaredencoding = null;

			_documentnode = CreateNode(HtmlNodeType.Document, 0);
			Parse();

			if (!OptionCheckSyntax)
			{
				return;
			}

			// whatever is still registered as opened after parsing never received its end tag
			foreach (HtmlNode openNode in _openednodes.Values)
			{
				// entries without the start-tag flag have already been reported
				if (openNode._starttag)
				{
					string snippet = string.Empty;
					if (OptionExtractErrorSourceText)
					{
						snippet = openNode.OuterHtml;
						if (snippet.Length > OptionExtractErrorSourceTextMaxLength)
						{
							// keep the reported source excerpt within the configured limit
							snippet = snippet.Substring(0, OptionExtractErrorSourceTextMaxLength);
						}
					}

					AddError(
						HtmlParseErrorCode.TagNotClosed,
						openNode._line,
						openNode._lineposition,
						openNode._streamposition,
						snippet,
						"End tag </" + openNode.Name + "> was not found");
				}
			}

			// the table has served its purpose
			_openednodes.Clear();
		}
示例#2
0
		/// <summary>
		/// Detects the encoding of an HTML text provided on a TextReader.
		/// </summary>
		/// <param name="reader">The TextReader used to feed the HTML. May not be null.</param>
		/// <returns>
		/// The encoding reported while parsing, or null when parsing ran to completion
		/// without reporting one.
		/// </returns>
		/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
		public Encoding DetectEncoding(TextReader reader)
		{
			if (reader == null)
			{
				throw new ArgumentNullException("reader");
			}

			// tells the parsing code that this run is only looking for the encoding
			_onlyDetectEncoding = true;

			if (OptionCheckSyntax)
			{
				_openednodes = new Hashtable();
			}
			else
			{
				_openednodes = null;
			}

			if (OptionUseIdAttribute)
			{
				_nodesid = new Hashtable();
			}
			else
			{
				_nodesid = null;
			}

			StreamReader sr = reader as StreamReader;
			if (sr != null)
			{
				try
				{
					// A StreamReader only inspects the byte order mark on its first read, so
					// CurrentEncoding may still be the constructor default at this point.
					// Peek forces that detection without consuming any character, which is
					// the same thing Load does before it records the stream encoding.
					sr.Peek();
				}
				catch
				{
					// Best effort only, mirroring Load: a failure here must not abort detection.
					// NOTE(review): a genuine I/O problem is expected to resurface when the
					// text is actually read - confirm against ImplStreamAsArray.
				}
				_streamencoding = sr.CurrentEncoding;
				_text = new ImplStreamAsArray(sr);
			}
			else
			{
				_streamencoding = null;
				// Expensive, but cannot be avoided since TextReader does not expose the length of the underlying data
				_text = new DummyStreamAsArray(reader.ReadToEnd());
			}
			_declaredencoding = null;

			_documentnode = CreateNode(HtmlNodeType.Document, 0);

			// this is a hack, but it allows us not to muck with the original parsing code:
			// the encoding is handed back to us through an EncodingFoundException
			try
			{
				Parse();
			}
			catch (EncodingFoundException ex)
			{
				// parsing was cut short, so drop the nodes it was still tracking
				_lastnodes.Clear();
				return ex.Encoding;
			}

			// parsing finished without any encoding being reported
			return null;
		}