/// <summary>
/// Parses an HTML document read from the supplied TextReader into this instance.
/// </summary>
/// <param name="reader">The TextReader that supplies the HTML text. Must not be null.</param>
public void Load(TextReader reader)
{
    // Every other Load overload funnels into this method.
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    _onlyDetectEncoding = false;

    // Each bookkeeping table is allocated only while its option is switched on.
    _openednodes = OptionCheckSyntax ? new Hashtable() : null;
    _nodesid = OptionUseIdAttribute ? new Hashtable() : null;

    StreamReader streamReader = reader as StreamReader;
    if (streamReader == null)
    {
        _streamencoding = null;

        // A plain TextReader exposes no length for its underlying data, so the
        // whole input has to be buffered up front (costly, but unavoidable).
        _text = new DummyStreamAsArray(reader.ReadToEnd());
    }
    else
    {
        try
        {
            // Peeking triggers the byte-order-mark read, if one is needed,
            // before CurrentEncoding is queried below.
            streamReader.Peek();
        }
        catch
        {
            // ignored on purpose
        }

        _streamencoding = streamReader.CurrentEncoding;
        _text = new ImplStreamAsArray(streamReader);
    }

    _declaredencoding = null;
    _documentnode = CreateNode(HtmlNodeType.Document, 0);
    Parse();

    if (!OptionCheckSyntax)
    {
        return;
    }

    // Whatever is still registered as opened never received its end tag.
    foreach (HtmlNode opened in _openednodes.Values)
    {
        // Entries whose start-tag flag is cleared have already been reported.
        if (opened._starttag)
        {
            string snippet = string.Empty;
            if (OptionExtractErrorSourceText)
            {
                snippet = opened.OuterHtml;
                if (snippet.Length > OptionExtractErrorSourceTextMaxLength)
                {
                    snippet = snippet.Substring(0, OptionExtractErrorSourceTextMaxLength);
                }
            }

            AddError(
                HtmlParseErrorCode.TagNotClosed,
                opened._line,
                opened._lineposition,
                opened._streamposition,
                snippet,
                "End tag </" + opened.Name + "> was not found");
        }
    }

    // The table is of no further use once the errors are recorded.
    _openednodes.Clear();
}
/// <summary>
/// Detects the encoding of an HTML text provided on a TextReader.
/// </summary>
/// <remarks>
/// The method switches the document into detect-only mode (<c>_onlyDetectEncoding</c> is set
/// to true and is not reset here) and then runs the regular parser. The detected encoding is
/// delivered through an <c>EncodingFoundException</c> that this method catches.
/// </remarks>
/// <param name="reader">The TextReader used to feed the HTML. May not be null.</param>
/// <returns>
/// The encoding carried by the <c>EncodingFoundException</c> raised while parsing,
/// or null when parsing completes without that exception being thrown.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
public Encoding DetectEncoding(TextReader reader)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    _onlyDetectEncoding = true;

    // Each bookkeeping table is allocated only while its option is switched on.
    if (OptionCheckSyntax)
    {
        _openednodes = new Hashtable();
    }
    else
    {
        _openednodes = null;
    }

    if (OptionUseIdAttribute)
    {
        _nodesid = new Hashtable();
    }
    else
    {
        _nodesid = null;
    }

    StreamReader sr = reader as StreamReader;
    if (sr != null)
    {
        try
        {
            // StreamReader only auto-detects a byte-order mark on its first read, so
            // CurrentEncoding is not reliable before that. Peek forces the detection
            // without consuming any character, exactly as Load(TextReader) does.
            sr.Peek();
        }
        catch
        {
            // Best effort only: any failure is left to surface from the actual read.
        }

        _streamencoding = sr.CurrentEncoding;
        _text = new ImplStreamAsArray(sr);
    }
    else
    {
        _streamencoding = null;

        // Expensive, but unavoidable: a plain TextReader does not expose the
        // length of its underlying data, so everything is buffered up front.
        _text = new DummyStreamAsArray(reader.ReadToEnd());
    }

    _declaredencoding = null;
    _documentnode = CreateNode(HtmlNodeType.Document, 0);

    // This is a hack, but it leaves the original parsing code untouched: the
    // result is signalled through an exception instead of a return value.
    try
    {
        Parse();
    }
    catch (EncodingFoundException ex)
    {
        // Parsing was interrupted part-way; discard the pending node bookkeeping.
        _lastnodes.Clear();
        return ex.Encoding;
    }

    // Parsing ran to completion without signalling an encoding.
    return null;
}