/// <summary>
/// Construct a page that reads its characters from an existing source.
/// </summary>
/// <param name="source">The source of characters. Must not be <c>null</c>.</param>
/// <exception cref="System.ArgumentNullException">
/// If <paramref name="source"/> is <c>null</c>.
/// </exception>
public Page(Source source)
{
    // ArgumentNullException derives from ArgumentException, so callers that
    // catch the previously thrown ArgumentException keep working; the
    // parameter name is now carried on the exception as well.
    if (null == source)
    {
        throw new System.ArgumentNullException("source", "source cannot be null");
    }

    mSource = source;
    mIndex = new PageIndex(this);

    // A page built directly from a source starts without URL information.
    mUrl = null;
    mBaseUrl = null;
}
/// <summary>
/// Construct a page from the given string.
/// </summary>
/// <param name="text">The HTML text. Must not be <c>null</c>.</param>
/// <param name="charset">
/// <em>Optional</em>. The character set name handed to the underlying
/// <c>StringSource</c> (per the original documentation, this is the encoding
/// the page reports). If <c>null</c>, <c>DEFAULT_CHARSET</c> is used.
/// </param>
/// <exception cref="System.ArgumentNullException">
/// If <paramref name="text"/> is <c>null</c>.
/// </exception>
public Page(System.String text, System.String charset)
{
    // ArgumentNullException derives from ArgumentException, so callers that
    // catch the previously thrown ArgumentException keep working.
    if (null == text)
    {
        throw new System.ArgumentNullException("text", "text cannot be null");
    }

    if (null == charset)
    {
        charset = DEFAULT_CHARSET;
    }

    mSource = new StringSource(text, charset);
    mIndex = new PageIndex(this);

    // A page built from literal text starts without URL information.
    mUrl = null;
    mBaseUrl = null;
}
/// <summary>
/// Construct a page from a stream encoded with the given charset.
/// </summary>
/// <param name="stream">The source of bytes. Must not be <c>null</c>.</param>
/// <param name="charset">
/// The encoding used. If <c>null</c>, defaults to <c>DEFAULT_CHARSET</c>.
/// </param>
/// <exception cref="System.ArgumentNullException">
/// If <paramref name="stream"/> is <c>null</c>.
/// </exception>
/// <exception cref="UnsupportedEncodingException">
/// If the given charset is not supported.
/// NOTE(review): this tag is carried over from the Java original; confirm
/// which exception <c>InputStreamSource</c> actually raises here.
/// </exception>
public Page(System.IO.Stream stream, System.String charset)
{
    // ArgumentNullException derives from ArgumentException, so callers that
    // catch the previously thrown ArgumentException keep working.
    if (null == stream)
    {
        throw new System.ArgumentNullException("stream", "stream cannot be null");
    }

    if (null == charset)
    {
        charset = DEFAULT_CHARSET;
    }

    mSource = new InputStreamSource(stream, charset);
    mIndex = new PageIndex(this);

    // A page built from a raw stream starts without URL information.
    mUrl = null;
    mBaseUrl = null;
}
/// <summary>
/// Obtain the protocol output for this page and, when the request succeeded,
/// rebuild the page's character source from the returned content bytes.
/// The page URL and index are reassigned afterwards in every case.
/// </summary>
/// <param name="obProtocol">
/// Protocol object that supplies the output and the URL. Must not be
/// <c>null</c> (only checked when the call does not return early).
/// </param>
/// <param name="bIsRefresh">
/// When <c>true</c>, the content is fetched again even if it was already
/// obtained by an earlier call.
/// </param>
/// <exception cref="ArgumentNullException">
/// If <paramref name="obProtocol"/> is <c>null</c>.
/// </exception>
/// <exception cref="ParserException">
/// Wraps any exception raised while obtaining the output or building the source.
/// </exception>
private void GetPageContent(HttpProtocol obProtocol, bool bIsRefresh)
{
    // Content is already present and no refresh was requested: nothing to do.
    if (!bIsRefresh && m_bHasContent)
    {
        return;
    }

    if (obProtocol == null)
    {
        throw new ArgumentNullException("obProtocol", "Null HttpProtocol object specified");
    }

    lock (this)
    {
        try
        {
            m_obProtocolOutput = obProtocol.GetProtocolOutput();

            if (m_obProtocolOutput.Status.Code == HttpProtocolStatus.SUCCESS)
            {
                // The flag and the content properties are stored before the
                // content type is read; keep this order.
                m_bHasContent = true;
                this.m_HttpContentProperties = m_obProtocolOutput.Content.ContentProperties;

                System.String contentType = this.ContentType;
                System.String charsetName = GetCharset(contentType);
                ParserStream parserStream = new ParserStream(
                    new System.IO.MemoryStream(m_obProtocolOutput.Content.ContentData));

                mSource = new InputStreamSource(
                    parserStream,
                    charsetName,
                    m_obProtocolOutput.Content.ContentData.Length);
            }
            // On any other status the existing source is left untouched.
        }
        catch (System.Exception ex)
        {
            throw new ParserException("Failed to get page content", ex);
        }

        mUrl = obProtocol.URL.ToString();
        mIndex = new PageIndex(this);
    }
}
/// <summary>
/// Reset the page: the source of characters is reset and the page index is
/// replaced with a newly constructed one.
/// </summary>
public virtual void Reset()
{
    Source.Reset();

    // TODO: is rebuilding the index really necessary?
    this.mIndex = new PageIndex(this);
}
/// <summary>
/// Construct a page on top of an already created character source.
/// </summary>
/// <param name="source">The source of characters. Must not be <c>null</c>.</param>
/// <exception cref="System.ArgumentNullException">
/// If <paramref name="source"/> is <c>null</c>.
/// </exception>
public Page(Source source)
{
    // Throw the specific null-argument exception with the parameter name.
    // It is a subclass of the ArgumentException thrown before, so existing
    // catch blocks are unaffected.
    if (null == source)
    {
        throw new System.ArgumentNullException("source", "source cannot be null");
    }

    mSource = source;
    mIndex = new PageIndex(this);

    // No URL information is associated with a page created this way.
    mUrl = null;
    mBaseUrl = null;
}
/// <summary>
/// Construct a page whose content is the given string.
/// </summary>
/// <param name="text">The HTML text. Must not be <c>null</c>.</param>
/// <param name="charset">
/// <em>Optional</em>. The character set name passed to the underlying
/// <c>StringSource</c> (per the original documentation, this is the encoding
/// the page reports). If <c>null</c>, <c>DEFAULT_CHARSET</c> is used.
/// </param>
/// <exception cref="System.ArgumentNullException">
/// If <paramref name="text"/> is <c>null</c>.
/// </exception>
public Page(System.String text, System.String charset)
{
    // Throw the specific null-argument exception with the parameter name.
    // It is a subclass of the ArgumentException thrown before, so existing
    // catch blocks are unaffected.
    if (null == text)
    {
        throw new System.ArgumentNullException("text", "text cannot be null");
    }

    if (null == charset)
    {
        charset = DEFAULT_CHARSET;
    }

    mSource = new StringSource(text, charset);
    mIndex = new PageIndex(this);

    // No URL information is associated with a page created this way.
    mUrl = null;
    mBaseUrl = null;
}
/// <summary>
/// Construct a page that reads bytes from a stream encoded with the given charset.
/// </summary>
/// <param name="stream">The source of bytes. Must not be <c>null</c>.</param>
/// <param name="charset">
/// The encoding used. If <c>null</c>, defaults to <c>DEFAULT_CHARSET</c>.
/// </param>
/// <exception cref="System.ArgumentNullException">
/// If <paramref name="stream"/> is <c>null</c>.
/// </exception>
/// <exception cref="UnsupportedEncodingException">
/// If the given charset is not supported.
/// NOTE(review): this tag is carried over from the Java original; confirm
/// which exception <c>InputStreamSource</c> actually raises here.
/// </exception>
public Page(System.IO.Stream stream, System.String charset)
{
    // Throw the specific null-argument exception with the parameter name.
    // It is a subclass of the ArgumentException thrown before, so existing
    // catch blocks are unaffected.
    if (null == stream)
    {
        throw new System.ArgumentNullException("stream", "stream cannot be null");
    }

    if (null == charset)
    {
        charset = DEFAULT_CHARSET;
    }

    mSource = new InputStreamSource(stream, charset);
    mIndex = new PageIndex(this);

    // No URL information is associated with a page created this way.
    mUrl = null;
    mBaseUrl = null;
}
/// <summary>
/// Retrieve the protocol output for this page and, if the status is SUCCESS,
/// replace the page's character source with one built over the returned
/// content bytes. The page URL and index are reassigned afterwards regardless
/// of the status.
/// </summary>
/// <param name="obProtocol">
/// Protocol object that supplies the output and the URL. Must not be
/// <c>null</c> (only checked when the call does not return early).
/// </param>
/// <param name="bIsRefresh">
/// Pass <c>true</c> to fetch again even when content was already obtained.
/// </param>
/// <exception cref="ArgumentNullException">
/// If <paramref name="obProtocol"/> is <c>null</c>.
/// </exception>
/// <exception cref="ParserException">
/// Wraps any exception raised while obtaining the output or building the source.
/// </exception>
private void GetPageContent(HttpProtocol obProtocol, bool bIsRefresh)
{
    // Skip the fetch when content is already present and no refresh is wanted.
    if (!bIsRefresh && m_bHasContent)
    {
        return;
    }

    if (obProtocol == null)
    {
        throw new ArgumentNullException("obProtocol", "Null HttpProtocol object specified");
    }

    lock (this)
    {
        try
        {
            m_obProtocolOutput = obProtocol.GetProtocolOutput();

            if (m_obProtocolOutput.Status.Code == HttpProtocolStatus.SUCCESS)
            {
                // Flag and content properties are set before the content
                // type is read; this order is preserved deliberately.
                m_bHasContent = true;
                this.m_HttpContentProperties = m_obProtocolOutput.Content.ContentProperties;

                System.String contentType = this.ContentType;
                System.String charsetName = GetCharset(contentType);
                ParserStream parserStream = new ParserStream(
                    new System.IO.MemoryStream(m_obProtocolOutput.Content.ContentData));

                mSource = new InputStreamSource(
                    parserStream,
                    charsetName,
                    m_obProtocolOutput.Content.ContentData.Length);
            }
            // A non-SUCCESS status leaves the current source as it was.
        }
        catch (System.Exception ex)
        {
            throw new ParserException("Failed to get page content", ex);
        }

        mUrl = obProtocol.URL.ToString();
        mIndex = new PageIndex(this);
    }
}