Exemple #1
0
 /// <summary> Construct a page from a source.</summary>
 /// <param name="source">The source of characters. Must not be null.
 /// </param>
 /// <exception cref="System.ArgumentNullException">If <paramref name="source"/> is null.
 /// </exception>
 public Page(Source source)
 {
     if (null == source)
     {
         // ArgumentNullException derives from ArgumentException, so handlers
         // written against the broader type keep working.
         throw new System.ArgumentNullException("source", "source cannot be null");
     }
     mSource  = source;
     mIndex   = new PageIndex(this);   // the index is handed this page
     mUrl     = null;                  // both URL fields start out null
     mBaseUrl = null;
 }
Exemple #2
0
 /// <summary> Construct a page from the given string.</summary>
 /// <param name="text">The HTML text. Must not be null.
 /// </param>
 /// <param name="charset"><em>Optional</em>. The character set encoding that will
 /// be reported by <c>getEncoding</c>. If charset is <code>null</code>
 /// the <code>DEFAULT_CHARSET</code> is used.
 /// </param>
 /// <exception cref="System.ArgumentNullException">If <paramref name="text"/> is null.
 /// </exception>
 public Page(System.String text, System.String charset)
 {
     if (null == text)
     {
         // ArgumentNullException derives from ArgumentException, so handlers
         // written against the broader type keep working.
         throw new System.ArgumentNullException("text", "text cannot be null");
     }
     if (null == charset)
     {
         charset = DEFAULT_CHARSET;
     }
     mSource  = new StringSource(text, charset);
     mIndex   = new PageIndex(this);   // the index is handed this page
     mUrl     = null;                  // both URL fields start out null
     mBaseUrl = null;
 }
Exemple #3
0
 /// <summary> Construct a page from a stream encoded with the given charset.</summary>
 /// <param name="stream">The source of bytes. Must not be null.
 /// </param>
 /// <param name="charset">The encoding used.
 /// If null, defaults to the <code>DEFAULT_CHARSET</code>.
 /// </param>
 /// <exception cref="System.ArgumentNullException">If <paramref name="stream"/> is null.
 /// </exception>
 /// <exception cref="UnsupportedEncodingException">If the given charset
 /// is not supported.
 /// </exception>
 public Page(System.IO.Stream stream, System.String charset)
 {
     if (null == stream)
     {
         // ArgumentNullException derives from ArgumentException, so handlers
         // written against the broader type keep working.
         throw new System.ArgumentNullException("stream", "stream cannot be null");
     }
     if (null == charset)
     {
         charset = DEFAULT_CHARSET;
     }
     mSource  = new InputStreamSource(stream, charset);
     mIndex   = new PageIndex(this);   // the index is handed this page
     mUrl     = null;                  // both URL fields start out null
     mBaseUrl = null;
 }
Exemple #4
0
        /// <summary> Load the page content through the given protocol object.</summary>
        /// <remarks>
        /// Returns at once when content is already present and no refresh was requested.
        /// Otherwise, while holding a lock on this page, the protocol output is fetched;
        /// when its status code is <c>HttpProtocolStatus.SUCCESS</c> the content
        /// properties are recorded and the character source is rebuilt from the content
        /// bytes. The URL and the page index are reassigned whatever the status was.
        /// </remarks>
        /// <param name="obProtocol">The protocol object supplying the output and the URL.
        /// </param>
        /// <param name="bIsRefresh">If <code>true</code>, fetch even when content is already present.
        /// </param>
        /// <exception cref="ArgumentNullException">If <paramref name="obProtocol"/> is null
        /// and a fetch is attempted.
        /// </exception>
        /// <exception cref="ParserException">If fetching the output or building the source fails.
        /// </exception>
        private void GetPageContent(HttpProtocol obProtocol, bool bIsRefresh)
        {
            bool alreadyLoaded = m_bHasContent && !bIsRefresh;
            if (alreadyLoaded)
            {
                return;
            }

            if (null == obProtocol)
            {
                throw new ArgumentNullException("obProtocol", "Null HttpProtocol object specified");
            }

            lock (this)
            {
                try
                {
                    m_obProtocolOutput = obProtocol.GetProtocolOutput();

                    // The source is only replaced for a successful fetch; any other
                    // status leaves the current source untouched.
                    if (m_obProtocolOutput.Status.Code == HttpProtocolStatus.SUCCESS)
                    {
                        m_bHasContent = true;
                        this.m_HttpContentProperties = m_obProtocolOutput.Content.ContentProperties;

                        // ContentType is read after the properties above are stored.
                        System.String contentType = this.ContentType;
                        System.String encoding    = GetCharset(contentType);
                        ParserStream  bytes       = new ParserStream(new System.IO.MemoryStream(m_obProtocolOutput.Content.ContentData));

                        mSource = new InputStreamSource(bytes, encoding, m_obProtocolOutput.Content.ContentData.Length);
                    }
                }
                catch (System.Exception cause)
                {
                    // Every failure inside the fetch is surfaced as a ParserException.
                    throw new ParserException("Failed to get page content", cause);
                }

                mUrl   = obProtocol.URL.ToString();
                mIndex = new PageIndex(this);
            }
        }
Exemple #5
0
 /// <summary>
 /// Reset the page: the source of characters is reset and a new
 /// page index is created for this page.
 /// </summary>
 public virtual void Reset()
 {
     Source.Reset();

     // todo: is this really necessary?
     this.mIndex = new PageIndex(this);
 }
		/// <summary>
		/// Reset the page: the source of characters is reset and a new
		/// page index is created for this page.
		/// </summary>
		public virtual void Reset()
		{
			Source.Reset();

			// todo: is this really necessary?
			this.mIndex = new PageIndex(this);
		}
		/// <summary> Construct a page from a source.</summary>
		/// <param name="source">The source of characters. Must not be null.
		/// </param>
		/// <exception cref="System.ArgumentNullException">If <paramref name="source"/> is null.
		/// </exception>
		public Page(Source source)
		{
			if (null == source)
			{
				// ArgumentNullException derives from ArgumentException, so handlers
				// written against the broader type keep working.
				throw new System.ArgumentNullException("source", "source cannot be null");
			}
			mSource = source;
			mIndex = new PageIndex(this); // the index is handed this page
			mUrl = null; // both URL fields start out null
			mBaseUrl = null;
		}
		/// <summary> Construct a page from the given string.</summary>
		/// <param name="text">The HTML text. Must not be null.
		/// </param>
		/// <param name="charset"><em>Optional</em>. The character set encoding that will
		/// be reported by <c>getEncoding</c>. If charset is <code>null</code>
		/// the <code>DEFAULT_CHARSET</code> is used.
		/// </param>
		/// <exception cref="System.ArgumentNullException">If <paramref name="text"/> is null.
		/// </exception>
		public Page(System.String text, System.String charset)
		{
			if (null == text)
			{
				// ArgumentNullException derives from ArgumentException, so handlers
				// written against the broader type keep working.
				throw new System.ArgumentNullException("text", "text cannot be null");
			}
			if (null == charset)
			{
				charset = DEFAULT_CHARSET;
			}
			mSource = new StringSource(text, charset);
			mIndex = new PageIndex(this); // the index is handed this page
			mUrl = null; // both URL fields start out null
			mBaseUrl = null;
		}
		/// <summary> Construct a page from a stream encoded with the given charset.</summary>
		/// <param name="stream">The source of bytes. Must not be null.
		/// </param>
		/// <param name="charset">The encoding used.
		/// If null, defaults to the <code>DEFAULT_CHARSET</code>.
		/// </param>
		/// <exception cref="System.ArgumentNullException">If <paramref name="stream"/> is null.
		/// </exception>
		/// <exception cref="UnsupportedEncodingException">If the given charset
		/// is not supported.
		/// </exception>
		public Page(System.IO.Stream stream, System.String charset)
		{
			if (null == stream)
			{
				// ArgumentNullException derives from ArgumentException, so handlers
				// written against the broader type keep working.
				throw new System.ArgumentNullException("stream", "stream cannot be null");
			}
			if (null == charset)
			{
				charset = DEFAULT_CHARSET;
			}
			mSource = new InputStreamSource(stream, charset);
			mIndex = new PageIndex(this); // the index is handed this page
			mUrl = null; // both URL fields start out null
			mBaseUrl = null;
		}
		/// <summary> Load the page content through the given protocol object.</summary>
		/// <remarks>
		/// Returns at once when content is already present and no refresh was requested.
		/// Otherwise, while holding a lock on this page, the protocol output is fetched;
		/// when its status code is <c>HttpProtocolStatus.SUCCESS</c> the content
		/// properties are recorded and the character source is rebuilt from the content
		/// bytes. The URL and the page index are reassigned whatever the status was.
		/// </remarks>
		/// <param name="obProtocol">The protocol object supplying the output and the URL.
		/// </param>
		/// <param name="bIsRefresh">If <code>true</code>, fetch even when content is already present.
		/// </param>
		/// <exception cref="ArgumentNullException">If <paramref name="obProtocol"/> is null
		/// and a fetch is attempted.
		/// </exception>
		/// <exception cref="ParserException">If fetching the output or building the source fails.
		/// </exception>
		private void GetPageContent(HttpProtocol obProtocol, bool bIsRefresh)
		{
			bool alreadyLoaded = m_bHasContent && !bIsRefresh;
			if (alreadyLoaded)
			{
				return;
			}

			if (null == obProtocol)
			{
				throw new ArgumentNullException("obProtocol", "Null HttpProtocol object specified");
			}

			lock (this)
			{
				try
				{
					m_obProtocolOutput = obProtocol.GetProtocolOutput();

					// The source is only replaced for a successful fetch; any other
					// status leaves the current source untouched.
					if (m_obProtocolOutput.Status.Code == HttpProtocolStatus.SUCCESS)
					{
						m_bHasContent = true;
						this.m_HttpContentProperties = m_obProtocolOutput.Content.ContentProperties;

						// ContentType is read after the properties above are stored.
						System.String contentType = this.ContentType;
						System.String encoding = GetCharset(contentType);
						ParserStream bytes = new ParserStream(new System.IO.MemoryStream(m_obProtocolOutput.Content.ContentData));

						mSource = new InputStreamSource(bytes, encoding, m_obProtocolOutput.Content.ContentData.Length);
					}
				}
				catch (System.Exception cause)
				{
					// Every failure inside the fetch is surfaced as a ParserException.
					throw new ParserException("Failed to get page content", cause);
				}

				mUrl = obProtocol.URL.ToString();
				mIndex = new PageIndex(this);
			}
		}