/// <summary>
/// Replaces <paramref name="value"/> with the result of
/// <c>HtmlProcessor.GetText()</c> when its string form is non-empty and
/// <c>HtmlProcessor.QuessIsHtml</c> reports it as HTML; otherwise hands the
/// value back untouched.
/// </summary>
/// <param name="ctx">Pipeline context; not used by this converter.</param>
/// <param name="value">Value to inspect; may be <c>null</c>.</param>
/// <returns>
/// The text produced by the HTML processor for HTML-looking input, or the
/// original <paramref name="value"/> (including <c>null</c>) in every other case.
/// </returns>
public override Object Convert(PipelineContext ctx, Object value)
 {
     // Nothing to convert.
     if (value == null)
     {
         return value;
     }

     String text = value.ToString();

     // Empty strings and non-HTML content pass through unchanged.
     if (text.Length == 0 || !HtmlProcessor.QuessIsHtml(text))
     {
         return value;
     }

     HtmlProcessor processor = new HtmlProcessor(text);
     return processor.GetText();
 }
示例#2
0
        /// <summary>
        /// Downloads the document addressed by <c>Parent.UriBase</c> plus the
        /// URL-encoded <paramref name="fn"/>, parses it as UTF-8 HTML and stores
        /// the result in <c>HtmlProcessor</c>.
        /// </summary>
        /// <remarks>
        /// When <c>dbgStoreDir</c> is set, the raw response is buffered in memory
        /// and passed to <c>storeHtml</c> before it is parsed.
        /// </remarks>
        /// <param name="fn">Name appended (URL-encoded) to the parent's base URI.</param>
        /// <exception cref="BMException">
        /// The server answered with HTTP 500; the exception carries the original
        /// <see cref="WebException"/> and the response body.
        /// </exception>
        /// <exception cref="WebException">
        /// Any other request failure; rethrown unchanged.
        /// </exception>
        private void loadUrl(String fn)
        {
            Uri            uri = new Uri(Parent.UriBase + HttpUtility.UrlEncode(fn));
            HttpWebRequest req = (HttpWebRequest)HttpWebRequest.Create(uri);

            req.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;
            req.KeepAlive = true;
            HttpWebResponse resp;

            try
            {
                resp = (HttpWebResponse)req.GetResponse();
            }
            catch (WebException we)
            {
                HttpWebResponse errorResp = (HttpWebResponse)we.Response;
                Logs.ErrorLog.Log("error: " + we);
                if (errorResp == null || errorResp.StatusCode != HttpStatusCode.InternalServerError)
                {
                    // Leave the response attached to the exception for the caller.
                    throw;
                }

                // Read the server's error body. The using blocks guarantee that the
                // reader and the response are released even if reading fails.
                String strResp;
                using (errorResp)
                using (StreamReader reader = new StreamReader(errorResp.GetResponseStream(), Encoding.UTF8))
                {
                    strResp = reader.ReadToEnd();
                }

                Logs.ErrorLog.Log("error={0}", strResp);
                Logs.ErrorLog.Log("url={0}", uri);
                throw new BMException(we, strResp);
            }

            HtmlDocument doc = new HtmlDocument();

            using (resp)
            using (Stream respStream = resp.GetResponseStream())
            {
                if (dbgStoreDir == null)
                {
                    doc.Load(respStream, Encoding.UTF8);
                }
                else
                {
                    // Debug mode: buffer the whole response so the raw bytes can be
                    // stored before the same bytes are parsed.
                    using (MemoryStream m = new MemoryStream(4096))
                    {
                        CopyStream(m, respStream, 4096);
                        storeHtml(fn, m.GetBuffer(), (int)m.Length);
                        m.Position = 0;
                        doc.Load(m, Encoding.UTF8);
                    }
                }
            }

            HtmlProcessor = new HtmlProcessor(doc);
        }