/// <summary>
/// Replaces a value whose string form looks like HTML by the text extracted from it.
/// </summary>
/// <param name="ctx">Pipeline context; not used by this converter.</param>
/// <param name="value">The value to convert. May be null.</param>
/// <returns>
/// The result of <c>HtmlProcessor.GetText()</c> when <c>value.ToString()</c> is non-empty
/// and <c>HtmlProcessor.QuessIsHtml</c> accepts it; otherwise the original
/// <paramref name="value"/>, untouched.
/// </returns>
public override Object Convert(PipelineContext ctx, Object value)
{
    if (value == null)
    {
        return value;
    }

    String text = value.ToString();
    if (text.Length == 0)
    {
        return value;
    }

    // Only values that the processor's heuristic recognises as HTML are rewritten.
    if (!HtmlProcessor.QuessIsHtml(text))
    {
        return value;
    }

    HtmlProcessor processor = new HtmlProcessor(text);
    return processor.GetText();
}
/// <summary>
/// Downloads the document identified by <paramref name="fn"/> (URL-encoded and appended to
/// <c>Parent.UriBase</c>), parses it as UTF-8 HTML and stores the resulting processor in
/// <c>HtmlProcessor</c>.
/// </summary>
/// <param name="fn">Name of the resource to fetch, relative to <c>Parent.UriBase</c>.</param>
/// <exception cref="WebException">
/// Rethrown when the request fails without a response, or with any status other than
/// HTTP 500.
/// </exception>
/// <exception cref="BMException">
/// Thrown when the server answers with HTTP 500; carries the original
/// <see cref="WebException"/> and the response body.
/// </exception>
private void loadUrl(String fn)
{
    Uri uri = new Uri(Parent.UriBase + HttpUtility.UrlEncode(fn));
    HttpWebRequest req = (HttpWebRequest)HttpWebRequest.Create(uri);
    req.AutomaticDecompression = DecompressionMethods.Deflate | DecompressionMethods.GZip;
    req.KeepAlive = true;

    HttpWebResponse resp;
    try
    {
        resp = (HttpWebResponse)req.GetResponse();
    }
    catch (WebException we)
    {
        HttpWebResponse errorResp = (HttpWebResponse)we.Response;
        Logs.ErrorLog.Log("error: " + we);

        // Anything other than an HTTP 500 is passed on unchanged. The response is left open
        // on purpose: handlers further up may still want to inspect we.Response.
        if (errorResp == null || errorResp.StatusCode != HttpStatusCode.InternalServerError)
        {
            throw;
        }

        // For a 500 the body is read and attached to the exception. The using blocks
        // guarantee that reader and response are released even if reading the body fails.
        using (errorResp)
        {
            String strResp;
            using (StreamReader reader = new StreamReader(errorResp.GetResponseStream(), Encoding.UTF8))
            {
                strResp = reader.ReadToEnd();
            }
            Logs.ErrorLog.Log("error={0}", strResp);
            Logs.ErrorLog.Log("url={0}", uri);
            throw new BMException(we, strResp);
        }
    }

    HtmlDocument doc;
    using (resp)
    {
        doc = new HtmlDocument();
        using (Stream respStream = resp.GetResponseStream())
        {
            if (dbgStoreDir == null)
            {
                doc.Load(respStream, Encoding.UTF8);
            }
            else
            {
                // Debug mode: buffer the whole response so the raw bytes can be handed to
                // storeHtml before the same bytes are parsed.
                MemoryStream m = new MemoryStream(4096);
                CopyStream(m, respStream, 4096);
                // GetBuffer() may be larger than the content, hence the explicit length.
                storeHtml(fn, m.GetBuffer(), (int)m.Length);
                m.Position = 0;
                doc.Load(m, Encoding.UTF8);
            }
        }
    }
    HtmlProcessor = new HtmlProcessor(doc);
}