/// <summary>
/// Sends an HTTP request to <paramref name="url"/> and returns the response body as text.
/// </summary>
/// <param name="url">Address to request.</param>
/// <param name="param">Form-encoded request body; null is treated as an empty body.</param>
/// <param name="method">HTTP method; its enum name is used as the request method.</param>
/// <param name="sendEncoding">Encoding used to turn <paramref name="param"/> into bytes.</param>
/// <param name="receivEncoding">Encoding used to decode the response body.</param>
/// <returns>
/// The response body, or the literal string "timeout" when any exception occurs
/// (not only on an actual timeout).
/// </returns>
public string CollectHtml(string url, string param, Method method, HtmlEncoding sendEncoding, HtmlEncoding receivEncoding)
{
    HttpWebRequest request = null;
    try
    {
        // The enum member is spelled "UTF8", while the encoding name to look up is "UTF-8".
        string sendName = sendEncoding.ToString() == "UTF8" ? "UTF-8" : sendEncoding.ToString();
        string receiveName = receivEncoding.ToString() == "UTF8" ? "UTF-8" : receivEncoding.ToString();

        byte[] body = Encoding.GetEncoding(sendName).GetBytes(param ?? string.Empty);

        request = (HttpWebRequest)WebRequest.Create(url);
        request.Method = method.ToString();
        request.Timeout = timeout;
        request.KeepAlive = true;
        request.ContentLength = body.Length;
        request.UserAgent = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)";
        request.ContentType = "application/x-www-form-urlencoded";
        request.Accept = "image/jpeg, application/x-ms-application, image/gif, application/xaml+xml, image/pjpeg, application/x-ms-xbap, */*";

        if (body.Length > 0)
        {
            // Close the request stream before asking for the response.
            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(body, 0, body.Length);
            }
        }

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(responseStream, Encoding.GetEncoding(receiveName)))
        {
            return reader.ReadToEnd();
        }
    }
    catch (Exception)
    {
        // Callers receive the "timeout" sentinel for every failure; kept for compatibility.
        return "timeout";
    }
    finally
    {
        if (request != null)
        {
            request.Abort();
        }
    }
}
/// <summary>
/// Replaces the target element with the HTML-encoded text of <paramref name="value"/>.
/// When the parent's text mode is <c>TextMode.Preformated</c> the encoded text is inserted
/// as a text node at the target's position; otherwise line breaks are converted to
/// <c>&lt;br /&gt;</c> and the target is replaced with the resulting markup.
/// </summary>
/// <param name="value">The value to bind; it is cast to <see cref="string"/>.</param>
/// <exception cref="NotSupportedException">The target element has no parent.</exception>
public void BindValue(object value)
{
    var parent = _targetElement.Parent();
    if (parent == null)
    {
        throw new NotSupportedException();
    }

    var text = (string)value;

    if (parent.ElementTextMode() == TextMode.Preformated)
    {
        // Insert the encoded text where the target sits, then drop the target itself.
        parent.AddTextNode(_targetElement.NodesIndexOfSelf(), HtmlEncoding.HtmlEncode(text));
        _targetElement.Remove();
    }
    else
    {
        var encoded = HtmlEncoding.HtmlEncode(text);

        // NOTE(review): both literals below read as a single plain space in this view, which
        // would make this replacement a no-op. Presumably one side was a non-breaking space
        // or an "&nbsp;" entity in the original file - confirm before touching.
        encoded = encoded.Replace(" ", " ");

        if (encoded.EndsWith(" "))
        {
            encoded = encoded.Substring(0, encoded.Length - 1) + " ";// if the text ends with an extra space, replace it (the replacement text is not legible in this view - TODO confirm)
        }

        // Normalize every line-break flavour to "\n" first, then emit HTML line breaks.
        encoded = encoded.Replace("\r\n", "\n").Replace("\r", "\n");
        encoded = encoded.Replace("\n", "<br />");

        _targetElement.ReplaceWith(encoded);
    }
}
/// <summary>
/// Appends a body row whose header holds a <c>span</c> with <paramref name="label"/>
/// and whose cell holds <paramref name="text"/>.
/// </summary>
/// <param name="label">Text placed in the header span.</param>
/// <param name="text">Text placed in the cell.</param>
/// <param name="encoding">
/// The cell is marked as encoded only when this equals <c>HtmlEncoding.UseEncoding</c>.
/// </param>
public void AddDetail(string label, string text, HtmlEncoding encoding = HtmlEncoding.UseEncoding)
{
    AddBodyRow(row =>
    {
        var headerSpan = row.Header().Add("span");
        headerSpan.Text(label);

        var cell = row.Cell(text);
        var encodeCell = encoding == HtmlEncoding.UseEncoding;
        cell.Encoded(encodeCell);
    });
}
/// <summary>
/// Adds a label/text detail to the underlying detail tag, skipping empty text.
/// </summary>
/// <param name="label">Label of the detail.</param>
/// <param name="text">Text of the detail; nothing is added when <c>text.IsEmpty()</c> is true.</param>
/// <param name="encoding">Encoding option forwarded to the detail tag.</param>
public void AddDetail(string label, string text, HtmlEncoding encoding = HtmlEncoding.UseEncoding)
{
    if (text.IsEmpty())
    {
        return;
    }

    // Forward the caller's choice; previously the argument was accepted but ignored.
    _detailTag.AddDetail(label, text, encoding);
}
/// <summary>
/// Handles a matched begin tag: closes the current container when the new tag implies it,
/// collects the tag's attributes, creates the element and, unless the tag is self-closed,
/// pushes the element onto the container stack.
/// </summary>
/// <param name="match">
/// Regex match exposing the groups "tagName", "selfClosed", "attribute", "attrName" and "attrValue".
/// </param>
private void ProcessBeginTag(Match match)
{
    string tagName = match.Groups["tagName"].Value;
    bool selfClosed = match.Groups["selfClosed"].Success;

    // Tags listed by the specification as self-closing never get a container of their own.
    if (HtmlSpecification.selfCloseTags.Contains(tagName, StringComparer.OrdinalIgnoreCase))
    {
        selfClosed = true;
    }

    // If the current container may omit its end tag, check whether this tag closes it.
    var parent = containerStack.Peek() as DomElement;
    if (parent != null && HtmlSpecification.optionalCloseTags.Contains(parent.Name, StringComparer.OrdinalIgnoreCase))
    {
        if (HtmlSpecification.ImmediatelyClose(parent.Name, tagName))
        {
            containerStack.Pop();
        }
    }

    // Collect attributes; names are case-insensitive and the first occurrence wins.
    var attributes = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
    foreach (Capture capture in match.Groups["attribute"].Captures)
    {
        string name = capture.FindCaptures(match.Groups["attrName"]).Single().Value;
        string value = capture.FindCaptures(match.Groups["attrValue"]).Select(c => c.Value).SingleOrDefault();

        if (attributes.ContainsKey(name))
        {
            continue;
        }

        // An attribute without a value capture yields null; keep it null instead of decoding it.
        if (value != null)
        {
            value = HtmlEncoding.HtmlDecode(value);
        }

        attributes.Add(name, value);
    }

    // Create the element and make it the current container unless it is self-closed.
    var element = CreateElement(tagName, attributes);
    if (!selfClosed)
    {
        containerStack.Push(element);
    }
}
/// <summary>
/// Applies the named encoding to the document and the tokenizer stream.
/// Does nothing when the name is not supported or cannot be resolved.
/// </summary>
/// <param name="encoding">Name of the encoding to apply.</param>
void SetEncoding(String encoding)
{
    if (!HtmlEncoding.IsSupported(encoding))
    {
        return;
    }

    var resolved = HtmlEncoding.Resolve(encoding);
    if (resolved == null)
    {
        return;
    }

    doc.InputEncoding = resolved.WebName;
    tokenizer.Stream.Encoding = resolved;
}
/// <summary>Smart Encode HTML Text</summary> /// <param name="strict">If true all encode-able characters are encoded also non required characters.</param> public static string HtmlSmartEncode(string htmltext, HtmlEncoding htmlencoding, bool strict = false) { if (string.IsNullOrEmpty(htmltext)) { return(string.Empty); } var _text = htmltext.ToCharArray(); var _sb = new StringBuilder(); for (int index = 0; index < _text.Length; index++) { if (IsEncodedHtmlCharAtPosition(ref _text, ref index, out HtmlChar _hc)) { AppendChar(_sb, _hc, htmlencoding, true); }
/// <summary>
/// Handles an element begin tag.
/// </summary>
/// <param name="beginTag">Information about the begin tag.</param>
/// <returns>
/// The element created while handling the tag. The original documentation states that
/// null is returned when the tag is not supported; that case is not visible in this method.
/// </returns>
protected virtual IHtmlElement ProcessBeginTag(HtmlBeginTag beginTag)
{
    var tagName = beginTag.TagName;
    var markedSelfClosed = beginTag.SelfClosed;

    // A self-close element counts as closed even when the markup did not say so.
    var selfClosed = IsSelfCloseElement(beginTag) || markedSelfClosed;

    // CData elements switch the reader into CData mode, keyed by the lower-cased tag name.
    if (IsCDataElement(beginTag))
    {
        Reader.EnterCDataMode(tagName.ToLowerInvariant());
    }

    // When the current container may omit its end tag, this tag may close it implicitly.
    var parent = CurrentContainer as IHtmlElement;
    var parentMayOmitEndTag = parent != null
        && HtmlSpecification.optionalCloseTags.Contains(parent.Name, StringComparer.OrdinalIgnoreCase);
    if (parentMayOmitEndTag && ImmediatelyClose(tagName, parent))
    {
        ContainerStack.Pop();
    }

    // Gather the attributes; names are case-insensitive.
    var attributes = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);
    foreach (var attribute in beginTag.Attributes)
    {
        var name = attribute.Name;
        var rawValue = attribute.Value;
        string value = rawValue == null ? null : HtmlEncoding.HtmlDecode(rawValue);

        // Duplicate attribute names: only the first occurrence is kept.
        if (!attributes.ContainsKey(name))
        {
            attributes.Add(name, value);
        }
    }

    // Create the element and push it as the new container unless it is self-closed.
    var created = CreateElement(tagName, attributes);
    if (!selfClosed)
    {
        ContainerStack.Push(created);
    }

    return created;
}
/// <summary>
/// Adds a label/text detail to the underlying detail tag, skipping empty text.
/// </summary>
/// <param name="label">Label of the detail.</param>
/// <param name="text">Text of the detail; nothing is added when <c>text.IsEmpty()</c> is true.</param>
/// <param name="encoding">Encoding option forwarded to the detail tag.</param>
public void AddDetail(string label, string text, HtmlEncoding encoding = HtmlEncoding.UseEncoding)
{
    if (text.IsEmpty())
    {
        return;
    }

    // Forward the caller's choice; previously the argument was accepted but ignored.
    _detailTag.AddDetail(label, text, encoding);
}
/// <summary>
/// Creates the attribute and stores the supplied encoding option.
/// </summary>
/// <param name="encodingOption">The encoding option to store.</param>
public HtmlEncodingAttribute(HtmlEncoding encodingOption)
{
    this._encodingOption = encodingOption;
}
/// <summary>
/// Sends an HTTP request to <paramref name="url"/> using the supplied cookie container and
/// optional referer, and returns the response body as text.
/// </summary>
/// <param name="url">Address to request.</param>
/// <param name="param">Form-encoded request body; null is treated as an empty body.</param>
/// <param name="method">HTTP method; its enum name is used as the request method.</param>
/// <param name="sendEncoding">Encoding used to turn <paramref name="param"/> into bytes.</param>
/// <param name="receivEncoding">Encoding used to decode the response body.</param>
/// <param name="cookie">Cookie container attached to the request.</param>
/// <param name="Referer">Referer header value; null or empty leaves the header unset.</param>
/// <returns>
/// The response body, or the literal string "timeout" when any exception occurs
/// (not only on an actual timeout).
/// </returns>
public string CollectHtml(string url, string param, Method method, HtmlEncoding sendEncoding, HtmlEncoding receivEncoding, ref CookieContainer cookie, string Referer)
{
    HttpWebRequest request = null;
    try
    {
        // The enum member is spelled "UTF8", while the encoding name to look up is "UTF-8".
        // The send side previously skipped this mapping, unlike the receive side.
        string sendName = sendEncoding.ToString() == "UTF8" ? "UTF-8" : sendEncoding.ToString();
        string receiveName = receivEncoding.ToString() == "UTF8" ? "UTF-8" : receivEncoding.ToString();

        byte[] body = Encoding.GetEncoding(sendName).GetBytes(param ?? string.Empty);

        request = (HttpWebRequest)WebRequest.Create(url);
        request.CookieContainer = cookie;
        request.Method = method.ToString();
        request.UserAgent = "Mozilla/5.0 (iPad; U; CPU OS 3_2_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B500 Safari/531.21.10";
        request.ContentType = "application/x-www-form-urlencoded";
        request.KeepAlive = true;
        request.AllowAutoRedirect = true;

        // Route through the configured proxy only when one is actually set.
        if (!string.IsNullOrEmpty(this.proxyIp))
        {
            request.Proxy = new WebProxy(this.proxyIp, 8088);
        }

        request.Accept = "image/jpeg, application/x-ms-application, image/gif, application/xaml+xml, image/pjpeg, application/x-ms-xbap, */*";

        if (!string.IsNullOrEmpty(Referer))
        {
            request.Referer = Referer;
        }

        request.ContentLength = body.Length;

        if (body.Length > 0)
        {
            // Close the request stream before asking for the response.
            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(body, 0, body.Length);
            }
        }

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(responseStream, Encoding.GetEncoding(receiveName)))
        {
            return reader.ReadToEnd();
        }
    }
    catch
    {
        // Callers receive the "timeout" sentinel for every failure; kept for compatibility.
        return "timeout";
    }
    finally
    {
        if (request != null)
        {
            request.Abort();
        }
    }
}
/// <summary>
/// Collects HTML from <paramref name="url"/> without a referer by delegating to the
/// overload that takes one, passing an empty referer string.
/// </summary>
/// <param name="url">Address to collect from.</param>
/// <param name="param">Request parameters.</param>
/// <param name="method">POST or GET.</param>
/// <param name="sendEncoding">Encoding of the data that is sent.</param>
/// <param name="receivEncoding">Encoding of the data that is received.</param>
/// <param name="cookie">Cookie container passed through by reference.</param>
/// <returns>The collected data, as returned by the delegated overload.</returns>
public string CollectHtml(string url, string param, Method method, HtmlEncoding sendEncoding, HtmlEncoding receivEncoding, ref CookieContainer cookie)
{
    const string noReferer = "";
    string html = CollectHtml(url, param, method, sendEncoding, receivEncoding, ref cookie, noReferer);
    return html;
}