Example #1
0
        /// <summary>
        /// Sends an HTTP request to <paramref name="url"/> and returns the response body as text.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="param">Form-encoded request body; when null or empty no body is written.</param>
        /// <param name="method">HTTP verb; its <c>ToString()</c> value is used as the request method.</param>
        /// <param name="sendEncoding">Encoding used to convert <paramref name="param"/> to bytes.</param>
        /// <param name="receivEncoding">Encoding used to decode the response body.</param>
        /// <returns>The response body, or the literal string "timeout" when any exception occurs.</returns>
        public string CollectHtml(string url, string param, Method method, HtmlEncoding sendEncoding, HtmlEncoding receivEncoding)
        {
            HttpWebRequest request = null;

            try {
                // The option name "UTF8" is mapped to "UTF-8" before it is handed to Encoding.GetEncoding.
                string sendName = sendEncoding.ToString();
                byte[] bs       = Encoding.GetEncoding(sendName == "UTF8" ? "UTF-8" : sendName).GetBytes(param ?? string.Empty);

                request               = (HttpWebRequest)WebRequest.Create(url);
                request.Method        = method.ToString();
                request.Timeout       = timeout;
                request.KeepAlive     = true;
                request.ContentLength = bs.Length;
                request.UserAgent     = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)";
                request.ContentType   = "application/x-www-form-urlencoded";
                request.Accept        = "image/jpeg, application/x-ms-application, image/gif, application/xaml+xml, image/pjpeg, application/x-ms-xbap, */*";

                if (bs.Length > 0)
                {
                    // Close the request stream as soon as the body is written, before asking for the response.
                    using (Stream reqStream = request.GetRequestStream())
                    {
                        reqStream.Write(bs, 0, bs.Length);
                    }
                }

                string receiveName = receivEncoding.ToString();

                // Nested usings dispose the reader first, then the stream, then the response.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream stream = response.GetResponseStream())
                using (StreamReader sr = new StreamReader(stream, Encoding.GetEncoding(receiveName == "UTF8" ? "UTF-8" : receiveName)))
                {
                    return sr.ReadToEnd();
                }
            }
            catch (Exception) {
                // Every failure (not only timeouts) is reported through this sentinel value.
                return "timeout";
            }
            finally {
                // Cancel the request unconditionally once we are done with it.
                if (request != null)
                {
                    request.Abort();
                }
            }
        }
Example #2
0
        /// <summary>
        /// Replaces the target element with the HTML-encoded text of <paramref name="value"/>.
        /// </summary>
        /// <param name="value">Value to bind; it is cast to <c>string</c>.</param>
        /// <exception cref="NotSupportedException">The target element has no parent.</exception>
        public void BindValue(object value)
        {
            var container = _targetElement.Parent();
            if (container == null)
            {
                throw new NotSupportedException();
            }

            var content = (string)value;

            if (container.ElementTextMode() == TextMode.Preformated)
            {
                // Preformatted parent: insert the encoded text as a text node at the
                // target's position and drop the target element itself.
                var position    = _targetElement.NodesIndexOfSelf();
                var encodedText = HtmlEncoding.HtmlEncode(content);

                container.AddTextNode(position, encodedText);
                _targetElement.Remove();
                return;
            }

            // Encode the text and turn each pair of spaces into "&nbsp; ".
            var markup = HtmlEncoding.HtmlEncode(content).Replace("  ", "&nbsp; ");

            if (markup.EndsWith("  "))
            {
                // One extra space is left at the end: replace it with &nbsp;.
                markup = markup.Substring(0, markup.Length - 1) + "&nbsp;";
            }

            // Normalise every line break to "\n", then emit it as a <br /> tag.
            markup = markup.Replace("\r\n", "\n")
                           .Replace("\r", "\n")
                           .Replace("\n", "<br />");

            _targetElement.ReplaceWith(markup);
        }
Example #3
0
 /// <summary>
 /// Adds a body row whose header contains a <c>span</c> with <paramref name="label"/>
 /// and whose cell contains <paramref name="text"/>.
 /// </summary>
 /// <param name="label">Text placed in the header span.</param>
 /// <param name="text">Content of the cell.</param>
 /// <param name="encoding">The cell is flagged as encoded only when this equals <c>HtmlEncoding.UseEncoding</c> (the default).</param>
 public void AddDetail(string label, string text, HtmlEncoding encoding = HtmlEncoding.UseEncoding)
 {
     bool encodeCell = encoding == HtmlEncoding.UseEncoding;

     AddBodyRow(row =>
     {
         row.Header().Add("span").Text(label);
         row.Cell(text).Encoded(encodeCell);
     });
 }
Example #4
0
 /// <summary>
 /// Appends one label/value row to the body.
 /// </summary>
 /// <param name="label">Text written into a <c>span</c> inside the row header.</param>
 /// <param name="text">Text used for the row's cell.</param>
 /// <param name="encoding">
 /// When equal to <c>HtmlEncoding.UseEncoding</c> (the default) the cell is marked as encoded;
 /// any other value marks it as not encoded.
 /// </param>
 public void AddDetail(string label, string text, HtmlEncoding encoding = HtmlEncoding.UseEncoding)
 {
     AddBodyRow(bodyRow =>
     {
         // Header first, then the value cell.
         var labelSpan = bodyRow.Header().Add("span");
         labelSpan.Text(label);

         var valueCell = bodyRow.Cell(text);
         valueCell.Encoded(encoding == HtmlEncoding.UseEncoding);
     });
 }
Example #5
0
        /// <summary>
        /// Forwards a label/text pair to <c>_detailTag</c>, doing nothing when the text is empty.
        /// </summary>
        /// <param name="label">Label passed through to <c>_detailTag.AddDetail</c>.</param>
        /// <param name="text">Text passed through; the call is skipped when <c>text.IsEmpty()</c> is true.</param>
        /// <param name="encoding">
        /// Not read by this method. NOTE(review): the value is not forwarded to
        /// <c>_detailTag.AddDetail</c> - confirm whether that is intended.
        /// </param>
        public void AddDetail(string label, string text, HtmlEncoding encoding = HtmlEncoding.UseEncoding)
        {
            if (!text.IsEmpty())
            {
                _detailTag.AddDetail(label, text);
            }
        }
Example #6
0
        /// <summary>
        /// Handles a matched begin tag: collects its attributes, creates the element and,
        /// unless the tag is self-closed, pushes the element onto the container stack.
        /// </summary>
        /// <param name="match">
        /// Regex match that exposes the <c>tagName</c>, <c>selfClosed</c>, <c>attribute</c>,
        /// <c>attrName</c> and <c>attrValue</c> groups.
        /// </param>
        private void ProcessBeginTag(Match match)
        {
            string tagName          = match.Groups["tagName"].Value;
            bool   markedSelfClosed = match.Groups["selfClosed"].Success;

            // A tag is self-closed when the specification lists it as such or when the markup says so.
            bool selfClosed = HtmlSpecification.selfCloseTags.Contains(tagName, StringComparer.OrdinalIgnoreCase)
                              || markedSelfClosed;

            // If the current container has an optional end tag, this begin tag may close it implicitly.
            var parent = containerStack.Peek() as DomElement;
            if (parent != null
                && HtmlSpecification.optionalCloseTags.Contains(parent.Name, StringComparer.OrdinalIgnoreCase)
                && HtmlSpecification.ImmediatelyClose(parent.Name, tagName))
            {
                containerStack.Pop();
            }

            // Collect the attributes.
            var attributes = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

            foreach (Capture attributeCapture in match.Groups["attribute"].Captures)
            {
                string name  = attributeCapture.FindCaptures(match.Groups["attrName"]).Single().Value;
                string value = HtmlEncoding.HtmlDecode(
                    attributeCapture.FindCaptures(match.Groups["attrValue"]).Select(c => c.Value).SingleOrDefault());

                // When a name repeats, only its first occurrence is kept.
                if (!attributes.ContainsKey(name))
                {
                    attributes.Add(name, value);
                }
            }

            // Create the element; only elements that can have content become the new container.
            var created = CreateElement(tagName, attributes);
            if (selfClosed)
            {
                return;
            }

            containerStack.Push(created);
        }
        /// <summary>
        /// Applies the named encoding to the document and to the tokenizer's stream,
        /// provided the name is supported and resolves to an encoding.
        /// </summary>
        /// <param name="encoding">Name of the encoding to apply.</param>
        void SetEncoding(String encoding)
        {
            if (!HtmlEncoding.IsSupported(encoding))
            {
                return;
            }

            var resolved = HtmlEncoding.Resolve(encoding);

            if (resolved == null)
            {
                return;
            }

            doc.InputEncoding         = resolved.WebName;
            tokenizer.Stream.Encoding = resolved;
        }
Example #8
0
        /// <summary>Smart Encode HTML Text</summary>
        /// <param name="strict">If true all encode-able characters are encoded also non required characters.</param>
        public static string HtmlSmartEncode(string htmltext, HtmlEncoding htmlencoding, bool strict = false)
        {
            if (string.IsNullOrEmpty(htmltext))
            {
                return(string.Empty);
            }

            var _text = htmltext.ToCharArray();
            var _sb   = new StringBuilder();

            for (int index = 0; index < _text.Length; index++)
            {
                if (IsEncodedHtmlCharAtPosition(ref _text, ref index, out HtmlChar _hc))
                {
                    AppendChar(_sb, _hc, htmlencoding, true);
                }
Example #9
0
        /// <summary>
        /// Processes an element's begin tag.
        /// </summary>
        /// <param name="beginTag">Information about the begin tag.</param>
        /// <returns>
        /// The element returned by <c>CreateElement</c>. The method is virtual and its documented
        /// contract allows <c>null</c> when the tag is not supported.
        /// </returns>
        protected virtual IHtmlElement ProcessBeginTag(HtmlBeginTag beginTag)
        {
            string tagName          = beginTag.TagName;
            bool   markedSelfClosed = beginTag.SelfClosed;

            // Self-closing elements are treated as self-closed regardless of the markup.
            bool selfClosed = IsSelfCloseElement(beginTag) || markedSelfClosed;

            // CData elements switch the reader into CData mode, keyed by the lower-cased tag name.
            if (IsCDataElement(beginTag))
            {
                Reader.EnterCDataMode(tagName.ToLowerInvariant());
            }

            // If the current container has an optional end tag, this begin tag may close it implicitly.
            var parent = CurrentContainer as IHtmlElement;
            if (parent != null
                && HtmlSpecification.optionalCloseTags.Contains(parent.Name, StringComparer.OrdinalIgnoreCase)
                && ImmediatelyClose(tagName, parent))
            {
                ContainerStack.Pop();
            }

            // Collect the attributes.
            var attributes = new Dictionary<string, string>(StringComparer.OrdinalIgnoreCase);

            foreach (var attribute in beginTag.Attributes)
            {
                string name  = attribute.Name;
                string raw   = attribute.Value;
                string value = raw == null ? null : HtmlEncoding.HtmlDecode(raw);

                // Duplicate attribute names: only the first one is kept.
                if (!attributes.ContainsKey(name))
                {
                    attributes.Add(name, value);
                }
            }

            // Create the element; it becomes the current container unless it is self-closed.
            var created = CreateElement(tagName, attributes);

            if (!selfClosed)
            {
                ContainerStack.Push(created);
            }

            return created;
        }
        /// <summary>
        /// Passes a label/text pair on to <c>_detailTag</c>; empty text is ignored.
        /// </summary>
        /// <param name="label">Label handed to <c>_detailTag.AddDetail</c>.</param>
        /// <param name="text">Text handed to <c>_detailTag.AddDetail</c>; nothing happens when <c>text.IsEmpty()</c> is true.</param>
        /// <param name="encoding">
        /// Not read by this method. NOTE(review): the value is not passed on to
        /// <c>_detailTag.AddDetail</c> - confirm whether that is intended.
        /// </param>
        public void AddDetail(string label, string text, HtmlEncoding encoding = HtmlEncoding.UseEncoding)
        {
            if (text.IsEmpty())
            {
                return;
            }

            _detailTag.AddDetail(label, text);
        }
 /// <summary>
 /// Creates the attribute and stores the given encoding option.
 /// </summary>
 /// <param name="encodingOption">Encoding option kept in <c>_encodingOption</c>.</param>
 public HtmlEncodingAttribute(HtmlEncoding encodingOption)
 {
     this._encodingOption = encodingOption;
 }
Example #12
0
        /// <summary>
        /// Sends an HTTP request to <paramref name="url"/> with the supplied cookies and optional
        /// referer, and returns the response body as text.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="param">Form-encoded request body; when null or empty no body is written.</param>
        /// <param name="method">HTTP verb; its <c>ToString()</c> value is used as the request method.</param>
        /// <param name="sendEncoding">Encoding used to convert <paramref name="param"/> to bytes.</param>
        /// <param name="receivEncoding">Encoding used to decode the response body.</param>
        /// <param name="cookie">Cookie container attached to the request. It is passed by reference but never reassigned here.</param>
        /// <param name="Referer">Value for the Referer header; not set when null or empty.</param>
        /// <returns>The response body, or the literal string "timeout" when any exception occurs.</returns>
        public string CollectHtml(string url, string param, Method method, HtmlEncoding
                                  sendEncoding, HtmlEncoding receivEncoding, ref CookieContainer cookie, string Referer)
        {
            HttpWebRequest request = null;

            try {
                // Map the option name "UTF8" to "UTF-8" for the request body too, the same way the
                // response encoding is resolved further down.
                string sendName = sendEncoding.ToString();
                byte[] bs       = Encoding.GetEncoding(sendName == "UTF8" ? "UTF-8" : sendName).GetBytes(param ?? string.Empty);

                request = (HttpWebRequest)WebRequest.Create(url);
                request.CookieContainer   = cookie;
                request.Method            = method.ToString();
                request.UserAgent         = "Mozilla/5.0 (iPad; U; CPU OS 3_2_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B500 Safari/531.21.10";
                request.ContentType       = "application/x-www-form-urlencoded";
                request.KeepAlive         = true;
                request.AllowAutoRedirect = true;

                // A proxy is used only when one is configured; the port is fixed at 8088.
                if (!string.IsNullOrEmpty(this.proxyIp))
                {
                    request.Proxy = new WebProxy(this.proxyIp, 8088);
                }

                request.Accept = "image/jpeg, application/x-ms-application, image/gif, application/xaml+xml, image/pjpeg, application/x-ms-xbap, */*";
                if (!string.IsNullOrEmpty(Referer))
                {
                    request.Referer = Referer;
                }

                request.ContentLength = bs.Length;
                if (bs.Length > 0)
                {
                    // Close the request stream as soon as the body is written, before asking for the response.
                    using (Stream reqStream = request.GetRequestStream())
                    {
                        reqStream.Write(bs, 0, bs.Length);
                    }
                }

                string receiveName = receivEncoding.ToString();

                // Nested usings dispose the reader first, then the stream, then the response.
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream stream = response.GetResponseStream())
                using (StreamReader sr = new StreamReader(stream, Encoding.GetEncoding(receiveName == "UTF8" ? "UTF-8" : receiveName)))
                {
                    return sr.ReadToEnd();
                }
            }
            catch {
                // Every failure (not only timeouts) is reported through this sentinel value.
                return "timeout";
            }
            finally {
                // Cancel the request unconditionally once we are done with it.
                if (request != null)
                {
                    request.Abort();
                }
            }
        }
Example #13
0
 /// <summary>
 /// Collects HTML by delegating to the overload that also takes a referer,
 /// passing an empty string for it.
 /// </summary>
 /// <param name="url">Address to collect from.</param>
 /// <param name="param">Request parameters.</param>
 /// <param name="method">POST or GET.</param>
 /// <param name="sendEncoding">Encoding of the data that is sent.</param>
 /// <param name="receivEncoding">Encoding of the data that is received.</param>
 /// <param name="cookie">Cookie container, passed on by reference.</param>
 /// <returns>The collected data, as returned by the delegated overload.</returns>
 public string CollectHtml(string url, string param, Method method, HtmlEncoding sendEncoding, HtmlEncoding receivEncoding, ref CookieContainer cookie)
 {
     string html = CollectHtml(url, param, method, sendEncoding, receivEncoding, ref cookie, "");
     return html;
 }