示例#1
0
        /// <summary>
        /// Cleans a fetched HTML fragment: removes script and iframe elements, applies the
        /// caller-supplied removal patterns, rewrites the src of every img tag through
        /// <c>CRegex.GetUrl</c>, and strips the opening tag of every anchor that has an href.
        /// </summary>
        /// <param name="sOriContent">Raw HTML to clean.</param>
        /// <param name="sOtherRemoveReg">
        /// Additional removal patterns separated by "\r\n"; each is handed to
        /// <c>CRegex.Replace</c>. Null or empty means "no additional patterns".
        /// </param>
        /// <param name="sPageUrl">
        /// URL of the page the content came from; passed to <c>CRegex.GetUrl</c> together with
        /// each img src (presumably to resolve relative URLs - confirm against CRegex.GetUrl).
        /// </param>
        /// <returns>The cleaned HTML.</returns>
        public static string GetContent(string sOriContent, string sOtherRemoveReg, string sPageUrl)
        {
            const RegexOptions options = RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase;

            string sFormartted = sOriContent;

            sFormartted = Regex.Replace(sFormartted, @"<script[\s\S]*?</script>", "", options);
            sFormartted = Regex.Replace(sFormartted, @"<iframe[^>]*>[\s\S]*?</iframe>", "", options);

            // A null pattern list used to crash on Split; treat it like an empty list.
            if (!string.IsNullOrEmpty(sOtherRemoveReg))
            {
                string[] sOtherReg = sOtherRemoveReg.Split(new string[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);
                foreach (string sRemoveReg in sOtherReg)
                {
                    sFormartted = CRegex.Replace(sFormartted, sRemoveReg, "", 0);
                }
            }

            // Rewrite only the captured src span of each img tag. Splicing by position avoids
            // touching other attributes that happen to contain the same text (e.g. alt="a.png"
            // or a very short unquoted src), which a plain string.Replace on the tag would corrupt.
            Regex reImg = new Regex("<img[^>]+src=\\s*(?:'(?<src>[^']+)'|\"(?<src>[^\"]+)\"|(?<src>[^>\\s]+))\\s*[^>]*>", options);
            sFormartted = reImg.Replace(sFormartted, delegate(Match mc)
            {
                Group  src    = mc.Groups["src"];
                int    offset = src.Index - mc.Index;
                string tag    = mc.Value;

                return tag.Substring(0, offset) + CRegex.GetUrl(sPageUrl, src.Value) + tag.Substring(offset + src.Length);
            });

            // Strip the opening <a ... href=...> tags. Only the matched opening tag is processed,
            // so any closing </a> tags in the content are left where they are.
            Regex reAnchor = new Regex(@"<a[^>]+href=\s*(?:'(?<href>[^']+)'|""(?<href>[^""]+)""|(?<href>[^>\s]+))\s*[^>]*>", options);
            sFormartted = reAnchor.Replace(sFormartted, delegate(Match mc)
            {
                return CRegex.Replace(mc.Value, @"<[^>]*\ba\b[^>]*>", "", 0);
            });

            return sFormartted;
        }
示例#2
0
        /// <summary>
        /// Parses name=value pairs out of the text captured after "Set-Cookie:" in
        /// <paramref name="sInput"/> and merges them into <paramref name="listInput"/>:
        /// entries with a matching cookieName get their value overwritten, unknown names
        /// are appended.
        /// </summary>
        /// <param name="sInput">Text containing a "Set-Cookie:" line (e.g. raw response headers).</param>
        /// <param name="listInput">Existing cookies to update in place; null is treated as an empty list.</param>
        /// <returns>The same list instance that was passed in (or a new list when it was null), after merging.</returns>
        public static List <CookieObj> GetCookieList(string sInput, List <CookieObj> listInput)
        {
            if (listInput == null)
            {
                listInput = new List <CookieObj>();
            }

            string strCookie = CRegex.GetText(sInput, @"Set-Cookie:(?<Cookie>[\s\S]+?)\n", "Cookie");

            // Drop the expires/path attributes before extracting name=value pairs.
            strCookie = CRegex.Replace(strCookie, @"expires=([^;]+)GMT;", "", 0);
            strCookie = strCookie.Replace("path=/", "");
            List <string> list = CRegex.GetList(strCookie, @"(?<cookie>[\w\d\&\.=]+[\w\d\._-]+);", "cookie");

            foreach (string s in list)
            {
                int iEquals = s.IndexOf('=');

                // No '=' at all, or an empty name: not a usable pair.
                if (iEquals < 1)
                {
                    continue;
                }

                string cookieName = s.Substring(0, iEquals);

                // Set-Cookie attribute names are case-insensitive, so "Domain" must be skipped
                // just like "domain".
                if (string.Equals(cookieName, "domain", System.StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // Substring yields "" when '=' is the last character, so no special case is needed.
                string cookieValue = s.Substring(iEquals + 1);

                bool blExists = false;
                for (int i = 0; i < listInput.Count; i++)
                {
                    CookieObj mInput = listInput[i];
                    if (mInput.cookieName == cookieName)
                    {
                        // Keep scanning: every entry with this name is updated.
                        blExists           = true;
                        mInput.cookieValue = cookieValue;
                    }
                }

                if (!blExists)
                {
                    CookieObj mCookie = new CookieObj();
                    mCookie.cookieName  = cookieName;
                    mCookie.cookieValue = cookieValue;
                    listInput.Add(mCookie);
                }
            }

            return listInput;
        }
示例#3
0
 /// <summary>
 /// Removes script and iframe elements from an HTML fragment, and optionally anchor
 /// elements (including their content) as well.
 /// </summary>
 /// <param name="sDetail">HTML to clean.</param>
 /// <param name="blLink">When true, anchor elements are removed too.</param>
 /// <returns>The HTML with the matched elements removed.</returns>
 public static string ClearScript(string sDetail, bool blLink)
 {
     // Each pattern matches either a self-closed tag (<x ... />) or a full element
     // (<x ...> ... </x>). The \b keeps "<a" from matching tags such as <abbr> or
     // <article>, and "/>" is only accepted inside the opening tag so that nested
     // self-closing tags (e.g. <a href=..><img ... /></a>) no longer cut the match short.
     sDetail = CRegex.Replace(sDetail, @"<script\b[^>]*?(?:/>|>[\s\S]*?</script>)", "", 0);
     sDetail = CRegex.Replace(sDetail, @"<iframe\b[^>]*?(?:/>|>[\s\S]*?</iframe>)", "", 0);
     if (blLink)
     {
         sDetail = CRegex.Replace(sDetail, @"<a\b[^>]*?(?:/>|>[\s\S]*?</a>)", "", 0);
     }
     return sDetail;
 }
示例#4
0
        /// <summary>
        /// Builds a cookie string from the text captured after "Set-Cookie:" in
        /// <paramref name="sInput"/>: every name=value pair found is emitted followed by "; ".
        /// </summary>
        /// <param name="sInput">Text containing a "Set-Cookie:" line (e.g. raw response headers).</param>
        /// <returns>The pairs concatenated as "a=1; b=2; ", or an empty string when none are found.</returns>
        public static string GetCookieByHead(string sInput)
        {
            string        headerText = CRegex.GetText(sInput, @"Set-Cookie:(?<Cookie>[\s\S]+?)\n", "Cookie");
            List <string> pairs      = CRegex.GetList(headerText, @"(?<cookie>[\w\d\.]+=[\w\d\._-]+);", "cookie");

            System.Text.StringBuilder joined = new System.Text.StringBuilder();
            foreach (string pair in pairs)
            {
                // Every pair, including the last one, carries the "; " suffix.
                joined.Append(pair).Append("; ");
            }

            return joined.ToString();
        }
示例#5
0
 /// <summary>
 /// Strips an HTML fragment down to bare text: removes whatever <c>CRegex.sIFrameReg</c>
 /// and <c>CRegex.sScriptReg</c> match, then tags, all whitespace, "&lt;&gt;" and
 /// "&amp;nbsp;", and finally any remaining double quotes and angle brackets.
 /// </summary>
 /// <param name="sHtml">HTML to strip.</param>
 /// <param name="bClear">Not read by this method.</param>
 /// <returns>The stripped text; an empty string for empty input.</returns>
 public static string ClearTag(string sHtml, bool bClear)
 {
     if (sHtml == string.Empty)
     {
         return string.Empty;
     }

     string withoutFrames  = CRegex.Replace(sHtml, CRegex.sIFrameReg, "", 0);
     string withoutScripts = CRegex.Replace(withoutFrames, CRegex.sScriptReg, "", 0);

     // Tags, runs of whitespace, empty "<>" pairs and non-breaking-space entities.
     string withoutMarkup = CRegex.Replace(withoutScripts, @"(<[^>\s]*\b(\w)+\b[^>]*>)|([\s]+)|(<>)|(&nbsp;)", "", 0);

     // Whatever quote / bracket characters survived the pattern above.
     string withoutQuotes = withoutMarkup.Replace("\"", "");
     string withoutOpen   = withoutQuotes.Replace("<", "");

     return withoutOpen.Replace(">", "");
 }
示例#6
0
        /// <summary>
        /// Turns JavaScript <c>document.write</c> / <c>document.writeln</c> statements into the
        /// markup they would emit: drops the call openers, processes backslash escapes, and
        /// drops the call terminators. The replacements are applied strictly in the order
        /// listed below.
        /// </summary>
        /// <param name="strJS">JavaScript source containing document.write calls.</param>
        /// <returns>The text after all replacements.</returns>
        public static string JsToHtml(string strJS)
        {
            string html = strJS;

            // Call openers.
            string[] openers = { "document.writeln(\"", "document.write(\"", "document.write('" };
            foreach (string opener in openers)
            {
                html = html.Replace(opener, "");
            }

            // Backslash followed by a non-backslash character, replaced via the "backslash" group.
            html = CRegex.Replace(html, @"(?<backslash>\\)[^\\]", "", "backslash");

            // Literal substitutions as { search, replacement }; order matters.
            string[][] substitutions =
            {
                new string[] { @"\\", @"\" },
                new string[] { "/\\\\\\", "\\" },
                new string[] { "/\\\\\\'", "\\'" },
                new string[] { "/\\\\\\//", "\\/" },
                new string[] { "\");", "" },
                new string[] { "\")", "" },
                new string[] { "');", "" }
            };
            foreach (string[] substitution in substitutions)
            {
                html = html.Replace(substitution[0], substitution[1]);
            }

            return html;
        }
示例#7
0
 /// <summary>
 /// Converts an HTML fragment to plain text: removes whatever <c>CRegex.sIFrameReg</c> and
 /// <c>CRegex.sScriptReg</c> match, strips all remaining tags and "&amp;nbsp;" entities,
 /// collapses every run of two or more whitespace characters into a single space, and
 /// decodes the "&amp;ldquo;" / "&amp;rdquo;" entities.
 /// </summary>
 /// <param name="sHtml">HTML to strip.</param>
 /// <returns>The plain text; an empty string for null or empty input.</returns>
 public static string ClearTag(string sHtml)
 {
     if (string.IsNullOrEmpty(sHtml))
     {
         return "";
     }

     sHtml = CRegex.Replace(sHtml, CRegex.sIFrameReg, "", 0);
     sHtml = CRegex.Replace(sHtml, CRegex.sScriptReg, "", 0);
     sHtml = Regex.Replace(sHtml, @"<[^>]*>|&nbsp;", string.Empty, RegexOptions.IgnoreCase);

     // One pass with a {2,} quantifier collapses runs of any length. The previous three
     // passes of "\s\s" only halved the run each time, so runs longer than eight
     // whitespace characters were left with several characters.
     sHtml = Regex.Replace(sHtml, @"\s{2,}", " ");

     sHtml = Regex.Replace(sHtml, "&ldquo;", "“", RegexOptions.IgnoreCase);
     sHtml = Regex.Replace(sHtml, "&rdquo;", "”", RegexOptions.IgnoreCase);
     return sHtml;
 }
示例#8
0
 /// <summary>
 /// Page content: passes <paramref name="sInput"/> to <c>CRegex.Replace</c> with a pattern
 /// whose named group "Head" captures the text in front of a '&lt;', an empty replacement,
 /// and "Head" as the group argument.
 /// NOTE(review): presumably this blanks the leading text before each tag - confirm
 /// against CRegex.Replace.
 /// </summary>
 /// <param name="sInput">Input content.</param>
 /// <returns>The string produced by <c>CRegex.Replace</c>.</returns>
 public static string GetHtml(string sInput)
 {
     const string headPattern = @"(?<Head>[^<]+)<";
     const string headGroup   = "Head";

     string result = CRegex.Replace(sInput, headPattern, "", headGroup);
     return result;
 }
示例#9
0
        /// <summary>
        /// Sends a hand-built HTTP/1.1 request over a raw TCP socket (port 80) and returns the
        /// response as text.
        /// </summary>
        /// <param name="mArgs">
        /// Request settings read by this method: IpAddress (host that is resolved and connected
        /// to), Url (supplies the request path and Host header), Method, the optional Accept,
        /// RefererUrl and ContentType headers, postData (sent when Method is "POST"), Encode
        /// (encoding used to decode the response) and blGetHeaders.
        /// </param>
        /// <returns>
        /// The complete response including the header block when mArgs.blGetHeaders is true;
        /// otherwise the response with the leading "HTTP..." header block removed.
        /// </returns>
        /// <exception cref="SocketException">The connection could not be established or the transfer failed.</exception>
        public static string GetPost(RequestArgs mArgs)
        {
            // HTTP requires CRLF line endings; AppendLine would emit a bare "\n" on
            // non-Windows platforms.
            const string CrLf = "\r\n";

            // (1) Resolve the host and create the endpoint and socket.
            // Prefer an IPv4 address; fall back to the first entry and let the socket follow
            // its address family so an IPv6-first DNS answer no longer fails at Connect.
            IPAddress[] addresses = Dns.GetHostEntry(mArgs.IpAddress).AddressList;
            IPAddress   hostIp    = addresses[0];
            foreach (IPAddress candidate in addresses)
            {
                if (candidate.AddressFamily == AddressFamily.InterNetwork)
                {
                    hostIp = candidate;
                    break;
                }
            }

            // NOTE(review): the port is fixed at 80 regardless of the port in mArgs.Url.
            IPEndPoint ep     = new IPEndPoint(hostIp, 80);
            Socket     client = new Socket(hostIp.AddressFamily, SocketType.Stream, ProtocolType.Tcp);
            string     response;

            try
            {
                // (2) Connect to the server.
                try
                {
                    client.Connect(ep);
                }
                catch (SocketException e)
                {
                    // Log as before, but fail here instead of on the Send call below.
                    Console.WriteLine(e.Message);
                    throw;
                }

                StringBuilder sbRequest = new StringBuilder();
                Uri           u         = new Uri(mArgs.Url);

                sbRequest.Append(string.Format("{0} {1} HTTP/1.1", mArgs.Method, u.PathAndQuery)).Append(CrLf);

                if (string.IsNullOrEmpty(mArgs.Accept))
                {
                    sbRequest.Append("Accept: image/gif, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/x-ms-application, application/vnd.ms-xpsdocument, application/xaml+xml, application/x-ms-xbap, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*").Append(CrLf);
                }
                else
                {
                    sbRequest.Append(string.Format("Accept: {0}", mArgs.Accept)).Append(CrLf);
                }

                sbRequest.Append("Accept-Language: zh-cn").Append(CrLf);
                sbRequest.Append("User-Agent: Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; SLCC1; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30618; InfoPath.2)").Append(CrLf);
                // NOTE(review): gzip/deflate is advertised but the response is never
                // decompressed below - confirm the target servers reply uncompressed.
                sbRequest.Append("Accept-Encoding: gzip, deflate").Append(CrLf);
                sbRequest.Append(string.Format("Host: {0}", u.Host)).Append(CrLf);
                if (!string.IsNullOrEmpty(mArgs.RefererUrl))
                {
                    sbRequest.Append(string.Format("Referer: {0}", mArgs.RefererUrl)).Append(CrLf);
                }

                if (!string.IsNullOrEmpty(mArgs.ContentType))
                {
                    sbRequest.Append(string.Format("Content-Type: {0}", mArgs.ContentType)).Append(CrLf);
                }

                // The socket is used for exactly one exchange, so ask the server to close the
                // connection after responding; that close is what ends the receive loop.
                sbRequest.Append("Connection: Close").Append(CrLf);

                bool isPost = mArgs.Method == "POST";
                if (isPost)
                {
                    sbRequest.Append(string.Format("Content-Length: {0}", mArgs.postData.Length)).Append(CrLf);
                }
                sbRequest.Append(CrLf);

                if (isPost)
                {
                    sbRequest.Append(mArgs.postData).Append(CrLf);
                    sbRequest.Append(CrLf);
                }

                // (3) Send the request.
                client.Send(Encoding.ASCII.GetBytes(sbRequest.ToString()));

                // (4) Receive until the server closes the connection (Receive returns 0).
                // The previous exit test "rCount <= buff.Length" was always true, so only the
                // first chunk (at most 3 KB) was ever read. Bytes are collected first and
                // decoded once, so a multi-byte character split across two reads stays intact.
                byte[] buff = new byte[1024 * 3];
                using (System.IO.MemoryStream received = new System.IO.MemoryStream())
                {
                    int rCount;
                    while ((rCount = client.Receive(buff, buff.Length, SocketFlags.None)) > 0)
                    {
                        received.Write(buff, 0, rCount);
                    }

                    response = Encoding.GetEncoding(mArgs.Encode).GetString(received.ToArray());
                }
            }
            finally
            {
                // Release the socket on every path, including failures.
                client.Close();
            }

            if (mArgs.blGetHeaders)
            {
                return response;
            }

            // Drop the status line and headers up to the first blank line.
            return CRegex.Replace(response, @"^HTTP[\s\S]+?(\r\n){2,}", "", 0);
        }