/// <summary>
/// Cleans a fetched HTML fragment: removes script and iframe elements, applies the
/// caller-supplied removal patterns, rewrites the src of every img tag through
/// CRegex.GetUrl, and finally runs every opening anchor tag that carries an href
/// through the anchor-removal pattern.
/// </summary>
/// <param name="sOriContent">HTML to clean. Must not be null (Regex.Replace rejects null input).</param>
/// <param name="sOtherRemoveReg">Extra removal patterns separated by "\r\n"; null or empty means there are none.</param>
/// <param name="sPageUrl">Page URL handed to CRegex.GetUrl together with each img src.</param>
/// <returns>The cleaned HTML.</returns>
public static string GetContent(string sOriContent, string sOtherRemoveReg, string sPageUrl)
{
    const RegexOptions options = RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase;

    string sFormartted = sOriContent;
    sFormartted = Regex.Replace(sFormartted, @"<script[\s\S]*?</script>", "", options);
    sFormartted = Regex.Replace(sFormartted, @"<iframe[^>]*>[\s\S]*?</iframe>", "", options);

    // A missing pattern list used to throw NullReferenceException on Split.
    if (!string.IsNullOrEmpty(sOtherRemoveReg))
    {
        string[] sOtherReg = sOtherRemoveReg.Split(new string[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);
        foreach (string sRemoveReg in sOtherReg)
        {
            sFormartted = CRegex.Replace(sFormartted, sRemoveReg, "", 0);
        }
    }

    // Rewrite each img src exactly where it was captured. Replacing the src text
    // inside the tag with string.Replace would also rewrite any other attribute
    // that happens to contain the same text (e.g. class="pic" src="pic").
    // NOTE(review): CRegex.GetUrl presumably resolves the src against the page URL - confirm.
    Regex re = new Regex("<img[^>]+src=\\s*(?:'(?<src>[^']+)'|\"(?<src>[^\"]+)\"|(?<src>[^>\\s]+))\\s*[^>]*>", options);
    StringBuilder sbResult = new StringBuilder(sFormartted.Length);
    int iCopied = 0;
    foreach (Match mc in re.Matches(sFormartted))
    {
        Group gSrc = mc.Groups["src"];
        sbResult.Append(sFormartted, iCopied, gSrc.Index - iCopied);
        sbResult.Append(CRegex.GetUrl(sPageUrl, gSrc.Value));
        iCopied = gSrc.Index + gSrc.Length;
    }
    sbResult.Append(sFormartted, iCopied, sFormartted.Length - iCopied);
    sFormartted = sbResult.ToString();

    // Pass every opening <a ... href=...> tag through the anchor-removal pattern,
    // again in place, in a single pass over the document.
    re = new Regex(@"<a[^>]+href=\s*(?:'(?<href>[^']+)'|""(?<href>[^""]+)""|(?<href>[^>\s]+))\s*[^>]*>", options);
    sbResult = new StringBuilder(sFormartted.Length);
    iCopied = 0;
    foreach (Match mc in re.Matches(sFormartted))
    {
        sbResult.Append(sFormartted, iCopied, mc.Index - iCopied);
        sbResult.Append(CRegex.Replace(mc.Value, @"<[^>]*\ba\b[^>]*>", "", 0));
        iCopied = mc.Index + mc.Length;
    }
    sbResult.Append(sFormartted, iCopied, sFormartted.Length - iCopied);

    return sbResult.ToString();
}
/// <summary>
/// Parses name=value pairs out of the Set-Cookie text that CRegex.GetText extracts
/// from <paramref name="sInput"/> and merges them into <paramref name="listInput"/>:
/// entries whose name already exists get their value overwritten, new names are appended.
/// </summary>
/// <param name="sInput">Text containing a "Set-Cookie:" line (typically raw response headers).</param>
/// <param name="listInput">Existing cookies to merge into; a null list is treated as empty.</param>
/// <returns>The merged list (the same instance as <paramref name="listInput"/> when it was not null).</returns>
public static List<CookieObj> GetCookieList(string sInput, List<CookieObj> listInput)
{
    // A null list used to throw as soon as the first cookie was parsed.
    if (listInput == null)
    {
        listInput = new List<CookieObj>();
    }

    string strCookie = CRegex.GetText(sInput, @"Set-Cookie:(?<Cookie>[\s\S]+?)\n", "Cookie");
    // Drop the attributes that are not cookies themselves.
    strCookie = CRegex.Replace(strCookie, @"expires=([^;]+)GMT;", "", 0);
    strCookie = strCookie.Replace("path=/", "");
    List<string> list = CRegex.GetList(strCookie, @"(?<cookie>[\w\d\&\.=]+[\w\d\._-]+);", "cookie");

    List<CookieObj> listCookie = new List<CookieObj>();
    foreach (string s in list)
    {
        int iEquals = s.IndexOf('=');
        if (iEquals < 1)
        {
            // No '=' at all, or an empty name.
            continue;
        }

        string cookieName = s.Substring(0, iEquals);
        if (cookieName == "domain")
        {
            continue;
        }

        CookieObj mCookie = new CookieObj();
        mCookie.cookieName = cookieName;
        // Substring already yields "" when '=' is the last character, so the
        // former length check (which could never be true) is not needed.
        mCookie.cookieValue = s.Substring(iEquals + 1);
        listCookie.Add(mCookie);
    }

    foreach (CookieObj model in listCookie)
    {
        bool blExists = false;
        // No early exit: every existing entry with the same name is updated.
        for (int i = 0; i < listInput.Count; i++)
        {
            CookieObj mInput = listInput[i];
            if (mInput.cookieName == model.cookieName)
            {
                blExists = true;
                mInput.cookieValue = model.cookieValue;
            }
        }

        if (!blExists)
        {
            listInput.Add(model);
        }
    }

    return listInput;
}
/// <summary>
/// Removes script and iframe elements from an HTML fragment and, when
/// <paramref name="blLink"/> is true, anchor elements as well (including their content).
/// </summary>
/// <param name="sDetail">HTML fragment to clean.</param>
/// <param name="blLink">True to also remove anchor elements.</param>
/// <returns>The fragment with the matched elements removed.</returns>
public static string ClearScript(string sDetail, bool blLink)
{
    // Each pattern tries, in order: a self-closing tag, a full element up to its
    // closing tag, and finally a lone opening tag. Restricting the "/>" test to the
    // opening tag itself ([^>]*?) keeps a "/>" inside the element body (e.g. "<br/>"
    // in a script string) from ending the match early, and \b keeps "<a" from
    // matching other tags that merely start with that letter.
    sDetail = CRegex.Replace(sDetail, @"<script\b[^>]*?(?:/>|>[\s\S]*?</script>|>)", "", 0);
    sDetail = CRegex.Replace(sDetail, @"<iframe\b[^>]*?(?:/>|>[\s\S]*?</iframe>|>)", "", 0);
    if (blLink)
    {
        sDetail = CRegex.Replace(sDetail, @"<a\b[^>]*?(?:/>|>[\s\S]*?</a>|>)", "", 0);
    }

    return sDetail;
}
/// <summary>
/// Reduces an HTML fragment to bare text: removes iframe and script blocks (using the
/// CRegex.sIFrameReg and CRegex.sScriptReg patterns), then tags, all whitespace and
/// non-breaking spaces, and finally any remaining double quotes and angle brackets.
/// </summary>
/// <param name="sHtml">HTML fragment to clean; null or empty yields an empty string.</param>
/// <param name="bClear">Not used by this overload.</param>
/// <returns>The stripped text.</returns>
public static string ClearTag(string sHtml, bool bClear)
{
    // Also covers null, which the former == "" comparison let through.
    if (string.IsNullOrEmpty(sHtml))
    {
        return "";
    }

    sHtml = CRegex.Replace(sHtml, CRegex.sIFrameReg, "", 0);
    sHtml = CRegex.Replace(sHtml, CRegex.sScriptReg, "", 0);
    // The last alternative used to be a bare blank, which could never match because
    // the whitespace alternative is tried first. It is spelled as the entity here.
    // NOTE(review): the blank is assumed to stand for the &nbsp; entity - confirm.
    sHtml = CRegex.Replace(sHtml, @"(<[^>\s]*\b(\w)+\b[^>]*>)|([\s\u00A0]+)|(<>)|(&nbsp;)", "", 0);
    sHtml = sHtml.Replace("\"", "").Replace("<", "").Replace(">", "");
    return sHtml;
}
/// <summary>
/// Turns the text of document.write / document.writeln calls into the markup they
/// would emit: the call openers are dropped, backslash sequences are rewritten and
/// the call terminators are dropped, strictly in that order.
/// </summary>
/// <param name="strJS">JavaScript source text.</param>
/// <returns>The text left after all replacements.</returns>
public static string JsToHtml(string strJS)
{
    string[] callOpeners = new string[] { "document.writeln(\"", "document.write(\"", "document.write('" };
    string[][] backslashRewrites = new string[][]
    {
        new string[] { @"\\", @"\" },
        new string[] { "/\\\\\\", "\\" },
        new string[] { "/\\\\\\'", "\\'" },
        new string[] { "/\\\\\\//", "\\/" }
    };
    string[] callClosers = new string[] { "\");", "\")", "');" };

    string html = strJS;
    foreach (string opener in callOpeners)
    {
        html = html.Replace(opener, "");
    }

    // Applied to the "backslash" group: a backslash followed by a non-backslash character.
    html = CRegex.Replace(html, @"(?<backslash>\\)[^\\]", "", "backslash");

    // Order matters: each rewrite sees the output of the previous one.
    foreach (string[] rewrite in backslashRewrites)
    {
        html = html.Replace(rewrite[0], rewrite[1]);
    }

    foreach (string closer in callClosers)
    {
        html = html.Replace(closer, "");
    }

    return html;
}
/// <summary>
/// Strips an HTML fragment down to readable text: removes iframe and script blocks
/// (using the CRegex.sIFrameReg and CRegex.sScriptReg patterns), removes all tags and
/// non-breaking spaces, collapses whitespace runs to one space and decodes the
/// curly-quote entities.
/// </summary>
/// <param name="sHtml">HTML fragment to clean; null or empty yields an empty string.</param>
/// <returns>The cleaned text.</returns>
public static string ClearTag(string sHtml)
{
    // Also covers null, which the former == "" comparison let through.
    if (string.IsNullOrEmpty(sHtml))
    {
        return "";
    }

    sHtml = CRegex.Replace(sHtml, CRegex.sIFrameReg, "", 0);
    sHtml = CRegex.Replace(sHtml, CRegex.sScriptReg, "", 0);

    // The second alternative used to be a bare blank character. It is written out
    // as the entity plus the non-breaking space character so the intent is visible
    // and ordinary spaces are left for the collapsing step below.
    // NOTE(review): the blank is assumed to stand for &nbsp; - confirm.
    sHtml = Regex.Replace(sHtml, @"<[^>]*>|&nbsp;|\u00A0", string.Empty, RegexOptions.IgnoreCase);

    // One pass replaces the three former \s\s passes, which collapsed runs of up to
    // eight characters but left longer runs partly intact.
    sHtml = Regex.Replace(sHtml, @"\s{2,}", " ");

    // These replacements previously mapped each curly quote onto itself (a no-op);
    // the pattern side is the entity form.
    // NOTE(review): entity names assumed from the replacement characters - confirm.
    sHtml = Regex.Replace(sHtml, "&ldquo;", "“", RegexOptions.IgnoreCase);
    sHtml = Regex.Replace(sHtml, "&rdquo;", "”", RegexOptions.IgnoreCase);
    return sHtml;
}
/// <summary>
/// Page content: passes <paramref name="sInput"/> to CRegex.Replace with a pattern
/// whose "Head" group captures a run of characters before a '&lt;', using an empty
/// replacement for that group.
/// </summary>
/// <param name="sInput">Input content.</param>
/// <returns>Whatever CRegex.Replace returns for the input.</returns>
public static string GetHtml(string sInput)
{
    const string headPattern = @"(?<Head>[^<]+)<";
    const string headGroup = "Head";

    string result = CRegex.Replace(sInput, headPattern, "", headGroup);
    return result;
}
/// <summary>
/// Sends a hand-built HTTP/1.1 request over a plain TCP socket to port 80 and
/// returns the response as text.
/// </summary>
/// <param name="mArgs">
/// Request description: IpAddress (host name or address to resolve and connect to),
/// Url, Method, optional Accept / RefererUrl / ContentType headers, postData (sent
/// when Method is "POST"), Encode (encoding used to decode the response) and
/// blGetHeaders.
/// </param>
/// <returns>
/// The full response text when mArgs.blGetHeaders is set; otherwise the response
/// with the leading "HTTP..." header block removed.
/// </returns>
/// <exception cref="SocketException">Connecting, sending or receiving failed.</exception>
public static string GetPost(RequestArgs mArgs)
{
    const string crlf = "\r\n"; // HTTP line terminator, independent of Environment.NewLine

    // (1) Resolve the host and create the socket. Prefer an IPv4 address, but let
    // the socket follow whatever family was actually resolved so an IPv6-first
    // DNS answer no longer breaks the connect.
    IPAddress[] addresses = Dns.GetHostEntry(mArgs.IpAddress).AddressList;
    IPAddress hostIp = addresses[0];
    foreach (IPAddress candidate in addresses)
    {
        if (candidate.AddressFamily == AddressFamily.InterNetwork)
        {
            hostIp = candidate;
            break;
        }
    }

    IPEndPoint ep = new IPEndPoint(hostIp, 80);
    Socket client = new Socket(hostIp.AddressFamily, SocketType.Stream, ProtocolType.Tcp);
    try
    {
        // (2) Connect to the server. The failure is still written to the console,
        // but it is rethrown: continuing would only fail later in Send with a
        // less useful error.
        try
        {
            client.Connect(ep);
        }
        catch (SocketException e)
        {
            Console.WriteLine(e.Message);
            throw;
        }

        StringBuilder sbRequest = new StringBuilder();
        Uri u = new Uri(mArgs.Url);
        sbRequest.Append(string.Format("{0} {1} HTTP/1.1", mArgs.Method, u.PathAndQuery)).Append(crlf);
        if (string.IsNullOrEmpty(mArgs.Accept))
        {
            sbRequest.Append("Accept: image/gif, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/x-ms-application, application/vnd.ms-xpsdocument, application/xaml+xml, application/x-ms-xbap, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*").Append(crlf);
        }
        else
        {
            sbRequest.Append(string.Format("Accept: {0}", mArgs.Accept)).Append(crlf);
        }

        sbRequest.Append("Accept-Language: zh-cn").Append(crlf);
        sbRequest.Append("User-Agent: Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; SLCC1; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30618; InfoPath.2)").Append(crlf);
        // NOTE(review): gzip/deflate is advertised here, but the response bytes below
        // are decoded straight to text with no decompression step - confirm servers
        // in use never answer compressed.
        sbRequest.Append("Accept-Encoding: gzip, deflate").Append(crlf);
        sbRequest.Append(string.Format("Host: {0}", u.Host)).Append(crlf);
        if (!string.IsNullOrEmpty(mArgs.RefererUrl))
        {
            sbRequest.Append(string.Format("Referer: {0}", mArgs.RefererUrl)).Append(crlf);
        }

        if (!string.IsNullOrEmpty(mArgs.ContentType))
        {
            sbRequest.Append(string.Format("Content-Type: {0}", mArgs.ContentType)).Append(crlf);
        }

        sbRequest.Append("Connection: Keep-Alive").Append(crlf);
        if (mArgs.Method == "POST")
        {
            sbRequest.Append(string.Format("Content-Length: {0}", mArgs.postData.Length)).Append(crlf);
        }

        // Blank line that ends the header block.
        sbRequest.Append(crlf);
        if (mArgs.Method == "POST")
        {
            sbRequest.Append(mArgs.postData).Append(crlf);
            sbRequest.Append(crlf);
        }

        // (3) Send the request.
        client.Send(Encoding.ASCII.GetBytes(sbRequest.ToString()));

        // (4) Receive the response. A stateful decoder keeps a multi-byte character
        // that straddles two reads from being decoded as two broken halves.
        Encoding responseEncoding = Encoding.GetEncoding(mArgs.Encode);
        Decoder decoder = responseEncoding.GetDecoder();
        byte[] buff = new byte[1024 * 3];
        char[] chars = new char[responseEncoding.GetMaxCharCount(buff.Length)];
        StringBuilder recstr = new StringBuilder();
        while (true)
        {
            int rCount = client.Receive(buff, buff.Length, SocketFlags.None);
            if (rCount <= 0)
            {
                // The peer closed the connection.
                break;
            }

            int charCount = decoder.GetChars(buff, 0, rCount, chars, 0);
            recstr.Append(chars, 0, charCount);

            // The request asks for Keep-Alive, so the server may never close the
            // connection. Give the next segment a moment to arrive and stop once
            // nothing is pending. The previous exit test (rCount <= buff.Length)
            // was always true and therefore stopped after the very first read.
            System.Threading.Thread.Sleep(50);
            if (client.Available == 0)
            {
                break;
            }
        }

        if (mArgs.blGetHeaders)
        {
            return recstr.ToString();
        }

        return CRegex.Replace(recstr.ToString(), @"^HTTP[\s\S]+?(\r\n){2,}", "", 0);
    }
    finally
    {
        // Release the socket on every path, including exceptions.
        client.Close();
    }
}