/// <summary>
/// Decodes <c>\uXXXX</c> escape sequences found in <paramref name="str"/> into the
/// characters they denote.
/// </summary>
/// <remarks>
/// The input is first passed through <c>CRegex.Replace</c> with the pattern <c>[\r\n]</c> and an
/// empty replacement (presumably stripping CR/LF characters; confirm against CRegex).
/// Each escape is decoded as one UTF-16 code unit from the four characters following <c>\u</c>;
/// any text after those four characters is copied through unchanged.
/// A <c>\u</c> followed by fewer than four characters is dropped together with those characters.
/// If the four characters are not valid hexadecimal, the marker "Erorr" is appended and the
/// remainder of the input is discarded.
/// </remarks>
/// <param name="str">Text that may contain <c>\uXXXX</c> escapes.</param>
/// <returns>The decoded text, or an empty string when the input is null or empty.</returns>
public static string UnicodeToString(string str)
{
    str = CRegex.Replace(str, "[\r\n]", "", 0);
    if (string.IsNullOrEmpty(str))
    {
        return "";
    }

    // Split on the two-character marker itself rather than substituting a sentinel
    // character first, so a literal sentinel in the input cannot be mistaken for an escape.
    string[] parts = str.Split(new string[] { @"\u" }, StringSplitOptions.None);

    System.Text.StringBuilder result = new System.Text.StringBuilder(parts[0]);
    for (int i = 1; i < parts.Length; i++)
    {
        string part = parts[i];
        if (part.Length < 4)
        {
            // Truncated escape: historically dropped without any output.
            continue;
        }

        int codeUnit;
        if (!int.TryParse(part.Substring(0, 4), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out codeUnit))
        {
            // Marker text (including its spelling) is kept as-is because callers may look for it.
            result.Append("Erorr");
            break;
        }

        result.Append((char)codeUnit);
        result.Append(part, 4, part.Length - 4);
    }

    return result.ToString();
}
/// <summary>
/// Converts a string in <c>\uXXXX\uXXXX</c> escape format back into the original text.
/// </summary>
/// <remarks>
/// The input is first passed through <c>CRegex.Replace</c> with the pattern <c>[\r\n]</c> and an
/// empty replacement (presumably removing CR/LF characters; confirm against CRegex).
/// The four characters after each <c>\u</c> are parsed as a hexadecimal UTF-16 code unit; text
/// following them is kept verbatim. A <c>\u</c> followed by fewer than four characters is
/// dropped along with those characters. When the four characters are not valid hexadecimal,
/// "Erorr" is appended and decoding stops, discarding the rest of the input.
/// </remarks>
/// <param name="str">Text that may contain <c>\uXXXX</c> escapes.</param>
/// <returns>The decoded text, or an empty string when the input is null or empty.</returns>
public static string UnicodeToString(string str)
{
    str = CRegex.Replace(str, "[\r\n]", "", 0);
    if (string.IsNullOrEmpty(str))
    {
        return "";
    }

    // Splitting directly on "\u" avoids the former sentinel-character substitution, which
    // corrupted any input that already contained the sentinel character.
    string[] segments = str.Split(new string[] { "\\u" }, StringSplitOptions.None);

    System.Text.StringBuilder outStr = new System.Text.StringBuilder(segments[0]);
    for (int i = 1; i < segments.Length; i++)
    {
        string segment = segments[i];
        if (segment.Length < 4)
        {
            // Incomplete escape: skipped entirely, matching the established behavior.
            continue;
        }

        // Parse the hexadecimal code unit; TryParse replaces the old catch of FormatException.
        int codeUnit;
        bool parsed = int.TryParse(
            segment.Substring(0, 4),
            System.Globalization.NumberStyles.HexNumber,
            System.Globalization.CultureInfo.InvariantCulture,
            out codeUnit);
        if (!parsed)
        {
            // Marker text is preserved byte-for-byte because callers may depend on it.
            outStr.Append("Erorr");
            break;
        }

        outStr.Append((char)codeUnit);
        outStr.Append(segment, 4, segment.Length - 4);
    }

    return outStr.ToString();
}
/// <summary>
/// Strips unwanted markup from fetched page content and rewrites the URLs it references.
/// </summary>
/// <remarks>
/// Processing order:
/// 1. Remove script and iframe elements, then form-related and noscript tags.
/// 2. Apply each caller-supplied removal pattern through <c>CRegex.Replace</c>.
/// 3. Pass image <c>src</c> URLs through <c>_ReplaceUrl</c> (presumably resolving them against
///    <paramref name="sPageUrl"/>; confirm against _ReplaceUrl).
/// 4. Apply the anti-hotlinking rules whose <c>Domain</c> equals <c>GetDomain(sPageUrl)</c>.
/// 5. Pass anchor, stylesheet-link, background, background-image, Flash movie and (for XML
///    content) xml-stylesheet URLs through <c>_ReplaceUrl</c>.
/// Script <c>src</c> attributes are not rewritten; script elements are removed in step 1.
/// </remarks>
/// <param name="sOriContent">Raw content to process.</param>
/// <param name="sOtherRemoveReg">
/// Extra removal patterns separated by CRLF; empty entries are ignored. Null means none.
/// </param>
/// <param name="sPageUrl">URL of the page the content came from.</param>
/// <param name="dtAntiLink">
/// Rule table read through its <c>Domain</c>, <c>Type</c> and <c>imgUrl</c> columns.
/// Null means no rules are applied.
/// </param>
/// <returns>The processed content.</returns>
public static string GetContent(string sOriContent, string sOtherRemoveReg, string sPageUrl, DataTable dtAntiLink)
{
    const string HotlinkProxyPrefix = "http://stat.580k.com/t.asp?url=";
    const RegexOptions StripOptions = RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase;

    string content = sOriContent;

    // Remove potentially dangerous markup.
    content = Regex.Replace(content, @"<script[\s\S]*?</script>", "", StripOptions);
    content = Regex.Replace(content, @"<iframe[^>]*>[\s\S]*?</iframe>", "", StripOptions);
    content = Regex.Replace(
        content,
        @"<input[\s\S]+?>|<form[\s\S]+?>|</form[\s\S]*?>|<select[\s\S]+?>?</select>|<textarea[\s\S]*?>?</textarea>|<file[\s\S]*?>|<noscript>|</noscript>",
        "",
        RegexOptions.IgnoreCase);

    // Caller-supplied removal patterns, one per CRLF-separated line.
    if (!string.IsNullOrEmpty(sOtherRemoveReg))
    {
        string[] extraPatterns = sOtherRemoveReg.Split(new string[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);
        foreach (string pattern in extraPatterns)
        {
            content = CRegex.Replace(content, pattern, "", 0);
        }
    }

    // Image paths.
    content = _ReplaceUrl("<img[\\s\\S]+?src\\s*=\\s*(?:'(?<src>[^']+)'|\"(?<src>[^\"]+)\"|(?<src>[^>\\s]+))\\s*[^>]*>", "src", content, sPageUrl);

    // Counter anti-hotlinking rules for this page's domain.
    if (dtAntiLink != null)
    {
        string domain = GetDomain(sPageUrl) ?? string.Empty;

        // Single quotes must be doubled inside a DataTable filter string literal; otherwise a
        // quote in the domain breaks (or alters) the filter expression.
        string filter = "Domain='" + domain.Replace("'", "''") + "'";

        foreach (DataRow rule in dtAntiLink.Select(filter))
        {
            string imgUrl = rule["imgUrl"].ToString();
            if (imgUrl.Length == 0)
            {
                // string.Replace throws on an empty search string; an empty rule matches nothing.
                continue;
            }

            // Type 1 substitutes the matched URL with the proxy prefix; any other type
            // prepends the proxy prefix to the matched URL.
            string replacement = Convert.ToInt32(rule["Type"]) == 1
                ? HotlinkProxyPrefix
                : HotlinkProxyPrefix + imgUrl;
            content = content.Replace(imgUrl, replacement);
        }
    }

    // Anchor links.
    content = _ReplaceUrl(@"<a[^>]+href\s*=\s*(?:'(?<href>[^']+)'|""(?<href>[^""]+)""|(?<href>[^>\s]+))\s*[^>]*>", "href", content, sPageUrl);

    // CSS links.
    content = _ReplaceUrl(@"<link[^>]+href\s*=\s*(?:'(?<href>[^']+)'|""(?<href>[^""]+)""|(?<href>[^>\s]+))\s*[^>]*>", "href", content, sPageUrl);

    // background attribute.
    content = _ReplaceUrl(@"background\s*=\s*(?:'(?<img>[^']+)'|""(?<img>[^""]+)""|(?<img>[^>\s]+))", "img", content, sPageUrl);

    // Style-based background: background-image:url(...).
    content = _ReplaceUrl(@"background-image\s*:\s*url\s*\x28(?<img>[^\x29]+)\x29", "img", content, sPageUrl);

    // Flash movie parameter.
    content = _ReplaceUrl(@"<param\s[^>]+""movie""[^>]+value\s*=\s*""(?<flash>[^"">]+\x2eswf)""[^>]*>", "flash", content, sPageUrl);

    // XSL stylesheet reference, only when the content is XML.
    if (IsXml(content))
    {
        content = _ReplaceUrl(@"<\x3fxml-stylesheet\s+[^\x3f>]+href=\s*(?:'(?<href>[^']+)'|""(?<href>[^""]+)"")\s*[^\x3f>]*\x3f>", "href", content, sPageUrl);
    }

    return content;
}
/// <summary>
/// Web page content: forwards the input to <c>CRegex.Replace</c> together with a fixed
/// pattern, an empty replacement and the group name "Head".
/// </summary>
/// <param name="sInput">Input content.</param>
/// <returns>Whatever <c>CRegex.Replace</c> returns for the input.</returns>
public static string GetHtml(string sInput)
{
    // The "Head" group captures a run of non-'<' characters that is followed by '<'.
    // NOTE(review): how CRegex.Replace uses the group name is not visible here - confirm.
    const string headPattern = @"(?<Head>[^<]+)<";
    const string headGroupName = "Head";

    return CRegex.Replace(sInput, headPattern, "", headGroupName);
}