/// <summary>
/// Crawls a novel chapter by chapter, starting at a fixed chapter page, and appends
/// each chapter's title and text to <c>D:\c.txt</c>.
/// </summary>
/// <remarks>
/// The loop follows the "next" link found on each page and stops as soon as a page
/// cannot be loaded or no next link can be read from it.
/// </remarks>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    const string outputPath = "D:\\c.txt";
    const string baseUrl = "http://www.42xs.com/read/0/404/";
    var nextUrl = "171271.html";

    // FileMode.Append creates the file when it does not exist, so no File.Exists probe is
    // needed. (The previous probe tested "D\\c.txt" - missing the colon - and therefore
    // truncated the existing output on every run.)
    using (var fs = new FileStream(outputPath, FileMode.Append))
    using (var sw = new StreamWriter(fs, Encoding.UTF8))
    {
        while (!string.IsNullOrEmpty(nextUrl))
        {
            var url = baseUrl + nextUrl;
            string title = null;
            string txt = null;
            var chapterRead = false;

            try
            {
                var doc = new JumonyParser().LoadDocument(url);
                title = doc.FindFirst("#center > div.title > h1").InnerText();
                txt = doc.FindFirst("#content").InnerText();
                chapterRead = true;

                // Reading the next link last means a missing link (e.g. on the final
                // chapter) still lets the chapter that was just read be written below.
                nextUrl = doc.FindFirst("#container > div:nth-child(3) > div > div.jump > a:nth-child(6)")
                             .Attribute("href")
                             .Value();
            }
            catch (Exception)
            {
                Console.WriteLine("{0}没有成功", url);
                nextUrl = "";
            }

            // Only write content that was actually fetched in this iteration; otherwise a
            // failed download would re-emit the previous chapter.
            if (chapterRead)
            {
                Console.WriteLine(title);
                sw.WriteLine("");
                sw.WriteLine(title);
                sw.WriteLine("");
                sw.WriteLine(txt);
            }
        }
    }

    // The file is flushed and closed before waiting for the key press.
    Console.Write("The End. Press any key to exit...");
    Console.ReadKey();
}
/// <summary>
/// Loads CssStyleSettingTest1.html from the current directory and checks the values
/// returned by <c>Style().GetValue(...)</c> for several elements.
/// </summary>
public void StyleParseTest()
{
    var htmlPath = Path.Combine(Environment.CurrentDirectory, "CssStyleSettingTest1.html");
    var document = new JumonyParser().LoadDocument(htmlPath);

    // First <div>: display must be read as "none".
    var divDisplay = document.FindFirst("div").Style().GetValue("display");
    Assert.AreEqual(divDisplay, "none", "无法正确解析丢失分号的属性值");

    // First <a>: display is expected to be null.
    var anchorDisplay = document.FindFirst("a").Style().GetValue("display");
    Assert.AreEqual(anchorDisplay, null, "无法正确解析没有style属性的元素");

    // First <p>: width is expected to be "12px" while display is expected to be null.
    var paragraphWidth = document.FindFirst("p").Style().GetValue("width");
    Assert.AreEqual(paragraphWidth, "12px", "无法正确解析空白样式设置值后随的正确表达式");

    var paragraphDisplay = document.FindFirst("p").Style().GetValue("display");
    Assert.AreEqual(paragraphDisplay, null, "无法正确解析空白样式设置值");

    // div#test1: display must be read as "none".
    var element = document.FindFirst("div#test1");
    var testDivDisplay = element.Style().GetValue("display");
    Assert.AreEqual(testDivDisplay, "none", "CSS 属性设置不应区分大小写");
}
/// <summary>
/// Renders the view "~/ActionUrlTest/Test1.html" through the registered view engines,
/// parses the rendered output and checks the href of the first link.
/// </summary>
public void Test1()
{
    var controllerContext = new ControllerContext(HttpContext.Request.RequestContext, new TestController());
    var viewResult = ViewEngines.Engines.FindView(controllerContext, "~/ActionUrlTest/Test1.html", null);
    Assert.NotNull(viewResult.View, "找不到视图");

    // Render the view into an in-memory writer and keep the produced markup.
    string renderedHtml;
    using (var output = new StringWriter())
    {
        var viewContext = new ViewContext(
            controllerContext,
            viewResult.View,
            new ViewDataDictionary(),
            new TempDataDictionary(),
            output);
        viewResult.View.Render(viewContext, output);
        renderedHtml = output.ToString();
    }

    IHtmlDocument document = new JumonyParser().Parse(renderedHtml);

    var link = document.FindFirst("a");
    Assert.NotNull(link);

    var href = link.Attribute("href").Value();
    Assert.AreEqual(href, "/TestController/TestAction?arg=args");
}
/// <summary>
/// Binds a small data dictionary to Test1.html and checks the resulting
/// <c>class</c> and <c>test</c> attributes on the body element.
/// </summary>
public void AttributeTest1()
{
    var htmlPath = Path.Combine(Environment.CurrentDirectory, "Test1.html");
    var document = new JumonyParser().LoadDocument(htmlPath);

    var dataValues = new Dictionary<string, object>();
    dataValues.Add("StyleClass", "Test");
    dataValues.Add("ThisTime", new DateTime(2000, 1, 2));
    dataValues.Add("ScriptValue1", null);

    var binding = HtmlBinding.Create(document, dataValues);
    binding.DataBind();

    var bodyClass = document.FindFirst("body").Attribute("class").Value();
    Assert.AreEqual(bodyClass, "Test", "针对属性的表达式绑定不成功");

    var bodyTest = document.FindFirst("body").Attribute("test").Value();
    Assert.AreEqual(bodyTest, "this time is 2000-01-02 #", "格式表达式测试失败");
}
/// <summary>
/// Runs data binding on Test1.html with a null data context and checks the
/// resulting inner HTML of the title element.
/// </summary>
public void Test1()
{
    var htmlPath = Path.Combine(Environment.CurrentDirectory, "Test1.html");
    var document = new JumonyParser().LoadDocument(htmlPath);

    var binding = HtmlBinding.Create(document, null);
    binding.DataBind();

    var titleHtml = document.FindFirst("title").InnerHtml();
    Assert.AreEqual(titleHtml, "Test Title abc text", "对 title 元素内容的文本替换测试失败");
}
/// <summary>
/// Downloads the index page, reads the total page count from the text of the first
/// <c>font</c> element (the text between "共" and "页"), and calls <c>getByPage</c>
/// for every page index from 0 up to, but not including, that count.
/// </summary>
public static void start()
{
    const string indexUrl = "http://www.huachuan.gov.cn/zwgk/xxgksyzl/fgfgg/index.html";

    string pageStr = Util.getHtmlStr(indexUrl, Encoding.Default);
    IHtmlDocument source = new JumonyParser().Parse(pageStr);

    // Take whatever follows the first "共" marker, then cut it at the "页" marker.
    string pagerText = source.FindFirst("font").InnerText();
    string afterTotalMarker = pagerText.Split(new string[] { "共" }, StringSplitOptions.RemoveEmptyEntries)[1];
    string pageCountText = afterTotalMarker.Split(new string[] { "页" }, StringSplitOptions.RemoveEmptyEntries)[0];
    int totalPage = int.Parse(pageCountText);

    Debug.WriteLine("----->数据总页数<-----:" + totalPage);

    int pageIndex = 0;
    while (pageIndex < totalPage)
    {
        getByPage(pageIndex);
        pageIndex++;
    }
}
/// <summary>
/// Binds a data dictionary to Test1.html and checks that the script element's inner
/// HTML contains the bound value assignment.
/// </summary>
public void Test1()
{
    var htmlPath = Path.Combine(Environment.CurrentDirectory, "Test1.html");
    var document = new JumonyParser().LoadDocument(htmlPath);

    var data = new Dictionary<string, object>
    {
        { "StyleClass", null },
        { "ThisTime", null },
        { "ScriptValue1", "TestValue" }
    };

    var binding = HtmlBinding.Create(document, data);
    binding.DataBind();

    var scriptHtml = document.FindFirst("script").InnerHtml();
    StringAssert.Contains(scriptHtml, "var value1 =\"TestValue\";");
}
/// <summary>
/// Scrapes the Ganji listing page whose URL is typed into <c>UPURLganji</c>, collects the
/// listings that are newer than the newest stored record, loads every listing's detail
/// page to fill in price, images, phone, contact and house attributes, and finally
/// hands the collected items to <c>SaveDataHTML</c> and saves the entity context.
/// </summary>
/// <remarks>
/// UI controls are only touched through <c>Dispatcher.Invoke</c>. Failures on a single
/// listing or detail page are logged to the debug output and that entry is skipped or
/// left partially filled; any other failure is shown in <c>Ertext</c>.
/// <c>WhileBOOL2</c> is always reset to false when the method ends.
/// </remarks>
private void NewHTMLhreperGJ()
{
    string url = "";
    this.Dispatcher.Invoke(new Action(() => { url = UPURLganji.Text; }));
    int cityItems = 0;
    this.Dispatcher.Invoke(new Action(() => { cityItems = Convert.ToInt32(ganjicombc.SelectedValue); }));

    try
    {
        using (var ctx = new oaEntities())
        {
            // ---- Load the listing (first) page ----
            DateTime DBtime = DateTime.Today;
            DateTime ds = DateTime.Now;

            // Newest stored publish time for this city, ignoring records dated today.
            var ftdef = ctx.T_FGJHtmlData.DefaultIfEmpty().Where(x => x.FbTime != DBtime && x.Laiyuan == "赶集" && x.CityID == cityItems && x.FbTime <= ds).FirstOrDefault();
            if (ftdef != null)
            {
                MaxTime = ctx.T_FGJHtmlData.DefaultIfEmpty().Where(x => x.FbTime != DBtime && x.Laiyuan == "赶集" && x.CityID == cityItems && x.FbTime <= ds).Max(x => x.FbTime);
            }

            // Strip the HTML comment markers so that commented-out markup is parsed too.
            string ALLhtml = GetHTMLstr_gj(url, "");
            ALLhtml = ALLhtml.Replace("<!-- ", string.Empty).Replace(" -->", string.Empty);
            IHtmlDocument document = new JumonyParser().Parse(ALLhtml);

            L_Class_Ganji.Clear();
            foreach (var item in document.Find(".list-items"))
            {
                try
                {
                    newWORD.Class1 GJclass = new newWORD.Class1();
                    IHtmlElement item_a = item.FindFirst("a");

                    // Listing title
                    GJclass.TextName = item.FindFirst(".house-name").InnerText().Trim();

                    // Publish time: skip entries whose time text cannot be interpreted.
                    string Timestr = item.FindFirst(".house-pulishtime").InnerText().Trim();
                    DateTime publishTime;
                    bool isToday;
                    if (!TryResolveGanjiPublishTime(Timestr, DateTime.Now, out publishTime, out isToday))
                    {
                        continue;
                    }
                    GJclass.FbTime = publishTime;

                    // Entries marked "今天" are always kept; everything else must be newer
                    // than the newest stored record.
                    if (!isToday && GJclass.FbTime <= MaxTime)
                    {
                        continue;
                    }

                    // Detail link: relative links are prefixed with the mobile site host.
                    string href = item_a.Attribute("href").Value();
                    GJclass.href = href.IndexOf("https") > -1 ? href : "https://3g.ganji.com" + href;

                    // Address
                    GJclass.Address = item.FindFirst(".house-addr").FindFirst(".house-area").InnerText();

                    L_Class_Ganji.Add(GJclass);
                }
                catch (Exception itemError)
                {
                    // Best effort: a malformed listing must not stop the whole run.
                    System.Diagnostics.Debug.WriteLine("NewHTMLhreperGJ: listing skipped: " + itemError.Message);
                }
            }

            this.Dispatcher.Invoke(new Action(() => { GJSUMcount.Text = L_Class_Ganji.Count().ToString(); }));

            // ---- Load every listing's detail page ----
            for (int i = 0; i < L_Class_Ganji.Count; i++)
            {
                // Pause between detail requests.
                Thread.Sleep(3000);
                try
                {
                    this.Dispatcher.Invoke(new Action(() => { GJReadCount.Text = i.ToString(); }));

                    var entry = L_Class_Ganji[i];
                    string this_html = GetHTMLstr_gj(entry.href, "");
                    if (this_html.Trim().Length <= 0)
                    {
                        continue;
                    }
                    IHtmlDocument document_this = new JumonyParser().Parse(this_html);

                    // Price
                    entry.FwSumMoney = document_this.Exists(".house-price")
                        ? document_this.FindFirst(".house-price").InnerText().Replace("算房贷", string.Empty).Trim()
                        : string.Empty;

                    // Images: "有---url1---url2..." or empty when there are none.
                    entry.Image_str = string.Empty;
                    foreach (var img in document_this.Find(".slide-area>li>img"))
                    {
                        string imageUrl = img.Attribute("data-big-image").Value();
                        entry.Image_str = entry.Image_str.Length > 0
                            ? entry.Image_str + "---" + imageUrl
                            : "有---" + imageUrl;
                    }

                    // Phone: the last non-empty tel link wins.
                    foreach (var tel in document_this.Find(".tel"))
                    {
                        string telHref = tel.Attribute("href").Value().Trim();
                        if (telHref.Length > 0)
                        {
                            entry.photo = telHref.Replace("tel:", string.Empty);
                        }
                    }

                    // Source and contact person
                    entry.Laiyuan = "赶集";
                    entry.PersonName = document_this.Exists(".broker")
                        ? document_this.FindFirst(".broker").FindFirst("span").InnerText().Replace("(个人)", string.Empty)
                        : string.Empty;

                    // House attributes: each span is routed by the marker text it contains.
                    foreach (var span in document_this.Find(".house-type>span"))
                    {
                        string spanText = span.InnerText();
                        if (spanText.IndexOf("朝") > -1) { entry.FwChaoxiang = PFUANDtext(span, "朝"); }
                        else if (spanText.IndexOf("室") > -1) { entry.FwHuXing = PFUANDtext(span, "室"); }
                        else if (spanText.IndexOf("层") > -1) { entry.FwLoucheng = PFUANDtext(span, "层"); }
                        else if (spanText.IndexOf("㎡") > -1) { entry.FwMianji = PFUANDtext(span, "㎡"); }
                        else if (spanText.IndexOf("产权") > -1) { entry.FwNianxian = PFUANDtext(span, "产权"); }
                        else if (spanText.IndexOf("毛") > -1) { entry.FwZhuangxiu = PFUANDtext(span, "毛"); }
                        else if (spanText.IndexOf("装修") > -1) { entry.FwZhuangxiu = PFUANDtext(span, "装修"); }
                    }
                }
                catch (Exception detailError)
                {
                    // Best effort: keep whatever was filled in before the failure.
                    System.Diagnostics.Debug.WriteLine("NewHTMLhreperGJ: detail page failed: " + detailError.Message);
                }
            }

            // ---- Persist ----
            SaveDataHTML(ctx, L_Class_Ganji, cityItems);
            ctx.SaveChanges();
        }

        this.Dispatcher.Invoke(new Action(() => { GJLISTbox.Text = "完成更新————" + DateTime.Now.ToString(); }));
    }
    catch (Exception e)
    {
        this.Dispatcher.Invoke(new Action(() => { Ertext.Text = DateTime.Now.ToString() + e.ToString(); }));
    }
    finally
    {
        // Reset the flag even if reporting the error itself fails.
        WhileBOOL2 = false;
    }
}

/// <summary>
/// Converts the publish-time text of a Ganji listing into a <see cref="DateTime"/>.
/// </summary>
/// <param name="rawText">The text of the publish-time element, e.g. "更新于3分钟前", "昨天", "今天" or "MM-dd".</param>
/// <param name="now">The reference time that relative texts are subtracted from.</param>
/// <param name="publishTime">Receives the resolved time; the date of <paramref name="now"/> for "今天".</param>
/// <param name="isToday">Receives true only when the text is "今天".</param>
/// <returns>False when the text cannot be interpreted and the listing should be skipped.</returns>
private static bool TryResolveGanjiPublishTime(string rawText, DateTime now, out DateTime publishTime, out bool isToday)
{
    publishTime = now.Date;
    isToday = false;

    string text = rawText.Replace("更新于", string.Empty).Trim();

    // Named days are matched BEFORE the "前" suffix is stripped; otherwise "前天"
    // would degrade to "天" and could never be recognised.
    switch (text)
    {
        case "今天":
            isToday = true;
            return true;
        case "刚刚":
            publishTime = now;
            return true;
        case "昨天":
            publishTime = now.AddDays(-1);
            return true;
        case "前天":
            publishTime = now.AddDays(-2);
            return true;
    }

    text = text.Replace("前", string.Empty).Trim();
    int amount;

    if (text.IndexOf("分钟") > -1)
    {
        if (!int.TryParse(text.Replace("分钟", string.Empty), out amount))
        {
            return false;
        }
        publishTime = now.AddMinutes(-amount);
        return true;
    }

    if (text.IndexOf("小时") > -1)
    {
        if (!int.TryParse(text.Replace("小时", string.Empty), out amount))
        {
            return false;
        }
        publishTime = now.AddHours(-amount);
        return true;
    }

    if (text.IndexOf("天") > -1)
    {
        string dayCount = text.Replace("天", string.Empty).Trim();
        if (dayCount.Length <= 0)
        {
            return false;
        }
        // Use the stated number of days; fall back to the legacy fixed 3-day
        // approximation when the count is not a plain number.
        publishTime = now.AddDays(-(int.TryParse(dayCount, out amount) ? amount : 3));
        return true;
    }

    // Remaining format is "MM-dd".
    // NOTE(review): the year is always taken as last year, as in the previous
    // implementation - confirm whether dates within the current year are intended.
    string[] monthDay = text.Split('-');
    if (monthDay.Length < 2)
    {
        return false;
    }
    return DateTime.TryParse((now.Year - 1) + "-" + monthDay[0] + "-" + monthDay[1], out publishTime);
}
/// <summary>
/// Crawls Mogujie product listings for every category row in the <c>mogujieurl</c> table
/// and inserts one row per product into the <c>DATE</c> table of the same database.
/// </summary>
/// <remarks>
/// For each category up to 100 listing pages are requested; paging stops at the first
/// response that does not contain a product list. Products already stored for the current
/// year and month (same title and goods id) are skipped. Shop, price, sales and comment
/// figures are read best-effort from the product page; a value that cannot be read keeps
/// its default. All SQL statements use parameters because the values come from scraped
/// pages. A failing INSERT is not caught and ends the run.
/// </remarks>
public void Start_go()
{
    falg = true;

    // NOTE(review): server address and "sa" credentials are hard-coded in source;
    // they should be moved to protected configuration.
    const string mogujieConnectionString = "Data Source=10.1.56.31;Initial Catalog=mogujie;Persist Security Info=True;User ID=sa;Password=123456";
    const string commerceConnectionString = "Data Source=10.1.56.31;Initial Catalog=E_COMMERCE;Persist Security Info=True;User ID=sa;Password=123456";

    // The page count stored in the URL table is ignored on purpose: up to 100 pages are
    // tried and the loop breaks out as soon as a page has no product list.
    const int maxPages = 100;

    const string INDUSTRY_ID = "内衣配饰";   // first-level category
    const string PLATFORM_ID = "蘑菇街";     // source shopping platform
    const int BRAND_ID = 177156;
    const string REPUTATION = "无数据";
    const string SHOPTYPE = "无数据";
    const string SHELVES_TIME = "无数据";

    DateTime today = DateTime.Today;
    int YEAR = today.Year;
    int MONTH = today.Month;
    string year = YEAR.ToString();
    string month = MONTH.ToString();

    using (SqlConnection conn = new SqlConnection(mogujieConnectionString))
    using (SqlConnection conn_E = new SqlConnection(commerceConnectionString))
    {
        conn.Open();
        conn_E.Open();

        // Category URLs to crawl.
        DataTable dt = new DataTable();
        using (SqlDataAdapter adap = new SqlDataAdapter("select * from mogujieurl", conn))
        {
            adap.Fill(dt);
        }

        for (int i = 0; i < dt.Rows.Count; i++)
        {
            object[] a = dt.Rows[i].ItemArray;
            string url_ = ((string)a[3]).Split('?')[0];            // category link without query string
            string PRODUCTTYPE_ID = (string)a[7];                  // second-level category
            int urlnum = int.Parse(url_.Split('/')[5]);            // category id taken from the 6th '/'-separated part of the link

            HttpUtility http = new HttpUtility();

            for (int page = 1; page <= maxPages; page++)
            {
                string url_and = "http://list.mogujie.com/search?cKey=pc-wall-v1&page=" + page + "&fcid=" + urlnum + "&ad=2";
                string Area_Html = http.GetHtmlText(url_and, "utf-8", "text/html;charset=utf-8", "");

                JToken docs;
                int perPage;
                try
                {
                    var dom = (JObject)JsonConvert.DeserializeObject(Area_Html);
                    docs = dom["result"]["wall"]["docs"];
                    perPage = docs.Count();
                }
                catch (Exception pageError)
                {
                    // No (valid) product list on this page: this category is finished.
                    System.Diagnostics.Debug.WriteLine("Start_go: stop paging at page " + page + ": " + pageError.Message);
                    break;
                }

                for (int iii = 0; iii < perPage; iii++)
                {
                    string url;            // product detail URL
                    decimal DISCOUNT;      // promotional price
                    string tradeItemId;    // goods id
                    string TITLE;
                    try
                    {
                        JToken doc = docs[iii];
                        url = doc["link"].ToString();
                        DISCOUNT = Convert.ToDecimal(doc["price"].ToString());
                        tradeItemId = doc["tradeItemId"].ToString();
                        TITLE = doc["title"].ToString();
                    }
                    catch (Exception itemError)
                    {
                        // Skip the product instead of continuing with values left over
                        // from the previous one.
                        System.Diagnostics.Debug.WriteLine("Start_go: product entry skipped: " + itemError.Message);
                        continue;
                    }

                    // Stored titles have apostrophes replaced by '-'; the duplicate check
                    // must compare against that stored form.
                    string storedTitle = TITLE.Replace("'", "-");

                    try
                    {
                        if (MogujieRecordExists(conn, storedTitle, tradeItemId, month, year))
                        {
                            richTextBox1.Text = "重复数据";
                            continue;
                        }
                    }
                    catch (Exception lookupError)
                    {
                        System.Diagnostics.Debug.WriteLine("Start_go: duplicate check failed: " + lookupError.Message);
                        continue;
                    }

                    string html;
                    try
                    {
                        html = http.GetHtmlText(url, "utf-8", "text/html; charset=utf-8");
                    }
                    catch (Exception fetchError)
                    {
                        System.Diagnostics.Debug.WriteLine("Start_go: detail page not loaded: " + fetchError.Message);
                        continue;
                    }
                    var documenthtml = new JumonyParser().Parse(html);

                    string GOODSID = tradeItemId;
                    string SHOPNAME = "无数据";
                    string COLLECTION_NUM = "0";
                    int PROVINCE_ID = 0;
                    int CITY_ID = 0;

                    // Shop name, favourite count and region codes (best effort).
                    try
                    {
                        string shopId = documenthtml.FindFirst("#shopId").Attribute("value").Value();
                        string url_shop = "http://www.mogujie.com/trade/shopweb_index/asyShopHead?&shopId=" + shopId;
                        string shopJson = http.GetHtmlText(url_shop, "GBK", "text/html;charset=GBK", "");
                        var dom_SS = (JObject)JsonConvert.DeserializeObject(shopJson);

                        SHOPNAME = dom_SS["data"]["shopInfo"]["name"].ToString();
                        COLLECTION_NUM = dom_SS["data"]["shop_befaved_num"].ToString();
                        string area = dom_SS["data"]["shopInfo"]["area"].ToString();

                        string PROVINCE = "其他";
                        string CITY = "其他";
                        // Only the 6-character form containing "省" is split: characters
                        // 0-1 become the province and characters 3-5 the city.
                        if (area.IndexOf("省") > -1 && area.Length == 6)
                        {
                            PROVINCE = area.Substring(0, 2);
                            CITY = area.Substring(3, 3);
                        }

                        CITY_ID = LookupRegionCode(conn_E, "select CITY_CODE from CITY where CITY_NAME = @name", CITY);
                        PROVINCE_ID = LookupRegionCode(conn_E, "select PROVINCE_CODE from PROVINCE where PROVINCE_NAME = @name", PROVINCE);
                    }
                    catch (Exception shopError)
                    {
                        System.Diagnostics.Debug.WriteLine("Start_go: shop info unavailable: " + shopError.Message);
                    }

                    // Original price: used only when exactly one currency sign is present,
                    // otherwise the promotional price is taken.
                    decimal PRICE;
                    try
                    {
                        string PRICE1 = documenthtml.FindFirst("#J_OriginPrice").InnerHtml();
                        int count = PRICE1.Length - PRICE1.Replace("¥", "").Length;
                        PRICE = count == 1 ? Convert.ToDecimal(PRICE1.Split('¥')[1]) : DISCOUNT;
                    }
                    catch (Exception priceError)
                    {
                        System.Diagnostics.Debug.WriteLine("Start_go: original price unavailable: " + priceError.Message);
                        PRICE = DISCOUNT;
                    }

                    // Total sales volume
                    int SALE_VOLUME = 0;
                    try
                    {
                        SALE_VOLUME = int.Parse(documenthtml.FindFirst(".property-extra").FindFirst(".J_SaleNum").InnerHtml());
                    }
                    catch (Exception salesError)
                    {
                        System.Diagnostics.Debug.WriteLine("Start_go: sales volume unavailable: " + salesError.Message);
                    }

                    // Sales amount
                    decimal SALE_AMOUNT = DISCOUNT * SALE_VOLUME;

                    // Total comment count
                    int COMMEN_NUM = 0;
                    try
                    {
                        COMMEN_NUM = int.Parse(documenthtml.FindFirst(".property-extra").FindFirst(".num").InnerHtml());
                    }
                    catch (Exception commentError)
                    {
                        System.Diagnostics.Debug.WriteLine("Start_go: comment count unavailable: " + commentError.Message);
                    }

                    // Positive = ".best" entries in the comment list; negative = remaining
                    // links in that list; moderate = total minus both.
                    int POSITIVE_COMMEN = 0;
                    int NEGATIVE_COMMEN = 0;
                    try
                    {
                        var commentList = documenthtml.FindFirst(".comment-content").FindFirst(".list");
                        POSITIVE_COMMEN = commentList.Find(".best").Count();
                        NEGATIVE_COMMEN = commentList.Find("a").Count() - POSITIVE_COMMEN;
                    }
                    catch (Exception ratingError)
                    {
                        System.Diagnostics.Debug.WriteLine("Start_go: comment breakdown unavailable: " + ratingError.Message);
                    }
                    int MODERATE_COMMEN = COMMEN_NUM - POSITIVE_COMMEN - NEGATIVE_COMMEN;

                    // Human-readable summary shown in the text box after the insert.
                    string strstr = string.Join("\n", new string[]
                    {
                        "商品编号 = " + GOODSID,
                        "一级分类的编号 = " + INDUSTRY_ID,
                        "二级分类的编号 = " + PRODUCTTYPE_ID,
                        "购物平台的来源 = " + PLATFORM_ID,
                        "品牌ID = " + BRAND_ID,
                        "省份 = " + PROVINCE_ID,
                        "店铺名称 = " + SHOPNAME,
                        "城市 = " + CITY_ID,
                        "title = " + TITLE,
                        "价格 = " + PRICE,
                        "促销价 = " + DISCOUNT,
                        "销售总量 = " + SALE_VOLUME,
                        "销售额 = " + SALE_AMOUNT,
                        "总评 = " + COMMEN_NUM,
                        "好评 = " + POSITIVE_COMMEN,
                        "中评 = " + MODERATE_COMMEN,
                        "差评 = " + NEGATIVE_COMMEN,
                        "收藏人数 = " + COLLECTION_NUM,
                        "名誉 = " + REPUTATION,
                        "shoptype = " + SHOPTYPE,
                        "上市时间 = " + SHELVES_TIME,
                        "年 = " + YEAR,
                        "月 = " + MONTH,
                        "详细地址 = " + url
                    });

                    richTextBox1.Text = " ";

                    // Values in the positional order of the DATE table's columns.
                    object[] values =
                    {
                        GOODSID, INDUSTRY_ID, PRODUCTTYPE_ID, PLATFORM_ID, BRAND_ID, PROVINCE_ID,
                        SHOPNAME.Replace("'", "-"), CITY_ID, storedTitle, PRICE, DISCOUNT, SALE_VOLUME,
                        SALE_AMOUNT, COMMEN_NUM, POSITIVE_COMMEN, MODERATE_COMMEN, NEGATIVE_COMMEN,
                        COLLECTION_NUM, REPUTATION, SHOPTYPE, SHELVES_TIME, YEAR, MONTH, url
                    };
                    string placeholders = string.Join(",", Enumerable.Range(0, values.Length).Select(n => "@p" + n).ToArray());
                    using (SqlCommand com = new SqlCommand("INSERT INTO DATE VALUES (" + placeholders + ")", conn))
                    {
                        for (int p = 0; p < values.Length; p++)
                        {
                            // A null value is stored as an empty string, as before.
                            com.Parameters.AddWithValue("@p" + p, values[p] ?? string.Empty);
                        }
                        com.ExecuteNonQuery();
                    }

                    richTextBox1.Text = "--------插入成功---------\n" + strstr;
                } // products on one page
            } // pages of one category
        } // rows of the URL table
    }

    MessageBox.Show("蘑菇街抓取完成");
}

/// <summary>
/// Returns true when the DATE table already holds a row with the given title and goods id
/// for the given month and year.
/// </summary>
private static bool MogujieRecordExists(SqlConnection conn, string title, string goodsId, string month, string year)
{
    const string sql = "select count(*) from DATE where TITLE = @title and GOODSID = @goodsId and MONTH = @month and YEAR = @year";
    using (SqlCommand cmd = new SqlCommand(sql, conn))
    {
        cmd.Parameters.AddWithValue("@title", title);
        cmd.Parameters.AddWithValue("@goodsId", goodsId);
        cmd.Parameters.AddWithValue("@month", month);
        cmd.Parameters.AddWithValue("@year", year);
        return Convert.ToInt32(cmd.ExecuteScalar()) >= 1;
    }
}

/// <summary>
/// Runs a single-value lookup query that takes one <c>@name</c> parameter and returns the
/// integer code from the first row, or 0 when there is no row, the value is not an
/// integer, or the query fails.
/// </summary>
private static int LookupRegionCode(SqlConnection conn, string sql, string name)
{
    try
    {
        using (SqlCommand cmd = new SqlCommand(sql, conn))
        {
            cmd.Parameters.AddWithValue("@name", name);
            object code = cmd.ExecuteScalar();
            return code is int ? (int)code : 0;
        }
    }
    catch (Exception lookupError)
    {
        // Best effort: an unknown region simply keeps code 0.
        System.Diagnostics.Debug.WriteLine("Start_go: region lookup failed: " + lookupError.Message);
        return 0;
    }
}
/// <summary>
/// Downloads the page at <c>URL</c>, and either reports that a verification page was
/// served or appends one <c>Class1</c> entry per listing to <paramref name="L_Class"/>.
/// </summary>
/// <param name="L_Class">The list that receives the parsed listings.</param>
/// <returns>
/// True when the page title is "请输入验证码" (the page is then opened in the embedded
/// browser and nothing is parsed); false after the listings have been parsed.
/// </returns>
private bool GO_58com(List<Class1> L_Class)
{
    string html;
    // WebClient is disposable; release it as soon as the page has been downloaded.
    using (WebClient client = new WebClient())
    {
        client.Encoding = Encoding.UTF8;
        html = client.DownloadString(URL);
    }

    IHtmlDocument document = new JumonyParser().Parse(html);

    if (document.FindFirst("title").InnerText().Trim() == "请输入验证码")
    {
        // Show the site in the embedded browser instead of the data grid.
        this.Dispatcher.Invoke(new Action(() =>
        {
            DataGrid.Visibility = Visibility.Collapsed;
            Webbrowser1.Visibility = Visibility.Visible;
            Webbrowser1.Navigate(URL);
        }));
        return(true);
    }

    IEnumerable<IHtmlElement> listItems = document.Find(".house-list-wrap").Find("li");
    foreach (var item in listItems)
    {
        Class1 _class = new Class1();
        _class.TextName = GetN_value(item, ".title>a");
        _class.href = item.Exists(".title > a") ? item.FindFirst(".title>a").Attribute("href").Value() : string.Empty;
        _class.Quyu = "同城";
        _class.PersonName = GetN_value(item, ".jjrname-outer");
        _class.Laiyuan = "58";

        // Address: the text of every link inside ".baseinfo", concatenated in order.
        StringBuilder address = new StringBuilder();
        foreach (var addressLink in item.Find(".baseinfo").Find("a"))
        {
            address.Append(addressLink.InnerText());
        }
        _class.Address = address.ToString();

        _class.SumMoney = GetN_value(item, ".sum");
        _class.PingMoney = GetN_value(item, ".unit");
        _class.Allpm = GetN_value(item, ".baseinfo");
        _class.datetime = GetN_value(item, ".time");
        _class.Image_Count = GetInt_value(item, ".picNum");
        _class.Image_str = _class.Image_Count > 0 ? "有" : string.Empty;

        L_Class.Add(_class);
        r++;
    }
    return(false);
}
/// <summary>
/// Loads SpecificationTest9.html from the current directory and checks the inner text
/// of the body element against the expected literal.
/// </summary>
public void SpecificationTest9()
{
    var htmlPath = Path.Combine(Environment.CurrentDirectory, "SpecificationTest9.html");
    var document = new JumonyParser().LoadDocument(htmlPath);

    var bodyText = document.FindFirst("body").InnerText();
    Assert.AreEqual(bodyText, " <-anc> <_test> <中文> <1a> <:abc> ", "特殊字符解析错误");
}