public void FilterUsingRuleSetTest()
{
    // Scenario 1: three begin/end rules in one set, with image localization switched on.
    const string imageUrl = "http://www.tourol.cn/Img/slide/1.png";

    BaseRule divRule = new BeginEndRule("<div>", "</div>", false, false, true, true);
    divRule.RuleNo = 10;

    BaseRule spanRule = new BeginEndRule("<span>", "</span>", false, false, true, true);
    spanRule.RuleNo = 11;

    BaseRule imageDivRule = new BeginEndRule("<div id=img>", "</div>", false, false, true, true);
    imageDivRule.RuleNo = 12;

    RuleSet localizingSet = new RuleSet();
    localizingSet.Rules.Add(divRule);
    localizingSet.Rules.Add(spanRule);
    localizingSet.Rules.Add(imageDivRule);
    localizingSet.NeedImageLocalizer = true;

    string html = "1<div>a</div>2<span>b</span>3<div id=img><img src=\"" + imageUrl + "\"/></div>";

    // The expected <img> source is "/FetchImages/" + |hash of the original URL| + ".png".
    // NOTE(review): string.GetHashCode() is not guaranteed to be stable across runtimes or
    // processes; this presumably mirrors how the image localizer names files - confirm.
    int imageKey = Math.Abs(imageUrl.GetHashCode());
    string expectedHtml = "ab<img src=\"/FetchImages/" + imageKey + ".png\"/>";

    Assert.AreEqual(expectedHtml, localizingSet.FilterUsingRuleSet(ref html, false));

    // Scenario 2: the same div/span rules, output requested in JSON form and keyed by the set's Code.
    RuleSet jsonSet = new RuleSet();
    jsonSet.Rules.Add(divRule);
    jsonSet.Rules.Add(spanRule);
    jsonSet.Code = "name";

    string plainHtml = "1<div>a</div>2<span>b</span>3";
    string expectedJson = "name:\"ab\"";

    Assert.AreEqual(expectedJson, jsonSet.FilterUsingRuleSet(ref plainHtml, true));
}
public void FilterUsingRuleSetTestReal()
{
    // Fragment of a real scenic-spot page, used as the input for both rule sets below.
    string pageHtml =
        "<div id=\"portal-block-445000270656\" class=\"udiyblock\" type=\"CommonSource\"> <div id=\"jqlast_maincontent\" class=\"jqlast_main_title\">"
        + "<h1>仙都风景名胜区</h1><span class=\"grade\">AAAA</span><span onmouseover=\"show_dk(event,this)\" onmouseout=\"hide_dk()\" class=\"cosPicLast s_dpjj_img\"></span><div class=\"thDiv\"><div class=\"thDiv\">"
        + "<span id=\"checkGuid_0_0\" class=\"checkGuid yanKer\">"
        + "<div class=\"nopicYk none\" style=\"display: none; \">"
        + "<span class=\"nopicYk_head\"></span>"
        + "<div class=\"nopicYk_mit\">"
        + "<p class=\"nopicYk_p\">该景区已参加验客大赛,赶快写博客、打擂台,赢万元轿车吧!"
        + "<a href=\"http://www.17u.com/special/yanke/\" target=\"_blank\" title=\"什么是验客大赛?\" rel=\"nofollow\">(什么是验客大赛?)</a>"
        + "</p></div></div></span></div></div></div><span class=\"list_sale\" id=\"last_sale\" style=\"display: block; \"><span id=\"last_sale_t\">8分钟</span>前有人预订了该景点</span></div>";

    // Simulate two rule sets, each holding a single begin/end rule.

    // First set: the title, taken from between the <h1> tags of the main-title div.
    BaseRule titleRule = new BeginEndRule(
        "<div id=\"jqlast_maincontent\" class=\"jqlast_main_title\"><h1>",
        "</h1>",
        false, false, true, true);
    titleRule.RuleNo = 10;
    titleRule.Name = "标题rule";

    RuleSet titleSet = new RuleSet();
    titleSet.Name = "标题";
    titleSet.Rules.Add(titleRule);
    titleSet.Code = "title";

    // Second set: the grade, taken from the span with class "grade".
    BaseRule gradeRule = new BeginEndRule("<span class=\"grade\">", "</span>", false, false, true, true);
    gradeRule.RuleNo = 10;
    gradeRule.Name = "等级rule";

    RuleSet gradeSet = new RuleSet { Name = "等级", Code = "level" };
    gradeSet.Rules.Add(gradeRule);

    // Both sets receive the same variable by ref, title first and grade second.
    Assert.AreEqual("仙都风景名胜区", titleSet.FilterUsingRuleSet(ref pageHtml, false));
    Assert.AreEqual("AAAA", gradeSet.FilterUsingRuleSet(ref pageHtml, false));
}
public void FilterUsingRuleTest()
{
    // Ticket-list fixture. The literal is verbatim, so its whitespace and embedded line break are part of the input.
    string rawContent = @" <html><meta keywor......../> <title>启园</title> <p class=""tkType""> <span class=""tkSpan"" style=""cursor: default"" id=""se_title_10""> <span>清明上河图</span> </span> </p> <span fan=2 class=""xjq_new"" _left=""420"" _top=""581"" _width=""115"" _height=""22""> <s fxprice=""2"" danbaotype=""0"" class=""return""> <span class=""return_tt clearfix""></span><span class=""return_ct""><span class=""nob"">¥</span>2</span> </s> </span> </td> <td class=""sp_price""> <span class=""parGd"">¥95</span> </td> <td> <dl class=""saveMne""> <dt><span class=""Mne"">¥</span>85</dt> </dl> </td> <td> <span id=""onlinepay_0_0"">景区支付</span> </td> <td> <a title=""预订"" href=""javascript:void(0)"" onclick=""GetOrderUrl(5886,13033);return false;"" class=""yd_butm"" rel=""nofollow""> 预 订</a> </td> </tr> <tr class=""listTr""> <td> <p class=""tkType""> <span class=""tkSpan"" style=""cursor: default"" id=""se_title_11""> <span>广州/香港街</span> </span> </p> <span fan=2 class=""xjq_new"" _left=""420"" _top=""581"" _width=""115"" _height=""22""> <s fxprice=""2"" danbaotype=""0"" class=""return""> <span class=""return_tt clearfix""></span><span class=""return_ct""><span class=""nob"">¥</span>2</span> </s> </span> </td> <td class=""sp_price""> <span class=""parGd"">¥95</span> </td> <td> <dl class=""saveMne""> <dt><span class=""Mne"">¥</span>85</dt> </dl> </td> <td> <span id=""onlinepay_0_0"">景区支付</span> </td> <td> <a title=""预订"" href=""javascript:void(0)"" onclick=""GetOrderUrl(5886,13034);return false;"" class=""yd_butm"" rel=""nofollow""> 预 订</a> </td> </tr> <tr class=""listTr""> <td> <p class=""tkType""> <span class=""tkSpan"" style=""cursor: default"" id=""se_title_12""> <span>屏岩洞府</span> </span> </p> <span fan=2 class=""xjq_new"" _left=""420"" _top=""581"" _width=""115"" _height=""22""> <s fxprice=""2"" danbaotype=""0"" class=""return""> <span class=""return_tt clearfix""></span><span class=""return_ct""><span class=""nob"">¥</span>2</span> </s> 
</span> </td> <td class=""sp_price""> <span class=""parGd"">¥80</span> </td> <td> <dl class=""saveMne""> <dt><span class=""Mne"">¥</span>70</dt> </dl> </td> <td> <span id=""onlinepay_0_0"">景区支付</span> </td> <td> <a title=""预订"" href=""javascript:void(0)"" onclick=""GetOrderUrl(5886,13039);return false;"" class=""yd_butm"" rel=""nofollow""> 预 订</a> </td> </tr> <tr class=""listTr""> <td> <span id=""jibie"">aaaa</span> ";

    // One "name||price1||price2||&&" record is expected for each of the three tickets in the fixture.
    const string expectedRows = "清明上河图||95||85||&&广州/香港街||95||85||&&屏岩洞府||80||70||&&";

    // Part 1: a RegexRule applied on its own to a private copy of the fixture.
    string firstCopy = rawContent;

    // Three groups are captured per ticket: t_name, t_price1 and price2.
    string regexExp = @"id=""se_title_\d+"">.*?<span>(?<t_name>.*?)</span>.*?""parGd"">.?(?<t_price1>\d+)</span>.*?""Mne"">.</span>(?<price2>\d+)</dt>";

    RegexRule standaloneRule = new RegexRule(regexExp);
    var standaloneResult = standaloneRule.FilterUsingRule(ref firstCopy);
    Assert.AreEqual(expectedRows, standaloneResult);

    // A name-only pattern was tried here earlier:
    //   id=""se_title_1"">\s*<span>(?<t_name>.*?)</span>

    // Part 2: the same regex rule (RuleNo 10) combined with a <title> begin/end rule (RuleNo 9) in a RuleSet.
    string secondCopy = rawContent;
    RuleSet combinedSet = new RuleSet();

    BaseRule ticketRule = new RegexRule(regexExp);
    ticketRule.RuleNo = 10;

    BaseRule titleRule = new BeginEndRule("<title>", "</title>", false, false, true, true);
    titleRule.RuleNo = 9;

    combinedSet.Rules.Add(ticketRule);
    combinedSet.Rules.Add(titleRule);

    string combinedResult = combinedSet.FilterUsingRuleSet(ref secondCopy, false);
    Console.Write(combinedResult);

    // The title text comes first in the expected output even though its rule was added second.
    Assert.AreEqual("启园" + expectedRows, combinedResult);
}