예제 #1
0
        /// <summary>
        /// Exercises <c>RuleSet.FilterUsingRuleSet</c> with begin/end rules in two scenarios:
        /// one with <c>NeedImageLocalizer</c> switched on, and one where the second argument
        /// is <c>true</c> and the set carries a <c>Code</c>.
        /// </summary>
        public void FilterUsingRuleSetTest()
        {
            // Begin/end rules for the three tag pairs that occur in the sample inputs.
            BaseRule divRule = new BeginEndRule("<div>", "</div>", false, false, true, true) { RuleNo = 10 };
            BaseRule spanRule = new BeginEndRule("<span>", "</span>", false, false, true, true) { RuleNo = 11 };
            BaseRule imageDivRule = new BeginEndRule("<div id=img>", "</div>", false, false, true, true) { RuleNo = 12 };

            // Scenario 1: all three rules, with image localization enabled.
            RuleSet localizingSet = new RuleSet();
            localizingSet.Rules.Add(divRule);
            localizingSet.Rules.Add(spanRule);
            localizingSet.Rules.Add(imageDivRule);
            localizingSet.NeedImageLocalizer = true;

            string imageHtml = @"1<div>a</div>2<span>b</span>3<div id=img><img src=""http://www.tourol.cn/Img/slide/1.png""/></div>";

            // The test expects the image source to be rewritten to /FetchImages/<abs(hash of the original URL)>.png.
            string localizedExpected = @"ab<img src=""/FetchImages/" + Math.Abs( "http://www.tourol.cn/Img/slide/1.png".GetHashCode())+ ".png\"/>";
            string localizedActual = localizingSet.FilterUsingRuleSet(ref imageHtml, false);
            Assert.AreEqual(localizedExpected, localizedActual);

            // Scenario 2: JSON-style output. With the second argument set to true, the test
            // expects the extracted text to be emitted as Code:"value".
            RuleSet namedSet = new RuleSet();
            namedSet.Rules.Add(divRule);
            namedSet.Rules.Add(spanRule);
            namedSet.Code = "name";

            string plainHtml = "1<div>a</div>2<span>b</span>3";
            string namedActual = namedSet.FilterUsingRuleSet(ref plainHtml, true);
            Assert.AreEqual(@"name:""ab""", namedActual);
        }
예제 #2
0
        /// <summary>
        /// Runs two single-rule sets against a fragment of real page markup and checks that
        /// the first yields the heading text and the second yields the grade text.
        /// </summary>
        public void FilterUsingRuleSetTestReal()
        {
            string pageFragment = "<div id=\"portal-block-445000270656\" class=\"udiyblock\" type=\"CommonSource\"> <div id=\"jqlast_maincontent\" class=\"jqlast_main_title\">"
                + "<h1>仙都风景名胜区</h1><span class=\"grade\">AAAA</span><span onmouseover=\"show_dk(event,this)\" onmouseout=\"hide_dk()\" class=\"cosPicLast s_dpjj_img\"></span><div class=\"thDiv\"><div class=\"thDiv\">"
                + "<span id=\"checkGuid_0_0\" class=\"checkGuid yanKer\">"
                + "<div class=\"nopicYk none\" style=\"display: none; \">"
                + "<span class=\"nopicYk_head\"></span>"
                + "<div class=\"nopicYk_mit\">"
                + "<p class=\"nopicYk_p\">该景区已参加验客大赛,赶快写博客、打擂台,赢万元轿车吧!"
                + "<a href=\"http://www.17u.com/special/yanke/\" target=\"_blank\" title=\"什么是验客大赛?\" rel=\"nofollow\">(什么是验客大赛?)</a>"
                + "</p></div></div></span></div></div></div><span class=\"list_sale\" id=\"last_sale\" style=\"display: block; \"><span id=\"last_sale_t\">8分钟</span>前有人预订了该景点</span></div>";

            // Simulate two rule sets.

            // First set: the title, taken from between the main-content <h1> and its closing tag.
            BaseRule titleRule = new BeginEndRule("<div id=\"jqlast_maincontent\" class=\"jqlast_main_title\"><h1>", "</h1>", false, false, true, true)
            {
                RuleNo = 10,
                Name = "标题rule"
            };
            RuleSet titleSet = new RuleSet { Name = "标题" };
            titleSet.Rules.Add(titleRule);
            titleSet.Code = "title";

            // Second set: the grade, taken from inside the "grade" span.
            BaseRule gradeRule = new BeginEndRule("<span class=\"grade\">", "</span>", false, false, true, true)
            {
                RuleNo = 10,
                Name = "等级rule"
            };
            RuleSet gradeSet = new RuleSet { Name = "等级", Code = "level" };
            gradeSet.Rules.Add(gradeRule);

            // Both calls receive the same variable by ref, so the title set must run before the grade set.
            string titleActual = titleSet.FilterUsingRuleSet(ref pageFragment, false);
            Assert.AreEqual("仙都风景名胜区", titleActual);

            string gradeActual = gradeSet.FilterUsingRuleSet(ref pageFragment, false);
            Assert.AreEqual("AAAA", gradeActual);
        }
예제 #3
0
        /// <summary>
        /// Checks a <c>RegexRule</c> with three named groups against a ticket-table fragment,
        /// first on its own and then inside a <c>RuleSet</c> together with a title rule.
        /// </summary>
        public void FilterUsingRuleTest()
        {
            string pageHtml = @"	 <html><meta keywor......../>
            <title>启园</title>
            <p class=""tkType"">
                                                    <span class=""tkSpan"" style=""cursor: default"" id=""se_title_10"">
                                                        <span>清明上河图</span>
                                                    </span>
                                                </p>
                                                    <span fan=2  class=""xjq_new"" _left=""420"" _top=""581"" _width=""115"" _height=""22"">
                                                        <s fxprice=""2"" danbaotype=""0"" class=""return"">
                                                            <span class=""return_tt clearfix""></span><span class=""return_ct""><span class=""nob"">¥</span>2</span>
                                                        </s>
                                                    </span>
                                            </td>
                                            <td class=""sp_price"">
                                                <span class=""parGd"">¥95</span>
                                            </td>
                                            <td>
                                                <dl class=""saveMne"">
                                                    <dt><span class=""Mne"">¥</span>85</dt>
                                                </dl>
                                            </td>
                                            <td>
                                                <span id=""onlinepay_0_0"">景区支付</span>
                                            </td>
                                            <td>
                                                <a title=""预订"" href=""javascript:void(0)"" onclick=""GetOrderUrl(5886,13033);return false;"" class=""yd_butm"" rel=""nofollow"">
                                                    预&nbsp;订</a>
                                            </td>
                                        </tr>
                                        <tr class=""listTr"">
                                            <td>
                                                <p class=""tkType"">
                                                    <span class=""tkSpan"" style=""cursor: default"" id=""se_title_11"">
                                                        <span>广州/香港街</span>
                                                    </span>
                                                </p>
                                                    <span fan=2  class=""xjq_new"" _left=""420"" _top=""581"" _width=""115"" _height=""22"">
                                                        <s fxprice=""2"" danbaotype=""0"" class=""return"">
                                                            <span class=""return_tt clearfix""></span><span class=""return_ct""><span class=""nob"">¥</span>2</span>
                                                        </s>
                                                    </span>
                                            </td>
                                            <td class=""sp_price"">
                                                <span class=""parGd"">¥95</span>
                                            </td>
                                            <td>
                                                <dl class=""saveMne"">
                                                    <dt><span class=""Mne"">¥</span>85</dt>
                                                </dl>
                                            </td>
                                            <td>
                                                <span id=""onlinepay_0_0"">景区支付</span>
                                            </td>
                                            <td>
                                                <a title=""预订"" href=""javascript:void(0)"" onclick=""GetOrderUrl(5886,13034);return false;"" class=""yd_butm"" rel=""nofollow"">
                                                    预&nbsp;订</a>
                                            </td>
                                        </tr>
                                        <tr class=""listTr"">
                                            <td>
                                                <p class=""tkType"">
                                                    <span class=""tkSpan"" style=""cursor: default"" id=""se_title_12"">
                                                        <span>屏岩洞府</span>
                                                    </span>
                                                </p>
                                                    <span fan=2  class=""xjq_new"" _left=""420"" _top=""581"" _width=""115"" _height=""22"">
                                                        <s fxprice=""2"" danbaotype=""0"" class=""return"">
                                                            <span class=""return_tt clearfix""></span><span class=""return_ct""><span class=""nob"">¥</span>2</span>
                                                        </s>
                                                    </span>
                                            </td>
                                            <td class=""sp_price"">
                                                <span class=""parGd"">¥80</span>
                                            </td>
                                            <td>
                                                <dl class=""saveMne"">
                                                    <dt><span class=""Mne"">¥</span>70</dt>
                                                </dl>
                                            </td>
                                            <td>
                                                <span id=""onlinepay_0_0"">景区支付</span>
                                            </td>
                                            <td>
                                                <a title=""预订"" href=""javascript:void(0)"" onclick=""GetOrderUrl(5886,13039);return false;"" class=""yd_butm"" rel=""nofollow"">
                                                    预&nbsp;订</a>
                                            </td>
                                        </tr>
                                        <tr class=""listTr"">
                                            <td>
            <span id=""jibie"">aaaa</span>
                            ";

            // One pattern with three named groups: ticket name, list price, discounted price.
            // An earlier, name-only variant of the pattern, kept for reference:
            //   id=""se_title_\d+""\>\r*\s+\<span\>(?<t_name>.*?)\</span\>
            string ticketPattern = @"id=""se_title_\d+"">.*?<span>(?<t_name>.*?)</span>.*?""parGd"">.?(?<t_price1>\d+)</span>.*?""Mne"">.</span>(?<price2>\d+)</dt>";

            // Scenario 1: the regex rule on its own. The input is passed by ref, so it gets a
            // dedicated variable rather than sharing pageHtml with the next scenario.
            // The test expects every group value to be followed by "||" and every match by "&&".
            string standaloneInput = pageHtml;
            var standaloneActual = new RegexRule(ticketPattern).FilterUsingRule(ref standaloneInput);
            Assert.AreEqual("清明上河图||95||85||&&广州/香港街||95||85||&&屏岩洞府||80||70||&&",
                standaloneActual);

            // Scenario 2: the same regex rule combined with a <title> rule in one set.
            string combinedInput = pageHtml;
            RuleSet combinedSet = new RuleSet();
            BaseRule ticketRule = new RegexRule(ticketPattern) { RuleNo = 10 };
            BaseRule titleRule = new BeginEndRule("<title>","</title>",false,false,true,true) { RuleNo = 9 };

            // The title rule is added second, yet the expected output starts with the title.
            // NOTE(review): presumably the set applies rules in RuleNo order; confirm in RuleSet.
            combinedSet.Rules.Add(ticketRule);
            combinedSet.Rules.Add(titleRule);

            string combinedActual = combinedSet.FilterUsingRuleSet(ref combinedInput, false);
            Console.Write(combinedActual);
            Assert.AreEqual("启园清明上河图||95||85||&&广州/香港街||95||85||&&屏岩洞府||80||70||&&",
                combinedActual);
        }