예제 #1
0
        /// <summary>
        /// Exercises <c>RegPattern.ReplaceHtml</c> in both modes used below:
        /// with <c>false</c> the expectations keep the text between the removed tags,
        /// with <c>true</c> the expectations drop the element together with its content
        /// (script/style) or drop the self-standing tag (img/br).
        /// </summary>
        public void testReplaceHtml()
        {
            // ---- third argument false: tag markup is stripped, inner text survives ----

            String fontSource = @"<p>
  Hello. <font size=""2"">I am size 2</font>
  <font color=""red"">and I am red</font>
</p>";
            String fontExpected = @"<p>
  Hello. I am size 2
  and I am red
</p>";
            String fontActual = RegPattern.ReplaceHtml(fontSource, "font", false);
            Assert.AreEqual(fontExpected, fontActual);

            String spanSource   = @"<p><span style=""color:red;"">kkk</span> aaaaaaaa</p>";
            String spanExpected = @"<p>kkk aaaaaaaa</p>";
            String spanActual   = RegPattern.ReplaceHtml(spanSource, "span", false);
            Assert.AreEqual(spanExpected, spanActual);

            String anchorSource   = @"<p>zzz <a href=""ccc.html"">aaa name</a> pppp</p>";
            String anchorExpected = @"<p>zzz aaa name pppp</p>";
            String anchorActual   = RegPattern.ReplaceHtml(anchorSource, "a", false);
            Assert.AreEqual(anchorExpected, anchorActual);

            // ---- third argument true: the whole element disappears ----

            String blockSource = @"<p>
<script>         alert('sss');       </script>
<style>
p {color:red;}
</style>
  Hello. <font size=""2"">I am size 2</font>
  <font color=""red"">and I am red</font>
</p>";
            // Only the script and style elements are expected to vanish; the font tags stay.
            String blockExpected = @"<p>


  Hello. <font size=""2"">I am size 2</font>
  <font color=""red"">and I am red</font>
</p>";
            String withoutScript = RegPattern.ReplaceHtml(blockSource, "script", true);
            String withoutScriptAndStyle = RegPattern.ReplaceHtml(withoutScript, "style", true);
            Assert.AreEqual(blockExpected, withoutScriptAndStyle);

            // img: self-closed and unclosed forms, lower- and upper-case tag names.
            String imgSource   = @" <p>  pic1 <img src=""eeeeeeee.jpg"" />   </p>  pic2<IMG src=""xxxxxx.gif"" >  ";
            String imgExpected = @" <p>  pic1    </p>  pic2  ";
            String imgActual   = RegPattern.ReplaceHtml(imgSource, "img", true);
            Assert.AreEqual(imgExpected, imgActual);

            // br: every spelling variant (<br/>, <BR>, <br   />, <br />) is expected to be removed.
            String brSource   = @" <p>  pic1 <br/>    </p>  pic2<BR> <div><br   /></div> <br /> ";
            String brExpected = @" <p>  pic1     </p>  pic2 <div></div>  ";
            String brActual   = RegPattern.ReplaceHtml(brSource, "br", true);
            Assert.AreEqual(brExpected, brActual);
        }
예제 #2
0
        /// <summary>
        /// Removes the HTML tags configured in <c>spiderTemplate.DetailClearTag</c>
        /// (a comma-separated list of tag names) from <paramref name="input"/>.
        /// </summary>
        /// <remarks>
        /// <c>font</c>, <c>span</c> and <c>a</c> are passed to <c>RegPattern.ReplaceHtml</c>
        /// with <c>false</c> (strip the tag, keep its content); every other tag is passed
        /// with <c>true</c> (strip the tag together with its content). Content-removing
        /// tags are processed first, then the tag-only ones.
        /// Tag names are trimmed and lower-cased culture-invariantly; blank entries
        /// (e.g. from a trailing comma) are ignored.
        /// </remarks>
        /// <param name="input">The page HTML to filter.</param>
        /// <param name="spiderTemplate">Template whose <c>DetailClearTag</c> lists the tags to clear.</param>
        /// <returns>The filtered HTML, or <paramref name="input"/> unchanged when no usable tag is configured.</returns>
        private string filterPage(string input, SpiderTemplate spiderTemplate)
        {
            if (strUtil.IsNullOrEmpty(spiderTemplate.DetailClearTag))
            {
                return(input);
            }

            // Tags removed together with their content.
            List <String> contentTags = new List <String>();
            // Tags whose markup is removed while their content is kept.
            List <String> markupOnlyTags = new List <String>();

            foreach (String rawTag in spiderTemplate.DetailClearTag.Split(','))
            {
                // Normalize so that "font, SPAN" behaves like "font,span" regardless of
                // the current culture (e.g. Turkish dotless i for "IMG").
                String tag = rawTag.Trim().ToLowerInvariant();

                // Split never returns an empty array, but it does return empty entries
                // for stray commas; never hand an empty tag name to ReplaceHtml.
                if (tag.Length == 0)
                {
                    continue;
                }

                // font/span/a: only the tag is filtered, not its content; everything else loses its content too.
                if (tag == "font" || tag == "span" || tag == "a")
                {
                    markupOnlyTags.Add(tag);
                }
                else
                {
                    contentTags.Add(tag);
                }
            }

            if (contentTags.Count == 0 && markupOnlyTags.Count == 0)
            {
                return(input);
            }

            logger.Info("filterTag, input=" + input);

            // Pass 1: filter the tags together with the content inside them.
            foreach (String tag in contentTags)
            {
                logger.Info("tag=" + tag);
                input = RegPattern.ReplaceHtml(input, tag, true);
            }

            logger.Info("filterTag, clear tag1=" + input);

            // Pass 2: filter only the tags, keeping the content inside them.
            foreach (String tag in markupOnlyTags)
            {
                logger.Info("tag=" + tag);
                input = RegPattern.ReplaceHtml(input, tag, false);
            }

            logger.Info("filterTag, clear tag2=" + input);

            return(input);
        }