예제 #1
0
        /// <summary>
        /// Requests a blog post page through <c>RuiJiCrawler</c>, parses a <c>[meta]</c>
        /// extract expression (title, author, date, words_i, content) with
        /// <c>RuiJiParser</c>, and runs <c>RuiJiExtractor.Extract</c> over the page content.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this presumably performs a live HTTP request to the URL below,
        /// so it behaves like an integration test - confirm before running it in CI.
        /// </remarks>
        public void TestExtractMeta()
        {
            var crawler = new RuiJiCrawler();
            var request = new Request("https://my.oschina.net/zhupingqi/blog/1826317");

            var response = crawler.Request(request);

            // Fail with a clear assertion instead of a NullReferenceException on ToString().
            Assert.NotNull(response.Data);
            var content = response.Data.ToString();

            var parser = new RuiJiParser();

            // The rule text is passed to the parser verbatim; tabs and line breaks are
            // part of the string and are kept exactly as written.
            var eb     = parser.ParseExtract(@"
[meta]
	#title
	css h1.header:text

	#author
	css div.blog-meta .avatar + span:text

	#date
	css div.blog-meta > div.item:first:text
	regS /发布于/ 1

	#words_i
	css div.blog-meta > div.item:eq(1):text
	regS / / 1

	#content
	css #articleContent:html"    );

            var result = RuiJiExtractor.Extract(content, eb.Result);

            // Previously Assert.True(true), which could never fail; at minimum require
            // that the extractor produced a result object.
            Assert.NotNull(result);
        }
예제 #2
0
        /// <summary>
        /// Requests the blog index page through <c>RuiJiCrawler</c>, parses a two-line
        /// extract expression (a <c>css</c> rule followed by an <c>exp</c> rule) with
        /// <c>RuiJiParser</c>, and runs <c>RuiJiExtractor.Extract</c> over the page content.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this presumably performs a live HTTP request to the URL below,
        /// so it behaves like an integration test - confirm before running it in CI.
        /// </remarks>
        public void TestExtract2()
        {
            var crawler = new RuiJiCrawler();
            var request = new Request("https://www.oschina.net/blog");

            var response = crawler.Request(request);

            // Fail with a clear assertion instead of a NullReferenceException on ToString().
            Assert.NotNull(response.Data);
            var content = response.Data.ToString();

            var parser = new RuiJiParser();
            var eb     = parser.ParseExtract("css a.blog-title-link:[href]\nexp https://my.oschina.net/*/blog/*");
            var result = RuiJiExtractor.Extract(content, eb.Result);

            // Previously Assert.True(true), which could never fail; at minimum require
            // that the extractor produced a result object.
            Assert.NotNull(result);
        }
예제 #3
0
        /// <summary>
        /// Requests a category listing page through <c>RuiJiCrawler</c>, parses a
        /// <c>[tile]</c> extract expression containing a nested <c>[meta]</c> section
        /// (title, summary) with <c>RuiJiParser</c>, and runs
        /// <c>RuiJiExtractor.Extract</c> over the page content.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this presumably performs a live HTTP request to the URL below,
        /// so it behaves like an integration test - confirm before running it in CI.
        /// </remarks>
        public void TestExtractTile()
        {
            var crawler = new RuiJiCrawler();
            var request = new Request("http://www.ruijihg.com/archives/category/tech/bigdata");

            var response = crawler.Request(request);

            // Fail with a clear assertion instead of a NullReferenceException on ToString().
            Assert.NotNull(response.Data);
            var content = response.Data.ToString();

            var parser = new RuiJiParser();

            // The rule text is passed to the parser verbatim; leading spaces, tabs and
            // line breaks are part of the string and are kept exactly as written.
            var eb     = parser.ParseExtract(@"[tile]
css article:html

    [meta]
	#title
	css .entry-header:text

	#summary
	css .entry-header + p:text
	ex /Read more »/ -e"    );

            var result = RuiJiExtractor.Extract(content, eb.Result);

            // Previously Assert.True(true), which could never fail; at minimum require
            // that the extractor produced a result object.
            Assert.NotNull(result);
        }