Ejemplo n.º 1
0
        /// <summary>
        /// Runs a feed definition against every address expanded from <c>feed.Address</c> and collects
        /// one extraction result per address, optionally downloading the links found in the extracted content.
        /// </summary>
        /// <param name="feed">
        /// Feed definition to test. Its <c>Address</c> is overwritten with each expanded address in turn,
        /// and its <c>RuiJiExpression</c> is parsed to drive the extraction.
        /// </param>
        /// <param name="down">
        /// When true, every line of the extracted content that is a well-formed absolute URI is requested
        /// and inserted into a <c>FileStorage</c> rooted at "www/download" under the application base directory.
        /// </param>
        /// <param name="debug">
        /// When false, <c>CrawlTaskFunc.ClearContent</c> is applied to each result before it is returned.
        /// </param>
        /// <returns>
        /// The list of extraction results (an empty <c>ExtractResult</c> stands in for an address when the feed
        /// has no expression), or the caught <see cref="Exception"/> object if anything throws.
        /// </returns>
        public object TestFeed(FeedModel feed, [FromUri] bool down, [FromUri] bool debug = false)
        {
            try
            {
                var compile = new UrlCompile();
                var addrs   = compile.GetResult(feed.Address);
                var results = new List <ExtractResult>();

                foreach (var addr in addrs)
                {
                    feed.Address = addr.ToString();
                    var job  = new FeedJob();
                    var snap = job.DoTask(feed, false);

                    // Without an expression there is nothing to extract; keep a placeholder so the
                    // result list still has one entry per address.
                    if (string.IsNullOrEmpty(feed.RuiJiExpression))
                    {
                        results.Add(new ExtractResult());
                        continue;
                    }

                    var block = RuiJiBlockParser.ParserBlock(feed.RuiJiExpression);

                    var result = RuiJiExtractor.Extract(snap.Content, block);

                    if (!debug)
                    {
                        CrawlTaskFunc.ClearContent(result);
                    }

                    // Skip the download step when there is no content to read links from, instead of
                    // throwing and discarding the results already collected for earlier addresses.
                    if (down && result.Content != null)
                    {
                        var s = new FileStorage(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "www", "download"));

                        var files = result.Content.ToString().Replace("\r\n", "\n").Split('\n');
                        foreach (var file in files)
                        {
                            // Trim once and use the same value for validation, the request and the stored
                            // URL, so surrounding whitespace (e.g. a stray '\r' or trailing space) neither
                            // rejects a valid link nor ends up in the requested address.
                            var url = file.Trim();

                            if (url.Length == 0 || !Uri.IsWellFormedUriString(url, UriKind.Absolute))
                            {
                                continue;
                            }

                            var res = Crawler.Request(url);
                            var c   = new DownloadContentModel();
                            c.Url   = url;
                            c.IsRaw = res.IsRaw;
                            c.Data  = res.Data;

                            s.Insert(c);
                        }
                    }

                    results.Add(result);
                }

                return(results);
            }
            catch (Exception ex)
            {
                // The caller receives the exception object itself as the response payload.
                return(ex);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Requests a proxy-list page, extracts its tiles and paging links with a RuiJi expression, and,
        /// when both paging links and tiles are present, hands the result to
        /// <c>PagingExtractor.CrawlPage</c> with a callback that serializes the tiles it receives to JSON
        /// and inserts them into a <c>FileStorage</c> under "www/download".
        /// </summary>
        public void TestPaging()
        {
            // Extraction rules: one tile per table row (with ip/port metas) plus the paging links.
            const string expression = @"
[tile]
	css table.table-bordered tr:gt(0):ohtml

	[meta]
	#ip
	css td[data-title='IP']:text

    #port
    css td[data-title='PORT']:text

[paging]
css #listnav a[href]";

            var pageCrawler  = new RuiJiCrawler();
            var startRequest = new Request("https://www.kuaidaili.com/free/inha/10");
            var html         = pageCrawler.Request(startRequest).Data.ToString();

            var parsedBlock = RuiJiBlockParser.ParserBlock(expression);
            var extracted   = RuiJiExtractor.Extract(html, parsedBlock);

            var hasPaging = extracted.Paging != null && extracted.Paging.Count > 0;

            if (hasPaging && extracted.Tiles != null)
            {
                var downloadDir = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "www", "download");
                var store       = new FileStorage(downloadDir);

                PagingExtractor.CrawlPage(startRequest.Uri, extracted, parsedBlock, (pageUri, pageResult) =>
                {
                    // Each callback invocation is stored as one non-raw JSON document keyed by the URI path.
                    var item = new DownloadContentModel
                    {
                        Url   = pageUri.AbsolutePath.Trim(),
                        IsRaw = false,
                        Data  = JsonConvert.SerializeObject(pageResult.Tiles)
                    };

                    store.Insert(item);
                }, int.MaxValue);
            }

            // The test passes as long as nothing above throws.
            Assert.True(true);
        }