Esempio n. 1
0
        public object TestFeed(FeedModel feed, [FromUri] bool down, [FromUri] bool debug = false)
        {
            try
            {
                var compile = new UrlCompile();
                var addrs   = compile.GetResult(feed.Address);
                var results = new List <ExtractResult>();

                foreach (var addr in addrs)
                {
                    feed.Address = addr.ToString();
                    var job  = new FeedJob();
                    var snap = job.DoTask(feed, false);

                    if (string.IsNullOrEmpty(feed.RuiJiExpression))
                    {
                        results.Add(new ExtractResult());
                        continue;
                    }

                    var block = RuiJiBlockParser.ParserBlock(feed.RuiJiExpression);

                    var result = RuiJiExtractor.Extract(snap.Content, block);

                    if (!debug)
                    {
                        CrawlTaskFunc.ClearContent(result);
                    }

                    if (down)
                    {
                        var s = new FileStorage(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "www", "download"));

                        var files = result.Content.ToString().Replace("\r\n", "\n").Split('\n');
                        foreach (var file in files)
                        {
                            if (!string.IsNullOrEmpty(file) && Uri.IsWellFormedUriString(file, UriKind.Absolute))
                            {
                                var res = Crawler.Request(file);
                                var c   = new DownloadContentModel();
                                c.Url   = file.Trim();
                                c.IsRaw = res.IsRaw;
                                c.Data  = res.Data;

                                s.Insert(c);
                            }
                        }
                    }

                    results.Add(result);
                }

                return(results);
            }
            catch (Exception ex)
            {
                return(ex);
            }
        }
Esempio n. 2
0
        public object Run(object t, ParallelTask task)
        {
            var model = t as CrawlTaskModel;

            var results  = new List <ExtractResult>();
            var reporter = task.Progress as IProgress <string>;

            reporter.Report("正在读取Feed记录");
            var feed = FeedLiteDb.GetFeed(model.FeedId);

            reporter.Report("正在下载 Feed");

            var compile = new CompileFeedAddress();

            feed.Address = compile.Compile(feed.Address);

            var job  = new FeedJob();
            var snap = job.DoTask(feed, false);

            reporter.Report("Feed 下载完成");

            var block = RuiJiExpression.ParserBlock(feed.RuiJiExpression);

            var feedResult = RuiJiExtracter.Extract(snap.Content, block);

            results.Add(feedResult);

            reporter.Report("正在提取Feed地址");
            var j    = new FeedExtractJob();
            var urls = j.ExtractAddress(snap);

            reporter.Report("Feed地址提取完成");

            foreach (var url in urls)
            {
                reporter.Report("正在提取地址 " + url);
                var r = ContentQueue.Instance.Extract(url);

                results.AddRange(r);
            }

            reporter.Report("计算完成");

            if (!model.IncludeContent)
            {
                results.ForEach((m) =>
                {
                    ClearContent(m);
                });
            }

            return(results);
        }
Esempio n. 3
0
        public object Run(object t, ParallelTask task)
        {
            var model = t as CrawlTaskModel;

            var results  = new List <object>();
            var reporter = task.Progress as IProgress <string>;

            reporter.Report("正在读取Feed记录");
            var feed = FeedLiteDb.GetFeed(model.FeedId);

            reporter.Report("正在下载 Feed");

            var compile = new UrlCompile();
            var addrs   = compile.GetResult(feed.Address);

            foreach (var addr in addrs)
            {
                feed.Address = addr.ToString();

                var job  = new FeedJob();
                var snap = job.DoTask(feed, false);
                reporter.Report("Feed 下载完成");

                var block = RuiJiBlockParser.ParserBlock(feed.RuiJiExpression);

                var feedResult = RuiJiExtractor.Extract(snap.Content, block);
                results.Add(feedResult);

                reporter.Report("正在提取Feed地址");
                var j    = new FeedExtractJob();
                var urls = j.ExtractAddress(snap);
                reporter.Report("Feed地址提取完成");

                if (!string.IsNullOrEmpty(snap.RuiJiExpression))
                {
                    foreach (var url in urls)
                    {
                        reporter.Report("正在提取地址 " + url);
                        var result = Cooperater.GetResult(url);

                        if (result != null)
                        {
                            var cm = new ContentModel();
                            cm.Id    = model.FeedId;
                            cm.Url   = url;
                            cm.Metas = result.Metas;
                            cm.CDate = DateTime.Now;

                            results.Add(cm);
                        }
                    }
                }

                reporter.Report("计算完成");

                if (!model.IncludeContent)
                {
                    results.ForEach((m) =>
                    {
                        ClearContent(m);
                    });
                }
            }

            return(results);
        }