Ejemplo n.º 1
0
            /// <summary>
            /// Reads every news entry on the page and queues one follow-up request per entry that has a link.
            /// The extracted title, url, summary and view count are attached to the request as properties.
            /// </summary>
            /// <param name="context">Data-flow context whose <c>Selectable</c> is queried and which receives the follow-up requests.</param>
            /// <returns>An already completed task; all work is done synchronously.</returns>
            protected override Task ParseAsync(DataFlowContext context)
            {
                var newsList = context.Selectable.SelectList(Selectors.XPath(".//div[@class='news_block']"));

                // NOTE(review): SelectList presumably may yield null when nothing matches; treat that as "no entries".
                if (newsList == null)
                {
                    return Task.CompletedTask;
                }

                foreach (var news in newsList)
                {
                    var title   = news.Select(Selectors.XPath(".//h2[@class='news_entry']"))?.Value;
                    var url     = news.Select(Selectors.XPath(".//h2[@class='news_entry']/a/@href"))?.Value;
                    var summary = news.Select(Selectors.XPath(".//div[@class='entry_summary']"))?.Value;

                    // Guard Value as well as the node: the other fields already tolerate a null Value,
                    // and calling Replace on a null string would abort parsing of the whole page.
                    var views   = news.Select(Selectors.XPath(".//span[@class='view']"))?.Value?.Replace(" 人浏览", "");

                    // Entries without a link cannot be followed, so they are skipped.
                    if (string.IsNullOrWhiteSpace(url))
                    {
                        continue;
                    }

                    // NOTE(review): new Uri(string) throws UriFormatException for relative hrefs — confirm the site emits absolute links.
                    var request = context.CreateNewRequest(new Uri(url));
                    request.Properties.Add("title", title);
                    request.Properties.Add("url", url);
                    request.Properties.Add("summary", summary);
                    request.Properties.Add("views", views);

                    context.AddFollowRequests(request);
                }

                return Task.CompletedTask;
            }
        /// <summary>
        /// Handles one parsed video-list page: inserts videos that are not yet in the database,
        /// queues the per-video follow-up requests (uploader, tags, description, cover image)
        /// and requests the next list page unless too many consecutive pages contained nothing new.
        /// </summary>
        /// <param name="context">Data-flow context that receives follow-up requests; its request properties supply "pageNo" and "rid".</param>
        /// <param name="parseObj">Deserialized list response; the call is a no-op when it is null or carries no archives.</param>
        public override void OnHanlder(DataFlowContext context, BilibiliListRet parseObj)
        {
            // A missing payload, data node or archive array is treated the same as an empty page.
            var archives = parseObj?.data?.archives;
            if (archives == null || archives.Length == 0)
            {
                return;
            }

            using var db = DBSet.GetCon(DBSet.SqliteDBName.Bilibili);

            int newCount = 0;

            foreach (var item in archives)
            {
                var av = new AV()
                {
                    Id        = item.aid,
                    bvId      = item.bvid,
                    copyright = item.copyright,
                    ctime     = item.ctime,
                    ctime2    = item.ctime.UnixToDateTime().ToLongDateString(),
                    pic       = item.pic,
                    title     = item.title,
                    videos    = item.videos,
                    view      = item.stat.view,
                    rid       = item.tid,
                    UpId      = item.owner.mid,
                    cid       = item.cid,
                };

                av.stat = item.stat;
                var existsDB = db.SingleById <AV>(av.Id);

                if (existsDB != null)
                {
                    // This video was stored before, so skip it.
                    Console.WriteLine($"exits {existsDB.title}");
                    continue;
                }

                db.Insert(av);
                newCount++;

                // Basic uploader info is available at https://api.bilibili.com/x/web-interface/card?mid=3630684&photo=1
                // No extra check here: whenever a new video appears, the uploader info is refreshed.
                context.AddFollowRequests(UpProcess.CreateRquerst(av.UpId));

                // Crawl tag info. It may need repeated refreshing, but probably only within about a month
                // of the video's release: https://api.bilibili.com/x/web-interface/view/detail/tag?aid=286927170
                context.AddFollowRequests(TagProcess.CreateRquerst(av.Id));

                // TODO: if the video has multiple parts, also fetch the per-part data https://api.bilibili.com/x/player/pagelist?bvid=BV1wf4y1X7ka

                // Fetch the video description https://api.bilibili.com/x/web-interface/archive/desc?aid=286927170
                context.AddFollowRequests(DescProcess.CreateRquerst(av.Id));

                // TODO: periodically refresh the video stats https://api.bilibili.com/x/web-interface/archive/stat?aid=286927170

                // TODO: read comments https://api.bilibili.com/x/v2/reply?pn=2&type=1&oid=286927170&sort=0
                //  paged replies to a comment https://api.bilibili.com/x/v2/reply/reply?&pn=2&type=1&oid=244913305&ps=10&root=3602777175

                // TODO: fetch danmaku (bullet comments) http://comment.bilibili.com/245666614.xml — this one is keyed by cid

                // Crawl the cover image.
                context.AddFollowRequests(ImageProcess.CreateRequest(av));
            }

            // Count consecutive pages that produced nothing new; any page with a new video resets the streak.
            // (The previous test, newCount > -1, was always true, so the reset branch could never run.)
            if (newCount == 0)
            {
                notfindCount++;
            }
            else
            {
                notfindCount = 0;
            }

            // Keep paging until 20 pages in a row contained only already-known videos.
            if (notfindCount < 20)
            {
                Console.WriteLine("getNextPage");
                var page = (int)context.Request.Properties["pageNo"];
                var tid  = (int)context.Request.Properties["rid"];

                context.AddFollowRequests(CreateListRequest(tid, page + 1));
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Parses the response of a request: makes sure the context has a selectable, runs
        /// <c>ParseAsync</c>, then gathers follow-up requests from the registered queriers
        /// and adds the valid ones to the context.
        /// </summary>
        /// <param name="context">Processing context; both it and its response must be non-null.</param>
        /// <returns>A task that completes once parsing is done and the follow-up requests have been added.</returns>
        public override async Task HandleAsync(DataFlowContext context)
        {
            context.NotNull(nameof(context));
            context.Response.NotNull(nameof(context.Response));

            if (!IsValidRequest(context.Request))
            {
                Logger.LogInformation(
                    $"{GetType().Name} ignore parse request {context.Request.RequestUri}, {context.Request.Hash}");
                return;
            }

            if (context.Selectable == null)
            {
                if (SelectableBuilder != null)
                {
                    // A custom builder takes precedence over content sniffing.
                    context.Selectable = SelectableBuilder(context);
                }
                else
                {
                    var text = context.Response.ReadAsString().TrimStart();

                    // Ordinal, case-insensitive prefix checks so that documents such as
                    // "<!doctype html>", "<html lang=...>" or legacy "<!DOCTYPE html PUBLIC ...>"
                    // are still recognised as HTML, independent of the current culture.
                    var looksLikeHtml =
                        text.StartsWith("<!DOCTYPE html", System.StringComparison.OrdinalIgnoreCase) ||
                        text.StartsWith("<html", System.StringComparison.OrdinalIgnoreCase);

                    if (looksLikeHtml)
                    {
                        context.Selectable = CreateHtmlSelectable(context, text);
                    }
                    else
                    {
                        try
                        {
                            var token = (JObject)JsonConvert.DeserializeObject(text);
                            context.Selectable = new JsonSelectable(token);
                        }
                        catch
                        {
                            // Deliberate best-effort: anything that is not a JSON object
                            // (parse failure or a non-object root) is handled as plain text.
                            context.Selectable = new TextSelectable(text);
                        }
                    }
                }
            }

            await ParseAsync(context);

            // Collect every querier's result first, then validate and add them in one pass.
            var requests = new List <Request>();

            if (_followRequestQueriers != null)
            {
                foreach (var followRequestQuerier in _followRequestQueriers)
                {
                    var followRequests = followRequestQuerier(context);
                    if (followRequests != null)
                    {
                        requests.AddRange(followRequests);
                    }
                }
            }

            foreach (var request in requests)
            {
                if (!IsValidRequest(request))
                {
                    continue;
                }

                // Force Owner (and Agent) here so that a caller forgetting to set them does not cause errors.
                request.Owner = context.Request.Owner;
                request.Agent = context.Response.Agent;
                context.AddFollowRequests(request);
            }
        }