Пример #1
0
            /// <summary>
            /// Records the page URL and title as items, then queues one follow-up request for each
            /// distinct tag link found under the "post_rank" element. The tag's display text is
            /// attached to the request under the "tag" property.
            /// </summary>
            /// <param name="context">Data-flow context that supplies the response and the selectable document.</param>
            /// <returns>A completed task carrying <c>DataFlowResult.Success</c>.</returns>
            protected override Task<DataFlowResult> Parse(DataFlowContext context)
            {
                var response = context.GetResponse();

                context.AddItem("URL", response.Request.Url);
                context.AddItem("Title", context.GetSelectable().XPath(".//title").GetValue());

                // Maps tag URL -> tag display text.
                var tags = new Dictionary<string, string>();
                var tagNodes = context.GetSelectable().XPath("//*[@id=\"post_rank\"]/div[2]/div/div[@class='tag_div']/ul/li/a").Nodes();

                foreach (var node in tagNodes)
                {
                    var url = node.XPath("./@href").GetValue();
                    var name = node.GetValue();

                    // Dictionary.Add throws on a null key and on a repeated key. An anchor without
                    // an href, or two anchors pointing at the same URL, must not abort the parse.
                    if (string.IsNullOrWhiteSpace(url) || tags.ContainsKey(url))
                    {
                        continue;
                    }

                    tags.Add(url, name);
                    Console.WriteLine("url:" + url + " - name:" + name);
                }

                var requests = new List<Request>(tags.Count);

                foreach (var tag in tags)
                {
                    var request = new Request
                    {
                        Url = tag.Key,
                        OwnerId = response.Request.OwnerId,
                    };
                    request.Properties.Add("tag", tag.Value);

                    requests.Add(request);
                }

                context.AddTargetRequests(requests.ToArray());

                return Task.FromResult(DataFlowResult.Success);
            }
Пример #2
0
        /// <summary>
        /// Collects the pagination URLs of the detailed image view and queues one request per
        /// distinct URL, copying the "tag" and "referer" properties from the current request.
        /// </summary>
        /// <param name="context">Data-flow context that supplies the response and the selectable document.</param>
        public static void GetDetailPageUrl(DataFlowContext context)
        {
            var response = context.GetResponse();
            var pages = context.GetSelectable().XPath("//*[@id=\"pages\"]/a[not(@class)]/@href").GetValues();

            // Tracks URLs already queued so each page is requested only once.
            var seenPages = new HashSet<string>();
            var requestList = new List<Request>();

            foreach (var page in pages)
            {
                // Skip blank hrefs and anything already queued; HashSet.Add returns false on a repeat.
                if (string.IsNullOrWhiteSpace(page) || !seenPages.Add(page))
                {
                    continue;
                }

                var request = new Request
                {
                    Url = page,
                    OwnerId = response.Request.OwnerId,
                };
                request.AddProperty("tag", response.Request.GetProperty("tag"));
                request.AddProperty("referer", response.Request.GetProperty("referer"));
                requestList.Add(request);
            }

            if (requestList.Count > 0)
            {
                context.AddTargetRequests(requestList.ToArray());
            }
        }
Пример #3
0
            /// <summary>
            /// Prints the page title to the console in blue, then queues one request for each
            /// distinct pagination link found in the "pagesYY" block of the "listdiv" element.
            /// </summary>
            /// <param name="context">Data-flow context that supplies the response and the selectable document.</param>
            /// <returns>A completed task carrying <c>DataFlowResult.Success</c>.</returns>
            public override Task<DataFlowResult> Parse(DataFlowContext context)
            {
                var response = context.GetResponse();

                // Restore whatever colour the console had before, rather than forcing white,
                // and do so even if reading the title throws.
                var previousColor = Console.ForegroundColor;
                Console.ForegroundColor = ConsoleColor.Blue;
                try
                {
                    Console.WriteLine("第一页:" + context.GetSelectable().XPath(".//title").GetValue());
                }
                finally
                {
                    Console.ForegroundColor = previousColor;
                }

                var pages = context.GetSelectable().XPath("//*[@id=\"listdiv\"]/div[@class='pagesYY']/div/a[not(@class)]/@href").GetValues();

                // Tracks URLs already queued so each page is requested only once.
                var seenPages = new HashSet<string>();
                var requestList = new List<Request>();

                foreach (var page in pages)
                {
                    // Skip blank hrefs and anything already queued; HashSet.Add returns false on a repeat.
                    if (string.IsNullOrWhiteSpace(page) || !seenPages.Add(page))
                    {
                        continue;
                    }

                    requestList.Add(new Request
                    {
                        Url = page,
                        OwnerId = response.Request.OwnerId,
                    });
                }

                if (requestList.Count > 0)
                {
                    context.AddTargetRequests(requestList.ToArray());
                }

                return Task.FromResult(DataFlowResult.Success);
            }
Пример #4
0
            /// <summary>
            /// Records the page URL and title as items, queues one follow-up request for each
            /// distinct tag link found under the "post_rank" element, and calls
            /// <c>CreateDirByTag</c> with each tag's display text.
            /// </summary>
            /// <param name="context">Data-flow context that supplies the response and the selectable document.</param>
            /// <returns>A completed task carrying <c>DataFlowResult.Success</c>.</returns>
            public override Task<DataFlowResult> Parse(DataFlowContext context)
            {
                var response = context.GetResponse();

                context.AddItem("URL", response.Request.Url);
                context.AddItem("Title", context.GetSelectable().XPath(".//title").GetValue());

                // Maps tag URL -> tag display text.
                var tags = new Dictionary<string, string>();
                var tagNodes = context.GetSelectable().XPath("//*[@id=\"post_rank\"]/div[2]/div/div[@class='tag_div']/ul/li/a").Nodes();

                foreach (var node in tagNodes)
                {
                    var url = node.XPath("./@href").GetValue();
                    var name = node.GetValue();

                    // Dictionary.Add throws on a null key and on a repeated key. An anchor without
                    // an href, or two anchors pointing at the same URL, must not abort the parse.
                    if (string.IsNullOrWhiteSpace(url) || tags.ContainsKey(url))
                    {
                        continue;
                    }

                    tags.Add(url, name);
                    Console.WriteLine("url:" + url + " - name:" + name);
                }

                var requests = new List<Request>(tags.Count);

                foreach (var sub in tags)
                {
                    var request = new Request
                    {
                        Url = sub.Key,
                        OwnerId = response.Request.OwnerId
                    };
                    requests.Add(request);

                    CreateDirByTag(sub.Value);
                }

                context.AddTargetRequests(requests.ToArray());

                return Task.FromResult(DataFlowResult.Success);
            }
Пример #5
0
        /// <summary>
        /// Runs the parsing stage for one response: applies the optional <c>CanParse</c> filter,
        /// invokes the optional <c>Selectable</c> hook, calls <c>Parse</c>, and finally queues
        /// follow-up requests built from the URLs returned by the optional <c>Follow</c> hook.
        /// </summary>
        /// <param name="context">Data-flow context that supplies the response being processed.</param>
        /// <returns>
        /// <c>Terminated</c> when <c>CanParse</c> rejects the request; the result of <c>Parse</c>
        /// when it is <c>Failed</c> or <c>Terminated</c>; <c>Failed</c> when any exception is
        /// caught; otherwise <c>Success</c>.
        /// </returns>
        public override async Task<DataFlowResult> HandleAsync(DataFlowContext context)
        {
            try
            {
                var response = context.GetResponse();
                var request = response.Request;

                // Terminate the data flow when the request does not match the filter.
                if (CanParse != null && !CanParse(request))
                {
                    return DataFlowResult.Terminated;
                }

                Selectable?.Invoke(context);

                var parserResult = await Parse(context);

                if (parserResult == DataFlowResult.Failed || parserResult == DataFlowResult.Terminated)
                {
                    return parserResult;
                }

                var urls = Follow?.Invoke(context);
                if (urls != null && urls.Length > 0)
                {
                    var followRequests = new List<Request>();
                    foreach (var url in urls)
                    {
                        var followRequest = CreateFromRequest(request, url);

                        // CanParse is optional (see the null check above). With no filter
                        // configured, every follow-up request is accepted instead of hitting a
                        // NullReferenceException that the catch below would report as Failed.
                        if (CanParse == null || CanParse(followRequest))
                        {
                            followRequests.Add(followRequest);
                        }
                    }

                    context.AddTargetRequests(followRequests.ToArray());
                }

                return DataFlowResult.Success;
            }
            catch (Exception e)
            {
                Logger?.LogError($"数据解析发生异常: {e}");
                return DataFlowResult.Failed;
            }
        }
Пример #6
0
        /// <summary>
        /// Queues one request for every subject link found in the gallery titles of the
        /// "listdiv" element. Each request carries the current request's "tag" property and
        /// uses the current request's URL as its "referer" property.
        /// </summary>
        /// <param name="context">Data-flow context that supplies the response and the selectable document.</param>
        public static void GetSubjectUrl(DataFlowContext context)
        {
            var response = context.GetResponse();
            var hrefs = context.GetSelectable().XPath("//*[@id=\"listdiv\"]/ul/li/div[@class='galleryli_title']/a/@href").GetValues();
            var queued = new List<Request>();

            foreach (var href in hrefs)
            {
                var subjectRequest = new Request();
                subjectRequest.Url = href;
                subjectRequest.OwnerId = response.Request.OwnerId;
                subjectRequest.AddProperty("tag", response.Request.GetProperty("tag"));
                subjectRequest.AddProperty("referer", response.Request.Url);
                queued.Add(subjectRequest);
            }

            // Nothing matched: leave the context untouched.
            if (queued.Count == 0)
            {
                return;
            }

            context.AddTargetRequests(queued.ToArray());
        }
Пример #7
0
        /// <summary>
        /// Parses the album pagination links in the "pagesYY" block of the "listdiv" element
        /// and queues one request per distinct URL, each carrying a fixed "tag" property.
        /// </summary>
        /// <param name="context">Data-flow context that supplies the response and the selectable document.</param>
        public static void GetSubjectPageUrl(DataFlowContext context)
        {
            var response = context.GetResponse();
            var pages = context.GetSelectable().XPath("//*[@id=\"listdiv\"]/div[@class='pagesYY']/div/a[not(@class)]/@href").GetValues();

            // Tracks URLs already queued so each page is requested only once.
            var seenPages = new HashSet<string>();
            var requestList = new List<Request>();

            foreach (var page in pages)
            {
                // Skip blank hrefs and anything already queued.
                if (string.IsNullOrWhiteSpace(page) || seenPages.Contains(page))
                {
                    continue;
                }

                // Best effort: a failure while building one request is reported and the
                // remaining pages are still processed.
                try
                {
                    var request = new Request
                    {
                        Url = page,
                        OwnerId = response.Request.OwnerId,
                    };

                    // NOTE(review): the tag value is hard-coded rather than taken from the
                    // current request - confirm this is intended.
                    request.AddProperty("tag", "萝莉");
                    requestList.Add(request);

                    // Mark as seen only after the request was built successfully.
                    seenPages.Add(page);
                }
                catch (Exception e)
                {
                    Console.WriteLine(e);
                }
            }

            if (requestList.Count > 0)
            {
                context.AddTargetRequests(requestList.ToArray());
            }
        }