コード例 #1
0
            /// <summary>
            /// Reads two span values from the page, stores them as the "author" and
            /// "username" items, and queues every link matching the GitHub
            /// two-segment URL pattern as a follow request.
            /// </summary>
            /// <param name="context">Data-flow context that supplies the selectable page and receives the items.</param>
            /// <returns>
            /// A completed task holding <c>DataFlowResult.Terminated</c> (after clearing the items)
            /// when the username is null or whitespace; otherwise <c>DataFlowResult.Success</c>.
            /// </returns>
            protected override Task <DataFlowResult> Parse(DataFlowContext context)
            {
                var page = context.GetSelectable();

                // Parse the data: full-name span first, then the nickname span.
                var fullName = page
                    .XPath("//span[@class='p-name vcard-fullname d-block overflow-hidden']")
                    .GetValue();
                var login = page
                    .XPath("//span[@class='p-nickname vcard-username d-block']")
                    .GetValue();

                context.AddItem("author", fullName);
                context.AddItem("username", login);

                // Add the target links; they are queued even when the page turns out to be empty below.
                var matchedLinks = page.Links()
                    .Regex("(https://github\\.com/[\\w\\-]+/[\\w\\-]+)")
                    .GetValues();
                AddFollowRequests(context, matchedLinks);

                if (!string.IsNullOrWhiteSpace(login))
                {
                    return Task.FromResult(DataFlowResult.Success);
                }

                // Nothing was parsed: drop the items and skip the following steps (storage etc.).
                context.ClearItems();
                return Task.FromResult(DataFlowResult.Terminated);
            }
コード例 #2
0
ファイル: GithubSpider.cs プロジェクト: greatkeke/spider
            /// <summary>
            /// Reads two values from the first entry under the element with id
            /// <c>subject_list</c>, stores them as the "username" and "author" items,
            /// and queues every link matching the Douban book tag URL pattern as a target request.
            /// </summary>
            /// <param name="context">Data-flow context that supplies the selectable page and receives the items.</param>
            /// <returns>
            /// A completed task holding <c>DataFlowResult.Terminated</c> (after clearing the items)
            /// when the value read from the <c>h2/a</c> node is null or whitespace;
            /// otherwise <c>DataFlowResult.Success</c>.
            /// </returns>
            protected override Task <DataFlowResult> Parse(DataFlowContext context)
            {
                var selectable = context.GetSelectable();

                // Parse the data from the first list entry.
                var name = selectable.XPath("//*[@id=\"subject_list\"]/ul/li[1]/div[2]/h2/a")
                           .GetValue();
                var author = selectable.XPath("//*[@id=\"subject_list\"]/ul/li[1]/div[2]/div[1]")
                             .GetValue();

                context.AddItem("author", author);
                context.AddItem("username", name);

                // Add the target links. Every dot in the host name is escaped so that '.'
                // only matches a literal dot; an unescaped dot would also accept hosts such
                // as "bookXdouban.com".
                var urls = selectable.Links().Regex("(https://book\\.douban\\.com/tag/[\\w\\-]+)").GetValues();

                AddTargetRequests(context, urls);

                // If nothing was parsed, skip the following steps (storage etc.).
                if (string.IsNullOrWhiteSpace(name))
                {
                    context.ClearItems();
                    return Task.FromResult(DataFlowResult.Terminated);
                }

                return Task.FromResult(DataFlowResult.Success);
            }
コード例 #3
0
ファイル: GithubSpider.cs プロジェクト: yeuai/YeuAI.NET
            /// <summary>
            /// Extracts two span values from the page into the "author" and "username"
            /// items and registers every link matching the GitHub two-segment URL
            /// pattern as a target request.
            /// </summary>
            /// <param name="context">Data-flow context that supplies the selectable page and receives the items.</param>
            /// <returns>
            /// A completed task whose result is <c>DataFlowResult.Success</c> when a non-blank
            /// username was found, or <c>DataFlowResult.Terminated</c> (with the items cleared) otherwise.
            /// </returns>
            protected override Task <DataFlowResult> Parse(DataFlowContext context)
            {
                var document = context.GetSelectable();

                // Parsing data
                var authorNode = document.XPath("//span[@class='p-name vcard-fullname d-block overflow-hidden']");
                var displayName = authorNode.GetValue();
                var usernameNode = document.XPath("//span[@class='p-nickname vcard-username d-block']");
                var username = usernameNode.GetValue();

                context.AddItem("author", displayName);
                context.AddItem("username", username);

                // Add target links
                var linkMatches = document.Links().Regex("(https://github\\.com/[\\w\\-]+/[\\w\\-]+)");
                AddTargetRequests(context, linkMatches.GetValues());

                // If the parsing is empty, discard the items so the next steps are skipped
                var hasUsername = !string.IsNullOrWhiteSpace(username);
                if (!hasUsername)
                {
                    context.ClearItems();
                }

                return Task.FromResult(hasUsername ? DataFlowResult.Success : DataFlowResult.Terminated);
            }
コード例 #4
0
 /// <summary>
 /// Extracts data from the response according to <c>_mapping</c> and stores it on the context.
 /// </summary>
 /// <remarks>
 /// Behaviour, in order:
 /// <list type="bullet">
 /// <item>No mapping: nothing is added.</item>
 /// <item>A configured depth of 1 or more that differs from the request depth: items are cleared.</item>
 /// <item>An item selector is set: each matched node becomes one dictionary of mapped fields plus
 /// "PageSourceURL"; the non-empty list is serialized to JSON under "Content".</item>
 /// <item>No item selector but field mappings exist: one dictionary is built from the whole page
 /// (tabs removed, values trimmed) and serialized, indented, under "Content".</item>
 /// <item>Neither: the raw response text is stored under "Content" with the URL under "PageSourceURL".</item>
 /// </list>
 /// Field names are inserted with <c>Dictionary.Add</c>, so a duplicate field name (or a field
 /// literally named "PageSourceURL") throws <see cref="System.ArgumentException"/>.
 /// </remarks>
 /// <param name="context">Data-flow context that supplies the response and receives the items.</param>
 /// <returns>A completed task holding <c>DataFlowResult.Success</c> in every case.</returns>
 protected override Task <DataFlowResult> Parse(DataFlowContext context)
 {
     // Without a mapping there is nothing to extract.
     if (_mapping == null)
     {
         return Task.FromResult(DataFlowResult.Success);
     }

     // When a depth of 1 or more is configured, only pages at exactly that depth are parsed.
     if (_mapping.Deepth.GetValueOrDefault() >= 1 &&
         context.Response.Request.Depth != _mapping.Deepth.Value)
     {
         context.ClearItems();
         return Task.FromResult(DataFlowResult.Success);
     }

     if (!string.IsNullOrWhiteSpace(_mapping.ItemCssSelector))
     {
         // List mode: one dictionary per node matched by the item selector.
         var items     = new List<Dictionary<string, string>>();
         var itemNodes = context.GetSelectable().XPath(_mapping.ItemCssSelector).Nodes();
         foreach (var node in itemNodes)
         {
             var item = new Dictionary<string, string>();

             // Mapping may be null (the page-level branch below checks for it too);
             // iterating it unguarded would throw NullReferenceException.
             if (_mapping.Mapping != null)
             {
                 foreach (var field in _mapping.Mapping)
                 {
                     item.Add(field.Field, node.XPath(field.CssSelector).GetValue());
                 }
             }

             // Nodes that produced no fields are skipped entirely.
             if (item.Count > 0)
             {
                 item.Add("PageSourceURL", context.Response.Request.Url);
                 items.Add(item);
             }
         }

         if (items.Count > 0)
         {
             context.AddItem("Content", JsonConvert.SerializeObject(items));
         }
     }
     else if (_mapping.Mapping != null && _mapping.Mapping.Length > 0)
     {
         // Page mode: a single dictionary built from the whole document.
         var item = new Dictionary<string, string>();
         foreach (var field in _mapping.Mapping)
         {
             var value = context.GetSelectable().XPath(field.CssSelector).GetValue();
             if (value != null)
             {
                 value = value.Replace("\t", "").Trim();
             }
             item.Add(field.Field, value);
         }

         // The mapping is non-empty here, so the dictionary always has at least one entry.
         item.Add("PageSourceURL", context.Response.Request.Url);
         context.AddItem("Content", JsonConvert.SerializeObject(item, Formatting.Indented));
     }
     else
     {
         // No selectors at all: keep the raw page.
         context.AddItem("PageSourceURL", context.Response.Request.Url);
         context.AddItem("Content", context.Response.RawText);
     }

     return Task.FromResult(DataFlowResult.Success);
 }