/// <summary>
/// Crawls the article at <paramref name="url"/> via <c>CnBlog.CrawlArticle</c> and
/// verifies that the crawl produces a non-null result.
/// </summary>
/// <param name="url">Address of the article passed to the crawler.</param>
public void TestCrawlArticle(string url)
{
    // Block on the task instead of spinning in an empty loop on IsCompleted:
    // the spin kept a CPU core fully busy for the whole duration of the crawl.
    // GetResult() also rethrows the crawler's own exception instead of an
    // AggregateException wrapper, which makes a failing test easier to read.
    var article = CnBlog.CrawlArticle(url).GetAwaiter().GetResult();

    Assert.IsNotNull(article);
}
/// <summary>
/// Calls <c>CnBlog.CrawlList2</c> with the argument 2 and verifies that the
/// crawl returns at least one item.
/// </summary>
public void TestCrawlList2()
{
    // Block on the task instead of spinning in an empty loop on IsCompleted:
    // the spin kept a CPU core fully busy for the whole duration of the crawl.
    // GetResult() also rethrows the crawler's own exception instead of an
    // AggregateException wrapper, which makes a failing test easier to read.
    var items = CnBlog.CrawlList2(2).GetAwaiter().GetResult();

    Assert.IsTrue(items.Count > 0);
}
/// <summary>
/// Fills the page from the <see cref="CnBlog"/> passed as the single navigation
/// parameter: header fields first, then the blog body, then the rendered comments.
/// Does nothing beyond the base call when the navigation parameter is not an
/// <c>object[]</c> holding exactly one <see cref="CnBlog"/>.
/// </summary>
/// <param name="e">Navigation event data; <c>e.Parameter</c> carries the blog to show.</param>
protected async override void OnNavigatedTo(NavigationEventArgs e)
{
    base.OnNavigatedTo(e);

    object[] parameters = e.Parameter as object[];
    if (parameters == null || parameters.Length != 1)
    {
        return;
    }

    // Cast once and reuse the result instead of repeating the "as" conversion.
    CnBlog blog = parameters[0] as CnBlog;
    if (blog == null)
    {
        return;
    }

    _blog = blog;

    // Header: title, author and counters.
    BlogTitle.Text = _blog.Title;
    AuthorName.Content = _blog.AuthorName;
    PublishTime.Text = _blog.PublishTime;
    Views.Text = _blog.Views;
    Diggs.Text = "[" + _blog.Diggs + "]";
    Comments.Text = _blog.Comments;
    BitmapImage bi = new BitmapImage { UriSource = new Uri(_blog.AuthorAvator) };
    Avatar.Source = bi;
    AuthorName.Tag = _blog.BlogApp;

    // Blog body.
    string blogBody = await BlogService.GetBlogContentAsync(_blog.Id);
    if (blogBody != null)
    {
        HideScrollbar(ref blogBody);
        BlogContent.NavigateToString(blogBody);
    }

    // Fetch the comment data. The empty comment template is shown first so the
    // comment view has content while the request is in flight.
    _commentHtml = CommentTool.BaseChatHtml;
    HideScrollbar(ref _commentHtml);
    BlogComment.NavigateToString(_commentHtml);

    // NOTE(review): the arguments 1 and 199 are presumably page index and page
    // size - confirm against BlogService.GetBlogCommentsAsync.
    List<CnBlogComment> listComments = await BlogService.GetBlogCommentsAsync(_blog.Id, 1, 199);
    if (listComments != null)
    {
        // A StringBuilder avoids re-copying the accumulated HTML for every comment.
        var comments = new System.Text.StringBuilder();
        foreach (CnBlogComment comment in listComments)
        {
            // Comments whose author name equals the blog's author name get the "[博主]" prefix.
            var displayName = comment.AuthorName == _blog.AuthorName
                ? "[博主]" + _blog.AuthorName
                : comment.AuthorName;

            comments.Append(CommentTool.Receive(
                comment.AuthorAvatar,
                displayName,
                comment.Content,
                comment.PublishTime,
                comment.Id));
        }

        // Drop any existing <a id='ok'> marker and re-append it after the comments,
        // so that it always ends up as the last element of the document.
        _commentHtml = _commentHtml.Replace("<a id='ok'></a>", "") + comments.ToString() + "<a id='ok'></a>";
        Debug.Write(_commentHtml);
        HideScrollbar(ref _commentHtml);
        BlogComment.NavigateToString(_commentHtml);
    }

    Loading.IsActive = false;
}
/// <summary>
/// Registers the crawler routes under the "/spider" base path.
/// Every route that returns crawled data answers with a JSON body and the
/// "application/json" content type.
/// </summary>
public SpiderModule() : base("/spider")
{
    Get("ithome/hot_list", async _ =>
    {
        var data = await ITHome.CrawlHotList().ConfigureAwait(false);
        return ToJsonResponse(data);
    });

    Get("ithome/new_list", async _ =>
    {
        var data = await ITHome.CrawlNewList().ConfigureAwait(false);
        return ToJsonResponse(data);
    });

    Get("ithome/crawl", _ =>
    {
        // The crawl is started without waiting for it; the route only acknowledges the start.
        ITHome.Crawl();
        return("已经开始任务");
    });

    Get("ithome/article/{id}", async param =>
    {
        var id = param["id"];
        var context = SQLiteContextFactory.GetContext();
        ListArticle listArticle = context.QueryByKey <ListArticle>(id);
        if (listArticle == null)
        {
            // Guard against a lookup that yields no row: answer 404 instead of
            // failing with a NullReferenceException on listArticle.Link.
            return new Response { StatusCode = global::Nancy.HttpStatusCode.NotFound };
        }

        var article = await ITHome.CrawlArticle(listArticle.Link);
        return ToJsonResponse(article);
    });

    Get("cnblog/list/{page_count}", async param =>
    {
        var pageCount = param["page_count"];
        // pageCount is dynamic, so the call is bound at run time; typing the
        // result as object keeps the helper call below statically bound.
        object data = await CnBlog.CrawlList(pageCount);
        return ToJsonResponse(data);
    });
}

/// <summary>
/// Serializes <paramref name="payload"/> to JSON and wraps it in a response whose
/// content type is "application/json".
/// </summary>
private static Response ToJsonResponse(object payload)
{
    var response = (Response)JsonConvert.SerializeObject(payload);
    response.ContentType = "application/json";
    return response;
}
static readonly string _urlSearchBlogs = "http://zzk.cnblogs.com/s?w={0}&t=b&p={1}"; // {0}: search keywords, {1}: page index

/// <summary>
/// Searches blogs by keyword: downloads the search result page, cuts out the result
/// list, wraps it in a <c>&lt;result&gt;</c> root element and parses it as XML.
/// </summary>
/// <param name="keywords">Search keywords; escaped before being placed in the query string.</param>
/// <param name="pageIndex">Value substituted for the page index placeholder of the search URL.</param>
/// <returns>
/// The parsed blogs, or <c>null</c> when the request returns no content or when any
/// exception occurs while downloading or parsing the page.
/// </returns>
public static async Task <List <CnBlog> > SearchBlogs(string keywords, int pageIndex)
{
    try
    {
        // Escape the keywords so that characters such as '#', '&' or '+' stay part of
        // the "w" value instead of being read as URL syntax (e.g. searching for "C#").
        string url = string.Format(_urlSearchBlogs, Uri.EscapeDataString(keywords ?? string.Empty), pageIndex);
        string html = await BaseService.SendGetRequest(url);
        if (html == null)
        {
            return(null);
        }

        // Keep only the markup between the result container and the sidebar / paging block.
        html = html.Split(new[] { "<div class=\"forflow\">" }, StringSplitOptions.None)[1]
               .Split(new[] { "<div class=\"forflow\" id=\"sidebar\">" }, StringSplitOptions.None)[0]
               .Split(new[] { "<div id=\"paging_block\"" }, StringSplitOptions.None)[0];
        html = "<?xml version=\"1.0\" encoding=\"utf - 8\" ?> " + "<result>" + html + "</result>";

        List <CnBlog> listBlogs = new List <CnBlog>();
        XmlDocument doc = new XmlDocument();
        doc.LoadXml(html);

        // Child 0 is the XML declaration, child 1 the <result> wrapper added above.
        XmlNode searchItems = doc.ChildNodes[1];
        if (searchItems != null)
        {
            foreach (XmlNode node in searchItems.ChildNodes)
            {
                listBlogs.Add(ParseSearchResultNode(node));
            }
        }

        return(listBlogs);
    }
    catch (Exception ex)
    {
        // Best effort: callers receive null on any failure, but leave a trace in
        // debug builds so a changed page layout is not silently invisible.
        System.Diagnostics.Debug.WriteLine(ex);
        return(null);
    }
}

/// <summary>
/// Builds a <see cref="CnBlog"/> from one search result node, reading the fields by
/// their fixed child positions.
/// </summary>
private static CnBlog ParseSearchResultNode(XmlNode node)
{
    CnBlog blog = new CnBlog();
    blog.Title = node.ChildNodes[0].InnerText;
    blog.Summary = node.ChildNodes[2].InnerText;

    // Child 4 holds the author link, the publish time and up to three counters.
    XmlNode info = node.ChildNodes[4];
    blog.AuthorName = info.ChildNodes[0].InnerText;
    blog.AuthorHome = info.ChildNodes[0].ChildNodes[0].Attributes["href"].Value;
    blog.BlogApp = blog.AuthorHome.Split('/')[3];
    blog.PublishTime = info.ChildNodes[1].InnerText;

    // Positions 2..4 may each carry one counter; a position that does not exist
    // yields a null node and is skipped.
    for (int i = 2; i <= 4; i++)
    {
        ApplySearchResultStat(blog, info.ChildNodes[i]);
    }

    blog.BlogRawUrl = node.ChildNodes[5].InnerText;
    blog.AuthorAvator = "http://pic.cnblogs.com/avatar/simple_avatar.gif";

    // The id is the last path segment of the raw URL without its extension.
    string[] strs = blog.BlogRawUrl.Split('/');
    blog.Id = strs[strs.Length - 1].Split('.')[0];

    if (blog.Diggs == null)
    {
        blog.Diggs = "0";
    }
    if (blog.Comments == null)
    {
        blog.Comments = "[0]";
    }

    return blog;
}

/// <summary>
/// Copies the counter held by <paramref name="statNode"/> to the matching property of
/// <paramref name="blog"/>, chosen by the label found in the node text:
/// "推荐" sets Diggs, "评论" sets Comments and "浏览" sets Views.
/// </summary>
private static void ApplySearchResultStat(CnBlog blog, XmlNode statNode)
{
    if (statNode == null)
    {
        return;
    }

    string text = statNode.InnerText;
    if (text.Contains("推荐"))
    {
        blog.Diggs = ExtractParenthesizedValue(text);
    }
    if (text.Contains("评论"))
    {
        blog.Comments = "[" + ExtractParenthesizedValue(text) + "]";
    }
    if (text.Contains("浏览"))
    {
        blog.Views = "[" + ExtractParenthesizedValue(text) + "]";
    }
}

/// <summary>
/// Returns the text that follows the first '(' in <paramref name="text"/>, with
/// trailing ')' characters removed.
/// </summary>
private static string ExtractParenthesizedValue(string text)
{
    return text.Split('(')[1].TrimEnd(')');
}
/// <summary>
/// Ten-day recommendation ranking: downloads the feed and converts each
/// <c>entry</c> element into a <see cref="CnBlog"/>.
/// </summary>
/// <param name="itemCount">Value substituted into the ranking URL template.</param>
/// <returns>
/// The parsed blogs, or <c>null</c> when the request returns no content or when any
/// exception occurs while downloading or parsing the feed.
/// </returns>
public async static Task <List <CnBlog> > Get10TopDiggsAysnc(int itemCount)
{
    // Avatar used when the author element carries no avatar of its own.
    const string fallbackAvatar = "http://pic.cnblogs.com/avatar/simple_avatar.gif";

    try
    {
        string xml = await BaseService.SendGetRequest(string.Format(_url10Diggs, itemCount));
        if (xml == null)
        {
            return(null);
        }

        var blogs = new List <CnBlog>();
        var document = new XmlDocument();
        document.LoadXml(xml);

        // The feed element is the second top-level node of the document.
        XmlNode feed = document.ChildNodes[1];
        foreach (XmlNode entry in feed.ChildNodes)
        {
            if (entry.Name != "entry")
            {
                continue;
            }

            var blog = new CnBlog();
            foreach (XmlNode field in entry.ChildNodes)
            {
                switch (field.Name)
                {
                    case "id":
                        blog.Id = field.InnerText;
                        break;

                    case "title":
                        blog.Title = field.InnerText;
                        break;

                    case "summary":
                        blog.Summary = field.InnerText + "...";
                        break;

                    case "published":
                        blog.PublishTime = "发表于 " + DateTime.Parse(field.InnerText);
                        break;

                    case "updated":
                        blog.UpdateTime = field.InnerText;
                        break;

                    case "author":
                        blog.AuthorName = field.ChildNodes[0].InnerText;
                        blog.AuthorHome = field.ChildNodes[1].InnerText;

                        // Only a three-child author element carries an avatar, and it may be empty.
                        string avatar = fallbackAvatar;
                        if (field.ChildNodes.Count == 3 && field.ChildNodes[2].InnerText != "")
                        {
                            avatar = field.ChildNodes[2].InnerText;
                        }
                        blog.AuthorAvator = avatar;
                        break;

                    case "link":
                        blog.BlogRawUrl = field.Attributes["href"].Value;
                        break;

                    case "diggs":
                        blog.Diggs = field.InnerText;
                        break;

                    case "views":
                        blog.Views = "[" + field.InnerText + "]";
                        break;

                    case "comments":
                        blog.Comments = "[" + field.InnerText + "]";
                        break;
                }
            }

            // The blog app name is the fourth '/'-separated segment of the author's home URL.
            blog.BlogApp = blog.AuthorHome.Split('/')[3];
            blogs.Add(blog);
        }

        return(blogs);
    }
    catch
    {
        return(null);
    }
}
/// <summary>
/// Calls <c>CnBlog.CrawlList</c> with the argument 2 and verifies that the
/// returned collection contains at least one item.
/// </summary>
public void TestCrawlList()
{
    var crawled = CnBlog.CrawlList(2);

    bool hasItems = crawled.Count > 0;
    Assert.IsTrue(hasItems);
}