/// <summary>
/// Looks up a single official account by its WeChat id.
/// Runs an account search for <paramref name="wechatid"/> on page 1 and returns the first hit;
/// the id is expected to identify exactly one account, so the first hit is taken as the match.
/// </summary>
/// <param name="wechatid">WeChat id of the official account to look up.</param>
/// <returns>
/// The first account returned by the search, or <c>null</c> when the search produced no accounts.
/// </returns>
public OfficialAccount GetAccountInfoById(string wechatid)
{
    List<OfficialAccount> matches = this.SearchOfficialAccount(wechatid, 1);

    // The search can come back empty; indexing [0] on an empty list would throw
    // ArgumentOutOfRangeException, so report "not found" as null instead.
    if (matches == null || matches.Count == 0)
    {
        return null;
    }

    return matches[0];
}
/// <summary>
/// Parses the account header of a "recent articles" page into an <see cref="OfficialAccount"/>.
/// </summary>
/// <param name="htmlText">HTML of the recent-articles page.</param>
/// <param name="url">Address of that page; stored unchanged as the account page URL.</param>
/// <returns>The account populated from the page's <c>profile_info_area</c> block and QR code image.</returns>
public OfficialAccount _ResolveOfficialAccount(string htmlText, string url)
{
    OfficialAccount account = new OfficialAccount();
    account.AccountPageurl = url;

    var document = new HtmlAgilityPack.HtmlDocument();
    document.LoadHtml(htmlText);

    // Everything except the QR code lives under the profile header block.
    var profile = document.DocumentNode.SelectSingleNode("//div[@class='profile_info_area']");

    var avatarNode = profile.SelectSingleNode("div[1]/span/img");
    account.ProfilePicture = avatarNode.GetAttributeValue("src", "");

    var nameNode = profile.SelectSingleNode("div[1]/div/strong/text()");
    account.Name = nameNode.InnerText.Trim();

    // The id line carries a label prefix; strip it so only the id remains.
    string idLine = profile.SelectSingleNode("div[1]/div/p/text()").InnerText.Trim();
    account.WeChatId = idLine.Length == 0 ? "" : idLine.Replace("微信号: ", "");

    account.Introduction = profile.SelectSingleNode("ul/li[1]/div/text()").InnerText;

    // Any non-empty text in the second list item marks the account as verified.
    string verification = profile.SelectSingleNode("ul/li[2]/div/text()").InnerText;
    account.IsAuth = !string.IsNullOrEmpty(verification);

    // The QR code src is site-relative; prefix the host when one is present.
    var qrNode = document.DocumentNode.SelectSingleNode("//*[@id='js_pc_qr_code_img']");
    string qrPath = WebUtility.HtmlDecode(qrNode.GetAttributeValue("src", ""));
    account.QrCode = qrPath.Length > 0 ? "http://mp.weixin.qq.com" + qrPath : "";

    return account;
}
/// <summary>
/// Searches official accounts by keyword and parses one page of results.
/// </summary>
/// <param name="keyword">Search keyword.</param>
/// <param name="page">Result page number (defaults to 1).</param>
/// <returns>
/// The accounts parsed from the result list; empty when the page has no result list.
/// Entries that fail to parse are logged as warnings and skipped.
/// </returns>
public List<OfficialAccount> SearchOfficialAccount(string keyword, int page = 1)
{
    var accounts = new List<OfficialAccount>();

    string html = this._SearchAccount_Html(keyword, page);
    var document = new HtmlDocument();
    document.LoadHtml(html);

    HtmlNodeCollection entries = document.DocumentNode.SelectNodes("//ul[@class='news-list2']/li");
    if (entries == null)
    {
        return accounts;
    }

    foreach (HtmlNode entry in entries)
    {
        try
        {
            var account = new OfficialAccount
            {
                // The link contains HTML-encoded '&', so it needs HtmlDecode rather than UrlDecode.
                AccountPageurl = WebUtility.HtmlDecode(
                    entry.SelectSingleNode("div/div[@class='img-box']/a").GetAttributeValue("href", "")),
                ProfilePicture = WebUtility.HtmlDecode(
                    entry.SelectSingleNode("div/div[@class='img-box']/a/img").GetAttributeValue("src", "")),
                Name = entry.SelectSingleNode("div/div[2]/p[1]").InnerText
                    .Trim()
                    .Replace("<!--red_beg-->", "")
                    .Replace("<!--red_end-->", ""),
                WeChatId = entry.SelectSingleNode("div/div[2]/p[2]/label").InnerText.Trim(),
                QrCode = WebUtility.HtmlDecode(
                    entry.SelectSingleNode("div/div[3]/span/img").GetAttributeValue("src", "")),
                Introduction = entry.SelectSingleNode("dl[1]/dd").InnerText
                    .Trim()
                    .Replace("<!--red_beg-->", "")
                    .Replace("<!--red_end-->", ""),
                // Older and newer verified accounts appear to be rendered differently
                // (compare the accounts bitsea and NUAA_1952); an entry is now treated
                // as verified when it contains this script call.
                IsAuth = entry.InnerText.Contains("document.write(authname('2'))")
            };

            accounts.Add(account);
        }
        catch (Exception e)
        {
            logger.Warn(e);
        }
    }

    return accounts;
}
/// <summary>
/// Downloads one page of a home-page category and parses its articles.
/// </summary>
/// <param name="categoryIndex">
/// Zero-based home-page category index (hot: 0, recommended: 1, jokes: 2, health: 3, private talk: 4).
/// The original author notes the range as "0-20?", i.e. unconfirmed.
/// </param>
/// <param name="page">Zero-based page number within the category.</param>
/// <remarks>get_recent_article_url_by_index_single</remarks>
/// <returns>
/// The articles parsed from the page; empty when no list items are found.
/// Items that fail to parse are logged as errors and skipped.
/// </returns>
public List<Article> GetArticleByCategoryIndex(int categoryIndex, int page)
{
    bool isFirstPage = page == 0;

    // Page 0 of category N is served as .../pc/pc_N/pc_N.html; later pages as .../pc/pc_N/<page>.html, e.g.
    //   http://weixin.sogou.com/pcindex/pc/pc_4/pc_4.html  (category 4, page 0)
    //   http://weixin.sogou.com/pcindex/pc/pc_2/1.html     (category 2, page 1)
    //   http://weixin.sogou.com/pcindex/pc/pc_3/2.html     (category 3, page 2)
    string pageName = isFirstPage ? "pc_" + categoryIndex : page.ToString();
    string url = "http://weixin.sogou.com/pcindex/pc/pc_" + categoryIndex + '/' + pageName + ".html";

    var headers = new WebHeaderCollection
    {
        { "Host", "weixin.sogou.com" },
        { "Referer", "http://weixin.sogou.com/" },
        { "Accept", "*/*" }
    };

    string html = new HttpHelper().Get(headers, url, "UTF-8");
    var document = new HtmlDocument();
    document.LoadHtml(html);

    // Page 0 is queried through its news-list container; later pages are queried for
    // <li> elements directly under the document root.
    string itemXpath = isFirstPage ? "//ul[@class='news-list']/li" : "li";
    var items = document.DocumentNode.SelectNodes(itemXpath);

    var articles = new List<Article>();
    if (items == null)
    {
        return articles;
    }

    foreach (var item in items)
    {
        try
        {
            var article = new Article() { Imgs = new List<string>() };
            var account = new OfficialAccount();

            article.Title = item.SelectSingleNode("div[2]/h3/a").InnerText;
            article.Url = item.SelectSingleNode("div[1]/a").GetAttributeValue("href", "");
            article.Brief = WebUtility.HtmlDecode(item.SelectSingleNode("div[2]/p[@class='txt-info']").InnerText);

            string cover = item.SelectSingleNode("div[1]/a/img").GetAttributeValue("src", "");
            if (!string.IsNullOrEmpty(cover))
            {
                article.Imgs.Add(cover);
            }

            article.Time = item.SelectSingleNode("div[2]/div/span").GetAttributeValue("t", "");

            // One anchor carries the account link, name, verification flag and avatar.
            var accountLink = item.SelectSingleNode("div[2]/div/a");
            string accountHref = accountLink.GetAttributeValue("href", "");

            article.ArticleListUrl = accountHref;
            account.AccountPageurl = accountHref;
            account.Name = accountLink.InnerText;
            account.IsAuth = accountLink.GetAttributeValue("data-isv", "") == "1";
            account.ProfilePicture = accountLink.GetAttributeValue("data-headimage", "");

            article.officialAccount = account;
            articles.Add(article);
        }
        catch (Exception e)
        {
            logger.Error(e);
        }
    }

    return articles;
}
/// <summary>
/// Matches a <c>timeConvert('digits')</c> script call; the digits are available as group 1.
/// Built once instead of once per parsed article.
/// </summary>
private static readonly Regex ArticleTimeRegex = new Regex(@"timeConvert\('(?<1>(\d+))'\)");

/// <summary>
/// Interprets a <c>data-isv</c> attribute value: any non-zero integer means verified;
/// a missing, empty or non-numeric value means not verified.
/// </summary>
/// <param name="rawValue">Attribute text as read from the page.</param>
/// <returns><c>true</c> when <paramref name="rawValue"/> parses to a non-zero integer.</returns>
private static bool ParseIsvFlag(string rawValue)
{
    short parsed;
    return short.TryParse(
               rawValue,
               System.Globalization.NumberStyles.Integer,
               System.Globalization.CultureInfo.InvariantCulture,
               out parsed)
           && parsed != 0;
}

/// <summary>
/// Searches WeChat articles by keyword and parses one page of results.
/// </summary>
/// <param name="keyword">Search keyword.</param>
/// <param name="page">Result page number (defaults to 1).</param>
/// <returns>
/// The articles parsed from the result list; empty when the page has no result list.
/// Entries that fail to parse are logged as errors and skipped.
/// </returns>
public List<Article> SearchArticle(string keyword, int page = 1)
{
    List<Article> articleList = new List<Article>();

    string text = this._SearchArticle_Html(keyword, page);
    HtmlDocument pageDoc = new HtmlDocument();
    pageDoc.LoadHtml(text);

    // SelectNodes yields null (not an empty collection) when nothing matches.
    HtmlNodeCollection targetArea = pageDoc.DocumentNode.SelectNodes("//ul[@class='news-list']/li");
    if (targetArea == null)
    {
        return articleList;
    }

    foreach (HtmlNode node in targetArea)
    {
        try
        {
            // Entries with a cover image keep the title link in the second div. When that link
            // is absent or has no href, fall through to the alternative layout instead of failing.
            HtmlNode titleLink = node.SelectSingleNode("div[2]/h3/a");
            string url = titleLink == null
                ? ""
                : WebUtility.HtmlDecode(titleLink.GetAttributeValue("href", ""));

            string title;
            string brief;
            string time;
            List<string> imgs = new List<string>();
            OfficialAccount account = new OfficialAccount();

            if (!string.IsNullOrEmpty(url))
            {
                title = titleLink.InnerText;
                imgs.Add(WebUtility.HtmlDecode(node.SelectSingleNode("div[1]/a/img").GetAttributeValue("src", "")));
                brief = node.SelectSingleNode("div[2]/p").InnerHtml;
                time = node.SelectSingleNode("div[2]/div/span/script/text()").InnerHtml;

                HtmlNode verifiedLink = node.SelectSingleNode("div[@class='txt-box']/div[@class='s-p']/a");
                if (verifiedLink != null)
                {
                    account.IsAuth = ParseIsvFlag(verifiedLink.GetAttributeValue("data-isv", ""));
                }

                // NOTE(review): this reads the href of the title link under txt-box, which looks like
                // the article link rather than the account page. Left unchanged; confirm the intended node.
                account.AccountPageurl = WebUtility.HtmlDecode(
                    node.SelectSingleNode("div[@class='txt-box']/h3/a").GetAttributeValue("href", ""));

                HtmlNode accountLink = node.SelectSingleNode("div/div[2]/a");
                if (accountLink != null)
                {
                    account.ProfilePicture = WebUtility.HtmlDecode(accountLink.GetAttributeValue("data-headimage", ""));
                    account.Name = accountLink.InnerText;
                }
            }
            else
            {
                HtmlNode fallbackTitleLink = node.SelectSingleNode("div/h3/a");
                url = WebUtility.HtmlDecode(fallbackTitleLink.GetAttributeValue("href", ""));
                title = fallbackTitleLink.InnerText;

                HtmlNodeCollection thumbnailLinks = node.SelectNodes("div/div[1]/a");
                if (thumbnailLinks != null)
                {
                    foreach (HtmlNode thumbnailLink in thumbnailLinks)
                    {
                        // Read src through the attribute API; an "@src" XPath step followed by
                        // InnerText does not return the attribute value in HtmlAgilityPack.
                        HtmlNode imgNode = thumbnailLink.SelectSingleNode("span/img");
                        if (imgNode == null)
                        {
                            continue;
                        }

                        string img = WebUtility.HtmlDecode(imgNode.GetAttributeValue("src", ""));
                        if (!string.IsNullOrEmpty(img))
                        {
                            imgs.Add(img);
                        }
                    }
                }

                brief = node.SelectSingleNode("div/p").InnerText;
                time = node.SelectSingleNode("div/div[2]/span/script/text()").InnerText;

                HtmlNode accountLink = node.SelectSingleNode("div/div[2]/a");
                if (accountLink != null)
                {
                    // Convert.ToBoolean(string) only accepts "True"/"False" and throws on "0"/"1",
                    // so the flag is parsed as an integer, matching the other layout.
                    account.IsAuth = ParseIsvFlag(accountLink.GetAttributeValue("data-isv", ""));
                    account.AccountPageurl = WebUtility.HtmlDecode(accountLink.GetAttributeValue("href", ""));
                    account.ProfilePicture = WebUtility.HtmlDecode(accountLink.GetAttributeValue("data-headimage", ""));
                    account.Name = accountLink.InnerText;
                }
            }

            // Strip the keyword-highlight markers the search page wraps around matches.
            if (!string.IsNullOrEmpty(title))
            {
                title = title.Trim().Replace("<!--red_beg-->", "").Replace("<!--red_end-->", "").Replace("<em>", "").Replace("</em>", "");
            }
            else
            {
                title = "";
            }

            if (!string.IsNullOrEmpty(brief))
            {
                brief = brief.Trim().Replace("<!--red_beg-->", "").Replace("<!--red_end-->", "").Replace("<em>", "").Replace("</em>", "");
            }
            else
            {
                brief = "";
            }

            // The publish time is embedded as the argument of a timeConvert('…') script call;
            // the result is an empty string when the script text does not match.
            time = ArticleTimeRegex.Match(time).Groups[1].Value;

            Article article = new Article();
            article.Title = title;
            article.Brief = brief;
            article.Url = url;
            article.Imgs = imgs;
            article.Time = time;
            article.officialAccount = account;
            articleList.Add(article);
        }
        catch (Exception e)
        {
            logger.Error(e);
        }
    }

    return articleList;
}