Пример #1
0
        /// <summary>
        /// Looks up an official account by its WeChat id.
        /// This runs an official-account search for <paramref name="wechatid"/> (page 1) and
        /// returns the first hit; the lookup relies on the WeChat id being unique, so the first
        /// search result is taken to be the requested account.
        /// </summary>
        /// <param name="wechatid">WeChat id of the official account to look up.</param>
        /// <returns>
        /// The first account returned by the search, or <c>null</c> when the search produced no results.
        /// </returns>
        public OfficialAccount GetAccountInfoById(string wechatid)
        {
            List<OfficialAccount> matches = this.SearchOfficialAccount(wechatid, 1);

            // The search returns an empty list when nothing matched, so indexing [0]
            // unconditionally would throw ArgumentOutOfRangeException.
            if (matches == null || matches.Count == 0)
            {
                return null;
            }

            return matches[0];
        }
Пример #2
0
        /// <summary>
        /// Parses the HTML of an official account's "recent articles" page and extracts the
        /// account's profile information from it.
        /// </summary>
        /// <param name="htmlText">HTML source of the account page.</param>
        /// <param name="url">URL of the account page; stored unchanged as the account's page URL.</param>
        /// <returns>An <see cref="OfficialAccount"/> populated from the page markup.</returns>
        public OfficialAccount _ResolveOfficialAccount(string htmlText, string url)
        {
            HtmlAgilityPack.HtmlDocument document = new HtmlAgilityPack.HtmlDocument();
            document.LoadHtml(htmlText);

            // NOTE(review): none of the node lookups below are null-checked, so a page that lacks
            // the expected markup makes this method throw NullReferenceException.
            var profileArea = document.DocumentNode.SelectSingleNode("//div[@class='profile_info_area']");

            string pictureUrl  = profileArea.SelectSingleNode("div[1]/span/img").GetAttributeValue("src", "");
            string accountName = profileArea.SelectSingleNode("div[1]/div/strong/text()").InnerText.Trim();

            // Strip the "微信号: " label; Replace on an empty string simply yields an empty string.
            string wechatId = profileArea.SelectSingleNode("div[1]/div/p/text()").InnerText.Trim().Replace("微信号: ", "");

            string introduction = profileArea.SelectSingleNode("ul/li[1]/div/text()").InnerText;

            // Any non-empty text in the second list item is treated as "verified".
            string authInfo = profileArea.SelectSingleNode("ul/li[2]/div/text()").InnerText;

            // A non-empty QR code src is prefixed with the mp.weixin.qq.com host.
            string qrPath = WebUtility.HtmlDecode(document.DocumentNode.SelectSingleNode("//*[@id='js_pc_qr_code_img']").GetAttributeValue("src", ""));
            string qrCodeUrl = qrPath.Length > 0 ? "http://mp.weixin.qq.com" + qrPath : "";

            return new OfficialAccount
            {
                AccountPageurl = url,
                ProfilePicture = pictureUrl,
                Name           = accountName,
                WeChatId       = wechatId,
                Introduction   = introduction,
                IsAuth         = !string.IsNullOrEmpty(authInfo),
                QrCode         = qrCodeUrl
            };
        }
Пример #3
0
        /// <summary>
        /// Searches official accounts by keyword and parses one page of the results.
        /// </summary>
        /// <param name="keyword">Search keyword.</param>
        /// <param name="page">Number of the result page to fetch; defaults to 1.</param>
        /// <returns>
        /// The accounts parsed from the result page. The list is empty when the page has no
        /// <c>ul.news-list2</c> entries; an entry that fails to parse is logged as a warning and skipped.
        /// </returns>
        public List <OfficialAccount> SearchOfficialAccount(string keyword, int page = 1)
        {
            var accounts = new List<OfficialAccount>();

            string html = this._SearchAccount_Html(keyword, page);
            var resultPage = new HtmlDocument();
            resultPage.LoadHtml(html);

            HtmlNodeCollection entries = resultPage.DocumentNode.SelectNodes("//ul[@class='news-list2']/li");
            if (entries == null)
            {
                return accounts;
            }

            foreach (HtmlNode entry in entries)
            {
                try
                {
                    // Attribute values contain HTML entities such as &amp;, so they need HTML decoding
                    // (not URL decoding).
                    string pageUrl  = WebUtility.HtmlDecode(entry.SelectSingleNode("div/div[@class='img-box']/a").GetAttributeValue("href", ""));
                    string avatar   = WebUtility.HtmlDecode(entry.SelectSingleNode("div/div[@class='img-box']/a/img").GetAttributeValue("src", ""));
                    string name     = entry.SelectSingleNode("div/div[2]/p[1]").InnerText.Trim().Replace("<!--red_beg-->", "").Replace("<!--red_end-->", "");
                    string wechatId = entry.SelectSingleNode("div/div[2]/p[2]/label").InnerText.Trim();
                    string qrCode   = WebUtility.HtmlDecode(entry.SelectSingleNode("div/div[3]/span/img").GetAttributeValue("src", ""));
                    string intro    = entry.SelectSingleNode("dl[1]/dd").InnerText.Trim().Replace("<!--red_beg-->", "").Replace("<!--red_end-->", "");

                    // Older and newer accounts apparently render their verification differently
                    // (compare the accounts "bitsea" and "NUAA_1952"), so an entry counts as
                    // verified when it contains this script call.
                    bool verified = entry.InnerText.Contains("document.write(authname('2'))");

                    accounts.Add(new OfficialAccount
                    {
                        AccountPageurl = pageUrl,
                        ProfilePicture = avatar,
                        Name           = name,
                        WeChatId       = wechatId,
                        QrCode         = qrCode,
                        Introduction   = intro,
                        IsAuth         = verified
                    });
                }
                catch (Exception ex)
                {
                    logger.Warn(ex);
                }
            }

            return accounts;
        }
Пример #4
0
        /// <summary>
        /// Fetches one page of articles for a home-page category and parses them.
        /// </summary>
        /// <param name="categoryIndex">
        /// Zero-based index of the home-page category (e.g. 0 = hot, 1 = recommended, 2-4 = further
        /// categories; the original author notes the range as "0-20?").
        /// </param>
        /// <param name="page">Zero-based page number within the category.</param>
        /// <remarks>get_recent_article_url_by_index_single</remarks>
        /// <returns>
        /// The articles parsed from the page. The list is empty when no list items are found;
        /// an item that fails to parse is logged as an error and skipped.
        /// </returns>
        public List <Article> GetArticleByCategoryIndex(int categoryIndex, int page)
        {
            bool isFirstPage = page == 0;

            // URL shapes:
            //   http://weixin.sogou.com/pcindex/pc/pc_4/pc_4.html  -> category 4, page 0
            //   http://weixin.sogou.com/pcindex/pc/pc_2/1.html     -> category 2, page 1
            //   http://weixin.sogou.com/pcindex/pc/pc_3/2.html     -> category 3, page 2
            string pageSegment = isFirstPage ? "pc_" + categoryIndex : page.ToString();
            string url = "http://weixin.sogou.com/pcindex/pc/pc_" + categoryIndex + '/' + pageSegment + ".html";

            WebHeaderCollection headers = new WebHeaderCollection
            {
                { "Host", "weixin.sogou.com" },
                { "Referer", "http://weixin.sogou.com/" },
                { "Accept", "*/*" }
            };

            HttpHelper http = new HttpHelper();
            string     html = http.Get(headers, url, "UTF-8");

            HtmlDocument pageDoc = new HtmlDocument();
            pageDoc.LoadHtml(html);

            // Page 0 is queried for the <ul class="news-list"> items; other pages are queried
            // for <li> elements directly under the document root.
            string itemXpath = isFirstPage ? "//ul[@class='news-list']/li" : "li";
            var    items     = pageDoc.DocumentNode.SelectNodes(itemXpath);

            List <Article> articles = new List <Article>();
            if (items == null)
            {
                return(articles);
            }

            foreach (var item in items)
            {
                try
                {
                    Article article = new Article { Imgs = new List <string>() };

                    article.Title = item.SelectSingleNode("div[2]/h3/a").InnerText;
                    article.Url   = item.SelectSingleNode("div[1]/a").GetAttributeValue("href", "");
                    article.Brief = WebUtility.HtmlDecode(item.SelectSingleNode("div[2]/p[@class='txt-info']").InnerText);

                    string cover = item.SelectSingleNode("div[1]/a/img").GetAttributeValue("src", "");
                    if (!string.IsNullOrEmpty(cover))
                    {
                        article.Imgs.Add(cover);
                    }

                    article.Time = item.SelectSingleNode("div[2]/div/span").GetAttributeValue("t", "");

                    // The account link carries the account page URL plus the data-isv and
                    // data-headimage attributes; look it up once and reuse it.
                    var    accountLink = item.SelectSingleNode("div[2]/div/a");
                    string accountUrl  = accountLink.GetAttributeValue("href", "");

                    article.ArticleListUrl  = accountUrl;
                    article.officialAccount = new OfficialAccount
                    {
                        AccountPageurl = accountUrl,
                        Name           = accountLink.InnerText,
                        IsAuth         = accountLink.GetAttributeValue("data-isv", "") == "1",
                        ProfilePicture = accountLink.GetAttributeValue("data-headimage", "")
                    };

                    articles.Add(article);
                }
                catch (Exception ex)
                {
                    logger.Error(ex);
                }
            }

            return(articles);
        }
Пример #5
0
        /// <summary>
        /// Searches WeChat articles by keyword and parses one page of the results.
        /// Two result layouts are handled: the primary one, where the title link is found at
        /// <c>div[2]/h3/a</c>, and a fallback one, where it is found at <c>div/h3/a</c>.
        /// </summary>
        /// <param name="keyword">Search keyword.</param>
        /// <param name="page">Number of the result page to fetch; defaults to 1.</param>
        /// <returns>
        /// The articles parsed from the result page. The list is empty when the page has no
        /// <c>ul.news-list</c> entries; an entry that fails to parse is logged as an error and skipped.
        /// </returns>
        public List <Article> SearchArticle(string keyword, int page = 1)
        {
            List <Article> articleList = new List <Article>();
            string         text        = this._SearchArticle_Html(keyword, page);
            HtmlDocument   pageDoc     = new HtmlDocument();

            pageDoc.LoadHtml(text);
            HtmlNodeCollection targetArea = pageDoc.DocumentNode.SelectNodes("//ul[@class='news-list']/li");

            // SelectNodes returns null (not an empty collection) when nothing matches.
            if (targetArea == null)
            {
                return(articleList);
            }

            // Extracts the timestamp argument of the timeConvert('...') script call; built once per search.
            Regex timeRegex = new Regex(@"timeConvert\('(?<1>(\d+))'\)");

            foreach (HtmlNode node in targetArea)
            {
                try
                {
                    List <string>   imgs    = new List <string>();
                    OfficialAccount account = new OfficialAccount();
                    string          url     = "";
                    string          title;
                    string          brief;
                    string          time;

                    // Probe for the primary layout without dereferencing a missing node, so that
                    // entries using the fallback layout actually reach the else branch.
                    HtmlNode primaryLink = node.SelectSingleNode("div[2]/h3/a");
                    if (primaryLink != null)
                    {
                        url = WebUtility.HtmlDecode(primaryLink.GetAttributeValue("href", ""));
                    }

                    if (!string.IsNullOrEmpty(url))
                    {
                        title = primaryLink.InnerText;
                        string img = WebUtility.HtmlDecode(node.SelectSingleNode("div[1]/a/img").GetAttributeValue("src", ""));
                        imgs.Add(img);
                        brief = node.SelectSingleNode("div[2]/p").InnerHtml;
                        time  = node.SelectSingleNode("div[2]/div/span/script/text()").InnerHtml;

                        HtmlNode isvLink = node.SelectSingleNode("div[@class='txt-box']/div[@class='s-p']/a");
                        if (isvLink != null)
                        {
                            account.IsAuth = _ParseIsvFlag(isvLink.GetAttributeValue("data-isv", ""));
                        }

                        // NOTE(review): this reads the href of the title link, which looks like the
                        // article URL rather than the account page URL — confirm the intended node.
                        account.AccountPageurl = WebUtility.HtmlDecode(node.SelectSingleNode("div[@class='txt-box']/h3/a").GetAttributeValue("href", ""));

                        HtmlNode accountLink = node.SelectSingleNode("div/div[2]/a");
                        if (accountLink != null)
                        {
                            account.ProfilePicture = WebUtility.HtmlDecode(accountLink.GetAttributeValue("data-headimage", ""));
                            account.Name           = accountLink.InnerText;
                        }
                    }
                    else
                    {
                        HtmlNode fallbackLink = node.SelectSingleNode("div/h3/a");
                        url   = WebUtility.HtmlDecode(fallbackLink.GetAttributeValue("href", ""));
                        title = fallbackLink.InnerText;

                        HtmlNodeCollection imageLinks = node.SelectNodes("div/div[1]/a");
                        if (imageLinks != null)
                        {
                            foreach (HtmlNode imageLink in imageLinks)
                            {
                                // Read the src attribute from the <img> element itself: HtmlAgilityPack
                                // node selection yields elements, so "span/img/@src" + InnerText does
                                // not produce the attribute value.
                                HtmlNode imgNode = imageLink.SelectSingleNode("span/img");
                                if (imgNode == null)
                                {
                                    continue;
                                }

                                string img = WebUtility.HtmlDecode(imgNode.GetAttributeValue("src", ""));
                                if (!string.IsNullOrEmpty(img))
                                {
                                    imgs.Add(img);
                                }
                            }
                        }

                        brief = node.SelectSingleNode("div/p").InnerText;
                        time  = node.SelectSingleNode("div/div[2]/span/script/text()").InnerText;

                        // All account fields come from the same link, so guard them together.
                        HtmlNode accountLink = node.SelectSingleNode("div/div[2]/a");
                        if (accountLink != null)
                        {
                            account.IsAuth         = _ParseIsvFlag(accountLink.GetAttributeValue("data-isv", ""));
                            account.AccountPageurl = WebUtility.HtmlDecode(accountLink.GetAttributeValue("href", ""));
                            account.ProfilePicture = WebUtility.HtmlDecode(accountLink.GetAttributeValue("data-headimage", ""));
                            account.Name           = accountLink.InnerText;
                        }
                    }

                    Article article = new Article();
                    article.Title           = _StripSearchHighlight(title);
                    article.Brief           = _StripSearchHighlight(brief);
                    article.Url             = url;
                    article.Imgs            = imgs;
                    // Empty string when the script text does not contain a timeConvert('...') call.
                    article.Time            = timeRegex.Match(time).Groups[1].Value;
                    article.officialAccount = account;
                    articleList.Add(article);
                }
                catch (Exception e)
                {
                    // A malformed entry must not abort the whole page: log it and move on.
                    logger.Error(e);
                }
            }

            return(articleList);
        }

        /// <summary>
        /// Interprets a <c>data-isv</c> attribute value as a boolean without throwing:
        /// a numeric value is true when non-zero, "true"/"false" are parsed literally,
        /// and anything else (including an empty string) is false.
        /// </summary>
        private static bool _ParseIsvFlag(string rawFlag)
        {
            int numeric;

            if (int.TryParse(rawFlag, out numeric))
            {
                return(numeric != 0);
            }

            bool literal;
            return(bool.TryParse(rawFlag, out literal) && literal);
        }

        /// <summary>
        /// Trims the text and removes the keyword-highlight markers (red_beg/red_end comments
        /// and em tags); returns an empty string for null or empty input.
        /// </summary>
        private static string _StripSearchHighlight(string value)
        {
            if (string.IsNullOrEmpty(value))
            {
                return("");
            }

            return(value.Trim().Replace("<!--red_beg-->", "").Replace("<!--red_end-->", "").Replace("<em>", "").Replace("</em>", ""));
        }