예제 #1
0
        /// <summary>
        /// Extracts a user's profile fields from the source text of their home page.
        /// </summary>
        /// <param name="home">Text of the user's home page to scan.</param>
        /// <returns>
        /// The filled-in entity; null when no user id can be found, or when any step throws
        /// (the failure is logged together with the page text).
        /// </returns>
        public static CommonEntityLib.Entities.user.Entity AnalysisUserHome(string home)
        {
            try
            {
                // First capture group of the first match of a pattern in the page ("" when nothing matches).
                Func<string, string> firstGroup = pattern => Regex.Match(home, pattern).Groups[1].Value;

                var user = new CommonEntityLib.Entities.user.Entity();

                // The uid is mandatory: without it nothing else is extracted.
                user.ID = firstGroup("\"id\":\"(\\d*?)\"");
                if (string.IsNullOrEmpty(user.ID))
                {
                    return null;
                }

                // Stage id, needed when paging through the user's posts.
                user.IDStr = firstGroup("'stageId':'(\\d*?)'");

                // Counters: posts, followings, followers.
                // int.Parse throws on an empty capture; that ends up in the catch below.
                user.StatusesCount = int.Parse(firstGroup("\"mblogNum\":\"(\\d*?)\""));
                user.FriendsCount = int.Parse(firstGroup("\"attNum\":\"(\\d*?)\""));
                user.FollowersCount = int.Parse(firstGroup("\"fansNum\":\"(\\d*?)\""));

                // Gender: only the escaped form of the pronoun 他 ("he") is recognised and mapped to "m";
                // any other value leaves Gender as the entity initialised it.
                if (firstGroup("\"ta\":\"(.*?)\"") == @"\u4ed6")
                {
                    user.Gender = "m";
                }

                // Text fields go through NormalU2C (presumably decodes \uXXXX escapes; confirm against the helper).
                user.ScreenName = firstGroup("\"name\":\"(.*?)\"").NormalU2C();
                user.Location = firstGroup("\"nativePlace\":\"(.*?)\"").NormalU2C();
                user.Description = firstGroup("\"description\":\"(.*?)\"").NormalU2C();

                // Avatar URL: drop the backslashes left in the captured text.
                var escapedAvatarUrl = Regex.Match(home, @"""profile_image_url"":""(?<url>.*?)""").Groups["url"].Value;
                user.ProfileImageUrl = escapedAvatarUrl.Replace(@"\", "");

                return user;
            }
            catch (Exception ex)
            {
                AnalyseCnPageLogger.Error(string.Format("分析用户主页信息失败\r\n{0}", home), ex);
                return null;
            }
        }
예제 #2
0
        /// <summary>
        /// Loads a user's information by uid: the base entity comes from <c>CNHttpWork</c>, and is then
        /// enriched with details scraped from the user's "http://weibo.com/{uid}/info" page.
        /// </summary>
        /// <param name="webLogin">Login object; its cookie is copied to a CN-platform login and its web client fetches the info page.</param>
        /// <param name="uid">Id of the user to look up.</param>
        /// <returns>
        /// The populated entity, or null when the user does not exist, the base lookup returns nothing,
        /// or the info page comes back empty.
        /// </returns>
        public CommonEntityLib.Entities.user.Entity GetUserEntity(IWeiboLogin webLogin, string uid)
        {
            IWeiboLogin cnWeiboLogin = PlatformType.CN.GetWeiboLogin();

            // The CN-platform requests run under the caller's session cookie.
            cnWeiboLogin.Web.Cookie = webLogin.Web.Cookie;

            if (!CNHttpWork.UserExist(cnWeiboLogin.Web, uid))
            {
                return null;
            }

            CommonEntityLib.Entities.user.Entity res = CNHttpWork.GetUserEntity(cnWeiboLogin.Web, uid);
            if (res == null)
            {
                return null;
            }

            string url  = "http://weibo.com/" + uid + "/info";
            string html = webLogin.Web.GetHTML(url);

            if (string.IsNullOrEmpty(html))
            {
                return null;
            }

            // domid -> cleaned HTML of every FM.view(...) fragment that carries one of the markers read below.
            var temHtmlList = new Dictionary <string, string>();

            if (html.Contains("<script>FM"))
            {
                var regex = new Regex(@"FM.view\((?<json>.*?)\)</script>");

                // Matches() yields an empty collection when nothing matches, so no IsMatch pre-check is needed.
                foreach (Match match in regex.Matches(html))
                {
                    string jsonStr = match.Groups["json"].Value;
                    if (!jsonStr.Contains("等级信息") && !jsonStr.Contains("基本信息") && !jsonStr.Contains("他的主页") && !jsonStr.Contains("Pl_Official_Headerv6__1"))
                    {
                        continue;
                    }

                    try
                    {
                        var    json  = DynamicJson.Parse(jsonStr);
                        string domid = json.domid;
                        string ht    = json.html;

                        // Skip unusable fragments explicitly; for a repeated domid the first fragment wins.
                        if (domid == null || ht == null || temHtmlList.ContainsKey(domid))
                        {
                            continue;
                        }

                        // Strip control characters U+0001..U+001F and U+007F before the HTML is parsed.
                        ht = Regex.Replace(ht, "[\\x01-\\x1F\\x7F]", string.Empty);

                        temHtmlList.Add(domid, ht);
                    }
                    catch (Exception)
                    {
                        // Best effort: a fragment that cannot be parsed or lacks the expected members is
                        // skipped so that the remaining fragments can still be used.
                    }
                }
            }

            foreach (var thl in temHtmlList)
            {
                // Header fragment: verification flag and reason.
                if (thl.Key.Contains("Pl_Official_Headerv6__1"))
                {
                    res.Verified = thl.Value.Contains("verified.weibo.com");
                    if (res.Verified)
                    {
                        HtmlDocument document = new HtmlDocument();
                        document.LoadHtml(thl.Value);
                        var root = document.DocumentNode;

                        // SelectSingleNode returns null when the markup does not match; the title
                        // attribute may be missing too. Leave the reason unset in either case.
                        var reasonNode = root.SelectSingleNode("div/div/div[@class='shadow  S_shadow']/div[@class='pf_photo']/a/em");
                        if (reasonNode != null)
                        {
                            var titleAttribute = reasonNode.Attributes["title"];
                            if (titleAttribute != null)
                            {
                                res.VerifiedReason = titleAttribute.Value;
                            }
                        }
                    }
                    continue;
                }

                // Level fragment, e.g. <span>Lv.2</span>.
                if (thl.Value.Contains("等级信息"))
                {
                    string lv = Regex.Match(thl.Value, "<span>Lv\\.(?<lv>\\d*)</span>").Groups["lv"].Value;
                    int    uRank;
                    if (!string.IsNullOrEmpty(lv) && int.TryParse(lv, out uRank))
                    {
                        res.Urank = uRank;
                    }
                    continue;
                }

                // "His homepage" fragment: profile link. No continue here, so the same fragment is
                // also checked for basic information below.
                if (thl.Value.Contains("他的主页"))
                {
                    HtmlDocument document = new HtmlDocument();
                    document.LoadHtml(thl.Value);
                    var root = document.DocumentNode;
                    var aTag = root.SelectSingleNode("div/div/table/tr/td/a");
                    if (aTag != null)
                    {
                        var hrefAttribute = aTag.Attributes["href"];
                        if (hrefAttribute != null)
                        {
                            res.ProfileUrl = string.Format("http://weibo.com{0}", hrefAttribute.Value);
                        }
                    }
                }

                // Basic-information fragment: a list of <li> rows, each holding a label span and a value span.
                if (thl.Value.Contains("基本信息"))
                {
                    HtmlDocument document = new HtmlDocument();
                    document.LoadHtml(thl.Value);
                    var root = document.DocumentNode;
                    HtmlNodeCollection categoryNodeList = root.SelectNodes("//ul[@class='clearfix']/li");

                    if (categoryNodeList == null)
                    {
                        continue;
                    }

                    foreach (HtmlNode htmlNode in categoryNodeList)
                    {
                        // SelectNodes returns null (not an empty collection) when there are no spans.
                        HtmlNodeCollection spans = htmlNode.SelectNodes("span");
                        if (spans == null || spans.Count != 2)
                        {
                            continue;
                        }

                        string txt = spans[0].InnerText;
                        string val = spans[1].InnerText;

                        // Blog
                        if (txt.Contains("博客"))
                        {
                            res.Url = val;
                            continue;
                        }
                        // Personal domain
                        if (txt.Contains("个性域名"))
                        {
                            res.Domain = val;
                            continue;
                        }
                        // Registration time
                        if (txt.Contains("注册时间"))
                        {
                            res.CreatedAt = val;
                            continue;
                        }
                        // Location: "<province> <city>" is additionally mapped to province/city codes.
                        if (txt.Contains("所在地"))
                        {
                            res.Location = val;
                            string location = res.Location;
                            if (!string.IsNullOrEmpty(location))
                            {
                                string[] parts = location.Split(' ');
                                if (parts.Length >= 2)
                                {
                                    res.Province = ProvinceToCode(parts[0].Trim());
                                    res.City     = CityToCode(parts[1].Trim());
                                }
                            }
                            continue;
                        }
                        // Tags: the link texts joined with commas.
                        if (txt.Contains("标签"))
                        {
                            HtmlNodeCollection tags = spans[1].SelectNodes("a");
                            if (tags != null)
                            {
                                res.Remark = string.Join(",", tags.Select(c => c.InnerText));
                            }
                        }
                    }
                }
            }
            return res;
        }
예제 #3
0
        /// <summary>
        /// Parses the source text of a user's home page into a user entity.
        /// </summary>
        /// <param name="home">Text of the home page.</param>
        /// <returns>
        /// The entity built from the page, or null if the page has no user id or if an exception
        /// occurs while parsing (the exception is logged along with the page text).
        /// </returns>
        public static CommonEntityLib.Entities.user.Entity AnalysisUserHome(string home)
        {
            try
            {
                var entity = new CommonEntityLib.Entities.user.Entity();

                // User id; parsing stops here when it is absent.
                Match idMatch = Regex.Match(home, "\"id\":\"(\\d*?)\"");
                entity.ID = idMatch.Groups[1].Value;
                if (string.IsNullOrEmpty(entity.ID))
                {
                    return null;
                }

                // Stage id used for paging through the user's posts.
                Match stageMatch = Regex.Match(home, "'stageId':'(\\d*?)'");
                entity.IDStr = stageMatch.Groups[1].Value;

                // Number of posts. An empty capture makes int.Parse throw, which is handled by the catch.
                Match postsMatch = Regex.Match(home, "\"mblogNum\":\"(\\d*?)\"");
                entity.StatusesCount = int.Parse(postsMatch.Groups[1].Value);

                // Number of accounts the user follows.
                Match followingMatch = Regex.Match(home, "\"attNum\":\"(\\d*?)\"");
                entity.FriendsCount = int.Parse(followingMatch.Groups[1].Value);

                // Number of followers.
                Match followersMatch = Regex.Match(home, "\"fansNum\":\"(\\d*?)\"");
                entity.FollowersCount = int.Parse(followersMatch.Groups[1].Value);

                // Gender: set to "m" only when the pronoun field is the escaped form of 他 ("he").
                Match pronounMatch = Regex.Match(home, "\"ta\":\"(.*?)\"");
                bool isMale = pronounMatch.Groups[1].Value == @"\u4ed6";
                if (isMale)
                {
                    entity.Gender = "m";
                }

                // Nickname.
                Match nameMatch = Regex.Match(home, "\"name\":\"(.*?)\"");
                entity.ScreenName = nameMatch.Groups[1].Value.NormalU2C();

                // Native place.
                Match placeMatch = Regex.Match(home, "\"nativePlace\":\"(.*?)\"");
                entity.Location = placeMatch.Groups[1].Value.NormalU2C();

                // Self-description.
                Match descriptionMatch = Regex.Match(home, "\"description\":\"(.*?)\"");
                entity.Description = descriptionMatch.Groups[1].Value.NormalU2C();

                // Avatar address with the backslashes removed.
                Match avatarMatch = Regex.Match(home, @"""profile_image_url"":""(?<url>.*?)""");
                entity.ProfileImageUrl = avatarMatch.Groups["url"].Value.Replace(@"\", "");

                return entity;
            }
            catch (Exception error)
            {
                AnalyseCnPageLogger.Error(string.Format("分析用户主页信息失败\r\n{0}", home), error);
                return null;
            }
        }
예제 #4
0
 /// <summary>
 /// Reads nickname, gender, description and province/city codes out of a user page.
 /// </summary>
 /// <param name="html">Text of the user page.</param>
 /// <returns>A new entity carrying the extracted values.</returns>
 private static CommonEntityLib.Entities.user.Entity AnalyseUserPage(string html)
 {
     // Codes used when the page carries no location text.
     string provinceCode = "11";
     string cityCode = "0";

     string locationText = LocationRegex.Match(html).Groups["location"].Value;
     if (!string.IsNullOrEmpty(locationText))
     {
         // The text is split on spaces: first piece is the province, second (if any) the city.
         string[] pieces = locationText.Split(' ');
         provinceCode = ProvinceToCode(pieces[0]);
         if (pieces.Length > 1)
         {
             cityCode = CityToCode(pieces[1]);
         }
     }

     var entity = new CommonEntityLib.Entities.user.Entity();
     entity.ScreenName = NicknameRegex.Match(html).Groups["nickname"].Value;

     // "男" (male) maps to "m"; every other value, including no match, maps to "f".
     if (GenderRegex.Match(html).Groups["gender"].Value == "男")
     {
         entity.Gender = "m";
     }
     else
     {
         entity.Gender = "f";
     }

     entity.Description = DescriptionRegex.Match(html).Groups["description"].Value;
     entity.Province = provinceCode;
     entity.City = cityCode;
     return entity;
 }