/// <summary>
/// Parses the raw content of a user's home page into a user entity.
/// </summary>
/// <param name="home">Raw home page content to scan.</param>
/// <returns>
/// The parsed user entity, or <c>null</c> when no user id is found or when any
/// exception is raised while parsing (the failure is logged together with the page).
/// </returns>
public static CommonEntityLib.Entities.user.Entity AnalysisUserHome(string home)
{
    try
    {
        // Most fields are the first capture group of a single match against the page.
        Func<string, string> capture = pattern => Regex.Match(home, pattern).Groups[1].Value;

        var user = new CommonEntityLib.Entities.user.Entity();

        // User id; without it the page is treated as unusable.
        user.ID = capture("\"id\":\"(\\d*?)\"");
        if (string.IsNullOrEmpty(user.ID))
        {
            return null;
        }

        // Stage id, needed later for paged crawling of the user's posts.
        user.IDStr = capture("'stageId':'(\\d*?)'");

        // Number of posts published by the user.
        string postCountText = capture("\"mblogNum\":\"(\\d*?)\"");
        user.StatusesCount = int.Parse(postCountText);

        // Number of accounts the user follows.
        string followingCountText = capture("\"attNum\":\"(\\d*?)\"");
        user.FriendsCount = int.Parse(followingCountText);

        // Number of followers.
        string followerCountText = capture("\"fansNum\":\"(\\d*?)\"");
        user.FollowersCount = int.Parse(followerCountText);

        // Gender: only the escaped pronoun \u4ed6 ("he") is recognised; anything else
        // leaves Gender untouched.
        string pronoun = capture("\"ta\":\"(.*?)\"");
        if (pronoun == @"\u4ed6")
        {
            user.Gender = "m";
        }

        // Nickname.
        string rawNickname = capture("\"name\":\"(.*?)\"");
        user.ScreenName = rawNickname.NormalU2C();

        // Native place.
        string rawLocation = capture("\"nativePlace\":\"(.*?)\"");
        user.Location = rawLocation.NormalU2C();

        // Profile description.
        string rawDescription = capture("\"description\":\"(.*?)\"");
        user.Description = rawDescription.NormalU2C();

        // Avatar URL; backslashes are removed from the captured value.
        string rawAvatarUrl = Regex.Match(home, @"""profile_image_url"":""(?<url>.*?)""").Groups["url"].Value;
        user.ProfileImageUrl = rawAvatarUrl.Replace(@"\", "");

        return user;
    }
    catch (Exception exception)
    {
        AnalyseCnPageLogger.Error(string.Format("分析用户主页信息失败\r\n{0}", home), exception);
        return null;
    }
}
/// <summary>
/// Looks up a user by uid and enriches the resulting entity with details scraped
/// from the user's <c>http://weibo.com/{uid}/info</c> page (verification, level,
/// profile URL and the "basic information" list).
/// </summary>
/// <param name="webLogin">Login object whose cookie and web client are used for the requests.</param>
/// <param name="uid">Id of the user to look up.</param>
/// <returns>
/// The populated user entity, or <c>null</c> when <c>CNHttpWork.UserExist</c> reports
/// that the user does not exist, <c>CNHttpWork.GetUserEntity</c> returns <c>null</c>,
/// or the info page comes back null or empty.
/// </returns>
public CommonEntityLib.Entities.user.Entity GetUserEntity(IWeiboLogin webLogin, string uid)
{
    // The CN client is created fresh and reuses the caller's cookie.
    IWeiboLogin cnWeiboLogin = PlatformType.CN.GetWeiboLogin();
    cnWeiboLogin.Web.Cookie = webLogin.Web.Cookie;
    if (!CNHttpWork.UserExist(cnWeiboLogin.Web, uid))
    {
        return null;
    }

    CommonEntityLib.Entities.user.Entity res = CNHttpWork.GetUserEntity(cnWeiboLogin.Web, uid);
    if (res == null)
    {
        return null;
    }

    string url = "http://weibo.com/" + uid + "/info";
    string html = webLogin.Web.GetHTML(url);
    if (string.IsNullOrEmpty(html))
    {
        return null;
    }

    foreach (var thl in ExtractInfoPageFragments(html))
    {
        if (thl.Key.Contains("Pl_Official_Headerv6__1"))
        {
            res.Verified = thl.Value.Contains("verified.weibo.com");
            if (res.Verified)
            {
                HtmlDocument document = new HtmlDocument();
                document.LoadHtml(thl.Value);
                // The badge node or its title attribute may be missing when the markup
                // differs; in that case VerifiedReason is simply left as it was.
                HtmlNode badge = document.DocumentNode.SelectSingleNode(
                    "div/div/div[@class='shadow S_shadow']/div[@class='pf_photo']/a/em");
                if (badge != null)
                {
                    var title = badge.Attributes["title"];
                    if (title != null)
                    {
                        res.VerifiedReason = title.Value;
                    }
                }
            }
            continue;
        }

        if (thl.Value.Contains("等级信息"))
        {
            // Level, rendered as e.g. <span>Lv.2</span>
            string lv = Regex.Match(thl.Value, "<span>Lv\\.(?<lv>\\d*)</span>").Groups["lv"].Value;
            int uRank;
            if (int.TryParse(lv, out uRank))
            {
                res.Urank = uRank;
            }
            continue;
        }

        // No "continue" here on purpose: a fragment holding the home-page link is
        // still checked for the basic-information list below, as before.
        if (thl.Value.Contains("他的主页"))
        {
            HtmlDocument document = new HtmlDocument();
            document.LoadHtml(thl.Value);
            HtmlNode aTag = document.DocumentNode.SelectSingleNode("div/div/table/tr/td/a");
            if (aTag != null)
            {
                var href = aTag.Attributes["href"];
                if (href != null)
                {
                    res.ProfileUrl = string.Format("http://weibo.com{0}", href.Value);
                }
            }
        }

        if (thl.Value.Contains("基本信息"))
        {
            ApplyBasicInfo(res, thl.Value);
        }
    }

    return res;
}

/// <summary>
/// Collects the HTML of the <c>FM.view(...)</c> script blocks of the info page that
/// mention one of the sections this class reads, keyed by their <c>domid</c>.
/// </summary>
private static Dictionary<string, string> ExtractInfoPageFragments(string html)
{
    var fragments = new Dictionary<string, string>();
    if (!html.Contains("<script>FM"))
    {
        return fragments;
    }

    var regex = new Regex(@"FM.view\((?<json>.*?)\)</script>");
    foreach (Match match in regex.Matches(html))
    {
        string jsonStr = match.Groups["json"].Value;
        bool relevant = jsonStr.Contains("等级信息")
                        || jsonStr.Contains("基本信息")
                        || jsonStr.Contains("他的主页")
                        || jsonStr.Contains("Pl_Official_Headerv6__1");
        if (!relevant)
        {
            continue;
        }

        var json = DynamicJson.Parse(jsonStr);
        string domid = json.domid;
        try
        {
            string ht = StripControlCharacters(json.html);
            // First fragment wins for a given domid; a null domid cannot be a key.
            if (domid != null && !fragments.ContainsKey(domid))
            {
                fragments.Add(domid, ht);
            }
        }
        catch (Exception)
        {
            // Deliberately best-effort: a fragment whose html payload is missing or
            // unusable is skipped so the remaining fragments can still be processed.
        }
    }

    return fragments;
}

/// <summary>
/// Removes the characters U+0001..U+001F and U+007F from <paramref name="text"/>.
/// </summary>
private static string StripControlCharacters(string text)
{
    return new string(text.Where(c => c == '\0' || (c >= ' ' && c != '\x7F')).ToArray());
}

/// <summary>
/// Copies blog, domain, registration time, location and tags from the
/// "basic information" list of a fragment onto <paramref name="res"/>.
/// </summary>
private void ApplyBasicInfo(CommonEntityLib.Entities.user.Entity res, string fragmentHtml)
{
    HtmlDocument document = new HtmlDocument();
    document.LoadHtml(fragmentHtml);
    HtmlNodeCollection categoryNodeList = document.DocumentNode.SelectNodes("//ul[@class='clearfix']/li");
    if (categoryNodeList == null)
    {
        return;
    }

    foreach (HtmlNode htmlNode in categoryNodeList)
    {
        // SelectNodes yields null (not an empty collection) when nothing matches.
        HtmlNodeCollection spans = htmlNode.SelectNodes("span");
        if (spans == null || spans.Count != 2)
        {
            continue;
        }

        string txt = spans[0].InnerText;
        string val = spans[1].InnerText;

        if (txt.Contains("博客"))
        {
            res.Url = val;
        }
        else if (txt.Contains("个性域名"))
        {
            res.Domain = val;
        }
        else if (txt.Contains("注册时间"))
        {
            res.CreatedAt = val;
        }
        else if (txt.Contains("所在地"))
        {
            res.Location = val;
            if (!string.IsNullOrEmpty(res.Location))
            {
                // "<province> <city>": codes are only resolved when both parts exist.
                string[] parts = res.Location.Split(' ');
                if (parts.Length >= 2)
                {
                    res.Province = ProvinceToCode(parts[0].Trim());
                    res.City = CityToCode(parts[1].Trim());
                }
            }
        }
        else if (txt.Contains("标签"))
        {
            HtmlNodeCollection tags = spans[1].SelectNodes("a");
            if (tags != null)
            {
                res.Remark = string.Join(",", tags.Select(c => c.InnerText));
            }
        }
    }
}
/// <summary>
/// Extracts a user entity from the raw content of a user's home page.
/// </summary>
/// <param name="home">Raw home page content to scan.</param>
/// <returns>
/// The extracted entity; <c>null</c> if the page carries no user id or if parsing
/// throws, in which case the error is logged along with the page content.
/// </returns>
public static CommonEntityLib.Entities.user.Entity AnalysisUserHome(string home)
{
    try
    {
        // The user id comes first; a page without one is rejected.
        string idText = Regex.Match(home, "\"id\":\"(\\d*?)\"").Groups[1].Value;
        CommonEntityLib.Entities.user.Entity entity = new CommonEntityLib.Entities.user.Entity { ID = idText };
        if (string.IsNullOrEmpty(entity.ID))
        {
            return null;
        }

        // Stage id used when crawling the user's posts page by page.
        entity.IDStr = Regex.Match(home, "'stageId':'(\\d*?)'").Groups[1].Value;

        // Counters: posts, followed accounts, followers (parsed in this order).
        entity.StatusesCount = int.Parse(Regex.Match(home, "\"mblogNum\":\"(\\d*?)\"").Groups[1].Value);
        entity.FriendsCount = int.Parse(Regex.Match(home, "\"attNum\":\"(\\d*?)\"").Groups[1].Value);
        entity.FollowersCount = int.Parse(Regex.Match(home, "\"fansNum\":\"(\\d*?)\"").Groups[1].Value);

        // Gender is set to "m" only for the escaped pronoun \u4ed6 ("he").
        bool isMale = Regex.Match(home, "\"ta\":\"(.*?)\"").Groups[1].Value == @"\u4ed6";
        if (isMale)
        {
            entity.Gender = "m";
        }

        // Text fields are run through NormalU2C before being stored.
        Match nameMatch = Regex.Match(home, "\"name\":\"(.*?)\"");
        entity.ScreenName = nameMatch.Groups[1].Value.NormalU2C();

        Match placeMatch = Regex.Match(home, "\"nativePlace\":\"(.*?)\"");
        entity.Location = placeMatch.Groups[1].Value.NormalU2C();

        Match descriptionMatch = Regex.Match(home, "\"description\":\"(.*?)\"");
        entity.Description = descriptionMatch.Groups[1].Value.NormalU2C();

        // Avatar address with every backslash stripped.
        Match avatarMatch = Regex.Match(home, @"""profile_image_url"":""(?<url>.*?)""");
        entity.ProfileImageUrl = avatarMatch.Groups["url"].Value.Replace(@"\", "");

        return entity;
    }
    catch (Exception exception)
    {
        AnalyseCnPageLogger.Error(string.Format("分析用户主页信息失败\r\n{0}", home), exception);
        return null;
    }
}
/// <summary>
/// Builds a user entity from a user page using the <c>LocationRegex</c>,
/// <c>NicknameRegex</c>, <c>GenderRegex</c> and <c>DescriptionRegex</c> patterns.
/// </summary>
/// <param name="html">Page content to match against.</param>
/// <returns>
/// An entity carrying screen name, gender ("m" or "f"), description and the
/// province/city codes derived from the matched location.
/// </returns>
private static CommonEntityLib.Entities.user.Entity AnalyseUserPage(string html)
{
    // Values used when the page carries no location at all.
    string provinceCode = "11";
    string cityCode = "0";

    string locationText = LocationRegex.Match(html).Groups["location"].Value;
    if (!string.IsNullOrEmpty(locationText))
    {
        // Location is "<province>" or "<province> <city>".
        string[] parts = locationText.Split(' ');
        provinceCode = ProvinceToCode(parts[0]);
        if (parts.Length > 1)
        {
            cityCode = CityToCode(parts[1]);
        }
    }

    string screenName = NicknameRegex.Match(html).Groups["nickname"].Value;

    // Anything other than "男" (including no match) is reported as "f".
    string genderCode;
    if (GenderRegex.Match(html).Groups["gender"].Value == "男")
    {
        genderCode = "m";
    }
    else
    {
        genderCode = "f";
    }

    string description = DescriptionRegex.Match(html).Groups["description"].Value;

    var result = new CommonEntityLib.Entities.user.Entity();
    result.ScreenName = screenName;
    result.Gender = genderCode;
    result.Description = description;
    result.Province = provinceCode;
    result.City = cityCode;
    return result;
}