/// <summary>
/// Crawls the page at <paramref name="url"/> and records the href of every
/// anchor tag found on it.
/// </summary>
/// <remarks>
/// Every href not seen before is added to <c>ReptileUrl</c>. Hrefs accepted by
/// <c>IsNeedUrl</c> are additionally stripped of trailing slashes, added to
/// <c>NeedUrlList</c> and persisted through <c>insertURLXML</c> using the current
/// <c>MemberIndex</c>, which is then incremented.
/// The "already seen?" test and the insertion are performed under the same lock
/// so that concurrent callers cannot both register the same URL.
/// </remarks>
/// <param name="url">Address of the page to scan; nothing happens unless <c>IsBaseUrl</c> accepts it.</param>
public void ReptileURL(string url)
{
    if (!IsBaseUrl(url))
    {
        return;
    }

    JumonyHelper jumonyHelper = new JumonyHelper(url);
    if (jumonyHelper.doc == null)
    {
        // The helper produced no document for this URL; nothing to scan.
        return;
    }

    var urlList = jumonyHelper.doc.Find("a[href]");
    if (urlList == null)
    {
        return;
    }

    foreach (var item in urlList)
    {
        string itemUrl = item.Attribute("href").Value();

        // Check and insert atomically: testing Contains outside the lock lets
        // two threads both pass the check and add the same URL.
        lock (ReptileObj)
        {
            if (ReptileUrl.Contains(itemUrl))
            {
                continue;
            }

            ReptileUrl.Add(itemUrl);
        }

        if (!IsNeedUrl(itemUrl))
        {
            continue;
        }

        // Normalise before de-duplicating so ".../x" and ".../x/" count as one entry.
        itemUrl = itemUrl.TrimEnd('/');

        lock (NeedObj)
        {
            if (!NeedUrlList.Contains(itemUrl))
            {
                NeedUrlList.Add(itemUrl);
                insertURLXML(MemberIndex, itemUrl);
                MemberIndex++;
            }
        }
    }
}
/// <summary>
/// Downloads the profile page at <paramref name="userUrl"/>, scrapes the member's
/// details into a <c>MemberEntity</c> and stores it.
/// </summary>
/// <remarks>
/// The fields are read from fixed CSS selectors on the page (title, points list,
/// bio, follow counters, member info, avatar and the optional <c>i[title]</c>
/// element used for the sex field). The finished entity is handed to
/// <c>memberHelper.AddMember</c> and <c>insertXML</c>.
/// Any exception raised while loading or parsing is caught and its message is
/// written to the "ERRORLOG" text log; in that case the member is not stored.
/// </remarks>
/// <param name="userUrl">Address of the member's profile page.</param>
public void LoadUser(string userUrl)
{
    MemberEntity member = new MemberEntity();
    member.url = userUrl;

    try
    {
        JumonyHelper jumonyHelper = new JumonyHelper(userUrl);

        var nameValue = jumonyHelper.doc.FindFirst(".profile-title");
        member.name = nameValue.InnerText();

        // Each points entry is rendered as "<value>\r\n<label>"; the label decides
        // which field receives the value.
        var profiles = jumonyHelper.doc.Find(".profile-points > li");
        foreach (var item in profiles)
        {
            string value = item.InnerText();
            if (!value.Contains("\r\n"))
            {
                continue;
            }

            string[] pros = value.Replace("\r\n", "|").Split('|');
            switch (pros[1])
            {
                case "声望": // reputation
                    member.reputation = pros[0];
                    break;
                case "勋章": // medals
                    member.medal = pros[0];
                    break;
                case "积分": // points
                    member.point = pros[0];
                    break;
            }
        }

        var profile = jumonyHelper.doc.FindFirst(".profile-bio");
        member.profile = profile.InnerText();

        // Follow counters carry the number between parentheses, e.g. "label(123)".
        var follows = jumonyHelper.doc.Find(".profile-follow");
        foreach (var item in follows)
        {
            string value = item.InnerText();
            if (String.IsNullOrEmpty(value))
            {
                continue;
            }

            if (value.Contains("关注")) // following
            {
                member.following = value.Split('(')[1].Split(')')[0];
            }
            else if (value.Contains("粉丝")) // followers
            {
                member.follower = value.Split('(')[1].Split(')')[0];
            }
        }

        // Member info spans are "label:value"; only the segment right after the
        // first ':' is kept.
        var infos = jumonyHelper.doc.Find(".member-info > span");
        foreach (var item in infos)
        {
            string value = item.InnerText();
            if (String.IsNullOrEmpty(value))
            {
                continue;
            }

            if (value.Contains("注册")) // registration date
            {
                member.Date = value.Split(':')[1];
            }
            else if (value.Contains("城市")) // city
            {
                member.city = value.Split(':')[1];
            }
        }

        var image = jumonyHelper.doc.FindFirst(".profile-img > a > img");
        member.image = image.Attribute("src").Value();

        if (jumonyHelper.doc.Exists("i[title]"))
        {
            var sexHtml = jumonyHelper.doc.FindFirst("i[title]");
            member.sex = sexHtml.Attribute("title").Value();
        }
        else
        {
            member.sex = "";
        }

        lock (ReptileObj)
        {
            // Take the id inside the lock that also increments UserIndex, so two
            // concurrent calls can never be handed the same id.
            member.Id = UserIndex;
            memberHelper.AddMember(member);
            insertXML(member);
            UserIndex++;
        }
    }
    catch (Exception ex)
    {
        // Best effort: a page that cannot be loaded or parsed is logged and skipped.
        WriteTxt.WriteNewTxt("ERRORLOG", "++++错误数据+++" + ex.Message);
    }
}