Пример #1
0
        /// <summary>
        /// Crawls the page at <paramref name="url"/> and records the href of every anchor tag found on it.
        /// </summary>
        /// <remarks>
        /// Each link not seen before is added to <c>ReptileUrl</c>. Links that
        /// <c>IsNeedUrl</c> accepts are additionally stored, with trailing slashes
        /// removed, in <c>NeedUrlList</c> and written out through <c>insertURLXML</c>
        /// together with the current <c>MemberIndex</c>, which is then incremented.
        /// The method returns without doing anything when <c>IsBaseUrl</c> rejects the
        /// URL, when the helper exposes no document, or when the query yields null.
        /// </remarks>
        /// <param name="url">Address of the page to scan for links.</param>
        public void ReptileURL(string url)
        {
            if (!IsBaseUrl(url))
            {
                return;
            }

            JumonyHelper jumonyHelper = new JumonyHelper(url);
            if (jumonyHelper.doc == null)
            {
                return;
            }

            var urlList = jumonyHelper.doc.Find("a[href]");
            if (urlList == null)
            {
                return;
            }

            foreach (var item in urlList)
            {
                string itemUrl = item.Attribute("href").Value();

                // NOTE(review): presumably a value-less attribute (<a href>) yields null here;
                // skip it so TrimEnd below cannot hit a null reference.
                if (string.IsNullOrEmpty(itemUrl))
                {
                    continue;
                }

                // Check-and-add must happen under the same lock; otherwise two threads can
                // both pass the Contains test and register the same link twice.
                bool isNewUrl;
                lock (ReptileObj)
                {
                    isNewUrl = !ReptileUrl.Contains(itemUrl);
                    if (isNewUrl)
                    {
                        ReptileUrl.Add(itemUrl);
                    }
                }

                if (!isNewUrl || !IsNeedUrl(itemUrl))
                {
                    continue;
                }

                itemUrl = itemUrl.TrimEnd('/');

                // Same reasoning as above: the duplicate test, the insert, the XML write
                // and the index increment form one atomic step.
                lock (NeedObj)
                {
                    if (!NeedUrlList.Contains(itemUrl))
                    {
                        NeedUrlList.Add(itemUrl);
                        insertURLXML(MemberIndex, itemUrl);
                        MemberIndex++;
                    }
                }
            }
        }
Пример #2
0
        /// <summary>
        /// Scrapes the profile page at <paramref name="userUrl"/> into a <c>MemberEntity</c>
        /// and stores it through <c>memberHelper.AddMember</c> and <c>insertXML</c>.
        /// </summary>
        /// <remarks>
        /// Fields are read from these selectors: <c>.profile-title</c> (name),
        /// <c>.profile-points &gt; li</c> (reputation / medal / point),
        /// <c>.profile-bio</c> (profile text), <c>.profile-follow</c> (following / follower),
        /// <c>.member-info &gt; span</c> (registration date / city),
        /// <c>.profile-img &gt; a &gt; img</c> (image URL) and the first <c>i[title]</c>
        /// element (sex, empty string when absent).
        /// Exceptions raised while scraping or storing are caught and only their message
        /// is written to the "ERRORLOG" log; in that case the member may not be stored.
        /// </remarks>
        /// <param name="userUrl">Address of the user's profile page.</param>
        public void LoadUser(string userUrl)
        {
            MemberEntity member = new MemberEntity();
            member.url = userUrl;

            try
            {
                JumonyHelper jumonyHelper = new JumonyHelper(userUrl);

                member.name = jumonyHelper.doc.FindFirst(".profile-title").InnerText();

                // Each counter item holds "<value>\r\n<label>"; the label decides the target field.
                foreach (var pointItem in jumonyHelper.doc.Find(".profile-points > li"))
                {
                    string text = pointItem.InnerText();
                    if (!text.Contains("\r\n"))
                    {
                        continue;
                    }

                    string[] parts = text.Replace("\r\n", "|").Split('|');
                    switch (parts[1])
                    {
                        case "声望":
                            member.reputation = parts[0];
                            break;
                        case "勋章":
                            member.medal = parts[0];
                            break;
                        case "积分":
                            member.point = parts[0];
                            break;
                    }
                }

                member.profile = jumonyHelper.doc.FindFirst(".profile-bio").InnerText();

                // The count is taken from between the first '(' and the following ')'.
                foreach (var followItem in jumonyHelper.doc.Find(".profile-follow"))
                {
                    string text = followItem.InnerText();
                    if (String.IsNullOrEmpty(text))
                    {
                        continue;
                    }

                    if (text.Contains("关注"))
                    {
                        member.following = text.Split('(')[1].Split(')')[0];
                    }
                    else if (text.Contains("粉丝"))
                    {
                        member.follower = text.Split('(')[1].Split(')')[0];
                    }
                }

                // The value is the segment after the first ':' (and before any second ':').
                foreach (var infoItem in jumonyHelper.doc.Find(".member-info > span"))
                {
                    string text = infoItem.InnerText();
                    if (String.IsNullOrEmpty(text))
                    {
                        continue;
                    }

                    if (text.Contains("注册"))
                    {
                        member.Date = text.Split(':')[1];
                    }
                    else if (text.Contains("城市"))
                    {
                        member.city = text.Split(':')[1];
                    }
                }

                var image = jumonyHelper.doc.FindFirst(".profile-img > a > img");
                member.image = image.Attribute("src").Value();

                if (jumonyHelper.doc.Exists("i[title]"))
                {
                    var sexElement = jumonyHelper.doc.FindFirst("i[title]");
                    member.sex = sexElement.Attribute("title").Value();
                }
                else
                {
                    member.sex = "";
                }

                lock (ReptileObj)
                {
                    // Read the counter under the same lock that increments it; reading it
                    // outside lets two concurrent calls assign the same Id.
                    member.Id = UserIndex;
                    memberHelper.AddMember(member);
                    insertXML(member);
                    UserIndex++;
                }
            }
            catch (Exception ex)
            {
                WriteTxt.WriteNewTxt("ERRORLOG", "++++错误数据+++" + ex.Message);
            }
        }