Пример #1
0
        /// <summary>
        /// Configures how an author's posts are refreshed and sampled, based on the author's source.
        /// </summary>
        /// <remarks>
        /// ListedTop, Partner and PublicLeader authors get <c>CrawlStatus.Normal</c>; every other source
        /// gets <c>CrawlStatus.Stop</c>. ListedTop and PublicLeader authors are sampled with
        /// <c>SampleMethod.All</c>; all others (Partner included) with <c>SampleMethod.First100</c>.
        /// The interval is always set to 7 days.
        /// </remarks>
        /// <param name="author">Author whose post-sampling fields are overwritten.</param>
        /// <param name="source">Source the author was obtained from.</param>
        public static void SetAuthorPostSampleRule(Author author, Enums.AuthorSource source)
        {
            // Sources whose complete post history is sampled.
            bool sampleEverything = source == Enums.AuthorSource.ListedTop
                                    || source == Enums.AuthorSource.PublicLeader;

            // Partner authors are refreshed too, but only with the First100 sample.
            bool keepRefreshing = sampleEverything || source == Enums.AuthorSource.Partner;

            author.RefreshStatus = keepRefreshing
                ? Enums.CrawlStatus.Normal
                : Enums.CrawlStatus.Stop;

            author.PostSampleMethode = sampleEverything
                ? Enums.SampleMethod.All
                : Enums.SampleMethod.First100;

            author.IntervalDays = 7;
        }
Пример #2
0
        /// <summary>
        /// Manually adds an author to the store, looked up by user ID.
        /// </summary>
        /// <remarks>
        /// Fetches the user through <c>WeiboAPI.GetAuthorInfo</c>, converts it with
        /// <c>AuthorDBManager.ConvertToAuthor</c> and hands the result to
        /// <c>AuthorDBManager.InsertOrUpdateAuthorInfo</c>.
        /// </remarks>
        /// <param name="AuthorID">ID of the user to add.</param>
        /// <param name="source">Source to record for the author.</param>
        public static void AddNewAuthorWithID(string AuthorID, Enums.AuthorSource source)
        {
            var fetchedUser = WeiboAPI.GetAuthorInfo(AuthorID);

            AuthorDBManager.InsertOrUpdateAuthorInfo(AuthorDBManager.ConvertToAuthor(fetchedUser, source));
        }
Пример #3
0
        /// <summary>
        /// Manually adds an author to the store, starting from the URL of the user's Weibo home page.
        /// </summary>
        /// <remarks>
        /// Downloads the page, applies <c>userIDReg</c> to the HTML and, when its first capture group
        /// matched, forwards the captured value to <see cref="AddNewAuthorWithID"/>. Nothing happens
        /// when the group did not match.
        /// </remarks>
        /// <param name="url">URL of the user's Weibo home page.</param>
        /// <param name="source">Source to record for the author.</param>
        public static void AddNewAuthorWithUrl(string url, Enums.AuthorSource source)
        {
            string pageHtml = HTMLJobProcessor.GetHTMLViaRequest(url);
            Group  idGroup  = userIDReg.Match(pageHtml).Groups[1];

            // No ID found in the page: nothing to add.
            if (!idGroup.Success)
            {
                return;
            }

            AddNewAuthorWithID(idGroup.Value, source);
        }
Пример #4
0
        /// <summary>
        /// Tells whether the given author source counts as a "red skin" (influencer) source.
        /// </summary>
        /// <param name="source">Author source to classify.</param>
        /// <returns>
        /// <c>true</c> for ListedTop, Partner and PublicLeader; <c>false</c> for every other source.
        /// </returns>
        public static bool IsRedSkin(Enums.AuthorSource source)
        {
            return source == Enums.AuthorSource.ListedTop
                   || source == Enums.AuthorSource.Partner
                   || source == Enums.AuthorSource.PublicLeader;
        }
Пример #5
0
        /// <summary>
        /// Converts a dynamically-typed user object returned by Sina into an <see cref="Author"/>.
        /// </summary>
        /// <remarks>
        /// The conversion is best-effort: registration fields and activity counters are each filled
        /// inside their own try block, and any exception raised while reading the dynamic object
        /// aborts the rest of that group silently, leaving the remaining fields at their defaults.
        /// The average counters are always reset to 0, matching the strongly-typed overload.
        /// Crawl bookkeeping is then applied through <c>SetAuthorCrawlInfo</c>.
        /// </remarks>
        /// <param name="user">Dynamic user object.</param>
        /// <param name="source">Source to record for the author.</param>
        /// <returns>The converted author; never null.</returns>
        public static Author ConvertToAuthor(dynamic user, Enums.AuthorSource source)
        {
            Author author = new Author();

            #region Registration info
            try
            {
                author.AuthorID          = user.id;
                author.AuthorName        = user.screen_name; // NOTE(review): original author was unsure whether this is the nickname
                author.RealName          = user.name;
                author.Certification     = Utilities.GetCertificationType(user.verified_type, user.verified);
                author.CertificationInfo = user.verified_reason;
                author.Gender            = Utilities.GetGender(user.gender);
                author.RegisterTime      = Utilities.ParseToDateTime(user.created_at);
                author.RegionID          = RegionDBManager.GetRegionID(user.location);
                author.Description       = user.description;
                author.AuthorImg         = user.profile_image_url;
                author.Homepage          = user.profile_url;

                // A homepage without an "http:" scheme is treated as a relative identifier:
                // values containing '/' are rooted at weibo.com, the rest at the Sina blog host.
                // NOTE(review): an "https://" URL does not contain "http:" and would be prefixed
                // as well - confirm whether the API can return such values.
                if (!string.IsNullOrEmpty(author.Homepage) && !author.Homepage.Contains("http:"))
                {
                    if (author.Homepage.Contains('/'))
                    {
                        author.Homepage = "http://weibo.com/" + author.Homepage;
                    }
                    else
                    {
                        author.Homepage = "http://blog.sina.com.cn/" + author.Homepage;
                    }
                }
            }
            catch (Exception) { } // deliberate best-effort: keep whatever was filled in so far
            #endregion

            #region Activity data
            try
            {
                author.FansCount         = int.Parse(user.followers_count);
                author.FollowCount       = int.Parse(user.friends_count);
                author.CloseFriendsCount = int.Parse(user.bi_followers_count);
                author.PostCount         = int.Parse(user.statuses_count);
            }
            catch (Exception) { } // deliberate best-effort: counters that could not be read stay at their defaults

            // Reset the averages explicitly, exactly like the strongly-typed overload does.
            // Kept outside the try so a failed counter parse cannot skip it.
            author.AvgForward = author.AvgReply = author.AvgFansCountOfFans = 0;
            #endregion

            SetAuthorCrawlInfo(author, source);

            return(author);
        }
Пример #6
0
        /// <summary>
        /// Initialises the forward-tracking rule of a single post.
        /// </summary>
        /// <remarks>
        /// Clears <c>item.Tracking</c> and replaces <c>item.Tracking_Forward</c> with a fresh
        /// <c>ItemTracking</c> whose next-follow time is <c>Utilities.Epoch</c> and whose priority is 0.
        /// The follow status is <c>Normal</c> when <c>ItemCountData.ShouldFollow</c> accepts the item's
        /// current counts against the default reply/forward minimums, otherwise <c>Stop</c>.
        /// </remarks>
        /// <param name="item">Post whose tracking fields are overwritten.</param>
        /// <param name="source">Source of the post; not used by this implementation.</param>
        public static void SetItemTrackingRule(Item item, Enums.AuthorSource source)
        {
            item.Tracking = null;

            item.Tracking_Forward = new ItemTracking();
            item.Tracking_Forward.FollowNextTime = Utilities.Epoch;

            bool worthFollowing = ItemCountData.ShouldFollow(
                item.CurrentCount, -1, DefaultSettings.MinReply, DefaultSettings.MinForward);

            item.Tracking_Forward.FollowStatus = worthFollowing
                ? Enums.CrawlStatus.Normal
                : Enums.CrawlStatus.Stop;

            item.Tracking_Forward.FollowPriority = 0;
        }
Пример #7
0
        /// <summary>
        /// Configures how an author's fans and followed accounts are refreshed and sampled.
        /// </summary>
        /// <remarks>
        /// ListedTop, Partner and PublicLeader authors get <c>CrawlStatus.Normal</c> for the fan
        /// refresh; every other source gets <c>CrawlStatus.Stop</c>. Both sample methods are always
        /// <c>SampleMethod.First5000</c> and the interval is always 7 days.
        /// </remarks>
        /// <param name="author">Author whose fan/follower sampling fields are overwritten.</param>
        /// <param name="source">Source the author was obtained from.</param>
        public static void SetAuthorFansAndFollowersSampleRule(Author author, Enums.AuthorSource source)
        {
            bool keepRefreshing = source == Enums.AuthorSource.ListedTop
                                  || source == Enums.AuthorSource.Partner
                                  || source == Enums.AuthorSource.PublicLeader;

            author.Fans_RefreshStatus = keepRefreshing
                ? Enums.CrawlStatus.Normal
                : Enums.CrawlStatus.Stop;

            author.FollowerSampleMethode = Enums.SampleMethod.First5000;
            author.FansSampleMethode     = Enums.SampleMethod.First5000;
            author.Fans_IntervalDays     = 7;
        }
Пример #8
0
        /// <summary>
        /// Fills in the crawl bookkeeping of a freshly converted author.
        /// </summary>
        /// <remarks>
        /// Records the source and creation time, resets the update time, update count and next-refresh
        /// time of the post, fan and location refreshers (times to <c>Utilities.Epoch</c>, counts to 0),
        /// applies the per-source sampling rules from <c>WeiboUtilities</c>, and finally sets the
        /// internal subscription ID: <c>DefaultSettings.ToBeFollowed</c> for ListedTop, Partner and
        /// PublicLeader authors, <c>null</c> for everyone else.
        /// </remarks>
        /// <param name="author">Author to initialise.</param>
        /// <param name="source">Source to record for the author.</param>
        private static void SetAuthorCrawlInfo(Author author, Enums.AuthorSource source)
        {
            var epoch = Utilities.Epoch;

            author.AuthorSource = source;
            author.CreateTime   = DateTime.Now;

            // Profile and post refresh.
            WeiboUtilities.SetAuthorPostSampleRule(author, source);
            author.UpdateTime      = epoch;
            author.UpdateCount     = 0;
            author.NextRefreshTime = epoch;

            // Fans and followed accounts refresh.
            author.Fans_UpdateTime      = epoch;
            author.Fans_UpdateCount     = 0;
            author.Fans_NextRefreshTime = epoch;
            WeiboUtilities.SetAuthorFansAndFollowersSampleRule(author, source);

            // Location list refresh.
            author.Location_UpdateTime      = epoch;
            author.Location_UpdateCount     = 0;
            author.Location_NextRefreshTime = epoch;
            WeiboUtilities.SetAuthorLocationSampleRule(author, source);

            // Only these three sources are queued for an internal subscription.
            bool queueForSubscription = author.AuthorSource == Enums.AuthorSource.ListedTop
                                        || author.AuthorSource == Enums.AuthorSource.Partner
                                        || author.AuthorSource == Enums.AuthorSource.PublicLeader;
            if (queueForSubscription)
            {
                author.InternalSubscribeID = DefaultSettings.ToBeFollowed;
            }
            else
            {
                author.InternalSubscribeID = null;
            }
        }
Пример #9
0
        /// <summary>
        /// Converts a dynamically-typed post (status) returned by Sina into a local <see cref="Item"/>.
        /// </summary>
        /// <remarks>
        /// The conversion is best-effort: everything after the crawl identifiers runs inside a try
        /// block, and an exception raised while reading the dynamic object stops the conversion
        /// silently, returning the item as filled in so far.
        /// Author data is taken from <paramref name="user"/> when supplied, otherwise from
        /// <paramref name="author"/>, otherwise from <c>status.user</c>.
        /// When the post is a retweet, the original post is converted with this same overload,
        /// stored through <c>InsertOrUpdateItem</c> (if it has an ID) and linked as the parent.
        /// </remarks>
        /// <param name="source">Source of the post.</param>
        /// <param name="CrawlID">ID of the crawl that produced the post; also appended to the
        /// "ServerLocation" app setting to form the crawler name.</param>
        /// <param name="status">Dynamic post object.</param>
        /// <param name="user">Optional dynamic user object describing the post's author.</param>
        /// <param name="author">Optional already-converted author, used when <paramref name="user"/> is null.</param>
        /// <returns>The converted item; never null, but possibly only partially filled.</returns>
        public static Item ConvertToItem(Enums.AuthorSource source, string CrawlID, dynamic status, dynamic user = null, Author author = null)
        {
            Item item = new Item();

            item.CrawlID = CrawlID;
            item.Crawler = ConfigurationManager.AppSettings["ServerLocation"] + CrawlID;

            // Set media information.
            WeiboUtilities.SetItemMediaInfo(item);

            try
            {
                #region Crawl task data

                item.FetchTime          = DateTime.Now;
                item.UpdateTime         = null;
                item.ContentDetailLevel = Enums.ContentDetailLevel.Weibo;

                #endregion Crawl task data

                #region Basic data
                if (user == null)
                {
                    if (author != null)
                    {
                        item.Url = Utilities.GetItemUrl(author.AuthorID, status.mid);
                    }
                    else
                    {
                        item.Url = Utilities.GetItemUrl(status.user.id, status.mid);
                    }
                }
                else
                {
                    item.Url = Utilities.GetItemUrl(user.id, status.mid);
                }

                // The item ID is the MD5 hash of the URL; without a URL there is no ID.
                if (item.Url == null)
                {
                    item.ItemID = null;
                }
                else
                {
                    item.ItemID = Palas.Common.Utility.MD5Helper.getMd5Hash(item.Url);
                }
                item.ClientItemID = status.id;
                item.CleanTitle   = status.text;
                item.PubDate      = Utilities.ParseToDateTime(status.created_at);
                item.Location     = null;
                string checkinUrl = null;
                try
                {
                    checkinUrl = Utilities.GetCheckInUrl(status.text);
                }
                catch (Exception) { } // a missing check-in URL is not an error

                LocationDBManager.SetPoIDAndCoordinate(item, status, checkinUrl);

                if (user == null)
                {
                    if (author != null)
                    {
                        item.AuthorName         = author.AuthorName;
                        item.AuthorID           = author.AuthorID;
                        item.AuthorCertificated = author.Certification;
                        item.Source             = status.source;
                        item.AuthorImg          = author.AuthorImg;
                    }
                    else
                    {
                        item.AuthorName         = status.user.name;
                        item.AuthorID           = status.user.id;
                        item.AuthorCertificated = Utilities.GetCertificationType(status.user.verified_type, status.user.verified);
                        item.Source             = status.source;
                        item.AuthorImg          = status.user.profile_image_url;
                    }
                }
                else
                {
                    item.AuthorName         = user.name;
                    item.AuthorID           = user.id;
                    item.AuthorCertificated = Utilities.GetCertificationType(user.verified_type, user.verified);
                    item.Source             = status.source;
                    item.AuthorImg          = user.profile_image_url;
                }
                try
                {
                    item.AttachImg = status.original_pic;
                }
                catch (Exception) { } // posts without a picture simply have no attachment
                #endregion

                #region Item tracking
                item.CurrentCount = new ItemCountData(DateTime.Now);
                try
                {
                    item.CurrentCount.ForwardCount = int.Parse(status.reposts_count);
                    item.CurrentCount.ReplyCount   = int.Parse(status.comments_count);
                }
                catch (Exception) { } // counts that cannot be read stay at their defaults
                item.CountHistory    = new ItemCountData[1];
                item.CountHistory[0] = item.CurrentCount;
                WeiboUtilities.SetItemTrackingRule(item, source);
                #endregion Item tracking
                try
                {
                    if (status.retweeted_status != null)
                    {
                        // The retweeted post is a dynamic object too, so it must go through this
                        // overload with the arguments in this overload's order. Passing it as
                        // (status, source, CrawlID) targets the strongly-typed overload, which the
                        // runtime binder cannot match for a dynamic payload; the resulting exception
                        // was swallowed below and the parent post was never stored or linked.
                        Item tmp = ConvertToItem(source, CrawlID, status.retweeted_status);
                        if (tmp.ItemID != null)
                        {
                            InsertOrUpdateItem(tmp);
                        }
                        item.ParentItemID = tmp.ItemID;
                    }
                }
                catch (Exception) { } // a failing parent conversion must not lose the retweet itself
            }
            catch (Exception) {  } // deliberate best-effort: return whatever was filled in so far
            return(item);
        }
Пример #10
0
        /// <summary>
        /// Converts a strongly-typed post (status) returned by Sina into a local <see cref="Item"/>.
        /// </summary>
        /// <remarks>
        /// Crawl identifiers, media information and fetch bookkeeping are always set. The remaining
        /// fields are filled inside a try block; an exception there stops the conversion silently and
        /// the item is returned as filled in so far. When the post is a retweet, the original post is
        /// converted recursively, stored through <c>InsertOrUpdateItem</c> (if it has an ID) and
        /// linked as the parent.
        /// </remarks>
        /// <param name="status">Post entity returned by the Weibo SDK.</param>
        /// <param name="source">Source of the post.</param>
        /// <param name="CrawlID">ID of the crawl that produced the post; also appended to the
        /// "ServerLocation" app setting to form the crawler name.</param>
        /// <returns>The converted item; never null, but possibly only partially filled.</returns>
        public static Item ConvertToItem(NetDimension.Weibo.Entities.status.Entity status, Enums.AuthorSource source, string CrawlID)
        {
            Item item = new Item
            {
                CrawlID = CrawlID,
                Crawler = ConfigurationManager.AppSettings["ServerLocation"] + CrawlID
            };

            // Set media information.
            WeiboUtilities.SetItemMediaInfo(item);

            // Crawl task data.
            item.FetchTime          = DateTime.Now;
            item.UpdateTime         = null;
            item.ContentDetailLevel = Enums.ContentDetailLevel.Weibo;

            try
            {
                // Basic data.
                var poster = status.User;

                item.Url                = Utilities.GetItemUrl(poster.ID, status.MID);
                item.ItemID             = Palas.Common.Utility.MD5Helper.getMd5Hash(item.Url);
                item.ClientItemID       = status.ID;
                item.CleanTitle         = status.Text;
                item.PubDate            = Utilities.ParseToDateTime(status.CreatedAt);
                item.Location           = null;
                item.PoID               = null;
                item.AuthorName         = poster.Name;
                item.AuthorID           = poster.ID;
                item.AuthorImg          = poster.ProfileImageUrl;
                item.AuthorCertificated = Utilities.GetCertificationType(poster.VerifiedType, poster.Verified);
                item.Source             = status.Source;
                item.AttachImg          = status.OriginalPictureUrl;

                // Item tracking: the current counts double as the first history entry.
                ItemCountData counts = new ItemCountData(DateTime.Now)
                {
                    ForwardCount = status.RepostsCount,
                    ReplyCount   = status.CommentsCount
                };
                item.CurrentCount = counts;
                item.CountHistory = new ItemCountData[] { counts };

                WeiboUtilities.SetItemTrackingRule(item, source);

                // A retweet: convert and store the original post, then link to it.
                if (status.RetweetedStatus != null)
                {
                    Item parent = ConvertToItem(status.RetweetedStatus, source, CrawlID);
                    if (parent.ItemID != null)
                    {
                        InsertOrUpdateItem(parent);
                    }
                    item.ParentItemID = parent.ItemID;
                }
            }
            catch (Exception) { } // deliberate best-effort: return whatever was filled in so far
            return item;
        }
Пример #11
0
        /// <summary>
        /// Converts a strongly-typed user returned by Sina into an <see cref="Author"/>.
        /// </summary>
        /// <remarks>
        /// Copies the registration fields and activity counters, resets the average counters to 0 and
        /// applies the crawl bookkeeping through <c>SetAuthorCrawlInfo</c>. A non-empty profile URL that
        /// does not contain "http:" is prefixed: with "http://weibo.com/" when it contains '/',
        /// otherwise with "http://blog.sina.com.cn/".
        /// </remarks>
        /// <param name="user">User entity returned by the Weibo SDK.</param>
        /// <param name="source">Source to record for the author.</param>
        /// <returns>The converted author.</returns>
        public static Author ConvertToAuthor(NetDimension.Weibo.Entities.user.Entity user, Enums.AuthorSource source)
        {
            // Normalise the homepage first; scheme-less values are treated as relative identifiers.
            string homepage = user.ProfileUrl;
            if (!string.IsNullOrEmpty(homepage) && !homepage.Contains("http:"))
            {
                string host = homepage.Contains('/') ? "http://weibo.com/" : "http://blog.sina.com.cn/";
                homepage = host + homepage;
            }

            Author author = new Author
            {
                // Registration info.
                AuthorID          = user.ID,
                AuthorName        = user.ScreenName,
                RealName          = user.Name,
                Certification     = Utilities.GetCertificationType(user.VerifiedType, user.Verified),
                CertificationInfo = user.VerifiedReason,
                Gender            = Utilities.GetGender(user.Gender),
                RegionID          = RegionDBManager.GetRegionID(user.Location),
                Description       = user.Description,
                AuthorImg         = user.ProfileImageUrl,
                Homepage          = homepage,
                RegisterTime      = Utilities.ParseToDateTime(user.CreatedAt),

                // Activity data.
                FansCount         = user.FollowersCount,
                FollowCount       = user.FriendsCount,
                CloseFriendsCount = user.BIFollowersCount,
                PostCount         = user.StatusesCount
            };

            // Averages start at zero for a newly converted author.
            author.AvgForward = author.AvgReply = author.AvgFansCountOfFans = 0;

            SetAuthorCrawlInfo(author, source);

            return author;
        }