/// <summary>
/// Runs the follow-user job loop until <c>StopFlag</c> is set.
/// Each account follows at most a few hundred users per day, so a single worker thread is enough.
/// </summary>
/// <param name="Pipeline">The pipeline that invoked the job; not used by this implementation.</param>
/// <returns>
/// "Nothing to do" when no follow succeeded and no error was recorded;
/// otherwise a summary string containing the success and error counts.
/// </returns>
public string DoOneJob(IPipeline Pipeline)
{
    int SuccCount = 0, ErrCount = 0;
    // Work is allowed as soon as the current time passes this instant.
    DateTime nextWorkTime = Utilities.Epoch;

    while (!StopFlag)
    {
        if (DateTime.Now > nextWorkTime)
        {
            string authorID = GetNextJob();
            if (authorID != null)
            {
                try
                {
                    SendMsg("待关注用户: " + authorID);
                    var status = AccountDBManager.FollowUser(authorID);
                    if (status == AccountDBManager.FollowStatus.Succ)
                    {
                        SendMsg("关注用户成功");
                        CntData.Tick();
                        SuccCount++;
                    }
                    else if (status == AccountDBManager.FollowStatus.Exception)
                    {
                        SendMsg("出现异常,休息1小时后再尝试");
                        nextWorkTime = DateTime.Now.AddHours(1);
                        // Count the failure so the summary does not report "Nothing to do"
                        // after a run that only produced failed follow attempts.
                        ErrCount++;
                    }
                    else
                    {
                        SendMsg("账号关注频率受限,1分钟后再试");
                        // Back off through nextWorkTime instead of a blocking 60 s sleep,
                        // so the loop keeps polling StopFlag every IntervalMS while waiting.
                        nextWorkTime = DateTime.Now.AddMinutes(1);
                    }
                }
                catch (Exception ex)
                {
                    nextWorkTime = DateTime.Now.AddHours(1);
                    Logger.Error(ex.ToString());
                    ErrCount++;
                }
            }
            else
            {
                SendMsg("所有任务已完成");
            }
        }

        Thread.Sleep(IntervalMS);
    }

    // Reset the flag so the job can be started again.
    StopFlag = false;
    return SuccCount == 0 && ErrCount == 0
        ? "Nothing to do"
        : string.Format("OneJob Done. Succ {0} Err {1}", SuccCount, ErrCount);
}
/// <summary>
/// Runs the location-scan job loop until <c>StopFlag</c> is set. For each location job it
/// fetches the users/statuses posted around the location since the last refresh, stores them,
/// and pushes the job back with updated refresh bookkeeping.
/// </summary>
/// <param name="Pipeline">The pipeline that invoked the job; not used by this implementation.</param>
/// <returns>
/// "Nothing to do" when no job completed and no error was recorded;
/// otherwise a summary string containing the success and error counts.
/// </returns>
public string DoOneJob(IPipeline Pipeline)
{
    int SuccCount = 0, ErrCount = 0;
    // Work is allowed as soon as the current time passes this instant.
    DateTime nextWorkTime = Utilities.Epoch;

    while (!StopFlag)
    {
        if (DateTime.Now > nextWorkTime)
        {
            Location loc = GetNextJob();
            if (loc != null)
            {
                try
                {
                    // Use one timestamp for both the bookkeeping and the end of the fetch
                    // window, so the stored LastRefreshTime is exactly the window boundary.
                    DateTime lastTime = loc.LastRefreshTime;
                    DateTime now = DateTime.Now;
                    loc.LastRefreshTime = now;
                    loc.NextRefreshTime = now.AddMinutes(loc.IntervalMins);

                    // Fetch the people (and their statuses) around the location.
                    List<dynamic> users = new List<dynamic>();
                    List<dynamic> statuses = new List<dynamic>();
                    try
                    {
                        SendMsg(string.Format("正在刷新{0}周围的位置动态", loc.Title));
                        WeiboAPI.GetUsersNearCBD(
                            loc.Lon, loc.Lat, loc.Radius,
                            Utilities.DateTime2UnixTime(lastTime),
                            Utilities.DateTime2UnixTime(now),
                            loc.LocationSampleMethode, users, statuses);
                    }
                    catch (IOException)
                    {
                        ErrCount++;
                        nextWorkTime = WeiboAPI.rateLimitStatus.ResetTime;
                        // The window [lastTime, now] was not (fully) fetched: keep the old
                        // start so the next refresh covers it instead of silently skipping it.
                        loc.LastRefreshTime = lastTime;
                    }
                    catch (WeiboException ex)
                    {
                        SendMsg("获取CBD周围动态时发生错误,见日志");
                        ErrCount++;
                        nextWorkTime = WeiboAPI.rateLimitStatus.ResetTime;
                        loc.LastRefreshTime = lastTime;
                        Logger.Error(ex.ToString());
                    }

                    SendMsg(string.Format("{0}的位置动态获取到{1}条,开始插入数据库", loc.Title, users.Count));

                    // users[i] and statuses[i] are paired by index; whatever was collected
                    // before a failure is still stored.
                    for (int i = 0; i < users.Count; ++i)
                    {
                        if (i >= statuses.Count)
                        {
                            break;
                        }
                        if (statuses[i] == null || users[i] == null)
                        {
                            continue;
                        }

                        Item item = ItemDBManager.ConvertToItem(Enums.AuthorSource.LocationScan, CrawlID, statuses[i], users[i]);
                        if (item.PoID == null)
                        {
                            // Fall back to the scanned location's category when the item has no PoID.
                            item.PoID = item.PoIDSource = loc.CategoryID;
                        }
                        ItemDBManager.InsertOrUpdateItem(item);

                        Author author = AuthorDBManager.ConvertToAuthor(users[i], Enums.AuthorSource.LocationScan);
                        AuthorDBManager.InsertOrUpdateAuthorInfo(author);
                        CntData.Tick();
                    }

                    SuccCount++;
                    SendMsg(string.Format("{0}的任务完成", loc.Title));
                    // Skip the rest/sleep below and pick up the next job immediately
                    // (the finally block still runs first).
                    continue;
                }
                catch (Exception ex)
                {
                    ErrCount++;
                    nextWorkTime = WeiboAPI.rateLimitStatus.ResetTime;
                    SendMsg(ex.ToString());
                    Logger.Error(ex.ToString());
                }
                finally
                {
                    // Always hand the job back, whether it succeeded or failed.
                    loc.RefreshCount++;
                    loc.RefreshStatus = Enums.CrawlStatus.Normal;
                    LocationDBManager.PushbackLoationJob(loc);
                }
            }
        }

        SendMsg(string.Format("休息{0}秒", IntervalMS / 1000));
        Thread.Sleep(IntervalMS);
    }

    // Reset the flag so the job can be started again.
    StopFlag = false;
    return SuccCount == 0 && ErrCount == 0
        ? "Nothing to do"
        : string.Format("OneJob Done. Succ {0} Err {1}", SuccCount, ErrCount);
}
/// <summary>
/// Runs the relationship-refresh job loop until <c>StopFlag</c> is set. For each author job it
/// fetches the author's followers and friend IDs, stores the users and relations, updates the
/// average follower count of the author's followers, and pushes the job back.
/// </summary>
/// <param name="Pipeline">The pipeline that invoked the job; not used by this implementation.</param>
/// <returns>
/// "Nothing to do" when no job completed and no error was recorded;
/// otherwise a summary string containing the success and error counts.
/// </returns>
public string DoOneJob(IPipeline Pipeline)
{
    int SuccCount = 0, ErrCount = 0;
    // Work is allowed as soon as the current time passes this instant.
    DateTime nextWorkTime = Utilities.Epoch;

    while (!StopFlag)
    {
        if (DateTime.Now > nextWorkTime)
        {
            Author author = GetNextJob();
            if (author != null)
            {
                try
                {
                    // Authors that are not "red skin" are refreshed only once and then stopped.
                    if (WeiboUtilities.IsRedSkin(author.AuthorSource))
                    {
                        author.Fans_RefreshStatus = Enums.CrawlStatus.Normal;
                    }
                    else
                    {
                        author.Fans_RefreshStatus = Enums.CrawlStatus.Stop;
                    }

                    #region Refresh the author's followers
                    List<NetDimension.Weibo.Entities.user.Entity> users = new List<NetDimension.Weibo.Entities.user.Entity>();
                    bool followersFetched = false;
                    try
                    {
                        SendMsg(string.Format("正在刷新{0}的粉丝", author.AuthorName));
                        WeiboAPI.GetFollowers(author.AuthorID, author.FollowerSampleMethode, users);
                        followersFetched = true;
                    }
                    catch (IOException)
                    {
                        // NOTE(review): unlike the generic handler below, this path does not reset
                        // Fans_RefreshStatus to Normal — confirm that is intentional.
                        ErrCount++;
                        nextWorkTime = WeiboAPI.rateLimitStatus.ResetTime;
                    }
                    catch (Exception ex)
                    {
                        SendMsg("获取粉丝列表时发生错误,见日志");
                        ErrCount++;
                        nextWorkTime = WeiboAPI.rateLimitStatus.ResetTime;
                        author.Fans_RefreshStatus = Enums.CrawlStatus.Normal;
                        Logger.Error(ex.ToString());
                    }

                    SendMsg(string.Format("{0}的粉丝抓取到{1}个,开始插入数据库", author.AuthorName, users.Count));

                    // Average follower count of the author's followers.
                    double avg = 0;
                    for (int i = 0; i < users.Count; ++i)
                    {
                        var user = AuthorDBManager.ConvertToAuthor(users[i], Enums.AuthorSource.FansDiscover);
                        AuthorDBManager.InsertOrUpdateAuthorInfo(user);
                        CntData.Tick();
                        AuthorRelationDBManager.InsertOrUpdateRelation(user.AuthorID, author.AuthorID);
                        avg += (double)users[i].FollowersCount / (double)users.Count;
                    }
                    #endregion

                    #region Refresh the author's friend (following) list
                    try
                    {
                        IEnumerable<string> friends = null;
                        SendMsg(string.Format("{0}的粉丝插入完成,开始获取他的关注列表", author.AuthorName));
                        friends = WeiboAPI.GetFriendsIDs(author.AuthorID, author.FansSampleMethode);
                        if (friends != null)
                        {
                            foreach (var friendID in friends)
                            {
                                // NOTE(review): the argument order here matches the follower loop
                                // above, although the relationship direction is the opposite
                                // (the author follows the friend) — verify against
                                // InsertOrUpdateRelation's parameter meaning.
                                AuthorRelationDBManager.InsertOrUpdateRelation(friendID, author.AuthorID);
                            }
                        }
                    }
                    catch (IOException)
                    {
                        ErrCount++;
                        nextWorkTime = WeiboAPI.rateLimitStatus.ResetTime;
                    }
                    catch (Exception ex)
                    {
                        SendMsg("获取关注列表时发生错误,见日志");
                        ErrCount++;
                        nextWorkTime = WeiboAPI.rateLimitStatus.ResetTime;
                        author.Fans_RefreshStatus = Enums.CrawlStatus.Normal;
                        Logger.Error(ex.ToString());
                    }
                    SendMsg(string.Format("{0}的关系刷新任务完成", author.AuthorName));
                    #endregion

                    // Do not wipe the stored average with 0 when the follower fetch failed
                    // and returned nothing; a genuine (successful) empty result still stores 0.
                    if (followersFetched || users.Count > 0)
                    {
                        author.AvgFansCountOfFans = (int)avg;
                    }

                    SuccCount++;
                    // Skip the sleep below and pick up the next job immediately
                    // (the finally block still runs first).
                    continue;
                }
                catch (Exception ex)
                {
                    ErrCount++;
                    nextWorkTime = WeiboAPI.rateLimitStatus.ResetTime;
                    author.Fans_RefreshStatus = Enums.CrawlStatus.Normal;
                    SendMsg(ex.ToString());
                    Logger.Error(ex.ToString());
                }
                finally
                {
                    // Always hand the job back, whether it succeeded or failed.
                    author.Fans_UpdateCount++;
                    author.Fans_NextRefreshTime = DateTime.Now.AddDays(author.Fans_IntervalDays);
                    AuthorDBManager.PushbackRelationshipJob(author);
                }
            }
        }

        Thread.Sleep(IntervalMS);
    }

    // Reset the flag so the job can be started again.
    StopFlag = false;
    return SuccCount == 0 && ErrCount == 0
        ? "Nothing to do"
        : string.Format("OneJob Done. Succ {0} Err {1}", SuccCount, ErrCount);
}
/// <summary>
/// Runs the subscription-refresh job loop until <c>StopFlag</c> is set. For each login account
/// job it fetches the latest statuses of the accounts it follows, stores them, and pushes the
/// job back.
/// </summary>
/// <param name="Pipeline">The pipeline that invoked the job; not used by this implementation.</param>
/// <returns>
/// "Nothing to do" when no job completed and no error was recorded;
/// otherwise a summary string containing the success and error counts.
/// </returns>
public string DoOneJob(IPipeline Pipeline)
{
    int succeeded = 0;
    int failed = 0;
    // Work is allowed as soon as the current time passes this instant.
    DateTime resumeAt = Utilities.Epoch;

    while (!StopFlag)
    {
        bool jobCompleted = false;

        LoginAccountEntity account = null;
        if (DateTime.Now > resumeAt)
        {
            account = GetNextJob();
        }

        if (account != null)
        {
            try
            {
                // Refresh the statuses of the subscribed (followed) accounts.
                var fetched = new List<NetDimension.Weibo.Entities.status.Entity>();
                try
                {
                    SendMsg(string.Format("正在刷新{0}关注的最新微博", account.UserName));
                    WeiboAPI.FetchStatus(account, fetched);
                }
                catch (IOException)
                {
                    failed++;
                    resumeAt = WeiboAPI.rateLimitStatus.ResetTime;
                }
                catch (Exception fetchError)
                {
                    SendMsg("获取新微博时发生错误,见日志");
                    failed++;
                    resumeAt = WeiboAPI.rateLimitStatus.ResetTime;
                    SendMsg(fetchError.ToString());
                    Logger.Error(fetchError.ToString());
                }

                SendMsg(string.Format("{0}关注的微博抓取到{1}条,开始插入", account.UserName, fetched.Count));

                // Whatever was collected (possibly nothing after a failed fetch) is stored.
                foreach (var status in fetched)
                {
                    var item = ItemDBManager.ConvertToItem(status, Enums.AuthorSource.PublicLeader, CrawlID);
                    ItemDBManager.InsertOrUpdateItem(item);
                    CntData.Tick();
                }

                SendMsg(string.Format("{0}的关注微博刷新任务完成", account.UserName));
                succeeded++;
                jobCompleted = true;
            }
            catch (Exception jobError)
            {
                failed++;
                resumeAt = WeiboAPI.rateLimitStatus.ResetTime;
                SendMsg(jobError.ToString());
                Logger.Error(jobError.ToString());
            }
            finally
            {
                // Always hand the job back, whether it succeeded or failed.
                AccountDBManager.PushbackSubscribeJob(account);
            }
        }

        if (jobCompleted)
        {
            // A finished job skips the rest period and moves straight to the next one.
            continue;
        }

        SendMsg(string.Format("休息{0}秒", IntervalMS / 1000));
        Thread.Sleep(IntervalMS);
    }

    // Reset the flag so the job can be started again.
    StopFlag = false;

    if (succeeded == 0 && failed == 0)
    {
        return "Nothing to do";
    }
    return string.Format("OneJob Done. Succ {0} Err {1}", succeeded, failed);
}
/// <summary>
/// Runs the author-census job loop until <c>StopFlag</c> is set. For each author job it
/// refreshes the author's profile, fetches the author's latest statuses, stores them, updates
/// the average repost/comment counts, and pushes the job back.
/// </summary>
/// <param name="Pipeline">The pipeline that invoked the job; not used by this implementation.</param>
/// <returns>
/// "Nothing to do" when no job completed and no error was recorded;
/// otherwise a summary string containing the success and error counts.
/// </returns>
public string DoOneJob(IPipeline Pipeline)
{
    int SuccCount = 0, ErrCount = 0;
    // Work is allowed as soon as the current time passes this instant.
    DateTime nextWorkTime = Utilities.Epoch;

    while (!StopFlag)
    {
        if (DateTime.Now > nextWorkTime)
        {
            Author author = GetNextJob();
            if (author != null)
            {
                try
                {
                    // Marked as Stop up front; the generic error handlers set it back to Normal.
                    author.RefreshStatus = Enums.CrawlStatus.Stop;

                    try
                    {
                        SendMsg(string.Format("正在更新用户{0}的个人信息", author.AuthorName));
                        // Refresh the author's profile information.
                        NetDimension.Weibo.Entities.user.Entity user = WeiboAPI.GetAuthorInfo(author.AuthorID);
                        var refreshedAuthor = AuthorDBManager.ConvertToAuthor(user, author.AuthorSource);
                        // Persist the freshly fetched profile. The original stored the stale job
                        // object here, discarding the data that had just been downloaded.
                        AuthorDBManager.InsertOrUpdateAuthorInfo(refreshedAuthor);
                    }
                    catch (IOException)
                    {
                        ErrCount++;
                        nextWorkTime = WeiboAPI.rateLimitStatus.ResetTime;
                    }
                    catch (Exception ex)
                    {
                        SendMsg("获取个人信息时发生错误,见日志");
                        ErrCount++;
                        nextWorkTime = WeiboAPI.rateLimitStatus.ResetTime;
                        author.RefreshStatus = Enums.CrawlStatus.Normal;
                        Logger.Error(ex.ToString());
                    }

                    SendMsg(string.Format("用户{0}的个人信息更新完毕,开始刷新其最新微博", author.AuthorName));

                    List<NetDimension.Weibo.Entities.status.Entity> result = new List<NetDimension.Weibo.Entities.status.Entity>();
                    bool statusesFetched = false;
                    try
                    {
                        // Fetch the author's most recent statuses.
                        WeiboAPI.GetAuthorLatestStatus(author, result, author.PostSampleMethode);
                        statusesFetched = true;
                    }
                    catch (IOException)
                    {
                        ErrCount++;
                        nextWorkTime = WeiboAPI.rateLimitStatus.ResetTime;
                    }
                    catch (Exception ex)
                    {
                        SendMsg("获取最新微博时发生错误,见日志");
                        ErrCount++;
                        nextWorkTime = WeiboAPI.rateLimitStatus.ResetTime;
                        author.RefreshStatus = Enums.CrawlStatus.Normal;
                        Logger.Error(ex.ToString());
                    }

                    SendMsg(string.Format("找到{0}的{1}条最新微博,开始插入数据库并统计相关信息", author.AuthorName, result.Count));

                    // Accumulate the average repost and comment counts while storing the items.
                    double avgForward = 0, avgReply = 0;
                    for (int i = 0; i < result.Count; ++i)
                    {
                        avgForward += (double)result[i].RepostsCount / (double)result.Count;
                        avgReply += (double)result[i].CommentsCount / (double)result.Count;
                        var item = ItemDBManager.ConvertToItem(result[i], author.AuthorSource, CrawlID);
                        ItemDBManager.InsertOrUpdateItem(item);
                        CntData.Tick();
                    }

                    // Do not wipe the stored averages with 0 when the fetch failed and returned
                    // nothing; a genuine (successful) empty result still stores 0.
                    if (statusesFetched || result.Count > 0)
                    {
                        author.AvgForward = (int)avgForward;
                        author.AvgReply = (int)avgReply;
                    }

                    SuccCount++;
                    // Skip the rest/sleep below and pick up the next job immediately
                    // (the finally block still runs first).
                    continue;
                }
                catch (Exception ex)
                {
                    ErrCount++;
                    nextWorkTime = WeiboAPI.rateLimitStatus.ResetTime;
                    author.RefreshStatus = Enums.CrawlStatus.Normal;
                    SendMsg(ex.ToString());
                    // Also write the failure to the log, not only to the message channel.
                    Logger.Error(ex.ToString());
                }
                finally
                {
                    // Always hand the job back, whether it succeeded or failed.
                    author.UpdateCount++;
                    author.UpdateTime = DateTime.Now;
                    author.NextRefreshTime = author.UpdateTime.AddDays(author.IntervalDays);
                    AuthorDBManager.PushbackCensusJob(author);
                    SendMsg(string.Format("用户{0}的普查任务完成", author.AuthorName));
                }
            }
        }

        SendMsg(string.Format("休息{0}秒", IntervalMS / 1000));
        Thread.Sleep(IntervalMS);
    }

    // Reset the flag so the job can be started again.
    StopFlag = false;
    return SuccCount == 0 && ErrCount == 0
        ? "Nothing to do"
        : string.Format("OneJob Done. Succ {0} Err {1}", SuccCount, ErrCount);
}
/// <summary>
/// Runs the location-history job loop until <c>StopFlag</c> is set. For each author job it
/// fetches the author's recent statuses that carry location information, stores the ones that
/// resolve to a PoID, and pushes the job back.
/// </summary>
/// <param name="Pipeline">The pipeline that invoked the job; not used by this implementation.</param>
/// <returns>
/// "Nothing to do" when no job completed and no error was recorded;
/// otherwise a summary string containing the success and error counts.
/// </returns>
public string DoOneJob(IPipeline Pipeline)
{
    int succeeded = 0;
    int failed = 0;
    // Work is allowed as soon as the current time passes this instant.
    DateTime resumeAt = Utilities.Epoch;

    while (!StopFlag)
    {
        bool jobCompleted = false;

        Author author = null;
        if (DateTime.Now > resumeAt)
        {
            author = GetNextJob();
        }

        if (author != null)
        {
            try
            {
                // Fetch the author's recent statuses that carry location information.
                var history = new List<dynamic>();
                try
                {
                    SendMsg(string.Format("正在刷新{0}的位置动态", author.AuthorName));
                    WeiboAPI.GetUserStatusLocationHistory(author, history, author.LocationSampleMethode);
                }
                catch (IOException)
                {
                    failed++;
                    resumeAt = WeiboAPI.rateLimitStatus.ResetTime;
                }
                catch (Exception fetchError)
                {
                    SendMsg("获取位置动态时发生错误,见日志");
                    failed++;
                    resumeAt = WeiboAPI.rateLimitStatus.ResetTime;
                    SendMsg(fetchError.ToString());
                    Logger.Error(fetchError.ToString());
                }

                SendMsg(string.Format("{0}的位置动态获取到{1}条,开始插入数据库", author.AuthorName, history.Count));

                foreach (dynamic entry in history)
                {
                    Item item = ItemDBManager.ConvertToItem(author.AuthorSource, CrawlID, entry, null, author);
                    // Only items that resolved to a PoID are stored and counted.
                    if (item.PoID != null)
                    {
                        ItemDBManager.InsertOrUpdateItem(item);
                        CntData.Tick();
                    }
                }

                succeeded++;
                SendMsg(string.Format("{0}的任务完成", author.AuthorName));
                jobCompleted = true;
            }
            catch (Exception jobError)
            {
                failed++;
                resumeAt = WeiboAPI.rateLimitStatus.ResetTime;
                SendMsg(jobError.ToString());
                Logger.Error(jobError.ToString());
            }
            finally
            {
                // Always hand the job back, whether it succeeded or failed.
                author.Location_RefreshStatus = Enums.CrawlStatus.Normal;
                author.Location_UpdateTime = DateTime.Now;
                author.Location_NextRefreshTime = DateTime.Now.AddDays(author.Location_IntervalDays);
                author.Location_UpdateCount++;
                AuthorDBManager.PushbackLocHistJob(author);
            }
        }

        if (jobCompleted)
        {
            // A finished job skips the rest period and moves straight to the next one.
            continue;
        }

        SendMsg(string.Format("休息{0}秒", IntervalMS / 1000));
        Thread.Sleep(IntervalMS);
    }

    // Reset the flag so the job can be started again.
    StopFlag = false;

    if (succeeded == 0 && failed == 0)
    {
        return "Nothing to do";
    }
    return string.Format("OneJob Done. Succ {0} Err {1}", succeeded, failed);
}