/// <summary>
/// Starts the RSS crawler loop on a background task and returns immediately;
/// the started task is not returned to the caller.
/// </summary>
/// <remarks>
/// Each pass loads the active channels ordered by id, skips channels that are flagged
/// as sleeping in <see cref="MemoryCache.Default"/>, fetches the remaining ones, inserts
/// feed items that are not stored yet and records the channel's error status.
/// The loop has no exit condition; it only stops if an exception escapes the handlers.
/// </remarks>
/// <param name="logger">Receives progress, warning and error messages.</param>
/// <param name="channelFetchingDelay">Produces the randomized sleep applied to a channel whose fetch yielded no new item.</param>
/// <param name="channelErrorDelay">How long a channel sleeps after processing it threw an exception.</param>
/// <param name="errorDelay">Additional pause after a failure that aborted a whole pass.</param>
/// <param name="loopDelay">Pause between two consecutive passes.</param>
public static void InitializeRssCrawler(ILogger logger, RandomTimeSpan channelFetchingDelay, TimeSpan channelErrorDelay, TimeSpan errorDelay, TimeSpan loopDelay)
{
    System.Threading.Tasks.Task.Run(() =>
    {
        //var channelHubCtx = GlobalHost.ConnectionManager.GetHubContext<ChannelHub>();
        ObjectCache cache = MemoryCache.Default;

        while (true)
        {
            // Drop the in-memory de-duplication set when the day of month changes.
            // The day is read once so the comparison and the assignment cannot
            // straddle midnight.
            var today = DateTime.Now.Day;
            if (_currentDate != today)
            {
                _feedCache = new HashSet<string>();
                _currentDate = today;
            }

            // Kept outside the try so the outer handler can report the last URL worked on.
            var feedUrl = string.Empty;
            try
            {
                List<RssChannelsRow> channels = SimpleFeedlyDatabaseAccess.GetActiveChannels().OrderBy(x => x.Id).ToList();
                var count = 0;

                foreach (var channel in channels)
                {
                    feedUrl = channel.Link;
                    count++;
                    logger.Info($"- [{count}/{channels.Count}] Working on channel: {channel.Id} | {feedUrl}");
                    //channelHubCtx.Clients.All.updateChannelProgress(new { Message = $"<strong>Fetching</strong> <a href='{channel.Link}' target='_blank'>{channel.Link}</a>", IsSleeping = false });

                    if (string.IsNullOrWhiteSpace(feedUrl))
                    {
                        logger.Warn($"=> Channel has empty link: {channel.Id}");
                        continue;
                    }

                    // A cache entry under this key means the channel is backing off;
                    // the entry expires on its own.
                    var channelSleepingCacheKey = "channel_is_sleeping|" + channel.Id;
                    var isSleeping = cache[channelSleepingCacheKey] as bool?;
                    if (isSleeping == true)
                    {
                        logger.Info($" + sleeping...");
                        continue;
                    }

                    try
                    {
                        var feed = RssCrawler.GetFeedsFromChannel(feedUrl, channel.RssCrawlerEngine, false, out RssCrawlerEngine usedEngine, out Exception fetchFeedError);

                        // update default engine for channel (cleared when the fetch produced no feed)
                        SimpleFeedlyDatabaseAccess.UpdateChannelDefaultEngine((long)channel.Id, feed == null ? (RssCrawlerEngine?)null : usedEngine);

                        if (feed == null)
                        {
                            SimpleFeedlyDatabaseAccess.UpdateChannelErrorStatus((long)channel.Id, true, fetchFeedError == null ? null : JsonConvert.SerializeObject(fetchFeedError));
                            if (fetchFeedError != null)
                            {
                                ErrorHandle(fetchFeedError, feedUrl);
                            }

                            continue;
                        }

                        logger.Info($" + Number of items: {feed.Items.Count}");
                        var hasNew = false;

                        foreach (var fItem in feed.Items)
                        {
                            if (!StringUtils.IsUrl(fItem.Link))
                            {
                                continue;
                            }

                            var feedItemKey = GenerateFeedItemKey(fItem);
                            if (string.IsNullOrWhiteSpace(feedItemKey) || string.IsNullOrWhiteSpace(fItem.Link))
                            {
                                logger.Info($" + Skipped item: {JsonConvert.SerializeObject(fItem)}");
                                continue;
                            }

                            // Built only after the item key is known to be usable.
                            var feedCacheKey = GenerateFeedCacheKey((long)channel.Id, feedItemKey);
                            if (_feedCache.Contains(feedCacheKey))
                            {
                                continue;
                            }

                            if (!SimpleFeedlyDatabaseAccess.CheckExistFeedItem((long)channel.Id, feedItemKey))
                            {
                                var feedItem = new RssFeedItemsRow
                                {
                                    ChannelId = channel.Id,
                                    FeedItemKey = feedItemKey,
                                    Title = string.IsNullOrWhiteSpace(fItem.Title) ? fItem.Link : fItem.Title,
                                    Link = fItem.Link,
                                    Description = fItem.Description,
                                    PublishingDate = fItem.PublishingDate,
                                    Author = fItem.Author,
                                    Content = fItem.Content
                                };
                                SimpleFeedlyDatabaseAccess.InsertFeedItem(feedItem);
                                hasNew = true;
                            }

                            // Remembered only after the database calls succeeded, so a
                            // failed insert is attempted again on a later pass.
                            _feedCache.Add(feedCacheKey);
                        }

                        SimpleFeedlyDatabaseAccess.UpdateChannelErrorStatus((long)channel.Id, false, null);

                        if (!hasNew)
                        {
                            // Nothing new: let the channel rest for a randomized period.
                            cache.Add(channelSleepingCacheKey, true, DateTime.Now.Add(channelFetchingDelay.GenerateRamdomValue()));
                        }
                    }
                    catch (Exception err)
                    {
                        // Log and back off first, so the original failure is not lost
                        // if the status update below throws as well.
                        logger.Error(err, $"An error occurred on channel: {channel.Id} | {feedUrl}");
                        cache.Add(channelSleepingCacheKey, true, DateTime.Now.Add(channelErrorDelay));
                        SimpleFeedlyDatabaseAccess.UpdateChannelErrorStatus((long)channel.Id, true, JsonConvert.SerializeObject(err));
                        ErrorHandle(err, feedUrl);
                    }
                }
            }
            catch (Exception ex)
            {
                logger.Error(ex, "An error occurred");
                ErrorHandle(ex, feedUrl);
                System.Threading.Thread.Sleep(errorDelay);
            }

            //channelHubCtx.Clients.All.updateChannelProgress(new { Message = "<span class='link-muted'>Crawler's sleeping...</span>", IsSleeping = true });

            // _currentDate is intentionally not refreshed here: doing so would hide a
            // day change that happened during the pass and skip the reset of _feedCache
            // at the top of the loop.

            // we should delay a little bit, some seconds maybe
            System.Threading.Thread.Sleep(loopDelay);
        }
    });
}
/// <summary>
/// Asynchronously waits for the delay produced by
/// <c>RandomTimeSpan.Between(minSecond, maxSecond)</c> and then completes with 0.
/// </summary>
/// <param name="minSecond">First bound passed to <c>RandomTimeSpan.Between</c> (presumably seconds, going by the name; confirm against that helper).</param>
/// <param name="maxSecond">Second bound passed to <c>RandomTimeSpan.Between</c> (presumably seconds, going by the name; confirm against that helper).</param>
/// <returns>A task whose result is always 0.</returns>
private static async Task<int> FakeAsync(int minSecond, int maxSecond)
{
    var pause = RandomTimeSpan.Between(minSecond, maxSecond);
    await Task.Delay(pause);

    return 0;
}