Exemple #1
0
        public static void InitializeRssCrawler(ILogger logger, RandomTimeSpan channelFetchingDelay, TimeSpan channelErrorDelay, TimeSpan errorDelay, TimeSpan loopDelay)
        {
            System.Threading.Tasks.Task.Run(() =>
            {
                //var channelHubCtx = GlobalHost.ConnectionManager.GetHubContext<ChannelHub>();
                ObjectCache cache = MemoryCache.Default;

                while (true)
                {
                    if (_currentDate != DateTime.Now.Day)
                    {
                        _feedCache   = new HashSet <string>();
                        _currentDate = DateTime.Now.Day;
                    }

                    var feedUrl = string.Empty;
                    try
                    {
                        List <RssChannelsRow> channels = channels = SimpleFeedlyDatabaseAccess.GetActiveChannels().OrderBy(x => x.Id).ToList();

                        var count = 0;

                        foreach (var channel in channels)
                        {
                            feedUrl = channel.Link;

                            count++;

                            logger.Info($"- [{count}/{channels.Count}] Working on channel: {channel.Id} | {feedUrl}");
                            //channelHubCtx.Clients.All.updateChannelProgress(new { Message = $"<strong>Fetching</strong> <a href='{channel.Link}' target='_blank'>{channel.Link}</a>", IsSleeping = false });

                            if (string.IsNullOrWhiteSpace(feedUrl))
                            {
                                logger.Warn($"=> Channel has empty link: {channel.Id}");
                                continue;
                            }

                            var channelSleepingCacheKey = "channel_is_sleeping|" + channel.Id;

                            var isSleeping = cache[channelSleepingCacheKey] as bool?;
                            if (isSleeping == null || isSleeping == false)
                            {
                                try
                                {
                                    RssCrawlerEngine usedEngine = RssCrawlerEngine.CodeHollowFeedReader;
                                    var feed = RssCrawler.GetFeedsFromChannel(feedUrl, channel.RssCrawlerEngine, false, out usedEngine, out Exception fetchFeedError);

                                    // update default engine for channel
                                    SimpleFeedlyDatabaseAccess.UpdateChannelDefaultEngine((long)channel.Id, feed == null ? (RssCrawlerEngine?)null : usedEngine);

                                    if (feed != null)
                                    {
                                        logger.Info($"  + Number of items: {feed.Items.Count}");

                                        var hasNew = false;
                                        foreach (var fItem in feed.Items)
                                        {
                                            if (!StringUtils.IsUrl(fItem.Link))
                                            {
                                                continue;
                                            }

                                            var feedItemKey  = GenerateFeedItemKey(fItem);
                                            var feedCacheKey = GenerateFeedCacheKey((long)channel.Id, feedItemKey);

                                            if (string.IsNullOrWhiteSpace(feedItemKey) || string.IsNullOrWhiteSpace(fItem.Link))
                                            {
                                                logger.Info($"  + Skipped item: {JsonConvert.SerializeObject(fItem)}");
                                                continue;
                                            }

                                            if (!_feedCache.Contains(feedCacheKey))
                                            {
                                                if (!SimpleFeedlyDatabaseAccess.CheckExistFeedItem((long)channel.Id, feedItemKey))
                                                {
                                                    var feedItem = new RssFeedItemsRow
                                                    {
                                                        ChannelId      = channel.Id,
                                                        FeedItemKey    = feedItemKey,
                                                        Title          = string.IsNullOrWhiteSpace(fItem.Title) ? fItem.Link : fItem.Title,
                                                        Link           = fItem.Link,
                                                        Description    = fItem.Description,
                                                        PublishingDate = fItem.PublishingDate,
                                                        Author         = fItem.Author,
                                                        Content        = fItem.Content
                                                    };

                                                    SimpleFeedlyDatabaseAccess.InsertFeedItem(feedItem);

                                                    hasNew = true;
                                                }

                                                _feedCache.Add(feedCacheKey);
                                            }
                                        }

                                        SimpleFeedlyDatabaseAccess.UpdateChannelErrorStatus((long)channel.Id, false, null);

                                        if (!hasNew)
                                        {
                                            var randomExpiryTime =
                                                cache.Add(channelSleepingCacheKey, true, DateTime.Now.Add(channelFetchingDelay.GenerateRamdomValue()));
                                        }
                                    }
                                    else
                                    {
                                        SimpleFeedlyDatabaseAccess.UpdateChannelErrorStatus((long)channel.Id, true, fetchFeedError == null ? null : JsonConvert.SerializeObject(fetchFeedError));

                                        if (fetchFeedError != null)
                                        {
                                            ErrorHandle(fetchFeedError, feedUrl);
                                        }
                                    }
                                }
                                catch (Exception err)
                                {
                                    SimpleFeedlyDatabaseAccess.UpdateChannelErrorStatus((long)channel.Id, true, JsonConvert.SerializeObject(err));

                                    cache.Add(channelSleepingCacheKey, true, DateTime.Now.Add(channelErrorDelay));
                                    logger.Error(err, $"An error occurred on channel: {channel.Id} | {feedUrl}");

                                    ErrorHandle(err, feedUrl);
                                }
                            }
                            else
                            {
                                logger.Info($"  + sleeping...");
                            }
                        }
                    }
                    catch (Exception ex)
                    {
                        logger.Error(ex, "An error occurred");

                        ErrorHandle(ex, feedUrl);

                        System.Threading.Thread.Sleep(errorDelay);
                    }

                    //channelHubCtx.Clients.All.updateChannelProgress(new { Message = "<span class='link-muted'>Crawler's sleeping...</span>", IsSleeping = true });

                    _currentDate = DateTime.Now.Day;

                    // we should delay a little bit, some seconds maybe
                    System.Threading.Thread.Sleep(loopDelay);
                }
            });
        }
Exemple #2
0
        private static async Task <int> FakeAsync(int minSecond, int maxSecond)
        {
            await Task.Delay(RandomTimeSpan.Between(minSecond, maxSecond));

            return(0);
        }