Example #1
0
    /// <summary>
    /// Purges stale data belonging to a feed in two independent stages:
    /// first stale files are removed from the feed's working folder, then stale
    /// documents in the crawler collection have their OpenGraph and HTML
    /// attributes reset and are saved back.
    /// </summary>
    /// <param name="feed">
    /// Feed to purge. Its <c>CollectionName</c> selects the working folder and the
    /// documents to process; <c>FileRetentionDays</c> is passed to the file purge and
    /// <c>DatabaseRetentionDays</c> is passed along when each stripped document is saved.
    /// </param>
    public void Purge(RssFeed feed)
    {
        // Threshold handed to GetStaleDocuments and reported in the summary log line.
        // Kept in one place so the query and the message cannot drift apart.
        const int staleDocumentDays = 7;

        // Stage 1: purge stale files from the working folder
        string workingFolder = Path.Combine(_utils.GetAssemblyDirectory(), feed.CollectionName);

        if (Directory.Exists(workingFolder))
        {
            _utils.PurgeStaleFiles(workingFolder, feed.FileRetentionDays);
        }
        else
        {
            // A missing folder only means there are no files to purge; the database
            // stage below must still run, so warn and carry on instead of returning.
            Log.Warning("Folder '{workingFolder}' does not exist", workingFolder);
        }

        // Stage 2: strip stale documents in the database collection
        var staleDocuments = _crawlerRepository.GetStaleDocuments<RssFeedItem>(_crawlerCollectionName, feed.CollectionName, staleDocumentDays);

        foreach (var document in staleDocuments)
        {
            Log.Debug("Stripping UrlHash '{urlHash}' from {collectionName}", document.FeedAttributes.UrlHash, feed.CollectionName);

            // Only the bulky attribute sets are cleared; the document itself is kept and re-saved.
            document.OpenGraphAttributes = default;
            document.HtmlAttributes      = default;
            _crawlerRepository.SaveDocument<RssFeedItem>(_crawlerCollectionName, document, feed.DatabaseRetentionDays);
        }

        // Report only after the work has actually been done, so the message is never
        // emitted for documents that failed to save.
        Log.Information("Stripped {count} documents older than {days} days from {collectionName}", staleDocuments.Count, staleDocumentDays, feed.CollectionName);
    }