Beispiel #1
0
        /// <summary>
        /// Probes archive.org for the dump of <paramref name="archiveType"/> published on <paramref name="date"/>.
        /// </summary>
        /// <param name="date">Date stamp used to build the dump URL.</param>
        /// <param name="archiveType">Archive type whose key is embedded in the dump URL.</param>
        /// <param name="ct">Token forwarded to the HTTP request.</param>
        /// <returns>
        /// A populated <see cref="OpenLibraryDownload"/> when the request returns a success status code;
        /// otherwise <c>null</c>.
        /// </returns>
        private async Task <OpenLibraryDownload> GetDownload(DateTime date, OpenLibraryArchiveType archiveType, CancellationToken ct)
        {
            // Example: https://archive.org/download/ol_dump_2021-03-19/ol_dump_editions_2021-03-19.txt.gz
            var formattedDate = date.ToIsoDateString();
            var url           = $"https://archive.org/download/ol_dump_{formattedDate}/ol_dump_{archiveType.GetKey()}_{formattedDate}.txt.gz";

            _logger.LogInformation($"Checking {url}");
            var stopwatch = Stopwatch.StartNew();
            bool found;

            // Only the response headers are needed to decide whether the dump exists.
            using (var response = await _client.GetAsync(url, HttpCompletionOption.ResponseHeadersRead, ct))
            {
                stopwatch.Stop();
                found = response.IsSuccessStatusCode;
            }

            var download = found
                ? new OpenLibraryDownload { Datestamp = date, Source = url, ArchiveType = archiveType }
                : null;

            _logger.LogInformation($"Checked {url} in {stopwatch.ElapsedMilliseconds}ms. Exists = {download is not null}");
            return download;
        }
        /// <summary>
        /// Queries the versions table for the most recent entry of <paramref name="archiveType"/>
        /// whose <c>PublishDate</c> is not later than the date reported by <c>_clock.UtcNow()</c>.
        /// </summary>
        /// <param name="archiveType">Archive type whose key is matched against the <c>Kind</c> attribute.</param>
        /// <param name="ct">Token forwarded to the DynamoDB query.</param>
        /// <returns>The matching <see cref="OpenLibraryVersion"/>, or <c>null</c> when the query yields no item.</returns>
        public async Task <OpenLibraryVersion> FindLatestArchiveEntry(OpenLibraryArchiveType archiveType, CancellationToken ct)
        {
            var kindValue = new AttributeValue { S = archiveType.GetKey() };
            var dateValue = new AttributeValue { S = _clock.UtcNow().ToIsoDateString() };

            var request = new QueryRequest
            {
                TableName                 = _openLibVersionsTable,
                KeyConditionExpression    = "Kind = :kind AND PublishDate <= :date",
                ExpressionAttributeValues = new Dictionary <string, AttributeValue>
                {
                    [":kind"] = kindValue,
                    [":date"] = dateValue,
                },
                // Descending order combined with Limit = 1 yields only the newest matching item.
                ScanIndexForward = false,
                Limit            = 1,
            };

            var response = await _dynamoClient.QueryAsync(request, ct);
            var newest   = response.Items?.SingleOrDefault();

            return newest is null
                ? null
                : _pocoClient.FromDocument <OpenLibraryVersion>(Document.FromAttributeMap(newest));
        }
 /// <summary>
 /// Maps an <see cref="OpenLibraryArchiveType"/> value to its lowercase key string.
 /// </summary>
 /// <param name="archiveTypeArchiveType">The archive type to map.</param>
 /// <returns><c>"authors"</c> or <c>"editions"</c>, depending on the archive type.</returns>
 /// <exception cref="ArgumentException">The value is not one of the archive types handled here.</exception>
 public static string GetKey(this OpenLibraryArchiveType archiveTypeArchiveType)
 {
     return archiveTypeArchiveType switch
     {
         OpenLibraryArchiveType.Authors => "authors",
         OpenLibraryArchiveType.Editions => "editions",
         // Fail loudly for any enum member without a key mapping.
         _ => throw new ArgumentException($"{archiveTypeArchiveType} is not a supported archive type"),
     };
 }
        /// <summary>
        /// Finds all version entries of <paramref name="archiveType"/> whose range key lies between
        /// <paramref name="searchStart"/> and <paramref name="searchEnd"/>.
        /// </summary>
        /// <param name="searchStart">Lower bound of the range; must be earlier than <paramref name="searchEnd"/>.</param>
        /// <param name="searchEnd">Upper bound of the range.</param>
        /// <param name="archiveType">Archive type whose key is used as the hash key of the query.</param>
        /// <param name="ct">Token forwarded to the DynamoDB query.</param>
        /// <returns>Every matching <see cref="OpenLibraryVersion"/> across all result pages.</returns>
        /// <exception cref="ArgumentException"><paramref name="searchEnd"/> is not later than <paramref name="searchStart"/>.</exception>
        public async Task <IReadOnlyCollection <OpenLibraryVersion> > FindArchiveEntries(
            DateTime searchStart,
            DateTime searchEnd,
            OpenLibraryArchiveType archiveType,
            CancellationToken ct)
        {
            if (searchEnd <= searchStart)
            {
                throw new ArgumentException($"Search start ({searchStart:O}) must come before search end ({searchEnd:O})");
            }

            var range = new object[] { searchStart, searchEnd };
            var query = _pocoClient.QueryAsync <OpenLibraryVersion>(archiveType.GetKey(), QueryOperator.Between, range);

            // GetNextSetAsync would return only the first page; drain the search so that
            // large ranges are not silently truncated.
            var results = await query.GetRemainingAsync(ct);

            return results;
        }
        /// <summary>
        /// Opens a stream over the stored archive object for the given date and archive type.
        /// </summary>
        /// <param name="date">Date used to derive the object name.</param>
        /// <param name="archiveType">Archive type used to derive the object name.</param>
        /// <param name="ct">Token forwarded to the S3 request.</param>
        /// <returns>The task returned by the S3 client's <c>GetObjectStreamAsync</c> call.</returns>
        public Task <Stream> GetArchive(DateTime date, OpenLibraryArchiveType archiveType, CancellationToken ct)
            => _s3.GetObjectStreamAsync(
                _openLibVersionsBucket,
                OpenLibraryDownloadExtensions.GetObjectName(date, archiveType),
                additionalProperties: null,
                ct);
        /// <summary>
        /// Loads the version entry identified by the archive type's key and <paramref name="date"/>.
        /// </summary>
        /// <param name="date">Second key value passed to the load.</param>
        /// <param name="archiveType">Archive type whose key is the first key value passed to the load.</param>
        /// <param name="ct">Token forwarded to the load operation.</param>
        /// <returns>Whatever <c>_pocoClient.LoadAsync</c> produces for that key pair.</returns>
        public async Task <OpenLibraryVersion> GetArchiveEntry(DateTime date, OpenLibraryArchiveType archiveType, CancellationToken ct)
        {
            var kindKey = archiveType.GetKey();

            return await _pocoClient.LoadAsync <OpenLibraryVersion>(kindKey, date, ct);
        }
 /// <summary>
 /// Builds the object name for an original archive: <c>{date}-{type key}-orig.txt.gz</c>.
 /// </summary>
 /// <param name="dt">Date rendered through <c>ToIsoDateString()</c>.</param>
 /// <param name="archiveType">Archive type rendered through <c>GetKey()</c>.</param>
 /// <returns>The composed object name.</returns>
 public static string GetObjectName(DateTime dt, OpenLibraryArchiveType archiveType)
 {
     var datePart = dt.ToIsoDateString();
     var kindPart = archiveType.GetKey();

     return $"{datePart}-{kindPart}-orig.txt.gz";
 }
Beispiel #8
0
        /// <summary>
        /// Single-type convenience wrapper around <c>GetLatestVersionForTypes</c>.
        /// </summary>
        /// <param name="archiveType">The only archive type to look up.</param>
        /// <param name="ct">Token forwarded to the multi-type lookup.</param>
        /// <returns>
        /// The single element of the multi-type result, or the default value when that result is empty.
        /// </returns>
        public async Task <OpenLibraryDownload> GetLatestVersionForType(OpenLibraryArchiveType archiveType, CancellationToken ct)
        {
            OpenLibraryArchiveType[] requestedTypes = { archiveType };
            var latestPerType = await GetLatestVersionForTypes(requestedTypes, ct);

            return latestPerType.SingleOrDefault();
        }
Beispiel #9
0
        /// <summary>
        /// Checks every feed timestamp for a download of <paramref name="archiveType"/> and returns those that exist.
        /// </summary>
        /// <param name="archiveType">Archive type to probe for at each timestamp.</param>
        /// <param name="ct">Token forwarded to the feed lookup and to every probe.</param>
        /// <returns>The non-null downloads, in feed order.</returns>
        /// <remarks>
        /// A faulted or cancelled probe surfaces as an exception from the <c>Task.WhenAll</c> await.
        /// </remarks>
        public async Task <IReadOnlyCollection <OpenLibraryDownload> > GetDownloadsForType(OpenLibraryArchiveType archiveType, CancellationToken ct)
        {
            var timestamps = await GetFeedTimestamps(ct);

            // Materialize the list so every probe is started before any of them is awaited.
            var probes = timestamps
                         .Select(stamp => GetDownload(stamp, archiveType, ct))
                         .ToList();

            var outcomes = await Task.WhenAll(probes);

            // A null outcome means no download was found for that timestamp.
            return outcomes
                   .Where(found => found is not null)
                   .ToList();
        }