/// <summary>
/// Streams the download at <c>version.Source</c> into the versions bucket and saves an
/// archive entry describing it, unless an entry with the same publish date is already present.
/// </summary>
/// <param name="version">Download descriptor supplying the source URL, archive type, date stamp and object name.</param>
/// <param name="ct">Cancellation token passed to the lookup, the transfer and the save.</param>
/// <returns>The <see cref="OpenLibraryVersion"/> entry that was saved.</returns>
/// <exception cref="ArgumentException">
/// The lookup returned an entry whose <c>PublishDate</c> equals <c>version.Datestamp</c>.
/// </exception>
public async Task<OpenLibraryVersion> SaveArchive(OpenLibraryDownload version, CancellationToken ct)
        {
            // Query one day either side of the date stamp; the exact-date match is done below.
            var windowStart = version.Datestamp.AddDays(-1);
            var windowEnd   = version.Datestamp.AddDays(1);
            var nearby      = await FindArchiveEntries(windowStart, windowEnd, version.ArchiveType, ct);

            // A null lookup result is treated the same as "nothing found".
            var alreadyArchived = nearby is not null
                                  && nearby.Any(entry => entry.PublishDate == version.Datestamp);
            if (alreadyArchived)
            {
                throw new ArgumentException($"{version.ArchiveType.GetKey()} is already in the archives");
            }

            // Copy the remote file into the bucket; the report supplies the size and destination URL.
            var upload = await _storageStreamer.StreamHttpToS3(
                version.Source,
                _openLibVersionsBucket,
                version.ObjectName,
                ct);

            var archived = new OpenLibraryVersion
            {
                SourceUrl   = version.Source,
                Kind        = version.ArchiveType.GetKey(),
                ObjectName  = version.ObjectName,
                Bytes       = upload.Bytes,
                Uri         = upload.DestinationUrl,
                PublishDate = version.Datestamp,
            };

            await SaveArchiveEntry(archived, ct);
            return archived;
        }
// Example no. 2
// 0
        /// <summary>
        /// Checks whether the archive.org dump file for the given date and archive type exists
        /// by requesting its URL and inspecting the response status.
        /// </summary>
        /// <param name="date">Dump date; formatted into the URL with <c>ToIsoDateString()</c>.</param>
        /// <param name="archiveType">Archive type whose <c>GetKey()</c> value is embedded in the file name.</param>
        /// <param name="ct">Cancellation token passed to the HTTP request.</param>
        /// <returns>
        /// A download descriptor (date stamp, source URL, archive type) when the response has a
        /// success status code; otherwise <c>null</c>.
        /// </returns>
        private async Task <OpenLibraryDownload> GetDownload(DateTime date, OpenLibraryArchiveType archiveType, CancellationToken ct)
        {
            // Example: https://archive.org/download/ol_dump_2021-03-19/ol_dump_editions_2021-03-19.txt.gz
            var formattedDate = date.ToIsoDateString();
            var url           = $"https://archive.org/download/ol_dump_{formattedDate}/ol_dump_{archiveType.GetKey()}_{formattedDate}.txt.gz";

            // Constant message templates with arguments: the rendered text is unchanged, but the
            // values are never parsed as template placeholders.
            _logger.LogInformation("Checking {Url}", url);
            var timer = Stopwatch.StartNew();
            OpenLibraryDownload dl = null;

            // ResponseHeadersRead completes once the headers are available; the body is not read here.
            using (var response = await _client.GetAsync(url, HttpCompletionOption.ResponseHeadersRead, ct))
            {
                // Stop before disposing the response so only the request itself is timed.
                timer.Stop();
                if (response.IsSuccessStatusCode)
                {
                    dl = new OpenLibraryDownload
                    {
                        Datestamp   = date,
                        Source      = url,
                        ArchiveType = archiveType,
                    };
                }
            }

            _logger.LogInformation(
                "Checked {Url} in {ElapsedMilliseconds}ms. Exists = {Exists}",
                url,
                timer.ElapsedMilliseconds,
                dl is not null);
            return dl;
        }
 /// <summary>
 /// Returns the object name for a download by passing its date stamp and archive type
 /// to the <c>GetObjectName</c> overload that takes those two values.
 /// </summary>
 /// <param name="dl">Download whose <c>Datestamp</c> and <c>ArchiveType</c> are used.</param>
 /// <returns>The string produced by the two-argument overload.</returns>
 public static string GetObjectName(this OpenLibraryDownload dl)
 {
     return GetObjectName(dl.Datestamp, dl.ArchiveType);
 }