// Joins a set of landed gz files into a single optimised gz file under destPath, then deletes the source files.
static async Task<StringPath> JoinFiles(ISimpleFileStore store, IReadOnlyCollection<StoreFileMd> toOptimise, StringPath destPath, int parallel, ILogger log) {
  var optimisedFile = FilePath(destPath, toOptimise.Last().Ts);

  using (var joinedStream = new MemoryStream()) {
    using (var zipWriter = new GZipStream(joinedStream, CompressionLevel.Optimal, leaveOpen: true)) {
      // load the source files in parallel
      var inStreams = await toOptimise.BlockFunc(async s => {
        var inStream = await store.Load(s.Path, log).WithDuration();
        log.Debug("Optimise {Path} - loaded file {SourceFile} to be optimised in {Duration}",
          destPath, s.Path, inStream.Duration.HumanizeShort());
        return inStream.Result;
      }, parallel);

      // decompress each source file and re-compress it into the single joined stream
      foreach (var s in inStreams) {
        using var zr = new GZipStream(s, CompressionMode.Decompress, leaveOpen: false);
        await zr.CopyToAsync(zipWriter);
      }
    }
    joinedStream.Seek(0, SeekOrigin.Begin);
    await store.Save(optimisedFile, joinedStream);
  }

  // If we fail from here on, the store is left dirty: cloud storage has no transaction capability,
  // so the downstream process must handle duplicate records.
  // The files have been staged successfully, so delete them from the landing area. Incremental loading
  // by timestamp would still work without the delete, but removing already-processed landed files is more efficient.
  await toOptimise.BlockAction(f => store.Delete(f.Path), parallel)
    .WithWrappedException(e => "Failed to delete optimised files. Duplicate records need to be handled downstream");

  log.Debug("Optimise {Path} - deleted {Files} files that were optimised into {OptimisedFile}",
    destPath, toOptimise.Count, optimisedFile);

  return optimisedFile;
}
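// A minimal, hypothetical sketch of the downstream de-duplication the comment above refers to:
// because cloud storage has no transactions, a failure between Save and Delete can leave the same
// records in both the landed and optimised files. Assuming each JSONL record is a JToken carrying an
// "id" field (the field name is illustrative, not from this codebase) and that System.Linq and
// Newtonsoft.Json.Linq are in scope, a later stage could keep one copy per id like this:
static IEnumerable<JToken> DistinctById(IEnumerable<JToken> records) =>
  records
    .GroupBy(r => (string) r["id"]) // hypothetical unique key on each record
    .Select(g => g.First());        // keep one record per key, drop duplicates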
// Replaces a .jsonl.gz file with an upgraded copy at newPath, deleting the old file once the new one is saved.
async Task ReplaceJsonLFile(StoreFileMd f, StringPath newPath, IEnumerable<JToken> upgradedJs) {
  await using var stream = upgradedJs.ToJsonlGzStream();
  await Store.Save(newPath, stream);
  var deleted = await Store.Delete(f.Path);
  if (!deleted) throw new InvalidOperationException($"Didn't delete old file {f.Path}");
  Log.Information("Upgraded {OldFile} to {File}", f.Path, newPath);
}
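// ToJsonlGzStream is an extension method from this codebase; below is a rough sketch of what such a
// helper could look like, assuming Newtonsoft.Json for serialisation and System.IO.Compression for
// gzip. The real implementation may differ; this is illustrative only.
static MemoryStream ToJsonlGzStreamSketch(this IEnumerable<JToken> items) {
  var memStream = new MemoryStream();
  // leaveOpen so the MemoryStream remains usable after the gzip/writer wrappers are disposed
  using (var gz = new GZipStream(memStream, CompressionLevel.Optimal, leaveOpen: true))
  using (var writer = new StreamWriter(gz)) {
    foreach (var item in items)
      writer.WriteLine(item.ToString(Formatting.None)); // one JSON object per line
  }
  memStream.Seek(0, SeekOrigin.Begin);
  return memStream;
}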