Beispiel #1
0
        /// <summary>
        ///   Copies the files that are direct children of a Google Drive folder into storage under
        ///   parler/{folderName}/. Each file is downloaded to a local file in Dir first and the local
        ///   copy is deleted afterwards. Blobs that already exist in storage are skipped.
        /// </summary>
        /// <param name="folderId">Id of the Drive folder to list</param>
        /// <param name="folderName">Name used as the blob path segment and in log messages</param>
        /// <param name="log">Logger used for progress and error messages</param>
        public async Task LoadFromGoogleDrive(string folderId, string folderName, ILogger log)
        {
            var creds = GoogleCredential.FromJson(Cfg.Creds.ToString()).CreateScoped(DriveService.Scope.DriveReadonly);
            using var service = new DriveService(new() {
                HttpClientInitializer = creds,
                ApplicationName       = "recfluence"
            });

            var list = service.Files.List();
            list.Q        = $"'{folderId}' in parents";
            list.PageSize = 1000;

            // A single response holds at most PageSize entries, so keep following NextPageToken
            // until the listing is exhausted; otherwise large folders are silently truncated.
            var driveFiles = new System.Collections.Generic.List<File>();
            do
            {
                var page = await list.ExecuteAsync();
                if (page.Files != null)
                {
                    driveFiles.AddRange(page.Files);
                }
                list.PageToken = page.NextPageToken;
            } while (!string.IsNullOrEmpty(list.PageToken));

            Dir.EnsureDirectoryExists();

            // Downloads a drive file to localFile. Returns localFile on success (or when it already exists), null on failure.
            async Task<FPath> Download(File f, FPath localFile)
            {
                if (localFile.Exists)
                {
                    return localFile;
                }

                using var sw = localFile.Open(CreateNew);

                // DownloadAsync only completes once the transfer has finished or failed, and the returned
                // progress is a final snapshot. Polling its Status afterwards can never observe a change.
                var progress = await service.Files.Get(f.Id).DownloadAsync(sw);

                if (progress.Status == DownloadStatus.Completed)
                {
                    return localFile;
                }
                if (progress.Exception != null)
                {
                    log.Error(progress.Exception, "error when downloading file {File}: {Message}", f.Name, progress.Exception.Message);
                    return null;
                }
                log.Error("download did not complete {File}", f.Name);
                return null;
            }

            await driveFiles.WithIndex().BlockAction(async f => {
                // local/blob name: text before the first '.', without the "Copy of " marker, as .jsonl.gz
                var localFile = Dir.Combine(f.item.Name.Trim().Split(".").First().Replace("Copy of ", "") + ".jsonl.gz");
                var blobPath  = $"parler/{folderName}/{localFile.FileName}";
                if (await Db.Exists(blobPath))
                {
                    log.Information("Skipping existing blob {File}", blobPath);
                    return;
                }

                var downloadedFile = await Download(f.item, localFile);
                if (downloadedFile != null)
                {
                    await Db.Save(blobPath, downloadedFile, log);
                    // only report a move when the file was actually saved
                    log.Information("Moved {File} {Num}/{Total}", localFile.FileName, f.index + 1, driveFiles.Count);
                }

                // remove the local copy (including a partial file left by a failed download)
                localFile.Delete();
            }, 2);

            log.Information("parler - completed loading {Name}", folderName);
        }
Beispiel #2
0
    /// <summary>
    ///   Runs the given query, writes its rows to a gzipped csv file inside tempDir, then hands that file to
    ///   SaveToLatestAndDateDirs so it is stored under both the latest and the current date locations.
    /// </summary>
    /// <param name="db">Connection the query is executed against</param>
    /// <param name="tempDir">Directory the local csv.gz file is created in</param>
    /// <param name="q">The query to run; its Name determines the file name</param>
    /// <returns>Path of the local csv.gz file</returns>
    async Task<FPath> SaveResult(IDbConnection db, FPath tempDir, ResQuery q) {
      var timer = Stopwatch.StartNew();

      // NOTE(review): the reader is never disposed here — confirm whether ResQuery's result needs disposing
      var rows = await ResQuery(db, q);

      var csvName = $"{q.Name}.csv.gz";
      var localCsv = tempDir.Combine(csvName);

      // scoped using: the local file must be closed before it is handed on below
      using (var csvStream = localCsv.Open(FileMode.Create, FileAccess.Write)) {
        await rows.WriteCsvGz(csvStream, csvName, Log);
      }

      await SaveToLatestAndDateDirs(csvName, localCsv);

      Log.Information("Complete saving result {Name} in {Duration}", q.Name, timer.Elapsed);
      return localCsv;
    }