/// <summary>
/// Resets <c>etlSettings.Sample</c> to a new <see cref="DataSample"/>, asks the reader that
/// matches <c>etlSettings.SourceType</c> to load sample data, and finally shortens
/// over-long sample items so the sample stays compact.
/// </summary>
/// <param name="etlSettings">The settings whose source is sampled; mutated in place.</param>
/// <param name="lines">
/// Forwarded unchanged to the CSV / BigQuery / Athena sample readers (presumably the maximum
/// number of sample rows - confirm against those readers). Not used for
/// <c>EtlSourceEnum.AmazonAthenaPipes</c>.
/// </param>
/// <returns>The same <paramref name="etlSettings"/> instance that was passed in.</returns>
/// <remarks>
/// Source types without a matching case are left with the freshly created, untouched sample.
/// </remarks>
public static async Task<EtlSettings> ReadEtlSampleData(this EtlSettings etlSettings, int lines = 20)
{
    etlSettings.Sample = new DataSample();

    switch (etlSettings.SourceType)
    {
        case EtlSourceEnum.SFTP:
            ReadSftpSample(etlSettings, lines);
            break;

        case EtlSourceEnum.S3BucketCheck:
            await ReadS3CheckSampleAsync(etlSettings, lines);
            break;

        case EtlSourceEnum.S3BucketEvent:
            await ReadS3EventSampleAsync(etlSettings, lines);
            break;

        case EtlSourceEnum.GoogleAnalytics:
            await etlSettings.GetBigQueryResultSampleByDate(lines);
            break;

        case EtlSourceEnum.AmazonAthena:
            await etlSettings.GetAthenaQueryResultSampleByDate(lines);
            break;

        case EtlSourceEnum.AmazonAthenaPipes:
            // need to compile the query
            await etlSettings.ParseAthenaQueryPipes();
            break;
    }

    // make the sample data smaller
    ShortenSampleItems(etlSettings);

    return etlSettings;
}

/// <summary>
/// Samples the first entry of the SFTP base path whose full name matches the configured
/// path regex. Only CSV files are read; other file types are ignored.
/// </summary>
private static void ReadSftpSample(EtlSettings etlSettings, int lines)
{
    var sftp = etlSettings.SFTPSource;
    var nameRegex = new Regex(sftp.PathRegex);

    using (var sftpClient = new SftpClient(sftp.Host, sftp.Username, sftp.Password))
    {
        sftpClient.Connect();

        // Only the first match is needed, so stop at it instead of building the filtered list.
        var first = sftpClient.ListDirectory(sftp.BasePath)
            .FirstOrDefault(f => nameRegex.IsMatch(f.FullName));

        if (first != null && etlSettings.FileType == EtlFileType.CSV)
        {
            using (var sftpStream = sftpClient.OpenRead(first.FullName))
            {
                etlSettings.ReadFromCSVFile(sftpStream, lines);
            }
        }

        sftpClient.Disconnect();
    }
}

/// <summary>
/// Samples the first object returned by <c>ListAllObjectsInBucket</c> (called with the
/// configured bucket name and prefix) whose key matches the configured path regex.
/// Only CSV files are read; other file types are ignored.
/// </summary>
private static async Task ReadS3CheckSampleAsync(EtlSettings etlSettings, int lines)
{
    var s3 = etlSettings.S3CheckSource;
    var awsS3API = new AWSS3API(new AWSS3Options()
    {
        Key = s3.Key,
        Secret = s3.Secret,
        Bucket = s3.BucketName,
        Region = s3.Region,
    });

    var objects = await awsS3API.ListAllObjectsInBucket(s3.BucketName, s3.Prefix);
    var nameRegex = new Regex(s3.PathRegex);

    // Only the first match is needed, so stop at it instead of building the filtered list.
    var first = objects.FirstOrDefault(o => nameRegex.IsMatch(o.Key));

    if (first != null && etlSettings.FileType == EtlFileType.CSV)
    {
        using (var s3Stream = await awsS3API.OpenReadAsync(first.Key, first.BucketName))
        {
            etlSettings.ReadFromCSVFile(s3Stream, lines);
        }
    }
}

/// <summary>
/// Samples the configured example object of an S3 event source, provided
/// <c>FileExists</c> reports it as present. Only CSV files are read; other file types
/// are ignored.
/// </summary>
private static async Task ReadS3EventSampleAsync(EtlSettings etlSettings, int lines)
{
    var s3 = etlSettings.S3EventSource;
    var awsS3API = new AWSS3API(new AWSS3Options()
    {
        Key = s3.Key,
        Secret = s3.Secret,
        Bucket = s3.BucketName,
        Region = s3.Region,
    });

    if (!await awsS3API.FileExists(s3.ExamplePath, s3.BucketName))
    {
        return;
    }

    if (etlSettings.FileType == EtlFileType.CSV)
    {
        using (var s3Stream = await awsS3API.OpenReadAsync(s3.ExamplePath, s3.BucketName))
        {
            etlSettings.ReadFromCSVFile(s3Stream, lines);
        }
    }
}

/// <summary>
/// Replaces the items of every sample row with their shortened form. Does nothing when the
/// sample has no rows collection, and skips rows without an item list.
/// </summary>
private static void ShortenSampleItems(EtlSettings etlSettings)
{
    var rows = etlSettings.Sample?.Rows;
    if (rows == null)
    {
        return;
    }

    // The rows collection itself is not modified (only each row's Items), so no copy is needed.
    foreach (var row in rows)
    {
        if (row?.Items == null)
        {
            continue;
        }

        row.Items = row.Items.Select(item => ShortenSampleItem(item)).ToList();
    }
}

/// <summary>
/// Shortens a sample value to its first 50 and last 50 characters joined by "...".
/// Values that are null, or no longer than the shortened form (103 characters), are
/// returned unchanged so that shortening never makes a value longer or alters it in place.
/// </summary>
private static string ShortenSampleItem(string item)
{
    const int edgeLength = 50;
    const string ellipsis = "...";

    if (item == null || item.Length <= (2 * edgeLength) + ellipsis.Length)
    {
        return item;
    }

    return item.Substring(0, edgeLength) + ellipsis + item.Substring(item.Length - edgeLength);
}