コード例 #1
0
        /// <summary>
        /// Replaces <c>etlSettings.Sample</c> with a fresh <c>DataSample</c> and then reads
        /// sample data according to <c>etlSettings.SourceType</c>:
        /// <list type="bullet">
        /// <item><description>SFTP: opens the first entry under <c>BasePath</c> whose full name matches <c>PathRegex</c>.</description></item>
        /// <item><description>S3BucketCheck: opens the first listed object whose key matches <c>PathRegex</c>.</description></item>
        /// <item><description>S3BucketEvent: opens <c>ExamplePath</c> when that object exists.</description></item>
        /// <item><description>GoogleAnalytics / AmazonAthena: delegates to the corresponding query-sample helper.</description></item>
        /// <item><description>AmazonAthenaPipes: only compiles the query via <c>ParseAthenaQueryPipes</c>.</description></item>
        /// </list>
        /// For the file-based sources only <c>EtlFileType.CSV</c> is read; any other file type
        /// leaves the sample as produced by <c>new DataSample()</c>. Afterwards, long cell
        /// values in the sample rows are abbreviated to their first and last 50 characters
        /// joined by "...".
        /// </summary>
        /// <param name="etlSettings">Settings describing the source; its <c>Sample</c> is overwritten.</param>
        /// <param name="lines">
        /// Line count handed to the CSV / query sample readers (presumably the maximum number
        /// of sample rows - confirm against those helpers). Not used for AmazonAthenaPipes.
        /// </param>
        /// <returns>The same <paramref name="etlSettings"/> instance that was passed in.</returns>
        public static async Task <EtlSettings> ReadEtlSampleData(this EtlSettings etlSettings, int lines = 20)
        {
            etlSettings.Sample = new DataSample();

            switch (etlSettings.SourceType)
            {
            case EtlSourceEnum.SFTP:
            {
                var sftp      = etlSettings.SFTPSource;
                var nameRegex = new Regex(sftp.PathRegex);
                using (var sftpClient = new SftpClient(sftp.Host, sftp.Username, sftp.Password))
                {
                    sftpClient.Connect();

                    // Only the first match is needed, so there is no point in
                    // materialising the whole filtered listing.
                    var first = sftpClient.ListDirectory(sftp.BasePath)
                                .FirstOrDefault(f => nameRegex.IsMatch(f.FullName));

                    // CSV is the only file type with a sample reader.
                    if (first != null && etlSettings.FileType == EtlFileType.CSV)
                    {
                        using (var sftpStream = sftpClient.OpenRead(first.FullName))
                        {
                            etlSettings.ReadFromCSVFile(sftpStream, lines);
                        }
                    }

                    sftpClient.Disconnect();
                }
            }
            break;

            case EtlSourceEnum.S3BucketCheck:
            {
                var s3       = etlSettings.S3CheckSource;
                var awsS3API = new AWSS3API(new AWSS3Options()
                    {
                        Key    = s3.Key,
                        Secret = s3.Secret,
                        Bucket = s3.BucketName,
                        Region = s3.Region,
                    });
                var objects = await awsS3API.ListAllObjectsInBucket(s3.BucketName, s3.Prefix);

                var nameRegex = new Regex(s3.PathRegex);
                var first     = objects.FirstOrDefault(f => nameRegex.IsMatch(f.Key));

                // CSV is the only file type with a sample reader.
                if (first != null && etlSettings.FileType == EtlFileType.CSV)
                {
                    using (var s3Stream = await awsS3API.OpenReadAsync(first.Key, first.BucketName))
                    {
                        etlSettings.ReadFromCSVFile(s3Stream, lines);
                    }
                }
            }
            break;

            case EtlSourceEnum.S3BucketEvent:
            {
                var s3       = etlSettings.S3EventSource;
                var awsS3API = new AWSS3API(new AWSS3Options()
                    {
                        Key    = s3.Key,
                        Secret = s3.Secret,
                        Bucket = s3.BucketName,
                        Region = s3.Region,
                    });

                // The existence check stays first so it runs regardless of the file type.
                if (await awsS3API.FileExists(s3.ExamplePath, s3.BucketName) && etlSettings.FileType == EtlFileType.CSV)
                {
                    using (var s3Stream = await awsS3API.OpenReadAsync(s3.ExamplePath, s3.BucketName))
                    {
                        etlSettings.ReadFromCSVFile(s3Stream, lines);
                    }
                }
            }
            break;

            case EtlSourceEnum.GoogleAnalytics:
            {
                await etlSettings.GetBigQueryResultSampleByDate(lines);
            }
            break;

            case EtlSourceEnum.AmazonAthena:
            {
                await etlSettings.GetAthenaQueryResultSampleByDate(lines);
            }
            break;

            case EtlSourceEnum.AmazonAthenaPipes:
            {
                // need to compile the query
                await etlSettings.ParseAthenaQueryPipes();
            }
            break;
            }

            // Make the sample data smaller: keep only the head and tail of long values.
            const int    keptEdgeLength = 50;
            const string ellipsis       = "...";

            // An abbreviated value is always 2 * 50 + 3 = 103 characters long, so only values
            // longer than that actually shrink. Shorter ones are left untouched rather than
            // being replaced by something longer (or equally long but lossy).
            int abbreviatedLength = 2 * keptEdgeLength + ellipsis.Length;

            var sample = etlSettings.Sample;
            if (sample != null && sample.Rows != null)
            {
                foreach (var row in sample.Rows.ToList())
                {
                    // Tolerate missing rows / item lists instead of throwing.
                    if (row == null || row.Items == null)
                    {
                        continue;
                    }

                    // Null cells are passed through unchanged.
                    row.Items = row.Items
                                .Select(item => item == null || item.Length <= abbreviatedLength
                                        ? item
                                        : item.Substring(0, keptEdgeLength) + ellipsis + item.Substring(item.Length - keptEdgeLength))
                                .ToList();
                }
            }

            return(etlSettings);
        }