/// <summary>
/// Returns the type-name string associated with the <c>MappedType</c> of a field mapping.
/// </summary>
/// <param name="fieldMapping">The mapping whose <c>MappedType</c> is translated.</param>
/// <returns>The type-name string for the mapped type.</returns>
/// <exception cref="Exception">
/// Thrown when <c>MappedType</c> is not one of the values handled below.
/// </exception>
private static string MapAthenaField(this FieldMapping fieldMapping)
{
    // Stays null when no case matches, which routes to the throw at the bottom.
    string athenaTypeName = null;

    switch (fieldMapping.MappedType)
    {
        case AthenaTypeEnum.athena_string:
            athenaTypeName = "string";
            break;
        case AthenaTypeEnum.athena_integer:
            athenaTypeName = "int";
            break;
        case AthenaTypeEnum.athena_boolean:
            // Upper-case spelling is kept exactly as the rest of the code base emits it.
            athenaTypeName = "BOOLEAN";
            break;
        case AthenaTypeEnum.athena_bigint:
            athenaTypeName = "bigint";
            break;
        case AthenaTypeEnum.athena_smallint:
            athenaTypeName = "smallint";
            break;
        case AthenaTypeEnum.athena_tinyint:
            athenaTypeName = "tinyint";
            break;
        case AthenaTypeEnum.athena_double:
            athenaTypeName = "double";
            break;
        case AthenaTypeEnum.athena_float:
            athenaTypeName = "float";
            break;
        case AthenaTypeEnum.athena_timestamp:
            athenaTypeName = "timestamp";
            break;
        case AthenaTypeEnum.athena_date:
            athenaTypeName = "date";
            break;
    }

    if (athenaTypeName != null)
    {
        return athenaTypeName;
    }

    throw new Exception($"Unexpected Athena Type '{fieldMapping.MappedType}' in the field mapping '{fieldMapping.SourceFieldName}'");
}
/// <summary>
/// Reads a sample of rows from a CSV stream into <c>etlSettings.Sample</c> and rebuilds
/// <c>etlSettings.Mappings</c> from it.
/// </summary>
/// <remarks>
/// <para>
/// When <c>etlSettings.HasHeader</c> is set, the first record supplies the source field names.
/// Otherwise one <c>Col{n}</c> mapping is generated per column, where the column count is the
/// widest row found in the sample.
/// </para>
/// <para>
/// An existing mapping is kept at a given position when its <c>SourceFieldName</c> equals the
/// newly detected one at that position; every other position receives the new mapping.
/// </para>
/// <para>
/// <paramref name="stream"/> is disposed when this method returns, because the readers that
/// wrap it take ownership of it.
/// </para>
/// </remarks>
/// <param name="etlSettings">Settings that supply the CSV options and receive the sample and mappings.</param>
/// <param name="stream">The CSV content, GZip-compressed when <c>CsvSourceOptoins.GZip</c> is set.</param>
/// <param name="lines">Maximum number of data rows to sample (the header row is not counted).</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="etlSettings"/> or <paramref name="stream"/> is null.
/// </exception>
public static void ReadFromCSVFile(this EtlSettings etlSettings, Stream stream, int lines = 20)
{
    if (etlSettings == null)
    {
        throw new ArgumentNullException(nameof(etlSettings));
    }

    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }

    var newMappings = new List<FieldMapping>();
    etlSettings.Sample = new DataSample() { Rows = new List<DataRow>() };

    var config = new CsvConfiguration(CultureInfo.InvariantCulture)
    {
        Delimiter = etlSettings.CsvSourceOptoins.Delimiter
    };

    var csvStream = stream;
    if (etlSettings.CsvSourceOptoins.GZip)
    {
        csvStream = new GZipStream(stream, CompressionMode.Decompress);
    }

    using (var streamReader = new StreamReader(csvStream))
    using (var csvReader = new CsvReader(streamReader, config))
    {
        // The header row, when present, names the columns.
        if (etlSettings.HasHeader && csvReader.Read())
        {
            foreach (var headerName in ReadCurrentRowFields(csvReader))
            {
                newMappings.Add(new FieldMapping()
                {
                    SourceFieldName = headerName,
                    MappedName = headerName.ToMappedName()
                });
            }
        }

        // Check the limit before Read() so no record beyond the sample size is consumed.
        int widestRow = 0;
        int rowCount = 0;
        while (rowCount < lines && csvReader.Read())
        {
            var fields = ReadCurrentRowFields(csvReader);

            // Track the widest row: the previous running minimum started at 0 and therefore
            // never produced any columns for header-less files.
            widestRow = Math.Max(widestRow, fields.Count);

            etlSettings.Sample.Rows.Add(new DataRow() { Items = fields });
            rowCount++;
        }

        // Without a header, synthesize positional column names.
        if (!etlSettings.HasHeader)
        {
            for (int i = 0; i < widestRow; i++)
            {
                newMappings.Add(new FieldMapping()
                {
                    SourceFieldName = $"Col{i}",
                    MappedName = $"Col{i}"
                });
            }
        }

        // Detect each column's type from the sampled values; rows too short to contain
        // the column contribute an empty string.
        for (int i = 0; i < newMappings.Count; i++)
        {
            // Copy the index so the lambda does not capture the mutable loop variable.
            int columnIndex = i;
            newMappings[i].MappedType = etlSettings.Sample.Rows
                .Select(row => row.Items.Count > columnIndex ? row.Items[columnIndex].ToString() : "")
                .DetectTypeString()
                .DetectedTypeToAthenaType();
        }

        // Update the mappings, preserving existing entries whose source field is unchanged.
        if (etlSettings.Mappings != null && etlSettings.Mappings.Count > 0)
        {
            var oldMappings = etlSettings.Mappings;
            etlSettings.Mappings = new List<FieldMapping>();

            for (int i = 0; i < newMappings.Count; i++)
            {
                bool sameSourceField = oldMappings.Count > i
                    && oldMappings[i].SourceFieldName == newMappings[i].SourceFieldName;

                etlSettings.Mappings.Add(sameSourceField ? oldMappings[i] : newMappings[i]);
            }
        }
        else
        {
            etlSettings.Mappings = newMappings;
        }
    }
}

/// <summary>
/// Collects, in order, every field of the record the reader is currently positioned on.
/// </summary>
private static List<string> ReadCurrentRowFields(CsvReader csvReader)
{
    var fields = new List<string>();
    string value;

    // TryGetField returns false once the index runs past the last field of the record.
    while (csvReader.TryGetField(fields.Count, out value))
    {
        fields.Add(value);
    }

    return fields;
}