示例#1
0
        /// <summary>
        /// Translates the <c>MappedType</c> of a field mapping into the type name string
        /// associated with that Athena type.
        /// </summary>
        /// <param name="fieldMapping">The mapping whose <c>MappedType</c> is translated.</param>
        /// <returns>The type name string for the mapped Athena type.</returns>
        /// <exception cref="Exception">
        /// Thrown when <c>MappedType</c> is not one of the enum members handled below.
        /// </exception>
        private static string MapAthenaField(this FieldMapping fieldMapping)
        {
            string athenaTypeName;

            switch (fieldMapping.MappedType)
            {
                case AthenaTypeEnum.athena_string:
                    athenaTypeName = "string";
                    break;

                case AthenaTypeEnum.athena_integer:
                    athenaTypeName = "int";
                    break;

                case AthenaTypeEnum.athena_boolean:
                    // The only name emitted in upper case; kept exactly as-is.
                    athenaTypeName = "BOOLEAN";
                    break;

                case AthenaTypeEnum.athena_bigint:
                    athenaTypeName = "bigint";
                    break;

                case AthenaTypeEnum.athena_smallint:
                    athenaTypeName = "smallint";
                    break;

                case AthenaTypeEnum.athena_tinyint:
                    athenaTypeName = "tinyint";
                    break;

                case AthenaTypeEnum.athena_double:
                    athenaTypeName = "double";
                    break;

                case AthenaTypeEnum.athena_float:
                    athenaTypeName = "float";
                    break;

                case AthenaTypeEnum.athena_timestamp:
                    athenaTypeName = "timestamp";
                    break;

                case AthenaTypeEnum.athena_date:
                    athenaTypeName = "date";
                    break;

                default:
                    // Any value not listed above has no known type name.
                    throw new Exception($"Unexpected Athena Type '{fieldMapping.MappedType}' in the field mapping '{fieldMapping.SourceFieldName}'");
            }

            return athenaTypeName;
        }
示例#2
0
        /// <summary>
        /// Reads a sample of a CSV stream into <paramref name="etlSettings"/> and derives its field mappings.
        /// </summary>
        /// <remarks>
        /// The method replaces <c>etlSettings.Sample</c> with up to <paramref name="lines"/> data rows,
        /// builds one mapping per column (named after the first record when <c>etlSettings.HasHeader</c>
        /// is set, otherwise <c>Col0</c>, <c>Col1</c>, ...), detects each column's type from the sampled
        /// values and finally merges the result into <c>etlSettings.Mappings</c>: an existing mapping is
        /// kept when the mapping at the same position has the same source field name, otherwise the newly
        /// detected mapping is used.
        /// For files without a header the number of generated columns is the smallest field count seen
        /// among the sampled rows (zero when no rows were read).
        /// The <see cref="StreamReader"/> created here wraps <paramref name="stream"/> (or the gzip
        /// decompression stream around it) and is disposed before the method returns.
        /// </remarks>
        /// <param name="etlSettings">Settings that supply the CSV options and receive the sample and mappings.</param>
        /// <param name="stream">The CSV content; decompressed first when the gzip option is set.</param>
        /// <param name="lines">Maximum number of data rows to sample.</param>
        public static void ReadFromCSVFile(this EtlSettings etlSettings, Stream stream, int lines = 20)
        {
            var newMappings = new List<FieldMapping>();

            etlSettings.Sample = new DataSample()
            {
                Rows = new List<DataRow>()
            };

            var config = new CsvConfiguration(CultureInfo.InvariantCulture)
            {
                Delimiter = etlSettings.CsvSourceOptoins.Delimiter
            };

            var csvStream = stream;
            if (etlSettings.CsvSourceOptoins.GZip)
            {
                csvStream = new GZipStream(stream, CompressionMode.Decompress);
            }

            using (var streamReader = new StreamReader(csvStream))
            using (var csvReader = new CsvReader(streamReader, config))
            {
                // With a header, the first record supplies the column names.
                if (etlSettings.HasHeader && csvReader.Read())
                {
                    string headerName;
                    for (int column = 0; csvReader.TryGetField(column, out headerName); column++)
                    {
                        newMappings.Add(new FieldMapping()
                        {
                            SourceFieldName = headerName,
                            MappedName      = headerName.ToMappedName()
                        });
                    }
                }

                // Collect the sample rows. The row limit is tested first so that no record is
                // parsed (and then thrown away) once the limit has been reached.
                int minFieldCount = int.MaxValue;
                int rowCount = 0;
                while (rowCount < lines && csvReader.Read())
                {
                    var row = new List<string>();
                    string cell;
                    while (csvReader.TryGetField(row.Count, out cell))
                    {
                        row.Add(cell);
                    }

                    if (row.Count < minFieldCount)
                    {
                        minFieldCount = row.Count;
                    }

                    etlSettings.Sample.Rows.Add(new DataRow()
                    {
                        Items = row
                    });
                    rowCount++;
                }

                // Without a header, synthesize column names. The count comes from the sampled rows
                // themselves; seeding the minimum with 0 would always yield zero columns.
                if (!etlSettings.HasHeader)
                {
                    int columnCount = rowCount > 0 ? minFieldCount : 0;
                    for (int i = 0; i < columnCount; i++)
                    {
                        newMappings.Add(new FieldMapping()
                        {
                            SourceFieldName = $"Col{i}",
                            MappedName      = $"Col{i}"
                        });
                    }
                }

                // Detect each column's type from the sampled values; rows that are too short
                // contribute an empty string for the missing column.
                for (int i = 0; i < newMappings.Count; i++)
                {
                    // Copy the index so the lambda does not observe later changes of the loop counter.
                    int column = i;
                    newMappings[i].MappedType = etlSettings.Sample.Rows
                                                .Select(row => row.Items.Count > column ? row.Items[column].ToString() : "")
                                                .DetectTypeString()
                                                .DetectedTypeToAthenaType();
                }

                // Merge with existing mappings: keep an old mapping when the one at the same
                // position refers to the same source field, otherwise take the new one.
                if (etlSettings.Mappings != null && etlSettings.Mappings.Count > 0)
                {
                    var oldMappings = etlSettings.Mappings;
                    etlSettings.Mappings = new List<FieldMapping>();
                    for (int i = 0; i < newMappings.Count; i++)
                    {
                        bool sameField = oldMappings.Count > i
                                         && oldMappings[i].SourceFieldName == newMappings[i].SourceFieldName;
                        etlSettings.Mappings.Add(sameField ? oldMappings[i] : newMappings[i]);
                    }
                }
                else
                {
                    etlSettings.Mappings = newMappings;
                }
            }
        }