Example #1
        public void FileMetadata_sets_num_rows_on_file_and_row_group_multiple_row_groups()
        {
            var ms = new MemoryStream();
            var id = new DataField <int>("id");

            //write
            using (var writer = new ParquetWriter(new Schema(id), ms))
            {
                using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
                {
                    rg.WriteColumn(new DataColumn(id, new[] { 1, 2, 3, 4 }));
                }

                using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
                {
                    rg.WriteColumn(new DataColumn(id, new[] { 5, 6 }));
                }
            }

            //read back
            using (var reader = new ParquetReader(ms))
            {
                Assert.Equal(6, reader.ThriftMetadata.Num_rows);

                using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(0))
                {
                    Assert.Equal(4, rg.RowCount);
                }

                using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(1))
                {
                    Assert.Equal(2, rg.RowCount);
                }
            }
        }
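
The same total can also be recovered without touching the Thrift metadata by summing the per-row-group counts. A minimal sketch against the reader above (names as in the test):

            // Hedged sketch: the sum of per-row-group counts should match
            // reader.ThriftMetadata.Num_rows from the assertion above.
            long totalRows = 0;
            for (int i = 0; i < reader.RowGroupCount; i++)
            {
                using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(i))
                {
                    totalRows += rg.RowCount;
                }
            }
            Assert.Equal(6, totalRows);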
Example #2
        public static Stream GetParquetFileWithThreeRowGroups()
        {
            var stream = new MemoryStream();
            var schema = SchemaReflector.Reflect <TwoColumn>();

            using (var parquetWriter = new ParquetWriter(schema, stream))
            {
                using (var rowGroup = parquetWriter.CreateRowGroup())
                {
                    rowGroup.WriteColumn(new Parquet.Data.DataColumn((DataField)schema.Fields[0], new[] {
                        1,
                        2,
                        3,
                        4
                    }));
                    rowGroup.WriteColumn(new Parquet.Data.DataColumn((DataField)schema.Fields[1], new[] {
                        "one",
                        "two",
                        "three",
                        "four"
                    }));
                }

                using (var rowGroup = parquetWriter.CreateRowGroup())
                {
                    rowGroup.WriteColumn(new Parquet.Data.DataColumn((DataField)schema.Fields[0], new[] {
                        5,
                        6,
                        7,
                        8
                    }));
                    rowGroup.WriteColumn(new Parquet.Data.DataColumn((DataField)schema.Fields[1], new[] {
                        "five",
                        "six",
                        "seven",
                        "eight"
                    }));
                }

                using (var rowGroup = parquetWriter.CreateRowGroup())
                {
                    rowGroup.WriteColumn(new Parquet.Data.DataColumn((DataField)schema.Fields[0], new[] {
                        9,
                        10,
                        11,
                        12
                    }));
                    rowGroup.WriteColumn(new Parquet.Data.DataColumn((DataField)schema.Fields[1], new[] {
                        "nine",
                        "ten",
                        "eleven",
                        "twelve"
                    }));
                }
            }

            stream.Position = 0;
            return(stream);
        }
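
The TwoColumn type reflected by SchemaReflector.Reflect<TwoColumn>() is not shown. Given that the example writes an int column followed by a string column, a compatible class might look like the following sketch (the property names are assumptions):

        // Hypothetical POCO for SchemaReflector.Reflect<TwoColumn>(); the real
        // type is not shown, but the writes imply an int field then a string field.
        class TwoColumn
        {
            public int Id { get; set; }

            public string Name { get; set; }
        }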
Example #3
        public void Write_in_small_row_groups()
        {
            //write a single file having 3 row groups
            var id = new DataField <int>("id");
            var ms = new MemoryStream();

            using (var writer = new ParquetWriter(new Schema(id), ms))
            {
                using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
                {
                    rg.WriteColumn(new DataColumn(id, new int[] { 1 }));
                }

                using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
                {
                    rg.WriteColumn(new DataColumn(id, new int[] { 2 }));
                }

                using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
                {
                    rg.WriteColumn(new DataColumn(id, new int[] { 3 }));
                }
            }

            //read the file back and validate
            ms.Position = 0;
            using (var reader = new ParquetReader(ms))
            {
                Assert.Equal(3, reader.RowGroupCount);

                using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(0))
                {
                    Assert.Equal(1, rg.RowCount);
                    DataColumn dc = rg.ReadColumn(id);
                    Assert.Equal(new int[] { 1 }, dc.Data);
                }

                using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(1))
                {
                    Assert.Equal(1, rg.RowCount);
                    DataColumn dc = rg.ReadColumn(id);
                    Assert.Equal(new int[] { 2 }, dc.Data);
                }

                using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(2))
                {
                    Assert.Equal(1, rg.RowCount);
                    DataColumn dc = rg.ReadColumn(id);
                    Assert.Equal(new int[] { 3 }, dc.Data);
                }
            }
        }
Example #4
        public long ParquetDotNet()
        {
            {
                var valueField = new DecimalDataField("Value", precision: 29, scale: 3, hasNulls: false);
                var schema     = new Parquet.Data.Schema(valueField);

                using var stream        = File.Create("decimal_timeseries.parquet.net");
                using var parquetWriter = new ParquetWriter(schema, stream);
                using var groupWriter   = parquetWriter.CreateRowGroup();

                groupWriter.WriteColumn(new DataColumn(valueField, _values));
            }

            if (Check.Enabled)
            {
                // Read content from ParquetSharp and Parquet.NET
                var baseline = ReadFile("decimal_timeseries.parquet");
                var results  = ReadFile("decimal_timeseries.parquet.net");

                // Prove that the content is the same
                Check.ArraysAreEqual(_values, baseline);
                Check.ArraysAreEqual(baseline, results);
            }

            return(new FileInfo("decimal_timeseries.parquet.net").Length);
        }
Example #5
        static void ConvertCsvToParquet(string inputFile, string outputFile)
        {
            var data = new Dictionary <string, ArrayList>();

            using (var reader = new StreamReader(inputFile, true))
            {
                var header = reader.ReadLine() ?? throw new InvalidDataException("input CSV file has no header row");

                var columns = header.Split(",");
                for (int i = 0; i < columns.Length; i++)
                {
                    columns[i] = columns[i].Trim();
                }

                while (!reader.EndOfStream)
                {
                    var line = reader.ReadLine();
                    if (String.IsNullOrEmpty(line))
                    {
                        continue;
                    }

                    var parts = line.Split(",");
                    for (int i = 0; i < parts.Length && i < columns.Length; i++)
                    {
                        var column = columns[i];

                        if (parquet_types.ContainsKey(column))
                        {
                            if (!data.ContainsKey(column))
                            {
                                data.Add(column, new ArrayList());
                            }

                            data[column].Add(ParseValue(parquet_types[column], parts[i]));
                        }
                    }
                }
            }

            var datacolumns = parquet_types.Select(
                x => new DataColumn(CreateParquetField(x.Key, x.Value), data[x.Key].ToArray(ConvertParquetType(x.Value)))
                ).ToArray();
            var schema = new Schema(datacolumns.Select(x => (Field)x.Field).ToArray());

            using (Stream fileStream = System.IO.File.Create(outputFile)) // Create truncates any pre-existing output file
            {
                using (var parquetWriter = new ParquetWriter(schema, fileStream))
                {
                    // create a new row group in the file
                    using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
                    {
                        foreach (var column in datacolumns)
                        {
                            groupWriter.WriteColumn(column);
                        }
                    }
                }
            }
        }
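
The helpers parquet_types, CreateParquetField, ParseValue and ConvertParquetType are not shown. One plausible shape, assuming a simple column-name-to-DataType map, is sketched below; the originals may well differ:

        // Hypothetical sketches of the unshown helpers.
        static readonly Dictionary<string, DataType> parquet_types = new Dictionary<string, DataType>
        {
            ["id"]   = DataType.Int32,
            ["name"] = DataType.String
        };

        static DataField CreateParquetField(string name, DataType type)
        {
            // hasNulls: false so the field accepts the plain (non-nullable) arrays
            // produced by ArrayList.ToArray(Type) below
            return new DataField(name, type, hasNulls: false);
        }

        static Type ConvertParquetType(DataType type)
        {
            return type == DataType.Int32 ? typeof(int) : typeof(string);
        }

        static object ParseValue(DataType type, string raw)
        {
            return type == DataType.Int32 ? (object)int.Parse(raw) : raw;
        }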
Example #6
        public void WriteIntro()
        {
            //create data columns with schema metadata and the data you need
            var idColumn = new DataColumn(
                new DataField <int>("id"),
                new int[] { 1, 2 });

            var cityColumn = new DataColumn(
                new DataField <string>("city"),
                new string[] { "London", "Derby" });

            // create file schema
            var schema = new Schema(idColumn.Field, cityColumn.Field);

            using (Stream fileStream = System.IO.File.Create("c:\\test.parquet")) // Create truncates any pre-existing file
            {
                using (var parquetWriter = new ParquetWriter(schema, fileStream))
                {
                    // create a new row group in the file
                    using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup(2))
                    {
                        groupWriter.WriteColumn(idColumn);
                        groupWriter.WriteColumn(cityColumn);
                    }
                }
            }
        }
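
To confirm the write, the file can be read back with the same column-oriented API used throughout these examples; a minimal sketch:

            // Hedged read-back sketch for the file written above.
            using (Stream fileStream = System.IO.File.OpenRead("c:\\test.parquet"))
            using (var parquetReader = new ParquetReader(fileStream))
            using (ParquetRowGroupReader groupReader = parquetReader.OpenRowGroupReader(0))
            {
                DataColumn ids = groupReader.ReadColumn((DataField)parquetReader.Schema[0]);
                // ids.Data now holds { 1, 2 }
            }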
Example #7
        public void Write_read_nullable_column(Array input)
        {
            var id = new DataField <int?>("id");
            var ms = new MemoryStream();

            using (var writer = new ParquetWriter(new Schema(id), ms))
            {
                using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
                {
                    rg.WriteColumn(new DataColumn(id, input));
                }
            }

            ms.Position = 0;
            using (var reader = new ParquetReader(ms))
            {
                Assert.Equal(1, reader.RowGroupCount);

                using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(0))
                {
                    Assert.Equal(input.Length, rg.RowCount);
                    Assert.Equal(input, rg.ReadColumn(id).Data);
                }
            }
        }
Example #8
        public void CustomMetadata_can_write_and_read()
        {
            var ms = new MemoryStream();
            var id = new DataField <int>("id");

            //write
            using (var writer = new ParquetWriter(new Schema(id), ms))
            {
                writer.CustomMetadata = new Dictionary <string, string>
                {
                    ["key1"] = "value1",
                    ["key2"] = "value2"
                };

                using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
                {
                    rg.WriteColumn(new DataColumn(id, new[] { 1, 2, 3, 4 }));
                }
            }

            //read back
            using (var reader = new ParquetReader(ms))
            {
                Assert.Equal("value1", reader.CustomMetadata["key1"]);
                Assert.Equal("value2", reader.CustomMetadata["key2"]);
            }
        }
Example #9
        public static void WriteAthenaRowsAsParquet(this Stream stream, ResultSetMetadata tableSchema, List <FieldMapping> mappings, IEnumerable <Row> rows)
        {
            List <DataColumn> columns = new List <DataColumn>();

            int index = 0;

            foreach (var column in tableSchema.ColumnInfo)
            {
                columns.Add(column.ToParquetColumn(mappings, index, rows));
                index++;
            }

            Schema schema = new Schema(new ReadOnlyCollection <Field>(columns.Select(column => column.Field).ToArray()));

            using (ParquetWriter writer = new ParquetWriter(schema, stream))
            {
                writer.CompressionMethod = CompressionMethod.Snappy;
                using (ParquetRowGroupWriter rowGroupWriter = writer.CreateRowGroup())
                {
                    foreach (var column in columns)
                    {
                        rowGroupWriter.WriteColumn(column);
                    }
                }
            }
        }
Example #10
        /// <inheritdoc/>
        public void Write(IList <IColumn> columns)
        {
            List <DataColumn> parquetColumns = CreateParquetColumns(columns);
            List <DataField>  parquetFields  = parquetColumns.Select(p => p.Field).ToList();
            Schema            schema         = new Schema(parquetFields);

            using (var parquetWriter = new ParquetWriter(schema, FileStream))
            {
                // TODO - Write is called many times; one for each rowgroup in the file. We do not need to compile
                // and write metadata many times. Refactor to write metadata only once.
                CryptoMetadata metadata = CompileMetadata(columns, FileEncryptionSettings);
                if (!metadata.IsEmpty())
                {
                    parquetWriter.CustomMetadata = new Dictionary <string, string>
                    {
                        [nameof(CryptoMetadata)] = JsonConvert.SerializeObject(
                            value: metadata,
                            settings: new JsonSerializerSettings()
                        {
                            NullValueHandling = NullValueHandling.Ignore,
                            Converters        = { new StringEnumConverter() },
                            Formatting        = Formatting.Indented
                        })
                    };
                }

                // create a new row group in the file
                using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
                {
                    parquetColumns.ForEach(groupWriter.WriteColumn);
                }
            }
        }
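
On the read side, the serialized CryptoMetadata can be recovered from the reader's custom metadata. A hedged sketch, assuming the same Json.NET types round-trip and that FileStream has been rewound:

            // Hedged sketch: recover the CryptoMetadata written above.
            using (var parquetReader = new ParquetReader(FileStream))
            {
                if (parquetReader.CustomMetadata.TryGetValue(nameof(CryptoMetadata), out string json))
                {
                    CryptoMetadata metadata = JsonConvert.DeserializeObject<CryptoMetadata>(json);
                }
            }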
Example #11
        public void Write_multiple_row_groups_to_forward_only_stream()
        {
            var ms          = new MemoryStream();
            var forwardOnly = new WriteableNonSeekableStream(ms);

            var schema = new Schema(
                new DataField <int>("id"),
                new DataField <string>("nonsense"));

            using (var writer = new ParquetWriter(schema, forwardOnly))
            {
                using (ParquetRowGroupWriter rgw = writer.CreateRowGroup(1))
                {
                    rgw.WriteColumn(new DataColumn((DataField)schema[0], new[] { 1 }));
                    rgw.WriteColumn(new DataColumn((DataField)schema[1], new[] { "1" }));
                }

                using (ParquetRowGroupWriter rgw = writer.CreateRowGroup(1))
                {
                    rgw.WriteColumn(new DataColumn((DataField)schema[0], new[] { 2 }));
                    rgw.WriteColumn(new DataColumn((DataField)schema[1], new[] { "2" }));
                }
            }

            ms.Position = 0;
            using (var reader = new ParquetReader(ms))
            {
                Assert.Equal(2, reader.RowGroupCount);

                using (ParquetRowGroupReader rgr = reader.OpenRowGroupReader(0))
                {
                    Assert.Equal(1, rgr.RowCount);

                    DataColumn column = rgr.ReadColumn((DataField)schema[0]);
                    Assert.Equal(1, column.Data.GetValue(0));
                }

                using (ParquetRowGroupReader rgr = reader.OpenRowGroupReader(1))
                {
                    Assert.Equal(1, rgr.RowCount);

                    DataColumn column = rgr.ReadColumn((DataField)schema[0]);
                    Assert.Equal(2, column.Data.GetValue(0));
                }
            }
        }
Example #12
        //TODO: Implement this class from scratch by leveraging grouping queries in the DB engine
        //TODO: Unit & Integration Test
        //https://stackoverflow.com/questions/50933429/how-to-view-apache-parquet-file-in-windows
        //https://github.com/elastacloud/parquet-dotnet
        public void WriteData(IEnumerable <PriceForecast> data, string basePath)
        {
            var forecastsByCountry = data.GroupBy(f => f.CountryCode);

            foreach (var countryGroup in forecastsByCountry)
            {
                var country = countryGroup.Key;

                var forecastsByCategory = countryGroup.ToList().GroupBy(f => f.Category);
                foreach (var categoryGroup in forecastsByCategory)
                {
                    var category = categoryGroup.Key;

                    var forecastsByYear = categoryGroup.ToList().GroupBy(f => f.ForecastedDate.Year);
                    foreach (var yearGroup in forecastsByYear)
                    {
                        var year = yearGroup.Key;

                        var forecastsByMonth = yearGroup.ToList().GroupBy(f => f.ForecastedDate.Month);
                        foreach (var monthGroup in forecastsByMonth)
                        {
                            var month     = monthGroup.Key;
                            var forecasts = monthGroup.ToList();
                            var dirPath   = $"{basePath}/Country={country}/Category={category}/Year={year}/Month={month}/";
                            var filePath  = dirPath + "forecast.parquet";

                            //TODO: automating schema generation using reflection and attributes
                            var columns = new DataColumn[]
                            {
                                new DataColumn(ParquetSchemaHelper.ForecastDateField, forecasts.Select(f => f.ForecastDateTime.ToString()).ToArray()),
                                new DataColumn(ParquetSchemaHelper.ForecastModelField, forecasts.Select(f => f.ForecastModel).ToArray()),
                                new DataColumn(ParquetSchemaHelper.MarketField, forecasts.Select(f => f.Market).ToArray()),
                                new DataColumn(ParquetSchemaHelper.ProductField, forecasts.Select(f => f.Product).ToArray()),
                                new DataColumn(ParquetSchemaHelper.CountryField, forecasts.Select(f => f.CountryCode).ToArray()),
                                new DataColumn(ParquetSchemaHelper.ForecastedDateField, forecasts.Select(f => f.ForecastedDate.ToString()).ToArray()),
                                new DataColumn(ParquetSchemaHelper.CategoryField, forecasts.Select(f => f.Category).ToArray()),
                                new DataColumn(ParquetSchemaHelper.PriceField, forecasts.Select(f => f.Price).ToArray())
                            };


                            var schema = new Schema(columns.Select(c => c.Field).ToArray());
                            Directory.CreateDirectory(dirPath); // safe: no-op if the directory already exists
                            using Stream fileStream = System.IO.File.Create(filePath); // Create truncates any pre-existing file
                            using var parquetWriter = new ParquetWriter(schema, fileStream);
                            using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
                            {
                                foreach (var col in columns)
                                {
                                    groupWriter.WriteColumn(col);
                                }
                            }
                        }
                    }
                }
            }
        }
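
ParquetSchemaHelper is not shown. Given the selectors above (the ToString() projections become string columns), its fields would plausibly be declared along these lines; the Price type in particular is an assumption:

        // Hypothetical sketch of the unshown ParquetSchemaHelper.
        static class ParquetSchemaHelper
        {
            public static readonly DataField ForecastDateField   = new DataField<string>("ForecastDateTime");
            public static readonly DataField ForecastModelField  = new DataField<string>("ForecastModel");
            public static readonly DataField MarketField         = new DataField<string>("Market");
            public static readonly DataField ProductField        = new DataField<string>("Product");
            public static readonly DataField CountryField        = new DataField<string>("CountryCode");
            public static readonly DataField ForecastedDateField = new DataField<string>("ForecastedDate");
            public static readonly DataField CategoryField       = new DataField<string>("Category");
            public static readonly DataField PriceField          = new DataField<double>("Price"); // assumed numeric type
        }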
Example #13
        private string CreateTestArchive(Guid sessionId)
        {
            var tmpName = Path.GetTempFileName();

            var timeData   = new[] { 1L, 2L, 3L };
            var timeColumn = new DataColumn(
                new DataField <long>("time"),
                timeData);

            var numbersData   = new[] { 42d, 1337d, 6.022e23 };
            var numbersColumn = new DataColumn(
                new DataField <double>("cool_numbers"),
                numbersData);

            var schema = new Schema(timeColumn.Field, numbersColumn.Field);

            var json = new JObject {
                ["meta"] = new JObject(), ["user"] = new JObject()
            };


            using (var ms = new MemoryStream())
            {
                using (var parquetWriter = new ParquetWriter(schema, ms))
                    using (var groupWriter = parquetWriter.CreateRowGroup())
                    {
                        groupWriter.WriteColumn(timeColumn);
                        groupWriter.WriteColumn(numbersColumn);
                    }

                ms.Position = 0;

                using (var parquetReader = new ParquetReader(ms))
                {
                    var tableInformation = new ArchiveTableInformation()
                    {
                        Columns = new List <DataField>(parquetReader.Schema.GetDataFields()),
                        Time    = timeColumn.Field
                    };
                    var table = new ArchiveTable(json, parquetReader, tableInformation, "testData");

                    var archive = Archive.Create(tmpName);

                    var session = ArchiveSession.Create(archive, "testName");
                    var folder  = ArchiveFolder.Create(archive, sessionId, "testFolder");

                    folder.AddChild(table);
                    session.AddChild(folder);
                    archive.AddSession(session);

                    archive.WriteFile().Wait();
                    archive.Close();
                }
            }

            return(tmpName);
        }
Example #14
 /// <summary>
 ///		Writes a group of rows to the file
 /// </summary>
 private void FlushRowGroup(ParquetWriter writer, Table table)
 {
     if (table.Count > 0)
     {
         using (ParquetRowGroupWriter rowGroupWriter = writer.CreateRowGroup())
         {
             rowGroupWriter.Write(table);
         }
     }
 }
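
A hedged usage sketch, assuming the row-oriented Table/Row API from Parquet.Data.Rows that rowGroupWriter.Write(table) implies, and an already-open ParquetWriter named writer:

     // Hedged sketch: build a small Table and flush it as one row group.
     var table = new Table(new Schema(
                               new DataField<int>("id"),
                               new DataField<string>("city")));

     table.Add(new Row(1, "London"));
     table.Add(new Row(2, "Derby"));

     FlushRowGroup(writer, table);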
Example #15
        private static MemoryStream CreateStream(Schema schema, Dictionary <int, List <object> > data)
        {
            var memoryStream = new MemoryStream();
            var fields       = schema.GetDataFields();

            using (var parquet = new ParquetWriter(schema, memoryStream)
            {
                CompressionMethod = CompressionMethod.Snappy
            })
                using (var rgw = parquet.CreateRowGroup())
                {
                    for (var i = 0; i < fields.Length; i++)
                    {
                        var columnData = data[i];
                        var dataField  = fields[i];

                        switch (fields[i].DataType)
                        {
                        case DataType.Int32:
                            rgw.WriteColumn(new DataColumn(dataField, columnData.Cast <int?>().ToArray()));
                            break;

                        case DataType.Int64:
                            rgw.WriteColumn(new DataColumn(dataField, columnData.Cast <long?>().ToArray()));
                            break;

                        case DataType.Float:
                            rgw.WriteColumn(new DataColumn(dataField, columnData.Cast <float?>().ToArray()));
                            break;

                        case DataType.Decimal:
                            rgw.WriteColumn(new DataColumn(dataField, columnData.Cast <decimal?>().ToArray()));
                            break;

                        case DataType.Double:
                            rgw.WriteColumn(new DataColumn(dataField, columnData.Cast <double?>().ToArray()));
                            break;

                        case DataType.String:
                            rgw.WriteColumn(new DataColumn(dataField, columnData.Cast <string>().ToArray()));
                            break;

                        case DataType.DateTimeOffset:
                            rgw.WriteColumn(new DataColumn(dataField, columnData.Cast <DateTimeOffset?>().ToArray()));
                            break;

                        default:
                            // silently skipping a field would produce an incomplete row group
                            throw new ArgumentOutOfRangeException(fields[i].DataType.ToString());
                        }
                    }
                }

            return(memoryStream);
        }
Example #16
        public static void TestDecimalSeries([Values(0, 1)] int warmup)
        {
            var timer = Stopwatch.StartNew();
            var rand  = new Random(123);

            Console.WriteLine("Generating data...");

            var values = Enumerable.Range(0, 10_000_000).Select(i =>
            {
                var n    = rand.Next();
                var sign = rand.NextDouble() < 0.5 ? -1M : +1M;
                return(sign * ((decimal)n * n * n) / 1000M);
            }).ToArray();

            Console.WriteLine("Generated {0:N0} rows in {1:N2} sec", values.Length, timer.Elapsed.TotalSeconds);
            Console.WriteLine();
            Console.WriteLine("Saving to Parquet");

            timer.Restart();

            using (var fileWriter = new ParquetFileWriter("decimal_timeseries.parquet", new Column[] { new Column <decimal>("Value", LogicalType.Decimal(precision: 29, scale: 3)) }))
            {
                using (var rowGroupWriter = fileWriter.AppendRowGroup())
                {
                    using var valueWriter = rowGroupWriter.NextColumn().LogicalWriter <decimal>();
                    valueWriter.WriteBatch(values);
                }

                fileWriter.Close();
            }

            Console.WriteLine("Saved to Parquet ({0:N0} bytes) in {1:N2} sec", new FileInfo("decimal_timeseries.parquet").Length, timer.Elapsed.TotalSeconds);
            Console.WriteLine();
            Console.WriteLine("Saving to Parquet.NET");

            timer.Restart();

            {
                var valueField = new DecimalDataField("Value", precision: 29, scale: 3);
                var schema     = new Parquet.Data.Schema(valueField);

                using var stream        = File.Create("decimal_timeseries.parquet.net");
                using var parquetWriter = new ParquetWriter(schema, stream);
                using var groupWriter   = parquetWriter.CreateRowGroup();

                groupWriter.WriteColumn(new DataColumn(valueField, values));
            }

            Console.WriteLine("Saved to Parquet.NET ({0:N0} bytes) in {1:N2} sec", new FileInfo("decimal_timeseries.parquet.net").Length, timer.Elapsed.TotalSeconds);
        }
Example #17
        private void OnWrite <TModel>(MapperConfig <TModel> mapConfig, Stream fileStream, bool isAppend, params TModel[] models)
        {
            var dataColumns      = mapConfig.ToDataColumns(models);
            var columnDataFields = dataColumns.Select(s => s.Field).ToArray();

            var schema = new Schema(columnDataFields);

            using var parquetWriter = new ParquetWriter(schema, fileStream, append: isAppend);
            using var groupWriter   = parquetWriter.CreateRowGroup();

            for (int i = 0; i < dataColumns.Length; i++)
            {
                groupWriter.WriteColumn(dataColumns[i]);
            }
        }
Example #18
        public void Cannot_write_columns_in_wrong_order()
        {
            var schema = new Schema(new DataField <int>("id"), new DataField <int>("id2"));

            using (var writer = new ParquetWriter(schema, new MemoryStream()))
            {
                using (ParquetRowGroupWriter gw = writer.CreateRowGroup())
                {
                    Assert.Throws <ArgumentException>(() =>
                    {
                        gw.WriteColumn(new DataColumn((DataField)schema[1], new int[] { 1 }));
                    });
                }
            }
        }
Example #19
        public async Task <bool> WriteData(JObject root, ISessionWriter writer)
        {
            var pathArr = Meta["attachments"].ToObject <string[]>() ?? throw new ArgumentException("Table is missing 'attachments'");

            //TODO: Implement?
            if (false && IsSaved)
            {
                var stream = await _archive.OpenFile(_zipEntry);

                writer.StoreFileId(stream, pathArr[0]);
            }
            else
            {
                // This stream will be disposed by the sessionWriter
                var ms = new MemoryStream();

                // Make a copy of the RememberingParquetReader that can be discarded later,
                // so that not all tables have to be held in memory at the same time.
                var fullReader = new RememberingParquetReader(_reader);
                fullReader.LoadAll();
                using (var tableWriter = new ParquetWriter(fullReader.Schema, ms))
                {
                    using (var rowGroup = tableWriter.CreateRowGroup())  // the using block ensures the final row group details are correctly stored in the parquet file
                    {
                        foreach (var field in fullReader.Schema.GetDataFields())
                        {
                            var column = new DataColumn(field, fullReader.GetColumn(field));
                            rowGroup.WriteColumn(column);
                        }
                    }
                }

                ms.Position = 0;
                writer.StoreFileId(ms, pathArr[0]);
            }

            // TODO AUTOACTIVE-58 - Generalize copy of previous metadata for save

            // Copy previous
            root["meta"] = Meta;
            root["user"] = User;

            // Overwrite potentially changed
            // TODO root["meta"]["is_world_clock"] = ;
            // TODO root["meta"]["synced_to"] =  ;

            return(true);
        }
Example #20
        public static void WriteParquetColumns(this Stream stream, List <DataColumn> columns)
        {
            Schema schema = new Schema(new ReadOnlyCollection <Field>(columns.Select(column => column.Field).ToArray()));

            using (ParquetWriter writer = new ParquetWriter(schema, stream))
            {
                writer.CompressionMethod = CompressionMethod.Snappy;
                using (ParquetRowGroupWriter rowGroupWriter = writer.CreateRowGroup())
                {
                    foreach (var column in columns)
                    {
                        rowGroupWriter.WriteColumn(column);
                    }
                }
            }
        }
Example #21
        public static void WriteParquet <T>(this Stream stream, IEnumerable <T> items) where T : class
        {
            Type classType = typeof(T);

            var properties = classType.GetProperties();

            List <DataColumn> columns = new List <DataColumn>();

            foreach (var prop in properties)
            {
                if (prop.PropertyType == DateTimeType)
                {
                    columns.Add(new DataColumn(
                                    new DateTimeDataField(prop.Name, DateTimeFormat.DateAndTime),
                                    items.Select(item => new DateTimeOffset(((DateTime)prop.GetValue(item)))).ToArray()
                                    ));
                }
                else
                {
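                    // For non-DateTime properties: build a DataField<T> for the
                    // property's CLR type via reflection, then materialize the
                    // property values into a strongly-typed array, since
                    // DataColumn expects an array matching the field's type.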
                    var       genericArguments   = new Type[] { prop.PropertyType };
                    var       genericType        = DataFieldGenericType.MakeGenericType(genericArguments);
                    var       genericConstructor = genericType.GetConstructor(DataFieldConstructorGenericArguments);
                    DataField field         = genericConstructor.Invoke(new object[] { prop.Name }) as DataField;
                    var       dataSource    = items.Select(item => prop.GetValue(item));
                    var       castMethod    = CastMethodGeneric.MakeGenericMethod(genericArguments);
                    var       toArrayMethod = ToArrayMethodGeneric.MakeGenericMethod(genericArguments);
                    var       data          = toArrayMethod.Invoke(null, new object[] { castMethod.Invoke(null, new object[] { dataSource }) }) as Array;
                    var       column        = new DataColumn(field, data);
                    columns.Add(column);
                }
            }

            Schema schema = new Schema(new ReadOnlyCollection <Field>(columns.Select(column => column.Field).ToArray()));

            using (ParquetWriter writer = new ParquetWriter(schema, stream))
            {
                writer.CompressionMethod = CompressionMethod.Snappy;
                using (ParquetRowGroupWriter rowGroupWriter = writer.CreateRowGroup())
                {
                    foreach (var column in columns)
                    {
                        rowGroupWriter.WriteColumn(column);
                    }
                }
            }
        }
Example #22
        public static void BuildParquetFile(DataColumn license, DataColumn sensor, DataColumn time, string outPath)
        {
            var schema = new Schema(license.Field, sensor.Field, time.Field);

            using (Stream fileStream = File.Create(outPath))
            {
                using (var parquetWriter = new ParquetWriter(schema, fileStream))
                {
                    parquetWriter.CompressionMethod = CompressionMethod.Gzip;
                    using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
                    {
                        groupWriter.WriteColumn(license);
                        groupWriter.WriteColumn(sensor);
                        groupWriter.WriteColumn(time);
                    }
                }
            }
        }
Example #23
        private static void WriteDataInFile(string path, List <DataColumn> schemaColumns)
        {
            var schema = new Schema(schemaColumns.ConvertAll(col => col.Field));

            using (Stream fileStream = File.Create(path))
            {
                using (var parquetWriter = new ParquetWriter(schema, fileStream))
                {
                    using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
                    {
                        foreach (var column in schemaColumns)
                        {
                            groupWriter.WriteColumn(column);
                        }
                    }
                }
            }
        }
Example #24
        public void Append_to_file_reads_all_data()
        {
            //write a file with a single row group
            var id = new DataField <int>("id");
            var ms = new MemoryStream();

            using (var writer = new ParquetWriter(new Schema(id), ms))
            {
                using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
                {
                    rg.WriteColumn(new DataColumn(id, new int[] { 1, 2 }));
                }
            }

            //append to this file. Note that you cannot append to an existing row group, so create a new one
            ms.Position = 0;
            using (var writer = new ParquetWriter(new Schema(id), ms, append: true))
            {
                using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
                {
                    rg.WriteColumn(new DataColumn(id, new int[] { 3, 4 }));
                }
            }

            //check that this file now contains two row groups and all the data is valid
            ms.Position = 0;
            using (var reader = new ParquetReader(ms))
            {
                Assert.Equal(2, reader.RowGroupCount);

                using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(0))
                {
                    Assert.Equal(2, rg.RowCount);
                    Assert.Equal(new int[] { 1, 2 }, rg.ReadColumn(id).Data);
                }

                using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(1))
                {
                    Assert.Equal(2, rg.RowCount);
                    Assert.Equal(new int[] { 3, 4 }, rg.ReadColumn(id).Data);
                }
            }
        }
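
Appending in a loop follows the same pattern. Note the target stream has to be readable and seekable so the writer can locate the existing footer; a hedged sketch reusing the names from the test above:

            // Hedged sketch: append one row group per batch to the same stream.
            foreach (int[] batch in new[] { new[] { 5, 6 }, new[] { 7, 8 } })
            {
                ms.Position = 0;
                using (var writer = new ParquetWriter(new Schema(id), ms, append: true))
                using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
                {
                    rg.WriteColumn(new DataColumn(id, batch));
                }
            }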
Example #25
        protected object WriteReadSingle(DataField field, object value, CompressionMethod compressionMethod = CompressionMethod.None, int compressionLevel = -1)
        {
            //for sanity, use disconnected streams
            byte[] data;

            using (var ms = new MemoryStream())
            {
                // write single value

                using (var writer = new ParquetWriter(new Schema(field), ms))
                {
                    writer.CompressionMethod = compressionMethod;
                    writer.CompressionLevel  = compressionLevel;

                    using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
                    {
                        Array dataArray = Array.CreateInstance(field.ClrNullableIfHasNullsType, 1);
                        dataArray.SetValue(value, 0);
                        var column = new DataColumn(field, dataArray);

                        rg.WriteColumn(column);
                    }
                }

                data = ms.ToArray();
            }

            using (var ms = new MemoryStream(data))
            {
                // read back single value

                ms.Position = 0;
                using (var reader = new ParquetReader(ms))
                {
                    using (ParquetRowGroupReader rowGroupReader = reader.OpenRowGroupReader(0))
                    {
                        DataColumn column = rowGroupReader.ReadColumn(field);

                        return(column.Data.GetValue(0));
                    }
                }
            }
        }
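
A typical call site for this helper, as a hedged usage sketch:

            // Hedged usage sketch: round-trip a single value with Gzip compression.
            object roundTripped = WriteReadSingle(new DataField <int>("id"), 7, CompressionMethod.Gzip);
            Assert.Equal(7, roundTripped);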
Example #26
        public static void Main(string[] args)
        {
            var dt     = GenerateTestData();
            var fields = GenerateSchema(dt);

            // Open the output file for writing
            using (var stream = File.Open(OutputFilePath, FileMode.Create, FileAccess.Write))
            {
                using (var writer = new ParquetWriter(new Schema(fields), stream))
                {
                    var startRow = 0;

                    // Keep on creating row groups until we run out of data
                    while (startRow < dt.Rows.Count)
                    {
                        using (var rgw = writer.CreateRowGroup(RowGroupSize))
                        {
                            // Data is written to the row group column by column
                            for (var i = 0; i < dt.Columns.Count; i++)
                            {
                                var columnIndex = i;

                                // Determine the target data type for the column
                                var targetType = dt.Columns[columnIndex].DataType;
                                if (targetType == typeof(DateTime))
                                {
                                    targetType = typeof(DateTimeOffset);
                                }

                                // Generate the value type; wrap value types in Nullable<> so that null values can be represented
                                var valueType = targetType.IsClass
                                    ? targetType
                                    : typeof(Nullable <>).MakeGenericType(targetType);

                                // Create a list to hold values of the required type for the column
                                var list = (IList)typeof(List <>)
                                           .MakeGenericType(valueType)
                                           .GetConstructor(Type.EmptyTypes)
                                           .Invoke(null);

                                // Get the data to be written to the parquet stream
                                foreach (var row in dt.AsEnumerable().Skip(startRow).Take(RowGroupSize))
                                {
                                    // Check if value is null, if so then add a null value
                                    if (row[columnIndex] == null || row[columnIndex] == DBNull.Value)
                                    {
                                        list.Add(null);
                                    }
                                    else
                                    {
                                        // Add the value to the list, but if it's a DateTime then create it as a DateTimeOffset first
                                        list.Add(dt.Columns[columnIndex].DataType == typeof(DateTime)
                                            ? new DateTimeOffset((DateTime)row[columnIndex])
                                            : row[columnIndex]);
                                    }
                                }

                                // Copy the list values into a typed Array, since that is
                                // what the WriteColumn method expects
                                var valuesArray = Array.CreateInstance(valueType, list.Count);
                                list.CopyTo(valuesArray, 0);

                                // Write the column
                                rgw.WriteColumn(new Parquet.Data.DataColumn(fields[i], valuesArray));
                            }
                        }

                        startRow += RowGroupSize;
                    }
                }
            }
        }
Example #27
        public void SaveSingleDataArchive()
        {
            Guid sessionId = Guid.NewGuid(); // Todo fake sessionId

            var tmpName = Path.GetTempFileName();

            var timeData   = new[] { 1L, 2L, 3L };
            var timeColumn = new DataColumn(
                new DataField <long>("time"),
                timeData);

            var numbersData   = new[] { 42d, 1337d, 6.022e23 };
            var numbersColumn = new DataColumn(
                new DataField <double>("cool_numbers"),
                numbersData);

            var schema = new Schema(timeColumn.Field, numbersColumn.Field);

            var json = new JObject {
                ["meta"] = new JObject(), ["user"] = new JObject()
            };


            using (var ms = new MemoryStream())
            {
                using (var parquetWriter = new ParquetWriter(schema, ms))
                    using (var groupWriter = parquetWriter.CreateRowGroup())
                    {
                        groupWriter.WriteColumn(timeColumn);
                        groupWriter.WriteColumn(numbersColumn);
                    }
                ms.Position = 0;

                using (var parquetReader = new ParquetReader(ms)) {
                    var tableInformation = new ArchiveTableInformation()
                    {
                        Columns = new List <DataField>(parquetReader.Schema.GetDataFields()),
                        Time    = timeColumn.Field
                    };
                    var table = new ArchiveTable(json, parquetReader, tableInformation, "testData");

                    var archive = Archive.Create(tmpName);

                    var session = ArchiveSession.Create(archive, "testName");
                    var folder  = ArchiveFolder.Create(archive, sessionId, "testFolder");

                    folder.AddChild(table);
                    session.AddChild(folder);
                    archive.AddSession(session);

                    try
                    {
                        archive.WriteFile().Wait();
                    }
                    catch (Exception ex)
                    {
                        Assert.True(false, ex.Message);
                    }

                    archive.Close();

                    using (var fr = new FileReader(tmpName))
                    {
                        var openTask = Archive.Open(fr);
                        openTask.Wait();
                        var newArchive = openTask.Result;
                        AssertArchivesEqual(archive, newArchive);

                        Assert.Equal("testName", session.Name);
                        Assert.Single(newArchive.Sessions.First().Children);
                        var readFolder = newArchive.Sessions.First().Children.First();
                        Assert.Equal("testFolder", readFolder.Name);
                        Assert.Single(readFolder.Children);

                        var child = readFolder.Children.First();
                        Assert.Single(child.DataPoints);

                        Assert.IsAssignableFrom <ArchiveTable>(child);
                        var tableChild = (ArchiveTable)child;

                        var dataPoint = tableChild.DataPoints.First();
                        var context   = new TimeSynchronizedContext();
                        context.AvailableTimeRangeChanged +=
                            (sender, from, to) => context.SetSelectedTimeRange(from, to);

                        var viewer = context.GetDataViewerFor(dataPoint);
                        viewer.Wait();
                        var dataViewer = viewer.Result;

                        Assert.IsAssignableFrom <ITimeSeriesViewer>(dataViewer);
                        var timeViewer = (ITimeSeriesViewer)dataViewer;

                        var data = timeViewer.GetCurrentData <double>();
                        Assert.Equal("cool_numbers", dataViewer.DataPoint.Name);

                        Assert.Equal(timeData, data.X.ToArray());
                        Assert.Equal(numbersData, data.Y.ToArray());

                        newArchive.Close();
                    }
                }
            }

            File.Delete(tmpName);
        }
Example #28
        public static void TestWriteFloatTimeSeries([Values(0, 1)] int warmup)
        {
            var timer = Stopwatch.StartNew();

            Console.WriteLine("Generating data...");

            var (dates, objectIds, values, numRows) = CreateFloatDataFrame();

            Console.WriteLine("Generated {0:N0} rows in {1:N2} sec", numRows, timer.Elapsed.TotalSeconds);
            Console.WriteLine();
            Console.WriteLine("Saving to CSV");

            timer.Restart();

            using (var csv = new StreamWriter("float_timeseries.csv"))
            {
                for (int i = 0; i != dates.Length; ++i)
                {
                    for (int j = 0; j != objectIds.Length; ++j)
                    {
                        csv.WriteLine("{0:yyyy-MM-dd HH:mm:ss},{1},{2}", dates[i], objectIds[j], values[i][j]);
                    }
                }
            }

            Console.WriteLine("Saved to CSV ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.csv").Length, timer.Elapsed.TotalSeconds);
            Console.WriteLine();
            Console.WriteLine("Saving to CSV.GZ");

            timer.Restart();

            using (var stream = new FileStream("float_timeseries.csv.gz", FileMode.Create))
            {
                using var zip = new GZipStream(stream, CompressionLevel.Optimal);
                using var csv = new StreamWriter(zip);

                for (int i = 0; i != dates.Length; ++i)
                {
                    for (int j = 0; j != objectIds.Length; ++j)
                    {
                        csv.WriteLine("{0:yyyy-MM-dd HH:mm:ss},{1},{2}", dates[i], objectIds[j], values[i][j]);
                    }
                }
            }

            Console.WriteLine("Saved to CSV ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.csv.gz").Length, timer.Elapsed.TotalSeconds);
            Console.WriteLine();
            Console.WriteLine("Saving to Parquet");

            timer.Restart();

            using (var fileWriter = new ParquetFileWriter("float_timeseries.parquet", CreateFloatColumns()))
            {
                using var rowGroupWriter = fileWriter.AppendRowGroup();

                using (var dateTimeWriter = rowGroupWriter.NextColumn().LogicalWriter <DateTime>())
                {
                    for (int i = 0; i != dates.Length; ++i)
                    {
                        dateTimeWriter.WriteBatch(Enumerable.Repeat(dates[i], objectIds.Length).ToArray());
                    }
                }

                using (var objectIdWriter = rowGroupWriter.NextColumn().LogicalWriter <int>())
                {
                    for (int i = 0; i != dates.Length; ++i)
                    {
                        objectIdWriter.WriteBatch(objectIds);
                    }
                }

                using (var valueWriter = rowGroupWriter.NextColumn().LogicalWriter <float>())
                {
                    for (int i = 0; i != dates.Length; ++i)
                    {
                        valueWriter.WriteBatch(values[i]);
                    }
                }

                fileWriter.Close();
            }

            Console.WriteLine("Saved to Parquet ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet").Length, timer.Elapsed.TotalSeconds);
            Console.WriteLine();
            Console.WriteLine("Saving to Parquet.Chunked (by date)");

            timer.Restart();

            using (var fileWriter = new ParquetFileWriter("float_timeseries.parquet.chunked", CreateFloatColumns()))
            {
                for (int i = 0; i != dates.Length; ++i)
                {
                    using var rowGroupWriter = fileWriter.AppendRowGroup();

                    using (var dateTimeWriter = rowGroupWriter.NextColumn().LogicalWriter <DateTime>())
                    {
                        dateTimeWriter.WriteBatch(Enumerable.Repeat(dates[i], objectIds.Length).ToArray());
                    }

                    using (var objectIdWriter = rowGroupWriter.NextColumn().LogicalWriter <int>())
                    {
                        objectIdWriter.WriteBatch(objectIds);
                    }

                    using (var valueWriter = rowGroupWriter.NextColumn().LogicalWriter <float>())
                    {
                        valueWriter.WriteBatch(values[i]);
                    }
                }

                fileWriter.Close();
            }

            Console.WriteLine("Saved to Parquet.Chunked ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet.chunked").Length, timer.Elapsed.TotalSeconds);
            Console.WriteLine();
            Console.WriteLine("Saving to Parquet.RowOriented");

            timer.Restart();

            using (var rowWriter = ParquetFile.CreateRowWriter <(DateTime, int, float)>("float_timeseries.parquet.roworiented", new[] { "DateTime", "ObjectId", "Value" }))
            {
                for (int i = 0; i != dates.Length; ++i)
                {
                    for (int j = 0; j != objectIds.Length; ++j)
                    {
                        rowWriter.WriteRow((dates[i], objectIds[j], values[i][j]));
                    }
                }
            }

            Console.WriteLine("Saved to Parquet.RowOriented ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet.roworiented").Length, timer.Elapsed.TotalSeconds);
            Console.WriteLine();
            Console.WriteLine("Saving to Parquet.Stream");

            timer.Restart();

            using (var stream = new FileStream("float_timeseries.parquet.stream", FileMode.Create))
            {
                using var writer         = new IO.ManagedOutputStream(stream);
                using var fileWriter     = new ParquetFileWriter(writer, CreateFloatColumns());
                using var rowGroupWriter = fileWriter.AppendRowGroup();

                using (var dateTimeWriter = rowGroupWriter.NextColumn().LogicalWriter <DateTime>())
                {
                    for (int i = 0; i != dates.Length; ++i)
                    {
                        dateTimeWriter.WriteBatch(Enumerable.Repeat(dates[i], objectIds.Length).ToArray());
                    }
                }

                using (var objectIdWriter = rowGroupWriter.NextColumn().LogicalWriter <int>())
                {
                    for (int i = 0; i != dates.Length; ++i)
                    {
                        objectIdWriter.WriteBatch(objectIds);
                    }
                }

                using (var valueWriter = rowGroupWriter.NextColumn().LogicalWriter <float>())
                {
                    for (int i = 0; i != dates.Length; ++i)
                    {
                        valueWriter.WriteBatch(values[i]);
                    }
                }


                fileWriter.Close();
            }

            Console.WriteLine("Saved to Parquet.Stream ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet.stream").Length, timer.Elapsed.TotalSeconds);
            Console.WriteLine();
            Console.WriteLine("Saving to Parquet.Chunked.Stream (by date)");

            timer.Restart();

            using (var stream = new FileStream("float_timeseries.parquet.chunked.stream", FileMode.Create))
            {
                using var writer     = new IO.ManagedOutputStream(stream);
                using var fileWriter = new ParquetFileWriter(writer, CreateFloatColumns());

                for (int i = 0; i != dates.Length; ++i)
                {
                    using var rowGroupWriter = fileWriter.AppendRowGroup();

                    using (var dateTimeWriter = rowGroupWriter.NextColumn().LogicalWriter <DateTime>())
                    {
                        dateTimeWriter.WriteBatch(Enumerable.Repeat(dates[i], objectIds.Length).ToArray());
                    }

                    using (var objectIdWriter = rowGroupWriter.NextColumn().LogicalWriter <int>())
                    {
                        objectIdWriter.WriteBatch(objectIds);
                    }

                    using (var valueWriter = rowGroupWriter.NextColumn().LogicalWriter <float>())
                    {
                        valueWriter.WriteBatch(values[i]);
                    }
                }

                fileWriter.Close();
            }

            Console.WriteLine("Saved to Parquet.Chunked.Stream ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet.chunked.stream").Length, timer.Elapsed.TotalSeconds);
            Console.WriteLine();
            Console.WriteLine("Saving to Parquet.RowOriented.Stream");

            timer.Restart();

            using (var stream = new FileStream("float_timeseries.parquet.roworiented.stream", FileMode.Create))
            {
                using var writer    = new IO.ManagedOutputStream(stream);
                using var rowWriter = ParquetFile.CreateRowWriter <(DateTime, int, float)>(writer, new[] { "DateTime", "ObjectId", "Value" });

                for (int i = 0; i != dates.Length; ++i)
                {
                    for (int j = 0; j != objectIds.Length; ++j)
                    {
                        rowWriter.WriteRow((dates[i], objectIds[j], values[i][j]));
                    }
                }

                rowWriter.Close();
            }

            Console.WriteLine("Saved to Parquet.RowOriented.Stream ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet.roworiented.stream").Length, timer.Elapsed.TotalSeconds);
            Console.WriteLine();
            Console.WriteLine("Saving to Parquet.NET");

            timer.Restart();

            {
                var dateTimeField = new DateTimeDataField("DateTime", DateTimeFormat.DateAndTime);
                var objectIdField = new DataField <int>("ObjectId");
                var valueField    = new DataField <float>("Value");
                var schema        = new Parquet.Data.Schema(dateTimeField, objectIdField, valueField);

                using (var stream = File.Create("float_timeseries.parquet.net"))
                    using (var parquetWriter = new ParquetWriter(schema, stream))
                        using (var groupWriter = parquetWriter.CreateRowGroup())
                        {
                            var dateTimeColumn = new DataColumn(dateTimeField,
                                                                dates.SelectMany(d => Enumerable.Repeat(new DateTimeOffset(d), objectIds.Length)).ToArray());

                            var objectIdColumn = new DataColumn(objectIdField,
                                                                dates.SelectMany(d => objectIds).ToArray());

                            var valueColumn = new DataColumn(valueField,
                                                             dates.SelectMany((d, i) => values[i]).ToArray());

                            groupWriter.WriteColumn(dateTimeColumn);
                            groupWriter.WriteColumn(objectIdColumn);
                            groupWriter.WriteColumn(valueColumn);
                        }
            }

            Console.WriteLine("Saved to Parquet.NET ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet.net").Length, timer.Elapsed.TotalSeconds);
        }
Example No. 29
0
        /// <summary>
        /// Writes one row group of typed columns to the file. The row count comes from the
        /// column arrays themselves; dataLen and config are not used inside this method.
        /// </summary>
        /// <param name="csvColumns">Processed CSV data, one array per field</param>
        /// <param name="dataLen">Row count (informational)</param>
        /// <param name="writer">ParquetWriter</param>
        /// <param name="fields">Field structure</param>
        /// <param name="config">Config structure</param>
        public static void WriteGroup(List <Object> csvColumns, long dataLen, ParquetWriter writer, List <DataField> fields, Config config)
        {
            using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
            {
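                // csvColumns holds one pre-parsed array per field; each must be cast to the
                // exact CLR array type that DataColumn expects for the field's DataType.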
                for (int i = 0; i < fields.Count; i++)
                {
                    if (fields[i].HasNulls)
                    {
                        switch (fields[i].DataType)
                        {
                        case DataType.Boolean:
                            rg.WriteColumn(new DataColumn(fields[i], ((bool?[])csvColumns[i])));
                            break;

                        case DataType.DateTimeOffset:
                            rg.WriteColumn(new DataColumn(fields[i], ((DateTimeOffset?[])csvColumns[i])));
                            break;

                        case DataType.Decimal:
                            rg.WriteColumn(new DataColumn(fields[i], ((decimal?[])csvColumns[i])));
                            break;

                        case DataType.Double:
                            rg.WriteColumn(new DataColumn(fields[i], ((double?[])csvColumns[i])));
                            break;

                        case DataType.Float:
                            rg.WriteColumn(new DataColumn(fields[i], ((float?[])csvColumns[i])));
                            break;

                        case DataType.Int16:
                            rg.WriteColumn(new DataColumn(fields[i], ((Int16?[])csvColumns[i])));
                            break;

                        case DataType.Int32:
                            rg.WriteColumn(new DataColumn(fields[i], ((Int32?[])csvColumns[i])));
                            break;

                        case DataType.Int64:
                            rg.WriteColumn(new DataColumn(fields[i], ((Int64?[])csvColumns[i])));
                            break;

                        case DataType.String:
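                            // string is a reference type, so the same string[] carries nulls;
                            // there is no separate nullable array type for strings.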
                            rg.WriteColumn(new DataColumn(fields[i], ((string[])csvColumns[i])));
                            break;

                        default:
                            throw new ArgumentOutOfRangeException(nameof(fields), "Unsupported data type: " + fields[i].DataType);
                        }
                    }
                    else
                    {
                        switch (fields[i].DataType)
                        {
                        case DataType.Boolean:
                            rg.WriteColumn(new DataColumn(fields[i], ((bool[])csvColumns[i])));
                            break;

                        case DataType.DateTimeOffset:
                            rg.WriteColumn(new DataColumn(fields[i], ((DateTimeOffset[])csvColumns[i])));
                            break;

                        case DataType.Decimal:
                            rg.WriteColumn(new DataColumn(fields[i], ((decimal[])csvColumns[i])));
                            break;

                        case DataType.Double:
                            rg.WriteColumn(new DataColumn(fields[i], ((double[])csvColumns[i])));
                            break;

                        case DataType.Float:
                            rg.WriteColumn(new DataColumn(fields[i], ((float[])csvColumns[i])));
                            break;

                        case DataType.Int16:
                            rg.WriteColumn(new DataColumn(fields[i], ((Int16[])csvColumns[i])));
                            break;

                        case DataType.Int32:
                            rg.WriteColumn(new DataColumn(fields[i], ((Int32[])csvColumns[i])));
                            break;

                        case DataType.Int64:
                            rg.WriteColumn(new DataColumn(fields[i], ((Int64[])csvColumns[i])));
                            break;

                        case DataType.String:
                            rg.WriteColumn(new DataColumn(fields[i], ((string[])csvColumns[i])));
                            break;

                        default:
                            throw new ArgumentOutOfRangeException(nameof(fields), "Unsupported data type: " + fields[i].DataType);
                        }
                    }
                }
            }
        }
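
        /// <summary>
        /// A minimal, hypothetical usage sketch for WriteGroup, not from the original source.
        /// The Config argument is assumed to be constructed elsewhere in the project, and the
        /// inline arrays stand in for parsed CSV columns.
        /// </summary>
        public static void WriteGroupSketch(Config config)
        {
            var fields = new List <DataField>
            {
                new DataField <int>("Id"),       // HasNulls == false -> non-nullable branch
                new DataField <string>("Name")   // string fields are nullable -> HasNulls branch
            };

            var csvColumns = new List <Object>
            {
                new[] { 1, 2, 3 },
                new[] { "one", "two", "three" }
            };

            using (var stream = File.Create("from_csv.parquet"))
                using (var writer = new ParquetWriter(new Parquet.Data.Schema(fields.ToArray()), stream))
                {
                    // Writes a single row group containing both columns.
                    WriteGroup(csvColumns, 3, writer, fields, config);
                }
        }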
        public static void TestFloatTimeSeries()
        {
            var timer = Stopwatch.StartNew();
            var rand  = new Random(123);

            Console.WriteLine("Generating data...");

            var dates = Enumerable.Range(0, 360)//*24*12)
                        .Select(i => new DateTime(2001, 01, 01) + TimeSpan.FromHours(i))
                        .Where(d => d.DayOfWeek != DayOfWeek.Saturday && d.DayOfWeek != DayOfWeek.Sunday)
                        .ToArray();
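
            // objectIds: Distinct() drops any rand.Next() collisions, so slightly fewer
            // than 10,000 ids are possible.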

            var objectIds = Enumerable.Range(0, 10000)
                            .Select(i => rand.Next())
                            .Distinct()
                            .OrderBy(i => i)
                            .ToArray();

            var values = dates.Select(d => objectIds.Select(o => (float)rand.NextDouble()).ToArray()).ToArray();

            Console.WriteLine("Generated {0:N0} rows in {1:N2} sec", values.Select(v => v.Length).Aggregate(0, (sum, l) => sum + l), timer.Elapsed.TotalSeconds);
            Console.WriteLine();
            Console.WriteLine("Saving to CSV");

            timer.Restart();

            using (var csv = new StreamWriter("float_timeseries.csv"))
            {
                for (int i = 0; i != dates.Length; ++i)
                {
                    for (int j = 0; j != objectIds.Length; ++j)
                    {
                        csv.WriteLine("{0:yyyy-MM-dd HH:mm:ss},{1},{2}", dates[i], objectIds[j], values[i][j]);
                    }
                }
            }

            Console.WriteLine("Saved to CSV ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.csv").Length, timer.Elapsed.TotalSeconds);
            Console.WriteLine();
            Console.WriteLine("Saving to CSV.GZ");

            timer.Restart();

            using (var stream = new FileStream("float_timeseries.csv.gz", FileMode.Create))
                using (var zip = new GZipStream(stream, CompressionLevel.Optimal))
                    using (var csv = new StreamWriter(zip))
                    {
                        for (int i = 0; i != dates.Length; ++i)
                        {
                            for (int j = 0; j != objectIds.Length; ++j)
                            {
                                csv.WriteLine("{0:yyyy-MM-dd HH:mm:ss},{1},{2}", dates[i], objectIds[j], values[i][j]);
                            }
                        }
                    }

            Console.WriteLine("Saved to CSV ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.csv.gz").Length, timer.Elapsed.TotalSeconds);
            Console.WriteLine();
            Console.WriteLine("Saving to Parquet");

            timer.Restart();

            using (var fileWriter = new ParquetFileWriter("float_timeseries.parquet", CreateFloatColumns()))
                using (var rowGroupWriter = fileWriter.AppendRowGroup())
                {
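                    // Columns must be written in the order CreateFloatColumns() declares them:
                    // DateTime, ObjectId, Value. NextColumn() advances through that schema.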
                    using (var dateTimeWriter = rowGroupWriter.NextColumn().LogicalWriter <DateTime>())
                    {
                        for (int i = 0; i != dates.Length; ++i)
                        {
                            dateTimeWriter.WriteBatch(Enumerable.Repeat(dates[i], objectIds.Length).ToArray());
                        }
                    }

                    using (var objectIdWriter = rowGroupWriter.NextColumn().LogicalWriter <int>())
                    {
                        for (int i = 0; i != dates.Length; ++i)
                        {
                            objectIdWriter.WriteBatch(objectIds);
                        }
                    }

                    using (var valueWriter = rowGroupWriter.NextColumn().LogicalWriter <float>())
                    {
                        for (int i = 0; i != dates.Length; ++i)
                        {
                            valueWriter.WriteBatch(values[i]);
                        }
                    }
                }

            Console.WriteLine("Saved to Parquet ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet").Length, timer.Elapsed.TotalSeconds);
            Console.WriteLine();
            Console.WriteLine("Saving to Parquet.Chunked (by date)");

            timer.Restart();

            using (var fileWriter = new ParquetFileWriter("float_timeseries.parquet.chunked", CreateFloatColumns()))
            {
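                // One row group per date: readers can skip whole groups via row-group
                // metadata, at the cost of extra per-group overhead in the footer.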
                for (int i = 0; i != dates.Length; ++i)
                {
                    using (var rowGroupWriter = fileWriter.AppendRowGroup())
                    {
                        using (var dateTimeWriter = rowGroupWriter.NextColumn().LogicalWriter <DateTime>())
                        {
                            dateTimeWriter.WriteBatch(Enumerable.Repeat(dates[i], objectIds.Length).ToArray());
                        }

                        using (var objectIdWriter = rowGroupWriter.NextColumn().LogicalWriter <int>())
                        {
                            objectIdWriter.WriteBatch(objectIds);
                        }

                        using (var valueWriter = rowGroupWriter.NextColumn().LogicalWriter <float>())
                        {
                            valueWriter.WriteBatch(values[i]);
                        }
                    }
                }
            }

            Console.WriteLine("Saved to Parquet.Chunked ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet.chunked").Length, timer.Elapsed.TotalSeconds);
            Console.WriteLine();
            Console.WriteLine("Saving to Parquet.RowOriented");

            timer.Restart();

            using (var rowWriter = ParquetFile.CreateRowWriter <(DateTime, int, float)>("float_timeseries.parquet.roworiented", new[] { "DateTime", "ObjectId", "Value" }))
            {
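                // The row-oriented API maps tuple fields to columns by position
                // (DateTime, ObjectId, Value) and buffers per-column writes internally.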
                for (int i = 0; i != dates.Length; ++i)
                {
                    for (int j = 0; j != objectIds.Length; ++j)
                    {
                        rowWriter.WriteRow((dates[i], objectIds[j], values[i][j]));
                    }
                }
            }

            Console.WriteLine("Saved to Parquet.RowOriented ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet.roworiented").Length, timer.Elapsed.TotalSeconds);
            Console.WriteLine();
            Console.WriteLine("Saving to Parquet.NET");

            timer.Restart();

            {
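                // Parquet.NET consumes fully materialized arrays per column, so every row is
                // buffered in memory; DateTime values are wrapped in DateTimeOffset to match
                // DateTimeDataField.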
                var dateTimeField = new DateTimeDataField("DateTime", DateTimeFormat.DateAndTime);
                var objectIdField = new DataField <int>("ObjectId");
                var valueField    = new DataField <float>("Value");
                var schema        = new Parquet.Data.Schema(dateTimeField, objectIdField, valueField);

                using (var stream = File.Create("float_timeseries.parquet.net"))
                    using (var parquetWriter = new ParquetWriter(schema, stream))
                        using (var groupWriter = parquetWriter.CreateRowGroup())
                        {
                            var dateTimeColumn = new DataColumn(dateTimeField,
                                                                dates.SelectMany(d => Enumerable.Repeat(new DateTimeOffset(d), objectIds.Length)).ToArray());

                            var objectIdColumn = new DataColumn(objectIdField,
                                                                dates.SelectMany(d => objectIds).ToArray());

                            var valueColumn = new DataColumn(valueField,
                                                             dates.SelectMany((d, i) => values[i]).ToArray());

                            groupWriter.WriteColumn(dateTimeColumn);
                            groupWriter.WriteColumn(objectIdColumn);
                            groupWriter.WriteColumn(valueColumn);
                        }
            }

            Console.WriteLine("Saved to Parquet.NET ({0:N0} bytes) in {1:N2} sec", new FileInfo("float_timeseries.parquet.net").Length, timer.Elapsed.TotalSeconds);
        }
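
        /// <summary>
        /// A small verification sketch, not part of the original benchmark: reads the
        /// Parquet.NET output back and prints the stored row count, using only standard
        /// Parquet.NET reader APIs (ParquetReader, OpenRowGroupReader, RowCount).
        /// </summary>
        public static void VerifyFloatTimeSeries()
        {
            using (var stream = File.OpenRead("float_timeseries.parquet.net"))
                using (var reader = new ParquetReader(stream))
                    using (var rowGroup = reader.OpenRowGroupReader(0))
                    {
                        Console.WriteLine("Read back {0:N0} rows from row group 0", rowGroup.RowCount);
                    }
        }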