Code example #1
        public override string GenerateFile(out UploadInfo uploadInfo)
        {
            var nowAsString = DateTime.UtcNow.ToString(DateTimeFormat, CultureInfo.InvariantCulture);

            uploadInfo = new UploadInfo
            {
                FileName   = $"{nowAsString}-{_fileNameSuffix}.{Extension}",
                FolderName = _remoteFolderName,
            };

            var localPath = Path.Combine(_tmpFilePath, _localFolderName ?? string.Empty, uploadInfo.FileName);

            if (_localFolderName != null)
            {
                Directory.CreateDirectory(Path.Combine(_tmpFilePath, _localFolderName));
            }

            // note: FileMode.OpenOrCreate does not truncate an existing file
            using (Stream fileStream = File.Open(localPath, FileMode.OpenOrCreate, FileAccess.ReadWrite))
            using (var parquetWriter = new ParquetWriter(new Schema(Fields.Values), fileStream))
            {
                WriteGroup(parquetWriter);
            }

            _count = _group.Count;
            _group.Clear();

            return localPath;
        }
Code example #2
        public void Write_and_read_nullable_integers()
        {
            var ds = new DataSet(new SchemaElement<int>("id"))
            {
                1,
                2,
                3,
                (object)null, // cast to object so the initializer adds a single null cell (a bare null would be ambiguous between Add overloads)
                4,
                (object)null,
                5
            };
            var ms = new MemoryStream();

            ParquetWriter.Write(ds, ms);

            ms.Position = 0;
            DataSet ds1 = ParquetReader.Read(ms);

            Assert.Equal(1, ds1[0].GetInt(0));
            Assert.Equal(2, ds1[1].GetInt(0));
            Assert.Equal(3, ds1[2].GetInt(0));
            Assert.True(ds1[3].IsNullAt(0));
            Assert.Equal(4, ds1[4].GetInt(0));
            Assert.True(ds1[5].IsNullAt(0));
            Assert.Equal(5, ds1[6].GetInt(0));
        }
Code example #3
        public void CustomMetadata_can_write_and_read()
        {
            var ms = new MemoryStream();
            var id = new DataField<int>("id");

            //write
            using (var writer = new ParquetWriter(new Schema(id), ms))
            {
                writer.CustomMetadata = new Dictionary<string, string>
                {
                    ["key1"] = "value1",
                    ["key2"] = "value2"
                };

                using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
                {
                    rg.WriteColumn(new DataColumn(id, new[] { 1, 2, 3, 4 }));
                }
            }

            //read back
            using (var reader = new ParquetReader(ms))
            {
                Assert.Equal("value1", reader.CustomMetadata["key1"]);
                Assert.Equal("value2", reader.CustomMetadata["key2"]);
            }
        }
Code example #4
 public static void CreateParquetFile(Stream inStream, Stream outStream)
 {
     using (var writer = new ParquetWriter(outStream))
     {
         DataSet ds = null;
         int recordCount = 0;
         foreach (var data in ReadFile(inStream))
         {
             if (recordCount == 0)
             {
                 var fields = new List<Parquet.Data.Field>();
                 foreach (var prop in data.Properties)
                 {
                     fields.Add(new DataField(prop.Key, prop.Value.GetType()));
                 }
                 foreach (var prop in data.SystemProperties)
                 {
                     fields.Add(new DataField(prop.Key, prop.Value.GetType()));
                 }
                 fields.Add(new DataField<byte[]>("Body"));
                 ds = new DataSet(fields.ToArray());
             }
             var values = new List<object>();
             values.AddRange(data.Properties.Values);
             values.AddRange(data.SystemProperties.Values);
             values.Add(data.Body.ToArray());
             ds.Add(values.ToArray());
             recordCount++;
         }
         // ds is only built after the first record; skip the write when the input was empty
         if (ds != null)
         {
             writer.Write(ds);
         }
     }
 }
Code example #5
        public void Write_datetimeoffset()
        {
            var ds = new DataSet(
                new SchemaElement<DateTimeOffset>("timestamp_col")
                )
            {
                new DateTimeOffset(new DateTime(2017, 1, 1, 12, 13, 22)),
                new DateTimeOffset(new DateTime(2017, 1, 1, 12, 13, 23))
            };

            var uncompressed = new MemoryStream();

            using (var writer = new ParquetWriter(uncompressed))
            {
                writer.Write(ds, CompressionMethod.None);
            }

#if DEBUG
            const string path = "c:\\tmp\\first.parquet";
            F.WriteAllBytes(path, uncompressed.ToArray());
#endif
        }
Code example #6
        public static void WriteAthenaRowsAsParquet(this Stream stream, ResultSetMetadata tableSchema, List<FieldMapping> mappings, IEnumerable<Row> rows)
        {
            var columns = new List<DataColumn>();

            int index = 0;

            foreach (var column in tableSchema.ColumnInfo)
            {
                columns.Add(column.ToParquetColumn(mappings, index, rows));
                index++;
            }

            Schema schema = new Schema(new ReadOnlyCollection<Field>(columns.Select(column => column.Field).ToArray()));

            using (ParquetWriter writer = new ParquetWriter(schema, stream))
            {
                writer.CompressionMethod = CompressionMethod.Snappy;
                using (ParquetRowGroupWriter rowGroupWriter = writer.CreateRowGroup())
                {
                    foreach (var column in columns)
                    {
                        rowGroupWriter.WriteColumn(column);
                    }
                }
            }
        }
Code example #7
        /// <inheritdoc/>
        public void Write(IList<IColumn> columns)
        {
            List<DataColumn> parquetColumns = CreateParquetColumns(columns);
            List<DataField> parquetFields = parquetColumns.Select(p => p.Field).ToList();
            Schema schema = new Schema(parquetFields);

            using (var parquetWriter = new ParquetWriter(schema, FileStream))
            {
                // TODO - Write is called many times; one for each rowgroup in the file. We do not need to compile
                // and write metadata many times. Refactor to write metadata only once.
                CryptoMetadata metadata = CompileMetadata(columns, FileEncryptionSettings);
                if (!metadata.IsEmpty())
                {
                    parquetWriter.CustomMetadata = new Dictionary<string, string>
                    {
                        [nameof(CryptoMetadata)] = JsonConvert.SerializeObject(
                            value: metadata,
                            settings: new JsonSerializerSettings
                            {
                                NullValueHandling = NullValueHandling.Ignore,
                                Converters = { new StringEnumConverter() },
                                Formatting = Formatting.Indented
                            })
                    };
                }

                // create a new row group in the file
                using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
                {
                    parquetColumns.ForEach(groupWriter.WriteColumn);
                }
            }
        }
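The read side of this scheme is not shown here. As a minimal sketch, assuming the same CryptoMetadata type and Newtonsoft.Json converters as above (fileStream is a placeholder for however the file is opened), the metadata could be recovered like this:

        // Hypothetical read-back of the CryptoMetadata blob written above.
        using (var parquetReader = new ParquetReader(fileStream))
        {
            if (parquetReader.CustomMetadata.TryGetValue(nameof(CryptoMetadata), out string json))
            {
                CryptoMetadata metadata = JsonConvert.DeserializeObject<CryptoMetadata>(json);
                // use metadata to configure decryption of the columns...
            }
        }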
Code example #8
        public void List_simple_element_write_read()
        {
            var table = new Table(
                new Schema(
                    new DataField <int>("id"),
                    new ListField("cities",
                                  new DataField <string>("name"))));

            var ms = new MemoryStream();

            table.Add(1, new[] { "London", "Derby" });
            table.Add(2, new[] { "Paris", "New York" });

            //write as table
            using (var writer = new ParquetWriter(table.Schema, ms))
            {
                writer.Write(table);
            }

            //read back into table
            ms.Position = 0;
            Table table2;

            using (var reader = new ParquetReader(ms))
            {
                table2 = reader.ReadAsTable();
            }

            //validate data
            Assert.Equal(table.ToString(), table2.ToString(), ignoreLineEndingDifferences: true);
        }
Code example #9
        public void WriteIntro()
        {
            //create data columns with schema metadata and the data you need
            var idColumn = new DataColumn(
                new DataField <int>("id"),
                new int[] { 1, 2 });

            var cityColumn = new DataColumn(
                new DataField <string>("city"),
                new string[] { "London", "Derby" });

            // create file schema
            var schema = new Schema(idColumn.Field, cityColumn.Field);

            using (Stream fileStream = System.IO.File.OpenWrite("c:\\test.parquet"))
            {
                using (var parquetWriter = new ParquetWriter(schema, fileStream))
                {
                    // create a new row group in the file
                    using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup(2))
                    {
                        groupWriter.WriteColumn(idColumn);
                        groupWriter.WriteColumn(cityColumn);
                    }
                }
            }
        }
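For completeness, a read-back counterpart to WriteIntro, sketched from the reader calls used in other examples on this page (ParquetReader, Schema.GetDataFields, OpenRowGroupReader, ReadColumn):

        public void ReadIntro()
        {
            // open the file written above and read the single row group back
            using (Stream fileStream = System.IO.File.OpenRead("c:\\test.parquet"))
            {
                using (var parquetReader = new ParquetReader(fileStream))
                {
                    DataField[] dataFields = parquetReader.Schema.GetDataFields();

                    using (ParquetRowGroupReader groupReader = parquetReader.OpenRowGroupReader(0))
                    {
                        DataColumn idColumn = groupReader.ReadColumn(dataFields[0]);   // { 1, 2 }
                        DataColumn cityColumn = groupReader.ReadColumn(dataFields[1]); // { "London", "Derby" }
                    }
                }
            }
        }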
Code example #10
      public void I_can_write_snappy_and_read_back()
      {
         var ms = new MemoryStream();
         var ds1 = new DataSet(
            new DataField<int>("id"),
            new DataField<int>("no"));

         ds1.Add(1, 3);
         ds1.Add(2, 4);

         DataSet ds2;

         //write
         using (var writer = new ParquetWriter(ms))
         {
            writer.Write(ds1, CompressionMethod.Snappy);
         }

         //read back
         using (var reader = new ParquetReader(ms))
         {
            ms.Position = 0;
            ds2 = reader.Read();
         }

         Assert.Equal(1, ds2[0].GetInt(0));
         Assert.Equal(2, ds2[1].GetInt(0));
         Assert.Equal(3, ds2[0].GetInt(1));
         Assert.Equal(4, ds2[1].GetInt(1));
      }
Code example #11
        public void Append_to_file_works_for_all_data_types()
        {
            var ms = new MemoryStream();

            var schema = new Schema();

            schema.Elements.Add(new SchemaElement<int>("Id"));
            schema.Elements.Add(new SchemaElement<DateTime>("Timestamp"));
            schema.Elements.Add(new SchemaElement<DateTimeOffset>("Timestamp2"));
            schema.Elements.Add(new SchemaElement<string>("Message"));
            schema.Elements.Add(new SchemaElement<byte[]>("Data"));
            schema.Elements.Add(new SchemaElement<bool>("IsDeleted"));
            schema.Elements.Add(new SchemaElement<float>("Amount"));
            schema.Elements.Add(new SchemaElement<decimal>("TotalAmount"));
            schema.Elements.Add(new SchemaElement<long>("Counter"));
            schema.Elements.Add(new SchemaElement<double>("Amount2"));
            schema.Elements.Add(new SchemaElement<byte>("Flag"));
            schema.Elements.Add(new SchemaElement<sbyte>("Flag2"));
            schema.Elements.Add(new SchemaElement<short>("Flag3"));
            schema.Elements.Add(new SchemaElement<ushort>("Flag4"));

            var ds1 = new DataSet(schema);

            ds1.Add(1, DateTime.Now, DateTimeOffset.Now, "Record1", System.Text.Encoding.ASCII.GetBytes("SomeData"), false, 123.4f, 200M, 100000L, 1331313D, (byte)1, (sbyte)-1, (short)-500, (ushort)500);
            ds1.Add(1, DateTime.Now, DateTimeOffset.Now, "Record2", System.Text.Encoding.ASCII.GetBytes("SomeData2"), false, 124.4f, 300M, 200000L, 2331313D, (byte)2, (sbyte)-2, (short)-400, (ushort)400);

            ParquetWriter.Write(ds1, ms, CompressionMethod.Snappy, null, null, false); // append: false (create the file)

            var ds2 = new DataSet(schema);

            ds2.Add(1, DateTime.Now, DateTimeOffset.Now, "Record3", System.Text.Encoding.ASCII.GetBytes("SomeData3"), false, 125.4f, 400M, 300000L, 3331313D, (byte)3, (sbyte)-3, (short)-600, (ushort)600);
            ds2.Add(1, DateTime.Now, DateTimeOffset.Now, "Record4", System.Text.Encoding.ASCII.GetBytes("SomeData4"), false, 126.4f, 500M, 400000L, 4331313D, (byte)4, (sbyte)-4, (short)-700, (ushort)700);

            ParquetWriter.Write(ds2, ms, CompressionMethod.Snappy, null, null, true); // append: true (add to the existing stream)
        }
Code example #12
        public void All_compression_methods_supported(CompressionMethod compressionMethod)
        {
            //v2
            var ms = new MemoryStream();
            DataSet ds1 = new DataSet(new DataField<int>("id"));
            DataSet ds2;

            ds1.Add(5);

            //write
            using (var writer = new ParquetWriter(ms))
            {
                writer.Write(ds1, compressionMethod); // use the test parameter, not a hard-coded method
            }

            //read back
            using (var reader = new ParquetReader(ms))
            {
                ms.Position = 0;
                ds2 = reader.Read();
            }

            Assert.Equal(5, ds2[0].GetInt(0));

            //v3
            const int value = 5;
            object actual = WriteReadSingle(new DataField<int>("id"), value, compressionMethod);

            Assert.Equal(5, (int)actual);
        }
Code example #13
        public void Array_write_read()
        {
            var table = new Table(
                new Schema(
                    new DataField <int>("id"),
                    new DataField <string[]>("categories") //array field
                    )
                );
            var ms = new MemoryStream();

            table.Add(1, new[] { "1", "2", "3" });
            table.Add(3, new[] { "3", "3", "3" });

            //write to stream
            using (var writer = new ParquetWriter(table.Schema, ms))
            {
                writer.Write(table);
            }

            //System.IO.File.WriteAllBytes("c:\\tmp\\1.parquet", ms.ToArray());

            //read back into table
            ms.Position = 0;
            Table table2;

            using (var reader = new ParquetReader(ms))
            {
                table2 = reader.ReadAsTable();
            }

            //validate data
            Assert.Equal(table.ToString(), table2.ToString(), ignoreLineEndingDifferences: true);
        }
Code example #14
        public void Write_read_nullable_column(Array input)
        {
            var id = new DataField <int?>("id");
            var ms = new MemoryStream();

            using (var writer = new ParquetWriter(new Schema(id), ms))
            {
                using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
                {
                    rg.WriteColumn(new DataColumn(id, input));
                }
            }

            ms.Position = 0;
            using (var reader = new ParquetReader(ms))
            {
                Assert.Equal(1, reader.RowGroupCount);

                using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(0))
                {
                    Assert.Equal(input.Length, rg.RowCount);
                    Assert.Equal(input, rg.ReadColumn(id).Data);
                }
            }
        }
Code example #15
        public void Write_int64datetimeoffset()
        {
            var element = new SchemaElement<DateTimeOffset>("timestamp_col");

            /*{
                ThriftConvertedType = ConvertedType.TIMESTAMP_MILLIS,
                ThriftOriginalType = Type.INT64
            };*/

            var ds = new DataSet(
                element
                )
            {
                new DateTimeOffset(new DateTime(2017, 1, 1, 12, 13, 22)),
                new DateTimeOffset(new DateTime(2017, 1, 1, 12, 13, 24))
            };

            var uncompressed = new MemoryStream();

            using (var writer = new ParquetWriter(uncompressed))
            {
                writer.Write(ds, CompressionMethod.None);
            }
        }
Code example #16
        public override void Output(IRow input, IUnstructuredWriter output)
        {
            ISchema schema = input.Schema;

            if (_ds == null)
            {
                List<SchemaElement> lse = new List<SchemaElement>();
                for (int i = 0; i < schema.Count; i++)
                {
                    var col = schema[i];
                    lse.Add(new SchemaElement(col.Name, Type.GetType(getColType(col))));
                }
                _ds = new DataSet(new Schema(lse));

                _tempStream   = new MemoryStream();
                _resultStream = output.BaseStream;

                _writer = new ParquetWriter(output.BaseStream, null, writeroptions);

            }

            List<object> ls = new List<object>();

            for (int i = 0; i < schema.Count; i++)
            {
                ls.Add(input.Get<dynamic>(input.Schema[i].Name));
            }
            Row r = new Row(ls);

            _ds.Add(r);
        }
Code example #17
        private void CompareWithMr(Table t)
        {
            string testFileName = Path.GetFullPath("temp.parquet");

            if (F.Exists(testFileName))
            {
                F.Delete(testFileName);
            }

            //produce file
            using (Stream s = F.OpenWrite(testFileName))
            {
                using (var writer = new ParquetWriter(t.Schema, s))
                {
                    writer.Write(t);
                }
            }

            //read back
            Table t2 = ParquetReader.ReadTableFromFile(testFileName);

            //check we don't have a bug internally before launching MR
            Assert.Equal(t.ToString("j"), t2.ToString("j"), ignoreLineEndingDifferences: true);

            string mrJson = ExecAndGetOutput(_javaExecName, $"-jar {_toolsJarPath} cat -j {testFileName}");

            Assert.Equal(t.ToString("j"), mrJson);
        }
Code example #18
        public void Write_different_compressions()
        {
            var ds = new DataSet(
                new SchemaElement <int>("id"),
                new SchemaElement <bool>("bool_col"),
                new SchemaElement <string>("string_col")
                )
            {
                //8 values for each column

                { 4, true, "0" },
                { 5, false, "1" },
                { 6, true, "0" },
                { 7, false, "1" },
                { 2, true, "0" },
                { 3, false, "1" },
                { 0, true, "0" },
                { 1, false, "0" }
            };
            var uncompressed = new MemoryStream();

            ParquetWriter.Write(ds, uncompressed, CompressionMethod.None);

            var compressed = new MemoryStream();

            ParquetWriter.Write(ds, compressed, CompressionMethod.Gzip);

            var compressedSnappy = new MemoryStream();

            ParquetWriter.Write(ds, compressedSnappy, CompressionMethod.Snappy);
        }
Code example #19
        public void Write_and_read_nullable_integers()
        {
            var ds = new DataSet(new DataField<int?>("id"))
            {
                1,
                2,
                3,
                (object)null,
                4,
                (object)null,
                5
            };
            var ms = new MemoryStream();

            ParquetWriter.Write(ds, ms);

            ms.Position = 0;
            DataSet ds1 = ParquetReader.Read(ms);

            Assert.Equal(1, ds1[0].GetInt(0));
            Assert.Equal(2, ds1[1].GetInt(0));
            Assert.Equal(3, ds1[2].GetInt(0));
            Assert.True(ds1[3].IsNullAt(0));
            Assert.Equal(4, ds1[4].GetInt(0));
            Assert.True(ds1[5].IsNullAt(0));
            Assert.Equal(5, ds1[6].GetInt(0));
        }
Code example #20
        static void ConvertCsvToParquet(string inputFile, string outputFile)
        {
            var data = new Dictionary<string, ArrayList>();

            using (var reader = new StreamReader(inputFile, true))
            {
                var header = reader.ReadLine();

                var columns = header.Split(",");
                for (int i = 0; i < columns.Length; i++)
                {
                    columns[i] = columns[i].Trim();
                }

                while (!reader.EndOfStream)
                {
                    var line = reader.ReadLine();
                    if (String.IsNullOrEmpty(line))
                    {
                        continue;
                    }

                    var parts = line.Split(",");
                    for (int i = 0; i < parts.Length && i < columns.Length; i++)
                    {
                        var column = columns[i];

                        if (parquet_types.ContainsKey(column))
                        {
                            if (!data.ContainsKey(column))
                            {
                                data.Add(column, new ArrayList());
                            }

                            data[column].Add(ParseValue(parquet_types[column], parts[i]));
                        }
                    }
                }
            }

            var datacolumns = parquet_types.Select(
                x => new DataColumn(CreateParquetField(x.Key, x.Value), data[x.Key].ToArray(ConvertParquetType(x.Value)))
                ).ToArray();
            var schema = new Schema(datacolumns.Select(x => (Field)x.Field).ToArray());

            using (Stream fileStream = System.IO.File.OpenWrite(outputFile))
            {
                using (var parquetWriter = new ParquetWriter(schema, fileStream))
                {
                    // create a new row group in the file
                    using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())
                    {
                        foreach (var column in datacolumns)
                        {
                            groupWriter.WriteColumn(column);
                        }
                    }
                }
            }
        }
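ConvertCsvToParquet relies on several helpers that are not part of the snippet: the parquet_types lookup plus ParseValue, CreateParquetField and ConvertParquetType. One plausible shape, assuming parquet_types maps column names to CLR types (hypothetical; the original project's versions may differ):

        // Hypothetical helpers assumed by the snippet above.
        static readonly Dictionary<string, Type> parquet_types = new Dictionary<string, Type>
        {
            ["id"] = typeof(int),     // example columns; fill in per input file
            ["name"] = typeof(string)
        };

        static object ParseValue(Type type, string raw) =>
            type == typeof(string) ? raw : Convert.ChangeType(raw, type, CultureInfo.InvariantCulture);

        static DataField CreateParquetField(string name, Type type) => new DataField(name, type);

        // ArrayList.ToArray(Type) needs the CLR element type of the column
        static Type ConvertParquetType(Type type) => type;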
Code example #21
        public void Type_writes_and_reads_end_to_end(SchemaElement schema, object value, string name = null)
        {
            var ds = new DataSet(schema)
            {
                new Row(value)
            };
            var ms = new MemoryStream();

            ParquetWriter.Write(ds, ms);

            ms.Position = 0;
            DataSet ds1 = ParquetReader.Read(ms);

            object expectedValue = ds[0][0];
            object actualValue   = ds1[0][0];

            if (schema.ElementType == typeof(DateTime))
            {
                actualValue = ((DateTimeOffset)actualValue).DateTime;
            }

            Assert.True(expectedValue.Equals(actualValue),
                        $"{name}| expected: {expectedValue}, actual: {actualValue}, schema element: {schema}");

            //if (schema.ElementType == typeof(decimal)) ParquetWriter.WriteFile(ds1, "c:\\tmp\\decimals.parquet");
        }
Code example #22
        public void Flat_write_read()
        {
            var table = new Table(new Schema(new DataField<int>("id"), new DataField<string>("city")));
            var ms = new MemoryStream();

            //generate fake data
            for (int i = 0; i < 1000; i++)
            {
                table.Add(new Row(i, "record#" + i));
            }

            //write to stream
            using (var writer = new ParquetWriter(table.Schema, ms))
            {
                writer.Write(table);
            }

            //read back into table
            ms.Position = 0;
            Table table2;

            using (var reader = new ParquetReader(ms))
            {
                table2 = reader.ReadAsTable();
            }

            //validate data
            Assert.True(table.Equals(table2, true));
        }
Code example #23
        public void All_compression_methods_supported(CompressionMethod compressionMethod)
        {
            //v2
            var ms = new MemoryStream();
            DataSet ds1 = new DataSet(new DataField<int>("id"));
            DataSet ds2;

            ds1.Add(5);

            //write
            using (var writer = new ParquetWriter(ms))
            {
                writer.Write(ds1, compressionMethod);
            }

            //read back
            using (var reader = new ParquetReader(ms))
            {
                ms.Position = 0;
                ds2 = reader.Read();
            }

            Assert.Equal(5, ds2[0].GetInt(0));

            //v3
            //looks like writing is not working in certain scenarios!
            //broken length: 177
            //correct length: 187
            const int value = 5;
            object actual = WriteReadSingle(new DataField<int>("id"), value, compressionMethod);

            Assert.Equal(5, (int)actual);
        }
Code example #24
        public void Special_read_file_with_multiple_row_groups()
        {
            var ms = new MemoryStream();

            //create multirowgroup file

            //first row group
            var t = new Table(new DataField <int>("id"));

            t.Add(1);
            t.Add(2);
            using (var writer = new ParquetWriter(t.Schema, ms))
            {
                writer.Write(t);
            }

            //second row group
            t.Clear();
            t.Add(3);
            t.Add(4);
            using (var writer = new ParquetWriter(t.Schema, ms, null, true)) // append: true adds to the existing stream instead of overwriting
            {
                writer.Write(t);
            }

            //read back as table
            t = ParquetReader.ReadTableFromStream(ms);
            Assert.Equal(4, t.Count);
        }
Code example #25
File: DecimalWrite.cs Project: philjdf/ParquetSharp
        public long ParquetDotNet()
        {
            {
                var valueField = new DecimalDataField("Value", precision: 29, scale: 3, hasNulls: false);
                var schema     = new Parquet.Data.Schema(valueField);

                using var stream        = File.Create("decimal_timeseries.parquet.net");
                using var parquetWriter = new ParquetWriter(schema, stream);
                using var groupWriter   = parquetWriter.CreateRowGroup();

                groupWriter.WriteColumn(new DataColumn(valueField, _values));
            }

            if (Check.Enabled)
            {
                // Read content from ParquetSharp and Parquet.NET
                var baseline = ReadFile("decimal_timeseries.parquet");
                var results  = ReadFile("decimal_timeseries.parquet.net");

                // Prove that the content is the same
                Check.ArraysAreEqual(_values, baseline);
                Check.ArraysAreEqual(baseline, results);
            }

            return new FileInfo("decimal_timeseries.parquet.net").Length;
        }
Code example #26
        public void FileMetadata_sets_num_rows_on_file_and_row_group_multiple_row_groups()
        {
            var ms = new MemoryStream();
            var id = new DataField <int>("id");

            //write
            using (var writer = new ParquetWriter(new Schema(id), ms))
            {
                using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
                {
                    rg.WriteColumn(new DataColumn(id, new[] { 1, 2, 3, 4 }));
                }

                using (ParquetRowGroupWriter rg = writer.CreateRowGroup())
                {
                    rg.WriteColumn(new DataColumn(id, new[] { 5, 6 }));
                }
            }

            //read back
            using (var reader = new ParquetReader(ms))
            {
                Assert.Equal(6, reader.ThriftMetadata.Num_rows);

                using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(0))
                {
                    Assert.Equal(4, rg.RowCount);
                }

                using (ParquetRowGroupReader rg = reader.OpenRowGroupReader(1))
                {
                    Assert.Equal(2, rg.RowCount);
                }
            }
        }
Code example #27
File: Program.cs Project: kRaBby/parquet-dotnet
        static void Main(string[] args)
        {
            L.Config.WriteTo.PoshConsole();

            using (var time = new TimeMeasure())
            {
                var ds = new DataSet(
                    new SchemaElement <int>("id"),
                    new SchemaElement <string>("name"),
                    new SchemaElement <double>("lat"),
                    new SchemaElement <double>("lon"));

                log.Trace(ds.Schema.ToString());

                for (int i = 0; i < 10; i++)
                {
                    ds.Add(
                        i,
                        NameGenerator.GeneratePersonFullName(),
                        Generator.RandomDouble,
                        Generator.RandomDouble);
                }

                ParquetWriter.WriteFile(ds, "c:\\tmp\\perf.parquet");


                log.Trace("written in {0}", time.Elapsed);
            }
        }
Code example #28
        public static Stream GetParquetFileWithThreeRowGroups()
        {
            var stream = new MemoryStream();
            var schema = SchemaReflector.Reflect<TwoColumn>();

            using (var parquetWriter = new ParquetWriter(schema, stream))
            {
                using (var rowGroup = parquetWriter.CreateRowGroup())
                {
                    rowGroup.WriteColumn(new Parquet.Data.DataColumn((DataField)schema.Fields[0], new[] {
                        1,
                        2,
                        3,
                        4
                    }));
                    rowGroup.WriteColumn(new Parquet.Data.DataColumn((DataField)schema.Fields[1], new[] {
                        "one",
                        "two",
                        "three",
                        "four"
                    }));
                }

                using (var rowGroup = parquetWriter.CreateRowGroup())
                {
                    rowGroup.WriteColumn(new Parquet.Data.DataColumn((DataField)schema.Fields[0], new[] {
                        5,
                        6,
                        7,
                        8
                    }));
                    rowGroup.WriteColumn(new Parquet.Data.DataColumn((DataField)schema.Fields[1], new[] {
                        "five",
                        "six",
                        "seven",
                        "eight"
                    }));
                }

                using (var rowGroup = parquetWriter.CreateRowGroup())
                {
                    rowGroup.WriteColumn(new Parquet.Data.DataColumn((DataField)schema.Fields[0], new[] {
                        9,
                        10,
                        11,
                        12
                    }));
                    rowGroup.WriteColumn(new Parquet.Data.DataColumn((DataField)schema.Fields[1], new[] {
                        "nine",
                        "ten",
                        "eleven",
                        "twelve"
                    }));
                }
            }

            stream.Position = 0;
            return stream;
        }
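SchemaReflector.Reflect<TwoColumn>() builds the schema from a POCO that the snippet does not include; given the int and string columns written above, it is presumably something like this (property names are guesses):

        // Hypothetical POCO behind SchemaReflector.Reflect<TwoColumn>():
        // one int column and one string column, matching the data written above.
        class TwoColumn
        {
            public int Id { get; set; }
            public string Name { get; set; }
        }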
Code example #29
        public static DataSet WriteRead(DataSet original, WriterOptions writerOptions = null)
        {
            var ms = new MemoryStream();

            ParquetWriter.Write(original, ms, CompressionMethod.None, null, writerOptions);

            ms.Position = 0;
            return ParquetReader.Read(ms);
        }
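WriteRead round-trips a DataSet through a MemoryStream; a typical call, following the assertion style of the other tests on this page, might be (sketch):

            // round-trip a one-column DataSet through memory and check the value survives
            var ds1 = new DataSet(new DataField<int>("id"));
            ds1.Add(5);
            DataSet ds2 = WriteRead(ds1);
            Assert.Equal(5, ds2[0].GetInt(0));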
Code example #30
        private string CreateTestArchive(Guid sessionId)
        {
            var tmpName = Path.GetTempFileName();

            var timeData   = new[] { 1L, 2L, 3L };
            var timeColumn = new DataColumn(
                new DataField <long>("time"),
                timeData);

            var numbersData   = new[] { 42d, 1337d, 6.022e23 };
            var numbersColumn = new DataColumn(
                new DataField <double>("cool_numbers"),
                numbersData);

            var schema = new Schema(timeColumn.Field, numbersColumn.Field);

            var json = new JObject {
                ["meta"] = new JObject(), ["user"] = new JObject()
            };


            using (var ms = new MemoryStream())
            {
                using (var parquetWriter = new ParquetWriter(schema, ms))
                using (var groupWriter = parquetWriter.CreateRowGroup())
                {
                    groupWriter.WriteColumn(timeColumn);
                    groupWriter.WriteColumn(numbersColumn);
                }

                ms.Position = 0;

                using (var parquetReader = new ParquetReader(ms))
                {
                    var tableInformation = new ArchiveTableInformation()
                    {
                        Columns = new List<DataField>(parquetReader.Schema.GetDataFields()),
                        Time    = timeColumn.Field
                    };
                    var table = new ArchiveTable(json, parquetReader, tableInformation, "testData");

                    var archive = Archive.Create(tmpName);

                    var session = ArchiveSession.Create(archive, "testName");
                    var folder  = ArchiveFolder.Create(archive, sessionId, "testFolder");

                    folder.AddChild(table);
                    session.AddChild(folder);
                    archive.AddSession(session);

                    archive.WriteFile().Wait();
                    archive.Close();
                }
            }

            return tmpName;
        }