Пример #1
0
        /// <summary>
        /// Round-trips a table with an <c>int</c> column and a <c>string[]</c> column
        /// through an in-memory Parquet stream and checks that the table read back
        /// renders to the same text as the table that was written.
        /// </summary>
        public void Array_write_read()
        {
            var schema = new Schema(
                new DataField <int>("id"),
                new DataField <string[]>("categories")); // array field
            var source = new Table(schema);

            source.Add(1, new[] { "1", "2", "3" });
            source.Add(3, new[] { "3", "3", "3" });

            var buffer = new MemoryStream();

            // serialise the table into the buffer
            using (var writer = new ParquetWriter(source.Schema, buffer))
            {
                writer.Write(source);
            }

            // rewind the buffer and deserialise it again
            buffer.Position = 0;
            Table roundTripped;
            using (var reader = new ParquetReader(buffer))
            {
                roundTripped = reader.ReadAsTable();
            }

            // both tables must render identically (line endings aside)
            Assert.Equal(source.ToString(), roundTripped.ToString(), ignoreLineEndingDifferences: true);
        }
Пример #2
0
        /// <summary>
        /// Round-trips a flat two-column table (<c>id</c>, <c>city</c>) holding 1000 generated
        /// rows through an in-memory Parquet stream and asserts that the table read back
        /// equals the original via <c>Table.Equals(other, true)</c>.
        /// </summary>
        public void Flat_write_read()
        {
            var schema = new Schema(new DataField <int>("id"), new DataField <string>("city"));
            var source = new Table(schema);

            // fill the table with synthetic rows: (0, "record#0") .. (999, "record#999")
            for (int rowIndex = 0; rowIndex < 1000; rowIndex++)
            {
                source.Add(new Row(rowIndex, $"record#{rowIndex}"));
            }

            var buffer = new MemoryStream();

            // serialise the table into the buffer
            using (var writer = new ParquetWriter(source.Schema, buffer))
            {
                writer.Write(source);
            }

            // rewind the buffer and deserialise it again
            buffer.Position = 0;
            Table roundTripped;
            using (var reader = new ParquetReader(buffer))
            {
                roundTripped = reader.ReadAsTable();
            }

            bool tablesMatch = source.Equals(roundTripped, true);
            Assert.True(tablesMatch);
        }
Пример #3
0
        /// <summary>
        /// Round-trips a table whose second column is a list field ("cities") of string
        /// elements ("name") through an in-memory Parquet stream and checks that the table
        /// read back renders to the same text as the table that was written.
        /// </summary>
        public void List_simple_element_write_read()
        {
            var citiesField = new ListField("cities", new DataField <string>("name"));
            var schema = new Schema(new DataField <int>("id"), citiesField);
            var source = new Table(schema);

            source.Add(1, new[] { "London", "Derby" });
            source.Add(2, new[] { "Paris", "New York" });

            var buffer = new MemoryStream();

            // serialise the table into the buffer
            using (var writer = new ParquetWriter(source.Schema, buffer))
            {
                writer.Write(source);
            }

            // rewind the buffer and deserialise it again
            buffer.Position = 0;
            Table roundTripped;
            using (var reader = new ParquetReader(buffer))
            {
                roundTripped = reader.ReadAsTable();
            }

            // both tables must render identically (line endings aside)
            Assert.Equal(source.ToString(), roundTripped.ToString(), ignoreLineEndingDifferences: true);
        }
Пример #4
0
        /// <summary>
        /// Opens <paramref name="file"/> for reading and loads its contents as a Parquet table,
        /// with the <c>TreatByteArrayAsString</c> option enabled.
        /// </summary>
        /// <param name="file">Storage file to open and read.</param>
        /// <param name="offset">Not used by this implementation.</param>
        /// <param name="count">Not used by this implementation.</param>
        /// <returns>
        /// The table read from the file, or <c>null</c> if creating the reader or reading the
        /// table throws; in that case a "Cannot open file" dialog with the exception message
        /// is shown before returning.
        /// </returns>
        public static async Task <Table> LoadAsync(StorageFile file, int offset = 0, int count = 100)
        {
            using (IRandomAccessStreamWithContentType uwpStream = await file.OpenReadAsync())
            using (Stream stream = uwpStream.AsStreamForRead())
            {
                var formatOptions = new ParquetOptions { TreatByteArrayAsString = true };

                try
                {
                    using (var reader = new ParquetReader(stream, formatOptions))
                    {
                        Table loaded = reader.ReadAsTable();
                        return loaded;
                    }
                }
                catch (Exception ex)
                {
                    // report the failure to the user, then signal it to the caller with null
                    await new MessageDialog(ex.Message, "Cannot open file").ShowAsync();
                    return null;
                }
            }
        }
Пример #5
0
        /// <summary>
        /// Reads the Parquet file located at <c>{deltaTablePath}/{filepath}</c> through the
        /// data lake file system client and returns its contents as a table.
        /// </summary>
        /// <param name="filepath">Path of the file, appended to <c>deltaTablePath</c> with a '/' separator.</param>
        /// <returns>The table produced by <c>ParquetReader.ReadAsTable</c>.</returns>
        private Table ReadParquetFile(string filepath)
        {
            var fileClient = this.dataLakeFileSystemClient.GetFileClient($"{this.deltaTablePath}/{filepath}");

            // Own the stream here so it is released even when the reader leaves it open
            // or its constructor throws; previously the stream was never disposed.
            using (var fileStream = fileClient.OpenRead())
            using (var parquetReader = new ParquetReader(fileStream))
            {
                return parquetReader.ReadAsTable();
            }
        }
Пример #6
0
 /// <summary>
 /// Opens the test file called <paramref name="name"/> via <c>OpenTestFile</c> and reads it as a table.
 /// </summary>
 /// <param name="name">Name passed to <c>OpenTestFile</c>.</param>
 /// <returns>The table produced by <c>ParquetReader.ReadAsTable</c>.</returns>
 protected Table ReadTestFileAsTable(string name)
 {
     using (Stream input = OpenTestFile(name))
     using (var reader = new ParquetReader(input))
     {
         Table result = reader.ReadAsTable();
         return result;
     }
 }
Пример #7
0
        /// <summary>
        /// Reads the "list_simple.parquet" test file as a table and checks the text
        /// rendering of its first row.
        /// </summary>
        public void List_read_simple_element_from_Apache_Spark()
        {
            Table loaded;

            using (Stream input = OpenTestFile("list_simple.parquet"))
            using (var reader = new ParquetReader(input))
            {
                loaded = reader.ReadAsTable();
            }

            string firstRow = loaded[0].ToString();
            Assert.Equal("{'cities': ['London', 'Derby', 'Paris', 'New York'], 'id': 1}", firstRow, ignoreLineEndingDifferences: true);
        }
Пример #8
0
        /// <summary>
        /// Reads the "map_simple.parquet" test file as a table and checks the text
        /// rendering of its first row.
        /// </summary>
        public void Map_read_from_Apache_Spark()
        {
            Table loaded;

            using (Stream input = OpenTestFile("map_simple.parquet"))
            using (var reader = new ParquetReader(input))
            {
                loaded = reader.ReadAsTable();
            }

            string firstRow = loaded[0].ToString();
            Assert.Equal("{'id': 1, 'numbers': [{'key': 1, 'value': 'one'}, {'key': 2, 'value': 'two'}, {'key': 3, 'value': 'three'}]}", firstRow, ignoreLineEndingDifferences: true);
        }
Пример #9
0
        /// <summary>
        /// Reads the "list_structs.parquet" test file as a table, asserts that it holds
        /// exactly one row and checks the text rendering of that row.
        /// </summary>
        public void List_read_structures_from_Apache_Spark()
        {
            Table loaded;

            using (Stream input = OpenTestFile("list_structs.parquet"))
            using (var reader = new ParquetReader(input))
            {
                loaded = reader.ReadAsTable();
            }

            Assert.Single(loaded);

            string onlyRow = loaded[0].ToString();
            Assert.Equal("{'cities': [{'country': 'UK', 'name': 'London'}, {'country': 'US', 'name': 'New York'}], 'id': 1}", onlyRow, ignoreLineEndingDifferences: true);
        }
Пример #10
0
        /// <summary>
        /// Reads the "all_var1.parquet" test file as a table, asserts that it holds two rows
        /// and checks the text rendering of each of them.
        /// </summary>
        public void BigFatOne_variations_from_Apache_Spark()
        {
            // expected renderings, kept verbatim because they are compared against the file's contents
            string expectedFirstRow = "{'addresses': [{'line1': 'Dante Road', 'name': 'Head Office', 'openingHours': [9, 10, 11, 12, 13, 14, 15, 16, 17, 18], 'postcode': 'SE11'}, {'line1': 'Somewhere Else', 'name': 'Small Office', 'openingHours': [6, 7, 19, 20, 21, 22, 23], 'postcode': 'TN19'}], 'cities': ['London', 'Derby'], 'comment': 'this file contains all the permunations for nested structures and arrays to test Parquet parser', 'id': 1, 'location': {'latitude': 51.2, 'longitude': 66.3}, 'price': {'lunch': {'max': 2, 'min': 1}}}";
            string expectedSecondRow = "{'addresses': [{'line1': 'Dante Road', 'name': 'Head Office', 'openingHours': [9, 10, 11, 12, 13, 14, 15, 16, 17, 18], 'postcode': 'SE11'}, {'line1': 'Somewhere Else', 'name': 'Small Office', 'openingHours': [6, 7, 19, 20, 21, 22, 23], 'postcode': 'TN19'}], 'cities': ['London', 'Derby'], 'comment': 'this file contains all the permunations for nested structures and arrays to test Parquet parser', 'id': 1, 'location': {'latitude': 51.2, 'longitude': 66.3}, 'price': {'lunch': {'max': 2, 'min': 1}}}";

            Table loaded;

            using (Stream input = OpenTestFile("all_var1.parquet"))
            using (var reader = new ParquetReader(input))
            {
                loaded = reader.ReadAsTable();
            }

            Assert.Equal(2, loaded.Count);
            Assert.Equal(expectedFirstRow, loaded[0].ToString());
            Assert.Equal(expectedSecondRow, loaded[1].ToString());
        }
Пример #11
0
        /// <summary>
        /// Writes <paramref name="table"/> to an in-memory Parquet stream and reads it back.
        /// </summary>
        /// <param name="table">Table to write; its schema is used to create the writer.</param>
        /// <param name="saveLocal">
        /// When <c>true</c>, the written bytes are also saved to <c>c:\tmp\test.parquet</c>
        /// through <c>F.WriteAllBytes</c>.
        /// </param>
        /// <returns>The table read back from the in-memory stream.</returns>
        protected Table WriteRead(Table table, bool saveLocal = false)
        {
            var buffer = new MemoryStream();

            using (var writer = new ParquetWriter(table.Schema, buffer))
            {
                writer.Write(table);
            }

            if (saveLocal)
            {
                byte[] written = buffer.ToArray();
                F.WriteAllBytes("c:\\tmp\\test.parquet", written);
            }

            // rewind so the reader starts at the beginning of what was written
            buffer.Position = 0;

            using (var reader = new ParquetReader(buffer))
            {
                Table roundTripped = reader.ReadAsTable();
                return roundTripped;
            }
        }