/// <summary>
/// Round-trips a two-row table with an <c>int</c> "id" column and a
/// <c>string[]</c> "categories" column through an in-memory Parquet stream,
/// then checks that the table read back renders to the same text as the original.
/// </summary>
public void Array_write_read()
{
    var schema = new Schema(
        new DataField<int>("id"),
        new DataField<string[]>("categories")); // array-valued column
    var table = new Table(schema);
    var buffer = new MemoryStream();

    table.Add(1, new[] { "1", "2", "3" });
    table.Add(3, new[] { "3", "3", "3" });

    // Serialize the table into the in-memory buffer.
    using (var parquetWriter = new ParquetWriter(table.Schema, buffer))
    {
        parquetWriter.Write(table);
    }

    // Rewind so the reader starts from the first byte that was written.
    buffer.Seek(0, SeekOrigin.Begin);

    Table roundTripped;
    using (var parquetReader = new ParquetReader(buffer))
    {
        roundTripped = parquetReader.ReadAsTable();
    }

    // Compare the textual renderings, tolerating line-ending differences.
    Assert.Equal(table.ToString(), roundTripped.ToString(), ignoreLineEndingDifferences: true);
}
/// <summary>
/// Writes 1000 generated rows ("id", "city") to an in-memory Parquet stream,
/// reads them back, and asserts that the resulting table equals the original
/// via <c>Table.Equals(other, true)</c>.
/// </summary>
public void Flat_write_read()
{
    const int rowCount = 1000;

    var schema = new Schema(new DataField<int>("id"), new DataField<string>("city"));
    var table = new Table(schema);
    var buffer = new MemoryStream();

    // Populate the table with synthetic records: (0, "record#0"), (1, "record#1"), ...
    for (int rowIndex = 0; rowIndex < rowCount; rowIndex++)
    {
        var row = new Row(rowIndex, $"record#{rowIndex}");
        table.Add(row);
    }

    // Serialize the table into the in-memory buffer.
    using (var parquetWriter = new ParquetWriter(table.Schema, buffer))
    {
        parquetWriter.Write(table);
    }

    // Rewind so the reader starts from the first byte that was written.
    buffer.Seek(0, SeekOrigin.Begin);

    Table roundTripped;
    using (var parquetReader = new ParquetReader(buffer))
    {
        roundTripped = parquetReader.ReadAsTable();
    }

    bool tablesMatch = table.Equals(roundTripped, true);
    Assert.True(tablesMatch);
}
/// <summary>
/// Round-trips a two-row table whose "cities" column is a <c>ListField</c> of
/// string "name" elements through an in-memory Parquet stream, then checks that
/// the table read back renders to the same text as the original.
/// </summary>
public void List_simple_element_write_read()
{
    var idField = new DataField<int>("id");
    var citiesField = new ListField("cities", new DataField<string>("name"));
    var table = new Table(new Schema(idField, citiesField));
    var buffer = new MemoryStream();

    table.Add(1, new[] { "London", "Derby" });
    table.Add(2, new[] { "Paris", "New York" });

    // Serialize the table into the in-memory buffer.
    using (var parquetWriter = new ParquetWriter(table.Schema, buffer))
    {
        parquetWriter.Write(table);
    }

    // Rewind so the reader starts from the first byte that was written.
    buffer.Seek(0, SeekOrigin.Begin);

    Table roundTripped;
    using (var parquetReader = new ParquetReader(buffer))
    {
        roundTripped = parquetReader.ReadAsTable();
    }

    // Compare the textual renderings, tolerating line-ending differences.
    Assert.Equal(table.ToString(), roundTripped.ToString(), ignoreLineEndingDifferences: true);
}
/// <summary>
/// Opens <paramref name="file"/> for reading and loads its contents as a Parquet
/// <c>Table</c>, with the <c>TreatByteArrayAsString</c> option enabled.
/// </summary>
/// <param name="file">The storage file to read.</param>
/// <param name="offset">Not referenced by the current implementation.</param>
/// <param name="count">Not referenced by the current implementation.</param>
/// <returns>
/// The table read from the file, or <c>null</c> if reading threw; in that case a
/// "Cannot open file" dialog with the exception message is shown first.
/// </returns>
public static async Task<Table> LoadAsync(StorageFile file, int offset = 0, int count = 100)
{
    using (IRandomAccessStreamWithContentType uwpStream = await file.OpenReadAsync())
    using (Stream input = uwpStream.AsStreamForRead())
    {
        var options = new ParquetOptions { TreatByteArrayAsString = true };

        Table loaded;
        try
        {
            using (var parquetReader = new ParquetReader(input, options))
            {
                loaded = parquetReader.ReadAsTable();
            }
        }
        catch (Exception ex)
        {
            // Surface the failure to the user and fall through with no table.
            var errorDialog = new MessageDialog(ex.Message, "Cannot open file");
            await errorDialog.ShowAsync();
            loaded = null;
        }

        return loaded;
    }
}
/// <summary>
/// Reads the Parquet file at <c>{deltaTablePath}/{filepath}</c> from the data lake
/// file system client and returns its contents as a <c>Table</c>.
/// </summary>
/// <param name="filepath">Path of the file, relative to <c>deltaTablePath</c>.</param>
/// <returns>The table produced by <c>ParquetReader.ReadAsTable()</c>.</returns>
private Table ReadParquetFile(string filepath)
{
    var fileClient = this.dataLakeFileSystemClient.GetFileClient($"{this.deltaTablePath}/{filepath}");

    // Dispose the opened stream explicitly instead of relying on the reader to
    // close it; previously only the reader was disposed and the stream was not.
    using (var fileStream = fileClient.OpenRead())
    using (var parquetReader = new ParquetReader(fileStream))
    {
        return parquetReader.ReadAsTable();
    }
}
/// <summary>
/// Opens the test file identified by <paramref name="name"/> via
/// <c>OpenTestFile</c> and reads it as a Parquet <c>Table</c>.
/// </summary>
/// <param name="name">Name passed through to <c>OpenTestFile</c>.</param>
/// <returns>The table produced by <c>ParquetReader.ReadAsTable()</c>.</returns>
protected Table ReadTestFileAsTable(string name)
{
    using (Stream input = OpenTestFile(name))
    using (var parquetReader = new ParquetReader(input))
    {
        Table loaded = parquetReader.ReadAsTable();
        return loaded;
    }
}
/// <summary>
/// Reads the "list_simple.parquet" test file as a table and checks the string
/// rendering of its first row (a "cities" list plus an "id").
/// </summary>
public void List_read_simple_element_from_Apache_Spark()
{
    const string expectedFirstRow = "{'cities': ['London', 'Derby', 'Paris', 'New York'], 'id': 1}";

    Table table;
    using (Stream input = OpenTestFile("list_simple.parquet"))
    using (var parquetReader = new ParquetReader(input))
    {
        table = parquetReader.ReadAsTable();
    }

    string actualFirstRow = table[0].ToString();
    Assert.Equal(expectedFirstRow, actualFirstRow, ignoreLineEndingDifferences: true);
}
/// <summary>
/// Reads the "map_simple.parquet" test file as a table and checks the string
/// rendering of its first row (an "id" plus a "numbers" list of key/value pairs).
/// </summary>
public void Map_read_from_Apache_Spark()
{
    const string expectedFirstRow = "{'id': 1, 'numbers': [{'key': 1, 'value': 'one'}, {'key': 2, 'value': 'two'}, {'key': 3, 'value': 'three'}]}";

    Table table;
    using (Stream input = OpenTestFile("map_simple.parquet"))
    using (var parquetReader = new ParquetReader(input))
    {
        table = parquetReader.ReadAsTable();
    }

    string actualFirstRow = table[0].ToString();
    Assert.Equal(expectedFirstRow, actualFirstRow, ignoreLineEndingDifferences: true);
}
/// <summary>
/// Reads the "list_structs.parquet" test file as a table, asserts that it holds
/// exactly one row, and checks that row's string rendering (a "cities" list of
/// country/name structures plus an "id").
/// </summary>
public void List_read_structures_from_Apache_Spark()
{
    const string expectedRow = "{'cities': [{'country': 'UK', 'name': 'London'}, {'country': 'US', 'name': 'New York'}], 'id': 1}";

    Table table;
    using (Stream input = OpenTestFile("list_structs.parquet"))
    using (var parquetReader = new ParquetReader(input))
    {
        table = parquetReader.ReadAsTable();
    }

    Assert.Single(table);

    string actualRow = table[0].ToString();
    Assert.Equal(expectedRow, actualRow, ignoreLineEndingDifferences: true);
}
/// <summary>
/// Reads the "all_var1.parquet" test file as a table, asserts that it holds two
/// rows, and checks the string rendering of each row against its expected text
/// (nested structures, lists of structures and lists of primitives).
/// </summary>
public void BigFatOne_variations_from_Apache_Spark()
{
    Table table;
    using (Stream input = OpenTestFile("all_var1.parquet"))
    using (var parquetReader = new ParquetReader(input))
    {
        table = parquetReader.ReadAsTable();
    }

    Assert.Equal(2, table.Count);

    string firstRow = table[0].ToString();
    Assert.Equal(
        "{'addresses': [{'line1': 'Dante Road', 'name': 'Head Office', 'openingHours': [9, 10, 11, 12, 13, 14, 15, 16, 17, 18], 'postcode': 'SE11'}, {'line1': 'Somewhere Else', 'name': 'Small Office', 'openingHours': [6, 7, 19, 20, 21, 22, 23], 'postcode': 'TN19'}], 'cities': ['London', 'Derby'], 'comment': 'this file contains all the permunations for nested structures and arrays to test Parquet parser', 'id': 1, 'location': {'latitude': 51.2, 'longitude': 66.3}, 'price': {'lunch': {'max': 2, 'min': 1}}}",
        firstRow);

    string secondRow = table[1].ToString();
    Assert.Equal(
        "{'addresses': [{'line1': 'Dante Road', 'name': 'Head Office', 'openingHours': [9, 10, 11, 12, 13, 14, 15, 16, 17, 18], 'postcode': 'SE11'}, {'line1': 'Somewhere Else', 'name': 'Small Office', 'openingHours': [6, 7, 19, 20, 21, 22, 23], 'postcode': 'TN19'}], 'cities': ['London', 'Derby'], 'comment': 'this file contains all the permunations for nested structures and arrays to test Parquet parser', 'id': 1, 'location': {'latitude': 51.2, 'longitude': 66.3}, 'price': {'lunch': {'max': 2, 'min': 1}}}",
        secondRow);
}
/// <summary>
/// Writes <paramref name="table"/> to an in-memory Parquet stream and reads it
/// straight back, returning the table that was read.
/// </summary>
/// <param name="table">The table to serialize; its schema is used for the writer.</param>
/// <param name="saveLocal">
/// When <c>true</c>, the serialized bytes are also written to
/// <c>c:\tmp\test.parquet</c> before reading back.
/// </param>
/// <returns>The table produced by reading the serialized bytes.</returns>
protected Table WriteRead(Table table, bool saveLocal = false)
{
    var buffer = new MemoryStream();

    using (var parquetWriter = new ParquetWriter(table.Schema, buffer))
    {
        parquetWriter.Write(table);
    }

    if (saveLocal)
    {
        // Optional on-disk copy of the serialized bytes.
        byte[] serialized = buffer.ToArray();
        F.WriteAllBytes("c:\\tmp\\test.parquet", serialized);
    }

    // Rewind so the reader starts from the first byte that was written.
    buffer.Seek(0, SeekOrigin.Begin);

    using (var parquetReader = new ParquetReader(buffer))
    {
        Table roundTripped = parquetReader.ReadAsTable();
        return roundTripped;
    }
}