/// <summary>
/// Reads the whole file into a <see cref="DataSet"/>, honoring the Offset/Count
/// window configured in <c>_readerOptions</c> (Count of -1 means "all rows").
/// Row groups entirely outside the requested window are skipped without decoding.
/// </summary>
/// <returns>A merged <see cref="DataSet"/> with TotalRowCount and CreatedBy copied from file metadata.</returns>
/// <exception cref="ParquetException">Wraps any failure while reading an individual column chunk.</exception>
public DataSet Read()
{
   _readerOptions.Validate();

   _meta = ReadMetadata();

   var metaParser = new FileMetadataParser(_meta);
   Schema schema = metaParser.ParseSchema(_formatOptions);

   // Accumulates decoded values per column path across all row groups.
   var pathToValues = new Dictionary <string, IList>();
   long pos = 0;       // absolute row index of the first row in the current row group
   long rowsRead = 0;  // rows delivered so far toward _readerOptions.Count

   foreach (Thrift.RowGroup rg in _meta.Row_groups)
   {
      //check whether to skip RG completely:
      // - we already delivered Count rows, or
      // - the requested Offset lies beyond this row group's last row
      if ((_readerOptions.Count != -1 && rowsRead >= _readerOptions.Count) ||
         (_readerOptions.Offset > pos + rg.Num_rows - 1))
      {
         pos += rg.Num_rows;
         continue;
      }

      // offset: rows to skip inside this row group (0 once we're past the global Offset)
      long offset = Math.Max(0, _readerOptions.Offset - pos);
      // count: rows still wanted, capped by this row group's size.
      // NOTE(review): this cap does not subtract 'offset', so a partially-skipped
      // group may request more rows than remain after the skip — presumably
      // ColumnReader.Read clamps internally; verify.
      long count = _readerOptions.Count == -1
         ? rg.Num_rows
         : Math.Min(_readerOptions.Count - rowsRead, rg.Num_rows);

      for (int icol = 0; icol < rg.Columns.Count; icol++)
      {
         Thrift.ColumnChunk cc = rg.Columns[icol];
         SchemaElement se = schema[cc];

         var p = new ColumnReader(cc, se, _input, ThriftStream, _formatOptions);

         try
         {
            IList chunkValues = p.Read(offset, count);

            // First chunk for this path becomes the backing list; later chunks append.
            if (!pathToValues.TryGetValue(se.Path, out IList allValues))
            {
               pathToValues[se.Path] = chunkValues;
            }
            else
            {
               allValues.AddRange(chunkValues);
            }

            if (icol == 0)
            {
               //todo: this may not work
               // Uses the first column's value count as the row count for the
               // whole group — only valid when that column is non-repeated.
               rowsRead += chunkValues.Count;
            }
         }
         catch (Exception ex)
         {
            // Wrap with column identity so the caller knows which column failed.
            throw new ParquetException($"fatal error reading column '{se}'", ex);
         }
      }

      pos += rg.Num_rows;
   }

   // Fold flat per-path value lists back into the (possibly nested) schema shape.
   var merger = new RecursiveMerge(schema);
   DataSet ds = merger.Merge(pathToValues);

   // TotalRowCount reflects the file, not the windowed subset that was read.
   ds.TotalRowCount = _meta.Num_rows;
   ds.Metadata.CreatedBy = _meta.Created_by;

   return(ds);
}
/// <summary>
/// Reduces this custom expression node to a callable expression tree:
/// a boxed <c>ColumnReader</c> invoking its get-value method on the data
/// reader parameter, with the result converted to the target type.
/// </summary>
public override Expression Reduce()
{
    var reader = new ColumnReader(_dataContext, _dataContext.MappingSchema, _type, _idx);
    Expression getValueCall = Call(Constant(reader), _columnReaderGetValueInfo, _dataReaderParam);
    return Convert(getValueCall, _type);
}
/// <summary>
/// Builds the reduced expression for this node: calls the get-value method
/// on a freshly constructed, constant-wrapped <c>ColumnReader</c> and casts
/// the result to the expected column type.
/// </summary>
public override Expression Reduce()
{
    ColumnReader reader = new ColumnReader(_dataContext, _dataContext.MappingSchema, _type, _idx);
    var invocation = Call(Constant(reader), _columnReaderGetValueInfo, _dataReaderParam);
    var typedResult = Convert(invocation, _type);
    return typedResult;
}