Esempio n. 1
0
        /// <summary>
        /// Reads the row groups listed in the file metadata and merges the values of every column
        /// into a single <see cref="DataSet"/>, applying the Offset and Count reader options.
        /// </summary>
        /// <returns>
        /// The merged data set, with TotalRowCount and Metadata.CreatedBy copied from the file metadata.
        /// </returns>
        /// <exception cref="ParquetException">
        /// Reading a column chunk failed; the original error is attached as the inner exception.
        /// </exception>
        public DataSet Read()
        {
            _readerOptions.Validate();
            _meta = ReadMetadata();

            Schema schema = new FileMetadataParser(_meta).ParseSchema(_formatOptions);

            var valuesByPath = new Dictionary<string, IList>();
            long groupStart = 0; // index of the first row of the row group currently visited
            long rowsRead = 0;

            foreach (Thrift.RowGroup rowGroup in _meta.Row_groups)
            {
                // A row group is read only while the Count limit has not been reached (-1 means
                // no limit) and the requested Offset does not lie past the group's last row.
                if ((_readerOptions.Count == -1 || rowsRead < _readerOptions.Count) &&
                    _readerOptions.Offset <= groupStart + rowGroup.Num_rows - 1)
                {
                    // Offset relative to the start of this row group.
                    long offset = Math.Max(0, _readerOptions.Offset - groupStart);

                    long count;
                    if (_readerOptions.Count == -1)
                    {
                        count = rowGroup.Num_rows;
                    }
                    else
                    {
                        count = Math.Min(_readerOptions.Count - rowsRead, rowGroup.Num_rows);
                    }

                    bool isFirstColumn = true;

                    foreach (Thrift.ColumnChunk chunk in rowGroup.Columns)
                    {
                        SchemaElement se = schema[chunk];
                        var reader = new ColumnReader(chunk, se, _input, ThriftStream, _formatOptions);

                        try
                        {
                            IList readValues = reader.Read(offset, count);

                            // Append to the values already collected for this path, or start a new list.
                            if (valuesByPath.TryGetValue(se.Path, out IList collected))
                            {
                                collected.AddRange(readValues);
                            }
                            else
                            {
                                valuesByPath[se.Path] = readValues;
                            }

                            if (isFirstColumn)
                            {
                                // NOTE(review): rows read are counted from the first column's value
                                // count only; the original author flagged that this may not work.
                                rowsRead += readValues.Count;
                            }
                        }
                        catch (Exception ex)
                        {
                            throw new ParquetException($"fatal error reading column '{se}'", ex);
                        }

                        isFirstColumn = false;
                    }
                }

                groupStart += rowGroup.Num_rows;
            }

            DataSet result = new RecursiveMerge(schema).Merge(valuesByPath);

            result.TotalRowCount = _meta.Num_rows;
            result.Metadata.CreatedBy = _meta.Created_by;

            return result;
        }
		/// <summary>
		/// Reduces this node to an expression that invokes the column reader's get-value method
		/// (<c>_columnReaderGetValueInfo</c>) with <c>_dataReaderParam</c> and converts the result to <c>_type</c>.
		/// </summary>
		/// <returns>The conversion expression wrapping the method call.</returns>
		public override Expression Reduce()
		{
			// The reader is created eagerly and embedded in the tree as a constant.
			var readerConstant = Constant(new ColumnReader(_dataContext, _dataContext.MappingSchema, _type, _idx));
			var getValueCall   = Call(readerConstant, _columnReaderGetValueInfo, _dataReaderParam);

			return Convert(getValueCall, _type);
		}
        /// <summary>
        /// Reduces this node to a call of <c>_columnReaderGetValueInfo</c> on a newly created
        /// <c>ColumnReader</c> constant, passing <c>_dataReaderParam</c>, with the result converted to <c>_type</c>.
        /// </summary>
        /// <returns>The conversion expression wrapping the method call.</returns>
        public override Expression Reduce() =>
            Convert(
                Call(
                    Constant(new ColumnReader(_dataContext, _dataContext.MappingSchema, _type, _idx)),
                    _columnReaderGetValueInfo,
                    _dataReaderParam),
                _type);