/// <summary>
/// Verifies that the supplied schema equals the schema built from the existing file footer.
/// </summary>
/// <param name="footer">Footer of the existing file; its model schema is the reference.</param>
/// <param name="schema">Schema to compare against the file's schema.</param>
/// <exception cref="ParquetException">
/// Thrown when the two schemas are not equal; the message carries the difference description.
/// </exception>
private void ValidateSchemasCompatible(ThriftFooter footer, Schema schema)
{
    Schema fileSchema = footer.CreateModelSchema(_formatOptions);

    // Nothing to report when both schemas agree.
    if (schema.Equals(fileSchema))
    {
        return;
    }

    string difference = schema.GetNotEqualsMessage(fileSchema, "appending", "existing");
    throw new ParquetException("passed schema does not match existing file schema, reason: " + difference);
}
/// <summary>
/// Verifies that the schema of the supplied data set equals the schema built from the existing file footer.
/// </summary>
/// <param name="footer">Footer of the existing file; its model schema is the reference.</param>
/// <param name="ds">Data set whose schema is compared against the file's schema.</param>
/// <exception cref="ParquetException">
/// Thrown when the two schemas are not equal; the message carries the difference description.
/// </exception>
void ValidateSchemasCompatible(ThriftFooter footer, DataSet ds)
{
    Schema fileSchema = footer.CreateModelSchema(_formatOptions);
    Schema incomingSchema = ds.Schema;

    // Nothing to report when both schemas agree.
    if (incomingSchema.Equals(fileSchema))
    {
        return;
    }

    string difference = incomingSchema.GetNotEqualsMessage(fileSchema, "appending", "existing");
    throw new ParquetException($"{nameof(DataSet)} schema does not match existing file schema, reason: {difference}");
}
/// <summary>
/// Reads the file metadata and the column data of the selected row groups into a single <see cref="DataSet"/>.
/// </summary>
/// <remarks>
/// The reader options control the row window: <c>Offset</c> is the index of the first row to read and
/// <c>Count</c> is the maximum number of rows to read, where <c>-1</c> means "no limit".
/// When field predicates are set, only column chunks matched by at least one predicate are read and the
/// resulting schema is filtered by the same predicates.
/// </remarks>
/// <returns>
/// A data set holding the values read per column path, the file's total row count and creator string,
/// any custom footer metadata, and the raw Thrift metadata.
/// </returns>
/// <exception cref="ParquetException">
/// Thrown when reading or accumulating a column chunk fails; the original error is the inner exception.
/// </exception>
public DataSet Read()
{
    _readerOptions.Validate();

    _meta = ReadMetadata();
    var footer = new ThriftFooter(_meta);
    var pathToValues = new Dictionary<string, IList>();

    long pos = 0;       // index of the first row of the current row group within the file
    long rowsRead = 0;  // rows accumulated so far, used to enforce the Count limit

    foreach (Thrift.RowGroup rg in _meta.Row_groups)
    {
        // Skip the row group entirely when the Count limit is already satisfied
        // or when the requested Offset lies beyond the group's last row.
        if ((_readerOptions.Count != -1 && rowsRead >= _readerOptions.Count) ||
            (_readerOptions.Offset > pos + rg.Num_rows - 1))
        {
            pos += rg.Num_rows;
            continue;
        }

        // Offset relative to this row group and the number of rows still wanted from it.
        long offset = Math.Max(0, _readerOptions.Offset - pos);
        long count = _readerOptions.Count == -1
            ? rg.Num_rows
            : Math.Min(_readerOptions.Count - rowsRead, rg.Num_rows);

        // Rows must be counted once per row group, from the first column chunk that is actually read.
        // Counting only at column index 0 would leave rowsRead unchanged whenever the field predicates
        // exclude the first column, so the Count limit would never take effect.
        bool rowsCounted = false;

        for (int icol = 0; icol < rg.Columns.Count; icol++)
        {
            Thrift.ColumnChunk cc = rg.Columns[icol];
            string path = cc.GetPath();

            if (_fieldPredicates != null && !_fieldPredicates.Any(p => p.IsMatch(cc, path)))
            {
                continue;
            }

            var columnarReader = new ColumnarReader(_input, cc, footer, _formatOptions);

            try
            {
                IList chunkValues = columnarReader.Read(offset, count);

                if (!pathToValues.TryGetValue(path, out IList allValues))
                {
                    pathToValues[path] = chunkValues;
                }
                else
                {
                    foreach (object v in chunkValues)
                    {
                        allValues.Add(v);
                    }
                }

                if (!rowsCounted)
                {
                    // NOTE(review): assumes a column chunk yields one value per row; this may not
                    // hold for every column kind (carried over from the original todo) - confirm.
                    rowsRead += chunkValues.Count;
                    rowsCounted = true;
                }
            }
            catch (Exception ex)
            {
                throw new ParquetException($"fatal error reading column '{path}'", ex);
            }
        }

        pos += rg.Num_rows;
    }

    Schema schema = footer.CreateModelSchema(_formatOptions);
    schema = schema.Filter(_fieldPredicates);

    var ds = new DataSet(schema, pathToValues, _meta.Num_rows, _meta.Created_by);

    Dictionary<string, string> customMetadata = footer.CustomMetadata;
    if (customMetadata != null)
    {
        ds.Metadata.Custom.AddRange(customMetadata);
    }

    ds.Thrift = _meta;
    return ds;
}