/// <summary>
/// Writes the next data column into the current row group. Columns have to be supplied
/// in exactly the order in which the file schema declares them.
/// </summary>
/// <param name="column">Column to write; its field must equal the schema element expected at the current position.</param>
/// <exception cref="ArgumentNullException"><paramref name="column"/> is null.</exception>
/// <exception cref="ArgumentException">The column's field does not equal the schema element expected next.</exception>
public void WriteColumn(DataColumn column)
{
    if (column == null)
    {
        throw new ArgumentNullException(nameof(column));
    }

    // The row count is fixed once, by the first column that either carries data
    // or has a maximum repetition level of zero.
    if (RowCount == null && (column.Data.Length > 0 || column.Field.MaxRepetitionLevel == 0))
    {
        RowCount = column.CalculateRowCount();
    }

    Thrift.SchemaElement expected = _thschema[_colIdx];
    if (!column.Field.Equals(expected))
    {
        throw new ArgumentException($"cannot write this column, expected '{expected.Name}', passed: '{column.Field.Name}'", nameof(column));
    }

    IDataTypeHandler handler = DataTypeFactory.Match(expected, _formatOptions);

    // Advance to the next schema position only after the column was accepted and a
    // handler was resolved, but before the actual write takes place.
    _colIdx++;

    List<string> columnPath = _footer.GetPath(expected);
    int rowCount = (int)(RowCount ?? 0);
    var columnWriter = new DataColumnWriter(
        _stream,
        _thriftStream,
        _footer,
        expected,
        _compressionMethod,
        _compressionLevel,
        rowCount);

    Thrift.ColumnChunk writtenChunk = columnWriter.Write(columnPath, column, handler);
    _thriftRowGroup.Columns.Add(writtenChunk);
}
// Passes a single column built from the named test description through
// WriteReadSingleColumn and checks the row count and statistics of the returned column.
public void Distinct_stat_for_basic_data_types(string name)
{
    TestDesc desc = NameToTest[name];
    DataField field = new DataField("id", desc.Type);
    DataColumn written = new DataColumn(field, desc.Data);

    DataColumn readBack = WriteReadSingleColumn(field, written);

    // Row count must match the number of input values.
    Assert.Equal(desc.Data.Length, readBack.CalculateRowCount());

    // Statistics must match the expectations recorded in the test description.
    Assert.Equal(desc.DistinctCount, readBack.Statistics.DistinctCount);
    Assert.Equal(desc.NullCount, readBack.Statistics.NullCount);
    Assert.Equal(desc.Min, readBack.Statistics.MinValue);
    Assert.Equal(desc.Max, readBack.Statistics.MaxValue);
}