Esempio n. 1
0
        /// <summary>
        /// Writes the next data column into the parquet stream. Columns have to be supplied in exactly the order in
        /// which they are declared in the file schema.
        /// </summary>
        /// <param name="column">Column to write. Its field must equal the schema element this writer expects next.</param>
        /// <exception cref="ArgumentNullException"><paramref name="column"/> is null.</exception>
        /// <exception cref="ArgumentException">
        /// The field of <paramref name="column"/> does not equal the schema element expected at the current position.
        /// </exception>
        public void WriteColumn(DataColumn column)
        {
            if (column == null)
            {
                throw new ArgumentNullException(nameof(column));
            }

            // While no row count is known yet, take it from this column, but only when the column
            // carries data or its field has a maximum repetition level of zero.
            if (RowCount == null && (column.Data.Length > 0 || column.Field.MaxRepetitionLevel == 0))
            {
                RowCount = column.CalculateRowCount();
            }

            // The schema element at the current position decides which column is acceptable now.
            Thrift.SchemaElement expectedElement = _thschema[_colIdx];
            bool matchesSchema = column.Field.Equals(expectedElement);
            if (!matchesSchema)
            {
                throw new ArgumentException(
                    $"cannot write this column, expected '{expectedElement.Name}', passed: '{column.Field.Name}'",
                    nameof(column));
            }

            IDataTypeHandler handler = DataTypeFactory.Match(expectedElement, _formatOptions);

            // Advance only after the column has been validated and a handler has been resolved.
            _colIdx++;

            List<string> columnPath = _footer.GetPath(expectedElement);

            // Falls back to zero when the row count could not be determined above.
            int rowCountHint = (int)(RowCount ?? 0);
            var columnWriter = new DataColumnWriter(
                _stream,
                _thriftStream,
                _footer,
                expectedElement,
                _compressionMethod,
                _compressionLevel,
                rowCountHint);

            Thrift.ColumnChunk writtenChunk = columnWriter.Write(columnPath, column, handler);
            _thriftRowGroup.Columns.Add(writtenChunk);
        }
Esempio n. 2
0
        /// <summary>
        /// Writes a single column built from the named test case, reads it back, and checks the row count and the
        /// distinct count, null count, minimum and maximum statistics against the values recorded in the test case.
        /// </summary>
        /// <param name="name">Key of the test case to look up in <c>NameToTest</c>.</param>
        public void Distinct_stat_for_basic_data_types(string name)
        {
            TestDesc expected = NameToTest[name];

            var field = new DataField("id", expected.Type);
            var inputColumn = new DataColumn(field, expected.Data);

            DataColumn roundTripped = WriteReadSingleColumn(field, inputColumn);

            // The row count is verified first; statistics are only inspected afterwards.
            Assert.Equal(expected.Data.Length, roundTripped.CalculateRowCount());

            var stats = roundTripped.Statistics;
            Assert.Equal(expected.DistinctCount, stats.DistinctCount);
            Assert.Equal(expected.NullCount, stats.NullCount);
            Assert.Equal(expected.Min, stats.MinValue);
            Assert.Equal(expected.Max, stats.MaxValue);
        }