/// <summary>
/// Serializes INTERVAL values: each element is written as three consecutive
/// int fields — months, days, millis — via the supplied <see cref="BinaryWriter"/>.
/// The schema element and statistics parameters are not consulted here.
/// </summary>
public override void Write(Thrift.SchemaElement tse, BinaryWriter writer, IList values, Thrift.Statistics statistics)
{
   for (int i = 0; i < values.Count; i++)
   {
      var interval = (Interval)values[i];
      writer.Write(interval.Months);
      writer.Write(interval.Days);
      writer.Write(interval.Millis);
   }
}
/// <summary>
/// Dispatches date serialization to the writer matching the physical Thrift type
/// (INT32, INT64 or INT96).
/// </summary>
/// <exception cref="InvalidDataException">
/// Thrown when <paramref name="tse"/> has a physical type that cannot hold a date.
/// </exception>
public override void Write(Thrift.SchemaElement tse, BinaryWriter writer, IList values, Thrift.Statistics statistics)
{
   if (tse.Type == Thrift.Type.INT32)
   {
      WriteAsInt32(writer, values);
   }
   else if (tse.Type == Thrift.Type.INT64)
   {
      WriteAsInt64(writer, values);
   }
   else if (tse.Type == Thrift.Type.INT96)
   {
      WriteAsInt96(writer, values);
   }
   else
   {
      throw new InvalidDataException($"data type '{tse.Type}' does not represent any date types");
   }
}
/// <summary>
/// Writing is not supported by this handler; the call always fails.
/// </summary>
/// <exception cref="NotSupportedException">Always thrown.</exception>
public void Write(Thrift.SchemaElement tse, BinaryWriter writer, IList values, Thrift.Statistics statistics) =>
   throw new NotSupportedException();
/// <summary>
/// Renders the statistics section for a column chunk as a two-column table,
/// decoding min/max values through the file metadata. Prints "none defined"
/// when the chunk carries no statistics at all.
/// </summary>
private void PrintStatistics(Thrift.FileMetaData fileMeta, Thrift.ColumnChunk column, Thrift.Statistics stats)
{
   WriteLine("   Statistics", T.HeadingTextColor);

   // BUG FIX: the guard previously ignored __isset.min_value/__isset.max_value,
   // so stats that only carried the modern min_value/max_value fields (and not
   // the legacy min/max) were wrongly reported as "none defined" even though
   // the table below renders them.
   if (stats == null ||
       !(stats.__isset.null_count ||
         stats.__isset.distinct_count ||
         stats.__isset.min_value ||
         stats.__isset.max_value ||
         stats.__isset.min ||
         stats.__isset.max))
   {
      WriteLine("      none defined", T.ErrorTextColor);
      return;
   }

   const string undefined = "undefined";

   var t = new Table("name", "value");
   t.AddRow("Null Count", stats.__isset.null_count ? stats.Null_count.ToString() : undefined);
   t.AddRow("Distinct Count", stats.__isset.distinct_count ? stats.Distinct_count.ToString() : undefined);
   t.AddRow("Min", stats.__isset.min_value ? fileMeta.DecodeSingleStatsValue(column, stats.Min_value) : undefined);
   t.AddRow("Max", stats.__isset.max_value ? fileMeta.DecodeSingleStatsValue(column, stats.Max_value) : undefined);
   // legacy (pre parquet-format 2.x) statistics fields, kept for older writers
   t.AddRow("Min (legacy)", stats.__isset.min ? fileMeta.DecodeSingleStatsValue(column, stats.Min) : undefined);
   t.AddRow("Max (legacy)", stats.__isset.max ? fileMeta.DecodeSingleStatsValue(column, stats.Max) : undefined);

   t.Render(false, 6, T.HeadingTextColor, T.NormalTextColor);
}
/// <summary>
/// Dispatches decimal serialization to the writer matching the physical Thrift type
/// (INT32, INT64 or FIXED_LEN_BYTE_ARRAY).
/// </summary>
/// <exception cref="InvalidDataException">
/// Thrown when <paramref name="tse"/> has a physical type that cannot hold a decimal.
/// </exception>
public override void Write(Thrift.SchemaElement tse, BinaryWriter writer, IList values, Thrift.Statistics statistics)
{
   if (tse.Type == Thrift.Type.INT32)
   {
      WriteAsInt32(tse, writer, values);
   }
   else if (tse.Type == Thrift.Type.INT64)
   {
      WriteAsInt64(tse, writer, values);
   }
   else if (tse.Type == Thrift.Type.FIXED_LEN_BYTE_ARRAY)
   {
      WriteAsFixedLengthByteArray(tse, writer, values);
   }
   else
   {
      throw new InvalidDataException($"data type '{tse.Type}' does not represent a decimal");
   }
}
/// <summary>
/// Default batch writer: serializes each element through the per-element
/// <c>WriteOne</c> overload.
/// </summary>
public virtual void Write(Thrift.SchemaElement tse, BinaryWriter writer, IList values, Thrift.Statistics statistics)
{
   // Casting once to TSystemType[] avoids the slow Array.GetValue path per element.
   TSystemType[] batch = (TSystemType[])values;
   for (int i = 0; i < batch.Length; i++)
   {
      WriteOne(writer, batch[i]);
   }
}
/// <summary>
/// Writes a single data column as one data page: repetition levels, definition
/// levels (when nullable), then the values themselves, optionally compressed,
/// and returns the page tags describing the written header.
/// </summary>
/// <param name="column">Column data to serialize.</param>
/// <param name="tse">Thrift schema element describing the column's physical type.</param>
/// <param name="dataTypeHandler">Handler that knows how to serialize the column's values.</param>
/// <param name="maxRepetitionLevel">Maximum repetition level; levels are written only when the column has repetition levels.</param>
/// <param name="maxDefinitionLevel">Maximum definition level; 0 means the column cannot contain nulls.</param>
/// <param name="statistics">Mutated in place: Null_count is set here as a side effect.</param>
private List<PageTag> WriteColumn(DataColumn column,
   Thrift.SchemaElement tse,
   IDataTypeHandler dataTypeHandler,
   int maxRepetitionLevel,
   int maxDefinitionLevel,
   Thrift.Statistics statistics)
{
   var pages = new List<PageTag>();

   /*
    * Page header must precede actual data (compressed or not) however it contains both
    * the uncompressed and compressed data size which we don't know! This somehow limits
    * the write efficiency, hence the intermediate MemoryStream buffer below.
    */
   using (var ms = new MemoryStream())
   {
      Thrift.PageHeader dataPageHeader = _footer.CreateDataPage(column.Data.Length);

      //chain streams together so we have real streaming instead of wasting undefraggable LOH memory
      using (GapStream pageStream = DataStreamFactory.CreateWriter(ms, _compressionMethod, true))
      {
         using (var writer = new BinaryWriter(pageStream, Encoding.UTF8, true))
         {
            // repetition levels come first in the page, per the Parquet page layout
            if (column.RepetitionLevels != null)
            {
               WriteLevels(writer, column.RepetitionLevels, column.RepetitionLevels.Length, maxRepetitionLevel);
            }

            Array data = column.Data;
            if (maxDefinitionLevel > 0)
            {
               // PackDefinitions strips nulls out of the data and produces the
               // definition-level array (rented from ArrayPool — returned below).
               data = column.PackDefinitions(maxDefinitionLevel, out int[] definitionLevels, out int definitionLevelsLength, out int nullCount);

               //last chance to capture null count as null data is compressed now
               statistics.Null_count = nullCount;

               try
               {
                  WriteLevels(writer, definitionLevels, definitionLevelsLength, maxDefinitionLevel);
               }
               finally
               {
                  // return the pooled buffer even if WriteLevels throws
                  if (definitionLevels != null)
                  {
                     ArrayPool<int>.Shared.Return(definitionLevels);
                  }
               }
            }
            else
            {
               //no definitions means no nulls
               statistics.Null_count = 0;
            }

            dataTypeHandler.Write(tse, writer, data, statistics);

            writer.Flush();
         }

         //extremely important to flush the stream as some compression algorithms don't finish writing
         pageStream.Flush();
         pageStream.MarkWriteFinished();
         // pageStream.Position reflects the uncompressed byte count at this point
         dataPageHeader.Uncompressed_page_size = (int)pageStream.Position;
      }
      // ms holds the (possibly compressed) page body; its position is the compressed size
      dataPageHeader.Compressed_page_size = (int)ms.Position;

      //write the header in, then copy the buffered page body after it
      int headerSize = _thriftStream.Write(dataPageHeader);
      ms.Position = 0;
      ms.CopyTo(_stream);

      // NOTE(review): only Distinct_count is propagated into the page-header statistics
      // here; null count and min/max appear to be handled elsewhere — confirm before
      // relying on page-level stats.
      dataPageHeader.Data_page_header.Statistics = new Thrift.Statistics
      {
         Distinct_count = statistics.Distinct_count
      };
      var dataTag = new PageTag
      {
         HeaderMeta = dataPageHeader,
         HeaderSize = headerSize
      };
      pages.Add(dataTag);
   }

   return (pages);
}
/// <summary>
/// Serializes booleans by bit-packing them LSB-first, 8 values per byte
/// (Parquet PLAIN encoding for BOOLEAN); a trailing partial byte is zero-padded.
/// </summary>
public override void Write(Thrift.SchemaElement tse, BinaryWriter writer, IList values, Thrift.Statistics statistics)
{
   // BUG FIX: the buffer was sized values.Count / 8 + 1, which over-allocates one
   // byte whenever the count is a multiple of 8 (and allocates 1 byte for an empty
   // list); writer.Write(buffer) then emitted that spurious trailing zero byte.
   // ceil(count / 8) is the exact packed size.
   byte[] buffer = new byte[(values.Count + 7) / 8];
   int bitIndex = 0;   // position of the next bit within the current byte
   byte current = 0;   // byte being assembled
   int byteIndex = 0;  // next free slot in buffer

   foreach (bool flag in values)
   {
      if (flag)
      {
         current |= (byte)(1 << bitIndex);
      }
      bitIndex++;
      if (bitIndex == 8)
      {
         buffer[byteIndex++] = current;
         bitIndex = 0;
         current = 0;
      }
   }

   // flush the final partial byte, if any
   if (bitIndex != 0)
   {
      buffer[byteIndex] = current;
   }

   writer.Write(buffer);
}