示例#1
0
 /// <summary>
 /// Writes every interval in <paramref name="values"/> to <paramref name="writer"/>
 /// as its Months, Days and Millis components, in that order.
 /// </summary>
 /// <param name="tse">Schema element of the column; not used by this handler.</param>
 /// <param name="writer">Destination binary writer.</param>
 /// <param name="values">Values to write; every element is cast to <c>Interval</c>.</param>
 /// <param name="statistics">Column statistics; not used by this handler.</param>
 public override void Write(Thrift.SchemaElement tse, BinaryWriter writer, IList values, Thrift.Statistics statistics)
 {
     for (int index = 0; index < values.Count; index++)
     {
         var current = (Interval)values[index];

         // component order is fixed: months, then days, then milliseconds
         writer.Write(current.Months);
         writer.Write(current.Days);
         writer.Write(current.Millis);
     }
 }
示例#2
0
        /// <summary>
        /// Writes date values using the writer that matches the schema element's type.
        /// </summary>
        /// <param name="tse">Schema element whose <c>Type</c> selects the writer (INT32, INT64 or INT96).</param>
        /// <param name="writer">Destination binary writer.</param>
        /// <param name="values">Values to write.</param>
        /// <param name="statistics">Column statistics; not used by this handler.</param>
        /// <exception cref="InvalidDataException">The schema element's type is not one of the supported types.</exception>
        public override void Write(Thrift.SchemaElement tse, BinaryWriter writer, IList values, Thrift.Statistics statistics)
        {
            Thrift.Type elementType = tse.Type;

            if (elementType == Thrift.Type.INT32)
            {
                WriteAsInt32(writer, values);
                return;
            }

            if (elementType == Thrift.Type.INT64)
            {
                WriteAsInt64(writer, values);
                return;
            }

            if (elementType == Thrift.Type.INT96)
            {
                WriteAsInt96(writer, values);
                return;
            }

            // anything else cannot carry a date value
            throw new InvalidDataException($"data type '{tse.Type}' does not represent any date types");
        }
 /// <summary>
 /// Writing is not supported by this handler; the call always fails.
 /// </summary>
 /// <exception cref="NotSupportedException">Always thrown.</exception>
 public void Write(Thrift.SchemaElement tse, BinaryWriter writer, IList values, Thrift.Statistics statistics)
     => throw new NotSupportedException();
示例#4
0
        /// <summary>
        /// Prints the statistics of a column chunk as a name/value table, or a
        /// "none defined" notice when no statistics field is set.
        /// </summary>
        /// <param name="fileMeta">File metadata used to decode the raw min/max statistics values.</param>
        /// <param name="column">Column chunk the statistics belong to.</param>
        /// <param name="stats">Statistics to print; may be null.</param>
        private void PrintStatistics(Thrift.FileMetaData fileMeta, Thrift.ColumnChunk column, Thrift.Statistics stats)
        {
            WriteLine("    Statistics", T.HeadingTextColor);

            // Every field that gets a row in the table below counts as "defined". This includes
            // min_value / max_value: statistics that carry only those fields (and not the legacy
            // min / max) must still be printed rather than reported as missing.
            bool anyDefined = stats != null &&
                              (stats.__isset.null_count ||
                               stats.__isset.distinct_count ||
                               stats.__isset.min_value ||
                               stats.__isset.max_value ||
                               stats.__isset.min ||
                               stats.__isset.max);

            if (!anyDefined)
            {
                WriteLine("      none defined", T.ErrorTextColor);
                return;
            }

            const string undefined = "undefined";

            var t = new Table("name", "value");

            t.AddRow("Null Count", stats.__isset.null_count ? stats.Null_count.ToString() : undefined);
            t.AddRow("Distinct Count", stats.__isset.distinct_count ? stats.Distinct_count.ToString() : undefined);
            t.AddRow("Min", stats.__isset.min_value ? fileMeta.DecodeSingleStatsValue(column, stats.Min_value) : undefined);
            t.AddRow("Max", stats.__isset.max_value ? fileMeta.DecodeSingleStatsValue(column, stats.Max_value) : undefined);
            t.AddRow("Min (legacy)", stats.__isset.min ? fileMeta.DecodeSingleStatsValue(column, stats.Min) : undefined);
            t.AddRow("Max (legacy)", stats.__isset.max ? fileMeta.DecodeSingleStatsValue(column, stats.Max) : undefined);
            t.Render(false, 6, T.HeadingTextColor, T.NormalTextColor);
        }
示例#5
0
        /// <summary>
        /// Writes decimal values using the writer that matches the schema element's type.
        /// </summary>
        /// <param name="tse">Schema element whose <c>Type</c> selects the writer (INT32, INT64 or FIXED_LEN_BYTE_ARRAY).</param>
        /// <param name="writer">Destination binary writer.</param>
        /// <param name="values">Values to write.</param>
        /// <param name="statistics">Column statistics; not used by this handler.</param>
        /// <exception cref="InvalidDataException">The schema element's type is not one of the supported types.</exception>
        public override void Write(Thrift.SchemaElement tse, BinaryWriter writer, IList values, Thrift.Statistics statistics)
        {
            Thrift.Type elementType = tse.Type;

            if (elementType == Thrift.Type.INT32)
            {
                WriteAsInt32(tse, writer, values);
                return;
            }

            if (elementType == Thrift.Type.INT64)
            {
                WriteAsInt64(tse, writer, values);
                return;
            }

            if (elementType == Thrift.Type.FIXED_LEN_BYTE_ARRAY)
            {
                WriteAsFixedLengthByteArray(tse, writer, values);
                return;
            }

            // anything else cannot carry a decimal value
            throw new InvalidDataException($"data type '{tse.Type}' does not represent a decimal");
        }
示例#6
0
        /// <summary>
        /// Writes all values one by one through <c>WriteOne</c>.
        /// </summary>
        /// <param name="tse">Schema element of the column; not used by this implementation.</param>
        /// <param name="writer">Destination binary writer.</param>
        /// <param name="values">Values to write; must be castable to an array of <c>TSystemType</c>.</param>
        /// <param name="statistics">Column statistics; not used by this implementation.</param>
        public virtual void Write(Thrift.SchemaElement tse, BinaryWriter writer, IList values, Thrift.Statistics statistics)
        {
            // casting to TSystemType[] up front avoids the slow Array.GetValue calls
            TSystemType[] items = (TSystemType[])values;

            for (int i = 0; i < items.Length; i++)
            {
                WriteOne(writer, items[i]);
            }
        }
示例#7
0
        /// <summary>
        /// Writes the content of <paramref name="column"/> as one data page: the page body
        /// (repetition levels, definition levels, then the values) is first built in memory,
        /// then the page header and the body are written to the output.
        /// </summary>
        /// <param name="column">Column whose data and repetition levels are written.</param>
        /// <param name="tse">Schema element passed through to the data type handler.</param>
        /// <param name="dataTypeHandler">Handler that serializes the column values.</param>
        /// <param name="maxRepetitionLevel">Maximum repetition level, passed to <c>WriteLevels</c>.</param>
        /// <param name="maxDefinitionLevel">Maximum definition level; definition levels are only written when it is greater than zero.</param>
        /// <param name="statistics">Statistics object; its <c>Null_count</c> is set here and its <c>Distinct_count</c> is copied onto the page header.</param>
        /// <returns>A list holding one <c>PageTag</c> with the page header and the header size returned by the thrift stream.</returns>
        private List <PageTag> WriteColumn(DataColumn column,
                                           Thrift.SchemaElement tse,
                                           IDataTypeHandler dataTypeHandler,
                                           int maxRepetitionLevel,
                                           int maxDefinitionLevel,
                                           Thrift.Statistics statistics)
        {
            var pages = new List <PageTag>();

            /*
             * The page header must precede the actual data (compressed or not), however it contains
             * both the uncompressed and the compressed data size, which are not known until the data
             * has been written. The page body is therefore buffered in memory first, which somewhat
             * limits the write efficiency.
             */


            using (var ms = new MemoryStream())
            {
                Thrift.PageHeader dataPageHeader = _footer.CreateDataPage(column.Data.Length);

                //chain streams together so we have real streaming instead of wasting undefraggable LOH memory
                //NOTE(review): the trailing 'true' presumably means "leave ms open" - confirm against DataStreamFactory
                using (GapStream pageStream = DataStreamFactory.CreateWriter(ms, _compressionMethod, true))
                {
                    //leaveOpen = true: disposing the writer must not close pageStream, which is still used below
                    using (var writer = new BinaryWriter(pageStream, Encoding.UTF8, true))
                    {
                        //repetition levels go first, and only when the column has them
                        if (column.RepetitionLevels != null)
                        {
                            WriteLevels(writer, column.RepetitionLevels, column.RepetitionLevels.Length, maxRepetitionLevel);
                        }

                        Array data = column.Data;

                        if (maxDefinitionLevel > 0)
                        {
                            //from here on 'data' is whatever PackDefinitions returns, not the raw column data
                            data = column.PackDefinitions(maxDefinitionLevel, out int[] definitionLevels, out int definitionLevelsLength, out int nullCount);

                            //last chance to capture the null count, as null data is compressed now
                            statistics.Null_count = nullCount;

                            try
                            {
                                WriteLevels(writer, definitionLevels, definitionLevelsLength, maxDefinitionLevel);
                            }
                            finally
                            {
                                //hand the levels array back to the shared pool even if writing failed
                                //(presumably it was rented by PackDefinitions - confirm)
                                if (definitionLevels != null)
                                {
                                    ArrayPool <int> .Shared.Return(definitionLevels);
                                }
                            }
                        }
                        else
                        {
                            //no definitions means no nulls
                            statistics.Null_count = 0;
                        }

                        dataTypeHandler.Write(tse, writer, data, statistics);

                        writer.Flush();
                    }

                    pageStream.Flush(); //extremely important to flush the stream as some compression algorithms don't finish writing
                    pageStream.MarkWriteFinished();
                    //position of the wrapping stream is taken as the uncompressed size
                    dataPageHeader.Uncompressed_page_size = (int)pageStream.Position;
                }
                //position of the underlying memory stream is taken as the compressed size
                dataPageHeader.Compressed_page_size = (int)ms.Position;

                //write the header in, followed by the buffered page body
                int headerSize = _thriftStream.Write(dataPageHeader);
                ms.Position = 0;
                ms.CopyTo(_stream);

                //NOTE(review): this assignment happens after the header was passed to _thriftStream.Write above,
                //so it looks like these statistics only live on the in-memory header kept in the page tag - confirm this is intended
                dataPageHeader.Data_page_header.Statistics = new Thrift.Statistics
                {
                    Distinct_count = statistics.Distinct_count
                };
                var dataTag = new PageTag
                {
                    HeaderMeta = dataPageHeader,
                    HeaderSize = headerSize
                };

                pages.Add(dataTag);
            }

            return(pages);
        }
示例#8
0
        /// <summary>
        /// Packs boolean values into bytes, eight per byte with the first value in the
        /// least significant bit, and writes the packed bytes to <paramref name="writer"/>.
        /// </summary>
        /// <remarks>
        /// The output is always <c>values.Count / 8 + 1</c> bytes long, so a trailing zero byte
        /// is emitted when the number of values is a multiple of eight (including zero values).
        /// </remarks>
        /// <param name="tse">Schema element of the column; not used by this handler.</param>
        /// <param name="writer">Destination binary writer.</param>
        /// <param name="values">Values to write; every element is cast to <c>bool</c>.</param>
        /// <param name="statistics">Column statistics; not used by this handler.</param>
        public override void Write(Thrift.SchemaElement tse, BinaryWriter writer, IList values, Thrift.Statistics statistics)
        {
            // zero-initialised, so only the bits of 'true' values need to be set
            byte[] packed = new byte[values.Count / 8 + 1];
            int position = 0;

            foreach (bool flag in values)
            {
                if (flag)
                {
                    // byte index is position / 8, bit index within that byte is position % 8
                    packed[position >> 3] |= (byte)(1 << (position & 7));
                }

                position++;
            }

            writer.Write(packed);
        }