Пример #1
0
        /// <summary>
        /// Writes the given column as one data page: optional repetition and definition levels
        /// followed by the defined values go through the page stream into a temporary in-memory
        /// buffer, after which the page header and the buffered bytes are emitted.
        /// </summary>
        /// <param name="column">Column whose TotalCount, levels and DefinedData are written.</param>
        /// <param name="tse">Schema element passed through to the data type handler.</param>
        /// <param name="dataTypeHandler">Handler that writes the column's defined data.</param>
        /// <param name="maxRepetitionLevel">Repetition levels are written only when this is positive.</param>
        /// <param name="maxDefinitionLevel">Definition levels are written only when this is positive.</param>
        /// <returns>A list containing the single page tag created for this column.</returns>
        private List<PageTag> WriteColumn(DataColumn column,
                                          Thrift.SchemaElement tse,
                                          IDataTypeHandler dataTypeHandler,
                                          int maxRepetitionLevel,
                                          int maxDefinitionLevel)
        {
            List<PageTag> result = new List<PageTag>();

            // The header goes out before the page bytes, yet it records both the uncompressed
            // and the compressed size. Neither is known until the page has been produced, so
            // the page is staged in a memory buffer first.
            using (MemoryStream buffer = new MemoryStream())
            {
                Thrift.PageHeader header = _footer.CreateDataPage(column.TotalCount);

                // Streams are chained so the data is streamed into the buffer instead of being
                // staged in large intermediate arrays.
                using (GapStream sink = DataStreamFactory.CreateWriter(buffer, _compressionMethod, true))
                {
                    using (BinaryWriter output = new BinaryWriter(sink, Encoding.UTF8, true))
                    {
                        bool hasRepetitionLevels = maxRepetitionLevel > 0;
                        if (hasRepetitionLevels)
                        {
                            WriteLevels(output, column.RepetitionLevels, maxRepetitionLevel);
                        }

                        bool hasDefinitionLevels = maxDefinitionLevel > 0;
                        if (hasDefinitionLevels)
                        {
                            WriteLevels(output, column.DefinitionLevels, maxDefinitionLevel);
                        }

                        dataTypeHandler.Write(tse, output, column.DefinedData);

                        output.Flush();
                    }

                    // Flushing matters here: some compression algorithms hold data back until flushed.
                    sink.Flush();
                    header.Uncompressed_page_size = (int)sink.Position;
                }

                // Read only after the page stream has been disposed.
                header.Compressed_page_size = (int)buffer.Position;

                // Header first, then the staged page bytes.
                int headerSize = _thriftStream.Write(header);
                buffer.Position = 0;
                buffer.CopyTo(_stream);

                result.Add(new PageTag
                {
                    HeaderMeta = header,
                    HeaderSize = headerSize
                });
            }

            return result;
        }
        /// <summary>
        /// Writes the given column as one data page: optional definition levels followed by the
        /// defined values go through the page stream into a temporary in-memory buffer, after
        /// which the page header and the buffered bytes are emitted.
        /// </summary>
        /// <param name="column">Column whose TotalCount, DefinitionLevels and DefinedData are written.</param>
        /// <param name="tse">Schema element passed through to the data type handler.</param>
        /// <param name="dataTypeHandler">Handler that writes the column's defined data.</param>
        /// <param name="maxRepetitionLevel">Not used by this implementation.</param>
        /// <param name="maxDefinitionLevel">Maximum definition level passed to the level writer.</param>
        /// <returns>A list containing the single page tag created for this column.</returns>
        /// <exception cref="NotImplementedException">The column has repetitions.</exception>
        private List<PageTag> WriteColumn(DataColumn column,
                                          Thrift.SchemaElement tse,
                                          IDataTypeHandler dataTypeHandler,
                                          int maxRepetitionLevel,
                                          int maxDefinitionLevel)
        {
            var pages = new List<PageTag>();

            /*
             * Page header must precede actual data (compressed or not) however it contains both
             * the uncompressed and compressed data size which we don't know! This somehow limits
             * the write efficiency.
             */

            using (var ms = new MemoryStream())
            {
                Thrift.PageHeader dataPageHeader = _footer.CreateDataPage(column.TotalCount);

                //chain streams together so we have real streaming instead of wasting undefraggable LOH memory
                using (PositionTrackingStream pps = DataStreamFactory.CreateWriter(ms, _compressionMethod))
                {
                    // leaveOpen: true keeps the writer from closing pps when it is disposed, so the
                    // position below is read from a stream that is still open. pps remains owned
                    // (and disposed) by its own using block.
                    using (var writer = new BinaryWriter(pps, System.Text.Encoding.UTF8, true))
                    {
                        if (column.HasRepetitions)
                        {
                            throw new NotImplementedException();
                        }

                        if (column.HasDefinitions)
                        {
                            WriteLevels(writer, column.DefinitionLevels, maxDefinitionLevel);
                        }

                        dataTypeHandler.Write(tse, writer, column.DefinedData);

                        // push everything through to pps before its position is sampled
                        writer.Flush();
                    }

                    dataPageHeader.Uncompressed_page_size = (int)pps.Position;
                }

                // read only after pps has been disposed, so the buffer holds the complete page
                dataPageHeader.Compressed_page_size = (int)ms.Position;

                //write the header in
                int headerSize = _thriftStream.Write(dataPageHeader);
                ms.Position = 0;
                ms.CopyTo(_stream);

                var dataTag = new PageTag
                {
                    HeaderMeta = dataPageHeader,
                    HeaderSize = headerSize
                };

                pages.Add(dataTag);
            }

            return pages;
        }
Пример #3
0
        /// <summary>
        /// Writes the given column as one data page. Repetition levels (when present), packed
        /// definition levels (when <paramref name="maxDefinitionLevel"/> is positive) and the
        /// values go through the page stream into a temporary in-memory buffer; the page header,
        /// including column statistics, is then emitted, followed by the buffered bytes.
        /// </summary>
        /// <param name="column">Column supplying data, repetition levels and statistics.</param>
        /// <param name="tse">Schema element passed through to the data type handler.</param>
        /// <param name="dataTypeHandler">Handler that writes the values and is used to convert statistics.</param>
        /// <param name="maxRepetitionLevel">Maximum repetition level passed to the level writer.</param>
        /// <param name="maxDefinitionLevel">Definitions are packed and written only when this is positive.</param>
        /// <returns>A list containing the single page tag created for this column.</returns>
        private List<PageTag> WriteColumn(DataColumn column,
                                          Thrift.SchemaElement tse,
                                          IDataTypeHandler dataTypeHandler,
                                          int maxRepetitionLevel,
                                          int maxDefinitionLevel)
        {
            List<PageTag> result = new List<PageTag>();

            // The header goes out before the page bytes, yet it records both the uncompressed
            // and the compressed size. Neither is known until the page has been produced, so
            // the page is staged in a memory buffer first.
            using (MemoryStream buffer = new MemoryStream())
            {
                Thrift.PageHeader header = _footer.CreateDataPage(column.Data.Length);

                // Streams are chained so the data is streamed into the buffer instead of being
                // staged in large intermediate arrays.
                using (GapStream sink = DataStreamFactory.CreateWriter(buffer, _compressionMethod, _compressionLevel, true))
                {
                    using (BinaryWriter output = new BinaryWriter(sink, Encoding.UTF8, true))
                    {
                        if (column.RepetitionLevels != null)
                        {
                            WriteLevels(output, column.RepetitionLevels, column.RepetitionLevels.Length, maxRepetitionLevel);
                        }

                        Array values = column.Data;

                        if (maxDefinitionLevel <= 0)
                        {
                            // Without definitions there can be no nulls.
                            column.Statistics.NullCount = 0;
                        }
                        else
                        {
                            values = column.PackDefinitions(maxDefinitionLevel, out int[] levels, out int levelCount, out int nulls);

                            // Last opportunity to record the null count: the packed data no longer carries nulls.
                            column.Statistics.NullCount = nulls;

                            try
                            {
                                WriteLevels(output, levels, levelCount, maxDefinitionLevel);
                            }
                            finally
                            {
                                // Hand the levels array back to the shared pool even if writing failed.
                                if (levels != null)
                                {
                                    ArrayPool<int>.Shared.Return(levels);
                                }
                            }
                        }

                        dataTypeHandler.Write(tse, output, values, column.Statistics);

                        output.Flush();
                    }

                    // Flushing matters here: some compression algorithms hold data back until flushed.
                    sink.Flush();
                    sink.MarkWriteFinished();
                    header.Uncompressed_page_size = (int)sink.Position;
                }

                // Read only after the page stream has been disposed.
                header.Compressed_page_size = (int)buffer.Position;

                // Attach statistics, then emit the header followed by the staged page bytes.
                header.Data_page_header.Statistics = column.Statistics.ToThriftStatistics(dataTypeHandler, _schemaElement);
                int headerSize = _thriftStream.Write(header);
                buffer.Position = 0;
                buffer.CopyTo(_stream);

                result.Add(new PageTag
                {
                    HeaderMeta = header,
                    HeaderSize = headerSize
                });
            }

            return result;
        }