Example #1
0
        /// <summary>
        /// Reads one data page: loads the raw page bytes, then decodes repetition levels, definition levels
        /// and the column payload from them, in that order.
        /// </summary>
        /// <param name="dataTypeHandler">Handler forwarded to the column reader for decoding values.</param>
        /// <param name="ph">Header of the page to read; its data page header supplies the encoding.</param>
        /// <param name="tse">Schema element of the column being read.</param>
        /// <param name="maxValues">Value limit forwarded to the column reader.</param>
        /// <returns>Page data holding whichever of repetitions, definitions, values and indexes were read.</returns>
        private PageData ReadDataPage(IDataTypeHandler dataTypeHandler, Thrift.PageHeader ph, Thrift.SchemaElement tse, long maxValues)
        {
            byte[] data = ReadRawBytes(ph, _inputStream);

            _footer.GetLevels(_thriftColumnChunk, out int maxRepetitionLevel, out int maxDefinitionLevel);
            var pd = new PageData();

            using (var dataStream = new MemoryStream(data))
            using (var reader = new BinaryReader(dataStream))
            {
                // levels are only read when the column's maximum level is above zero
                if (maxRepetitionLevel > 0)
                {
                    pd.repetitions = ReadLevels(reader, maxRepetitionLevel);
                }

                if (maxDefinitionLevel > 0)
                {
                    pd.definitions = ReadLevels(reader, maxDefinitionLevel);
                }

                ReadColumn(dataTypeHandler, tse, reader, ph.Data_page_header.Encoding, maxValues,
                           out pd.values,
                           out pd.indexes);
            }

            return pd;
        }
Example #2
0
        /// <summary>
        /// Builds a schema field for a CLR property.
        /// </summary>
        /// <param name="property">Property to describe.</param>
        /// <returns>
        /// A <see cref="DataField"/> named after the <see cref="ParquetColumnAttribute"/> name when present
        /// (otherwise the property name), or null when no data type handler matches the property type.
        /// </returns>
        private Field GetField(PropertyInfo property)
        {
            // the handler lookup works on the underlying type: nullable wrapper removed first, then array element
            Type pt = property.PropertyType;

            if (pt.IsNullable())
            {
                pt = pt.GetNonNullable();
            }
            if (pt.IsArray)
            {
                pt = pt.GetElementType();
            }

            IDataTypeHandler handler = DataTypeFactory.Match(pt);

            if (handler == null)
            {
                return null;
            }

            ParquetColumnAttribute columnAttr = property.GetCustomAttribute<ParquetColumnAttribute>();

            string name = columnAttr?.Name ?? property.Name;

            var r = new DataField(name,
                                  property.PropertyType //use CLR type here as DF constructor will figure out nullability and other parameters
                                  );

            r.ClrPropName = property.Name;
            return r;
        }
Example #3
0
        /// <summary>
        /// Initialises the column from its field: stores the field, resolves the data type handler for the
        /// field's data type and marks the column as having repetitions when the field is an array.
        /// </summary>
        /// <param name="field">Field this column belongs to.</param>
        /// <exception cref="ArgumentNullException">When <paramref name="field"/> is null.</exception>
        private DataColumn(DataField field)
        {
            if (field == null)
            {
                throw new ArgumentNullException(nameof(field));
            }

            Field            = field;
            _dataTypeHandler = DataTypeFactory.Match(field.DataType);
            HasRepetitions   = field.IsArray;
        }
Example #4
0
        /// <summary>
        /// Writes the next data column to the parquet stream. Columns must be written in the same order in which
        /// they are declared in the file schema.
        /// </summary>
        /// <param name="column">Column to write; its field must equal the schema element expected at the current position.</param>
        /// <exception cref="ArgumentNullException">When <paramref name="column"/> is null.</exception>
        /// <exception cref="ArgumentException">When the column's field does not equal the expected schema element.</exception>
        public void WriteColumn(DataColumn column)
        {
            if (column == null)
            {
                throw new ArgumentNullException(nameof(column));
            }

            // the row count is only computed while it is still unset, and only from a column that either
            // carries data or has no repetition level
            bool shouldCountRows = RowCount == null
                                   && (column.Data.Length > 0 || column.Field.MaxRepetitionLevel == 0);
            if (shouldCountRows)
            {
                RowCount = column.CalculateRowCount();
            }

            Thrift.SchemaElement expected = _thschema[_colIdx];
            bool matchesSchema = column.Field.Equals(expected);
            if (!matchesSchema)
            {
                throw new ArgumentException($"cannot write this column, expected '{expected.Name}', passed: '{column.Field.Name}'", nameof(column));
            }

            IDataTypeHandler handler = DataTypeFactory.Match(expected, _formatOptions);

            // the position advances only after the column has been accepted
            _colIdx += 1;

            List<string> columnPath = _footer.GetPath(expected);

            var columnWriter = new DataColumnWriter(
                _stream, _thriftStream, _footer, expected,
                _compressionMethod, _compressionLevel,
                (int)(RowCount ?? 0));

            Thrift.ColumnChunk writtenChunk = columnWriter.Write(columnPath, column, handler);
            _thriftRowGroup.Columns.Add(writtenChunk);
        }
Example #5
0
        /// <summary>
        /// Recursively converts thrift schema elements, starting at index <paramref name="si"/>, into model
        /// fields and appends them to <paramref name="container"/>.
        /// </summary>
        /// <param name="path">Path of the parent field, or null; used as the prefix for child paths.</param>
        /// <param name="container">List that receives the created fields.</param>
        /// <param name="childCount">Maximum number of fields to create at this level.</param>
        /// <param name="si">Current index into the thrift schema; advanced by the data type handlers.</param>
        /// <param name="formatOptions">Options used when matching data type handlers.</param>
        /// <exception cref="InvalidOperationException">When no handler matches a schema element.</exception>
        private void CreateModelSchema(string path, IList<Field> container, int childCount, ref int si, ParquetOptions formatOptions)
        {
            int created = 0;

            while (created < childCount && si < _fileMeta.Schema.Count)
            {
                Thrift.SchemaElement element = _fileMeta.Schema[si];
                IDataTypeHandler     handler = DataTypeFactory.Match(element, formatOptions);

                if (handler == null)
                {
                    throw new InvalidOperationException($"cannot find data type handler to create model schema for {element.Describe()}");
                }

                Field field = handler.CreateSchemaElement(_fileMeta.Schema, ref si, out int ownedChildCount);

                // full path = parent path (when there is one) + the field's own path, falling back to its name
                IEnumerable<string> pathParts = new[] { path, field.Path ?? field.Name }.Where(part => part != null);
                field.Path = string.Join(Schema.PathSeparator, pathParts);

                if (ownedChildCount > 0)
                {
                    // children are collected separately and then assigned to the field they belong to
                    var children = new List<Field>();
                    CreateModelSchema(field.Path, children, ownedChildCount, ref si, formatOptions);

                    foreach (Field child in children)
                    {
                        field.Assign(child);
                    }
                }

                container.Add(field);
                created++;
            }
        }
Example #6
0
        /// <summary>
        /// Writes the column as a single data page: levels and data are first written into an in-memory
        /// buffer through the page stream, then the page header and the buffered bytes are written out.
        /// </summary>
        /// <param name="column">Column whose levels and defined data are written.</param>
        /// <param name="tse">Schema element passed to the data type handler.</param>
        /// <param name="dataTypeHandler">Handler that writes the column's defined data.</param>
        /// <param name="maxRepetitionLevel">Repetition levels are written only when this is above zero.</param>
        /// <param name="maxDefinitionLevel">Definition levels are written only when this is above zero.</param>
        /// <returns>A list containing one tag with the page header and its written size.</returns>
        private List <PageTag> WriteColumn(DataColumn column,
                                           Thrift.SchemaElement tse,
                                           IDataTypeHandler dataTypeHandler,
                                           int maxRepetitionLevel,
                                           int maxDefinitionLevel)
        {
            var pages = new List <PageTag>();

            /*
             * Page header must precede the actual data (compressed or not), however it contains both
             * the uncompressed and compressed data size, which we don't know up front! This somewhat
             * limits the write efficiency.
             */


            using (var ms = new MemoryStream())
            {
                Thrift.PageHeader dataPageHeader = _footer.CreateDataPage(column.TotalCount);

                //chain streams together so we have real streaming instead of wasting undefraggable LOH memory
                using (GapStream pageStream = DataStreamFactory.CreateWriter(ms, _compressionMethod, true))
                {
                    // leaveOpen: true, so disposing the writer does not close pageStream
                    using (var writer = new BinaryWriter(pageStream, Encoding.UTF8, true))
                    {
                        if (maxRepetitionLevel > 0)
                        {
                            WriteLevels(writer, column.RepetitionLevels, maxRepetitionLevel);
                        }

                        if (maxDefinitionLevel > 0)
                        {
                            WriteLevels(writer, column.DefinitionLevels, maxDefinitionLevel);
                        }

                        dataTypeHandler.Write(tse, writer, column.DefinedData);

                        writer.Flush();
                    }

                    pageStream.Flush(); //extremely important to flush the stream as some compression algorithms don't finish writing
                    // presumably the page stream position counts bytes before compression - TODO confirm against GapStream
                    dataPageHeader.Uncompressed_page_size = (int)pageStream.Position;
                }
                // read after the page stream is disposed; the buffer position is taken as the compressed size
                dataPageHeader.Compressed_page_size = (int)ms.Position;

                //write the header in
                int headerSize = _thriftStream.Write(dataPageHeader);
                // rewind the buffer so the whole page body is copied after the header
                ms.Position = 0;
                ms.CopyTo(_stream);

                var dataTag = new PageTag
                {
                    HeaderMeta = dataPageHeader,
                    HeaderSize = headerSize
                };

                pages.Add(dataTag);
            }

            return(pages);
        }
Example #7
0
        /// <summary>
        /// Looks up the value list stored for a field's path, optionally creating and registering an empty one.
        /// </summary>
        /// <param name="columns">Value lists keyed by field path.</param>
        /// <param name="field">Field whose path identifies the list.</param>
        /// <param name="createIfMissing">When true, a missing (or null) list is created and stored under the path.</param>
        /// <param name="isNested">When true, a created list is a list of enumerables rather than a handler-created list.</param>
        /// <returns>The existing or newly created list.</returns>
        /// <exception cref="ArgumentNullException">When the field's path is null.</exception>
        /// <exception cref="ArgumentException">When the list is missing and <paramref name="createIfMissing"/> is false.</exception>
        static IList GetValues(Dictionary<string, IList> columns, DataField field, bool createIfMissing, bool isNested = false)
        {
            if (field.Path == null)
            {
                throw new ArgumentNullException(nameof(field.Path));
            }

            // fast path: a non-null list is already registered
            if (columns.TryGetValue(field.Path, out IList existing) && existing != null)
            {
                return existing;
            }

            if (!createIfMissing)
            {
                throw new ArgumentException($"column does not exist by path '{field.Path}'", nameof(field));
            }

            IDataTypeHandler handler = DataTypeFactory.Match(field);

            IList created;
            if (isNested)
            {
                created = new List<IEnumerable>();
            }
            else
            {
                created = handler.CreateEmptyList(field.HasNulls, field.IsArray, 0);
            }

            columns[field.Path] = created;
            return created;
        }
Example #8
0
        /// <summary>
        /// Appends the thrift schema elements for a list field: an optional LIST element, a repeated element
        /// named "list" beneath it, and then the elements of the list item created by the item's own handler.
        /// </summary>
        /// <param name="field">Field to convert; cast to <see cref="ListField"/>.</param>
        /// <param name="parent">Parent element; its child count is incremented.</param>
        /// <param name="container">Flat list receiving the created elements.</param>
        public override void CreateThrift(Field field, Thrift.SchemaElement parent, IList<Thrift.SchemaElement> container)
        {
            var listField = (ListField)field;

            parent.Num_children += 1;

            // outer element carrying the LIST annotation; its only child is the repeated element below
            var listRoot = new Thrift.SchemaElement(field.Name);
            listRoot.Converted_type  = Thrift.ConvertedType.LIST;
            listRoot.Repetition_type = Thrift.FieldRepetitionType.OPTIONAL;
            listRoot.Num_children    = 1;
            container.Add(listRoot);

            // repeated element that holds the item
            var repeatedGroup = new Thrift.SchemaElement("list");
            repeatedGroup.Repetition_type = Thrift.FieldRepetitionType.REPEATED;
            container.Add(repeatedGroup);

            // the item is emitted by its own handler, with the repeated element as its parent
            IDataTypeHandler itemHandler = DataTypeFactory.Match(listField.Item);
            itemHandler.CreateThrift(listField.Item, repeatedGroup, container);
        }
Example #9
0
        /// <summary>
        /// Reads the column payload of a page according to its encoding. Plain encoding produces values;
        /// RLE and plain dictionary encodings produce indexes instead.
        /// </summary>
        /// <param name="dataTypeHandler">Handler used to read plain-encoded values.</param>
        /// <param name="tse">Schema element; its type length is passed to the RLE reader.</param>
        /// <param name="reader">Reader positioned at the payload.</param>
        /// <param name="encoding">Encoding declared for the page.</param>
        /// <param name="maxValues">Value limit passed to the plain dictionary reader.</param>
        /// <param name="values">Values read, or null when the encoding yields indexes.</param>
        /// <param name="indexes">Indexes read, or null when the encoding yields values.</param>
        /// <exception cref="ParquetException">When the encoding is none of the handled ones.</exception>
        private void ReadColumn(IDataTypeHandler dataTypeHandler, Thrift.SchemaElement tse, BinaryReader reader, Thrift.Encoding encoding, long maxValues,
                                out IList values,
                                out List<int> indexes)
        {
            //dictionary encoding uses RLE to encode data

            if (encoding == Thrift.Encoding.PLAIN)
            {
                values  = dataTypeHandler.Read(reader);
                indexes = null;
            }
            else if (encoding == Thrift.Encoding.RLE)
            {
                values  = null;
                indexes = RunLengthBitPackingHybridValuesReader.Read(reader, tse.Type_length);
            }
            else if (encoding == Thrift.Encoding.PLAIN_DICTIONARY)
            {
                values  = null;
                indexes = PlainDictionaryValuesReader.Read(reader, maxValues);
            }
            else
            {
                throw new ParquetException($"encoding {encoding} is not supported.");
            }
        }
Example #10
0
        /// <summary>
        /// Recursively walks the thrift schema from index <paramref name="si"/>, letting the matching data
        /// type handler create each element under <paramref name="parent"/>.
        /// </summary>
        /// <param name="parent">Element that receives the created children.</param>
        /// <param name="childCount">Maximum number of elements to process at this level.</param>
        /// <param name="si">Current index into the thrift schema; advanced as elements are consumed.</param>
        /// <param name="formatOptions">Options used when matching data type handlers.</param>
        private void ParseSchemaExperimenal(SchemaElement parent, int childCount, ref int si, ParquetOptions formatOptions)
        {
            for (int visited = 0; visited < childCount && si < _fileMeta.Schema.Count; visited++)
            {
                Thrift.SchemaElement current = _fileMeta.Schema[si];
                IDataTypeHandler     handler = DataTypeFactory.Match(current, formatOptions);

                if (handler == null)
                {
                    if (current.Num_children > 0)
                    {
                        // no handler but it has children: step over the element itself and parse its
                        // children into the same parent
                        var nestedCount = current.Num_children;
                        si++;
                        ParseSchemaExperimenal(parent, nestedCount, ref si, formatOptions);
                        continue;
                    }

                    ThrowNoHandler(current);
                }

                SchemaElement createdRoot = handler.Create(parent, _fileMeta.Schema, ref si);

                if (createdRoot != null)
                {
                    // the child count is read from the element just before the current index
                    ParseSchemaExperimenal(createdRoot, _fileMeta.Schema[si - 1].Num_children, ref si, formatOptions);
                }
            }
        }
        /// <summary>
        /// Writes the column as a single data page: definition levels (when the column has them) and data are
        /// first written into an in-memory buffer, then the page header and the buffered bytes are written out.
        /// Columns with repetitions are not supported and cause <see cref="NotImplementedException"/>.
        /// </summary>
        /// <param name="column">Column whose definition levels and defined data are written.</param>
        /// <param name="tse">Schema element passed to the data type handler.</param>
        /// <param name="dataTypeHandler">Handler that writes the column's defined data.</param>
        /// <param name="maxRepetitionLevel">Not used by this method body.</param>
        /// <param name="maxDefinitionLevel">Passed on when writing definition levels.</param>
        /// <returns>A list containing one tag with the page header and its written size.</returns>
        private List <PageTag> WriteColumn(DataColumn column,
                                           Thrift.SchemaElement tse,
                                           IDataTypeHandler dataTypeHandler,
                                           int maxRepetitionLevel,
                                           int maxDefinitionLevel)
        {
            var pages = new List <PageTag>();

            /*
             * Page header must precede the actual data (compressed or not), however it contains both
             * the uncompressed and compressed data size, which we don't know up front! This somewhat
             * limits the write efficiency.
             */


            using (var ms = new MemoryStream())
            {
                Thrift.PageHeader dataPageHeader = _footer.CreateDataPage(column.TotalCount);

                //chain streams together so we have real streaming instead of wasting undefraggable LOH memory
                using (PositionTrackingStream pps = DataStreamFactory.CreateWriter(ms, _compressionMethod))
                {
                    using (var writer = new BinaryWriter(pps))
                    {
                        if (column.HasRepetitions)
                        {
                            throw new NotImplementedException();
                        }

                        if (column.HasDefinitions)
                        {
                            WriteLevels(writer, column.DefinitionLevels, maxDefinitionLevel);
                        }

                        dataTypeHandler.Write(tse, writer, column.DefinedData);
                    }

                    // NOTE(review): this BinaryWriter constructor does not leave the stream open, so pps has
                    // presumably been disposed by the writer at this point - confirm Position is still valid here
                    dataPageHeader.Uncompressed_page_size = (int)pps.Position;
                }
                // the buffer position is taken as the compressed size
                dataPageHeader.Compressed_page_size = (int)ms.Position;

                //write the header in
                int headerSize = _thriftStream.Write(dataPageHeader);
                // rewind the buffer so the whole page body is copied after the header
                ms.Position = 0;
                ms.CopyTo(_stream);

                var dataTag = new PageTag
                {
                    HeaderMeta = dataPageHeader,
                    HeaderSize = headerSize
                };

                pages.Add(dataTag);
            }

            return(pages);
        }
Example #12
0
        /// <summary>
        /// Creates an empty column for the given field. Definition levels are only allocated when the field
        /// can contain nulls.
        /// </summary>
        /// <param name="field">Field this column belongs to.</param>
        /// <exception cref="ArgumentNullException">When <paramref name="field"/> is null.</exception>
        public DataColumn(DataField field)
        {
            if (field == null)
            {
                throw new ArgumentNullException(nameof(field));
            }

            _field = field;

            IDataTypeHandler typeHandler = DataTypeFactory.Match(field.DataType);

            // always a plain list, always non-nullable when possible
            _definedData = typeHandler.CreateEmptyList(false, false, 0);

            // do not create an instance when not required
            if (field.HasNulls)
            {
                _definitionLevels = new List<int>();
            }
            else
            {
                _definitionLevels = null;
            }
        }
Example #13
0
        /// <summary>
        /// Reads the column chunk page by page, accumulating repetitions, definitions, indexes and values
        /// until the chunk's declared number of values is reached or a page that is not a data page is met.
        /// The accumulated data is not returned or stored by this method body, and neither
        /// <paramref name="offset"/> nor <paramref name="count"/> is used in it.
        /// </summary>
        /// <param name="offset">Not used by this method body.</param>
        /// <param name="count">Not used by this method body.</param>
        public void Read(long offset, long count)
        {
            Thrift.SchemaElement schemaElement = _footer.GetSchemaElement(_thriftColumnChunk);
            IDataTypeHandler     handler       = DataTypeFactory.Match(schemaElement, _parquetOptions);

            long chunkStart = GetFileOffset();
            long maxValues  = _thriftColumnChunk.Meta_data.Num_values;

            _inputStream.Seek(chunkStart, SeekOrigin.Begin);

            IList     dictionary  = null;
            List<int> indexes     = null;
            List<int> repetitions = null;
            List<int> definitions = null;
            IList     values      = null;

            //there can be only one dictionary page in column
            Thrift.PageHeader header = _thriftStream.Read<Thrift.PageHeader>();
            if (TryReadDictionaryPage(header, handler, out dictionary))
            {
                header = _thriftStream.Read<Thrift.PageHeader>();
            }

            int  pagesRead = 0;
            bool morePages = true;

            while (morePages)
            {
                // the remaining budget for this page is based on what has been collected so far
                int      indexCount = indexes == null ? 0 : indexes.Count;
                int      valueCount = values == null ? 0 : values.Count;
                PageData page       = ReadDataPage(handler, header, schemaElement, maxValues - Math.Max(indexCount, valueCount));

                repetitions = AssignOrAdd(repetitions, page.repetitions);
                definitions = AssignOrAdd(definitions, page.definitions);
                indexes     = AssignOrAdd(indexes, page.indexes);
                values      = AssignOrAdd(values, page.values);

                pagesRead++;

                int decodedCount    = (values == null ? 0 : values.Count) + (indexes == null ? 0 : indexes.Count);
                int definitionCount = definitions == null ? 0 : definitions.Count;

                if (Math.Max(decodedCount, definitionCount) >= maxValues)
                {
                    //limit reached
                    morePages = false;
                }
                else
                {
                    // keep going only while the next header describes another data page
                    header    = _thriftStream.Read<Thrift.PageHeader>();
                    morePages = header.Type == Thrift.PageType.DATA_PAGE;
                }
            }

            //IList mergedValues = new ValueMerger(_schema, values)
            //   .Apply(dictionary, definitions, repetitions, indexes, (int)maxValues);
        }
Example #14
0
        /// <summary>
        /// Reports whether the page is a dictionary page. Reading an actual dictionary page is not
        /// implemented and throws.
        /// </summary>
        /// <param name="ph">Header of the page to inspect.</param>
        /// <param name="dataTypeHandler">Not used by this method body.</param>
        /// <param name="dictionary">Set to null when the page is not a dictionary page.</param>
        /// <returns>False when the page is not a dictionary page.</returns>
        /// <exception cref="NotImplementedException">When the page is a dictionary page.</exception>
        private bool TryReadDictionaryPage(Thrift.PageHeader ph, IDataTypeHandler dataTypeHandler, out IList dictionary)
        {
            bool isDictionaryPage = ph.Type == Thrift.PageType.DICTIONARY_PAGE;
            if (isDictionaryPage)
            {
                throw new NotImplementedException();
            }

            dictionary = null;
            return false;
        }
Example #15
0
        /// <summary>
        /// Converts each field into thrift schema elements by delegating to the data type handler that
        /// matches the field.
        /// </summary>
        /// <param name="ses">Fields to convert.</param>
        /// <param name="parent">Parent element passed to every handler.</param>
        /// <param name="container">Flat list receiving the created elements.</param>
        /// <exception cref="InvalidOperationException">When no data type handler matches a field.</exception>
        private void CreateThriftSchema(IEnumerable<Field> ses, Thrift.SchemaElement parent, IList<Thrift.SchemaElement> container)
        {
            foreach (Field se in ses)
            {
                IDataTypeHandler handler = DataTypeFactory.Match(se);

                // fail with a message naming the field instead of a NullReferenceException on the call below
                if (handler == null)
                {
                    throw new InvalidOperationException($"cannot find data type handler to create thrift schema for field '{se.Name}'");
                }

                handler.CreateThrift(se, parent, container);
            }
        }
Example #16
0
        /// <summary>
        /// Creates an empty column for the given field. Definition levels are always allocated; repetition
        /// levels are only allocated when the field is an array.
        /// </summary>
        /// <param name="field">Field this column belongs to.</param>
        /// <exception cref="ArgumentNullException">When <paramref name="field"/> is null.</exception>
        public DataColumn(DataField field)
        {
            if (field == null)
            {
                throw new ArgumentNullException(nameof(field));
            }

            _field = field;

            IDataTypeHandler typeHandler = DataTypeFactory.Match(field.DataType);

            // always a plain list, always non-nullable when possible
            _definedData      = typeHandler.CreateEmptyList(false, false, 0);
            _definitionLevels = new List<int>();

            HasRepetitions = field.IsArray;
            if (HasRepetitions)
            {
                _repetitionLevels = new List<int>();
            }
            else
            {
                _repetitionLevels = null;
            }
        }
Example #17
0
        /// <summary>
        /// Creates a data field with the given name, data type, nullability and array flag. The CLR type is
        /// taken from the data type's handler when one is found.
        /// </summary>
        /// <param name="name">Field name.</param>
        /// <param name="dataType">Data type of the field.</param>
        /// <param name="hasNulls">Stored as <c>HasNulls</c>.</param>
        /// <param name="isArray">Stored as <c>IsArray</c>.</param>
        public DataField(string name, DataType dataType, bool hasNulls = true, bool isArray = false) : base(name, SchemaType.Data)
        {
            DataType = dataType;
            HasNulls = hasNulls;
            IsArray  = isArray;

            IDataTypeHandler typeHandler = DataTypeFactory.Match(dataType);
            if (typeHandler == null)
            {
                // no handler: ClrType is left untouched
                return;
            }

            ClrType = typeHandler.ClrType;
        }
Example #18
0
        /// <summary>
        /// Creates a reader for one column chunk. Resolves the chunk's maximum repetition and definition
        /// levels, its schema element and the matching data type handler up front.
        /// </summary>
        /// <param name="inputStream">Stream to read from; also wrapped in a thrift stream.</param>
        /// <param name="thriftColumnChunk">Column chunk to read.</param>
        /// <param name="footer">Footer used to look up levels and the schema element.</param>
        /// <param name="parquetOptions">Options used when matching the data type handler.</param>
        /// <exception cref="ArgumentNullException">When any argument is null.</exception>
        public ColumnarReader(Stream inputStream, Thrift.ColumnChunk thriftColumnChunk, ThriftFooter footer, ParquetOptions parquetOptions)
        {
            if (inputStream == null)
            {
                throw new ArgumentNullException(nameof(inputStream));
            }
            if (thriftColumnChunk == null)
            {
                throw new ArgumentNullException(nameof(thriftColumnChunk));
            }
            if (footer == null)
            {
                throw new ArgumentNullException(nameof(footer));
            }
            if (parquetOptions == null)
            {
                throw new ArgumentNullException(nameof(parquetOptions));
            }

            _inputStream       = inputStream;
            _thriftColumnChunk = thriftColumnChunk;
            _footer            = footer;
            _parquetOptions    = parquetOptions;

            _thriftStream = new ThriftStream(inputStream);

            _footer.GetLevels(_thriftColumnChunk, out int maxRepetitionLevel, out int maxDefinitionLevel);
            _maxRepetitionLevel = maxRepetitionLevel;
            _maxDefinitionLevel = maxDefinitionLevel;

            _thriftSchemaElement = _footer.GetSchemaElement(_thriftColumnChunk);
            _dataTypeHandler     = DataTypeFactory.Match(_thriftSchemaElement, _parquetOptions);
        }
Example #19
0
 /// <summary>
 /// Creates a merger over the given values. When <paramref name="values"/> is null, an empty list created
 /// by the data type handler is used instead.
 /// </summary>
 /// <param name="maxDefinitionLevel">Stored maximum definition level.</param>
 /// <param name="maxRepetitionLevel">Stored maximum repetition level.</param>
 /// <param name="values">Values to merge, or null.</param>
 /// <param name="dataTypeHandler">Handler used to create empty lists.</param>
 /// <param name="isNullable">Nullability flag passed to the handler when creating lists.</param>
 public ValueMerger(
     int maxDefinitionLevel,
     int maxRepetitionLevel,
     IList values,
     IDataTypeHandler dataTypeHandler,
     bool isNullable)
 {
     _dataTypeHandler    = dataTypeHandler;
     _isNullable         = isNullable;
     _maxDefinitionLevel = maxDefinitionLevel;
     _maxRepetitionLevel = maxRepetitionLevel;

     // factory kept for later use; it reads the fields assigned above
     _createEmptyListFunc = () => _dataTypeHandler.CreateEmptyList(_isNullable, false, 0);

     if (values == null)
     {
         _values = _dataTypeHandler.CreateEmptyList(_isNullable, false, 0);
     }
     else
     {
         _values = values;
     }
 }
Example #20
0
        /// <summary>
        /// Builds a database bundle by instantiating the database, query builder and data type handler
        /// classes found under the given package name.
        /// </summary>
        /// <param name="packageName">Name of the package; prefixed (with a dot) to each class name.</param>
        /// <returns>A new <see cref="DatabaseBundle"/> holding the three created instances.</returns>
        private DatabaseBundle GetDatabaseBundle(String packageName)
        {
            String classPrefix = packageName + ".";

            var databaseImpl = (IDatabaseImpl)ClassUtils.CreateClassInstance(classPrefix + DATABASE_CLASS_NAME);
            var builder      = (IQueryBuilder)ClassUtils.CreateClassInstance(classPrefix + DATABASE_QUERY_BUILDER);
            var typeHandler  = (IDataTypeHandler)ClassUtils.CreateClassInstance(classPrefix + DATABASE_DATA_TYPE_HANDLER);

            var bundle = new DatabaseBundle();
            bundle.SetDatabase(databaseImpl);
            bundle.SetQueryBuilder(builder);
            bundle.SetDataTypeHandler(typeHandler);

            return bundle;
        }
Example #21
0
        /// <summary>
        /// Builds a schema field for a CLR property, honouring the format settings of a
        /// <see cref="ParquetColumnAttribute"/> when the property carries one.
        /// </summary>
        /// <param name="property">Property to describe.</param>
        /// <returns>
        /// A data field named after the attribute name when present (otherwise the property name), or null
        /// when no data type handler matches the property type.
        /// </returns>
        private Field GetField(PropertyInfo property)
        {
            // the handler lookup works on the underlying type: nullable wrapper removed first, then array element
            Type pt = property.PropertyType;

            if (pt.IsNullable())
            {
                pt = pt.GetNonNullable();
            }
            if (pt.IsArray)
            {
                pt = pt.GetElementType();
            }

            IDataTypeHandler handler = DataTypeFactory.Match(pt);

            if (handler == null)
            {
                return null;
            }

            ParquetColumnAttribute columnAttr = property.GetCustomAttribute<ParquetColumnAttribute>();

            string name = columnAttr?.Name ?? property.Name;

            var r = new DataField(name,
                                  property.PropertyType //use CLR type here as DF constructor will figure out nullability and other parameters
                                  );

            if (columnAttr != null)
            {
                // with an attribute present, time span, date/time and decimal fields are replaced by their
                // specialised field types, reusing the name, nullability and array flag computed above
                if (handler.ClrType == typeof(TimeSpan))
                {
                    r = new TimeSpanDataField(r.Name, columnAttr.TimeSpanFormat, r.HasNulls, r.IsArray);
                }
                if (handler.ClrType == typeof(DateTime) || handler.ClrType == typeof(DateTimeOffset))
                {
                    r = new DateTimeDataField(r.Name, columnAttr.DateTimeFormat, r.HasNulls, r.IsArray);
                }
                if (handler.ClrType == typeof(decimal))
                {
                    r = new DecimalDataField(r.Name, columnAttr.DecimalPrecision, columnAttr.DecimalScale, columnAttr.DecimalForceByteArrayEncoding, r.HasNulls, r.IsArray);
                }
            }

            r.ClrPropName = property.Name;

            return r;
        }
Example #22
0
        /// <summary>
        /// Converts these statistics to their thrift form. The plain-encoded minimum and maximum are written
        /// to both the <c>Min</c>/<c>Max</c> and the <c>Min_value</c>/<c>Max_value</c> members.
        /// </summary>
        /// <param name="handler">Handler used to plain-encode the minimum and maximum values.</param>
        /// <param name="tse">Schema element passed to the encoder.</param>
        /// <returns>A new thrift statistics object.</returns>
        internal Thrift.Statistics ToThriftStatistics(IDataTypeHandler handler, Thrift.SchemaElement tse)
        {
            byte[] encodedMin = handler.PlainEncode(tse, MinValue);
            byte[] encodedMax = handler.PlainEncode(tse, MaxValue);

            var statistics = new Thrift.Statistics();
            statistics.Null_count     = NullCount;
            statistics.Distinct_count = DistinctCount;
            statistics.Min            = encodedMin;
            statistics.Min_value      = encodedMin;
            statistics.Max            = encodedMax;
            statistics.Max_value      = encodedMax;

            return statistics;
        }
Example #23
0
 /// <summary>
 /// Creates a primitive reader; every argument is stored as-is, without validation.
 /// </summary>
 /// <param name="schemaElement">Schema element of the column.</param>
 /// <param name="parquetOptions">Parquet options.</param>
 /// <param name="dataTypeHandler">Data type handler for the column.</param>
 /// <param name="binaryReader">Reader supplying the data.</param>
 /// <param name="typeWidth">Type width value.</param>
 /// <param name="readOneFunc">Delegate that produces one element from a binary reader.</param>
 public PrimitiveReader(
     Thrift.SchemaElement schemaElement,
     ParquetOptions parquetOptions,
     IDataTypeHandler dataTypeHandler,
     BinaryReader binaryReader,
     int typeWidth,
     Func<BinaryReader, TElement> readOneFunc)
 {
     // column description
     _schemaElement   = schemaElement;
     _dataTypeHandler = dataTypeHandler;
     _parquetOptions  = parquetOptions;

     // data source and how to read from it
     _binaryReader = binaryReader;
     _readOneFunc  = readOneFunc;
     _typeWidth    = typeWidth;
 }
Example #24
0
        /// <summary>
        /// Initialises a <see cref="DataField"/> from explicitly supplied attributes.
        /// </summary>
        /// <param name="name">Name of the field.</param>
        /// <param name="dataType">Native Parquet type of the field.</param>
        /// <param name="hasNulls">True when the field accepts null values. Nullable fields cost slightly more disk space and processing than non-nullable ones, but are more common.</param>
        /// <param name="isArray">True when each value of the field can itself hold multiple values, similar to an array in C#.</param>
        public DataField(string name, DataType dataType, bool hasNulls = true, bool isArray = false) : base(name, SchemaType.Data)
        {
            DataType = dataType;
            HasNulls = hasNulls;
            IsArray  = isArray;

            // arrays get a repetition level of one, everything else zero
            if (isArray)
            {
                MaxRepetitionLevel = 1;
            }
            else
            {
                MaxRepetitionLevel = 0;
            }

            IDataTypeHandler typeHandler = DataTypeFactory.Match(dataType);
            if (typeHandler == null)
            {
                // no handler: the CLR type properties are left untouched
                return;
            }

            ClrType = typeHandler.ClrType;

            if (hasNulls)
            {
                ClrNullableIfHasNullsType = ClrType.GetNullable();
            }
            else
            {
                ClrNullableIfHasNullsType = ClrType;
            }
        }
        /// <summary>
        /// Writes the column as the next column chunk of the current row group. The schema element is taken
        /// from the current column position, which is advanced by one.
        /// </summary>
        /// <param name="column">Column to write.</param>
        /// <exception cref="ArgumentNullException">When <paramref name="column"/> is null.</exception>
        public void Write(DataColumn column)
        {
            if (column == null)
            {
                throw new ArgumentNullException(nameof(column));
            }

            // the position is advanced before the schema lookup
            int schemaIndex = _colIdx++;

            Thrift.SchemaElement schemaElement = _thschema[schemaIndex];
            IDataTypeHandler     handler       = DataTypeFactory.Match(schemaElement, _formatOptions);
            //todo: check if the column is in the right order

            List<string> columnPath = _footer.GetPath(schemaElement);

            Thrift.ColumnChunk writtenChunk = WriteColumnChunk(schemaElement, columnPath, column, handler);
            _thriftRowGroup.Columns.Add(writtenChunk);
        }
        /// <summary>
        /// Appends the thrift schema elements for a struct field: an optional element named after the field,
        /// followed by the elements of every member field, created by each member's own handler.
        /// </summary>
        /// <param name="field">Field to convert; cast to <see cref="StructField"/>.</param>
        /// <param name="parent">Parent element; its child count is incremented.</param>
        /// <param name="container">Flat list receiving the created elements.</param>
        public void CreateThrift(Field field, Thrift.SchemaElement parent, IList<Thrift.SchemaElement> container)
        {
            var structField = (StructField)field;

            var structElement = new Thrift.SchemaElement(field.Name);
            structElement.Repetition_type = Thrift.FieldRepetitionType.OPTIONAL;

            container.Add(structElement);
            parent.Num_children += 1;

            // members are emitted with the struct element as their parent
            foreach (Field member in structField.Fields)
            {
                DataTypeFactory.Match(member).CreateThrift(member, structElement, container);
            }
        }
Example #27
0
        /// <summary>
        /// Inspects the generic type argument <c>T</c> and works out the parquet data type, the underlying
        /// CLR type, and whether it is an array and/or nullable.
        /// </summary>
        /// <returns>The discovered type information.</returns>
        /// <exception cref="ArgumentException">
        /// When <c>T</c> is a dictionary type, or its underlying type is <see cref="Row"/>.
        /// </exception>
        private static CInfo Discover()
        {
            Type declared = typeof(T);

            //throw a useful hint
            if (declared.TryExtractDictionaryType(out Type _, out Type _))
            {
                throw new ArgumentException($"cannot declare a dictionary this way, please use {nameof(MapField)}.");
            }

            // enumerables are unwrapped to their item type first
            bool isEnumerable = declared.TryExtractEnumerableType(out Type itemType);
            Type underlying   = isEnumerable ? itemType : declared;

            // then the nullable wrapper, if any, is removed
            bool nullable = underlying.IsNullable();
            if (nullable)
            {
                underlying = underlying.GetNonNullable();
            }

            if (underlying == typeof(Row))
            {
                throw new ArgumentException($"{typeof(Row)} is not supported. If you tried to declare a struct please use {typeof(StructField)} instead.");
            }

            IDataTypeHandler typeHandler = DataTypeFactory.Match(underlying);
            if (typeHandler == null)
            {
                DataTypeFactory.ThrowClrTypeNotSupported(underlying);
            }

            return new CInfo
            {
                dataType = typeHandler.DataType,
                baseType = underlying,
                isArray = isEnumerable,
                hasNulls = nullable
            };
        }
Example #28
0
        /// <summary>
        /// Writes the column's pages and returns the column chunk describing them, with the value count and
        /// the total compressed and uncompressed sizes filled in.
        /// </summary>
        /// <param name="path">Path passed on when creating the column chunk.</param>
        /// <param name="column">Column to write.</param>
        /// <param name="dataTypeHandler">Handler used to write the column's data.</param>
        /// <returns>The column chunk for the written column.</returns>
        public Thrift.ColumnChunk Write(List<string> path, DataColumn column, IDataTypeHandler dataTypeHandler)
        {
            Thrift.ColumnChunk columnChunk = _footer.CreateColumnChunk(_compressionMethod, _stream, _schemaElement.Type, path, 0);
            Thrift.PageHeader  pageHeader  = _footer.CreateDataPage(column.TotalCount);
            _footer.GetLevels(columnChunk, out int maxRepetitionLevel, out int maxDefinitionLevel);

            List<PageTag> writtenPages = WriteColumn(column, _schemaElement, dataTypeHandler, maxRepetitionLevel, maxDefinitionLevel);

            //this count must be set to number of all values in the column, including nulls.
            //for hierarchy/repeated columns this is a count of flattened list, including nulls.
            columnChunk.Meta_data.Num_values = pageHeader.Data_page_header.Num_values;

            //the following counters must include both data size and header size
            var compressedTotal = writtenPages.Sum(page => page.HeaderMeta.Compressed_page_size + page.HeaderSize);
            columnChunk.Meta_data.Total_compressed_size = compressedTotal;

            var uncompressedTotal = writtenPages.Sum(page => page.HeaderMeta.Uncompressed_page_size + page.HeaderSize);
            columnChunk.Meta_data.Total_uncompressed_size = uncompressedTotal;

            return columnChunk;
        }
Example #29
0
        /// <summary>
        /// Appends the thrift schema elements for a map field: an optional MAP element, a repeated key-value
        /// container beneath it, and then the key and value elements created by their own handlers.
        /// </summary>
        /// <param name="field">Field to convert; must be a <see cref="MapField"/>.</param>
        /// <param name="parent">Parent element; its child count is incremented.</param>
        /// <param name="container">Flat list receiving the created elements.</param>
        /// <exception cref="InvalidCastException">When <paramref name="field"/> is not a <see cref="MapField"/>.</exception>
        public override void CreateThrift(Field field, Thrift.SchemaElement parent, IList<Thrift.SchemaElement> container)
        {
            // direct cast, done before anything is mutated: a field of the wrong type fails here with
            // InvalidCastException rather than later with NullReferenceException on a half-built schema
            var mapField = (MapField)field;

            parent.Num_children += 1;

            //add the root container where map begins
            var root = new Thrift.SchemaElement(field.Name)
            {
                Converted_type  = Thrift.ConvertedType.MAP,
                Num_children    = 1,
                Repetition_type = Thrift.FieldRepetitionType.OPTIONAL
            };

            container.Add(root);

            //key-value is a container for column of keys and column of values
            var keyValue = new Thrift.SchemaElement(MapField.ContainerName)
            {
                Num_children    = 0, //is assigned by children
                Repetition_type = Thrift.FieldRepetitionType.REPEATED
            };

            container.Add(keyValue);

            //now add the key and value separately
            IDataTypeHandler keyHandler   = DataTypeFactory.Match(mapField.Key);
            IDataTypeHandler valueHandler = DataTypeFactory.Match(mapField.Value);

            // after each handler runs, the last element in the container is taken as the key / value element
            keyHandler.CreateThrift(mapField.Key, keyValue, container);
            Thrift.SchemaElement tseKey = container[container.Count - 1];
            valueHandler.CreateThrift(mapField.Value, keyValue, container);
            Thrift.SchemaElement tseValue = container[container.Count - 1];

            //fixups for weirdness in RLs
            if (tseKey.Repetition_type == Thrift.FieldRepetitionType.REPEATED)
            {
                tseKey.Repetition_type = Thrift.FieldRepetitionType.OPTIONAL;
            }
            if (tseValue.Repetition_type == Thrift.FieldRepetitionType.REPEATED)
            {
                tseValue.Repetition_type = Thrift.FieldRepetitionType.OPTIONAL;
            }
        }
Example #30
0
        /// <summary>
        /// Splits a dictionary into a list of its keys and a list of its values, and appends each list as a
        /// single element to <paramref name="keys"/> and <paramref name="values"/> respectively.
        /// </summary>
        /// <param name="keys">Receives the list of the dictionary's keys.</param>
        /// <param name="values">Receives the list of the dictionary's values.</param>
        /// <param name="dictionary">Dictionary to split.</param>
        internal void AddElement(IList keys, IList values, IDictionary dictionary)
        {
            IDataTypeHandler handlerForKeys   = DataTypeFactory.Match(Key.DataType);
            IDataTypeHandler handlerForValues = DataTypeFactory.Match(Value.DataType);

            // both lists are created with the dictionary's entry count passed to the handler
            IList keyColumn   = handlerForKeys.CreateEmptyList(Key.HasNulls, false, dictionary.Count);
            IList valueColumn = handlerForValues.CreateEmptyList(Value.HasNulls, false, dictionary.Count);

            foreach (object key in dictionary.Keys)
            {
                keyColumn.Add(key);
            }

            foreach (object value in dictionary.Values)
            {
                valueColumn.Add(value);
            }

            keys.Add(keyColumn);
            values.Add(valueColumn);
        }