Пример #1
0
        /// <summary>
        /// Looks up the <see cref="TimeMetadata"/> attached to <paramref name="column"/> and
        ///   translates its unit into a <see cref="DateTimePrecisionType"/>.
        /// </summary>
        /// <param name="column">Column whose metadata is inspected.</param>
        /// <param name="precision">The mapped precision on success; the default value on failure.</param>
        /// <param name="errorMessage">A description of the problem on failure; null on success.</param>
        /// <returns>true when the column has TimeMetadata with a recognized unit, otherwise false.</returns>
        static bool TryReadTimePrecision(ref feather.fbs.Column column, out DateTimePrecisionType precision, out string errorMessage)
        {
            var timeMetadata = column.Metadata <TimeMetadata>();

            // nothing to translate if the column doesn't carry time metadata at all
            if (timeMetadata == null)
            {
                precision    = default(DateTimePrecisionType);
                errorMessage = $"Couldn't read Precision for column {column.Name}, no TimeMetadata";
                return false;
            }

            var timeUnit = timeMetadata.Value.Unit;

            // assume success, the fall-through branch below overrides both values
            var recognized = true;
            errorMessage = null;

            if (timeUnit == TimeUnit.MICROSECOND)
            {
                precision = DateTimePrecisionType.Microsecond;
            }
            else if (timeUnit == TimeUnit.MILLISECOND)
            {
                precision = DateTimePrecisionType.Millisecond;
            }
            else if (timeUnit == TimeUnit.NANOSECOND)
            {
                precision = DateTimePrecisionType.Nanosecond;
            }
            else if (timeUnit == TimeUnit.SECOND)
            {
                precision = DateTimePrecisionType.Second;
            }
            else
            {
                precision    = default(DateTimePrecisionType);
                errorMessage = $"Couldn't understand TimeUnit {timeUnit}";
                recognized   = false;
            }

            return recognized;
        }
Пример #2
0
        /// <summary>
        /// Looks up the <see cref="TimestampMetadata"/> attached to <paramref name="column"/>,
        ///   checks that its timezone is absent, UTC or GMT, and translates its unit into a
        ///   <see cref="DateTimePrecisionType"/>.
        /// </summary>
        /// <param name="column">Column whose metadata is inspected.</param>
        /// <param name="precision">The mapped precision on success; the default value on failure.</param>
        /// <param name="errorMessage">A description of the problem on failure; null on success.</param>
        /// <returns>
        /// true when the column has TimestampMetadata with an accepted timezone and a recognized unit,
        ///   otherwise false.
        /// </returns>
        static bool TryReadTimestampPrecision(ref feather.fbs.Column column, out DateTimePrecisionType precision, out string errorMessage)
        {
            var metadata = column.Metadata <TimestampMetadata>();

            if (metadata == null)
            {
                precision    = default(DateTimePrecisionType);
                errorMessage = $"Couldn't read Precision for column {column.Name}, no TimestampMetadata";
                return(false);
            }

            var metadataValue = metadata.Value;
            var timezoneName  = metadataValue.Timezone;
            var unit          = metadataValue.Unit;

            // note: supporting other timezones would be nice,
            //   but the timezone included appears to just be
            //   a passthrough from as.POSIXct (in R anyway)
            //   which lets a bunch of ambiguous junk through
            //   (timezone abbreviations are NOT unique, or
            //   properly spec'd even).
            // the only thing we can rely on being the same
            //   everywhere is UTC, so constrain to that.
            //
            // a missing (null) timezone is accepted as-is.
            // the names are identifiers rather than linguistic text, so
            //   compare ordinally; a culture-aware comparison would treat
            //   strings containing ignorable characters as equal to "UTC".
            var isUtc =
                timezoneName == null ||
                string.Equals(timezoneName, "UTC", StringComparison.OrdinalIgnoreCase) ||
                string.Equals(timezoneName, "GMT", StringComparison.OrdinalIgnoreCase);

            if (!isUtc)
            {
                errorMessage = $"Cannot read Timestamp in timezone {timezoneName}, only UTC/GMT is supported";
                precision    = default(DateTimePrecisionType);
                return(false);
            }

            switch (unit)
            {
            case TimeUnit.MICROSECOND: precision = DateTimePrecisionType.Microsecond; break;

            case TimeUnit.MILLISECOND: precision = DateTimePrecisionType.Millisecond; break;

            case TimeUnit.NANOSECOND: precision = DateTimePrecisionType.Nanosecond; break;

            case TimeUnit.SECOND: precision = DateTimePrecisionType.Second; break;

            default:
                precision    = default(DateTimePrecisionType);
                errorMessage = $"Couldn't understand TimeUnit {unit}";
                return(false);
            }

            errorMessage = null;
            return(true);
        }
Пример #3
0
        /// <summary>
        /// Converts a <see cref="DateTimePrecisionType"/> into the matching <see cref="feather.fbs.TimeUnit"/>.
        /// </summary>
        /// <param name="type">Precision to convert.</param>
        /// <returns>The time unit with the same granularity as <paramref name="type"/>.</returns>
        /// <exception cref="InvalidOperationException">
        /// <paramref name="type"/> is not one of Microsecond, Millisecond, Nanosecond or Second.
        /// </exception>
        public static feather.fbs.TimeUnit MapToDiskType(this DateTimePrecisionType type)
        {
            if (type == DateTimePrecisionType.Microsecond)
            {
                return feather.fbs.TimeUnit.MICROSECOND;
            }

            if (type == DateTimePrecisionType.Millisecond)
            {
                return feather.fbs.TimeUnit.MILLISECOND;
            }

            if (type == DateTimePrecisionType.Nanosecond)
            {
                return feather.fbs.TimeUnit.NANOSECOND;
            }

            if (type == DateTimePrecisionType.Second)
            {
                return feather.fbs.TimeUnit.SECOND;
            }

            // anything else has no on-disk counterpart
            throw new InvalidOperationException($"Unexpected {nameof(DateTimePrecisionType)}: {type}");
        }
Пример #4
0
        /// <summary>
        /// Builds the <see cref="ColumnSpec"/> for a single column from its primitive array description.
        /// </summary>
        /// <param name="name">Column name, copied into the spec.</param>
        /// <param name="effectiveType">Type used to pick the column's <see cref="ColumnType"/>.</param>
        /// <param name="arrayDetails">Array description supplying offset, length, null count and encoding.</param>
        /// <param name="categoryLevels">Category levels for the column, or null if it has none.</param>
        /// <param name="precision">Precision forwarded to the type lookup.</param>
        /// <param name="columnSpec">The populated spec on success; the default value on failure.</param>
        /// <param name="errorMessage">A description of the problem on failure; null on success.</param>
        /// <returns>true when a spec could be built, otherwise false.</returns>
        /// <exception cref="NotImplementedException">The array's encoding is anything other than PLAIN.</exception>
        static bool TryMakeColumnSpec(string name, feather.fbs.Type effectiveType, ref PrimitiveArray arrayDetails, string[] categoryLevels, DateTimePrecisionType precision, out ColumnSpec columnSpec, out string errorMessage)
        {
            var arrayOffset   = arrayDetails.Offset;
            var arrayLength   = arrayDetails.Length;
            var arrayEncoding = arrayDetails.Encoding;

            // TODO (Dictionary Encoding)
            if (arrayEncoding != feather.fbs.Encoding.PLAIN)
            {
                throw new NotImplementedException($"Column {name} uses encoding {arrayEncoding}, only {feather.fbs.Encoding.PLAIN} is implemented");
            }
            // END TODO

            ColumnType type;
            bool       isNullable;

            if (!TryGetType(effectiveType, ref arrayDetails, precision, out type, out isNullable, out errorMessage))
            {
                columnSpec = default(ColumnSpec);
                return(false);
            }

            // one bit per entry, rounded up to whole bytes; zero when there is no bitmask
            long numNullBytes = 0;

            if (isNullable)
            {
                numNullBytes = arrayLength / 8;
                if (arrayLength % 8 != 0)
                {
                    numNullBytes++;
                }
            }

            // a naive reading of the spec suggests that the null bitmask should be
            //   aligned based on the _type_ but it appears to always be long
            //   aligned.
            // this may be a bug in the spec
            int nullPadding = 0;

            if ((numNullBytes % FeatherMagic.NULL_BITMASK_ALIGNMENT) != 0)
            {
                nullPadding = FeatherMagic.NULL_BITMASK_ALIGNMENT - (int)(numNullBytes % FeatherMagic.NULL_BITMASK_ALIGNMENT);
            }

            // when nullable, the bitmask sits at the array offset and the data follows the
            //   padded bitmask; otherwise the data itself starts at the array offset and
            //   -1 marks the absence of a bitmask
            var nullOffset = isNullable ? (arrayOffset) : -1;
            var dataOffset = !isNullable ? (arrayOffset) : (nullOffset + numNullBytes + nullPadding);

            columnSpec =
                new ColumnSpec
            {
                Name = name,
                NullBitmaskOffset = nullOffset,
                DataOffset        = dataOffset,
                Length            = arrayLength,
                Type           = type,
                CategoryLevels = categoryLevels,

                // only spin up this map if we've got categories to potentially map to
                CategoryEnumMap = categoryLevels != null ? new Dictionary <System.Type, CategoryEnumMapType>() : null
            };
            errorMessage = null;
            return(true);
        }
Пример #5
0
        /// <summary>
        /// Picks the <see cref="ColumnType"/> for a column from its effective type, whether it
        ///   contains nulls and, for TIMESTAMP and TIME columns, its precision.
        /// </summary>
        /// <param name="effectiveType">Type to map.</param>
        /// <param name="array">Array description; only its null count is read.</param>
        /// <param name="precision">Precision, consulted only for TIMESTAMP and TIME.</param>
        /// <param name="type">The mapped type on success; <see cref="ColumnType.NONE"/> on failure.</param>
        /// <param name="isNullable">true when the array reports a non-zero null count.</param>
        /// <param name="errorMessage">A description of the problem on failure; null on success.</param>
        /// <returns>true when both the type and (where relevant) the precision are recognized, otherwise false.</returns>
        static bool TryGetType(feather.fbs.Type effectiveType, ref PrimitiveArray array, DateTimePrecisionType precision, out ColumnType type, out bool isNullable, out string errorMessage)
        {
            isNullable = array.NullCount != 0;

            // assume success, the failure branches below overwrite this
            errorMessage = null;

            // every case picks between the nullable and non-nullable flavor of the same type,
            //   so a single switch keeps the two variants from drifting apart
            switch (effectiveType)
            {
            case feather.fbs.Type.BINARY:
                type = isNullable ? ColumnType.NullableBinary : ColumnType.Binary;
                return(true);

            case feather.fbs.Type.BOOL:
                type = isNullable ? ColumnType.NullableBool : ColumnType.Bool;
                return(true);

            case feather.fbs.Type.CATEGORY:
                type = isNullable ? ColumnType.NullableCategory : ColumnType.Category;
                return(true);

            case feather.fbs.Type.DATE:
                type = isNullable ? ColumnType.NullableDate : ColumnType.Date;
                return(true);

            case feather.fbs.Type.DOUBLE:
                type = isNullable ? ColumnType.NullableDouble : ColumnType.Double;
                return(true);

            case feather.fbs.Type.FLOAT:
                type = isNullable ? ColumnType.NullableFloat : ColumnType.Float;
                return(true);

            case feather.fbs.Type.INT16:
                type = isNullable ? ColumnType.NullableInt16 : ColumnType.Int16;
                return(true);

            case feather.fbs.Type.INT32:
                type = isNullable ? ColumnType.NullableInt32 : ColumnType.Int32;
                return(true);

            case feather.fbs.Type.INT64:
                type = isNullable ? ColumnType.NullableInt64 : ColumnType.Int64;
                return(true);

            case feather.fbs.Type.INT8:
                type = isNullable ? ColumnType.NullableInt8 : ColumnType.Int8;
                return(true);

            case feather.fbs.Type.TIMESTAMP:
                switch (precision)
                {
                case DateTimePrecisionType.Microsecond:
                    type = isNullable ? ColumnType.NullableTimestamp_Microsecond : ColumnType.Timestamp_Microsecond;
                    return(true);

                case DateTimePrecisionType.Millisecond:
                    type = isNullable ? ColumnType.NullableTimestamp_Millisecond : ColumnType.Timestamp_Millisecond;
                    return(true);

                case DateTimePrecisionType.Nanosecond:
                    type = isNullable ? ColumnType.NullableTimestamp_Nanosecond : ColumnType.Timestamp_Nanosecond;
                    return(true);

                case DateTimePrecisionType.Second:
                    type = isNullable ? ColumnType.NullableTimestamp_Second : ColumnType.Timestamp_Second;
                    return(true);

                default:
                    errorMessage = $"Unknown precision {precision}";
                    type         = ColumnType.NONE;
                    return(false);
                }

            case feather.fbs.Type.TIME:
                switch (precision)
                {
                case DateTimePrecisionType.Microsecond:
                    type = isNullable ? ColumnType.NullableTime_Microsecond : ColumnType.Time_Microsecond;
                    return(true);

                case DateTimePrecisionType.Millisecond:
                    type = isNullable ? ColumnType.NullableTime_Millisecond : ColumnType.Time_Millisecond;
                    return(true);

                case DateTimePrecisionType.Nanosecond:
                    type = isNullable ? ColumnType.NullableTime_Nanosecond : ColumnType.Time_Nanosecond;
                    return(true);

                case DateTimePrecisionType.Second:
                    type = isNullable ? ColumnType.NullableTime_Second : ColumnType.Time_Second;
                    return(true);

                default:
                    errorMessage = $"Unknown precision {precision}";
                    type         = ColumnType.NONE;
                    return(false);
                }

            case feather.fbs.Type.UINT16:
                type = isNullable ? ColumnType.NullableUint16 : ColumnType.Uint16;
                return(true);

            case feather.fbs.Type.UINT32:
                type = isNullable ? ColumnType.NullableUint32 : ColumnType.Uint32;
                return(true);

            case feather.fbs.Type.UINT64:
                type = isNullable ? ColumnType.NullableUint64 : ColumnType.Uint64;
                return(true);

            case feather.fbs.Type.UINT8:
                type = isNullable ? ColumnType.NullableUint8 : ColumnType.Uint8;
                return(true);

            case feather.fbs.Type.UTF8:
                type = isNullable ? ColumnType.NullableString : ColumnType.String;
                return(true);

            default:
                // report the value that was actually switched on
                errorMessage = $"Unknown column type {effectiveType}";
                type         = ColumnType.NONE;
                return(false);
            }
        }
Пример #6
0
        /// <summary>
        /// Validates the leading and trailing magic values of a memory mapped file, decodes the
        ///   metadata table stored in front of the trailing magic value, and builds a
        ///   <see cref="ColumnSpec"/> for every column it describes.
        /// </summary>
        /// <param name="file">Memory mapped file to read from.</param>
        /// <param name="size">Size of the file's contents, in bytes.</param>
        /// <param name="metadata">The decoded metadata on success; the default value on failure.</param>
        /// <param name="error">A description of the problem on failure; null on success.</param>
        /// <returns>true when the metadata was read and every column could be described, otherwise false.</returns>
        static bool TryReadMetaData(MemoryMappedFile file, long size, out Metadata metadata, out string error)
        {
            // there has to be room for at least the leading and trailing magic values
            if (size < FeatherMagic.MAGIC_HEADER_SIZE * 2)
            {
                metadata = default(Metadata);
                error    = $"File too small ({size:N0} bytes) to be a valid feather file";
                return(false);
            }

            using (var accessor = file.CreateViewAccessor())
            {
                var leadingHeader = accessor.ReadInt32(0);

                if (leadingHeader != FeatherMagic.MAGIC_HEADER)
                {
                    metadata = default(Metadata);
                    error    = "Magic header malformed";
                    return(false);
                }

                var trailingHeader = accessor.ReadInt32(size - FeatherMagic.MAGIC_HEADER_SIZE);

                if (trailingHeader != FeatherMagic.MAGIC_HEADER)
                {
                    metadata = default(Metadata);
                    error    = "Magic footer malformed";
                    return(false);
                }

                // the metadata's size is stored as a uint directly before the trailing magic value,
                //   and the metadata itself directly before that
                var metadataSize = accessor.ReadUInt32(size - FeatherMagic.MAGIC_HEADER_SIZE - sizeof(uint));

                var metadataStart = size - FeatherMagic.MAGIC_HEADER_SIZE - sizeof(uint) - metadataSize;
                if (metadataStart < FeatherMagic.MAGIC_HEADER_SIZE || metadataSize > int.MaxValue)
                {
                    metadata = default(Metadata);
                    error    = $"Metadata size ({metadataSize:N0}) is invalid";
                    return(false);
                }

                var metadataBytes = new byte[metadataSize];
                accessor.ReadArray(metadataStart, metadataBytes, 0, (int)metadataSize);

                // note: It'd be nice to not actually use flatbuffers for this,
                //   kind of a heavy (re)build dependency for reading, like, 4
                //   things
                var metadataBuffer = new ByteBuffer(metadataBytes);
                var metadataCTable = CTable.GetRootAsCTable(metadataBuffer);

                if (metadataCTable.Version != FeatherMagic.FEATHER_VERSION)
                {
                    error    = $"Unexpected version {metadataCTable.Version}, only {FeatherMagic.FEATHER_VERSION} is supported";
                    metadata = default(Metadata);
                    return(false);
                }

                if (metadataCTable.ColumnsLength <= 0)
                {
                    error    = $"Invalid number of columns: {metadataCTable.ColumnsLength:N0}";
                    metadata = default(Metadata);
                    return(false);
                }

                var columnSpecs = new ColumnSpec[metadataCTable.ColumnsLength];
                for (var i = 0; i < columnSpecs.Length; i++)
                {
                    var metadataColumn = metadataCTable.Columns(i).Value;
                    var name           = metadataColumn.Name;
                    var metadataType   = metadataColumn.MetadataType;

                    string[] categoryLevels         = null;
                    DateTimePrecisionType precision = default(DateTimePrecisionType);

                    var arrayDetails  = metadataColumn.Values.Value;

                    // starts out as the array's own type, the metadata (if any) may override it below
                    var effectiveType = arrayDetails.Type;

                    switch (metadataType)
                    {
                    case TypeMetadata.CategoryMetadata:
                        if (!TryReadCategoryLevels(accessor, ref metadataColumn, out categoryLevels, out error))
                        {
                            metadata = default(Metadata);
                            return(false);
                        }
                        break;

                    case TypeMetadata.TimestampMetadata:
                        if (arrayDetails.Type != feather.fbs.Type.INT64)
                        {
                            metadata = default(Metadata);
                            error    = $"Column {name} has Timestamp metadata, but isn't backed by an Int64 array";
                            return(false);
                        }

                        if (!TryReadTimestampPrecision(ref metadataColumn, out precision, out error))
                        {
                            metadata = default(Metadata);
                            return(false);
                        }

                        // note: this type is spec'd (https://github.com/wesm/feather/blob/master/cpp/src/feather/metadata.fbs#L25),
                        //  but it looks like R always writes it as an int64?
                        // Possibly a bug.
                        effectiveType = feather.fbs.Type.TIMESTAMP;

                        break;

                    case TypeMetadata.TimeMetadata:
                        if (arrayDetails.Type != feather.fbs.Type.INT64)
                        {
                            metadata = default(Metadata);
                            error    = $"Column {name} has Time metadata, but isn't backed by an Int64 array";
                            return(false);
                        }

                        if (!TryReadTimePrecision(ref metadataColumn, out precision, out error))
                        {
                            metadata = default(Metadata);
                            return(false);
                        }

                        // note: this type is spec'd (https://github.com/wesm/feather/blob/master/cpp/src/feather/metadata.fbs#L27),
                        //  but it looks like R always writes it as an int64?
                        // Possibly a bug.
                        effectiveType = feather.fbs.Type.TIME;

                        break;

                    case TypeMetadata.DateMetadata:
                        if (arrayDetails.Type != feather.fbs.Type.INT32)
                        {
                            metadata = default(Metadata);
                            error    = $"Column {name} has Date metadata, but isn't backed by an Int32 array";
                            return(false);
                        }

                        // note: this type is spec'd (https://github.com/wesm/feather/blob/master/cpp/src/feather/metadata.fbs#L26),
                        //  but it looks like R always writes it as an int32?
                        // Possibly a bug.
                        effectiveType = feather.fbs.Type.DATE;

                        break;

                    // no metadata, the array's own type is used as-is
                    case TypeMetadata.NONE: break;
                    }

                    ColumnSpec column;
                    if (!TryMakeColumnSpec(name, effectiveType, ref arrayDetails, categoryLevels, precision, out column, out error))
                    {
                        metadata = default(Metadata);
                        return(false);
                    }

                    columnSpecs[i] = column;
                }

                metadata =
                    new Metadata
                {
                    Columns = columnSpecs,
                    NumRows = metadataCTable.NumRows
                };
                error = null;
                return(true);
            }
        }