Esempio n. 1
0
        /// <summary>
        /// Attempts to read and validate the metadata of a feather file exposed through a memory-mapped file.
        ///
        /// The layout checked here is: a magic value at offset 0, the serialized metadata table,
        /// a <c>uint</c> holding the metadata length, and a trailing magic value ending at <paramref name="size"/>.
        /// </summary>
        /// <param name="file">Memory-mapped file to read from.</param>
        /// <param name="size">Length in bytes of the data; the trailing magic and metadata are located relative to it.</param>
        /// <param name="metadata">On success, the parsed column specs and row count; otherwise <c>default(Metadata)</c>.</param>
        /// <param name="error">On failure, a description of what was wrong; <c>null</c> on success.</param>
        /// <returns><c>true</c> if the metadata was read and every column could be described, <c>false</c> otherwise.</returns>
        static bool TryReadMetaData(MemoryMappedFile file, long size, out Metadata metadata, out string error)
        {
            // there must at least be room for the leading and trailing magic values
            if (size < FeatherMagic.MAGIC_HEADER_SIZE * 2)
            {
                metadata = default(Metadata);
                error    = $"File too small ({size:N0} bytes) to be a valid feather file";
                return(false);
            }

            using (var accessor = file.CreateViewAccessor())
            {
                var leadingHeader = accessor.ReadInt32(0);

                if (leadingHeader != FeatherMagic.MAGIC_HEADER)
                {
                    metadata = default(Metadata);
                    error    = "Magic header malformed";
                    return(false);
                }

                var trailingHeader = accessor.ReadInt32(size - FeatherMagic.MAGIC_HEADER_SIZE);

                if (trailingHeader != FeatherMagic.MAGIC_HEADER)
                {
                    metadata = default(Metadata);
                    error    = "Magic footer malformed";
                    return(false);
                }

                // the metadata length sits immediately before the trailing magic value
                var metadataSize = accessor.ReadUInt32(size - FeatherMagic.MAGIC_HEADER_SIZE - sizeof(uint));

                // computed as a long, so an oversized metadataSize shows up as a too-small (or negative) start
                var metadataStart = size - FeatherMagic.MAGIC_HEADER_SIZE - sizeof(uint) - metadataSize;
                if (metadataStart < FeatherMagic.MAGIC_HEADER_SIZE || metadataSize > int.MaxValue)
                {
                    metadata = default(Metadata);
                    error    = $"Metadata size ({metadataSize:N0}) is invalid";
                    return(false);
                }

                var metadataBytes = new byte[metadataSize];
                accessor.ReadArray(metadataStart, metadataBytes, 0, (int)metadataSize);

                // note: It'd be nice to not actually use flatbuffers for this,
                //   kind of a heavy (re)build dependency for reading, like, 4
                //   things
                var metadataBuffer = new ByteBuffer(metadataBytes);
                var metadataCTable = CTable.GetRootAsCTable(metadataBuffer);

                if (metadataCTable.Version != FeatherMagic.FEATHER_VERSION)
                {
                    error    = $"Unexpected version {metadataCTable.Version}, only {FeatherMagic.FEATHER_VERSION} is supported";
                    metadata = default(Metadata);
                    return(false);
                }

                if (metadataCTable.ColumnsLength <= 0)
                {
                    error    = $"Invalid number of columns: {metadataCTable.ColumnsLength:N0}";
                    metadata = default(Metadata);
                    return(false);
                }

                var columnSpecs = new ColumnSpec[metadataCTable.ColumnsLength];
                for (var i = 0; i < columnSpecs.Length; i++)
                {
                    // a malformed file can leave a column entry absent; report it rather than
                    //   letting Nullable<T>.Value throw out of a Try* method
                    var maybeColumn = metadataCTable.Columns(i);
                    if (!maybeColumn.HasValue)
                    {
                        metadata = default(Metadata);
                        error    = $"Column at index {i:N0} is missing from the metadata";
                        return(false);
                    }

                    var metadataColumn = maybeColumn.Value;
                    var name           = metadataColumn.Name;
                    var metadataType   = metadataColumn.MetadataType;

                    string[] categoryLevels         = null;
                    DateTimePrecisionType precision = default(DateTimePrecisionType);

                    // same reasoning as above: the description of the backing array may be absent
                    var maybeValues = metadataColumn.Values;
                    if (!maybeValues.HasValue)
                    {
                        metadata = default(Metadata);
                        error    = $"Column {name} has no description of its values";
                        return(false);
                    }

                    var arrayDetails  = maybeValues.Value;
                    var effectiveType = arrayDetails.Type;

                    // columns with extra type metadata are validated against their backing array type
                    //   and then reported with a more specific effective type
                    switch (metadataType)
                    {
                    case TypeMetadata.CategoryMetadata:
                        if (!TryReadCategoryLevels(accessor, ref metadataColumn, out categoryLevels, out error))
                        {
                            metadata = default(Metadata);
                            return(false);
                        }
                        break;

                    case TypeMetadata.TimestampMetadata:
                        if (arrayDetails.Type != feather.fbs.Type.INT64)
                        {
                            metadata = default(Metadata);
                            error    = $"Column {name} has Timestamp metadata, but isn't backed by an Int64 array";
                            return(false);
                        }

                        if (!TryReadTimestampPrecision(ref metadataColumn, out precision, out error))
                        {
                            metadata = default(Metadata);
                            return(false);
                        }

                        // note: this type is spec'd (https://github.com/wesm/feather/blob/master/cpp/src/feather/metadata.fbs#L25),
                        //  but it looks like R always writes it as an int64?
                        // Possibly a bug.
                        effectiveType = feather.fbs.Type.TIMESTAMP;

                        break;

                    case TypeMetadata.TimeMetadata:
                        if (arrayDetails.Type != feather.fbs.Type.INT64)
                        {
                            metadata = default(Metadata);
                            error    = $"Column {name} has Time metadata, but isn't backed by an Int64 array";
                            return(false);
                        }

                        if (!TryReadTimePrecision(ref metadataColumn, out precision, out error))
                        {
                            metadata = default(Metadata);
                            return(false);
                        }

                        // note: this type is spec'd (https://github.com/wesm/feather/blob/master/cpp/src/feather/metadata.fbs#L27),
                        //  but it looks like R always writes it as an int64?
                        // Possibly a bug.
                        effectiveType = feather.fbs.Type.TIME;

                        break;

                    case TypeMetadata.DateMetadata:
                        if (arrayDetails.Type != feather.fbs.Type.INT32)
                        {
                            metadata = default(Metadata);
                            error    = $"Column {name} has Date metadata, but isn't backed by an Int32 array";
                            return(false);
                        }

                        // note: this type is spec'd (https://github.com/wesm/feather/blob/master/cpp/src/feather/metadata.fbs#L26),
                        //  but it looks like R always writes it as an int32?
                        // Possibly a bug.
                        effectiveType = feather.fbs.Type.DATE;

                        break;

                    // no extra metadata: the array's own type is used as-is
                    case TypeMetadata.NONE: break;
                    }

                    ColumnSpec column;
                    if (!TryMakeColumnSpec(name, effectiveType, ref arrayDetails, categoryLevels, precision, out column, out error))
                    {
                        metadata = default(Metadata);
                        return(false);
                    }

                    columnSpecs[i] = column;
                }

                metadata =
                    new Metadata
                {
                    Columns = columnSpecs,
                    NumRows = metadataCTable.NumRows
                };
                error = null;
                return(true);
            }
        }