Ejemplo n.º 1
0
 /// <summary>
 /// Enumerates the metadata kind/type pairs exposed for column <paramref name="col"/>.
 /// </summary>
 /// <param name="col">Column index; checked to lie in [0, ColumnCount).</param>
 /// <returns>
 /// The input schema's metadata for <paramref name="col"/>. For a pivot column, only
 /// the kinds accepted by ShouldPreserveMetadata are kept.
 /// </returns>
 public IEnumerable<KeyValuePair<string, ColumnType>> GetMetadataTypes(int col)
 {
     _ectx.Check(0 <= col && col < ColumnCount);

     bool pivoted = IsPivot(col);
     var inputTypes = _inputSchema.GetMetadataTypes(col);
     if (pivoted)
     {
         // Pivot columns only keep the metadata kinds explicitly marked as preserved.
         return inputTypes.Where(entry => ShouldPreserveMetadata(entry.Key));
     }

     return inputTypes;
 }
Ejemplo n.º 2
0
        /// <summary>
        /// Create a schema shape out of the fully defined schema.
        /// </summary>
        /// <param name="schema">The schema to summarize; checked for null.</param>
        /// <returns>A shape with one entry per non-hidden column of <paramref name="schema"/>.</returns>
        public static SchemaShape Create(ISchema schema)
        {
            Contracts.CheckValue(schema, nameof(schema));

            var shapeColumns = new List<Column>();
            for (int index = 0; index < schema.ColumnCount; index++)
            {
                // Hidden columns do not contribute to the shape.
                if (schema.IsHidden(index))
                {
                    continue;
                }

                var columnType = schema.GetColumnType(index);

                // The known-size test comes first; any other vector is treated as variable-length.
                Column.VectorKind vectorKind = columnType.IsKnownSizeVector
                    ? Column.VectorKind.Vector
                    : columnType.IsVector
                        ? Column.VectorKind.VariableVector
                        : Column.VectorKind.Scalar;

                var rawKind = columnType.ItemType.RawKind;
                var keyFlag = columnType.ItemType.IsKey;

                // Only the metadata kinds (the keys) are recorded, not their types.
                var metadataKinds = schema.GetMetadataTypes(index)
                    .Select(entry => entry.Key)
                    .ToArray();

                shapeColumns.Add(new Column(schema.GetColumnName(index), vectorKind, rawKind, keyFlag, metadataKinds));
            }

            return new SchemaShape(shapeColumns.ToArray());
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Writes one line per metadata entry of column <paramref name="col"/>, ordered by
        /// metadata kind, nested one level inside <paramref name="itw"/>.
        /// </summary>
        /// <param name="itw">The writer that receives the output.</param>
        /// <param name="schema">The schema whose metadata is listed.</param>
        /// <param name="col">Column index; asserted to lie in [0, schema.ColumnCount).</param>
        /// <param name="showVals">When true, the metadata value is written after its kind and type.</param>
        private static void ShowMetadata(IndentingTextWriter itw, ISchema schema, int col, bool showVals)
        {
            Contracts.AssertValue(itw);
            Contracts.AssertValue(schema);
            Contracts.Assert(0 <= col && col < schema.ColumnCount);

            using (itw.Nest())
            {
                var orderedEntries = schema.GetMetadataTypes(col).OrderBy(entry => entry.Key);
                foreach (var entry in orderedEntries)
                {
                    Contracts.AssertNonEmpty(entry.Key);
                    Contracts.AssertValue(entry.Value);

                    var metadataType = entry.Value;
                    itw.Write("Metadata '{0}': {1}", entry.Key, metadataType);

                    if (showVals)
                    {
                        // Vector-valued metadata goes through a dedicated printer.
                        if (metadataType.IsVector)
                        {
                            ShowMetadataValueVec(itw, schema, col, entry.Key, metadataType);
                        }
                        else
                        {
                            ShowMetadataValue(itw, schema, col, entry.Key, metadataType);
                        }
                    }

                    itw.WriteLine();
                }
            }
        }
Ejemplo n.º 4
0
                    /// <summary>
                    /// Returns the parent's metadata types for <paramref name="col"/>. For the score
                    /// column, a pair built from _labelNameType under _metadataKind is prepended.
                    /// </summary>
                    /// <param name="col">Column index, forwarded unchanged to the parent.</param>
                    public IEnumerable<KeyValuePair<string, ColumnType>> GetMetadataTypes(int col)
                    {
                        var parentTypes = _parent.GetMetadataTypes(col);

                        if (col != _scoreCol)
                        {
                            return parentTypes;
                        }

                        // Only the score column gets the extra label-name entry, placed first.
                        var labelNamePair = _labelNameType.GetPair(_metadataKind);
                        return parentTypes.Prepend(labelNamePair);
                    }
Ejemplo n.º 5
0
            /// <summary>
            /// Captures the metadata kind/type pairs of column <paramref name="col"/> of
            /// <paramref name="schema"/> and builds a schema object over this row.
            /// </summary>
            /// <param name="schema">The schema that owns the column; asserted non-null.</param>
            /// <param name="col">Column index; asserted to lie in [0, schema.ColumnCount).</param>
            public MetadataRow(ISchema schema, int col)
            {
                Contracts.AssertValue(schema);
                Contracts.Assert(0 <= col && col < schema.ColumnCount);

                _metaSchema = schema;
                _col = col;

                // Snapshot the kind/type pairs once so they are not re-enumerated later.
                _map = schema.GetMetadataTypes(col).ToArray();

                // Constructed last, after the other fields have been assigned.
                _schema = new SchemaImpl(this);
            }
Ejemplo n.º 6
0
            /// <summary>
            /// Returns the parent's metadata types for <paramref name="col"/>. For the column
            /// tracked in _col, a pair built from _type under
            /// MetadataUtils.Kinds.SlotNames is prepended.
            /// </summary>
            /// <param name="col">Column index, forwarded unchanged to the parent.</param>
            public IEnumerable<KeyValuePair<string, ColumnType>> GetMetadataTypes(int col)
            {
                var parentTypes = _parent.GetMetadataTypes(col);

                if (col != _col)
                {
                    return parentTypes;
                }

                // The tracked column additionally advertises a slot-names entry, placed first.
                var slotNamesPair = _type.GetPair(MetadataUtils.Kinds.SlotNames);
                return parentTypes.Prepend(slotNamesPair);
            }
Ejemplo n.º 7
0
            /// <summary>
            /// Captures the metadata kind/type pairs of column <paramref name="col"/> of
            /// <paramref name="schema"/> and builds a schema object over this row.
            /// </summary>
            /// <param name="schema">The schema that owns the column; checked for null.</param>
            /// <param name="col">Column index; checked to lie in [0, schema.ColumnCount).</param>
            public MetadataRow(ISchema schema, int col)
            {
                Contracts.CheckValue(schema, nameof(schema));
                Contracts.CheckParam(0 <= col && col < schema.ColumnCount, nameof(col));

                _metaSchema = schema;
                _col = col;

                // Snapshot the kind/type pairs once so they are not re-enumerated later.
                _map = schema.GetMetadataTypes(col).ToArray();

                // Constructed last, after the other fields have been assigned.
                _schema = new SchemaImpl(this);
            }
Ejemplo n.º 8
0
            /// <summary>
            /// Captures the metadata kind/type pairs of column <paramref name="col"/> of
            /// <paramref name="schema"/>, keeping only the kinds accepted by
            /// <paramref name="takeMetadata"/>, and builds a schema object over this row.
            /// </summary>
            /// <param name="schema">The schema that owns the column; checked for null.</param>
            /// <param name="col">Column index; checked to lie in [0, schema.ColumnCount).</param>
            /// <param name="takeMetadata">Predicate over the metadata kind; checked for null.</param>
            public MetadataRow(ISchema schema, int col, Func<string, bool> takeMetadata)
            {
                Contracts.CheckValue(schema, nameof(schema));
                Contracts.CheckParam(0 <= col && col < schema.ColumnCount, nameof(col));
                Contracts.CheckValue(takeMetadata, nameof(takeMetadata));

                _metaSchema = schema;
                _col = col;

                // The filter is applied once here; the surviving pairs are stored as an array.
                var accepted = schema.GetMetadataTypes(col).Where(entry => takeMetadata(entry.Key));
                _map = accepted.ToArray();

                // Constructed last, after the other fields have been assigned.
                _schema = new SchemaImpl(this);
            }
        /// <summary>
        /// Enumerates the metadata kind/type pairs for column <paramref name="col"/>.
        /// </summary>
        /// <param name="col">Column index; checked to lie in [0, ColumnCount).</param>
        /// <returns>
        /// The input's metadata when MapColumnIndex reports a source column;
        /// otherwise the result of GetMetadataTypesCore for the mapped index.
        /// </returns>
        public IEnumerable<KeyValuePair<string, ColumnType>> GetMetadataTypes(int col)
        {
            Contracts.CheckParam(0 <= col && col < ColumnCount, nameof(col));

            int mapped = MapColumnIndex(out bool fromInput, col);

            if (!fromInput)
            {
                // Not a source column: the mapped index must address one of the infos.
                Contracts.Assert(0 <= mapped && mapped < InfoCount);
                return GetMetadataTypesCore(mapped);
            }

            return Input.GetMetadataTypes(mapped);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Enumerates the metadata kind/type pairs of column <paramref name="col"/> by
        /// forwarding to the input schema.
        /// </summary>
        /// <remarks>
        /// This is an iterator method, so nothing runs (including the range failure below)
        /// until the returned sequence is enumerated.
        /// </remarks>
        /// <param name="col">Column index; must be below the input schema's column count.</param>
        /// <exception cref="IndexOutOfRangeException">
        /// Raised during enumeration when <paramref name="col"/> is not below the input column count.
        /// </exception>
        public IEnumerable<KeyValuePair<string, DataViewType>> GetMetadataTypes(int col)
        {
            if (col >= _schemaInput.ColumnCount)
            {
                throw new IndexOutOfRangeException();
            }

            foreach (var entry in _schemaInput.GetMetadataTypes(col))
            {
                yield return entry;
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Builds a <see cref="Schema"/> from <paramref name="inputSchema"/>, copying every
        /// column's name and type together with each of its metadata entries.
        /// </summary>
        /// <param name="inputSchema">The schema to convert; checked for null.</param>
        /// <returns>The schema produced by the builder after all columns have been added.</returns>
        internal static Schema Create(ISchema inputSchema)
        {
            Contracts.CheckValue(inputSchema, nameof(inputSchema));

            var schemaBuilder = new SchemaBuilder();
            for (int column = 0; column < inputSchema.ColumnCount; column++)
            {
                var metadataBuilder = new MetadataBuilder();
                foreach (var entry in inputSchema.GetMetadataTypes(column))
                {
                    // The getter is generic over the metadata's raw type, which is only known at
                    // run time, so it is created through MarshalInvoke.
                    var getter = Utils.MarshalInvoke(GetMetadataGetterDelegate<int>, entry.Value.RawType, inputSchema, column, entry.Key);
                    metadataBuilder.Add(entry.Key, entry.Value, getter);
                }

                var columnName = inputSchema.GetColumnName(column);
                var columnType = inputSchema.GetColumnType(column);
                schemaBuilder.AddColumn(columnName, columnType, metadataBuilder.GetMetadata());
            }

            return schemaBuilder.GetSchema();
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Returns an enumerator on the metadata.
        /// </summary>
        /// <remarks>
        /// Indices below the input schema's column count are forwarded to the input schema.
        /// Remaining indices below ColumnCount yield a single pair taken from
        /// _names and _types. This is an iterator method, so the range
        /// failure is only raised once the result is enumerated.
        /// </remarks>
        /// <param name="col">Column index.</param>
        /// <exception cref="IndexOutOfRangeException">
        /// Raised during enumeration when <paramref name="col"/> is not below ColumnCount.
        /// </exception>
        public IEnumerable<KeyValuePair<string, ColumnType>> GetMetadataTypes(int col)
        {
            // A missing input schema contributes no columns.
            int inputCount = _schemaInput == null ? 0 : _schemaInput.ColumnCount;

            if (col < inputCount)
            {
                foreach (var entry in _schemaInput.GetMetadataTypes(col))
                {
                    yield return entry;
                }

                yield break;
            }

            if (col >= ColumnCount)
            {
                throw new IndexOutOfRangeException();
            }

            // Offset of the column among those that come after the input columns.
            int offset = col - inputCount;
            yield return new KeyValuePair<string, ColumnType>(_names[offset], _types[offset]);
        }
            /// <summary>
            /// Enumerates the metadata kind/type pairs for column <paramref name="col"/>.
            /// </summary>
            /// <remarks>
            /// The first _groupCount columns forward to the input column given by
            /// GroupIds. For later columns, each kind in _preservedMetadata is
            /// looked up on the input column given by KeepIds, and only the kinds that
            /// have a type there are reported.
            /// </remarks>
            /// <param name="col">Column index; validated by CheckColumnInRange.</param>
            public IEnumerable<KeyValuePair<string, ColumnType>> GetMetadataTypes(int col)
            {
                CheckColumnInRange(col);

                if (col < _groupCount)
                {
                    return _input.GetMetadataTypes(GroupIds[col]);
                }

                // Index of the column among those that follow the group columns.
                int keepIndex = col - _groupCount;
                var preserved = new List<KeyValuePair<string, ColumnType>>();

                foreach (var kind in _preservedMetadata)
                {
                    var metadataType = _input.GetMetadataTypeOrNull(kind, KeepIds[keepIndex]);
                    if (metadataType == null)
                    {
                        // The input column does not carry this kind of metadata.
                        continue;
                    }

                    preserved.Add(metadataType.GetPair(kind));
                }

                return preserved;
            }
Ejemplo n.º 14
0
        /// <summary>
        /// Create a schema shape out of the fully defined schema.
        /// </summary>
        /// <param name="schema">The schema to summarize; checked for null.</param>
        /// <returns>
        /// A shape with one entry per non-hidden column. Each entry carries a nested shape
        /// describing that column's metadata.
        /// </returns>
        public static SchemaShape Create(ISchema schema)
        {
            Contracts.CheckValue(schema, nameof(schema));

            var shapeColumns = new List<Column>();
            for (int index = 0; index < schema.ColumnCount; index++)
            {
                // Hidden columns do not contribute to the shape.
                if (schema.IsHidden(index))
                {
                    continue;
                }

                // Describe each metadata entry as a column of a nested shape.
                var metadataColumns = new List<Column>();
                foreach (var entry in schema.GetMetadataTypes(index))
                {
                    GetColumnArgs(entry.Value, out var metaVecKind, out var metaItemType, out var metaIsKey);
                    metadataColumns.Add(new Column(entry.Key, metaVecKind, metaItemType, metaIsKey));
                }

                // Columns without metadata use the _empty shape instead of a new one.
                SchemaShape metadataShape;
                if (metadataColumns.Count > 0)
                {
                    metadataShape = new SchemaShape(metadataColumns);
                }
                else
                {
                    metadataShape = _empty;
                }

                // Then describe the column itself.
                GetColumnArgs(schema.GetColumnType(index), out var vectorKind, out var itemType, out var keyFlag);
                shapeColumns.Add(new Column(schema.GetColumnName(index), vectorKind, itemType, keyFlag, metadataShape));
            }

            return new SchemaShape(shapeColumns);
        }
Ejemplo n.º 15
0
 /// <summary>
 /// Enumerates the metadata kind/type pairs for column <paramref name="col"/> by
 /// forwarding to the input column that ColMap associates with it.
 /// </summary>
 /// <param name="col">Column index; checked to lie in [0, ColumnCount).</param>
 public IEnumerable<KeyValuePair<string, ColumnType>> GetMetadataTypes(int col)
 {
     Contracts.CheckParam(0 <= col && col < ColumnCount, nameof(col));

     var inputColumn = ColMap[col];
     return Input.GetMetadataTypes(inputColumn);
 }
 /// <summary>
 /// Enumerates the metadata kind/type pairs for column <paramref name="col"/> by
 /// forwarding to the wrapped schema, using the index that _revmapping gives for it.
 /// </summary>
 /// <param name="col">Column index; translated through _revmapping without further validation here.</param>
 public IEnumerable<KeyValuePair<string, ColumnType>> GetMetadataTypes(int col)
 {
     var wrappedColumn = _revmapping[col];
     return _schema.GetMetadataTypes(wrappedColumn);
 }
Ejemplo n.º 17
0
        /// <summary>
        /// A helper method to query and write metadata to the stream.
        /// </summary>
        /// <param name="writer">A binary writer. If metadata is written for the indicated
        /// column, its base stream will be positioned just past the end of the written
        /// metadata table of contents; if no metadata is written, the position remains
        /// unchanged.</param>
        /// <param name="schema">The schema to query for metadata</param>
        /// <param name="col">The column we are attempting to get metadata for</param>
        /// <param name="ch">The channel to which we write any diagnostic information</param>
        /// <returns>The offset of the metadata table of contents, or 0 if there was
        /// no metadata</returns>
        private long WriteMetadata(BinaryWriter writer, ISchema schema, int col, IChannel ch)
        {
            _host.AssertValue(writer);
            _host.AssertValue(schema);
            _host.AssertValue(ch);
            _host.Assert(0 <= col && col < schema.ColumnCount);

            // WriteMetadataCore is generic over the metadata's raw type, which is only known at
            // run time, so we take the generic method definition and instantiate it per kind.
            WriteMetadataCoreDelegate del = WriteMetadataCore <int>;
            MethodInfo methInfo           = del.GetMethodInfo().GetGenericMethodDefinition();

            // Slots 3 and 4 (kind and type) are filled per metadata entry below; slot 5 is read
            // back after the call as the compression kind.
            object[] args = new object[] { writer.BaseStream, schema, col, null, null, null };

            // offsets[i] is the start of the i-th written block, offsets[i + 1] its end.
            List <long> offsets = new List <long>();

            offsets.Add(writer.BaseStream.Position);
            var metadataInfos = new List <Tuple <string, IValueCodec, CompressionKind> >();
            var kinds         = new HashSet <string>();

            // Write all metadata blocks for this column to the file, one after the other, keeping
            // track of the location and size of each for when we write the metadata table of contents.
            // (To be clear, this specific layout is not required by the format.)

            foreach (var pair in schema.GetMetadataTypes(col))
            {
                _host.Check(!string.IsNullOrEmpty(pair.Key), "Metadata with null or empty kind detected, disallowed");
                _host.Check(pair.Value != null, "Metadata with null type detected, disallowed");
                if (!kinds.Add(pair.Key))
                {
                    // Report the column as well as the kind; previously the column name was passed
                    // as a format argument but the message had no placeholder for it.
                    throw _host.Except("Metadata with duplicate kind '{0}' encountered for column '{1}', disallowed", pair.Key, schema.GetColumnName(col));
                }
                args[3] = pair.Key;
                args[4] = pair.Value;
                IValueCodec codec = (IValueCodec)methInfo.MakeGenericMethod(pair.Value.RawType).Invoke(this, args);
                if (codec == null)
                {
                    // Nothing was written.
                    ch.Warning("Could not get codec for type {0}, dropping column '{1}' index {2} metadata kind '{3}'",
                               pair.Value, schema.GetColumnName(col), col, pair.Key);
                    continue;
                }
                offsets.Add(writer.BaseStream.Position);
                _host.CheckIO(offsets[offsets.Count - 1] > offsets[offsets.Count - 2], "Bad offsets detected during write");
                metadataInfos.Add(Tuple.Create(pair.Key, codec, (CompressionKind)args[5]));
            }
            if (metadataInfos.Count == 0)
            {
                _host.CheckIO(writer.BaseStream.Position == offsets[0], "unexpected offset after no writing of metadata");
                return 0;
            }
            // Write the metadata table of contents just past the end of the last metadata block.

            // *** Metadata TOC format ***
            // LEB128 int: Number of metadata TOC entries
            // Metadata TOC entries: As many of these as indicated by the count above

            // Tracks where the stream should be after each write so corruption is caught early.
            long expectedPosition = offsets[metadataInfos.Count];

            writer.WriteLeb128Int((ulong)metadataInfos.Count);
            expectedPosition += Utils.Leb128IntLength((ulong)metadataInfos.Count);
            for (int i = 0; i < metadataInfos.Count; ++i)
            {
                // *** Metadata TOC entry format ***
                // string: metadata kind
                // codec definition: metadata codec
                // CompressionKind(byte): block compression strategy
                // long: Offset into the stream of the start of the metadata block
                // LEB128 int: Byte size of the metadata block in the file

                writer.Write(metadataInfos[i].Item1);
                int stringLen = Encoding.UTF8.GetByteCount(metadataInfos[i].Item1);
                expectedPosition += Utils.Leb128IntLength((ulong)stringLen) + stringLen;
                _host.CheckIO(writer.BaseStream.Position == expectedPosition, "unexpected offsets after metadata table of contents kind");

                expectedPosition += _factory.WriteCodec(writer.BaseStream, metadataInfos[i].Item2);
                _host.CheckIO(writer.BaseStream.Position == expectedPosition, "unexpected offsets after metadata table of contents type description");

                writer.Write((byte)metadataInfos[i].Item3);
                expectedPosition++;

                writer.Write(offsets[i]);
                expectedPosition += sizeof(long);

                long blockSize = offsets[i + 1] - offsets[i];
                writer.WriteLeb128Int((ulong)blockSize);
                expectedPosition += Utils.Leb128IntLength((ulong)blockSize);
                _host.CheckIO(writer.BaseStream.Position == expectedPosition, "unexpected offsets after metadata table of contents location");
            }
            _host.Assert(metadataInfos.Count == offsets.Count - 1);
            return offsets[metadataInfos.Count];
        }