Esempio n. 1
0
        /// <summary>
        /// Builds a getter that reads column <paramref name="col"/> of <paramref name="row"/> as
        /// <typeparamref name="TSrc"/> and passes it through the string conversion registered for
        /// <paramref name="typeSrc"/>, writing the result into the caller's <see cref="StringBuilder"/>.
        /// </summary>
        /// <typeparam name="TSrc">Raw type of the source column; asserted to equal <c>typeSrc.RawType</c>.</typeparam>
        private static ValueGetter<StringBuilder> GetGetterAsStringBuilderCore<TSrc>(ColumnType typeSrc, IRow row, int col)
        {
            Contracts.Assert(typeof(TSrc) == typeSrc.RawType);

            var readSource = row.GetGetter<TSrc>(col);
            var toText = Conversions.Instance.GetStringConversion<TSrc>(typeSrc);

            // Scratch value captured by the returned delegate and reused on every call.
            var scratch = default(TSrc);

            void Render(ref StringBuilder dst)
            {
                readSource(ref scratch);
                toText(in scratch, ref dst);
            }

            return Render;
        }
            /// <summary>
            /// Creates a single-column schema named <paramref name="columnName"/> of type
            /// <paramref name="columnType"/>, and records the vector size and slot-name availability of
            /// column <paramref name="featureCol"/> in <paramref name="parentSchema"/>.
            /// </summary>
            /// <param name="ectx">Exception context; may be null.</param>
            /// <param name="columnName">Name of the only column (index 0); must be non-empty.</param>
            /// <param name="columnType">Type of the only column.</param>
            /// <param name="parentSchema">Schema that contains the feature column; must be non-null.</param>
            /// <param name="featureCol">Index of the feature column within <paramref name="parentSchema"/>.</param>
            public FeatureContributionSchema(IExceptionContext ectx, string columnName, ColumnType columnType, Schema parentSchema, int featureCol)
            {
                // Validation order is kept: context, then parent schema, then column name.
                Contracts.CheckValueOrNull(ectx);
                Contracts.CheckValue(parentSchema, nameof(parentSchema));
                _ectx = ectx;
                _ectx.CheckNonEmpty(columnName, nameof(columnName));

                _parentSchema = parentSchema;
                _featureCol = featureCol;
                _featureVectorSize = _parentSchema[_featureCol].Type.VectorSize;
                _hasSlotNames = _parentSchema[_featureCol].HasSlotNames(_featureVectorSize);

                _names = new[] { columnName };
                _types = new[] { columnType };

                // The single output column always lives at index 0.
                var nameToIndex = new Dictionary<string, int>();
                nameToIndex.Add(columnName, 0);
                _columnNameMap = nameToIndex;
            }
Esempio n. 3
0
 /// <summary>
 /// Delegates builder creation to <c>NormalizeTransform.BinUtils.CreateBuilder</c>, passing this
 /// instance along with the supplied arguments.
 /// </summary>
 internal override IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, ColumnType srcType, IRowCursor cursor)
 {
     return NormalizeTransform.BinUtils.CreateBuilder(this, host, srcIndex, srcType, cursor);
 }
 /// <summary>
 /// Stores the column name, its source name and the source type as given; no validation is done here.
 /// </summary>
 public ColInfo(string name, string source, ColumnType type)
 {
     (Name, Source, TypeSrc) = (name, source, type);
 }
 /// <summary>
 /// Returns true only when <paramref name="scoreType"/> is a known-size vector whose item type is
 /// <c>NumberType.Float</c>.
 /// </summary>
 private static bool OutputTypeMatches(ColumnType scoreType)
 {
     if (!scoreType.IsKnownSizeVector)
     {
         return false;
     }

     return scoreType.ItemType == NumberType.Float;
 }
Esempio n. 6
0
        // REVIEW: It would be nice to support propagation of select metadata.
        /// <summary>
        /// Creates a data view that applies <paramref name="mapper"/> to column <paramref name="src"/> of
        /// <paramref name="input"/>, producing column <paramref name="dst"/>. When the actual type of the
        /// input column differs from <paramref name="typeSrc"/>, a standard conversion is looked up and
        /// the implementation is instantiated via reflection over the column's raw type. The result is
        /// wrapped in an <c>OpaqueDataView</c>.
        /// </summary>
        /// <typeparam name="TSrc">Input type of the mapper; must equal <c>typeSrc.RawType</c>.</typeparam>
        /// <typeparam name="TDst">Output type of the mapper; must equal <c>typeDst.RawType</c>.</typeparam>
        /// <param name="keyValueGetter">Optional; only allowed when the item type of <paramref name="typeDst"/> is a key.</param>
        /// <param name="slotNamesGetter">Optional; only allowed when <paramref name="typeDst"/> is a known-size vector.</param>
        public static IDataView Create<TSrc, TDst>(IHostEnvironment env, string name, IDataView input,
            string src, string dst, ColumnType typeSrc, ColumnType typeDst, ValueMapper<TSrc, TDst> mapper,
            ValueGetter<VBuffer<ReadOnlyMemory<char>>> keyValueGetter = null, ValueGetter<VBuffer<ReadOnlyMemory<char>>> slotNamesGetter = null)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckNonEmpty(name, nameof(name));
            env.CheckValue(input, nameof(input));
            env.CheckNonEmpty(src, nameof(src));
            env.CheckNonEmpty(dst, nameof(dst));
            env.CheckValue(typeSrc, nameof(typeSrc));
            env.CheckValue(typeDst, nameof(typeDst));
            env.CheckValue(mapper, nameof(mapper));
            env.Check(keyValueGetter == null || typeDst.ItemType.IsKey);
            env.Check(slotNamesGetter == null || typeDst.IsKnownSizeVector);

            // The declared column types must agree with the mapper's generic arguments.
            if (typeSrc.RawType != typeof(TSrc))
            {
                throw env.ExceptParam(nameof(mapper),
                    "The source column type '{0}' doesn't match the input type of the mapper", typeSrc);
            }

            if (typeDst.RawType != typeof(TDst))
            {
                throw env.ExceptParam(nameof(mapper),
                    "The destination column type '{0}' doesn't match the output type of the mapper", typeDst);
            }

            if (!input.Schema.TryGetColumnIndex(src, out int srcIndex))
            {
                throw env.ExceptParam(nameof(src), "The input data doesn't have a column named '{0}'", src);
            }

            var actualType = input.Schema.GetColumnType(srcIndex);

            // REVIEW: Ideally this should support vector-type conversion. It currently doesn't.
            Delegate converter = null;
            bool isIdentity = actualType.SameSizeAndItemType(typeSrc);

            if (!isIdentity && !Conversions.Instance.TryGetStandardConversion(actualType, typeSrc, out converter, out isIdentity))
            {
                throw env.ExceptParam(nameof(mapper),
                    "The type of column '{0}', '{1}', cannot be converted to the input type of the mapper '{2}'",
                    src, actualType, typeSrc);
            }

            var column = new Column(src, dst);

            if (isIdentity)
            {
                // No conversion step is needed: the mapper consumes the column directly.
                var direct = new Impl<TSrc, TDst, TDst>(env, name, input, column, typeDst, mapper, keyValueGetter: keyValueGetter, slotNamesGetter: slotNamesGetter);
                return new OpaqueDataView(direct);
            }

            // The raw type of the input column is only known at run time, so the generic CreateImpl is
            // closed over it via reflection. The <int, int, int> instantiation only serves to obtain
            // the generic method definition.
            Func<IHostEnvironment, string, IDataView, Column, ColumnType, ValueMapper<int, int>,
                ValueMapper<int, int>, ValueGetter<VBuffer<ReadOnlyMemory<char>>>, ValueGetter<VBuffer<ReadOnlyMemory<char>>>,
                Impl<int, int, int>> template = CreateImpl<int, int, int>;
            var genericDefinition = template.GetMethodInfo().GetGenericMethodDefinition();
            var closedMethod = genericDefinition.MakeGenericMethod(actualType.RawType, typeof(TSrc), typeof(TDst));
            var arguments = new object[] { env, name, input, column, typeDst, converter, mapper, keyValueGetter, slotNamesGetter };
            var converted = (IDataView)closedMethod.Invoke(null, arguments);

            return new OpaqueDataView(converted);
        }
 /// <summary>
 /// Stores the column name, column type and metadata row as given; no validation is done here.
 /// </summary>
 public RowMapperColumnInfo(string name, ColumnType type, IRow metadata)
 {
     this.Name = name;
     this.ColType = type;
     this.Metadata = metadata;
 }
Esempio n. 8
0
 /// <summary>
 /// Forwards <paramref name="name"/>, <paramref name="meta"/> and <paramref name="type"/> to the base
 /// constructor and stores <paramref name="getter"/>.
 /// </summary>
 public GetterImpl(string name, IRow meta, ColumnType type, ValueGetter<T> getter)
     : base(name, meta, type)
 {
     // A null getter is explicitly permitted.
     Contracts.CheckValueOrNull(getter);
     this._getter = getter;
 }
Esempio n. 9
0
        /// <summary>
        /// Validates that every column named in <paramref name="sources"/> exists in
        /// <paramref name="inputSchema"/>, is not a key, and shares the item type of the first source.
        /// It then builds the shape of the concatenated output column.
        /// </summary>
        /// <param name="inputSchema">Schema shape in which the source columns are looked up.</param>
        /// <param name="name">Name of the output column.</param>
        /// <param name="sources">Names of the columns to concatenate; asserted non-empty.</param>
        /// <returns>
        /// A column shape named <paramref name="name"/> that is a variable vector if any source is one
        /// (otherwise a fixed vector), carrying IsNormalized, CategoricalSlotRanges and SlotNames
        /// metadata where the sources warrant them.
        /// </returns>
        private SchemaShape.Column CheckInputsAndMakeColumn(
            SchemaShape inputSchema, string name, string[] sources)
        {
            _host.AssertNonEmpty(sources);

            // If any input is a var vector, so is the output.
            bool varVector = false;
            // If any input is not normalized, the output is not normalized.
            bool isNormalized = true;
            // If any input has categorical indices, so will the output.
            bool hasCategoricals = false;
            // If any is scalar or had slot names, then the output will have slot names.
            bool hasSlotNames = false;

            // We will get the item type from the first column.
            ColumnType itemType = null;

            for (int i = 0; i < sources.Length; ++i)
            {
                if (!inputSchema.TryFindColumn(sources[i], out var col))
                {
                    throw _host.ExceptSchemaMismatch(nameof(inputSchema), "input", sources[i]);
                }
                if (i == 0)
                {
                    itemType = col.ItemType;
                }
                // For the sake of an estimator I am going to have a hard policy of no keys.
                // Appending keys makes no real sense anyway.
                if (col.IsKey)
                {
                    throw _host.Except($"Column '{sources[i]}' is key. " +
                                       "Concatenation of keys is unsupported.");
                }
                if (!col.ItemType.Equals(itemType))
                {
                    throw _host.Except($"Column '{sources[i]}' has values of {col.ItemType} " +
                                       $"which is not the same as earlier observed type of {itemType}.");
                }
                varVector |= col.Kind == SchemaShape.Column.VectorKind.VariableVector;
                isNormalized &= col.IsNormalized();
                hasCategoricals |= HasCategoricals(col);
                hasSlotNames |= col.Kind == SchemaShape.Column.VectorKind.Scalar || col.HasSlotNames();
            }

            var vecKind = varVector
                ? SchemaShape.Column.VectorKind.VariableVector
                : SchemaShape.Column.VectorKind.Vector;

            var meta = new List<SchemaShape.Column>();

            if (isNormalized)
            {
                meta.Add(new SchemaShape.Column(MetadataUtils.Kinds.IsNormalized, SchemaShape.Column.VectorKind.Scalar, BoolType.Instance, false));
            }
            if (hasCategoricals)
            {
                meta.Add(new SchemaShape.Column(MetadataUtils.Kinds.CategoricalSlotRanges, SchemaShape.Column.VectorKind.Vector, NumberType.I4, false));
            }
            if (hasSlotNames)
            {
                meta.Add(new SchemaShape.Column(MetadataUtils.Kinds.SlotNames, SchemaShape.Column.VectorKind.Vector, TextType.Instance, false));
            }

            return new SchemaShape.Column(name, vecKind, itemType, false, new SchemaShape(meta));
        }
Esempio n. 10
0
 /// <summary>
 /// Forwards <paramref name="name"/>, <paramref name="meta"/> and <paramref name="type"/> to the base
 /// constructor and stores <paramref name="value"/>. <paramref name="type"/> is asserted to be primitive.
 /// </summary>
 public ConstOneImpl(string name, IRow meta, ColumnType type, T value)
     : base(name, meta, type)
 {
     Contracts.Assert(type.IsPrimitive);
     this._value = value;
 }
Esempio n. 11
0
 /// <summary>
 /// Forwards <paramref name="name"/>, <paramref name="meta"/> and <paramref name="type"/> to the base
 /// constructor and stores the vector <paramref name="value"/>.
 /// </summary>
 public ConstVecImpl(string name, IRow meta, ColumnType type, VBuffer<T> value)
     : base(name, meta, type)
     => _value = value;
Esempio n. 12
0
 /// <summary>
 /// Forwards all arguments to the base constructor; this type adds no state of its own here.
 /// </summary>
 public InactiveImpl(string name, IRow meta, ColumnType type)
     : base(name, meta, type) { }
Esempio n. 13
0
        /// <summary>
        /// Builds a getter for column <paramref name="col"/> of <paramref name="row"/> that yields values
        /// of <typeparamref name="TDst"/>, using the standard conversion from <paramref name="typeSrc"/>
        /// to <paramref name="typeDst"/>. When that conversion is the identity, the row's own getter is
        /// returned directly (re-cast), without a wrapper.
        /// </summary>
        /// <typeparam name="TSrc">Raw type of the source column; asserted to equal <c>typeSrc.RawType</c>.</typeparam>
        /// <typeparam name="TDst">Raw type of the result; asserted to equal <c>typeDst.RawType</c>.</typeparam>
        private static ValueGetter<TDst> GetGetterAsCore<TSrc, TDst>(ColumnType typeSrc, ColumnType typeDst, IRow row, int col)
        {
            Contracts.Assert(typeof(TSrc) == typeSrc.RawType);
            Contracts.Assert(typeof(TDst) == typeDst.RawType);

            var readSource = row.GetGetter<TSrc>(col);
            var convert = Conversions.Instance.GetStandardConversion<TSrc, TDst>(typeSrc, typeDst, out bool isIdentity);

            if (!isIdentity)
            {
                // Scratch value captured by the returned delegate and reused on every call.
                var scratch = default(TSrc);

                return (ref TDst dst) =>
                {
                    readSource(ref scratch);
                    convert(in scratch, ref dst);
                };
            }

            // Identity conversion implies both type arguments are the same, so the cast is safe.
            Contracts.Assert(typeof(TSrc) == typeof(TDst));
            return (ValueGetter<TDst>)(Delegate)readSource;
        }
Esempio n. 14
0
 /// <summary>
 /// Convenience overload: forwards to the two-argument overload with <c>true</c> as the second argument.
 /// </summary>
 public static string TestGetLabelGetter(ColumnType type)
     => TestGetLabelGetter(type, true);
Esempio n. 15
0
 /// <summary>
 /// Returns the is-NA predicate registered for the item type of <paramref name="type"/>, as an
 /// untyped <see cref="Delegate"/>.
 /// </summary>
 private Delegate GetIsNADelegate<T>(ColumnType type)
     => Conversions.Instance.GetIsNAPredicate<T>(type.ItemType);
Esempio n. 16
0
        /// <summary>
        /// Fetches the non-vector metadata value of kind <paramref name="kind"/> for column
        /// <paramref name="col"/>, converts it to text and writes it to <paramref name="itw"/> in the
        /// form <c>: 'value'</c>.
        /// </summary>
        /// <typeparam name="T">Raw type of the metadata value; asserted to equal <c>type.RawType</c>.</typeparam>
        private static void ShowMetadataValue<T>(IndentingTextWriter itw, ISchema schema, int col, string kind, ColumnType type)
        {
            Contracts.AssertValue(itw);
            Contracts.AssertValue(schema);
            Contracts.Assert(0 <= col && col < schema.ColumnCount);
            Contracts.AssertNonEmpty(kind);
            Contracts.AssertValue(type);
            Contracts.Assert(!type.IsVector);
            Contracts.Assert(type.RawType == typeof(T));

            var toText = Conversions.Instance.GetStringConversion<T>(type);

            T metadataValue = default(T);
            StringBuilder text = null;

            schema.GetMetadata(kind, col, ref metadataValue);
            toText(in metadataValue, ref text);

            itw.Write(": '{0}'", text);
        }
Esempio n. 17
0
 /// <summary>
 /// Pairs a metadata kind string with a column type, with the kind as the key and the type as the
 /// value. Handy when implementing GetMetadataTypes(col).
 /// </summary>
 /// <param name="type">The column type; must not be null.</param>
 /// <param name="kind">The metadata kind string used as the pair's key.</param>
 public static KeyValuePair<string, ColumnType> GetPair(this ColumnType type, string kind)
 {
     Contracts.CheckValue(type, nameof(type));

     var pair = new KeyValuePair<string, ColumnType>(kind, type);
     return pair;
 }
Esempio n. 18
0
        /// <summary>
        /// Non-generic entry point for displaying vector-valued metadata. Writes a placeholder message
        /// when the item type is neither a standard scalar nor a key; otherwise dispatches via
        /// reflection to the generic overload closed over the item's raw type.
        /// </summary>
        private static void ShowMetadataValueVec(IndentingTextWriter itw, ISchema schema, int col, string kind, ColumnType type)
        {
            Contracts.AssertValue(itw);
            Contracts.AssertValue(schema);
            Contracts.Assert(0 <= col && col < schema.ColumnCount);
            Contracts.AssertNonEmpty(kind);
            Contracts.AssertValue(type);
            Contracts.Assert(type.IsVector);

            if (!(type.ItemType.IsStandardScalar || type.ItemType.IsKey))
            {
                itw.Write(": Can't display value of this type");
                return;
            }

            // The <int> instantiation only serves to obtain the generic method definition.
            Action<IndentingTextWriter, ISchema, int, string, ColumnType> template = ShowMetadataValueVec<int>;
            var genericDefinition = template.GetMethodInfo().GetGenericMethodDefinition();
            var closedMethod = genericDefinition.MakeGenericMethod(type.ItemType.RawType);

            closedMethod.Invoke(null, new object[] { itw, schema, col, kind, type });
        }
 /// <summary>
 /// Returns true only when <paramref name="scoreType"/> is exactly <c>NumberType.Float</c>.
 /// </summary>
 private static bool OutputTypeMatches(ColumnType scoreType)
     => scoreType == NumberType.Float;
Esempio n. 20
0
        /// <summary>
        /// Fetches the vector-valued metadata of kind <paramref name="kind"/> for column
        /// <paramref name="col"/> and writes it to <paramref name="itw"/>: first a header with the
        /// vector's Length and the number of entries returned by <c>GetValues()</c>, then the items as
        /// <c>[index] 'value'</c>, ten per line.
        /// </summary>
        /// <typeparam name="T">Raw item type of the vector; asserted to equal <c>type.ItemType.RawType</c>.</typeparam>
        private static void ShowMetadataValueVec<T>(IndentingTextWriter itw, ISchema schema, int col, string kind, ColumnType type)
        {
            Contracts.AssertValue(itw);
            Contracts.AssertValue(schema);
            Contracts.Assert(0 <= col && col < schema.ColumnCount);
            Contracts.AssertNonEmpty(kind);
            Contracts.AssertValue(type);
            Contracts.Assert(type.IsVector);
            Contracts.Assert(type.ItemType.RawType == typeof(T));

            var conv = Conversions.Instance.GetStringConversion<T>(type.ItemType);

            var value = default(VBuffer<T>);

            schema.GetMetadata(kind, col, ref value);

            // Count must use format index 1; index 0 would repeat the Length value.
            itw.Write(": Length={0}, Count={1}", value.Length, value.GetValues().Length);

            using (itw.Nest())
            {
                var sb = default(StringBuilder);
                int count = 0;
                foreach (var item in value.Items())
                {
                    // Start a new line before every tenth item (including the first); otherwise separate with a comma.
                    if ((count % 10) == 0)
                    {
                        itw.WriteLine();
                    }
                    else
                    {
                        itw.Write(", ");
                    }
                    var val = item.Value;
                    conv(in val, ref sb);
                    itw.Write("[{0}] '{1}'", item.Key, sb);
                    count++;
                }
            }
        }
 /// <summary>
 /// Stores <paramref name="type"/>, which may be null.
 /// </summary>
 protected MetadataInfo(ColumnType type)
 {
     // Null is an accepted value for the type.
     Contracts.AssertValueOrNull(type);
     this.Type = type;
 }
        /// <summary>
        /// Reads the KeyValues metadata of the source column for info <paramref name="iinfo"/>, checks
        /// that its length equals the key count of <paramref name="typeKey"/>, densifies it, and wraps
        /// it in a <c>KeyToValueMap&lt;TKey, TValue&gt;</c>.
        /// </summary>
        /// <typeparam name="TKey">Raw item type of <paramref name="typeKey"/> (asserted).</typeparam>
        /// <typeparam name="TValue">Raw item type of <paramref name="typeVal"/> (asserted).</typeparam>
        private KeyToValueMap GetKeyMetadata<TKey, TValue>(int iinfo, ColumnType typeKey, ColumnType typeVal)
        {
            Host.Assert(0 <= iinfo && iinfo < Infos.Length);
            Host.AssertValue(typeKey);
            Host.AssertValue(typeVal);
            Host.Assert(typeKey.ItemType.RawType == typeof(TKey));
            Host.Assert(typeVal.ItemType.RawType == typeof(TValue));

            VBuffer<TValue> keyValues = default(VBuffer<TValue>);
            Source.Schema.GetMetadata<VBuffer<TValue>>(MetadataUtils.Kinds.KeyValues, Infos[iinfo].Source, ref keyValues);

            // There must be one key value per possible key.
            Host.Check(keyValues.Length == typeKey.ItemType.KeyCount);

            // Densify before handing the backing Values array to the map.
            VBufferUtils.Densify<TValue>(ref keyValues);

            var map = new KeyToValueMap<TKey, TValue>(this, typeKey.ItemType.AsKey, typeVal.ItemType.AsPrimitive, keyValues.Values, iinfo);
            return map;
        }
Esempio n. 23
0
 /// <summary>
 /// Returns a <c>U4</c> key type with minimum 0 and a count equal to the vector size of
 /// <paramref name="scoreType"/>. <paramref name="mapper"/> is not used.
 /// </summary>
 private static ColumnType GetPredColType(ColumnType scoreType, ISchemaBoundRowMapper mapper)
     => new KeyType(DataKind.U4, 0, scoreType.VectorSize);
Esempio n. 24
0
 /// <summary>
 /// Returns the result of <c>GetterDelegateCore&lt;TValue&gt;</c> for the given column and type,
 /// cast to an untyped <see cref="Delegate"/>.
 /// </summary>
 private Delegate CreateGetterDelegateCore<TValue>(int col, ColumnType type)
     => (Delegate)GetterDelegateCore<TValue>(col, type);
Esempio n. 25
0
 /// <summary>
 /// Forwards <paramref name="kind"/> and <paramref name="type"/> to the base constructor and stores
 /// <paramref name="value"/>. Fails the check when the raw type of <paramref name="type"/> is not
 /// <typeparamref name="TValue"/>.
 /// </summary>
 public GetterInfoPrimitive(string kind, ColumnType type, TValue value)
     : base(kind, type)
 {
     Contracts.Check(type.RawType == typeof(TValue), "Incompatible types");
     this.Value = value;
 }
                /// <summary>
                /// Creates the three-column schema (trees, leaves, paths) for <paramref name="parent"/>,
                /// assigning each column its name from <c>OutputColumnNames</c> and the corresponding
                /// supplied type.
                /// </summary>
                /// <param name="ectx">Exception context; may be null.</param>
                public SchemaImpl(IExceptionContext ectx, TreeEnsembleFeaturizerBindableMapper parent,
                    ColumnType treeValueColType, ColumnType leafIdColType, ColumnType pathIdColType)
                {
                    Contracts.CheckValueOrNull(ectx);
                    _ectx = ectx;
                    _ectx.AssertValue(parent);
                    _ectx.AssertValue(treeValueColType);
                    _ectx.AssertValue(leafIdColType);
                    _ectx.AssertValue(pathIdColType);

                    _parent = parent;

                    // Fill the name and type tables in locals, then publish them to the fields.
                    var names = new string[3];
                    names[TreeIdx] = OutputColumnNames.Trees;
                    names[LeafIdx] = OutputColumnNames.Leaves;
                    names[PathIdx] = OutputColumnNames.Paths;
                    _names = names;

                    var types = new ColumnType[3];
                    types[TreeIdx] = treeValueColType;
                    types[LeafIdx] = leafIdColType;
                    types[PathIdx] = pathIdColType;
                    _types = types;
                }
Esempio n. 27
0
        /// <summary>
        /// Returns true when <paramref name="type"/> is <c>NumberType.R4</c>, <c>NumberType.R8</c>, or
        /// has a positive key count.
        /// </summary>
        /// <param name="ectx">Exception context used to validate <paramref name="type"/>.</param>
        /// <param name="type">The column type to test; must not be null.</param>
        public static bool IsValidRangeFilterColumnType(IExceptionContext ectx, ColumnType type)
        {
            ectx.CheckValue(type, nameof(type));

            if (type == NumberType.R4)
            {
                return true;
            }

            if (type == NumberType.R8)
            {
                return true;
            }

            return type.KeyCount > 0;
        }
Esempio n. 28
0
        /// <summary>
        /// Returns the isNA predicate for the item type of <paramref name="type"/>, by invoking the
        /// generic overload closed over that item type's raw type.
        /// </summary>
        private Delegate GetIsNADelegate(ColumnType type)
        {
            // The <int> instantiation is only a template; MarshalInvoke receives the actual raw type.
            Func<ColumnType, Delegate> genericTemplate = GetIsNADelegate<int>;
            var rawItemType = type.ItemType.RawType;

            return Utils.MarshalInvoke(genericTemplate, rawItemType, type);
        }
Esempio n. 29
0
 /// <summary>
 /// Creates the <c>IColumnFunctionBuilder</c> for this column; each derived type supplies its own implementation.
 /// NOTE(review): parameters presumably identify the source column (index and type) and a cursor over
 /// the data; confirm against the implementations.
 /// </summary>
 internal abstract IColumnFunctionBuilder MakeBuilder(IHost host, int srcIndex, ColumnType srcType, IRowCursor cursor);
 /// <summary>
 /// Stores <paramref name="type"/> as this map's type; no validation is done here.
 /// </summary>
 protected ValueMap(ColumnType type) => Type = type;