Esempio n. 1
0
            /// <summary>
            /// Serializes the group and keep column names into <paramref name="ctx"/>.
            /// Each list is written as its length followed by one saved string per name.
            /// </summary>
            /// <param name="ctx">The model save context that receives the data.</param>
            public void Save(ModelSaveContext ctx)
            {
                _ectx.AssertValue(ctx);

                // *** Binary format ***
                // int: G - number of group columns
                // int[G]: ids of group column names
                // int: K: number of keep columns
                // int[K]: ids of keep column names

                // Group columns: there must be at least one.
                _ectx.AssertNonEmpty(_groupColumns);
                ctx.Writer.Write(_groupColumns.Length);
                for (int g = 0; g < _groupColumns.Length; g++)
                {
                    string groupName = _groupColumns[g];
                    _ectx.AssertNonEmpty(groupName);
                    ctx.SaveString(groupName);
                }

                // Keep columns: the array may be empty but not null.
                _ectx.AssertValue(_keepColumns);
                ctx.Writer.Write(_keepColumns.Length);
                for (int k = 0; k < _keepColumns.Length; k++)
                {
                    string keepName = _keepColumns[k];
                    _ectx.AssertNonEmpty(keepName);
                    ctx.SaveString(keepName);
                }
            }
            /// <summary>
            /// Resolves every pivot column name against <paramref name="inputSchema"/> and produces one
            /// <see cref="PivotColumnInfo"/> per name, in the same order as <paramref name="pivotColumns"/>.
            /// A user-argument exception is raised when a name is empty, when the column is not present
            /// in the schema, or when the column type is not a vector with a primitive item type.
            /// </summary>
            /// <param name="ectx">Exception context used for assertions and for creating exceptions.</param>
            /// <param name="inputSchema">Schema in which the pivot columns are looked up.</param>
            /// <param name="pivotColumns">Names of the pivot columns; asserted to be non-empty.</param>
            /// <param name="infos">Receives the bound column information.</param>
            private static void CheckAndBind(IExceptionContext ectx, ISchema inputSchema,
                                             string[] pivotColumns, out PivotColumnInfo[] infos)
            {
                Contracts.AssertValueOrNull(ectx);
                ectx.AssertValue(inputSchema);
                ectx.AssertNonEmpty(pivotColumns);

                infos = new PivotColumnInfo[pivotColumns.Length];
                for (int slot = 0; slot < pivotColumns.Length; slot++)
                {
                    var columnName = pivotColumns[slot];
                    // REVIEW: replace Check with CheckUser, once existing CheckUser is renamed to CheckUserArg or something.
                    ectx.CheckUserArg(!string.IsNullOrEmpty(columnName), nameof(Arguments.Column), "Column name cannot be empty");

                    int columnIndex;
                    bool found = inputSchema.TryGetColumnIndex(columnName, out columnIndex);
                    if (!found)
                    {
                        throw ectx.ExceptUserArg(nameof(Arguments.Column), "Pivot column '{0}' is not found", columnName);
                    }

                    // Only vectors whose items are primitive can be used as pivot columns.
                    var columnType = inputSchema.GetColumnType(columnIndex);
                    if (!(columnType.IsVector && columnType.ItemType.IsPrimitive))
                    {
                        throw ectx.ExceptUserArg(nameof(Arguments.Column),
                                                 "Pivot column '{0}' has type '{1}', but must be a vector of primitive types", columnName, columnType);
                    }

                    infos[slot] = new PivotColumnInfo(columnName, columnIndex, columnType.VectorSize, columnType.ItemType.AsPrimitive);
                }
            }
            /// <summary>
            /// Captures the description of one expression column and compiles its lambda.
            /// The raw kind of every input type is recorded, and the lambda <paramref name="node"/> is
            /// compiled into a delegate; if compilation reports any error, an exception built from the
            /// first error is thrown.
            /// </summary>
            /// <param name="ectx">Exception context used for assertions and for creating exceptions.</param>
            /// <param name="inputColumnNames">Names of the input columns; same length as <paramref name="inputTypes"/>.</param>
            /// <param name="inputTypes">Types of the input columns; asserted to be non-empty.</param>
            /// <param name="expression">The expression text; asserted to be non-whitespace.</param>
            /// <param name="outputColumnName">Name of the produced column; asserted to be non-whitespace.</param>
            /// <param name="vectorInputColumn">Stored as-is in <c>VectorInputColumn</c>.</param>
            /// <param name="node">The lambda node that gets compiled; its result type is asserted to be primitive.</param>
            /// <param name="perm">Stored as-is in <c>Perm</c>.</param>
            public ColumnInfo(IExceptionContext ectx, string[] inputColumnNames, DataViewType[] inputTypes, string expression, string outputColumnName, int vectorInputColumn, LambdaNode node, int[] perm)
            {
                ectx.AssertNonEmpty(inputTypes);
                ectx.Assert(Utils.Size(inputTypes) == Utils.Size(inputColumnNames));
                ectx.AssertNonWhiteSpace(expression);
                ectx.AssertNonWhiteSpace(outputColumnName);
                ectx.AssertValue(node);

                // Plain pass-through state.
                InputColumnNames = inputColumnNames;
                OutputColumnName = outputColumnName;
                VectorInputColumn = vectorInputColumn;
                Perm = perm;
                Expression = expression;

                // The lambda is expected to yield a primitive type.
                OutputColumnItemType = node.ResultType as PrimitiveDataViewType;
                ectx.AssertValue(OutputColumnItemType);

                // Record the raw kind of each input, position by position.
                var kinds = new InternalDataKind[inputTypes.Length];
                for (int pos = 0; pos < kinds.Length; pos++)
                {
                    kinds[pos] = inputTypes[pos].GetRawKind();
                }
                InputKinds = kinds;

                Del = LambdaCompiler.Compile(out var errors, node);
                bool compileFailed = Utils.Size(errors) > 0;
                if (compileFailed)
                {
                    throw ectx.Except($"generating code failed: {errors[0].GetMessage()}");
                }
            }
        /// <summary>
        /// Returns true if the input type is recognizable as being one of the standard
        /// builtin types. This method will also throw if something is detected as being definitely
        /// wrong (e.g., the input type does not descend from <see cref="PipelineColumn"/> at all,
        /// is not one of the standard generic subclasses, or a key type is declared with a type
        /// parameter other than <see cref="byte"/>, <see cref="ushort"/>, <see cref="uint"/> or
        /// <see cref="ulong"/>).
        /// </summary>
        /// <param name="ectx">Exception context used for assertions and for creating exceptions.</param>
        /// <param name="t">The type to inspect.</param>
        /// <returns>For the two-parameter key type, the result of <c>IsStandardCore</c> on the second type
        /// argument; for the other key types, true; for every other accepted type, the result of
        /// <c>IsStandardCore</c> on the single type argument.</returns>
        private static bool IsStandard(IExceptionContext ectx, Type t)
        {
            Contracts.AssertValue(ectx);
            ectx.AssertValue(t);

            bool isPipelineColumn = typeof(PipelineColumn).IsAssignableFrom(t);
            if (!isPipelineColumn)
            {
                throw ectx.ExceptParam(nameof(t), $"Type {t} was not even of {nameof(PipelineColumn)}");
            }

            // Compare against open generic definitions when the type is generic.
            Type gt;
            if (t.IsGenericType)
            {
                gt = t.GetGenericTypeDefinition();
            }
            else
            {
                gt = t;
            }

            bool isTwoArgKey = gt == typeof(Key<,>);
            bool isKeyFamily = gt == typeof(Key<>) || isTwoArgKey || gt == typeof(VarKey<>);
            bool isValueFamily = gt == typeof(Scalar<>) || gt == typeof(Vector<>) ||
                                 gt == typeof(VarVector<>) || gt == typeof(NormVector<>);

            if (!isKeyFamily && !isValueFamily)
            {
                throw ectx.ExceptParam(nameof(t),
                                       $"Type {t} was not one of the standard subclasses of {nameof(PipelineColumn)}");
            }

            ectx.Assert(t.IsGenericType);
            var ga = t.GetGenericArguments();
            ectx.AssertNonEmpty(ga);

            if (!isKeyFamily)
            {
                ectx.Assert(ga.Length == 1);
                return IsStandardCore(ga[0]);
            }

            ectx.Assert((gt == typeof(Key<,>) && ga.Length == 2) || ga.Length == 1);

            // The first type argument of any key type must be an unsigned integer type.
            var kt = ga[0];
            bool isUnsignedInteger = kt == typeof(byte) || kt == typeof(ushort) ||
                                     kt == typeof(uint) || kt == typeof(ulong);
            if (!isUnsignedInteger)
            {
                throw ectx.ExceptParam(nameof(t), $"Type parameter {kt.Name} is not a valid type for key");
            }

            // Only the two-parameter key carries a second type argument that needs checking.
            if (isTwoArgKey)
            {
                return IsStandardCore(ga[1]);
            }
            return true;
        }
        /// <summary>
        /// This is essentially the inverse function to <see cref="GetSerializedStaticDelegate"/>. If the function
        /// is not recoverable for any reason, this will return <c>null</c>, and the error parameter will be set.
        /// </summary>
        /// <param name="ectx">Exception context.</param>
        /// <param name="serialized">The serialized bytes, as returned by <see cref="GetSerializedStaticDelegate"/></param>
        /// <param name="inner">An exception the caller may raise as an inner exception if the return value is
        /// <c>null</c>, else, this itself will be <c>null</c></param>
        /// <returns>The recovered function wrapping the recovered method, or <c>null</c> if it could not
        /// be created, for some reason</returns>
        public static LambdaTransform.LoadDelegate DeserializeStaticDelegateOrNull(IExceptionContext ectx, byte[] serialized, out Exception inner)
        {
            Contracts.AssertValue(ectx);
            ectx.AssertNonEmpty(serialized);
            MethodInfo info = null;

            try
            {
                // NOTE(review): BinaryFormatter is unsafe on untrusted input. This method must only be
                // fed bytes from a trusted source; consider migrating to a safer encoding of the method identity.
                using (var ms = new MemoryStream(serialized, false))
                {
#if CORECLR
                    var    formatter = new BinaryFormatter();
                    object obj       = formatter.Deserialize(ms);
                    // If obj is not a CoreHackMethodInfo, 'hack' is null and the dereference below throws;
                    // that is caught by the catch clause and reported as a deserialization failure.
                    var    hack      = obj as CoreHackMethodInfo;
                    var    assembly  = Assembly.Load(new AssemblyName(hack.AssemblyName));
                    Type   t         = assembly.GetType(hack.ClassName);
                    info = t.GetTypeInfo().GetDeclaredMethod(hack.MethodName);
#else
                    var    formatter = new BinaryFormatter();
                    object obj       = formatter.Deserialize(ms);
                    info = obj as MethodInfo;
#endif
                }
            }
            catch (Exception e)
            {
                inner = ectx.ExceptDecode(e, "Failed to deserialize a .NET object");
                return null;
            }

            // Either it's not the right type, or obj itself may be null. Either way we have an error.
            if (info == null)
            {
                inner = ectx.ExceptDecode("Failed to deserialize the method");
                return null;
            }

            // Only static methods can be bound to the delegate without a target instance.
            if (!info.IsStatic)
            {
                inner = ectx.ExceptDecode("Deserialized method is not static");
                return null;
            }

            try
            {
                var del = info.CreateDelegate(typeof(LambdaTransform.LoadDelegate));
                inner = null;
                return (LambdaTransform.LoadDelegate)del;
            }
            catch (Exception)
            {
                // CreateDelegate failed to bind the method to the LoadDelegate type.
                inner = ectx.ExceptDecode("Deserialized method has wrong signature");
                return null;
            }
        }
Esempio n. 6
0
        /// <summary>
        /// Creates a variable with the given name and type.
        /// </summary>
        /// <param name="ectx">Exception context used for assertions; stored for later use. May be null.</param>
        /// <param name="name">Variable name; asserted to be non-empty.</param>
        /// <param name="type">Variable type; asserted to satisfy <c>IsValidType</c>.</param>
        public EntryPointVariable(IExceptionContext ectx, string name, Type type)
        {
            Contracts.AssertValueOrNull(ectx);
            _ectx = ectx;

            // Validate both arguments before publishing them.
            _ectx.AssertNonEmpty(name);
            _ectx.Assert(IsValidType(type));

            Name = name;
            Type = type;
        }
Esempio n. 7
0
 /// <summary>
 /// Retrieves the index of the first field whose input name (or, when the attribute
 /// has no input name, whose own field name) equals <paramref name="name"/>, or for which
 /// <paramref name="name"/> matches one of the input aliases. Returns -1 if no field matches.
 /// </summary>
 private int GetFieldIndex(string name)
 {
     _ectx.AssertNonEmpty(name);

     int index = 0;
     while (index < _attrs.Length)
     {
         var input = _attrs[index].Input;

         // The attribute's explicit name wins; otherwise fall back to the field's own name.
         var effectiveName = input.Name ?? _fields[index].Name;
         if (name == effectiveName)
         {
             return index;
         }

         if (AnyMatch(name, input.Aliases))
         {
             return index;
         }

         index++;
     }

     return -1;
 }
Esempio n. 8
0
 /// <summary>
 /// Finds the first field whose attribute name (or, when the attribute has no name,
 /// whose own field name) equals <paramref name="name"/>. Returns null if there is none.
 /// </summary>
 private FieldInfo GetField(string name)
 {
     _ectx.AssertNonEmpty(name);

     int index = 0;
     while (index < _attrs.Length)
     {
         // The attribute's explicit name wins; otherwise fall back to the field's own name.
         var effectiveName = _attrs[index].Name ?? _fields[index].Name;
         if (name == effectiveName)
         {
             return _fields[index];
         }
         index++;
     }

     return null;
 }
Esempio n. 9
0
            /// <summary>
            /// Binds the pivot columns against the input schema and builds the output schema.
            /// Non-pivot columns are copied unchanged; pivot columns keep their name and position but
            /// take the item type of the input column and a filtered copy of its metadata.
            /// </summary>
            /// <param name="ectx">Exception context used for assertions; stored for later use. May be null.</param>
            /// <param name="inputSchema">The schema of the input data.</param>
            /// <param name="mode">Stored as-is in <c>Mode</c>.</param>
            /// <param name="pivotColumns">Names of the pivot columns; asserted to be non-empty.</param>
            public UngroupBinding(IExceptionContext ectx, Schema inputSchema, UngroupMode mode, string[] pivotColumns)
            {
                Contracts.AssertValueOrNull(ectx);
                _ectx = ectx;
                _ectx.AssertValue(inputSchema);
                _ectx.AssertNonEmpty(pivotColumns);

                _inputSchema = inputSchema; // This also makes InputColumnCount valid.
                Mode = mode;

                Bind(_ectx, inputSchema, pivotColumns, out _infos);

                // Map each input column index to its position in _infos, or -1 when it is not a pivot column.
                _pivotIndex = Utils.CreateArray(InputColumnCount, -1);
                for (int pivot = 0; pivot < _infos.Length; pivot++)
                {
                    int inputIndex = _infos[pivot].Index;
                    _ectx.Assert(_pivotIndex[inputIndex] == -1);
                    _pivotIndex[inputIndex] = pivot;
                }

                var outputBuilder = new SchemaBuilder();

                // Walk the input columns in order so that output column indices match input column indices.
                for (int c = 0; c < InputColumnCount; ++c)
                {
                    var column = inputSchema[c];

                    if (_pivotIndex[c] >= 0)
                    {
                        // Pivot column: keep only the metadata that should survive ungrouping.
                        var pivotMetadata = new MetadataBuilder();
                        pivotMetadata.Add(column.Metadata, kind => ShouldPreserveMetadata(kind));

                        // To explain the output type of pivot columns, let's consider a row
                        //   Age UserID
                        //   18  {"Amy", "Willy"}
                        // where "Age" and "UserID" are column names and 18/{"Amy", "Willy"} is "Age"/"UserID" column in this example row.
                        // If the only pivot column is "UserID", the ungroup may produce
                        //   Age UserID
                        //   18  "Amy"
                        //   18  "Willy"
                        // One can see that "UserID" column (in output data) has a type identical to the element's type of the "UserID" column in input data.
                        outputBuilder.AddColumn(column.Name, column.Type.GetItemType(), pivotMetadata.GetMetadata());
                    }
                    else
                    {
                        // Not a pivot column: copy name, type and metadata verbatim.
                        outputBuilder.AddColumn(column.Name, column.Type, column.Metadata);
                    }
                }

                OutputSchema = outputBuilder.GetSchema();
            }
Esempio n. 10
0
            /// <summary>
            /// Resolves the group and keep column names to column ids in <paramref name="inputSchema"/>
            /// and builds the output schema.
            /// </summary>
            /// <param name="ectx">Exception context used for assertions and for creating exceptions.</param>
            /// <param name="inputSchema">The schema of the input data.</param>
            /// <param name="groupColumns">Names of the columns to group by; asserted to be non-empty.</param>
            /// <param name="keepColumns">Names of the columns to keep; asserted to be non-null.</param>
            public GroupBinding(IExceptionContext ectx, Schema inputSchema, string[] groupColumns, string[] keepColumns)
            {
                Contracts.AssertValue(ectx);
                _ectx = ectx;

                _ectx.AssertValue(inputSchema);
                _ectx.AssertNonEmpty(groupColumns);
                _ectx.AssertValue(keepColumns);

                _inputSchema = inputSchema;

                // Group keys: lookup failures are reported against the GroupKeys argument.
                _groupColumns = groupColumns;
                GroupColumnIndexes = GetColumnIds(
                    inputSchema,
                    groupColumns,
                    message => _ectx.ExceptUserArg(nameof(Arguments.GroupKeys), message));

                // Kept columns: lookup failures are reported against the Columns argument.
                _keepColumns = keepColumns;
                KeepColumnIndexes = GetColumnIds(
                    inputSchema,
                    keepColumns,
                    message => _ectx.ExceptUserArg(nameof(Arguments.Columns), message));

                // Compute output schema from the specified input schema.
                OutputSchema = BuildOutputSchema(inputSchema);
            }
Esempio n. 11
0
            /// <summary>
            /// Builds the description of a component from its attribute and validates it.
            /// Throws when the kind, the name or any alias fails <c>IsValidName</c>, or when
            /// <paramref name="argumentType"/> is not assignable to <see cref="IComponentFactory"/>.
            /// </summary>
            /// <param name="ectx">Exception context used for assertions and for creating exceptions.</param>
            /// <param name="interfaceType">Stored as-is in <c>InterfaceType</c>.</param>
            /// <param name="kind">The component kind; asserted to be non-empty.</param>
            /// <param name="argumentType">The argument type; must be assignable to <see cref="IComponentFactory"/>.</param>
            /// <param name="attribute">Source of the name, description, friendly name and aliases.</param>
            internal ComponentInfo(IExceptionContext ectx, Type interfaceType, string kind, Type argumentType, TlcModule.ComponentAttribute attribute)
            {
                Contracts.AssertValueOrNull(ectx);
                ectx.AssertValue(interfaceType);
                ectx.AssertNonEmpty(kind);
                ectx.AssertValue(argumentType);
                ectx.AssertValue(attribute);

                Name = attribute.Name;
                Description = attribute.Desc;

                // Fall back to the component name when no friendly name was supplied.
                FriendlyName = string.IsNullOrWhiteSpace(attribute.FriendlyName)
                    ? Name
                    : attribute.FriendlyName;

                Kind = kind;
                bool kindIsValid = IsValidName(Kind);
                if (!kindIsValid)
                {
                    throw ectx.Except("Invalid component kind: '{0}'", Kind);
                }

                Aliases = attribute.Aliases;
                bool nameIsValid = IsValidName(Name);
                if (!nameIsValid)
                {
                    throw ectx.Except("Component name '{0}' is not valid.", Name);
                }

                // Aliases are optional, but every alias that is present has to be a valid name.
                if (Aliases != null)
                {
                    if (Aliases.Any(alias => !IsValidName(alias)))
                    {
                        throw ectx.Except("Component '{0}' has an invalid alias '{1}'", Name, Aliases.First(alias => !IsValidName(alias)));
                    }
                }

                bool isFactory = typeof(IComponentFactory).IsAssignableFrom(argumentType);
                if (!isFactory)
                {
                    throw ectx.Except("Component '{0}' must inherit from IComponentFactory", argumentType);
                }

                ArgumentType = argumentType;
                InterfaceType = interfaceType;
            }
Esempio n. 12
0
            /// <summary>
            /// Resolves the group and keep column names to column ids in <paramref name="inputSchema"/>,
            /// then builds the column types and the column name map.
            /// </summary>
            /// <param name="ectx">Exception context used for assertions and for creating exceptions.</param>
            /// <param name="inputSchema">The schema of the input data.</param>
            /// <param name="groupColumns">Names of the columns to group by; asserted to be non-empty.</param>
            /// <param name="keepColumns">Names of the columns to keep; asserted to be non-null.</param>
            public GroupSchema(IExceptionContext ectx, ISchema inputSchema, string[] groupColumns, string[] keepColumns)
            {
                Contracts.AssertValue(ectx);
                _ectx = ectx;

                _ectx.AssertValue(inputSchema);
                _ectx.AssertNonEmpty(groupColumns);
                _ectx.AssertValue(keepColumns);

                _input = inputSchema;

                // Group keys: lookup failures are reported against the GroupKey argument.
                _groupColumns = groupColumns;
                GroupIds = GetColumnIds(
                    inputSchema,
                    groupColumns,
                    message => _ectx.ExceptUserArg(nameof(Arguments.GroupKey), message));
                _groupCount = GroupIds.Length;

                // Kept columns: lookup failures are reported against the Column argument.
                _keepColumns = keepColumns;
                KeepIds = GetColumnIds(
                    inputSchema,
                    keepColumns,
                    message => _ectx.ExceptUserArg(nameof(Arguments.Column), message));

                _columnTypes = BuildColumnTypes(_input, KeepIds);
                _columnNameMap = BuildColumnNameMap();
            }
Esempio n. 13
0
        /// <summary>
        /// Looks up the component called <paramref name="name"/> for the given signature type in
        /// <paramref name="catalog"/>, populates its arguments from the JSON <paramref name="settings"/>,
        /// and returns the built instance as an <see cref="IComponentFactory"/>.
        /// Throws when the signature type has no component kind, when the component is not found,
        /// when a setting cannot be applied, or when required inputs are missing.
        /// </summary>
        /// <param name="ectx">Exception context used for assertions and for creating exceptions.</param>
        /// <param name="signatureType">Signature type used to determine the component kind.</param>
        /// <param name="name">Name of the component to find; asserted to be non-empty.</param>
        /// <param name="settings">Field values for the component; may be null.</param>
        /// <param name="catalog">The catalog in which the component is looked up.</param>
        private static IComponentFactory GetComponentJson(IExceptionContext ectx, Type signatureType, string name, JObject settings, ComponentCatalog catalog)
        {
            Contracts.AssertValue(ectx);
            ectx.AssertValue(signatureType);
            ectx.AssertNonEmpty(name);
            ectx.AssertValueOrNull(settings);
            ectx.AssertValue(catalog);

            bool kindKnown = catalog.TryGetComponentKind(signatureType, out string kind);
            if (!kindKnown)
            {
                throw ectx.Except($"Component type '{signatureType}' is not a valid signature type.");
            }

            bool componentKnown = catalog.TryFindComponent(kind, name, out ComponentCatalog.ComponentInfo info);
            if (!componentKnown)
            {
                // List what is registered for this kind to help the user spot a typo.
                var available = catalog.GetAllComponents(kind).Select(x => $"'{x.Name}'");
                throw ectx.Except($"Component '{name}' of kind '{kind}' is not found. Available components are: {string.Join(", ", available)}");
            }

            var builder = new InputBuilder(ectx, info.ArgumentType, catalog);

            if (settings != null)
            {
                foreach (var pair in settings)
                {
                    if (builder.TrySetValueJson(pair.Key, pair.Value))
                    {
                        continue;
                    }
                    throw ectx.Except($"Unexpected value for component '{name}', field '{pair.Key}': '{pair.Value}'");
                }
            }

            // Every required input must have been supplied by now.
            var missing = builder.GetMissingValues().ToArray();
            if (missing.Length > 0)
            {
                throw ectx.Except($"The following required inputs were not provided for component '{name}': {string.Join(", ", missing)}");
            }

            return builder.GetInstance() as IComponentFactory;
        }
Esempio n. 14
0
            /// <summary>
            /// Binds the pivot columns against the input schema and builds two lookups:
            /// pivot column name to input column index, and input column index to position in the
            /// bound pivot infos (-1 for columns that are not pivot columns).
            /// </summary>
            /// <param name="ectx">Exception context used for assertions; stored for later use. May be null.</param>
            /// <param name="inputSchema">The schema of the input data.</param>
            /// <param name="mode">Stored as-is in <c>Mode</c>.</param>
            /// <param name="pivotColumns">Names of the pivot columns; asserted to be non-empty.</param>
            public SchemaImpl(IExceptionContext ectx, ISchema inputSchema, UngroupMode mode, string[] pivotColumns)
            {
                Contracts.AssertValueOrNull(ectx);
                _ectx = ectx;
                _ectx.AssertValue(inputSchema);
                _ectx.AssertNonEmpty(pivotColumns);

                _inputSchema = inputSchema;
                Mode = mode;

                CheckAndBind(_ectx, inputSchema, pivotColumns, out _infos);

                _pivotColMap = new Dictionary<string, int>();
                _pivotIndex = Utils.CreateArray(_inputSchema.ColumnCount, -1);

                for (int pivot = 0; pivot < _infos.Length; pivot++)
                {
                    var bound = _infos[pivot];
                    int inputIndex = bound.Index;

                    _pivotColMap[bound.Name] = inputIndex;

                    // Each input column is expected to be bound as a pivot at most once.
                    _ectx.Assert(_pivotIndex[inputIndex] == -1);
                    _pivotIndex[inputIndex] = pivot;
                }
            }
Esempio n. 15
0
        /// <summary>
        /// Registers an output variable. Returns true if a new variable was added, false if a
        /// variable with this name already exists. Throws if the existing variable has a different type.
        /// </summary>
        /// <param name="name">Variable name; asserted to be non-empty.</param>
        /// <param name="type">Variable type; asserted to be non-null.</param>
        public Boolean AddOutputVariable(string name, Type type)
        {
            _ectx.AssertNonEmpty(name);
            _ectx.AssertValue(type);

            EntryPointVariable v;
            bool alreadyKnown = _vars.TryGetValue(name, out v);

            if (alreadyKnown)
            {
                // A known variable must agree on its type; it is left untouched otherwise.
                if (v.Type != type)
                {
                    throw _ectx.Except($"Variable '{v.Name}' is used as {v.Type} and as {type}");
                }
                return false;
            }

            // First time this name is seen: create, register and mark it.
            v = new EntryPointVariable(_ectx, name, type);
            _vars[name] = v;
            v.MarkUsage(false);
            return true;
        }