/// <summary>
/// Builds a <see cref="SchemaImpl"/> by decoding the ungroup mode and the pivot column
/// names from the reader of <paramref name="ctx"/>.
/// </summary>
/// <param name="ctx">Load context whose reader supplies the serialized values.</param>
/// <param name="ectx">Exception context used for the assertions and decode checks; may be null.</param>
/// <param name="inputSchema">Input schema handed to the <see cref="SchemaImpl"/> constructor.</param>
/// <returns>A new <see cref="SchemaImpl"/> created from the decoded mode and pivot column names.</returns>
public static SchemaImpl Create(ModelLoadContext ctx, IExceptionContext ectx, ISchema inputSchema)
{
    Contracts.AssertValueOrNull(ectx);
    ectx.AssertValue(ctx);
    ectx.AssertValue(inputSchema);

    // *** Binary format ***
    // int: ungroup mode
    // int: K - number of pivot columns
    // int[K]: ids of pivot column names

    // The mode is stored as a raw int; it must map onto a declared UngroupMode member.
    int rawMode = ctx.Reader.ReadInt32();
    ectx.CheckDecode(Enum.IsDefined(typeof(UngroupMode), rawMode));
    UngroupMode mode = (UngroupMode)rawMode;

    // At least one pivot column is required.
    int pivotCount = ctx.Reader.ReadInt32();
    ectx.CheckDecode(pivotCount > 0);

    var names = new string[pivotCount];
    for (int n = 0; n < names.Length; n++)
    {
        names[n] = ctx.LoadNonEmptyString();
    }

    return new SchemaImpl(ectx, inputSchema, mode, names);
}
/// <summary>
/// Binds the pivot columns against <paramref name="inputSchema"/> and builds the output schema.
/// </summary>
/// <param name="ectx">Exception context used for assertions; may be null.</param>
/// <param name="inputSchema">Schema of the input data.</param>
/// <param name="mode">Ungroup mode, stored in <c>Mode</c>.</param>
/// <param name="pivotColumns">Names of the pivot columns; asserted to be non-empty.</param>
public UngroupBinding(IExceptionContext ectx, Schema inputSchema, UngroupMode mode, string[] pivotColumns)
{
    Contracts.AssertValueOrNull(ectx);
    _ectx = ectx;
    _ectx.AssertValue(inputSchema);
    _ectx.AssertNonEmpty(pivotColumns);

    // Assigning the input schema also makes InputColumnCount valid, so this must come first.
    _inputSchema = inputSchema;
    Mode = mode;

    Bind(_ectx, inputSchema, pivotColumns, out _infos);

    // _pivotIndex maps an input column index to its position in _infos, or -1 for non-pivot columns.
    _pivotIndex = Utils.CreateArray(InputColumnCount, -1);
    for (int slot = 0; slot < _infos.Length; slot++)
    {
        var pivot = _infos[slot];
        _ectx.Assert(_pivotIndex[pivot.Index] == -1);
        _pivotIndex[pivot.Index] = slot;
    }

    // Walk the input columns in order so that every output column keeps its input index.
    // Non-pivot columns are copied as they are; pivot columns are copied with a different
    // type and a filtered set of metadata.
    var outputBuilder = new SchemaBuilder();
    for (int col = 0; col < InputColumnCount; ++col)
    {
        var column = inputSchema[col];

        if (_pivotIndex[col] < 0)
        {
            // Not a pivot column: plain copy of name, type and metadata.
            outputBuilder.AddColumn(column.Name, column.Type, column.Metadata);
            continue;
        }

        // Pivot column: keep only the metadata kinds accepted by ShouldPreserveMetadata.
        var pivotMetadata = new MetadataBuilder();
        pivotMetadata.Add(column.Metadata, kind => ShouldPreserveMetadata(kind));

        // Why the output type is the item type: take the input row
        //      Age     UserID
        //      18      {"Amy", "Willy"}
        // with "UserID" as the only pivot column. Ungrouping may produce
        //      Age     UserID
        //      18      "Amy"
        //      18      "Willy"
        // so the output "UserID" column has the element type of the input "UserID" column.
        outputBuilder.AddColumn(column.Name, column.Type.GetItemType(), pivotMetadata.GetMetadata());
    }

    OutputSchema = outputBuilder.GetSchema();
}
/// <summary>
/// Binds the pivot columns against <paramref name="inputSchema"/> and records, for each of them,
/// both a name-to-input-index entry and an input-index-to-pivot-position entry.
/// </summary>
/// <param name="ectx">Exception context used for assertions; may be null.</param>
/// <param name="inputSchema">Schema of the input data.</param>
/// <param name="mode">Ungroup mode, stored in <c>Mode</c>.</param>
/// <param name="pivotColumns">Names of the pivot columns; asserted to be non-empty.</param>
public SchemaImpl(IExceptionContext ectx, ISchema inputSchema, UngroupMode mode, string[] pivotColumns)
{
    Contracts.AssertValueOrNull(ectx);
    _ectx = ectx;
    _ectx.AssertValue(inputSchema);
    _ectx.AssertNonEmpty(pivotColumns);

    _inputSchema = inputSchema;
    Mode = mode;

    CheckAndBind(_ectx, inputSchema, pivotColumns, out _infos);

    // _pivotColMap: pivot column name -> input column index.
    // _pivotIndex:  input column index -> position in _infos, or -1 for non-pivot columns.
    _pivotColMap = new Dictionary<string, int>();
    _pivotIndex = Utils.CreateArray(_inputSchema.ColumnCount, -1);

    for (int slot = 0; slot < _infos.Length; slot++)
    {
        var pivot = _infos[slot];
        _pivotColMap[pivot.Name] = pivot.Index;

        // Each input column is expected to be claimed by at most one pivot entry.
        _ectx.Assert(_pivotIndex[pivot.Index] == -1);
        _pivotIndex[pivot.Index] = slot;
    }
}
/// <summary>
/// Convenience constructor for the public-facing API. It packs <paramref name="columns"/> and
/// <paramref name="mode"/> into an <see cref="Arguments"/> instance and delegates to the
/// arguments-based constructor.
/// </summary>
/// <param name="env">Host environment.</param>
/// <param name="input">Input <see cref="IDataView"/>, i.e. the output of the previous transform or loader.</param>
/// <param name="mode">Specifies how to unroll multiple pivot columns of different sizes.</param>
/// <param name="columns">The columns to unroll (the 'pivot' columns).</param>
public UngroupTransform(IHostEnvironment env, IDataView input, UngroupMode mode, params string[] columns)
    : this(env, new Arguments { Column = columns, Mode = mode }, input)
{
}