public static SchemaImpl Create(ModelLoadContext ctx, IExceptionContext ectx, ISchema inputSchema)
            {
                Contracts.AssertValueOrNull(ectx);
                ectx.AssertValue(ctx);
                ectx.AssertValue(inputSchema);

                // *** Binary format ***
                // int: ungroup mode
                // int: K - number of pivot columns
                // int[K]: ids of pivot column names

                int modeIndex = ctx.Reader.ReadInt32();

                ectx.CheckDecode(Enum.IsDefined(typeof(UngroupMode), modeIndex));
                UngroupMode mode = (UngroupMode)modeIndex;

                int k = ctx.Reader.ReadInt32();

                ectx.CheckDecode(k > 0);
                var pivotColumns = new string[k];

                for (int i = 0; i < k; i++)
                {
                    pivotColumns[i] = ctx.LoadNonEmptyString();
                }

                return(new SchemaImpl(ectx, inputSchema, mode, pivotColumns));
            }
Exemple #2
0
            public UngroupBinding(IExceptionContext ectx, Schema inputSchema, UngroupMode mode, string[] pivotColumns)
            {
                Contracts.AssertValueOrNull(ectx);
                _ectx = ectx;
                _ectx.AssertValue(inputSchema);
                _ectx.AssertNonEmpty(pivotColumns);

                _inputSchema = inputSchema; // This also makes InputColumnCount valid.
                Mode         = mode;

                Bind(_ectx, inputSchema, pivotColumns, out _infos);

                _pivotIndex = Utils.CreateArray(InputColumnCount, -1);
                for (int i = 0; i < _infos.Length; i++)
                {
                    var info = _infos[i];
                    _ectx.Assert(_pivotIndex[info.Index] == -1);
                    _pivotIndex[info.Index] = i;
                }

                var schemaBuilder = new SchemaBuilder();

                // Iterate through input columns. Input columns which are not pivot columns will be copied to output schema with the same column index unchanged.
                // Input columns which are pivot columns would also be copied but with different data types and different metadata.
                for (int i = 0; i < InputColumnCount; ++i)
                {
                    if (_pivotIndex[i] < 0)
                    {
                        // i-th input column is not a pivot column. Let's do a naive copy.
                        schemaBuilder.AddColumn(inputSchema[i].Name, inputSchema[i].Type, inputSchema[i].Metadata);
                    }
                    else
                    {
                        // i-th input column is a pivot column. Let's calculate proper type and metadata for it.
                        var metadataBuilder = new MetadataBuilder();
                        metadataBuilder.Add(inputSchema[i].Metadata, metadataName => ShouldPreserveMetadata(metadataName));
                        // To explain the output type of pivot columns, let's consider a row
                        //   Age UserID
                        //   18  {"Amy", "Willy"}
                        // where "Age" and "UserID" are column names and 18/{"Amy", "Willy"} is "Age"/"UserID" column in this example row.
                        // If the only pivot column is "UserID", the ungroup may produce
                        //   Age UserID
                        //   18  "Amy"
                        //   18  "Willy"
                        // One can see that "UserID" column (in output data) has a type identical to the element's type of the "UserID" column in input data.
                        schemaBuilder.AddColumn(inputSchema[i].Name, inputSchema[i].Type.GetItemType(), metadataBuilder.GetMetadata());
                    }
                }
                OutputSchema = schemaBuilder.GetSchema();
            }
            public SchemaImpl(IExceptionContext ectx, ISchema inputSchema, UngroupMode mode, string[] pivotColumns)
            {
                Contracts.AssertValueOrNull(ectx);
                _ectx = ectx;
                _ectx.AssertValue(inputSchema);
                _ectx.AssertNonEmpty(pivotColumns);

                _inputSchema = inputSchema;
                Mode         = mode;

                CheckAndBind(_ectx, inputSchema, pivotColumns, out _infos);

                _pivotColMap = new Dictionary <string, int>();
                _pivotIndex  = Utils.CreateArray(_inputSchema.ColumnCount, -1);
                for (int i = 0; i < _infos.Length; i++)
                {
                    var info = _infos[i];
                    _pivotColMap[info.Name] = info.Index;
                    _ectx.Assert(_pivotIndex[info.Index] == -1);
                    _pivotIndex[info.Index] = i;
                }
            }
 /// <summary>
 /// Convenience constructor for public facing API.
 /// </summary>
 /// <param name="env">Host Environment.</param>
 /// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
 /// <param name="mode">Specifies how to unroll multiple pivot columns of different size.</param>
 /// <param name="columns">Columns to unroll, or 'pivot'</param>
 public UngroupTransform(IHostEnvironment env, IDataView input, UngroupMode mode, params string[] columns)
     : this(env, new Arguments() { Column = columns, Mode = mode }, input)
 {
 }