/// <summary>
/// Creates a mapper bound to <paramref name="schema"/> and builds its output schema, which consists of
/// three vector columns in a fixed order: tree output values, leaf indicators and path indicators.
/// </summary>
/// <param name="ectx">Exception context used for the assertions and stored in the mapper.</param>
/// <param name="owner">The bindable mapper that owns the trained ensemble and the slot-name getters.</param>
/// <param name="schema">The role-mapped input schema; it must have a feature column.</param>
public BoundMapper(IExceptionContext ectx, TreeEnsembleFeaturizerBindableMapper owner, RoleMappedSchema schema)
{
    Contracts.AssertValue(ectx);
    ectx.AssertValue(owner);
    ectx.AssertValue(schema);
    ectx.Assert(schema.Feature.HasValue);

    _ectx = ectx;
    _owner = owner;
    InputRoleMappedSchema = schema;

    var treeCount = owner._ensemble.TrainedEnsemble.NumTrees;
    var leafCount = owner._totalLeafCount;

    // One slot per tree: the output of each tree on a given example.
    var treeValues = new VectorType(NumberDataViewType.Single, treeCount);

    // One slot per leaf in the whole ensemble: an indicator vector marking which leaf the example
    // ends up in, for every tree.
    var leafIds = new VectorType(NumberDataViewType.Single, leafCount);

    // One slot per internal node in the whole ensemble: an indicator vector marking the nodes on the
    // paths taken by the example in every tree.
    // For a single binary tree, #nodes = #internal + #leaf, and also #nodes = 2 * #internal + 1
    // (every node except the root is a child of an internal node). Hence #internal = #leaf - 1,
    // and summed over the ensemble the number of internal nodes is #leaf - #trees.
    var pathIds = new VectorType(NumberDataViewType.Single, leafCount - treeCount);

    // Builds the annotations of one output column: slot names first, then (optionally) IsNormalized.
    DataViewSchema.Annotations BuildSlotAnnotations(VectorType columnType, ValueGetter<VBuffer<ReadOnlyMemory<char>>> slotNames, bool markNormalized)
    {
        var annotations = new DataViewSchema.Annotations.Builder();
        annotations.Add(AnnotationUtils.Kinds.SlotNames, AnnotationUtils.GetNamesType(columnType.Size), slotNames);
        if (markNormalized)
        {
            annotations.Add(AnnotationUtils.Kinds.IsNormalized, BooleanDataViewType.Instance, (ref bool value) => value = true);
        }
        return annotations.ToAnnotations();
    }

    // The order of the AddColumn calls defines the column indices asserted below.
    var outputBuilder = new DataViewSchema.Builder();
    outputBuilder.AddColumn(OutputColumnNames.Trees, treeValues, BuildSlotAnnotations(treeValues, owner.GetTreeSlotNames, false));
    outputBuilder.AddColumn(OutputColumnNames.Leaves, leafIds, BuildSlotAnnotations(leafIds, owner.GetLeafSlotNames, true));
    outputBuilder.AddColumn(OutputColumnNames.Paths, pathIds, BuildSlotAnnotations(pathIds, owner.GetPathSlotNames, true));
    OutputSchema = outputBuilder.ToSchema();

    // Tree values must be the first output column, leaf IDs the second and path IDs the third.
    Contracts.Assert(OutputSchema[OutputColumnNames.Trees].Index == TreeValuesColumnId);
    Contracts.Assert(OutputSchema[OutputColumnNames.Leaves].Index == LeafIdsColumnId);
    Contracts.Assert(OutputSchema[OutputColumnNames.Paths].Index == PathIdsColumnId);
}
//private Delegate CreateGetter(SchemaProxy schema, int index, Delegate peek) private Delegate CreateGetter(DataViewType colType, InternalSchemaDefinition.Column column, Delegate peek) { var outputType = column.OutputType; var genericType = outputType; Func <Delegate, Delegate> del; if (outputType.IsArray) { VectorType vectorType = colType as VectorType; Host.Assert(vectorType != null); // String[] -> ReadOnlyMemory<char> if (outputType.GetElementType() == typeof(string)) { Host.Assert(vectorType.ItemType is TextDataViewType); return(CreateConvertingArrayGetterDelegate <string, ReadOnlyMemory <char> >(peek, x => x != null ? x.AsMemory() : ReadOnlyMemory <char> .Empty)); } // T[] -> VBuffer<T> if (outputType.GetElementType().IsGenericType&& outputType.GetElementType().GetGenericTypeDefinition() == typeof(Nullable <>)) { Host.Assert(Nullable.GetUnderlyingType(outputType.GetElementType()) == vectorType.ItemType.RawType); } else { Host.Assert(outputType.GetElementType() == vectorType.ItemType.RawType); } del = CreateDirectArrayGetterDelegate <int>; genericType = outputType.GetElementType(); } else if (colType is VectorType vectorType) { // VBuffer<T> -> VBuffer<T> // REVIEW: Do we care about accomodating VBuffer<string> -> ReadOnlyMemory<char>? Host.Assert(outputType.IsGenericType); Host.Assert(outputType.GetGenericTypeDefinition() == typeof(VBuffer <>)); Host.Assert(outputType.GetGenericArguments()[0] == vectorType.ItemType.RawType); del = CreateDirectVBufferGetterDelegate <int>; genericType = vectorType.ItemType.RawType; } else if (colType is PrimitiveDataViewType) { if (outputType == typeof(string)) { // String -> ReadOnlyMemory<char> Host.Assert(colType is TextDataViewType); return(CreateConvertingGetterDelegate <String, ReadOnlyMemory <char> >(peek, x => x != null ? 
x.AsMemory() : ReadOnlyMemory <char> .Empty)); } // T -> T if (outputType.IsGenericType && outputType.GetGenericTypeDefinition() == typeof(Nullable <>)) { Host.Assert(colType.RawType == Nullable.GetUnderlyingType(outputType)); } else { Host.Assert(colType.RawType == outputType); } if (!(colType is KeyType keyType)) { del = CreateDirectGetterDelegate <int>; } else { var keyRawType = colType.RawType; Func <Delegate, DataViewType, Delegate> delForKey = CreateKeyGetterDelegate <uint>; return(Utils.MarshalInvoke(delForKey, keyRawType, peek, colType)); } }
/// <summary>
/// Convenience constructor that takes an already bound mapper: it forwards <c>mapper.Bindable</c>,
/// together with all other arguments unchanged, to the constructor that accepts an
/// <see cref="ISchemaBindableMapper"/>. The body is intentionally empty.
/// </summary>
// NOTE(review): 'mapper' is dereferenced in the constructor initializer before any validation runs,
// so callers are expected to pass a non-null mapper.
private LabelNameBindableMapper(IHostEnvironment env, ISchemaBoundMapper mapper, VectorType type, Delegate getter, string metadataKind, Func <ISchemaBoundMapper, ColumnType, bool> canWrap) : this(env, mapper.Bindable, type, getter, metadataKind, canWrap) { }
/// <summary>
/// Rebuilds <paramref name="partialSchema"/> column by column, attaching the label names to the score
/// column as metadata of kind <paramref name="labelNameKind"/>.
/// </summary>
/// <param name="partialSchema">The schema to copy; its column order is preserved.</param>
/// <param name="scoreColumnIndex">Index of the column that receives the label-name metadata.</param>
/// <param name="labelNameType">Type of the label-name metadata value.</param>
/// <param name="labelNameGetter">Getter producing the label names.</param>
/// <param name="labelNameKind">Metadata kind under which the label names are stored.</param>
/// <returns>A new schema with the same columns and the decorated score column.</returns>
private Schema DecorateOutputSchema(Schema partialSchema, int scoreColumnIndex, VectorType labelNameType, ValueGetter<VBuffer<T>> labelNameGetter, string labelNameKind)
{
    var schemaBuilder = new SchemaBuilder();

    // Columns are appended strictly in their original order so that indices match the schema of the
    // mapper that computes the score column.
    for (int col = 0; col < partialSchema.Count; ++col)
    {
        var source = partialSchema[col];
        var annotations = new MetadataBuilder();

        if (col != scoreColumnIndex)
        {
            // This transform only touches the score column: every other column keeps all its metadata.
            annotations.Add(source.Metadata, selector: kind => true);
        }
        else
        {
            // Keep everything except a pre-existing entry of the same kind, then add the label names.
            annotations.Add(source.Metadata, selector: kind => kind != labelNameKind);
            annotations.Add(labelNameKind, labelNameType, labelNameGetter);
        }

        // Metadata is read-only, so a fresh column is created rather than extending the existing one.
        schemaBuilder.AddColumn(source.Name, source.Type, annotations.GetMetadata());
    }

    return schemaBuilder.GetSchema();
}
/// <summary>
/// Wraps <paramref name="mapper"/> in a <c>Bound&lt;T&gt;</c> instance, passing along the metadata type,
/// getter, kind and wrap predicate.
/// </summary>
/// <typeparam name="T">Item type of the <c>VBuffer</c> produced by <paramref name="getter"/>.</typeparam>
/// <param name="env">Host environment used for the assertions and handed to the bound mapper.</param>
/// <param name="mapper">The row mapper to wrap.</param>
/// <param name="type">Vector type of the metadata.</param>
/// <param name="getter">Must be a <c>ValueGetter&lt;VBuffer&lt;T&gt;&gt;</c>; it is cast to that type.</param>
/// <param name="metadataKind">Non-empty metadata kind.</param>
/// <param name="canWrap">Optional predicate; may be null.</param>
/// <returns>The newly created bound mapper.</returns>
internal static ISchemaBoundMapper CreateBound<T>(IHostEnvironment env, ISchemaBoundRowMapper mapper, VectorType type, Delegate getter, string metadataKind, Func<ISchemaBoundMapper, ColumnType, bool> canWrap)
{
    Contracts.AssertValue(env);
    env.AssertValue(mapper);
    env.AssertValue(type);
    env.AssertValue(getter);
    env.Assert(getter is ValueGetter<VBuffer<T>>);
    env.AssertNonEmpty(metadataKind);
    env.AssertValueOrNull(canWrap);

    // The untyped delegate is narrowed to the strongly typed getter expected by Bound<T>.
    var typedGetter = (ValueGetter<VBuffer<T>>)getter;
    ISchemaBoundMapper bound = new Bound<T>(env, mapper, type, typedGetter, metadataKind, canWrap);
    return bound;
}
/// <summary>
/// Registers a host under <c>LoaderSignature</c>, checks the arguments with debug assertions and stores
/// them in the corresponding fields.
/// </summary>
/// <param name="env">Environment used to register the host.</param>
/// <param name="bindable">The bindable mapper being wrapped.</param>
/// <param name="type">Vector type of the metadata.</param>
/// <param name="getter">Delegate producing the metadata value.</param>
/// <param name="metadataKind">Non-empty metadata kind.</param>
/// <param name="canWrap">Optional predicate; may be null.</param>
private LabelNameBindableMapper(IHostEnvironment env, ISchemaBindableMapper bindable, VectorType type, Delegate getter, string metadataKind, Func<ISchemaBoundMapper, ColumnType, bool> canWrap)
{
    Contracts.AssertValue(env);

    // The host has to exist before it can be used for the remaining assertions.
    _host = env.Register(LoaderSignature);

    _host.AssertValue(bindable);
    _host.AssertValue(type);
    _host.AssertValue(getter);
    _host.AssertNonEmpty(metadataKind);
    _host.AssertValueOrNull(canWrap);

    // Wrapped mapper and wrap predicate.
    _bindable = bindable;
    _canWrap = canWrap;

    // Description of the metadata to attach.
    _type = type;
    _getter = getter;
    _metadataKind = metadataKind;
}
/// <summary>
/// Builds an <see cref="InternalSchemaDefinition"/> for <paramref name="userType"/> from
/// <paramref name="userSchemaDefinition"/>, resolving each schema column to a field or property of the
/// type (or to its generator for computed columns) and validating or inferring the column type.
/// </summary>
/// <param name="userType">The user type whose fields and properties back the columns.</param>
/// <param name="userSchemaDefinition">
/// The schema definition to resolve. When null, <c>SchemaDefinition.Create(userType)</c> is used.
/// </param>
/// <returns>
/// The internal schema definition. Members of type <c>IChannel</c> produce no column, so the result can
/// contain fewer columns than <paramref name="userSchemaDefinition"/>.
/// </returns>
/// <remarks>
/// Throws the exception created by <c>Contracts.ExceptParam</c> when a column has a null member name,
/// when no matching field or property exists, when a computed column has no return type, or when the
/// declared column type disagrees with the member type.
/// </remarks>
public static InternalSchemaDefinition Create(Type userType, SchemaDefinition userSchemaDefinition)
{
    Contracts.AssertValue(userType);
    Contracts.AssertValueOrNull(userSchemaDefinition);

    if (userSchemaDefinition == null)
    {
        userSchemaDefinition = SchemaDefinition.Create(userType);
    }

    // Columns are written densely through dstCount rather than at the loop index: a skipped member
    // (see the IChannel clause below) must not leave a null slot in the resulting array.
    Column[] dstCols = new Column[userSchemaDefinition.Count];
    int dstCount = 0;

    for (int i = 0; i < userSchemaDefinition.Count; ++i)
    {
        var col = userSchemaDefinition[i];
        if (col.MemberName == null)
        {
            throw Contracts.ExceptParam(nameof(userSchemaDefinition), "Null field name detected in schema definition");
        }

        bool isVector;
        Type dataItemType;
        MemberInfo memberInfo = null;

        if (!col.IsComputed)
        {
            // A field takes precedence over a property of the same name.
            memberInfo = (MemberInfo)userType.GetField(col.MemberName) ?? userType.GetProperty(col.MemberName);
            if (memberInfo == null)
            {
                throw Contracts.ExceptParam(nameof(userSchemaDefinition), "No field or property with name '{0}' found in type '{1}'", col.MemberName, userType.FullName);
            }

            // Clause to handle the member that may be used to expose the cursor channel.
            // This member does not need a column.
            if ((memberInfo is FieldInfo field && field.FieldType == typeof(IChannel)) ||
                (memberInfo is PropertyInfo property && property.PropertyType == typeof(IChannel)))
            {
                continue;
            }

            GetVectorAndItemType(memberInfo, out isVector, out dataItemType);
        }
        else
        {
            var parameterType = col.ReturnType;
            if (parameterType == null)
            {
                throw Contracts.ExceptParam(nameof(userSchemaDefinition), "No return parameter found in computed column.");
            }

            GetVectorAndItemType(parameterType, "returnType", out isVector, out dataItemType);
        }

        // Infer the column name.
        var colName = string.IsNullOrEmpty(col.ColumnName) ? col.MemberName : col.ColumnName;

        // REVIEW: Because order is defined, we allow duplicate column names, since producing an IDataView
        // with duplicate column names is completely legal. Possible objection is that we should make it less
        // convenient to produce "hidden" columns, since this may not be of practical use to users.
        DataViewType colType;
        if (col.ColumnType == null)
        {
            // Infer a type as best we can.
            PrimitiveDataViewType itemType = ColumnTypeExtensions.PrimitiveTypeFromType(dataItemType);
            colType = isVector ? new VectorType(itemType) : (DataViewType)itemType;
        }
        else
        {
            // Make sure that the types are compatible with the declared type, including
            // whether it is a vector type.
            VectorType columnVectorType = col.ColumnType as VectorType;
            if (isVector != (columnVectorType != null))
            {
                throw Contracts.ExceptParam(nameof(userSchemaDefinition), "Column '{0}' is supposed to be {1}, but type of associated field '{2}' is {3}", colName, columnVectorType != null ? "vector" : "scalar", col.MemberName, isVector ? "vector" : "scalar");
            }

            DataViewType itemType = columnVectorType?.ItemType ?? col.ColumnType;
            if (itemType.RawType != dataItemType)
            {
                throw Contracts.ExceptParam(nameof(userSchemaDefinition), "Column '{0}' is supposed to have item type {1}, but associated field has type {2}", colName, itemType.RawType, dataItemType);
            }

            colType = col.ColumnType;
        }

        dstCols[dstCount++] = col.IsComputed
            ? new Column(colName, colType, col.Generator, col.Annotations)
            : new Column(colName, colType, memberInfo, col.Annotations);
    }

    // Drop the unused tail left behind by skipped members; a no-op when nothing was skipped.
    if (dstCount < dstCols.Length)
    {
        Array.Resize(ref dstCols, dstCount);
    }

    return new InternalSchemaDefinition(dstCols);
}