/// <summary>
/// Builds an <see cref="InternalSchemaDefinition"/> for <paramref name="userType"/> by binding every entry of
/// <paramref name="userSchemaDefinition"/> either to a public field/property of the type or, for computed
/// columns, to the entry's generator.
/// </summary>
/// <remarks>
/// An exception produced by <c>Contracts.ExceptParam</c> (naming <paramref name="userSchemaDefinition"/>) is thrown when:
/// an entry has a <c>null</c> member name; no field or property with the entry's member name exists on
/// <paramref name="userType"/>; a computed entry has no return type; or an explicitly declared column type
/// disagrees with the member on vector-vs-scalar or on the item's raw type.
/// </remarks>
/// <param name="userType">The user-defined type whose members back the columns.</param>
/// <param name="userSchemaDefinition">The schema definition to bind. If <c>null</c>, the definition returned by
/// <c>SchemaDefinition.Create(userType)</c> is used.</param>
/// <returns>The internal schema definition. Entries whose member is of type <see cref="IChannel"/> contribute
/// no column, so the result may hold fewer columns than <paramref name="userSchemaDefinition"/> has entries.</returns>
public static InternalSchemaDefinition Create(Type userType, SchemaDefinition userSchemaDefinition)
{
    Contracts.AssertValue(userType);
    Contracts.AssertValueOrNull(userSchemaDefinition);

    if (userSchemaDefinition == null)
    {
        userSchemaDefinition = SchemaDefinition.Create(userType);
    }

    // Sized for the case where every entry yields a column. Entries can be skipped (see the IChannel clause
    // below), so columns are written through a separate index and the array is trimmed at the end; indexing
    // by the loop counter would leave null holes in the result.
    Column[] dstCols = new Column[userSchemaDefinition.Count];
    int dstCount = 0;

    for (int i = 0; i < userSchemaDefinition.Count; ++i)
    {
        var col = userSchemaDefinition[i];
        if (col.MemberName == null)
        {
            throw Contracts.ExceptParam(nameof(userSchemaDefinition), "Null field name detected in schema definition");
        }

        bool isVector;
        Type dataItemType;
        MemberInfo memberInfo = null;

        if (!col.IsComputed)
        {
            // Fields take precedence; fall back to a property of the same name.
            memberInfo = userType.GetField(col.MemberName);
            if (memberInfo == null)
            {
                memberInfo = userType.GetProperty(col.MemberName);
            }

            if (memberInfo == null)
            {
                throw Contracts.ExceptParam(nameof(userSchemaDefinition), "No field or property with name '{0}' found in type '{1}'", col.MemberName, userType.FullName);
            }

            // Clause to handle the field that may be used to expose the cursor channel.
            // This field does not need a column.
            var fieldInfo = memberInfo as FieldInfo;
            var propertyInfo = memberInfo as PropertyInfo;
            if ((fieldInfo != null && fieldInfo.FieldType == typeof(IChannel)) ||
                (propertyInfo != null && propertyInfo.PropertyType == typeof(IChannel)))
            {
                continue;
            }

            GetVectorAndItemType(memberInfo, out isVector, out dataItemType);
        }
        else
        {
            var parameterType = col.ReturnType;
            if (parameterType == null)
            {
                throw Contracts.ExceptParam(nameof(userSchemaDefinition), "No return parameter found in computed column.");
            }

            GetVectorAndItemType(parameterType, "returnType", out isVector, out dataItemType);
        }

        // Infer the column name.
        var colName = string.IsNullOrEmpty(col.ColumnName) ? col.MemberName : col.ColumnName;

        // REVIEW: Because order is defined, we allow duplicate column names, since producing an IDataView
        // with duplicate column names is completely legal. Possible objection is that we should make it less
        // convenient to produce "hidden" columns, since this may not be of practical use to users.
        DataViewType colType;
        if (col.ColumnType == null)
        {
            // Infer a type as best we can.
            PrimitiveDataViewType itemType = ColumnTypeExtensions.PrimitiveTypeFromType(dataItemType);
            colType = isVector ? new VectorType(itemType) : (DataViewType)itemType;
        }
        else
        {
            // Make sure that the types are compatible with the declared type, including
            // whether it is a vector type.
            VectorType columnVectorType = col.ColumnType as VectorType;
            if (isVector != (columnVectorType != null))
            {
                throw Contracts.ExceptParam(nameof(userSchemaDefinition), "Column '{0}' is supposed to be {1}, but type of associated field '{2}' is {3}", colName, columnVectorType != null ? "vector" : "scalar", col.MemberName, isVector ? "vector" : "scalar");
            }

            DataViewType itemType = columnVectorType?.ItemType ?? col.ColumnType;
            if (itemType.RawType != dataItemType)
            {
                throw Contracts.ExceptParam(nameof(userSchemaDefinition), "Column '{0}' is supposed to have item type {1}, but associated field has type {2}", colName, itemType.RawType, dataItemType);
            }

            colType = col.ColumnType;
        }

        dstCols[dstCount++] = col.IsComputed
            ? new Column(colName, colType, col.Generator, col.Annotations)
            : new Column(colName, colType, memberInfo, col.Annotations);
    }

    // Drop the unused tail left behind by skipped entries so that no null column is exposed.
    if (dstCount < dstCols.Length)
    {
        Array.Resize(ref dstCols, dstCount);
    }

    return new InternalSchemaDefinition(dstCols);
}
/// <summary>
/// Builds an <see cref="InternalSchemaDefinition"/> for <paramref name="userType"/> from the schema definition
/// that <c>SchemaDefinition.Create</c> produces for the given <paramref name="direction"/>.
/// </summary>
/// <param name="userType">The user-defined type to build the schema for.</param>
/// <param name="direction">The direction forwarded to <c>SchemaDefinition.Create</c>.</param>
/// <returns>The internal schema definition bound to <paramref name="userType"/>.</returns>
public static InternalSchemaDefinition Create(Type userType, SchemaDefinition.Direction direction)
{
    // Delegate to the overload that binds an explicit schema definition.
    return Create(userType, SchemaDefinition.Create(userType, direction));
}
/// <summary>
/// Wraps an in-memory collection of user-defined items in a new <see cref="IDataView"/>.
/// The caller keeps ownership of <paramref name="data"/>; the resulting data view never
/// alters its contents.
/// Because an <see cref="IDataView"/> is assumed to be immutable, the caller is expected not to
/// modify <paramref name="data"/> while the data view is being actively cursored.
///
/// A typical usage pattern: create the data view and train a predictor; once that predictor is
/// fully trained, modify the underlying collection and train another predictor.
/// </summary>
/// <typeparam name="TRow">The user-defined item type.</typeparam>
/// <param name="env">The host environment used to create the data view.</param>
/// <param name="data">The collection to wrap.</param>
/// <param name="schemaDefinition">Optional schema definition for the data view. If <c>null</c>,
/// the schema definition is inferred from <typeparamref name="TRow"/>.</param>
/// <returns>The constructed <see cref="IDataView"/>.</returns>
public static IDataView CreateDataView<TRow>(this IHostEnvironment env, IList<TRow> data, SchemaDefinition schemaDefinition = null)
    where TRow : class
{
    // The environment is validated first because the remaining checks are issued through it.
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(data, nameof(data));
    env.CheckValueOrNull(schemaDefinition);

    IDataView view = DataViewConstructionUtils.CreateFromList(env, data, schemaDefinition);
    return view;
}
/// <summary>
/// Creates a prediction engine that produces predictions on demand.
/// </summary>
/// <typeparam name="TSrc">The user-defined input type.</typeparam>
/// <typeparam name="TDst">The user-defined output type.</typeparam>
/// <param name="env">The host environment to use.</param>
/// <param name="transformer">The transformer handed to the engine.</param>
/// <param name="ignoreMissingColumns">Whether to ignore missing columns in the data view.</param>
/// <param name="inputSchemaDefinition">Optional input schema. If <c>null</c>, the schema is inferred from the <typeparamref name="TSrc"/> type.</param>
/// <param name="outputSchemaDefinition">Optional output schema. If <c>null</c>, the schema is inferred from the <typeparamref name="TDst"/> type.</param>
/// <returns>The constructed prediction engine.</returns>
internal static PredictionEngine<TSrc, TDst> CreatePredictionEngine<TSrc, TDst>(
    this IHostEnvironment env,
    ITransformer transformer,
    bool ignoreMissingColumns = false,
    SchemaDefinition inputSchemaDefinition = null,
    SchemaDefinition outputSchemaDefinition = null)
    where TSrc : class
    where TDst : class, new()
{
    // The environment is validated first because the remaining checks are issued through it.
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(transformer, nameof(transformer));
    env.CheckValueOrNull(inputSchemaDefinition);
    env.CheckValueOrNull(outputSchemaDefinition);

    var engine = new PredictionEngine<TSrc, TDst>(env, transformer, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition);
    return engine;
}
/// <summary>
/// Creates a prediction engine that operates on batches.
/// </summary>
/// <typeparam name="TSrc">The user-defined input type.</typeparam>
/// <typeparam name="TDst">The user-defined output type.</typeparam>
/// <param name="env">The host environment to use.</param>
/// <param name="dataPipe">The transformation pipe, which may or may not include a scorer.</param>
/// <param name="ignoreMissingColumns">Whether to ignore missing columns in the data view.</param>
/// <param name="inputSchemaDefinition">Optional input schema. If <c>null</c>, the schema is inferred from the <typeparamref name="TSrc"/> type.</param>
/// <param name="outputSchemaDefinition">Optional output schema. If <c>null</c>, the schema is inferred from the <typeparamref name="TDst"/> type.</param>
/// <returns>The constructed batch prediction engine.</returns>
internal static BatchPredictionEngine<TSrc, TDst> CreateBatchPredictionEngine<TSrc, TDst>(
    this IHostEnvironment env,
    IDataView dataPipe,
    bool ignoreMissingColumns = false,
    SchemaDefinition inputSchemaDefinition = null,
    SchemaDefinition outputSchemaDefinition = null)
    where TSrc : class
    where TDst : class, new()
{
    // The environment is validated first because the remaining checks are issued through it.
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(dataPipe, nameof(dataPipe));
    env.CheckValueOrNull(inputSchemaDefinition);
    env.CheckValueOrNull(outputSchemaDefinition);

    var engine = new BatchPredictionEngine<TSrc, TDst>(env, dataPipe, ignoreMissingColumns, inputSchemaDefinition, outputSchemaDefinition);
    return engine;
}
/// <summary>
/// Builds a <see cref="TypedCursorable{TRow}"/> over the supplied data view.
/// </summary>
/// <param name="env">Host environment.</param>
/// <param name="data">The underlying data view.</param>
/// <param name="ignoreMissingColumns">Whether to ignore missing columns in the data view.</param>
/// <param name="schemaDefinition">The optional user-provided schema. When <c>null</c>, the internal schema is
/// created from <c>TRow</c> with <c>SchemaDefinition.Direction.Write</c>.</param>
/// <returns>The constructed Cursorable.</returns>
public static TypedCursorable<TRow> Create(IHostEnvironment env, IDataView data, bool ignoreMissingColumns, SchemaDefinition schemaDefinition)
{
    Contracts.AssertValue(env);
    env.AssertValue(data);
    env.AssertValueOrNull(schemaDefinition);

    Type rowType = typeof(TRow);
    ValidateUserType(schemaDefinition, rowType, data);

    InternalSchemaDefinition outSchema;
    if (schemaDefinition != null)
    {
        // Bind the schema supplied by the caller.
        outSchema = InternalSchemaDefinition.Create(rowType, schemaDefinition);
    }
    else
    {
        // No schema supplied: build one from the row type for the Write direction.
        outSchema = InternalSchemaDefinition.Create(rowType, SchemaDefinition.Direction.Write);
    }

    return new TypedCursorable<TRow>(env, data, ignoreMissingColumns, outSchema);
}