/// <summary>
/// Initializes the catalog base with the supplied host environment.
/// </summary>
/// <param name="env">
/// The host environment. It is null-checked via <c>Contracts.CheckValue</c> and then stored in
/// <c>Environment</c>.
/// </param>
/// <param name="registrationName">
/// A registration name that must pass <c>CheckNonEmpty</c>. This constructor only validates it;
/// the value is not used for anything else here.
/// </param>
private protected TrainCatalogBase(IHostEnvironment env, string registrationName)
{
    // The environment must be validated first: the second check is issued through it.
    Contracts.CheckValue(env, nameof(env));
    env.CheckNonEmpty(registrationName, nameof(registrationName));

    Environment = env;
}
// REVIEW: It would be nice to support propagation of select metadata.
/// <summary>
/// Builds an <see cref="IDataView"/> over <paramref name="input"/> for the column pair
/// (<paramref name="src"/>, <paramref name="dst"/>) using <paramref name="mapper"/>, and returns it
/// wrapped in an <c>OpaqueDataView</c>.
/// </summary>
/// <typeparam name="TSrc">Input type of the mapper; must equal the raw type of <paramref name="typeSrc"/>.</typeparam>
/// <typeparam name="TDst">Output type of the mapper; must equal the raw type of <paramref name="typeDst"/>.</typeparam>
/// <param name="env">The host environment; must not be null. All later checks and exceptions go through it.</param>
/// <param name="name">A non-empty name forwarded to the underlying implementation.</param>
/// <param name="input">The input data view; must not be null.</param>
/// <param name="src">Non-empty name of a column that must exist in the schema of <paramref name="input"/>.</param>
/// <param name="dst">Non-empty name used as the second argument of the <c>Column</c> pair.</param>
/// <param name="typeSrc">The column type the mapper consumes.</param>
/// <param name="typeDst">The column type the mapper produces.</param>
/// <param name="mapper">The value mapper; must not be null.</param>
/// <param name="keyValueGetter">
/// Optional. When supplied, the item type of <paramref name="typeDst"/> must be a <c>KeyDataViewType</c>.
/// </param>
/// <param name="slotNamesGetter">
/// Optional. When supplied, <paramref name="typeDst"/> must be a known-size vector.
/// </param>
/// <returns>An <c>OpaqueDataView</c> wrapping the constructed implementation.</returns>
/// <remarks>
/// If the actual type of the <paramref name="src"/> column does not have the same size and item type as
/// <paramref name="typeSrc"/>, a standard conversion is looked up; when none exists, a parameter exception
/// created through <c>env.ExceptParam</c> is thrown.
/// </remarks>
public static IDataView Create<TSrc, TDst>(IHostEnvironment env, string name, IDataView input,
    string src, string dst, DataViewType typeSrc, DataViewType typeDst, ValueMapper<TSrc, TDst> mapper,
    ValueGetter<VBuffer<ReadOnlyMemory<char>>> keyValueGetter = null,
    ValueGetter<VBuffer<ReadOnlyMemory<char>>> slotNamesGetter = null)
{
    // Argument validation. The environment goes first because every later check is issued through it.
    Contracts.CheckValue(env, nameof(env));
    env.CheckNonEmpty(name, nameof(name));
    env.CheckValue(input, nameof(input));
    env.CheckNonEmpty(src, nameof(src));
    env.CheckNonEmpty(dst, nameof(dst));
    env.CheckValue(typeSrc, nameof(typeSrc));
    env.CheckValue(typeDst, nameof(typeDst));
    env.CheckValue(mapper, nameof(mapper));
    env.Check(keyValueGetter == null || typeDst.GetItemType() is KeyDataViewType);
    env.Check(slotNamesGetter == null || typeDst.IsKnownSizeVector());

    // The declared column types have to agree with the generic arguments of the mapper.
    if (typeof(TSrc) != typeSrc.RawType)
    {
        throw env.ExceptParam(nameof(mapper),
            "The source column type '{0}' doesn't match the input type of the mapper", typeSrc);
    }

    if (typeof(TDst) != typeDst.RawType)
    {
        throw env.ExceptParam(nameof(mapper),
            "The destination column type '{0}' doesn't match the output type of the mapper", typeDst);
    }

    // Locate the source column in the input schema.
    if (!input.Schema.TryGetColumnIndex(src, out int srcIndex))
    {
        throw env.ExceptParam(nameof(src), "The input data doesn't have a column named '{0}'", src);
    }

    var originalType = input.Schema[srcIndex].Type;

    // REVIEW: Ideally this should support vector-type conversion. It currently doesn't.
    // Start from "no conversion needed"; the conversion lookup below only runs (and overwrites
    // both locals through its out parameters) when the actual column type differs from typeSrc.
    bool isIdentity = true;
    Delegate converter = null;
    if (!originalType.SameSizeAndItemType(typeSrc)
        && !Conversions.DefaultInstance.TryGetStandardConversion(originalType, typeSrc, out converter, out isIdentity))
    {
        throw env.ExceptParam(nameof(mapper),
            "The type of column '{0}', '{1}', cannot be converted to the input type of the mapper '{2}'",
            src, originalType, typeSrc);
    }

    var column = new Column(src, dst);
    IDataView view;
    if (!isIdentity)
    {
        // The original column's raw type is only known at run time, so the generic factory is closed
        // over (original raw type, TSrc, TDst) via reflection. The <int, int, int> instantiation below
        // exists solely to obtain the generic method definition.
        Func<IHostEnvironment, string, IDataView, Column, DataViewType, ValueMapper<int, int>,
            ValueMapper<int, int>, ValueGetter<VBuffer<ReadOnlyMemory<char>>>, ValueGetter<VBuffer<ReadOnlyMemory<char>>>,
            Impl<int, int, int>> factory = CreateImpl<int, int, int>;
        var openFactory = factory.GetMethodInfo().GetGenericMethodDefinition();
        var closedFactory = openFactory.MakeGenericMethod(originalType.RawType, typeof(TSrc), typeof(TDst));
        var factoryArgs = new object[] { env, name, input, column, typeDst, converter, mapper, keyValueGetter, slotNamesGetter };
        view = (IDataView)closedFactory.Invoke(null, factoryArgs);
    }
    else
    {
        // No conversion step: the mapper is applied directly to the column values.
        view = new Impl<TSrc, TDst, TDst>(env, name, input, column, typeDst, mapper,
            keyValueGetter: keyValueGetter, slotNamesGetter: slotNamesGetter);
    }

    return new OpaqueDataView(view);
}
/// <summary>
/// Initializes the catalog base by registering a host with the supplied environment.
/// </summary>
/// <param name="env">
/// The host environment. It is null-checked via <c>Contracts.CheckValue</c> before use.
/// </param>
/// <param name="registrationName">
/// A non-empty name (enforced by <c>CheckNonEmpty</c>) passed to <c>env.Register</c>; the result of
/// that call is stored in <c>Host</c>.
/// </param>
protected internal TrainCatalogBase(IHostEnvironment env, string registrationName)
{
    // The environment must be validated first: the second check is issued through it.
    Contracts.CheckValue(env, nameof(env));
    env.CheckNonEmpty(registrationName, nameof(registrationName));

    Host = env.Register(registrationName);
}
/// <summary>
/// Extract all values of one column of the data view in a form of an <see cref="IEnumerable{T}"/>.
/// </summary>
/// <typeparam name="T">The type of the values. This must match the actual column type.</typeparam>
/// <param name="data">The data view to get the column from.</param>
/// <param name="env">The current host environment.</param>
/// <param name="columnName">The name of the column to extract.</param>
/// <returns>The values of the requested column as a sequence of <typeparamref name="T"/>.</returns>
/// <remarks>
/// Supported mappings: the column's raw type equals <typeparamref name="T"/>; a text column read as
/// <see cref="string"/>; a vector column read as an array whose element type equals the column's item
/// type; a vector of text read as a <see cref="string"/> array. Any other combination results in an
/// exception created through <paramref name="env"/>.
/// </remarks>
public static IEnumerable<T> GetColumn<T>(this IDataView data, IHostEnvironment env, string columnName)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(data, nameof(data));
    env.CheckNonEmpty(columnName, nameof(columnName));

    if (!data.Schema.TryGetColumnIndex(columnName, out int index))
    {
        throw env.ExceptSchemaMismatch(nameof(columnName), "input", columnName);
    }

    // Two questions drive the dispatch below:
    //  1. Is T an array type? If so, the column must be a vector and is handed to the array helpers.
    //  2. Does T (or its element type, for arrays) equal the column's raw (item) type?
    //     If so, values are mapped directly; otherwise a conversion delegate is required.
    var colType = data.Schema[index].Type;
    var requested = typeof(T);

    // Case: exact type match, no conversion needed.
    if (colType.RawType == requested)
    {
        return GetColumnDirect<T>(data, index);
    }

    // Case: scalar text column requested as string.
    if (requested == typeof(string) && colType is TextType)
    {
        Func<ReadOnlyMemory<char>, string> toText = chars => chars.ToString();

        // The <T, int> shape of this delegate is only a vehicle to reach the generic method
        // definition; it is re-closed over (T, column raw type) right after.
        Func<IDataView, int, Func<int, T>, IEnumerable<T>> probe = GetColumnConvert;
        var openMethod = probe.Method.GetGenericMethodDefinition();
        var closedMethod = openMethod.MakeGenericMethod(requested, colType.RawType);
        return (IEnumerable<T>)closedMethod.Invoke(null, new object[] { data, index, toText });
    }

    // Case: array output, which requires a vector column.
    if (requested.IsArray)
    {
        if (!colType.IsVector)
        {
            throw env.ExceptSchemaMismatch(nameof(columnName), "input", columnName, "vector", "scalar");
        }

        var itemType = requested.GetElementType();
        var columnItemType = colType.ItemType;

        if (itemType == columnItemType.RawType)
        {
            // Element type matches the vector's item type: map items directly.
            Func<IDataView, int, IEnumerable<int[]>> probe = GetColumnArrayDirect<int>;
            var closedMethod = probe.Method.GetGenericMethodDefinition().MakeGenericMethod(itemType);
            return (IEnumerable<T>)closedMethod.Invoke(null, new object[] { data, index });
        }

        if (itemType == typeof(string) && columnItemType is TextType)
        {
            // Vector of text requested as string[]: convert every item to string.
            Func<ReadOnlyMemory<char>, string> toText = chars => chars.ToString();
            Func<IDataView, int, Func<int, long>, IEnumerable<long[]>> probe = GetColumnArrayConvert;
            var closedMethod = probe.Method.GetGenericMethodDefinition()
                .MakeGenericMethod(itemType, columnItemType.RawType);
            return (IEnumerable<T>)closedMethod.Invoke(null, new object[] { data, index, toText });
        }

        // Unsupported element type: fall through to the failure below.
    }

    throw env.Except($"Could not map a data view column '{columnName}' of type {colType} to {typeof(T)}.");
}