/// <summary>
/// Builds the schema for the partitioned columns and, when the sub loader exposes at least one
/// column, combines it with the sub loader's schema (sub loader columns first).
/// </summary>
/// <param name="ectx">The exception context (not referenced by this method body).</param>
/// <param name="cols">The partitioned columns; each one contributes a column of its primitive kind.</param>
/// <param name="subLoader">The sub loader whose schema is combined with the partitioned columns.</param>
/// <returns>
/// The partitioned-column schema alone when the sub loader has no columns; otherwise a schema
/// created from the composite of the sub loader schema followed by the partitioned-column schema.
/// </returns>
private Schema CreateSchema(IExceptionContext ectx, Column[] cols, IDataLoader subLoader)
{
    Contracts.AssertValue(cols);
    Contracts.AssertValue(subLoader);

    // Schema describing only the partitioned columns (no metadata attached).
    var partitionBuilder = new SchemaBuilder();
    var detachedColumns = cols.Select(c => new Schema.DetachedColumn(c.Name, PrimitiveType.FromKind(c.Type.Value), null));
    partitionBuilder.AddColumns(detachedColumns);
    var partitionSchema = partitionBuilder.GetSchema();

    var loaderSchema = subLoader.Schema;

    // Nothing to merge with: the partitioned columns are the whole schema.
    if (loaderSchema.Count == 0)
    {
        return partitionSchema;
    }

    var parts = new ISchema[] { loaderSchema, partitionSchema };
    return Schema.Create(new CompositeSchema(parts));
}
/// <summary>
/// Builds a simple schema for the partitioned columns and, when the sub loader exposes at least
/// one column, wraps it together with the sub loader's schema (sub loader columns first).
/// </summary>
/// <param name="ectx">The exception context handed to the partitioned-column schema.</param>
/// <param name="cols">The partitioned columns; each one contributes a name and a primitive type.</param>
/// <param name="subLoader">The sub loader whose schema is combined with the partitioned columns.</param>
/// <returns>
/// The partitioned-column schema alone when the sub loader has no columns; otherwise a composite
/// of the sub loader schema followed by the partitioned-column schema.
/// </returns>
private ISchema CreateSchema(IExceptionContext ectx, Column[] cols, IDataLoader subLoader)
{
    Contracts.AssertValue(cols);
    Contracts.AssertValue(subLoader);

    // One (name, type) pair per partitioned column, in declaration order.
    var nameTypePairs = new KeyValuePair<string, ColumnType>[cols.Length];
    for (int index = 0; index < cols.Length; ++index)
    {
        var current = cols[index];
        nameTypePairs[index] = new KeyValuePair<string, ColumnType>(current.Name, PrimitiveType.FromKind(current.Type.Value));
    }

    var partitionSchema = new SimpleSchema(ectx, nameTypePairs);
    var loaderSchema = subLoader.Schema;

    // Nothing to merge with: the partitioned columns are the whole schema.
    if (loaderSchema.ColumnCount == 0)
    {
        return partitionSchema;
    }

    return new CompositeSchema(new ISchema[] { loaderSchema, partitionSchema });
}
/// <summary>
/// Enumerates every <see cref="DataKind"/>, determines which destination kinds each source kind
/// has a standard conversion to, groups source kinds that share the same destination set, and
/// writes one "sources | destinations" line per group to the channel. Kinds for which no
/// conversion to themselves is found are reported in a final warning.
/// </summary>
public void Run()
{
    using (var ch = _host.Start("Run"))
    {
        var conversions = Conversions.Instance;
        var setComparer = new KindSetComparer();

        // Destination-kind set -> all source kinds that convert to exactly that set.
        var sourcesByDstSet = new Dictionary<HashSet<DataKind>, HashSet<DataKind>>(setComparer);
        // Source kind -> the set of kinds it converts to.
        var dstSetBySource = new Dictionary<DataKind, HashSet<DataKind>>();

        var kinds = Enum.GetValues(typeof(DataKind)).Cast<DataKind>().Distinct().OrderBy(k => k).ToArray();
        var types = kinds.Select(kind => PrimitiveType.FromKind(kind)).ToArray();
        HashSet<DataKind> nonIdentity = null;

        // For each kind and its associated type.
        foreach (var srcType in types)
        {
            ch.AssertValue(srcType);
            var info = Utils.MarshalInvoke(KindReport<int>, srcType.RawType, ch, srcType);

            Delegate del;
            bool isIdentity;

            // Collect every kind this source type can be converted to.
            var reachable = new HashSet<DataKind>();
            foreach (var dstType in types)
            {
                if (conversions.TryGetStandardConversion(srcType, dstType, out del, out isIdentity))
                {
                    reachable.Add(dstType.RawKind);
                }
            }

            // A conversion from a type to itself is expected to exist and to be the identity.
            bool hasSelfConversion = conversions.TryGetStandardConversion(srcType, srcType, out del, out isIdentity);
            if (hasSelfConversion)
            {
                ch.Assert(isIdentity);
            }
            else
            {
                Utils.Add(ref nonIdentity, srcType.RawKind);
            }

            dstSetBySource[srcType.RawKind] = reachable;

            HashSet<DataKind> sharedSources;
            if (!sourcesByDstSet.TryGetValue(reachable, out sharedSources))
            {
                sharedSources = new HashSet<DataKind>();
                sourcesByDstSet[reachable] = sharedSources;
            }
            sharedSources.Add(srcType.RawKind);
        }

        // Renders a kind set as a sorted, comma-separated list of back-quoted names.
        Func<HashSet<DataKind>, string> render =
            set => string.Join(", ", set.OrderBy(k => k).Select(k => '`' + k.GetString() + '`'));

        // Now perform the final outputs.
        foreach (var kind in kinds)
        {
            var dsts = dstSetBySource[kind];
            HashSet<DataKind> srcs;
            if (sourcesByDstSet.TryGetValue(dsts, out srcs))
            {
                ch.Assert(Utils.Size(dsts) >= 1);
                ch.Assert(Utils.Size(srcs) >= 1);

                string srcStrings = render(srcs);
                string dstStrings = render(dsts);

                // Each group is emitted once; later kinds in the same group find nothing here.
                sourcesByDstSet.Remove(dsts);
                ch.Info(srcStrings + " | " + dstStrings);
            }
        }

        if (Utils.Size(nonIdentity) > 0)
        {
            ch.Warning("The following kinds did not have an identity conversion: {0}", string.Join(", ", nonIdentity.OrderBy(k => k).Select(DataKindExtensions.GetString)));
        }
    }
}
/// <summary>
/// Builds an <see cref="InternalSchemaDefinition"/> for <paramref name="userType"/> by resolving
/// each column of the schema definition to a field, a property, or a computed-column generator,
/// and by inferring or validating the column type against the member's kind.
/// </summary>
/// <param name="userType">The user type whose fields and properties back the columns.</param>
/// <param name="userSchemaDefinition">
/// The schema definition to resolve, or null to use <c>SchemaDefinition.Create(userType)</c>.
/// </param>
/// <returns>The internal schema definition wrapping the resolved columns.</returns>
/// <remarks>
/// Throws (via <c>Contracts.ExceptParam</c>) when a member name is null, when a non-computed
/// member cannot be found on the type, when a computed column has no return type, or when a
/// declared column type disagrees with the member on vector-ness or item kind.
/// </remarks>
public static InternalSchemaDefinition Create(Type userType, SchemaDefinition userSchemaDefinition)
{
    Contracts.AssertValue(userType);
    Contracts.AssertValueOrNull(userSchemaDefinition);

    userSchemaDefinition = userSchemaDefinition ?? SchemaDefinition.Create(userType);

    int columnCount = userSchemaDefinition.Count;
    Column[] dstCols = new Column[columnCount];

    for (int i = 0; i < columnCount; ++i)
    {
        var col = userSchemaDefinition[i];
        if (col.MemberName == null)
        {
            throw Contracts.ExceptParam(nameof(userSchemaDefinition), "Null field name detected in schema definition");
        }

        bool isVector;
        DataKind kind;
        MemberInfo memberInfo = null;

        if (col.IsComputed)
        {
            var returnType = col.ReturnType;
            if (returnType == null)
            {
                throw Contracts.ExceptParam(nameof(userSchemaDefinition), "No return parameter found in computed column.");
            }
            GetVectorAndKind(returnType, "returnType", out isVector, out kind);
        }
        else
        {
            // Fields take precedence; a property is only looked up when no field matches.
            memberInfo = (MemberInfo)userType.GetField(col.MemberName) ?? userType.GetProperty(col.MemberName);
            if (memberInfo == null)
            {
                throw Contracts.ExceptParam(nameof(userSchemaDefinition), "No field or property with name '{0}' found in type '{1}'", col.MemberName, userType.FullName);
            }

            // Clause to handle the member that may be used to expose the cursor channel.
            // This member does not need a column.
            // NOTE(review): skipping here leaves dstCols[i] at its default value - confirm that
            // consumers of the resulting array tolerate such a slot.
            var asField = memberInfo as FieldInfo;
            var asProperty = memberInfo as PropertyInfo;
            bool exposesChannel =
                (asField != null && asField.FieldType == typeof(IChannel)) ||
                (asProperty != null && asProperty.PropertyType == typeof(IChannel));
            if (exposesChannel)
            {
                continue;
            }

            GetVectorAndKind(memberInfo, out isVector, out kind);
        }

        // Infer the column name.
        var colName = string.IsNullOrEmpty(col.ColumnName) ? col.MemberName : col.ColumnName;

        // REVIEW: Because order is defined, we allow duplicate column names, since producing an IDataView
        // with duplicate column names is completely legal. Possible objection is that we should make it less
        // convenient to produce "hidden" columns, since this may not be of practical use to users.

        ColumnType colType;
        if (col.ColumnType != null)
        {
            // Make sure that the types are compatible with the declared type, including
            // whether it is a vector type.
            if (isVector != col.ColumnType.IsVector)
            {
                throw Contracts.ExceptParam(nameof(userSchemaDefinition), "Column '{0}' is supposed to be {1}, but type of associated field '{2}' is {3}", colName, col.ColumnType.IsVector ? "vector" : "scalar", col.MemberName, isVector ? "vector" : "scalar");
            }
            if (kind != col.ColumnType.ItemType.RawKind)
            {
                throw Contracts.ExceptParam(nameof(userSchemaDefinition), "Column '{0}' is supposed to have item kind {1}, but associated field has kind {2}", colName, col.ColumnType.ItemType.RawKind, kind);
            }
            colType = col.ColumnType;
        }
        else
        {
            // Infer a type as best we can.
            PrimitiveType itemType = PrimitiveType.FromKind(kind);
            if (isVector)
            {
                colType = new VectorType(itemType);
            }
            else
            {
                colType = itemType;
            }
        }

        // Computed columns are driven by their generator; all others by the resolved member.
        if (col.IsComputed)
        {
            dstCols[i] = new Column(colName, colType, col.Generator, col.Metadata);
        }
        else
        {
            dstCols[i] = new Column(colName, colType, memberInfo, col.Metadata);
        }
    }

    return new InternalSchemaDefinition(dstCols);
}
/// <summary>
/// Determines the destination item type for converting a source column to <paramref name="kind"/>
/// (optionally as a key with <paramref name="range"/>), checks that a standard conversion from the
/// source item type exists, and on success produces the extra column info.
/// </summary>
/// <param name="ectx">The exception context used for assertions.</param>
/// <param name="info">The source column info (source type and optional slot type).</param>
/// <param name="kind">The requested destination data kind.</param>
/// <param name="range">The requested key range, or null when no key range was specified.</param>
/// <param name="itemType">Receives the destination item type; assigned on every return path.</param>
/// <param name="ex">Receives the extra column info on success; null on failure.</param>
/// <returns>True when the conversion is legal and <paramref name="ex"/> was created.</returns>
private static bool TryCreateEx(IExceptionContext ectx, ColInfo info, DataKind kind, KeyRange range, out PrimitiveType itemType, out ColInfoEx ex)
{
    ectx.AssertValue(info);
    ectx.Assert(Enum.IsDefined(typeof(DataKind), kind));

    ex = null;

    var typeSrc = info.TypeSrc;
    var srcItemType = typeSrc.ItemType;
    bool hasRange = range != null;

    if (hasRange)
    {
        // An explicit key range is only accepted for key or text sources.
        itemType = TypeParsingUtils.ConstructKeyType(kind, range);
        bool srcIsKeyOrText = srcItemType.IsKey || srcItemType.IsText;
        if (!srcIsKeyOrText)
        {
            return false;
        }
    }
    else if (srcItemType.IsKey && KeyType.IsValidDataKind(kind))
    {
        var srcKey = srcItemType.AsKey;
        ectx.Assert(KeyType.IsValidDataKind(srcKey.RawKind));
        int srcCount = srcKey.Count;
        // Technically, it's an error for the counts not to match, but we'll let the Conversions
        // code return false below. There's a possibility we'll change the standard conversions to
        // map out of bounds values to zero, in which case, this is the right thing to do.
        ulong maxForKind = kind.ToMaxInt();
        int dstCount = (ulong)srcCount > maxForKind ? (int)maxForKind : srcCount;
        itemType = new KeyType(kind, srcKey.Min, dstCount, srcKey.Contiguous);
    }
    else
    {
        itemType = PrimitiveType.FromKind(kind);
        // A key source combined with a kind that is not valid for keys cannot be converted.
        if (srcItemType.IsKey)
        {
            return false;
        }
    }

    // Ensure that the conversion is legal. We don't actually cache the delegate here. It will get
    // re-fetched by the utils code when needed.
    bool identity;
    Delegate del;
    bool convertible = Conversions.Instance.TryGetStandardConversion(srcItemType, itemType, out del, out identity);
    if (!convertible)
    {
        return false;
    }

    // Vector sources produce a vector of the destination item type.
    ColumnType typeDst = typeSrc.IsVector ? new VectorType(itemType, typeSrc.AsVector) : (ColumnType)itemType;

    // An output column is transposable iff the input column was transposable.
    VectorType slotType = info.SlotTypeSrc != null ? new VectorType(itemType, info.SlotTypeSrc) : null;

    ex = new ColInfoEx(kind, hasRange, typeDst, slotType);
    return true;
}