/// <summary>
/// Verifies that every statically declared column exists in <paramref name="schema"/> and that its
/// actual type can be expressed as the declared static type, throwing an exception otherwise.
/// </summary>
/// <param name="ectx">The context on which to throw exceptions</param>
/// <param name="schema">The schema to check</param>
public void Check(IExceptionContext ectx, ISchema schema)
{
    Contracts.AssertValue(ectx);
    ectx.AssertValue(schema);

    foreach (var declared in Pairs)
    {
        var name = declared.Key;
        var staticType = declared.Value;

        if (!schema.TryGetColumnIndex(name, out int index))
        {
            throw ectx.ExceptParam(nameof(schema), $"Column named '{name}' was not found");
        }

        var column = RowColumnUtils.GetColumn(schema, index);
        var actualType = GetTypeOrNull(column);

        // When the actual type is recognized, assignability decides compatibility, which lets for
        // example Key<uint, string> be considered compatible with Key<uint>.
        // When it is not recognized we cannot verify it directly, but we can at least verify that
        // the statically declared type should not have corresponded to a recognized type.
        bool needsRecheck = actualType != null
            ? !staticType.IsAssignableFromStaticPipeline(actualType)
            : IsStandard(ectx, staticType);
        if (!needsRecheck)
        {
            continue;
        }

        if (!staticType.IsAssignableFromStaticPipeline(actualType))
        {
            throw ectx.ExceptParam(nameof(schema),
                $"Column '{name}' of type '{column.Type}' cannot be expressed statically as type '{staticType}'.");
        }
    }
}
/// <summary>
/// Builds one output column description per column pair of the parent. Each description uses the
/// pair's output name, the matching entry of <c>_types</c>, and a metadata row obtained from the
/// mapped input column with a filter that accepts only the slot-names kind.
/// </summary>
/// <returns>An array with one <see cref="RowMapperColumnInfo"/> per column pair.</returns>
public override RowMapperColumnInfo[] GetOutputColumns()
{
    var pairs = _parent.ColumnPairs;
    var columns = new RowMapperColumnInfo[pairs.Length];
    for (int index = 0; index < columns.Length; index++)
    {
        var slotNamesOnly = RowColumnUtils.GetMetadataAsRow(
            InputSchema,
            ColMapNewToOld[index],
            kind => kind == MetadataUtils.Kinds.SlotNames);
        columns[index] = new RowMapperColumnInfo(pairs[index].output, _types[index], slotNamesOnly);
    }
    return columns;
}
/// <summary>
/// Returns the training statistics as a row, or <c>null</c> when no statistics are held.
/// The statistics columns are requested without a parent predictor.
/// </summary>
/// <param name="schema">The role-mapped schema forwarded to the statistics object.</param>
/// <returns>A row made of the statistics columns, or <c>null</c> if <c>_stats</c> is null.</returns>
public IRow GetStatsIRowOrNull(RoleMappedSchema schema)
{
    if (_stats == null)
    {
        return null;
    }

    VBuffer<ReadOnlyMemory<char>> slotNames = default;
    var statColumns = new List<IColumn>();
    _stats.AddStatsColumns(statColumns, null, schema, ref slotNames);

    IColumn[] columnArray = statColumns.ToArray();
    return RowColumnUtils.GetRow(null, columnArray);
}
/// <summary>
/// Returns the training statistics as a row, or <c>null</c> when no statistics are held.
/// Feature slot names are looked up from <paramref name="schema"/> first and handed to the
/// statistics object together with this predictor.
/// </summary>
/// <param name="schema">The role-mapped schema used to fetch slot names and build the columns.</param>
/// <returns>A row made of the statistics columns, or <c>null</c> if <c>_stats</c> is null.</returns>
public override IRow GetStatsIRowOrNull(RoleMappedSchema schema)
{
    if (_stats == null)
    {
        return null;
    }

    // Slot names of the feature column, sized by the number of weights.
    VBuffer<DvText> slotNames = default;
    MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, Weight.Length, ref slotNames);

    // Let the statistics object append its columns.
    var statColumns = new List<IColumn>();
    _stats.AddStatsColumns(statColumns, this, schema, ref slotNames);

    IColumn[] columnArray = statColumns.ToArray();
    return RowColumnUtils.GetRow(null, columnArray);
}
/// <summary>
/// Builds a two-column summary row: a scalar "Bias" column and a "Weights" vector column whose
/// metadata row holds the feature slot names fetched from <paramref name="schema"/>.
/// </summary>
/// <param name="schema">The role-mapped schema used to fetch the feature slot names.</param>
/// <returns>A row containing the "Bias" and "Weights" columns, in that order.</returns>
public virtual IRow GetSummaryIRowOrNull(RoleMappedSchema schema)
{
    int weightCount = Weight.Length;

    // Slot names become the metadata attached to the weights column.
    VBuffer<DvText> slotNames = default;
    MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, weightCount, ref slotNames);
    var slotNamesType = new VectorType(TextType.Instance, weightCount);
    var slotNamesColumn = RowColumnUtils.GetColumn(MetadataUtils.Kinds.SlotNames, slotNamesType, ref slotNames);
    var weightsMetadata = RowColumnUtils.GetRow(null, slotNamesColumn);

    var weightsType = new VectorType(NumberType.R4, weightCount);

    // The bias and the weight columns.
    var biasValue = Bias;
    var biasColumn = RowColumnUtils.GetColumn("Bias", NumberType.R4, ref biasValue);
    var weightValues = Weight;
    var weightsColumn = RowColumnUtils.GetColumn("Weights", weightsType, ref weightValues, weightsMetadata);

    return RowColumnUtils.GetRow(null, biasColumn, weightsColumn);
}
/// <summary>
/// Prepares the hashing pipeline over a single constant column "Foo" of the given type: builds the
/// input row on a fresh <c>Counted</c>, maps it through a hash transformer producing "Bar", and
/// stores the resulting getter in <c>_vecGetter</c> (vector types) or <c>_getter</c> (otherwise).
/// </summary>
/// <param name="val">The constant value exposed by the "Foo" column.</param>
/// <param name="type">The column type of "Foo".</param>
/// <param name="hashBits">
/// Number of hash bits. The default of 20 corresponds to roughly one million features, which is a
/// nice, typical number.
/// </param>
private void InitMap<T>(T val, ColumnType type, int hashBits = 20)
{
    var sourceColumn = RowColumnUtils.GetColumn("Foo", type, ref val);
    _counted = new Counted();
    var sourceRow = RowColumnUtils.GetRow(_counted, sourceColumn);

    var columnInfo = new HashTransformer.ColumnInfo("Foo", "Bar", hashBits: hashBits);
    var transformer = new HashTransformer(_env, new[] { columnInfo });
    var rowMapper = transformer.GetRowToRowMapper(sourceRow.Schema);
    rowMapper.Schema.TryGetColumnIndex("Bar", out int barIndex);
    var hashedRow = rowMapper.GetRow(sourceRow, c => c == barIndex, out var _);

    if (!type.IsVector)
    {
        _getter = hashedRow.GetGetter<uint>(barIndex);
    }
    else
    {
        _vecGetter = hashedRow.GetGetter<VBuffer<uint>>(barIndex);
    }
}
public void AssertStaticKeys()
{
    // Exercises AssertStatic on key-typed columns: builds a one-row data view with four key columns
    // whose key-value metadata differs (text, int, wrongly-shaped, absent), checks that matching
    // static declarations pass, and that mismatching ones throw.
    var env = new ConsoleEnvironment(0, verbose: true);
    var counted = new MetaCounted();

    // We'll test a few things here. First, the case where the key-value metadata is text.
    var metaValues1 = new VBuffer<ReadOnlyMemory<char>>(3, new[] { "a".AsMemory(), "b".AsMemory(), "c".AsMemory() });
    var meta1 = RowColumnUtils.GetColumn(MetadataUtils.Kinds.KeyValues, new VectorType(TextType.Instance, 3), ref metaValues1);
    uint value1 = 2;
    var col1 = RowColumnUtils.GetColumn("stay", new KeyType(DataKind.U4, 0, 3), ref value1, RowColumnUtils.GetRow(counted, meta1));

    // Next the case where those values are ints.
    // The buffer length matches the four supplied values, the declared vector size (4) and the
    // key count (4) of the column these values describe.
    var metaValues2 = new VBuffer<int>(4, new int[] { 1, 2, 3, 4 });
    var meta2 = RowColumnUtils.GetColumn(MetadataUtils.Kinds.KeyValues, new VectorType(NumberType.I4, 4), ref metaValues2);
    var value2 = new VBuffer<byte>(2, 0, null, null);
    var col2 = RowColumnUtils.GetColumn("awhile", new VectorType(new KeyType(DataKind.U1, 2, 4), 2), ref value2, RowColumnUtils.GetRow(counted, meta2));

    // Then the case where a value of that kind exists, but is not of the right kind, in which case
    // it should not be identified as containing that metadata.
    var metaValues3 = (float)2;
    var meta3 = RowColumnUtils.GetColumn(MetadataUtils.Kinds.KeyValues, NumberType.R4, ref metaValues3);
    var value3 = (ushort)1;
    var col3 = RowColumnUtils.GetColumn("and", new KeyType(DataKind.U2, 0, 2), ref value3, RowColumnUtils.GetRow(counted, meta3));

    // Then a final case where metadata of that kind is actually simply altogether absent.
    var value4 = new VBuffer<uint>(5, 0, null, null);
    var col4 = RowColumnUtils.GetColumn("listen", new VectorType(new KeyType(DataKind.U4, 0, 2)), ref value4);

    // Finally compose a trivial data view out of all this.
    var row = RowColumnUtils.GetRow(counted, col1, col2, col3, col4);
    var view = RowCursorUtils.RowAsDataView(env, row);

    // Whew! I'm glad that's over with. Let us start running the test in earnest.
    // First let's do a direct match of the types to ensure that works.
    view.AssertStatic(env, c => (
        stay: c.KeyU4.TextValues.Scalar,
        awhile: c.KeyU1.I4Values.Vector,
        and: c.KeyU2.NoValue.Scalar,
        listen: c.KeyU4.NoValue.VarVector));

    // Next let's match against the superclasses (where no value types are
    // asserted), to ensure that the less specific case still passes.
    view.AssertStatic(env, c => (
        stay: c.KeyU4.NoValue.Scalar,
        awhile: c.KeyU1.NoValue.Vector,
        and: c.KeyU2.NoValue.Scalar,
        listen: c.KeyU4.NoValue.VarVector));

    // Here we assert a subset.
    view.AssertStatic(env, c => (
        stay: c.KeyU4.TextValues.Scalar,
        awhile: c.KeyU1.I4Values.Vector));

    // OK. Now we've confirmed the basic stuff works, let's check other scenarios.
    // Due to the fact that we cannot yet assert only a *single* column, these always appear
    // in at least pairs.

    // First try to get the right type of exception to test against.
    Type e = null;
    try
    {
        view.AssertStatic(env, c => (
            stay: c.KeyU4.TextValues.Scalar,
            awhile: c.KeyU2.I4Values.Vector));
    }
    catch (Exception eCaught)
    {
        e = eCaught.GetType();
    }
    Assert.NotNull(e);

    // What if the key representation type is wrong?
    Assert.Throws(e, () =>
        view.AssertStatic(env, c => (
            stay: c.KeyU4.TextValues.Scalar,
            awhile: c.KeyU2.I4Values.Vector)));

    // What if the key value type is wrong?
    Assert.Throws(e, () =>
        view.AssertStatic(env, c => (
            stay: c.KeyU4.TextValues.Scalar,
            awhile: c.KeyU1.I2Values.Vector)));

    // Same two tests, but for scalar?
    Assert.Throws(e, () =>
        view.AssertStatic(env, c => (
            stay: c.KeyU2.TextValues.Scalar,
            awhile: c.KeyU1.I2Values.Vector)));

    Assert.Throws(e, () =>
        view.AssertStatic(env, c => (
            stay: c.KeyU4.BoolValues.Scalar,
            awhile: c.KeyU1.I2Values.Vector)));

    // How about if we misidentify the vectorness?
    Assert.Throws(e, () =>
        view.AssertStatic(env, c => (
            stay: c.KeyU4.TextValues.Vector,
            awhile: c.KeyU1.I2Values.Vector)));

    // How about the names?
    Assert.Throws(e, () =>
        view.AssertStatic(env, c => (
            stay: c.KeyU4.TextValues.Scalar,
            alot: c.KeyU1.I4Values.Vector)));
}
/// <summary>
/// Maps a column to the .NET type the static pipelines would use to represent it.
/// Returns <c>null</c> when the column's type is simply not recognized; it might throw if
/// there is something seriously wrong with the type.
/// </summary>
/// <param name="col">The column</param>
/// <returns>The .NET type for the static pipelines that should be used to reflect this type, given
/// both the characteristics of the <see cref="ColumnType"/> as well as one or two crucial pieces of
/// metadata (the is-normalized flag for fixed-size vectors, and the key values for keys with a known count)</returns>
private static Type GetTypeOrNull(IColumn col)
{
    Contracts.AssertValue(col);

    // True when the column has boolean is-normalized metadata whose value is true.
    // Once we shift to metadata being a row globally we can also make this a bit more efficient.
    bool IsFlaggedNormalized()
    {
        var meta = col.Metadata;
        if (meta.Schema.TryGetColumnIndex(MetadataUtils.Kinds.IsNormalized, out int normIdx)
            && meta.Schema.GetColumnType(normIdx) == BoolType.Instance)
        {
            DvBool flag = default;
            meta.GetGetter<DvBool>(normIdx)(ref flag);
            return flag.IsTrue;
        }
        return false;
    }

    // Though I am unaware of any existing instances, it is theoretically possible for a
    // primitive type to exist, have the same data kind as one of the existing types, and yet
    // not be one of the built in types. (E.g., an outside analogy to the key types.) For this
    // reason, we must be certain that we are covering one of the builtin types.
    bool IsBuiltIn(PrimitiveType candidate)
    {
        return candidate == NumberType.I1 || candidate == NumberType.I2
            || candidate == NumberType.I4 || candidate == NumberType.I8
            || candidate == NumberType.U1 || candidate == NumberType.U2
            || candidate == NumberType.U4 || candidate == NumberType.U8
            || candidate == NumberType.R4 || candidate == NumberType.R8
            || candidate == NumberType.UG || candidate == BoolType.Instance
            || candidate == DateTimeType.Instance || candidate == DateTimeZoneType.Instance
            || candidate == TimeSpanType.Instance || candidate == TextType.Instance;
    }

    var itemType = col.Type;
    Type vectorKind = null;
    if (itemType is VectorType vector)
    {
        if (vector.VectorSize > 0)
        {
            // Fixed-size vectors may additionally be marked as normalized.
            vectorKind = IsFlaggedNormalized() ? typeof(NormVector<>) : typeof(Vector<>);
        }
        else
        {
            vectorKind = typeof(VarVector<>);
        }

        // Continue with the item type so that the scalar handling below covers the subtypes.
        itemType = itemType.ItemType;
    }
    Contracts.Assert(!itemType.IsVector);

    if (itemType is KeyType key)
    {
        Type rawType = StaticKind(key.RawKind);
        Contracts.Assert(rawType == typeof(byte) || rawType == typeof(ushort)
            || rawType == typeof(uint) || rawType == typeof(ulong));

        bool hasKnownCount = key.Count > 0;
        Type keyStaticType = (hasKnownCount ? typeof(Key<>) : typeof(VarKey<>)).MakeGenericType(rawType);

        if (hasKnownCount)
        {
            // Check to see if we have key value metadata of the appropriate type, size, and whatnot.
            var meta = col.Metadata;
            if (meta.Schema.TryGetColumnIndex(MetadataUtils.Kinds.KeyValues, out int kvIdx)
                && meta.Schema.GetColumnType(kvIdx).VectorSize == key.Count)
            {
                Contracts.Assert(key.Count > 0);
                var valuesType = GetTypeOrNull(RowColumnUtils.GetColumn(meta, kvIdx));
                if (valuesType != null && valuesType.IsGenericType)
                {
                    var definition = valuesType.GetGenericTypeDefinition();
                    if (definition == typeof(NormVector<>) || definition == typeof(Vector<>))
                    {
                        var typeArgs = valuesType.GetGenericArguments();
                        Contracts.Assert(typeArgs.Length == 1);
                        keyStaticType = typeof(Key<,>).MakeGenericType(rawType, typeArgs[0]);
                    }
                }
            }
        }

        return vectorKind?.MakeGenericType(keyStaticType) ?? keyStaticType;
    }

    if (itemType is PrimitiveType primitive)
    {
        Type rawType = StaticKind(primitive.RawKind);
        if (rawType != null && IsBuiltIn(primitive))
        {
            return (vectorKind ?? typeof(Scalar<>)).MakeGenericType(rawType);
        }
    }

    return null;
}
/// <summary>
/// Hashes a constant value of type <typeparamref name="T"/> into 10 bits through a hashing
/// transformer ("Foo" to "Bar") and checks the results for a scalar, a dense vector of five
/// copies, and a sparse vector with entries at positions 0, 3 and 7, in both unordered and
/// ordered modes.
/// </summary>
/// <param name="val">The value to hash.</param>
/// <param name="type">The primitive column type of <paramref name="val"/>.</param>
/// <param name="expected">Expected unordered hash.</param>
/// <param name="expectedOrdered">Expected ordered hash of the scalar and of vector position 0.</param>
/// <param name="expectedOrdered3">Expected ordered hash at vector position 3.</param>
private void HashTestCore<T>(T val, PrimitiveType type, uint expected, uint expectedOrdered, uint expectedOrdered3)
{
    const int bits = 10;
    const int vecLen = 5;
    const int sparseLen = 10;

    var scalarColumn = RowColumnUtils.GetColumn("Foo", type, ref val);
    var inRow = RowColumnUtils.GetRow(new Counted(), scalarColumn);

    // Runs the current input row through a hashing transformer configured by columnInfo and reads
    // the "Bar" output into the caller's buffer, so that the same buffer is reused across calls.
    void HashInto<TOut>(HashingTransformer.ColumnInfo columnInfo, ref TOut output)
    {
        var transformer = new HashingTransformer(Env, new[] { columnInfo });
        var rowMapper = transformer.GetRowToRowMapper(inRow.Schema);
        rowMapper.Schema.TryGetColumnIndex("Bar", out int barIndex);
        var hashedRow = rowMapper.GetRow(inRow, c => c == barIndex, out var _);
        var read = hashedRow.GetGetter<TOut>(barIndex);
        read(ref output);
    }

    // First do an unordered hash.
    uint scalarHash = 0;
    HashInto(new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: bits), ref scalarHash);
    Assert.Equal(expected, scalarHash);

    // Next do an ordered hash.
    HashInto(new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: bits, ordered: true), ref scalarHash);
    Assert.Equal(expectedOrdered, scalarHash);

    // Next build up a vector to make sure that hashing is consistent between scalar values
    // at least in the first position, and in the unordered case, the last position.
    var denseVec = new VBuffer<T>(vecLen, Utils.CreateArray(vecLen, val));
    var denseColumn = RowColumnUtils.GetColumn("Foo", new VectorType(type, vecLen), ref denseVec);
    inRow = RowColumnUtils.GetRow(new Counted(), denseColumn);

    VBuffer<uint> vectorHash = default;
    HashInto(new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: bits, ordered: false), ref vectorHash);
    Assert.Equal(vecLen, vectorHash.Length);
    // They all should equal this in this case.
    Assert.All(vectorHash.DenseValues(), v => Assert.Equal(expected, v));

    // Now do ordered with the dense vector.
    HashInto(new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: bits, ordered: true), ref vectorHash);
    Assert.Equal(vecLen, vectorHash.Length);
    Assert.Equal(expectedOrdered, vectorHash.GetItemOrDefault(0));
    Assert.Equal(expectedOrdered3, vectorHash.GetItemOrDefault(3));
    Assert.All(vectorHash.DenseValues(), v => Assert.True((v == 0) == (expectedOrdered == 0)));

    // Let's now do a sparse vector.
    // NOTE(review): the column is declared with vector size vecLen (5) while the sparse buffer has
    // length 10 -- confirm whether this mismatch is intentional.
    var sparseVec = new VBuffer<T>(sparseLen, 3, Utils.CreateArray(3, val), new[] { 0, 3, 7 });
    var sparseColumn = RowColumnUtils.GetColumn("Foo", new VectorType(type, vecLen), ref sparseVec);
    inRow = RowColumnUtils.GetRow(new Counted(), sparseColumn);

    HashInto(new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: bits, ordered: false), ref vectorHash);
    Assert.Equal(sparseLen, vectorHash.Length);
    Assert.Equal(expected, vectorHash.GetItemOrDefault(0));
    Assert.Equal(expected, vectorHash.GetItemOrDefault(3));
    Assert.Equal(expected, vectorHash.GetItemOrDefault(7));

    HashInto(new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: bits, ordered: true), ref vectorHash);
    Assert.Equal(sparseLen, vectorHash.Length);
    Assert.Equal(expectedOrdered, vectorHash.GetItemOrDefault(0));
    Assert.Equal(expectedOrdered3, vectorHash.GetItemOrDefault(3));
}
/// <summary>
/// Appends the training statistics to <paramref name="list"/> as columns. The model-level columns
/// ("Count of training examples", "Residual Deviance", "Null Deviance", "AIC") are always added.
/// When <paramref name="parent"/> is given and bias statistics can be obtained, the four bias
/// columns and the four per-coefficient vector columns ("Estimate", "StandardError", "ZScore",
/// "PValue") are added as well, the latter sharing a slot-names metadata row.
/// </summary>
/// <param name="list">The list that receives the new columns.</param>
/// <param name="parent">The predictor supplying bias, weights and statistics; may be null.</param>
/// <param name="schema">The role-mapped schema; only asserted to be non-null here.</param>
/// <param name="names">Slot names, forwarded by reference to the coefficient statistics computation.</param>
public void AddStatsColumns(List<IColumn> list, LinearBinaryPredictor parent, RoleMappedSchema schema, ref VBuffer<ReadOnlyMemory<char>> names)
{
    _env.AssertValue(list);
    _env.AssertValueOrNull(parent);
    _env.AssertValue(schema);

    // Model-level statistics.
    long exampleCount = _trainingExampleCount;
    list.Add(RowColumnUtils.GetColumn("Count of training examples", NumberType.I8, ref exampleCount));
    var residualDeviance = _deviance;
    list.Add(RowColumnUtils.GetColumn("Residual Deviance", NumberType.R4, ref residualDeviance));
    var nullDeviance = _nullDeviance;
    list.Add(RowColumnUtils.GetColumn("Null Deviance", NumberType.R4, ref nullDeviance));
    var akaike = 2 * _paramCount + _deviance;
    list.Add(RowColumnUtils.GetColumn("AIC", NumberType.R4, ref akaike));

    // Everything below needs the predictor and its bias statistics.
    if (parent == null
        || !TryGetBiasStatistics(parent.Statistics, parent.Bias, out Single biasStdErr, out Single biasZScore, out Single biasPValue))
    {
        return;
    }

    // Bias statistics.
    var biasEstimate = parent.Bias;
    list.Add(RowColumnUtils.GetColumn("BiasEstimate", NumberType.R4, ref biasEstimate));
    list.Add(RowColumnUtils.GetColumn("BiasStandardError", NumberType.R4, ref biasStdErr));
    list.Add(RowColumnUtils.GetColumn("BiasZScore", NumberType.R4, ref biasZScore));
    list.Add(RowColumnUtils.GetColumn("BiasPValue", NumberType.R4, ref biasPValue));

    // Per-coefficient statistics.
    VBuffer<Single> featureWeights = default;
    parent.GetFeatureWeights(ref featureWeights);
    VBuffer<Single> estimates = default;
    VBuffer<Single> stdErrors = default;
    VBuffer<Single> zScores = default;
    VBuffer<Single> pValues = default;
    GetUnorderedCoefficientStatistics(parent.Statistics, ref featureWeights, ref names,
        ref estimates, ref stdErrors, ref zScores, ref pValues,
        out ValueGetter<VBuffer<ReadOnlyMemory<char>>> slotNameGetter);

    // The four vector columns share one slot-names metadata row sized by the standard errors.
    int coefficientCount = stdErrors.Length;
    var slotNamesColumn = RowColumnUtils.GetColumn(MetadataUtils.Kinds.SlotNames,
        new VectorType(TextType.Instance, coefficientCount), slotNameGetter);
    var sharedMetadata = RowColumnUtils.GetRow(null, slotNamesColumn);
    var vectorType = new VectorType(NumberType.R4, coefficientCount);

    list.Add(RowColumnUtils.GetColumn("Estimate", vectorType, ref estimates, sharedMetadata));
    list.Add(RowColumnUtils.GetColumn("StandardError", vectorType, ref stdErrors, sharedMetadata));
    list.Add(RowColumnUtils.GetColumn("ZScore", vectorType, ref zScores, sharedMetadata));
    list.Add(RowColumnUtils.GetColumn("PValue", vectorType, ref pValues, sharedMetadata));
}