コード例 #1
0
        /// <summary>
        /// Verifies that every column declared on this object is present in <paramref name="schema"/> and
        /// has a type compatible with its statically declared type. The first violation found is reported
        /// by throwing through <paramref name="ectx"/>.
        /// </summary>
        /// <param name="ectx">The context on which to throw exceptions</param>
        /// <param name="schema">The schema to check</param>
        public void Check(IExceptionContext ectx, ISchema schema)
        {
            Contracts.AssertValue(ectx);
            ectx.AssertValue(schema);

            foreach (var pair in Pairs)
            {
                var name     = pair.Key;
                var declared = pair.Value;

                if (!schema.TryGetColumnIndex(name, out int index))
                {
                    throw ectx.ExceptParam(nameof(schema), $"Column named '{name}' was not found");
                }

                var column = RowColumnUtils.GetColumn(schema, index);
                var actual = GetTypeOrNull(column);

                // A recognized type is questionable when the declared type cannot be assigned from it, which
                // lets for example Key<uint, string> count as compatible with Key<uint>.
                // An unrecognized (null) type cannot be verified directly; it is questionable only when the
                // declared type is a standard one, which should have corresponded to a recognized type.
                bool questionable = actual != null
                    ? !declared.IsAssignableFromStaticPipeline(actual)
                    : IsStandard(ectx, declared);

                if (!questionable)
                {
                    continue;
                }

                if (!declared.IsAssignableFromStaticPipeline(actual))
                {
                    throw ectx.ExceptParam(nameof(schema),
                                           $"Column '{name}' of type '{column.Type}' cannot be expressed statically as type '{declared}'.");
                }
            }
        }
コード例 #2
0
            /// <summary>
            /// Produces one output column description per column pair of the parent. Each description uses
            /// the pair's output name, the corresponding entry of <c>_types</c>, and the metadata of the
            /// mapped input column filtered down to <see cref="MetadataUtils.Kinds.SlotNames"/>.
            /// </summary>
            /// <returns>An array with as many entries as the parent has column pairs.</returns>
            public override RowMapperColumnInfo[] GetOutputColumns()
            {
                var pairs = _parent.ColumnPairs;
                var infos = new RowMapperColumnInfo[pairs.Length];

                for (int col = 0; col < pairs.Length; ++col)
                {
                    // Only the slot names of the source column are carried over to the output.
                    var slotNamesOnly = RowColumnUtils.GetMetadataAsRow(
                        InputSchema,
                        ColMapNewToOld[col],
                        kind => kind == MetadataUtils.Kinds.SlotNames);

                    infos[col] = new RowMapperColumnInfo(pairs[col].output, _types[col], slotNamesOnly);
                }

                return infos;
            }
コード例 #3
0
        /// <summary>
        /// Packs the columns contributed by <c>_stats</c> into a single row.
        /// </summary>
        /// <param name="schema">The role-mapped schema forwarded to <c>AddStatsColumns</c>.</param>
        /// <returns>A row over the statistics columns, or <c>null</c> when <c>_stats</c> is <c>null</c>.</returns>
        public IRow GetStatsIRowOrNull(RoleMappedSchema schema)
        {
            if (_stats != null)
            {
                var slotNames = default(VBuffer<ReadOnlyMemory<char>>);
                var columns   = new List<IColumn>();

                // No parent predictor is supplied here.
                _stats.AddStatsColumns(columns, null, schema, ref slotNames);

                return RowColumnUtils.GetRow(null, columns.ToArray());
            }

            return null;
        }
コード例 #4
0
        /// <summary>
        /// Packs the columns contributed by <c>_stats</c> for this predictor into a single row.
        /// </summary>
        /// <param name="schema">The role-mapped schema used to look up feature slot names and forwarded
        /// to <c>AddStatsColumns</c>.</param>
        /// <returns>A row over the statistics columns, or <c>null</c> when <c>_stats</c> is <c>null</c>.</returns>
        public override IRow GetStatsIRowOrNull(RoleMappedSchema schema)
        {
            if (_stats != null)
            {
                // Fetch the feature slot names first; they are handed on to the statistics columns.
                var slotNames = default(VBuffer<DvText>);
                MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, Weight.Length, ref slotNames);

                var columns = new List<IColumn>();
                _stats.AddStatsColumns(columns, this, schema, ref slotNames);

                return RowColumnUtils.GetRow(null, columns.ToArray());
            }

            return null;
        }
コード例 #5
0
        /// <summary>
        /// Builds a two-column summary row: a scalar "Bias" column and a "Weights" vector column whose
        /// metadata row carries the feature slot names.
        /// </summary>
        /// <param name="schema">The role-mapped schema from which the feature slot names are fetched.</param>
        /// <returns>A row containing the "Bias" and "Weights" columns.</returns>
        public virtual IRow GetSummaryIRowOrNull(RoleMappedSchema schema)
        {
            // Slot names of the feature column become the metadata of the weights column.
            var slotNames = default(VBuffer<DvText>);
            MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, Weight.Length, ref slotNames);

            var slotNamesType     = new VectorType(TextType.Instance, Weight.Length);
            var slotNamesColumn   = RowColumnUtils.GetColumn(MetadataUtils.Kinds.SlotNames, slotNamesType, ref slotNames);
            var slotNamesMetadata = RowColumnUtils.GetRow(null, slotNamesColumn);

            var weightsType = new VectorType(NumberType.R4, Weight.Length);

            // Local copies are needed because the values are passed by reference.
            var biasValue    = Bias;
            var weightValues = Weight;

            var columns = new List<IColumn>
            {
                RowColumnUtils.GetColumn("Bias", NumberType.R4, ref biasValue),
                RowColumnUtils.GetColumn("Weights", weightsType, ref weightValues, slotNamesMetadata),
            };

            return RowColumnUtils.GetRow(null, columns.ToArray());
        }
コード例 #6
0
ファイル: HashBench.cs プロジェクト: zeekay18/machinelearning
        /// <summary>
        /// Prepares the hashing pipeline: wraps <paramref name="val"/> in a single-column row named "Foo",
        /// maps it through a <see cref="HashTransformer"/> into column "Bar", and stores the getter for
        /// the hashed output in <c>_vecGetter</c> (vector input) or <c>_getter</c> (scalar input).
        /// </summary>
        /// <typeparam name="T">The raw type of the value being hashed.</typeparam>
        /// <param name="val">The value exposed through the input column.</param>
        /// <param name="type">The column type declared for the input column.</param>
        /// <param name="hashBits">The number of hash bits passed to the transformer's column info.</param>
        private void InitMap<T>(T val, ColumnType type, int hashBits = 20)
        {
            var source = RowColumnUtils.GetColumn("Foo", type, ref val);

            _counted = new Counted();
            var input = RowColumnUtils.GetRow(_counted, source);

            // One million features is a nice, typical number.
            var columnInfo  = new HashTransformer.ColumnInfo("Foo", "Bar", hashBits: hashBits);
            var transformer = new HashTransformer(_env, new[] { columnInfo });
            var rowMapper   = transformer.GetRowToRowMapper(input.Schema);

            rowMapper.Schema.TryGetColumnIndex("Bar", out int hashedIndex);
            var output = rowMapper.GetRow(input, c => c == hashedIndex, out var _);

            if (!type.IsVector)
            {
                _getter = output.GetGetter<uint>(hashedIndex);
                return;
            }

            _vecGetter = output.GetGetter<VBuffer<uint>>(hashedIndex);
        }
コード例 #7
0
        /// <summary>
        /// Checks that <c>AssertStatic</c> accepts correctly declared key columns (with text key values,
        /// integer key values, key-value metadata of the wrong kind, and no key-value metadata at all)
        /// and rejects declarations whose key representation type, key value type, vectorness, or column
        /// name does not match the actual schema.
        /// </summary>
        public void AssertStaticKeys()
        {
            var env     = new ConsoleEnvironment(0, verbose: true);
            var counted = new MetaCounted();

            // We'll test a few things here. First, the case where the key-value metadata is text.
            var  metaValues1 = new VBuffer <ReadOnlyMemory <char> >(3, new[] { "a".AsMemory(), "b".AsMemory(), "c".AsMemory() });
            var  meta1       = RowColumnUtils.GetColumn(MetadataUtils.Kinds.KeyValues, new VectorType(TextType.Instance, 3), ref metaValues1);
            uint value1      = 2;
            var  col1        = RowColumnUtils.GetColumn("stay", new KeyType(DataKind.U4, 0, 3), ref value1, RowColumnUtils.GetRow(counted, meta1));

            // Next the case where those values are ints. The buffer length matches the declared
            // vector size (and the key count) of 4.
            var metaValues2 = new VBuffer <int>(4, new int[] { 1, 2, 3, 4 });
            var meta2       = RowColumnUtils.GetColumn(MetadataUtils.Kinds.KeyValues, new VectorType(NumberType.I4, 4), ref metaValues2);
            var value2      = new VBuffer <byte>(2, 0, null, null);
            var col2        = RowColumnUtils.GetColumn("awhile", new VectorType(new KeyType(DataKind.U1, 2, 4), 2), ref value2, RowColumnUtils.GetRow(counted, meta2));

            // Then the case where a value of that kind exists, but is not of the right kind, in which case
            // it should not be identified as containing that metadata.
            var metaValues3 = (float)2;
            var meta3       = RowColumnUtils.GetColumn(MetadataUtils.Kinds.KeyValues, NumberType.R4, ref metaValues3);
            var value3      = (ushort)1;
            var col3        = RowColumnUtils.GetColumn("and", new KeyType(DataKind.U2, 0, 2), ref value3, RowColumnUtils.GetRow(counted, meta3));

            // Then a final case where metadata of that kind is actually simply altogether absent.
            var value4 = new VBuffer <uint>(5, 0, null, null);
            var col4   = RowColumnUtils.GetColumn("listen", new VectorType(new KeyType(DataKind.U4, 0, 2)), ref value4);

            // Finally compose a trivial data view out of all this.
            var row  = RowColumnUtils.GetRow(counted, col1, col2, col3, col4);
            var view = RowCursorUtils.RowAsDataView(env, row);

            // Whew! I'm glad that's over with. Let us start running the test in earnest.
            // First let's do a direct match of the types to ensure that works.
            view.AssertStatic(env, c => (
                                  stay: c.KeyU4.TextValues.Scalar,
                                  awhile: c.KeyU1.I4Values.Vector,
                                  and: c.KeyU2.NoValue.Scalar,
                                  listen: c.KeyU4.NoValue.VarVector));

            // Next let's match against the superclasses (where no value types are
            // asserted), to ensure that the less specific case still passes.
            view.AssertStatic(env, c => (
                                  stay: c.KeyU4.NoValue.Scalar,
                                  awhile: c.KeyU1.NoValue.Vector,
                                  and: c.KeyU2.NoValue.Scalar,
                                  listen: c.KeyU4.NoValue.VarVector));

            // Here we assert a subset.
            view.AssertStatic(env, c => (
                                  stay: c.KeyU4.TextValues.Scalar,
                                  awhile: c.KeyU1.I4Values.Vector));

            // OK. Now we've confirmed the basic stuff works, let's check other scenarios.
            // Due to the fact that we cannot yet assert only a *single* column, these always appear
            // in at least pairs.

            // First try to get the right type of exception to test against.
            Type e = null;

            try
            {
                view.AssertStatic(env, c => (
                                      stay: c.KeyU4.TextValues.Scalar,
                                      awhile: c.KeyU2.I4Values.Vector));
            }
            catch (Exception eCaught)
            {
                e = eCaught.GetType();
            }
            Assert.NotNull(e);

            // What if the key representation type is wrong?
            Assert.Throws(e, () =>
                          view.AssertStatic(env, c => (
                                                stay: c.KeyU4.TextValues.Scalar,
                                                awhile: c.KeyU2.I4Values.Vector)));

            // What if the key value type is wrong?
            Assert.Throws(e, () =>
                          view.AssertStatic(env, c => (
                                                stay: c.KeyU4.TextValues.Scalar,
                                                awhile: c.KeyU1.I2Values.Vector)));

            // Same two tests, but for scalar. The vector column is declared correctly here so that
            // only the scalar mismatch can be responsible for the failure.
            Assert.Throws(e, () =>
                          view.AssertStatic(env, c => (
                                                stay: c.KeyU2.TextValues.Scalar,
                                                awhile: c.KeyU1.I4Values.Vector)));

            Assert.Throws(e, () =>
                          view.AssertStatic(env, c => (
                                                stay: c.KeyU4.BoolValues.Scalar,
                                                awhile: c.KeyU1.I4Values.Vector)));

            // How about if we misidentify the vectorness? Again the other column is declared correctly.
            Assert.Throws(e, () =>
                          view.AssertStatic(env, c => (
                                                stay: c.KeyU4.TextValues.Vector,
                                                awhile: c.KeyU1.I4Values.Vector)));

            // How about the names?
            Assert.Throws(e, () =>
                          view.AssertStatic(env, c => (
                                                stay: c.KeyU4.TextValues.Scalar,
                                                alot: c.KeyU1.I4Values.Vector)));
        }
コード例 #8
0
        /// <summary>
        /// Maps a column to the .NET type that the static pipelines would use to represent it.
        /// Returns <c>null</c> when the column type is not recognized; assertions may fire if the type
        /// is malformed.
        /// </summary>
        /// <param name="col">The column</param>
        /// <returns>The static-pipeline .NET type reflecting the column's <see cref="ColumnType"/> together
        /// with its <c>IsNormalized</c> and <c>KeyValues</c> metadata, or <c>null</c> if unrecognized</returns>
        private static Type GetTypeOrNull(IColumn col)
        {
            Contracts.AssertValue(col);
            var itemType = col.Type;

            // Open generic wrapper for vector columns; stays null for scalar columns.
            Type wrapper = null;

            if (itemType is VectorType vector)
            {
                if (vector.VectorSize > 0)
                {
                    wrapper = typeof(Vector<>);

                    // A known-size vector is upgraded to a normalized vector when the column carries
                    // boolean IsNormalized metadata whose value is true.
                    // Once we shift to metadata being a row globally we can also make this a bit more efficient.
                    var metadata = col.Metadata;
                    if (metadata.Schema.TryGetColumnIndex(MetadataUtils.Kinds.IsNormalized, out int normIndex) &&
                        metadata.Schema.GetColumnType(normIndex) == BoolType.Instance)
                    {
                        DvBool isNormalized = default;
                        metadata.GetGetter<DvBool>(normIndex)(ref isNormalized);
                        if (isNormalized.IsTrue)
                        {
                            wrapper = typeof(NormVector<>);
                        }
                    }
                }
                else
                {
                    wrapper = typeof(VarVector<>);
                }

                // Continue with the item type so that the scalar handling below covers vectors too.
                itemType = itemType.ItemType;
            }
            Contracts.Assert(!itemType.IsVector);

            if (itemType is KeyType key)
            {
                Type rawType = StaticKind(key.RawKind);
                Contracts.Assert(rawType == typeof(byte) || rawType == typeof(ushort) ||
                                 rawType == typeof(uint) || rawType == typeof(ulong));

                bool hasKnownCount = key.Count > 0;
                Type keyStaticType = (hasKnownCount ? typeof(Key<>) : typeof(VarKey<>)).MakeGenericType(rawType);

                if (hasKnownCount)
                {
                    // Look for key value metadata whose vector size equals the key count.
                    var metadata = col.Metadata;
                    if (metadata.Schema.TryGetColumnIndex(MetadataUtils.Kinds.KeyValues, out int keyValuesIndex) &&
                        metadata.Schema.GetColumnType(keyValuesIndex).VectorSize == key.Count)
                    {
                        Contracts.Assert(key.Count > 0);
                        var valuesType = GetTypeOrNull(RowColumnUtils.GetColumn(metadata, keyValuesIndex));
                        if (valuesType != null && valuesType.IsGenericType)
                        {
                            var definition = valuesType.GetGenericTypeDefinition();
                            if (definition == typeof(NormVector<>) || definition == typeof(Vector<>))
                            {
                                // The key values are a known-size vector: expose their item type as the
                                // second type argument of the key.
                                var typeArgs = valuesType.GetGenericArguments();
                                Contracts.Assert(typeArgs.Length == 1);
                                keyStaticType = typeof(Key<,>).MakeGenericType(rawType, typeArgs[0]);
                            }
                        }
                    }
                }

                return wrapper?.MakeGenericType(keyStaticType) ?? keyStaticType;
            }

            if (itemType is PrimitiveType primitive)
            {
                Type rawType = StaticKind(primitive.RawKind);
                if (rawType == null)
                {
                    return null;
                }

                // Though I am unaware of any existing instances, it is theoretically possible for a
                // primitive type to exist, have the same data kind as one of the existing types, and yet
                // not be one of the built in types. (E.g., an outside analogy to the key types.) For this
                // reason, we must be certain that when we return here we are covering one of the builtin types.
                bool isBuiltIn =
                    primitive == NumberType.I1 || primitive == NumberType.I2 || primitive == NumberType.I4 || primitive == NumberType.I8 ||
                    primitive == NumberType.U1 || primitive == NumberType.U2 || primitive == NumberType.U4 || primitive == NumberType.U8 ||
                    primitive == NumberType.R4 || primitive == NumberType.R8 || primitive == NumberType.UG || primitive == BoolType.Instance ||
                    primitive == DateTimeType.Instance || primitive == DateTimeZoneType.Instance || primitive == TimeSpanType.Instance ||
                    primitive == TextType.Instance;

                if (isBuiltIn)
                {
                    return (wrapper ?? typeof(Scalar<>)).MakeGenericType(rawType);
                }
            }

            return null;
        }
コード例 #9
0
        /// <summary>
        /// Hashes <paramref name="val"/> through <see cref="HashingTransformer"/> as a scalar, as a dense
        /// vector and as a sparse vector, in both unordered and ordered mode, and asserts the results.
        /// </summary>
        /// <typeparam name="T">The raw type of the value being hashed.</typeparam>
        /// <param name="val">The value to hash.</param>
        /// <param name="type">The primitive column type corresponding to <typeparamref name="T"/>.</param>
        /// <param name="expected">Expected hash in unordered mode (scalar and every explicit vector slot).</param>
        /// <param name="expectedOrdered">Expected hash in ordered mode for the scalar and for vector slot 0.</param>
        /// <param name="expectedOrdered3">Expected hash in ordered mode for vector slot 3.</param>
        private void HashTestCore <T>(T val, PrimitiveType type, uint expected, uint expectedOrdered, uint expectedOrdered3)
        {
            const int bits = 10;

            var col   = RowColumnUtils.GetColumn("Foo", type, ref val);
            var inRow = RowColumnUtils.GetRow(new Counted(), col);

            // First do an unordered hash.
            var info   = new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: bits);
            var xf     = new HashingTransformer(Env, new[] { info });
            var mapper = xf.GetRowToRowMapper(inRow.Schema);

            // Fail loudly if the output column is missing instead of silently reading column 0.
            Assert.True(mapper.Schema.TryGetColumnIndex("Bar", out int outCol));
            var outRow = mapper.GetRow(inRow, c => c == outCol, out var _);

            var  getter = outRow.GetGetter <uint>(outCol);
            uint result = 0;

            getter(ref result);
            Assert.Equal(expected, result);

            // Next do an ordered hash.
            info   = new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: bits, ordered: true);
            xf     = new HashingTransformer(Env, new[] { info });
            mapper = xf.GetRowToRowMapper(inRow.Schema);
            Assert.True(mapper.Schema.TryGetColumnIndex("Bar", out outCol));
            outRow = mapper.GetRow(inRow, c => c == outCol, out var _);

            getter = outRow.GetGetter <uint>(outCol);
            getter(ref result);
            Assert.Equal(expectedOrdered, result);

            // Next build up a vector to make sure that hashing is consistent between scalar values
            // at least in the first position, and in the unordered case, the last position.
            const int vecLen   = 5;
            var       denseVec = new VBuffer <T>(vecLen, Utils.CreateArray(vecLen, val));

            col   = RowColumnUtils.GetColumn("Foo", new VectorType(type, vecLen), ref denseVec);
            inRow = RowColumnUtils.GetRow(new Counted(), col);

            info   = new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: bits, ordered: false);
            xf     = new HashingTransformer(Env, new[] { info });
            mapper = xf.GetRowToRowMapper(inRow.Schema);
            Assert.True(mapper.Schema.TryGetColumnIndex("Bar", out outCol));
            outRow = mapper.GetRow(inRow, c => c == outCol, out var _);

            var            vecGetter = outRow.GetGetter <VBuffer <uint> >(outCol);
            VBuffer <uint> vecResult = default;

            vecGetter(ref vecResult);

            Assert.Equal(vecLen, vecResult.Length);
            // They all should equal this in this case.
            Assert.All(vecResult.DenseValues(), v => Assert.Equal(expected, v));

            // Now do ordered with the dense vector.
            info   = new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: bits, ordered: true);
            xf     = new HashingTransformer(Env, new[] { info });
            mapper = xf.GetRowToRowMapper(inRow.Schema);
            Assert.True(mapper.Schema.TryGetColumnIndex("Bar", out outCol));
            outRow    = mapper.GetRow(inRow, c => c == outCol, out var _);
            vecGetter = outRow.GetGetter <VBuffer <uint> >(outCol);
            vecGetter(ref vecResult);

            Assert.Equal(vecLen, vecResult.Length);
            Assert.Equal(expectedOrdered, vecResult.GetItemOrDefault(0));
            Assert.Equal(expectedOrdered3, vecResult.GetItemOrDefault(3));
            Assert.All(vecResult.DenseValues(), v => Assert.True((v == 0) == (expectedOrdered == 0)));

            // Let's now do a sparse vector. The declared vector size must match the buffer's logical
            // length, so the column type is built from sparseLen rather than the dense vecLen.
            const int sparseLen = 10;
            var       sparseVec = new VBuffer <T>(sparseLen, 3, Utils.CreateArray(3, val), new[] { 0, 3, 7 });

            col   = RowColumnUtils.GetColumn("Foo", new VectorType(type, sparseLen), ref sparseVec);
            inRow = RowColumnUtils.GetRow(new Counted(), col);

            info   = new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: bits, ordered: false);
            xf     = new HashingTransformer(Env, new[] { info });
            mapper = xf.GetRowToRowMapper(inRow.Schema);
            Assert.True(mapper.Schema.TryGetColumnIndex("Bar", out outCol));
            outRow    = mapper.GetRow(inRow, c => c == outCol, out var _);
            vecGetter = outRow.GetGetter <VBuffer <uint> >(outCol);
            vecGetter(ref vecResult);

            Assert.Equal(sparseLen, vecResult.Length);
            Assert.Equal(expected, vecResult.GetItemOrDefault(0));
            Assert.Equal(expected, vecResult.GetItemOrDefault(3));
            Assert.Equal(expected, vecResult.GetItemOrDefault(7));

            // Finally, ordered with the sparse vector.
            info   = new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: bits, ordered: true);
            xf     = new HashingTransformer(Env, new[] { info });
            mapper = xf.GetRowToRowMapper(inRow.Schema);
            Assert.True(mapper.Schema.TryGetColumnIndex("Bar", out outCol));
            outRow    = mapper.GetRow(inRow, c => c == outCol, out var _);
            vecGetter = outRow.GetGetter <VBuffer <uint> >(outCol);
            vecGetter(ref vecResult);

            Assert.Equal(sparseLen, vecResult.Length);
            Assert.Equal(expectedOrdered, vecResult.GetItemOrDefault(0));
            Assert.Equal(expectedOrdered3, vecResult.GetItemOrDefault(3));
        }
コード例 #10
0
        /// <summary>
        /// Appends the training statistics to <paramref name="list"/> as columns. The example count,
        /// residual deviance, null deviance and AIC are always added. When <paramref name="parent"/> is
        /// supplied and bias statistics are available, the bias statistics and the per-coefficient
        /// statistics vectors (annotated with slot names) are added as well.
        /// </summary>
        /// <param name="list">The list that receives the new columns.</param>
        /// <param name="parent">The predictor whose bias and weights are summarized; may be <c>null</c>.</param>
        /// <param name="schema">The role-mapped schema; only asserted to be non-null here.</param>
        /// <param name="names">The slot names handed to the coefficient statistics computation.</param>
        public void AddStatsColumns(List<IColumn> list, LinearBinaryPredictor parent, RoleMappedSchema schema, ref VBuffer<ReadOnlyMemory<char>> names)
        {
            _env.AssertValue(list);
            _env.AssertValueOrNull(parent);
            _env.AssertValue(schema);

            // Local copies are needed because the values are passed by reference.
            long exampleCount     = _trainingExampleCount;
            var  residualDeviance = _deviance;
            var  nullDeviance     = _nullDeviance;
            var  akaike           = 2 * _paramCount + _deviance;

            list.Add(RowColumnUtils.GetColumn("Count of training examples", NumberType.I8, ref exampleCount));
            list.Add(RowColumnUtils.GetColumn("Residual Deviance", NumberType.R4, ref residualDeviance));
            list.Add(RowColumnUtils.GetColumn("Null Deviance", NumberType.R4, ref nullDeviance));
            list.Add(RowColumnUtils.GetColumn("AIC", NumberType.R4, ref akaike));

            // Without a predictor there is nothing more to report.
            if (parent == null)
            {
                return;
            }

            // Likewise when the bias statistics cannot be obtained.
            if (!TryGetBiasStatistics(parent.Statistics, parent.Bias,
                                      out Single biasStdErr, out Single biasZScore, out Single biasPValue))
            {
                return;
            }

            var biasEstimate = parent.Bias;

            list.Add(RowColumnUtils.GetColumn("BiasEstimate", NumberType.R4, ref biasEstimate));
            list.Add(RowColumnUtils.GetColumn("BiasStandardError", NumberType.R4, ref biasStdErr));
            list.Add(RowColumnUtils.GetColumn("BiasZScore", NumberType.R4, ref biasZScore));
            list.Add(RowColumnUtils.GetColumn("BiasPValue", NumberType.R4, ref biasPValue));

            var featureWeights = default(VBuffer<Single>);
            parent.GetFeatureWeights(ref featureWeights);

            var estimates      = default(VBuffer<Single>);
            var standardErrors = default(VBuffer<Single>);
            var zScores        = default(VBuffer<Single>);
            var pValues        = default(VBuffer<Single>);

            GetUnorderedCoefficientStatistics(parent.Statistics, ref featureWeights, ref names,
                                              ref estimates, ref standardErrors, ref zScores, ref pValues,
                                              out ValueGetter<VBuffer<ReadOnlyMemory<char>>> slotNamesGetter);

            // All four coefficient vectors share one type and one slot-names metadata row, both sized
            // by the standard error vector.
            var slotNamesType     = new VectorType(TextType.Instance, standardErrors.Length);
            var slotNamesColumn   = RowColumnUtils.GetColumn(MetadataUtils.Kinds.SlotNames, slotNamesType, slotNamesGetter);
            var slotNamesMetadata = RowColumnUtils.GetRow(null, slotNamesColumn);
            var coefficientType   = new VectorType(NumberType.R4, standardErrors.Length);

            list.Add(RowColumnUtils.GetColumn("Estimate", coefficientType, ref estimates, slotNamesMetadata));
            list.Add(RowColumnUtils.GetColumn("StandardError", coefficientType, ref standardErrors, slotNamesMetadata));
            list.Add(RowColumnUtils.GetColumn("ZScore", coefficientType, ref zScores, slotNamesMetadata));
            list.Add(RowColumnUtils.GetColumn("PValue", coefficientType, ref pValues, slotNamesMetadata));
        }