/// <summary>
/// Builds the input row through <c>RowImpl.Create</c>, runs it through a
/// <c>HashingTransformer</c> configured with the "Foo"/"Bar" column pair, and caches
/// the getter for the "Bar" output column in either <c>_vecGetter</c> or <c>_getter</c>.
/// </summary>
/// <typeparam name="T">Type of the raw value fed to the transformer.</typeparam>
/// <param name="val">Value produced by the fallback getter when <paramref name="getter"/> is null.</param>
/// <param name="type">Column type handed to <c>RowImpl.Create</c>; a <c>VectorType</c> selects the vector getter.</param>
/// <param name="hashBits">Number of hash bits forwarded to the hashing column info.</param>
/// <param name="getter">Optional value source; when null, a getter that always yields <paramref name="val"/> is used.</param>
private void InitMap<T>(T val, ColumnType type, int hashBits = 20, ValueGetter<T> getter = null)
        {
            // Fall back to a getter that keeps handing out the supplied value.
            ValueGetter<T> valueSource = getter ?? ((ref T dst) => dst = val);
            _inRow = RowImpl.Create(type, valueSource);

            // One million features is a nice, typical number.
            var hashColumn  = new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: hashBits);
            var transformer = new HashingTransformer(_env, new[] { hashColumn });
            var rowMapper   = transformer.GetRowToRowMapper(_inRow.Schema);

            // Only the hashed output column needs to be active in the mapped row.
            var barIndex  = rowMapper.OutputSchema["Bar"].Index;
            var hashedRow = rowMapper.GetRow(_inRow, c => c == barIndex);

            if (!(type is VectorType))
            {
                _getter = hashedRow.GetGetter<uint>(barIndex);
                return;
            }

            _vecGetter = hashedRow.GetGetter<VBuffer<uint>>(barIndex);
        }
Beispiel #2
0
        /// <summary>
        /// Wraps <paramref name="val"/> in a single "Foo" column, runs it through a
        /// <c>HashingTransformer</c> that maps "Foo" to "Bar", and caches the getter for
        /// the "Bar" output column in either <c>_vecGetter</c> or <c>_getter</c>.
        /// </summary>
        /// <typeparam name="T">Type of the raw value fed to the transformer.</typeparam>
        /// <param name="val">Value stored in the "Foo" input column.</param>
        /// <param name="type">Column type of "Foo"; a <c>VectorType</c> selects the vector getter.</param>
        /// <param name="hashBits">Number of hash bits forwarded to the hashing column info.</param>
        /// <exception cref="System.InvalidOperationException">
        /// The mapper's output schema has no "Bar" column.
        /// </exception>
        private void InitMap<T>(T val, ColumnType type, int hashBits = 20)
        {
            var col = RowColumnUtils.GetColumn("Foo", type, ref val);

            _counted = new Counted();
            var inRow = RowColumnUtils.GetRow(_counted, col);

            // One million features is a nice, typical number.
            var info   = new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: hashBits);
            var xf     = new HashingTransformer(_env, new[] { info });
            var mapper = xf.GetRowToRowMapper(inRow.Schema);

            // Do not ignore the lookup result: on failure outCol would not identify
            // the "Bar" column and the getters below would target the wrong column.
            if (!mapper.OutputSchema.TryGetColumnIndex("Bar", out int outCol))
            {
                throw new System.InvalidOperationException("Output column 'Bar' was not found in the hashing mapper's output schema.");
            }

            // Only the hashed output column needs to be active in the mapped row.
            var outRow = mapper.GetRow(inRow, c => c == outCol, out var _);

            if (type is VectorType)
            {
                _vecGetter = outRow.GetGetter<VBuffer<uint>>(outCol);
            }
            else
            {
                _getter = outRow.GetGetter<uint>(outCol);
            }
        }
        /// <summary>
        /// Hashes <paramref name="val"/> with 10 hash bits as a scalar, as a dense vector and as a
        /// sparse vector, in both unordered and ordered mode, and checks the results against the
        /// supplied expectations.
        /// </summary>
        /// <typeparam name="T">Type of the raw value being hashed.</typeparam>
        /// <param name="val">Value placed in the "Foo" input column (repeated for the vector cases).</param>
        /// <param name="type">Item type of the "Foo" column.</param>
        /// <param name="expected">Expected unordered hash; every populated slot must equal it.</param>
        /// <param name="expectedOrdered">Expected ordered hash of the scalar and of vector slot 0.</param>
        /// <param name="expectedOrdered3">Expected ordered hash of vector slot 3.</param>
        private void HashTestCore<T>(T val, PrimitiveType type, uint expected, uint expectedOrdered, uint expectedOrdered3)
        {
            const int bits = 10;

            var col   = RowColumnUtils.GetColumn("Foo", type, ref val);
            var inRow = RowColumnUtils.GetRow(new Counted(), col);

            // Hashes "Foo" of whatever row 'inRow' currently refers to into "Bar" and reads
            // the hashed value into 'dst'. Shared by all six scenarios below.
            void HashInto<TDst>(ref TDst dst, bool ordered)
            {
                var info   = new HashingTransformer.ColumnInfo("Foo", "Bar", hashBits: bits, ordered: ordered);
                var xf     = new HashingTransformer(Env, new[] { info });
                var mapper = xf.GetRowToRowMapper(inRow.Schema);

                // Fail loudly if the output column is missing instead of reading an unrelated column.
                Assert.True(mapper.Schema.TryGetColumnIndex("Bar", out int outCol), "Output column 'Bar' was not found.");

                var outRow = mapper.GetRow(inRow, c => c == outCol, out var _);
                var getter = outRow.GetGetter<TDst>(outCol);
                getter(ref dst);
            }

            // First do an unordered hash.
            uint result = 0;
            HashInto(ref result, ordered: false);
            Assert.Equal(expected, result);

            // Next do an ordered hash.
            HashInto(ref result, ordered: true);
            Assert.Equal(expectedOrdered, result);

            // Next build up a vector to make sure that hashing is consistent between scalar values
            // at least in the first position, and in the unordered case, the last position.
            const int vecLen   = 5;
            var       denseVec = new VBuffer<T>(vecLen, Utils.CreateArray(vecLen, val));

            col   = RowColumnUtils.GetColumn("Foo", new VectorType(type, vecLen), ref denseVec);
            inRow = RowColumnUtils.GetRow(new Counted(), col);

            VBuffer<uint> vecResult = default;
            HashInto(ref vecResult, ordered: false);

            Assert.Equal(vecLen, vecResult.Length);
            // They all should equal this in this case.
            Assert.All(vecResult.DenseValues(), v => Assert.Equal(expected, v));

            // Now do ordered with the dense vector.
            HashInto(ref vecResult, ordered: true);

            Assert.Equal(vecLen, vecResult.Length);
            Assert.Equal(expectedOrdered, vecResult.GetItemOrDefault(0));
            Assert.Equal(expectedOrdered3, vecResult.GetItemOrDefault(3));
            Assert.All(vecResult.DenseValues(), v => Assert.True((v == 0) == (expectedOrdered == 0)));

            // Let's now do a sparse vector. The declared column size must match the buffer's
            // logical length (10), not the dense vector's length used above.
            const int sparseLen = 10;
            var       sparseVec = new VBuffer<T>(sparseLen, 3, Utils.CreateArray(3, val), new[] { 0, 3, 7 });

            col   = RowColumnUtils.GetColumn("Foo", new VectorType(type, sparseLen), ref sparseVec);
            inRow = RowColumnUtils.GetRow(new Counted(), col);

            HashInto(ref vecResult, ordered: false);

            Assert.Equal(sparseLen, vecResult.Length);
            Assert.Equal(expected, vecResult.GetItemOrDefault(0));
            Assert.Equal(expected, vecResult.GetItemOrDefault(3));
            Assert.Equal(expected, vecResult.GetItemOrDefault(7));

            // Finally, ordered with the sparse vector.
            HashInto(ref vecResult, ordered: true);

            Assert.Equal(sparseLen, vecResult.Length);
            Assert.Equal(expectedOrdered, vecResult.GetItemOrDefault(0));
            Assert.Equal(expectedOrdered3, vecResult.GetItemOrDefault(3));
        }