Beispiel #1
0
        /// <summary>
        /// Exposes <paramref name="val"/> as an annotation column named "Foo", runs a
        /// <c>HashingTransformer</c> that writes to "Bar", and asserts the produced hashes.
        /// The value is fed in three shapes (scalar, dense vector of 5 copies, sparse vector of
        /// length 10 with entries at slots 0, 3 and 7) and hashed with 10 bits.
        /// </summary>
        /// <typeparam name="T">Raw type of the value being hashed.</typeparam>
        /// <param name="val">The value to hash.</param>
        /// <param name="type">Data view type used to declare the "Foo" column (item type for the vector cases).</param>
        /// <param name="expected">Hash asserted for the scalar, for every dense slot, and for sparse slots 0, 3 and 7 without ordered hashing.</param>
        /// <param name="expectedOrdered">Hash asserted with ordered hashing for the scalar and for slot 0 of both vectors.</param>
        /// <param name="expectedOrdered3">Hash asserted with ordered hashing for slot 3 of both vectors.</param>
        /// <param name="expectedCombined">Hash asserted when the dense vector is hashed with <c>combine: true</c>.</param>
        /// <param name="expectedCombinedSparse">Hash asserted when the sparse vector is hashed with <c>combine: true</c>.</param>
        private void HashTestCore<T>(T val, PrimitiveDataViewType type, uint expected, uint expectedOrdered, uint expectedOrdered3, uint expectedCombined, uint expectedCombinedSparse)
        {
            const int hashBits = 10;
            const int denseLength = 5;
            const int sparseLength = 10;

            // ---- Scalar input -------------------------------------------------------------
            var scalarAnnotations = new DataViewSchema.Annotations.Builder();
            scalarAnnotations.AddPrimitiveValue("Foo", type, val);
            var sourceRow = AnnotationUtils.AnnotationsAsRow(scalarAnnotations.ToAnnotations());

            // Builds a getter for the "Bar" output of a hashing transformer configured with the
            // given options. It reads whatever `sourceRow` refers to at the moment it is called,
            // so `sourceRow` has to be reassigned before requesting getters for a new input shape.
            ValueGetter<TOut> BuildGetter<TOut>(HashingEstimator.ColumnOptions options)
            {
                ITransformer transformer = new HashingTransformer(Env, new[] { options });
                var rowMapper = transformer.GetRowToRowMapper(sourceRow.Schema);
                var barColumn = rowMapper.OutputSchema["Bar"];
                var mappedRow = rowMapper.GetRow(sourceRow, barColumn);

                return mappedRow.GetGetter<TOut>(barColumn);
            }

            uint hashed = 0;

            // Unordered scalar hash.
            var scalarGetter = BuildGetter<uint>(
                new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: hashBits));
            scalarGetter(ref hashed);
            Assert.Equal(expected, hashed);

            // Ordered scalar hash.
            scalarGetter = BuildGetter<uint>(
                new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: hashBits, useOrderedHashing: true));
            scalarGetter(ref hashed);
            Assert.Equal(expectedOrdered, hashed);

            // ---- Dense vector input -------------------------------------------------------
            // Every slot holds the same value, so the results can be compared against the scalar
            // hashes: slot 0 in the ordered case, every slot in the unordered case.
            var denseInput = new VBuffer<T>(denseLength, Utils.CreateArray(denseLength, val));
            var denseAnnotations = new DataViewSchema.Annotations.Builder();
            denseAnnotations.Add("Foo", new VectorDataViewType(type, denseLength), (ref VBuffer<T> dst) => denseInput.CopyTo(ref dst));
            sourceRow = AnnotationUtils.AnnotationsAsRow(denseAnnotations.ToAnnotations());

            VBuffer<uint> hashedVector = default;

            // Unordered: all slots must carry the scalar hash.
            var vectorGetter = BuildGetter<VBuffer<uint>>(
                new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: hashBits, useOrderedHashing: false));
            vectorGetter(ref hashedVector);
            Assert.Equal(denseLength, hashedVector.Length);
            Assert.All(hashedVector.DenseValues(), slotHash => Assert.Equal(expected, slotHash));

            // Ordered: slots 0 and 3 are pinned; every slot must be zero exactly when the
            // expected ordered hash is zero.
            vectorGetter = BuildGetter<VBuffer<uint>>(
                new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: hashBits, useOrderedHashing: true));
            vectorGetter(ref hashedVector);
            Assert.Equal(denseLength, hashedVector.Length);
            Assert.Equal(expectedOrdered, hashedVector.GetItemOrDefault(0));
            Assert.Equal(expectedOrdered3, hashedVector.GetItemOrDefault(3));
            bool orderedHashIsZero = expectedOrdered == 0;
            Assert.All(hashedVector.DenseValues(), slotHash => Assert.True((slotHash == 0) == orderedHashIsZero));

            // Combined: the whole dense vector collapses to a single hash.
            scalarGetter = BuildGetter<uint>(
                new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: hashBits, combine: true));
            scalarGetter(ref hashed);
            Assert.Equal(expectedCombined, hashed);

            // ---- Sparse vector input ------------------------------------------------------
            var sparseInput = new VBuffer<T>(sparseLength, 3, Utils.CreateArray(3, val), new[] { 0, 3, 7 });
            var sparseAnnotations = new DataViewSchema.Annotations.Builder();
            // NOTE(review): the column type is declared with the dense length (5) although the
            // buffer itself has length 10 - looks unintentional; confirm before changing.
            sparseAnnotations.Add("Foo", new VectorDataViewType(type, denseLength), (ref VBuffer<T> dst) => sparseInput.CopyTo(ref dst));
            sourceRow = AnnotationUtils.AnnotationsAsRow(sparseAnnotations.ToAnnotations());

            // Unordered: each explicitly stored slot must carry the scalar hash.
            vectorGetter = BuildGetter<VBuffer<uint>>(
                new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: hashBits, useOrderedHashing: false));
            vectorGetter(ref hashedVector);
            Assert.Equal(sparseLength, hashedVector.Length);
            Assert.Equal(expected, hashedVector.GetItemOrDefault(0));
            Assert.Equal(expected, hashedVector.GetItemOrDefault(3));
            Assert.Equal(expected, hashedVector.GetItemOrDefault(7));

            // Ordered: slots 0 and 3 must match the same expectations as in the dense case.
            vectorGetter = BuildGetter<VBuffer<uint>>(
                new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: hashBits, useOrderedHashing: true));
            vectorGetter(ref hashedVector);
            Assert.Equal(sparseLength, hashedVector.Length);
            Assert.Equal(expectedOrdered, hashedVector.GetItemOrDefault(0));
            Assert.Equal(expectedOrdered3, hashedVector.GetItemOrDefault(3));

            // Combined: the sparse vector collapses to its own single hash.
            scalarGetter = BuildGetter<uint>(
                new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: hashBits, combine: true));
            scalarGetter(ref hashed);
            Assert.Equal(expectedCombinedSparse, hashed);
        }