/// <summary>
/// Hashes <paramref name="val"/> through a <see cref="HashingTransformer"/> (input column "Foo",
/// output column "Bar", 10 hash bits) as a scalar, as a dense vector and as a sparse vector,
/// and asserts each result against the expected hashes supplied by the caller.
/// </summary>
/// <typeparam name="T">Item type of the value being hashed.</typeparam>
/// <param name="val">Value stored in the "Foo" column; repeated to fill the vector inputs.</param>
/// <param name="type">Data view type used to declare the "Foo" column (as scalar and as vector item type).</param>
/// <param name="expected">Expected hash without ordered hashing; also expected in each checked vector slot.</param>
/// <param name="expectedOrdered">Expected hash with ordered hashing, for the scalar and for vector slot 0.</param>
/// <param name="expectedOrdered3">Expected hash with ordered hashing for vector slot 3.</param>
/// <param name="expectedCombined">Expected single hash when the dense vector is hashed with <c>combine: true</c>.</param>
/// <param name="expectedCombinedSparse">Expected single hash when the sparse vector is hashed with <c>combine: true</c>.</param>
private void HashTestCore<T>(T val, PrimitiveDataViewType type, uint expected, uint expectedOrdered, uint expectedOrdered3, uint expectedCombined, uint expectedCombinedSparse)
{
    const int bits = 10;
    const int vecLen = 5;
    const int sparseLen = 10;
    const int sparseCount = 3;

    // The input row is backed by annotations; it starts out with a scalar "Foo" column.
    var scalarAnnotations = new DataViewSchema.Annotations.Builder();
    scalarAnnotations.AddPrimitiveValue("Foo", type, val);
    var inRow = AnnotationUtils.AnnotationsAsRow(scalarAnnotations.ToAnnotations());

    // Creates a transformer for the given options and returns the getter of its "Bar" output.
    // It reads inRow when invoked, so it always binds to whichever input row is current.
    ValueGetter<TOut> MakeHashGetter<TOut>(HashingEstimator.ColumnOptions options)
    {
        ITransformer transformer = new HashingTransformer(Env, new[] { options });
        var mapper = transformer.GetRowToRowMapper(inRow.Schema);
        var outputColumn = mapper.OutputSchema["Bar"];
        return mapper.GetRow(inRow, outputColumn).GetGetter<TOut>(outputColumn);
    }

    // Replaces the current input row with one whose "Foo" column yields the given vector.
    // NOTE(review): the column is always declared with size vecLen, including for the sparse
    // input whose logical length is sparseLen — confirm this mismatch is intentional.
    void UseVectorInput(VBuffer<T> source)
    {
        var vectorAnnotations = new DataViewSchema.Annotations.Builder();
        vectorAnnotations.Add("Foo", new VectorDataViewType(type, vecLen), (ref VBuffer<T> dst) => source.CopyTo(ref dst));
        inRow = AnnotationUtils.AnnotationsAsRow(vectorAnnotations.ToAnnotations());
    }

    // Result holders are shared by all getter invocations below.
    uint hash = 0;
    VBuffer<uint> hashes = default;

    // --- Scalar input ---

    // Unordered hash (ordering option left unspecified).
    var unorderedScalar = MakeHashGetter<uint>(
        new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: bits));
    unorderedScalar(ref hash);
    Assert.Equal(expected, hash);

    // Ordered hash.
    var orderedScalar = MakeHashGetter<uint>(
        new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: bits, useOrderedHashing: true));
    orderedScalar(ref hash);
    Assert.Equal(expectedOrdered, hash);

    // --- Dense vector input: vecLen copies of the value ---
    // Verifies that vector hashing agrees with the scalar hashes above.
    UseVectorInput(new VBuffer<T>(vecLen, Utils.CreateArray(vecLen, val)));

    // Unordered: every slot must carry the scalar unordered hash.
    var unorderedDense = MakeHashGetter<VBuffer<uint>>(
        new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: bits, useOrderedHashing: false));
    unorderedDense(ref hashes);
    Assert.Equal(vecLen, hashes.Length);
    Assert.All(hashes.DenseValues(), v => Assert.Equal(expected, v));

    // Ordered: slot 0 matches the scalar ordered hash and slot 3 has its own expected value.
    var orderedDense = MakeHashGetter<VBuffer<uint>>(
        new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: bits, useOrderedHashing: true));
    orderedDense(ref hashes);
    Assert.Equal(vecLen, hashes.Length);
    Assert.Equal(expectedOrdered, hashes.GetItemOrDefault(0));
    Assert.Equal(expectedOrdered3, hashes.GetItemOrDefault(3));
    // Each slot is zero exactly when the expected ordered hash is zero.
    Assert.All(hashes.DenseValues(), v => Assert.True((v == 0) == (expectedOrdered == 0)));

    // Combined: the whole vector collapses into a single hash.
    var combinedDense = MakeHashGetter<uint>(
        new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: bits, combine: true));
    combinedDense(ref hash);
    Assert.Equal(expectedCombined, hash);

    // --- Sparse vector input: logical length sparseLen, explicit values at 0, 3 and 7 ---
    UseVectorInput(new VBuffer<T>(sparseLen, sparseCount, Utils.CreateArray(sparseCount, val), new[] { 0, 3, 7 }));

    // Unordered: each explicit slot carries the scalar unordered hash.
    var unorderedSparse = MakeHashGetter<VBuffer<uint>>(
        new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: bits, useOrderedHashing: false));
    unorderedSparse(ref hashes);
    Assert.Equal(sparseLen, hashes.Length);
    Assert.Equal(expected, hashes.GetItemOrDefault(0));
    Assert.Equal(expected, hashes.GetItemOrDefault(3));
    Assert.Equal(expected, hashes.GetItemOrDefault(7));

    // Ordered: slots 0 and 3 must match the same expectations as in the dense case.
    var orderedSparse = MakeHashGetter<VBuffer<uint>>(
        new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: bits, useOrderedHashing: true));
    orderedSparse(ref hashes);
    Assert.Equal(sparseLen, hashes.Length);
    Assert.Equal(expectedOrdered, hashes.GetItemOrDefault(0));
    Assert.Equal(expectedOrdered3, hashes.GetItemOrDefault(3));

    // Combined hash of the sparse vector.
    var combinedSparse = MakeHashGetter<uint>(
        new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: bits, combine: true));
    combinedSparse(ref hash);
    Assert.Equal(expectedCombinedSparse, hash);
}