/// <summary>
/// Initializes the state shared by all key-to-value maps: the owning mapper, the type of the
/// produced values and the index of the column this map is responsible for.
/// </summary>
/// <param name="mapper">The owning mapper; stored in Parent.</param>
/// <param name="typeVal">The primitive type of the output values; stored in TypeOutput.</param>
/// <param name="iinfo">Column index; asserted to be a valid index into the mapper's _types array.</param>
protected KeyToValueMap(Mapper mapper, PrimitiveDataViewType typeVal, int iinfo)
{
    // REVIEW: Is there a better way to perform this first assert value?
    Contracts.AssertValue(mapper);
    Parent = mapper;

    // From here on the parent's host performs the remaining validation.
    Parent.Host.AssertValue(typeVal);
    Parent.Host.Assert(iinfo >= 0 && Parent._types.Length > iinfo);

    InfoIndex = iinfo;
    TypeOutput = typeVal;
}
/// <summary>
/// Creates the description of a single pivot column.
/// </summary>
/// <param name="name">Column name; asserted to be non-empty.</param>
/// <param name="index">Column index; asserted to be non-negative.</param>
/// <param name="size">Column size; asserted to be non-negative.</param>
/// <param name="itemType">Primitive item type of the column; asserted to be non-null.</param>
public PivotColumnOptions(string name, int index, int size, PrimitiveDataViewType itemType)
{
    // Validate every argument before any state is stored.
    Contracts.AssertNonEmpty(name);
    Contracts.Assert(0 <= index);
    Contracts.Assert(0 <= size);
    Contracts.AssertValue(itemType);

    ItemType = itemType;
    Size = size;
    Index = index;
    Name = name;
}
/// <summary>
/// Adds a single annotation column holding a constant value of a primitive type.
/// </summary>
/// <typeparam name="TValue">The CLR type of the value; must equal the raw type of <paramref name="type"/>.</typeparam>
/// <param name="name">The annotation name.</param>
/// <param name="type">The annotation type.</param>
/// <param name="value">The value of the annotation.</param>
/// <param name="annotations">Annotations of the input column. Note that annotations on an annotation column are somewhat rare
/// except for certain types (for example, slot names for a vector, key values for something of key type).</param>
/// <exception cref="ArgumentNullException"><paramref name="name"/> is null or empty, or <paramref name="type"/> is null.</exception>
/// <exception cref="ArgumentException">The raw type of <paramref name="type"/> is not <typeparamref name="TValue"/>.</exception>
public void AddPrimitiveValue<TValue>(string name, PrimitiveDataViewType type, TValue value, Annotations annotations = null)
{
    bool nameMissing = string.IsNullOrEmpty(name);
    if (nameMissing)
    {
        throw new ArgumentNullException(nameof(name));
    }

    if (type == null)
    {
        throw new ArgumentNullException(nameof(type));
    }

    Type expectedRawType = typeof(TValue);
    if (expectedRawType != type.RawType)
    {
        string message = $"{nameof(type)}.{nameof(type.RawType)} must be of type '{typeof(TValue).FullName}'.";
        throw new ArgumentException(message, nameof(type));
    }

    // The getter simply hands out the captured constant every time it is invoked.
    Add(name, type, (ref TValue dst) => { dst = value; }, annotations);
}
/// <summary>
/// Reports, for a standard scalar type, whether an "is NA" predicate is available and
/// whether default(T) is itself considered NA by that predicate.
/// </summary>
/// <typeparam name="T">The raw value type the predicate operates on.</typeparam>
/// <param name="ch">Channel used for assertions.</param>
/// <param name="type">The type to inspect; asserted to be a standard scalar.</param>
/// <returns>A <c>TypeNaInfo</c> built from (predicate available, default value is NA).</returns>
private TypeNaInfo KindReport<T>(IChannel ch, PrimitiveDataViewType type)
{
    Contracts.AssertValue(ch);
    ch.AssertValue(type);
    ch.Assert(type.IsStandardScalar());

    bool predicateFound = Conversions.Instance.TryGetIsNAPredicate(type, out InPredicate<T> isNa);
    if (!predicateFound)
    {
        // Without a predicate the default value cannot be classified as NA.
        return new TypeNaInfo(false, false);
    }

    T defaultValue = default(T);
    bool defaultIsNa = isNa(in defaultValue);
    return new TypeNaInfo(true, defaultIsNa);
}
/// <summary>
/// Produces the JSON token representing the default value of a primitive type, or null when
/// the type has no default representation here.
/// </summary>
/// <param name="itemType">The primitive type; must not be null.</param>
/// <returns>
/// 0 for key types and integer raw types, 0.0 for float/double, false for bool, an empty
/// string token for text, and null for anything else.
/// </returns>
public static JToken DefaultTokenOrNull(PrimitiveDataViewType itemType)
{
    Contracts.CheckValue(itemType, nameof(itemType));

    if (itemType is KeyDataViewType)
    {
        return 0;
    }

    System.Type rawType = itemType.RawType;

    bool isInteger =
        rawType == typeof(sbyte) || rawType == typeof(byte) ||
        rawType == typeof(short) || rawType == typeof(ushort) ||
        rawType == typeof(int) || rawType == typeof(uint) ||
        rawType == typeof(long) || rawType == typeof(ulong);
    if (isInteger)
    {
        return 0;
    }

    // REVIEW: float should really stay float. But, for the sake of the POC, double is used
    // since all the PFA convenience libraries operate over doubles.
    bool isFloatingPoint = rawType == typeof(float) || rawType == typeof(double);
    if (isFloatingPoint)
    {
        return 0.0;
    }

    if (rawType == typeof(bool))
    {
        return false;
    }

    bool isText = rawType == typeof(System.ReadOnlyMemory<char>) || rawType == typeof(string);
    if (isText)
    {
        return String("");
    }

    return null;
}
/// <summary>
/// Sets up the value-to-text conversion for one column and caches the text form of default(T).
/// </summary>
/// <param name="type">Column item type; asserted to be a standard scalar or key type whose raw type is T.</param>
/// <param name="source">Source column index, forwarded to the base constructor.</param>
/// <param name="sep">Separator character; stored in Sep.</param>
protected ValueWriterBase(PrimitiveDataViewType type, int source, char sep)
    : base(source)
{
    Contracts.Assert(type.IsStandardScalar() || type is KeyType);
    Contracts.Assert(type.RawType == typeof(T));

    Sep = sep;

    // Text and the date/time types use dedicated mappers. Each mapper is first bound with its
    // concrete source type and then re-typed to ValueMapper<T, StringBuilder> through Delegate.
    switch (type)
    {
        case TextDataViewType _:
            // For text we need to deal with escaping.
            ValueMapper<ReadOnlyMemory<char>, StringBuilder> textMapper = MapText;
            Conv = (ValueMapper<T, StringBuilder>)(Delegate)textMapper;
            break;

        case TimeSpanDataViewType _:
            ValueMapper<TimeSpan, StringBuilder> timeSpanMapper = MapTimeSpan;
            Conv = (ValueMapper<T, StringBuilder>)(Delegate)timeSpanMapper;
            break;

        case DateTimeDataViewType _:
            ValueMapper<DateTime, StringBuilder> dateTimeMapper = MapDateTime;
            Conv = (ValueMapper<T, StringBuilder>)(Delegate)dateTimeMapper;
            break;

        case DateTimeOffsetDataViewType _:
            ValueMapper<DateTimeOffset, StringBuilder> dateTimeOffsetMapper = MapDateTimeZone;
            Conv = (ValueMapper<T, StringBuilder>)(Delegate)dateTimeOffsetMapper;
            break;

        default:
            Conv = Conversions.Instance.GetStringConversion<T>(type);
            break;
    }

    // Run the conversion once on default(T) and remember the resulting text.
    var defaultValue = default(T);
    Conv(in defaultValue, ref Sb);
    Default = Sb.ToString();
}
/// <summary>
/// Initializes the map from a dense vector built over <paramref name="vals"/>.
/// </summary>
/// <typeparam name="T">Element type of the vector.</typeparam>
/// <param name="vals">The dense values; the vector length equals the array length.</param>
/// <param name="itemType">Item type of the vector.</param>
/// <param name="numberOfBits">Number of bits forwarded to InitMap.</param>
private void InitDenseVecMap<T>(T[] vals, PrimitiveDataViewType itemType, int numberOfBits = 20)
{
    int length = vals.Length;
    var denseBuffer = new VBuffer<T>(length, vals);
    var vectorType = new VectorType(itemType, vals.Length);

    InitMap(denseBuffer, vectorType, numberOfBits, denseBuffer.CopyTo);
}
/// <summary>
/// Hashes <paramref name="val"/> as a scalar, as a dense vector and as a sparse vector, in
/// unordered, ordered and combined modes, and compares the results with the expected hashes.
/// </summary>
/// <typeparam name="T">Raw type of the value being hashed.</typeparam>
/// <param name="val">The value to hash.</param>
/// <param name="type">The data view type describing <paramref name="val"/>.</param>
/// <param name="expected">Expected unordered hash (scalar and every vector slot holding the value).</param>
/// <param name="expectedOrdered">Expected ordered hash of the scalar and of vector position 0.</param>
/// <param name="expectedOrdered3">Expected ordered hash at vector position 3.</param>
/// <param name="expectedCombined">Expected combined hash of the dense vector.</param>
/// <param name="expectedCombinedSparse">Expected combined hash of the sparse vector.</param>
private void HashTestCore<T>(T val, PrimitiveDataViewType type, uint expected, uint expectedOrdered, uint expectedOrdered3, uint expectedCombined, uint expectedCombinedSparse)
{
    const int bits = 10;

    var builder = new DataViewSchema.Annotations.Builder();
    builder.AddPrimitiveValue("Foo", type, val);
    var inRow = AnnotationUtils.AnnotationsAsRow(builder.ToAnnotations());

    // Helper: builds a hashing transformer for the given options over the current input row
    // and returns the getter of its "Bar" output column. It reads inRow at call time, so it
    // always sees the most recently assigned input row.
    ValueGetter<TResult> BuildHashGetter<TResult>(HashingEstimator.ColumnOptions options)
    {
        ITransformer transformer = new HashingTransformer(Env, new[] { options });
        var rowMapper = transformer.GetRowToRowMapper(inRow.Schema);
        var outputColumn = rowMapper.OutputSchema["Bar"];
        var hashedRow = rowMapper.GetRow(inRow, outputColumn);
        return hashedRow.GetGetter<TResult>(outputColumn);
    }

    // Scalar, unordered.
    var scalarGetter = BuildHashGetter<uint>(
        new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: bits));
    uint hash = 0;
    scalarGetter(ref hash);
    Assert.Equal(expected, hash);

    // Scalar, ordered.
    scalarGetter = BuildHashGetter<uint>(
        new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: bits, useOrderedHashing: true));
    scalarGetter(ref hash);
    Assert.Equal(expectedOrdered, hash);

    // Dense vector: hashing must be consistent with the scalar case, at least in the first
    // position and, for unordered hashing, in every position.
    const int vecLen = 5;
    var denseVec = new VBuffer<T>(vecLen, Utils.CreateArray(vecLen, val));
    builder = new DataViewSchema.Annotations.Builder();
    builder.Add("Foo", new VectorDataViewType(type, vecLen), (ref VBuffer<T> dst) => denseVec.CopyTo(ref dst));
    inRow = AnnotationUtils.AnnotationsAsRow(builder.ToAnnotations());

    var vectorGetter = BuildHashGetter<VBuffer<uint>>(
        new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: bits, useOrderedHashing: false));
    VBuffer<uint> hashes = default;
    vectorGetter(ref hashes);
    Assert.Equal(vecLen, hashes.Length);
    // They all should equal this in this case.
    Assert.All(hashes.DenseValues(), v => Assert.Equal(expected, v));

    // Dense vector, ordered.
    vectorGetter = BuildHashGetter<VBuffer<uint>>(
        new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: bits, useOrderedHashing: true));
    vectorGetter(ref hashes);
    Assert.Equal(vecLen, hashes.Length);
    Assert.Equal(expectedOrdered, hashes.GetItemOrDefault(0));
    Assert.Equal(expectedOrdered3, hashes.GetItemOrDefault(3));
    Assert.All(hashes.DenseValues(), v => Assert.True((v == 0) == (expectedOrdered == 0)));

    // Dense vector, combined into one hash.
    scalarGetter = BuildHashGetter<uint>(
        new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: bits, combine: true));
    scalarGetter(ref hash);
    Assert.Equal(expectedCombined, hash);

    // Sparse vector of length 10 with the value at slots 0, 3 and 7.
    // NOTE(review): the column type is declared with vecLen (5) while the buffer has length 10;
    // kept as in the original test - confirm whether this mismatch is intentional.
    var sparseVec = new VBuffer<T>(10, 3, Utils.CreateArray(3, val), new[] { 0, 3, 7 });
    builder = new DataViewSchema.Annotations.Builder();
    builder.Add("Foo", new VectorDataViewType(type, vecLen), (ref VBuffer<T> dst) => sparseVec.CopyTo(ref dst));
    inRow = AnnotationUtils.AnnotationsAsRow(builder.ToAnnotations());

    // Sparse vector, unordered.
    vectorGetter = BuildHashGetter<VBuffer<uint>>(
        new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: bits, useOrderedHashing: false));
    vectorGetter(ref hashes);
    Assert.Equal(10, hashes.Length);
    Assert.Equal(expected, hashes.GetItemOrDefault(0));
    Assert.Equal(expected, hashes.GetItemOrDefault(3));
    Assert.Equal(expected, hashes.GetItemOrDefault(7));

    // Sparse vector, ordered.
    vectorGetter = BuildHashGetter<VBuffer<uint>>(
        new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: bits, useOrderedHashing: true));
    vectorGetter(ref hashes);
    Assert.Equal(10, hashes.Length);
    Assert.Equal(expectedOrdered, hashes.GetItemOrDefault(0));
    Assert.Equal(expectedOrdered3, hashes.GetItemOrDefault(3));

    // Sparse vector, combined into one hash.
    scalarGetter = BuildHashGetter<uint>(
        new HashingEstimator.ColumnOptions("Bar", "Foo", numberOfBits: bits, combine: true));
    scalarGetter(ref hash);
    Assert.Equal(expectedCombinedSparse, hash);
}
/// <summary>
/// Checks that Equals/GetHashCode of the data view types agree with their string form: two
/// types that compare equal as dictionary keys must also print identically. Covers primitive
/// types, key types, vectors over both, and image types.
/// </summary>
public void TestEqualAndGetHashCode()
{
    var dict = new Dictionary<DataViewType, string>();

    // Records a type under its string form. Fails when an equal type was already recorded
    // with a different string form.
    void Register(DataViewType candidate)
    {
        string text = candidate.ToString();
        if (dict.TryGetValue(candidate, out string existing) && existing != text)
        {
            Assert.True(false, existing + " and " + text + " are duplicates.");
        }

        dict[candidate] = text;
    }

    // Records the vector types built over the given item type: one-dimensional sizes 0..4 and
    // every two-dimensional combination of sizes 0..4.
    void RegisterVectorsOf(PrimitiveDataViewType itemType)
    {
        for (int size = 0; size < 5; size++)
        {
            Register(new VectorType(itemType, size));
            for (int size1 = 0; size1 < 5; size1++)
            {
                Register(new VectorType(itemType, size, size1));
            }
        }
    }

    // add PrimitiveTypes, KeyType & corresponding VectorTypes
    var types = new PrimitiveDataViewType[]
    {
        NumberDataViewType.SByte, NumberDataViewType.Int16, NumberDataViewType.Int32, NumberDataViewType.Int64,
        NumberDataViewType.Byte, NumberDataViewType.UInt16, NumberDataViewType.UInt32, NumberDataViewType.UInt64,
        RowIdDataViewType.Instance, TextDataViewType.Instance, BooleanDataViewType.Instance,
        DateTimeDataViewType.Instance, DateTimeOffsetDataViewType.Instance, TimeSpanDataViewType.Instance
    };

    foreach (var type in types)
    {
        Register(type);
        RegisterVectorsOf(type);

        // KeyType & Vector
        var rawType = type.RawType;
        if (!KeyType.IsValidDataType(rawType))
        {
            continue;
        }

        // The original wrapped this loop in an outer "min" loop whose variable was never used,
        // which only repeated the same registrations five times; one pass is enough.
        for (var count = 1; count < 5; count++)
        {
            PrimitiveDataViewType keyType = new KeyType(rawType, count);
            Register(keyType);
            RegisterVectorsOf(keyType);
        }

        // Also cover the key type with the largest count computed for the raw type's kind.
        Assert.True(rawType.TryGetDataKind(out var kind));
        PrimitiveDataViewType maxKeyType = new KeyType(rawType, kind.ToMaxInt());
        Register(maxKeyType);
        RegisterVectorsOf(maxKeyType);
    }

    // add ImageTypes
    // Stricter than the check above: an image type must not equal anything recorded so far,
    // regardless of its string form.
    for (int height = 1; height < 5; height++)
    {
        for (int width = 1; width < 5; width++)
        {
            var imageType = new ImageType(height, width);
            if (dict.TryGetValue(imageType, out string existing))
            {
                Assert.True(false, existing + " and " + imageType.ToString() + " are duplicates.");
            }

            dict[imageType] = imageType.ToString();
        }
    }
}
/// <summary>
/// Creates a column description from its index, a suggested name and its item type.
/// </summary>
/// <param name="columnIndex">Index of the column; stored in ColumnIndex.</param>
/// <param name="suggestedName">Suggested name for the column; stored in SuggestedName.</param>
/// <param name="itemType">Primitive item type of the column; stored in ItemType.</param>
public Column(int columnIndex, string suggestedName, PrimitiveDataViewType itemType)
{
    // Plain value holder: no validation is performed on any of the arguments.
    ItemType = itemType;
    SuggestedName = suggestedName;
    ColumnIndex = columnIndex;
}
/// <summary>
/// Creates a writer for one column of a cursor: fetches the typed getter for the column and
/// remembers the column's name.
/// </summary>
/// <param name="cursor">Cursor the values are read from.</param>
/// <param name="type">Column item type, forwarded to the base constructor.</param>
/// <param name="source">Index of the source column in the cursor's schema.</param>
/// <param name="sep">Separator character, forwarded to the base constructor.</param>
public ValueWriter(DataViewRowCursor cursor, PrimitiveDataViewType type, int source, char sep)
    : base(type, source, sep)
{
    _getSrc = cursor.GetGetter<T>(source);

    var sourceColumn = cursor.Schema[source];
    _columnName = sourceColumn.Name;
}
/// <summary>
/// Hashes <paramref name="val"/> as a scalar, as a dense vector and as a sparse vector, in
/// unordered and ordered modes, and compares the results with the expected hashes.
/// </summary>
/// <typeparam name="T">Raw type of the value being hashed.</typeparam>
/// <param name="val">The value to hash.</param>
/// <param name="type">The column type describing <paramref name="val"/>.</param>
/// <param name="expected">Expected unordered hash (scalar and every vector slot holding the value).</param>
/// <param name="expectedOrdered">Expected ordered hash of the scalar and of vector position 0.</param>
/// <param name="expectedOrdered3">Expected ordered hash at vector position 3.</param>
private void HashTestCore<T>(T val, PrimitiveDataViewType type, uint expected, uint expectedOrdered, uint expectedOrdered3)
{
    const int bits = 10;

    var builder = new MetadataBuilder();
    builder.AddPrimitiveValue("Foo", type, val);
    var inRow = MetadataUtils.MetadataAsRow(builder.GetMetadata());

    // Helper: builds a hashing transformer for the given column info over the current input
    // row and returns the getter of its "Bar" output column. It reads inRow at call time, so
    // it always sees the most recently assigned input row.
    ValueGetter<TDst> GetHashGetter<TDst>(HashingEstimator.ColumnInfo colInfo)
    {
        var xf = new HashingTransformer(Env, new[] { colInfo });
        var mapper = xf.GetRowToRowMapper(inRow.Schema);

        // Fail right here if the output column is missing, instead of silently continuing
        // with column index 0.
        Assert.True(mapper.OutputSchema.TryGetColumnIndex("Bar", out int outCol), "Output column 'Bar' was not found.");

        var outRow = mapper.GetRow(inRow, c => c == outCol);
        return outRow.GetGetter<TDst>(outCol);
    }

    // First do an unordered hash.
    var getter = GetHashGetter<uint>(new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits));
    uint result = 0;
    getter(ref result);
    Assert.Equal(expected, result);

    // Next do an ordered hash.
    getter = GetHashGetter<uint>(new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: true));
    getter(ref result);
    Assert.Equal(expectedOrdered, result);

    // Next build up a vector to make sure that hashing is consistent between scalar values
    // at least in the first position, and in the unordered case, the last position.
    const int vecLen = 5;
    var denseVec = new VBuffer<T>(vecLen, Utils.CreateArray(vecLen, val));
    builder = new MetadataBuilder();
    builder.Add("Foo", new VectorType(type, vecLen), (ref VBuffer<T> dst) => denseVec.CopyTo(ref dst));
    inRow = MetadataUtils.MetadataAsRow(builder.GetMetadata());

    var vecGetter = GetHashGetter<VBuffer<uint>>(new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: false));
    VBuffer<uint> vecResult = default;
    vecGetter(ref vecResult);
    Assert.Equal(vecLen, vecResult.Length);
    // They all should equal this in this case.
    Assert.All(vecResult.DenseValues(), v => Assert.Equal(expected, v));

    // Now do ordered with the dense vector.
    vecGetter = GetHashGetter<VBuffer<uint>>(new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: true));
    vecGetter(ref vecResult);
    Assert.Equal(vecLen, vecResult.Length);
    Assert.Equal(expectedOrdered, vecResult.GetItemOrDefault(0));
    Assert.Equal(expectedOrdered3, vecResult.GetItemOrDefault(3));
    Assert.All(vecResult.DenseValues(), v => Assert.True((v == 0) == (expectedOrdered == 0)));

    // Let's now do a sparse vector.
    // NOTE(review): the column type is declared with vecLen (5) while the buffer has length 10;
    // kept as in the original test - confirm whether this mismatch is intentional.
    var sparseVec = new VBuffer<T>(10, 3, Utils.CreateArray(3, val), new[] { 0, 3, 7 });
    builder = new MetadataBuilder();
    builder.Add("Foo", new VectorType(type, vecLen), (ref VBuffer<T> dst) => sparseVec.CopyTo(ref dst));
    inRow = MetadataUtils.MetadataAsRow(builder.GetMetadata());

    vecGetter = GetHashGetter<VBuffer<uint>>(new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: false));
    vecGetter(ref vecResult);
    Assert.Equal(10, vecResult.Length);
    Assert.Equal(expected, vecResult.GetItemOrDefault(0));
    Assert.Equal(expected, vecResult.GetItemOrDefault(3));
    Assert.Equal(expected, vecResult.GetItemOrDefault(7));

    vecGetter = GetHashGetter<VBuffer<uint>>(new HashingEstimator.ColumnInfo("Bar", "Foo", hashBits: bits, ordered: true));
    vecGetter(ref vecResult);
    Assert.Equal(10, vecResult.Length);
    Assert.Equal(expectedOrdered, vecResult.GetItemOrDefault(0));
    Assert.Equal(expectedOrdered3, vecResult.GetItemOrDefault(3));
}
/// <summary>
/// Works out the destination item type for converting the column described by
/// <paramref name="info"/> to <paramref name="kind"/>, verifies that a conversion exists
/// (either a standard one or one of the extra conversions listed below) and, if so, builds
/// the extended column information.
/// </summary>
/// <param name="ectx">Exception context used for assertions and by the range-checking mappers.</param>
/// <param name="info">Description of the source column (source type and optional slot type).</param>
/// <param name="kind">Requested destination data kind.</param>
/// <param name="range">Optional key range; when non-null the destination is a key type built from it.</param>
/// <param name="itemType">Receives the destination item type (assigned on every return path).</param>
/// <param name="ex">Receives the extended column information on success, null otherwise.</param>
/// <returns>
/// <c>true</c> when the conversion is supported; <c>false</c> when the source is a key type and
/// <paramref name="kind"/> is not a valid kind for it, or when no conversion is available.
/// </returns>
/// <remarks>
/// Throws (through <c>Contracts.Except</c>) when no standard conversion exists, source and
/// destination share the same raw kind, and that kind is not UInt32.
/// </remarks>
private static bool TryCreateEx(IExceptionContext ectx, ColInfo info, DataKind kind, KeyCount range, out PrimitiveDataViewType itemType, out ColInfoEx ex)
{
    ectx.AssertValue(info);
    ectx.Assert(Enum.IsDefined(typeof(DataKind), kind));

    ex = null;

    var typeSrc = info.TypeSrc;
    var srcItemType = typeSrc.ItemType();

    if (range != null)
    {
        itemType = TypeParsingUtils.ConstructKeyType(SchemaHelper.DataKind2InternalDataKind(kind), range);
    }
    else if (!srcItemType.IsKey())
    {
        itemType = ColumnTypeHelper.PrimitiveFromKind(kind);
    }
    else if (!ColumnTypeHelper.IsValidDataKind(kind))
    {
        itemType = ColumnTypeHelper.PrimitiveFromKind(kind);
        return false;
    }
    else
    {
        var key = srcItemType.AsKey();
        ectx.Assert(ColumnTypeHelper.IsValidDataKind(key.RawKind()));
        ulong count = key.Count;

        // Technically, it's an error for the counts not to match, but we'll let the Conversions
        // code return false below. There's a possibility we'll change the standard conversions to
        // map out of bounds values to zero, in which case, this is the right thing to do.
        // NOTE(review): this uses the numeric value of the DataKind enum member as the upper
        // bound, not the largest value representable by that kind - confirm this is intended.
        ulong max = (ulong)kind;
        if (count > max)
        {
            count = max;
        }

        itemType = new KeyDataViewType(SchemaHelper.DataKind2ColumnType(kind).RawType, count);
    }

    // Ensure that the conversion is legal. We don't actually cache the delegate here. It will get
    // re-fetched by the utils code when needed.
    bool identity;
    Delegate del;
    if (!Conversions.DefaultInstance.TryGetStandardConversion(srcItemType, itemType, out del, out identity))
    {
        // No standard conversion: fall back to the extra conversions below. The mappers are
        // built only to spell out what each supported conversion does; they are not used
        // after this method returns.
        var srcKind = srcItemType.RawKind();

        // Key starts at 1, so moving between key and non-key shifts the raw value by one.
        // The subtraction deliberately wraps around in unsigned arithmetic (0 - 1 gives
        // MaxValue), so that "src + shift" amounts to "src - 1".
        // Multiclass future issue
        bool dstIsKey = itemType.IsKey();
        bool srcIsKey = typeSrc.IsKey();
        uint shift32 = unchecked((dstIsKey ? 1u : 0u) - (srcIsKey ? 1u : 0u));
        ulong shift64 = unchecked((dstIsKey ? 1UL : 0UL) - (srcIsKey ? 1UL : 0UL));

        if (srcKind == itemType.RawKind())
        {
            if (srcKind != DataKind.UInt32)
            {
                throw Contracts.Except("Not suppoted type {0}", srcKind);
            }

            identity = false;
            ValueMapper<uint, uint> map_ = (in uint src, ref uint dst) => { dst = src + shift32; };
            del = map_;
        }
        else if (srcKind == DataKind.Int64 && kind == DataKind.UInt64)
        {
            identity = false;
            ValueMapper<long, ulong> map_ = (in long src, ref ulong dst) => { CheckRange(src, dst, ectx); dst = (ulong)src + shift64; };
            del = map_;
        }
        else if (srcKind == DataKind.Single && kind == DataKind.UInt64)
        {
            identity = false;
            ValueMapper<float, ulong> map_ = (in float src, ref ulong dst) => { CheckRange(src, dst, ectx); dst = (ulong)src + shift64; };
            del = map_;
        }
        else if (srcKind == DataKind.Int64 && kind == DataKind.UInt32)
        {
            identity = false;
            ValueMapper<long, uint> map_ = (in long src, ref uint dst) => { CheckRange(src, dst, ectx); dst = (uint)src + shift32; };
            del = map_;
        }
        else if (srcKind == DataKind.Single && kind == DataKind.UInt32)
        {
            identity = false;
            ValueMapper<float, uint> map_ = (in float src, ref uint dst) => { CheckRange(src, dst, ectx); dst = (uint)src + shift32; };
            del = map_;
        }
        else if (srcKind == DataKind.Single && kind == DataKind.String)
        {
            // Multiclass future issue
            identity = false;
            ValueMapper<float, DvText> map_ = (in float src, ref DvText dst) => { dst = new DvText(string.Format("{0}", (int)src)); };
            del = map_;
        }
        else
        {
            return false;
        }
    }

    // A vector source keeps its dimensions; only the item type changes.
    DataViewType typeDst = itemType;
    if (typeSrc.IsVector())
    {
        typeDst = new VectorDataViewType(itemType, typeSrc.AsVector().Dimensions.ToArray());
    }

    // An output column is transposable iff the input column was transposable.
    VectorDataViewType slotType = null;
    if (info.SlotTypeSrc != null)
    {
        slotType = new VectorDataViewType(itemType, info.SlotTypeSrc.Dimensions.ToArray());
    }

    ex = new ColInfoEx(kind, range != null, typeDst, slotType);
    return true;
}