/// <summary>
/// Tests whether a column type is acceptable, based solely on its value count.
/// </summary>
/// <param name="type">The column type to test.</param>
/// <returns>
/// <c>null</c> when <c>type.GetValueCount()</c> is greater than zero; otherwise the
/// message "Unknown vector size".
/// </returns>
private static string TestColumnType(DataViewType type)
{
    // REVIEW: list all types that can be hashed.
    bool hasPositiveValueCount = type.GetValueCount() > 0;
    return hasPositiveValueCount ? null : "Unknown vector size";
}
/// <summary>
/// Builds the <see cref="ModelArgs"/> describing a column: its name, the tensor element
/// type derived from the column's raw type, and its dimensions with a leading batch
/// dimension of 1.
/// </summary>
/// <param name="type">
/// The column type. For a <see cref="VectorDataViewType"/> the element type is taken from
/// its item type; otherwise from the type itself.
/// </param>
/// <param name="colName">The column name; must be non-empty.</param>
/// <param name="dims">
/// Optional explicit dimensions (without the batch dimension). When supplied they are used
/// instead of the dimensions derived from <paramref name="type"/>. The list is copied, so
/// the caller's instance is not modified.
/// </param>
/// <param name="dimsParams">
/// Optional per-dimension flags, passed through as-is when <paramref name="dims"/> is
/// supplied and ignored otherwise.
/// </param>
/// <returns>The <see cref="ModelArgs"/> for the column.</returns>
public static ModelArgs GetModelArgs(DataViewType type, string colName, List<long> dims = null, List<bool> dimsParams = null)
{
    Contracts.CheckValue(type, nameof(type));
    Contracts.CheckNonEmpty(colName, nameof(colName));

    Type rawType = type is VectorDataViewType vectorType
        ? vectorType.ItemType.RawType
        : type.RawType;
    var dataType = ConvertToTensorProtoType(rawType);

    List<long> dimsLocal;
    List<bool> dimsParamLocal = null;
    if (dims != null)
    {
        // Work on a copy: the batch dimension inserted below must not leak into the
        // caller's list (a reused list would otherwise gain one extra entry per call).
        dimsLocal = new List<long>(dims);
        dimsParamLocal = dimsParams;
    }
    else
    {
        dimsLocal = new List<long>();
        int valueCount = type.GetValueCount();
        if (valueCount == 0)
        {
            // Unknown size.
            dimsLocal.Add(1);
            // false for batch size, true for dims.
            dimsParamLocal = new List<bool>() { false, true };
        }
        else if (valueCount == 1)
        {
            dimsLocal.Add(1);
        }
        else if (valueCount > 1)
        {
            var vec = (VectorDataViewType)type;
            for (int i = 0; i < vec.Dimensions.Length; i++)
            {
                dimsLocal.Add(vec.Dimensions[i]);
            }
        }
    }

    // Batch size.
    dimsLocal.Insert(0, 1);
    return new ModelArgs(colName, dataType, dimsLocal, dimsParamLocal);
}
/// <summary>
/// Validates a decoded model against the column it applies to: the model must contain
/// exactly <c>valueCount * valueCount</c> entries, where <c>valueCount</c> is the column's
/// value count, and every entry must be finite.
/// </summary>
/// <param name="ectx">Exception context used to report decode failures.</param>
/// <param name="model">The model values to validate.</param>
/// <param name="col">The type of the column the model belongs to.</param>
private static void ValidateModel(IExceptionContext ectx, float[] model, DataViewType col)
{
    // Widen to long before squaring so the product cannot overflow int.
    long valueCount = col.GetValueCount();
    int modelSize = Utils.Size(model);
    ectx.CheckDecode(modelSize == valueCount * valueCount, "Invalid model size.");

    // Iterate using the size obtained above rather than model.Length: the size check goes
    // through Utils.Size (presumably null-tolerant, yielding 0 for a null array — confirm),
    // so a null model with a zero value count must not be dereferenced here.
    for (int i = 0; i < modelSize; i++)
    {
        ectx.CheckDecode(FloatUtils.IsFinite(model[i]), "Found NaN or infinity in the model.");
    }
}
/// <summary>
/// Creates the implementation for one column, wiring up either a scalar getter or a
/// vector getter depending on whether <paramref name="type"/> is a vector type.
/// </summary>
/// <param name="input">The source row; also forwarded to the base constructor.</param>
/// <param name="pyColIndex">Column index forwarded to the base constructor.</param>
/// <param name="idvColIndex">Index of the column in <paramref name="input"/>'s schema.</param>
/// <param name="type">The type of the column being read.</param>
/// <param name="poker">The value poker stored for later use.</param>
public Impl(DataViewRow input, int pyColIndex, int idvColIndex, DataViewType type, ValuePoker<TSrc> poker)
    : base(input, pyColIndex)
{
    Contracts.AssertValue(input);
    Contracts.Assert(0 <= idvColIndex && idvColIndex < input.Schema.Count);

    bool isVector = type is VectorDataViewType;
    if (!isVector)
    {
        _get = RowCursorUtils.GetGetterAs<TSrc>(type, input, idvColIndex);
    }
    else
    {
        // Vector columns are read through a getter built from the item type.
        var itemType = (PrimitiveDataViewType)type.GetItemType();
        _getVec = RowCursorUtils.GetVecGetterAs<TSrc>(itemType, input, idvColIndex);
    }

    _poker = poker;
    // A value count of zero marks the column as variable length.
    _isVarLength = type.GetValueCount() == 0;
}
/// <summary>
/// Checks whether the input column's type is supported. The item type must be
/// <see cref="NumberDataViewType.Single"/>, a vector of unknown size with more than one
/// dimension is rejected, and the squared value count must not exceed
/// <c>Utils.ArrayMaxSize</c>.
/// </summary>
/// <param name="type">The column type to test.</param>
/// <returns><c>null</c> if the type is supported; otherwise a message describing the problem.</returns>
internal static string TestColumn(DataViewType type)
{
    var vectorType = type as VectorDataViewType;
    DataViewType itemType = vectorType?.ItemType ?? type;

    // NOTE(review): a one-dimensional vector of unknown size is not rejected by this
    // condition, although the message speaks of a "known size" — confirm this is intended.
    bool isUnknownSizeMultiDim = vectorType != null
        && !vectorType.IsKnownSize
        && vectorType.Dimensions.Length > 1;
    if (isUnknownSizeMultiDim || itemType != NumberDataViewType.Single)
    {
        return "Expected float or float vector of known size";
    }

    // Square in long arithmetic so the comparison cannot overflow.
    long valueCount = type.GetValueCount();
    long squaredCount = valueCount * valueCount;
    return squaredCount > Utils.ArrayMaxSize
        ? "Vector size exceeds maximum size for one dimensional array (2 146 435 071 elements)"
        : null;
}
/// <summary>
/// Builds the <see cref="ModelArgs"/> describing a column: its name, the tensor element
/// type mapped from the column's raw type, and its dimensions with a leading batch
/// dimension of 1.
/// </summary>
/// <param name="type">
/// The column type. For a <see cref="VectorType"/> the element type is taken from its item
/// type; otherwise from the type itself. A raw type outside the mapping below fails a
/// <c>Contracts.Check</c> with an "Unsupported type" message.
/// </param>
/// <param name="colName">The column name; must be non-empty.</param>
/// <param name="dims">
/// Optional explicit dimensions (without the batch dimension). When supplied they are used
/// instead of the dimensions derived from <paramref name="type"/>. The list is copied, so
/// the caller's instance is not modified.
/// </param>
/// <param name="dimsParams">
/// Optional per-dimension flags, passed through as-is when <paramref name="dims"/> is
/// supplied and ignored otherwise.
/// </param>
/// <returns>The <see cref="ModelArgs"/> for the column.</returns>
public static ModelArgs GetModelArgs(DataViewType type, string colName, List<long> dims = null, List<bool> dimsParams = null)
{
    Contracts.CheckValue(type, nameof(type));
    Contracts.CheckNonEmpty(colName, nameof(colName));

    Type rawType = type is VectorType vectorType
        ? vectorType.ItemType.RawType
        : type.RawType;

    // NOTE(review): bool is mapped to Float and uint to Int64 below. Both mappings are
    // preserved from the original code — confirm they are intentional.
    TensorProto.Types.DataType dataType = TensorProto.Types.DataType.Undefined;
    if (rawType == typeof(bool))
    {
        dataType = TensorProto.Types.DataType.Float;
    }
    else if (rawType == typeof(ReadOnlyMemory<char>))
    {
        dataType = TensorProto.Types.DataType.String;
    }
    else if (rawType == typeof(sbyte))
    {
        dataType = TensorProto.Types.DataType.Int8;
    }
    else if (rawType == typeof(byte))
    {
        dataType = TensorProto.Types.DataType.Uint8;
    }
    else if (rawType == typeof(short))
    {
        dataType = TensorProto.Types.DataType.Int16;
    }
    else if (rawType == typeof(ushort))
    {
        dataType = TensorProto.Types.DataType.Uint16;
    }
    else if (rawType == typeof(int))
    {
        dataType = TensorProto.Types.DataType.Int32;
    }
    else if (rawType == typeof(uint))
    {
        dataType = TensorProto.Types.DataType.Int64;
    }
    else if (rawType == typeof(long))
    {
        dataType = TensorProto.Types.DataType.Int64;
    }
    else if (rawType == typeof(ulong))
    {
        dataType = TensorProto.Types.DataType.Uint64;
    }
    else if (rawType == typeof(float))
    {
        dataType = TensorProto.Types.DataType.Float;
    }
    else if (rawType == typeof(double))
    {
        dataType = TensorProto.Types.DataType.Double;
    }
    else
    {
        string msg = "Unsupported type: " + type.ToString();
        Contracts.Check(false, msg);
    }

    List<long> dimsLocal;
    List<bool> dimsParamLocal = null;
    if (dims != null)
    {
        // Work on a copy: the batch dimension inserted below must not leak into the
        // caller's list (a reused list would otherwise gain one extra entry per call).
        dimsLocal = new List<long>(dims);
        dimsParamLocal = dimsParams;
    }
    else
    {
        dimsLocal = new List<long>();
        int valueCount = type.GetValueCount();
        if (valueCount == 0)
        {
            // Unknown size.
            dimsLocal.Add(1);
            // false for batch size, true for dims.
            dimsParamLocal = new List<bool>() { false, true };
        }
        else if (valueCount == 1)
        {
            dimsLocal.Add(1);
        }
        else if (valueCount > 1)
        {
            var vec = (VectorType)type;
            for (int i = 0; i < vec.Dimensions.Length; i++)
            {
                dimsLocal.Add(vec.Dimensions[i]);
            }
        }
    }

    // Batch size.
    dimsLocal.Insert(0, 1);
    return new ModelArgs(colName, dataType, dimsLocal, dimsParamLocal);
}
/// <summary>
/// Builds the <see cref="ModelArgs"/> describing a column: its name, the tensor element
/// type derived from the column's raw type, and its dimensions with a leading batch
/// dimension of -1.
/// </summary>
/// <param name="type">
/// The column type. For a <see cref="VectorDataViewType"/> the element type is taken from
/// its item type; otherwise from the type itself.
/// </param>
/// <param name="colName">The column name; must be non-empty.</param>
/// <param name="dims">
/// Optional explicit dimensions (without the batch dimension). When supplied they are used
/// instead of the dimensions derived from <paramref name="type"/>. The list is copied, so
/// the caller's instance is not modified.
/// </param>
/// <param name="dimsParams">
/// Optional per-dimension flags, passed through as-is when <paramref name="dims"/> is
/// supplied and ignored otherwise.
/// </param>
/// <returns>The <see cref="ModelArgs"/> for the column.</returns>
public static ModelArgs GetModelArgs(DataViewType type, string colName, List<long> dims = null, List<bool> dimsParams = null)
{
    Contracts.CheckValue(type, nameof(type));
    Contracts.CheckNonEmpty(colName, nameof(colName));

    Type rawType = type is VectorDataViewType vectorType
        ? vectorType.ItemType.RawType
        : type.RawType;
    var dataType = ConvertToTensorProtoType(rawType);

    List<long> dimsLocal;
    List<bool> dimsParamLocal = null;
    if (dims != null)
    {
        // Work on a copy: the batch dimension inserted below must not leak into the
        // caller's list (a reused list would otherwise gain one extra entry per call).
        dimsLocal = new List<long>(dims);
        dimsParamLocal = dimsParams;
    }
    else
    {
        dimsLocal = new List<long>();
        int valueCount = type.GetValueCount();
        if (valueCount == 0)
        {
            // Unknown size.
            dimsLocal.Add(1);
            // false for batch size, true for dims.
            dimsParamLocal = new List<bool>() { false, true };
        }
        else if (valueCount == 1)
        {
            dimsLocal.Add(1);
        }
        else if (valueCount > 1)
        {
            var vec = (VectorDataViewType)type;
            for (int i = 0; i < vec.Dimensions.Length; i++)
            {
                dimsLocal.Add(vec.Dimensions[i]);
            }
        }
    }

    // Set batch size to -1. The ONNX docs, https://github.com/onnx/onnx/blob/master/docs/IR.md#static-tensor-shapes, state that if
    // dim_param is used instead of dim_value, the size of the dimension "is not statically constrained to a particular number".
    // "This is useful for declaring the interfaces that care about the number of dimensions, but not the exact size of each dimension".
    // This file, https://github.com/onnx/onnx/blob/master/onnx/tools/update_model_dims.py, explains that if the dim value is negative
    // then it treats that as a dim_param instead of a dim_value. This allows ML.NET to run 1 row at a time in a streaming fashion,
    // but allows the ONNX model the flexibility to be run in batch mode if that is desired.
    dimsLocal.Insert(0, -1);
    return new ModelArgs(colName, dataType, dimsLocal, dimsParamLocal);
}