/// <summary>
/// Describes the visible columns of <paramref name="view"/> to the native data sink referenced by
/// <paramref name="penv"/> and then pushes every row of the view through the setters the sink returns.
/// </summary>
/// <param name="ch">Channel used for assertions and for raising the "variable length vector" error.</param>
/// <param name="penv">Native environment block; must be non-null. Nothing is sent when its <c>dataSink</c> is null.</param>
/// <param name="view">The data to send. Hidden columns are skipped.</param>
/// <param name="infos">
/// Optional per-column-name metadata. A column whose entry has <c>Expand</c> set is published as one native
/// column per vector slot, named from <c>SlotNames</c>, from the schema slot names, or as "name.index".
/// </param>
/// <remarks>
/// Throws (via <c>ch.ExceptNotSupp</c>) for columns whose value count is 0, and (via <c>Contracts.Except</c>)
/// for item types that are neither keys nor one of the handled standard scalar kinds.
/// </remarks>
private static unsafe void SendViewToNative(IChannel ch, EnvironmentBlock* penv, IDataView view, Dictionary<string, ColumnMetadataInfo> infos = null)
{
    Contracts.AssertValue(ch);
    Contracts.Assert(penv != null);
    Contracts.AssertValue(view);
    Contracts.AssertValueOrNull(infos);

    if (penv->dataSink == null)
    {
        // Environment doesn't want any data!
        return;
    }

    var dataSink = MarshalDelegate<DataSink>(penv->dataSink);

    var schema = view.Schema;
    var colIndices = new List<int>();
    var kindList = new List<DataKind>();
    var keyCardList = new List<int>();
    var nameUtf8Bytes = new List<byte>();
    var nameIndices = new List<int>();
    var allNames = new HashSet<string>();

    // First pass: work out, for every visible column, the kind / key cardinality / name(s) that
    // will be advertised to the native side.
    for (int col = 0; col < schema.Count; col++)
    {
        var column = schema[col];
        if (column.IsHidden)
        {
            continue;
        }

        var fullType = column.Type;
        var itemType = fullType.ItemType;
        var name = column.Name;

        DataKind kind = itemType.RawKind;
        int keyCard;

        if (fullType.ValueCount == 0)
        {
            throw ch.ExceptNotSupp("Column has variable length vector: " + name + ". Not supported in python. Drop column before sending to Python");
        }

        if (itemType.IsKey)
        {
            // Key types are returned as their signed counterparts in Python, so that -1 can be the missing value.
            // For U1 and U2 kinds, we convert to a larger type to prevent overflow. For U4 and U8 kinds, we convert
            // to I4 if the key count is known (since KeyCount is an I4), and to I8 otherwise.
            switch (kind)
            {
                case DataKind.U1:
                    kind = DataKind.I2;
                    break;
                case DataKind.U2:
                    kind = DataKind.I4;
                    break;
                case DataKind.U4:
                    // We convert known-cardinality U4 key types to I4.
                    kind = itemType.KeyCount > 0 ? DataKind.I4 : DataKind.I8;
                    break;
                case DataKind.U8:
                    // We convert known-cardinality U8 key types to I4.
                    kind = itemType.KeyCount > 0 ? DataKind.I4 : DataKind.I8;
                    break;
            }

            // A key cardinality of -1 tells the sink that no key values will follow for this column.
            keyCard = itemType.KeyCount;
            if (!column.HasKeyValues(keyCard))
            {
                keyCard = -1;
            }
        }
        else if (itemType.IsStandardScalar())
        {
            switch (itemType.RawKind)
            {
                case DataKind.I1:
                case DataKind.I2:
                case DataKind.I4:
                case DataKind.I8:
                case DataKind.U1:
                case DataKind.U2:
                case DataKind.U4:
                case DataKind.U8:
                case DataKind.R4:
                case DataKind.R8:
                case DataKind.BL:
                case DataKind.TX:
                    break;
                default:
                    throw Contracts.Except("Data type {0} not handled", itemType.RawKind);
            }

            keyCard = -1;
        }
        else
        {
            throw Contracts.Except("Data type {0} not handled", itemType.RawKind);
        }

        int nSlots;
        if (infos != null && infos.TryGetValue(name, out ColumnMetadataInfo info) && info.Expand)
        {
            Contracts.Assert(fullType.IsKnownSizeVector);
            nSlots = fullType.VectorSize;

            if (info.SlotNames != null)
            {
                Contracts.Assert(info.SlotNames.Length == nSlots);
                for (int i = 0; i < nSlots; i++)
                {
                    AddUniqueName(info.SlotNames[i], allNames, nameIndices, nameUtf8Bytes);
                }
            }
            else if (column.HasSlotNames(nSlots))
            {
                var romNames = default(VBuffer<ReadOnlyMemory<char>>);
                column.Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref romNames);
                foreach (var kvp in romNames.Items(true))
                {
                    // REVIEW: Add the proper number of zeros to the slot index to make them sort in the right order.
                    var slotName = name + "." +
                        (!kvp.Value.IsEmpty ? kvp.Value.ToString() : kvp.Key.ToString(CultureInfo.InvariantCulture));
                    AddUniqueName(slotName, allNames, nameIndices, nameUtf8Bytes);
                }
            }
            else
            {
                for (int i = 0; i < nSlots; i++)
                {
                    AddUniqueName(name + "." + i, allNames, nameIndices, nameUtf8Bytes);
                }
            }
        }
        else
        {
            nSlots = 1;
            AddUniqueName(name, allNames, nameIndices, nameUtf8Bytes);
        }

        colIndices.Add(col);
        for (int i = 0; i < nSlots; i++)
        {
            kindList.Add(kind);
            keyCardList.Add(keyCard);
        }
    }

    ch.Assert(allNames.Count == kindList.Count);
    ch.Assert(allNames.Count == keyCardList.Count);
    ch.Assert(allNames.Count == nameIndices.Count);

    var kinds = kindList.ToArray();
    var keyCards = keyCardList.ToArray();
    var nameBytes = nameUtf8Bytes.ToArray();
    var names = new byte*[allNames.Count];

    // Everything handed to the sink is pinned for the duration of the transfer.
    fixed (DataKind* prgkind = kinds)
    fixed (byte* prgbNames = nameBytes)
    fixed (byte** prgname = names)
    fixed (int* prgkeyCard = keyCards)
    {
        // Each name pointer is an offset into the single pinned UTF-8 buffer.
        for (int iid = 0; iid < names.Length; iid++)
        {
            names[iid] = prgbNames + nameIndices[iid];
        }

        DataViewBlock block;
        block.ccol = allNames.Count;
        block.crow = view.GetRowCount() ?? 0;
        block.names = (sbyte**)prgname;
        block.kinds = prgkind;
        block.keyCards = prgkeyCard;

        dataSink(penv, &block, out var setters, out var keyValueSetter);

        if (setters == null)
        {
            // REVIEW: What should we do?
            return;
        }

        ch.Assert(keyValueSetter != null);
        var kvSet = MarshalDelegate<KeyValueSetter>(keyValueSetter);

        // Set-based lookup so that selecting the active columns is not a linear scan per schema column.
        var activeCols = new HashSet<int>(colIndices);
        using (var cursor = view.GetRowCursor(activeCols.Contains))
        {
            var fillers = new BufferFillerBase[colIndices.Count];
            var pyColumn = 0;
            var keyIndex = 0;

            for (int i = 0; i < colIndices.Count; i++)
            {
                var column = schema[colIndices[i]];
                var type = column.Type;

                if (type.ItemType.IsKey && column.HasKeyValues(type.ItemType.KeyCount))
                {
                    var keyValues = default(VBuffer<ReadOnlyMemory<char>>);
                    column.Metadata.GetValue(MetadataUtils.Kinds.KeyValues, ref keyValues);

                    // The same key values are sent once per slot; keyIndex counts those slots.
                    for (int slot = 0; slot < type.ValueCount; slot++)
                    {
                        foreach (var kvp in keyValues.Items())
                        {
                            if (kvp.Value.IsEmpty)
                            {
                                kvSet(penv, keyIndex, kvp.Key, null, 0);
                            }
                            else
                            {
                                byte[] bt = Encoding.UTF8.GetBytes(kvp.Value.ToString());
                                fixed (byte* pt = bt)
                                {
                                    kvSet(penv, keyIndex, kvp.Key, (sbyte*)pt, bt.Length);
                                }
                            }
                        }

                        keyIndex++;
                    }
                }

                fillers[i] = BufferFillerBase.Create(penv, cursor, pyColumn, colIndices[i], kinds[pyColumn], type, setters[pyColumn]);

                // NOTE(review): this advances by VectorSize for every vector column, whereas the first pass
                // only emitted VectorSize slots for columns marked Expand in infos. The two stay aligned only
                // if callers mark every vector column as expanded - confirm against callers.
                pyColumn += type.IsVector ? type.VectorSize : 1;
            }

            // Stream the rows: each filler pushes the current row's value(s) for its column.
            while (cursor.MoveNext())
            {
                for (int i = 0; i < fillers.Length; i++)
                {
                    fillers[i].Set();
                }
            }
        }
    }
}
/// <summary>
/// Describes the visible columns of <paramref name="view"/> to the native data sink referenced by
/// <paramref name="penv"/> (including a per-slot value-count flag) and then pushes every row of the view
/// through the setters the sink returns.
/// </summary>
/// <param name="ch">Channel used for assertions.</param>
/// <param name="penv">Native environment block; must be non-null. Nothing is sent when its <c>dataSink</c> is null.</param>
/// <param name="view">The data to send. Hidden columns are skipped.</param>
/// <param name="infos">
/// Optional per-column-name metadata. A column whose entry has <c>Expand</c> set is published as one native
/// column per vector slot, named from <c>SlotNames</c>, from the schema slot names, or as "name.index".
/// </param>
/// <remarks>
/// Throws (via <c>Contracts.Except</c>) for item types that are neither keys nor one of the handled
/// standard scalar kinds.
/// </remarks>
private static unsafe void SendViewToNativeAsDataFrame(IChannel ch, EnvironmentBlock* penv, IDataView view, Dictionary<string, ColumnMetadataInfo> infos = null)
{
    Contracts.AssertValue(ch);
    Contracts.Assert(penv != null);
    Contracts.AssertValue(view);
    Contracts.AssertValueOrNull(infos);

    if (penv->dataSink == null)
    {
        // Environment doesn't want any data!
        return;
    }

    var dataSink = MarshalDelegate<DataSink>(penv->dataSink);

    var schema = view.Schema;
    var colIndices = new List<int>(1000);
    var kindList = new ValueListBuilder<InternalDataKind>(INDICES_BUFFER_SIZE);
    var keyCardList = new ValueListBuilder<int>(INDICES_BUFFER_SIZE);
    var nameUtf8Bytes = new ValueListBuilder<byte>(UTF8_BUFFER_SIZE);
    var nameIndices = new ValueListBuilder<int>(INDICES_BUFFER_SIZE);
    var valueCounts = new List<byte>(1000);

    // First pass: work out, for every visible column, the kind / key cardinality / value-count flag /
    // name(s) that will be advertised to the native side.
    for (int col = 0; col < schema.Count; col++)
    {
        var column = schema[col];
        if (column.IsHidden)
        {
            continue;
        }

        var fullType = column.Type;
        var itemType = fullType.GetItemType();
        var name = column.Name;

        var kind = itemType.GetRawKind();
        int keyCard;

        // 0 when the type reports a value count of 0, 1 otherwise.
        byte valueCount = (fullType.GetValueCount() == 0) ? (byte)0 : (byte)1;

        if (itemType is KeyDataViewType)
        {
            // Key types are returned as their signed counterparts in Python, so that -1 can be the missing value.
            // For U1 and U2 kinds, we convert to a larger type to prevent overflow. For U4 and U8 kinds, we convert
            // to I4 if the key count is known (since KeyCount is an I4), and to I8 otherwise.
            switch (kind)
            {
                case InternalDataKind.U1:
                    kind = InternalDataKind.I2;
                    break;
                case InternalDataKind.U2:
                    kind = InternalDataKind.I4;
                    break;
                case InternalDataKind.U4:
                    // We convert known-cardinality U4 key types to I4.
                    kind = itemType.GetKeyCount() > 0 ? InternalDataKind.I4 : InternalDataKind.I8;
                    break;
                case InternalDataKind.U8:
                    // We convert known-cardinality U8 key types to I4.
                    kind = itemType.GetKeyCount() > 0 ? InternalDataKind.I4 : InternalDataKind.I8;
                    break;
            }

            // A key cardinality of -1 tells the sink that no key values will follow for this column.
            keyCard = itemType.GetKeyCountAsInt32();
            if (!column.HasKeyValues())
            {
                keyCard = -1;
            }
        }
        else if (itemType.IsStandardScalar())
        {
            switch (itemType.GetRawKind())
            {
                case InternalDataKind.I1:
                case InternalDataKind.I2:
                case InternalDataKind.I4:
                case InternalDataKind.I8:
                case InternalDataKind.U1:
                case InternalDataKind.U2:
                case InternalDataKind.U4:
                case InternalDataKind.U8:
                case InternalDataKind.R4:
                case InternalDataKind.R8:
                case InternalDataKind.BL:
                case InternalDataKind.TX:
                case InternalDataKind.DT:
                    break;
                default:
                    throw Contracts.Except("Data type {0} not handled", itemType.GetRawKind());
            }

            keyCard = -1;
        }
        else
        {
            throw Contracts.Except("Data type {0} not handled", itemType.GetRawKind());
        }

        int nSlots;
        if (infos != null && infos.TryGetValue(name, out ColumnMetadataInfo info) && info.Expand)
        {
            Contracts.Assert(fullType.IsKnownSizeVector());
            nSlots = fullType.GetVectorSize();

            if (info.SlotNames != null)
            {
                Contracts.Assert(info.SlotNames.Length == nSlots);
                for (int i = 0; i < nSlots; i++)
                {
                    AddUniqueName(info.SlotNames[i], ref nameIndices, ref nameUtf8Bytes);
                }
            }
            else if (column.HasSlotNames(nSlots))
            {
                var romNames = default(VBuffer<ReadOnlyMemory<char>>);
                column.Annotations.GetValue(AnnotationUtils.Kinds.SlotNames, ref romNames);
                AddUniqueName(name, romNames, ref nameIndices, ref nameUtf8Bytes);
            }
            else
            {
                for (int i = 0; i < nSlots; i++)
                {
                    AddUniqueName(name + "." + i, ref nameIndices, ref nameUtf8Bytes);
                }
            }
        }
        else
        {
            nSlots = 1;
            AddUniqueName(name, ref nameIndices, ref nameUtf8Bytes);
        }

        colIndices.Add(col);
        for (int i = 0; i < nSlots; i++)
        {
            kindList.Append(kind);
            keyCardList.Append(keyCard);
            valueCounts.Add(valueCount);
        }
    }

    ch.Assert(kindList.Length == keyCardList.Length);
    ch.Assert(kindList.Length == nameIndices.Length);

    var kinds = kindList.AsSpan();
    var keyCards = keyCardList.AsSpan();
    var nameBytes = nameUtf8Bytes.AsSpan();
    var names = new byte*[nameIndices.Length];
    var valueCountsBytes = valueCounts.ToArray();

    // Everything handed to the sink is pinned for the duration of the transfer.
    fixed (InternalDataKind* prgkind = kinds)
    fixed (byte* prgbNames = nameBytes)
    fixed (byte** prgname = names)
    fixed (int* prgkeyCard = keyCards)
    fixed (byte* prgbValueCount = valueCountsBytes)
    {
        // Each name pointer is an offset into the single pinned UTF-8 buffer.
        for (int iid = 0; iid < names.Length; iid++)
        {
            names[iid] = prgbNames + nameIndices[iid];
        }

        DataViewBlock block;
        block.ccol = nameIndices.Length;
        block.crow = view.GetRowCount() ?? 0;
        block.names = (sbyte**)prgname;
        block.kinds = prgkind;
        block.keyCards = prgkeyCard;
        block.valueCounts = prgbValueCount;

        dataSink(penv, &block, out var setters, out var keyValueSetter);

        if (setters == null)
        {
            // REVIEW: What should we do?
            return;
        }

        ch.Assert(keyValueSetter != null);
        var kvSet = MarshalDelegate<KeyValueSetter>(keyValueSetter);

        // Set-based lookup so that selecting the active columns is not a linear scan per schema column.
        var activeCols = new HashSet<int>(colIndices);
        using (var cursor = view.GetRowCursor(view.Schema.Where(c => activeCols.Contains(c.Index))))
        {
            var fillers = new BufferFillerBase[colIndices.Count];
            var pyColumn = 0;
            var keyIndex = 0;

            for (int i = 0; i < colIndices.Count; i++)
            {
                var column = schema[colIndices[i]];
                var type = column.Type;
                var itemType = type.GetItemType();

                if ((itemType is KeyDataViewType) && column.HasKeyValues())
                {
                    var keyValues = default(VBuffer<ReadOnlyMemory<char>>);
                    column.Annotations.GetValue(AnnotationUtils.Kinds.KeyValues, ref keyValues);

                    // The same key values are sent once per slot; keyIndex counts those slots.
                    for (int slot = 0; slot < type.GetValueCount(); slot++)
                    {
                        foreach (var kvp in keyValues.Items())
                        {
                            if (kvp.Value.IsEmpty)
                            {
                                kvSet(penv, keyIndex, kvp.Key, null, 0);
                            }
                            else
                            {
                                byte[] bt = Encoding.UTF8.GetBytes(kvp.Value.ToString());
                                fixed (byte* pt = bt)
                                {
                                    kvSet(penv, keyIndex, kvp.Key, (sbyte*)pt, bt.Length);
                                }
                            }
                        }

                        keyIndex++;
                    }
                }

                fillers[i] = BufferFillerBase.Create(penv, cursor, pyColumn, colIndices[i], prgkind[pyColumn], type, setters[pyColumn]);

                // NOTE(review): this advances by the vector size for every vector column with a positive size,
                // whereas the first pass only emitted that many slots for columns marked Expand in infos. The
                // two stay aligned only if callers mark every such column as expanded - confirm against callers.
                if ((type is VectorDataViewType) && (type.GetVectorSize() > 0))
                {
                    pyColumn += type.GetVectorSize();
                }
                else
                {
                    pyColumn++;
                }
            }

            // Stream the rows: each filler pushes the current row's value(s) for its column.
            while (cursor.MoveNext())
            {
                for (int i = 0; i < fillers.Length; i++)
                {
                    fillers[i].Set();
                }
            }
        }
    }
}