Example #1
0
        /// <summary>
        /// Describes the visible columns of <paramref name="view"/> to the native data sink referenced by
        /// <paramref name="penv"/>, then pushes the key values and every row of the view through the setters
        /// that the sink hands back.
        /// </summary>
        /// <param name="ch">Channel used for assertions and for raising exceptions.</param>
        /// <param name="penv">Native environment block. When its <c>dataSink</c> is null nothing is sent.</param>
        /// <param name="view">The data view to send. Hidden columns are skipped.</param>
        /// <param name="infos">Optional per-column info keyed by column name. A column whose entry has
        /// <c>Expand</c> set is declared as one native column per vector slot, named from <c>SlotNames</c>
        /// when provided, otherwise from the slot-name metadata or the slot index.</param>
        /// <remarks>
        /// Throws for variable-length vector columns, for item types that are neither keys nor one of the
        /// handled standard scalar kinds, and when a column would be written past the native columns that
        /// were declared to the sink.
        /// </remarks>
        private static unsafe void SendViewToNative(IChannel ch, EnvironmentBlock *penv, IDataView view, Dictionary<string, ColumnMetadataInfo> infos = null)
        {
            Contracts.AssertValue(ch);
            Contracts.Assert(penv != null);
            Contracts.AssertValue(view);
            Contracts.AssertValueOrNull(infos);
            if (penv->dataSink == null)
            {
                // Environment doesn't want any data!
                return;
            }

            var dataSink = MarshalDelegate<DataSink>(penv->dataSink);

            var schema        = view.Schema;
            var colIndices    = new List<int>();
            var kindList      = new List<DataKind>();
            var keyCardList   = new List<int>();
            var nameUtf8Bytes = new List<byte>();
            var nameIndices   = new List<int>();
            var allNames      = new HashSet<string>();

            // Pass 1: build the native layout (name, kind and key cardinality per native column).
            for (int col = 0; col < schema.Count; col++)
            {
                var column = schema[col];
                if (column.IsHidden)
                {
                    continue;
                }

                var fullType = column.Type;
                var itemType = fullType.ItemType;
                var name     = column.Name;

                DataKind kind = itemType.RawKind;
                int      keyCard;

                if (fullType.ValueCount == 0)
                {
                    throw ch.ExceptNotSupp("Column has variable length vector: " +
                                           name + ". Not supported in python. Drop column before sending to Python");
                }

                if (itemType.IsKey)
                {
                    // Key types are returned as their signed counterparts in Python, so that -1 can be the missing value.
                    // For U1 and U2 kinds, we convert to a larger type to prevent overflow. For U4 and U8 kinds, we convert
                    // to I4 if the key count is known (since KeyCount is an I4), and to I8 otherwise.
                    switch (kind)
                    {
                    case DataKind.U1:
                        kind = DataKind.I2;
                        break;

                    case DataKind.U2:
                        kind = DataKind.I4;
                        break;

                    case DataKind.U4:
                    case DataKind.U8:
                        // We convert known-cardinality U4/U8 key types to I4.
                        kind = itemType.KeyCount > 0 ? DataKind.I4 : DataKind.I8;
                        break;
                    }

                    // A cardinality is only reported when the column actually carries key values.
                    keyCard = column.HasKeyValues(itemType.KeyCount) ? itemType.KeyCount : -1;
                }
                else if (itemType.IsStandardScalar())
                {
                    switch (itemType.RawKind)
                    {
                    case DataKind.I1:
                    case DataKind.I2:
                    case DataKind.I4:
                    case DataKind.I8:
                    case DataKind.U1:
                    case DataKind.U2:
                    case DataKind.U4:
                    case DataKind.U8:
                    case DataKind.R4:
                    case DataKind.R8:
                    case DataKind.BL:
                    case DataKind.TX:
                        break;

                    default:
                        throw Contracts.Except("Data type {0} not handled", itemType.RawKind);
                    }
                    keyCard = -1;
                }
                else
                {
                    throw Contracts.Except("Data type {0} not handled", itemType.RawKind);
                }

                int nSlots;
                if (infos != null && infos.TryGetValue(name, out var info) && info.Expand)
                {
                    Contracts.Assert(fullType.IsKnownSizeVector);
                    nSlots = fullType.VectorSize;
                    if (info.SlotNames != null)
                    {
                        Contracts.Assert(info.SlotNames.Length == nSlots);
                        for (int i = 0; i < nSlots; i++)
                        {
                            AddUniqueName(info.SlotNames[i], allNames, nameIndices, nameUtf8Bytes);
                        }
                    }
                    else if (column.HasSlotNames(nSlots))
                    {
                        var romNames = default(VBuffer<ReadOnlyMemory<char>>);
                        column.Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref romNames);
                        foreach (var kvp in romNames.Items(true))
                        {
                            // REVIEW: Add the proper number of zeros to the slot index to make them sort in the right order.
                            var slotName = name + "." +
                                           (!kvp.Value.IsEmpty ? kvp.Value.ToString() : kvp.Key.ToString(CultureInfo.InvariantCulture));
                            AddUniqueName(slotName, allNames, nameIndices, nameUtf8Bytes);
                        }
                    }
                    else
                    {
                        for (int i = 0; i < nSlots; i++)
                        {
                            AddUniqueName(name + "." + i, allNames, nameIndices, nameUtf8Bytes);
                        }
                    }
                }
                else
                {
                    nSlots = 1;
                    AddUniqueName(name, allNames, nameIndices, nameUtf8Bytes);
                }

                colIndices.Add(col);
                for (int i = 0; i < nSlots; i++)
                {
                    kindList.Add(kind);
                    keyCardList.Add(keyCard);
                }
            }

            ch.Assert(allNames.Count == kindList.Count);
            ch.Assert(allNames.Count == keyCardList.Count);
            ch.Assert(allNames.Count == nameIndices.Count);

            var kinds     = kindList.ToArray();
            var keyCards  = keyCardList.ToArray();
            var nameBytes = nameUtf8Bytes.ToArray();
            var names     = new byte *[allNames.Count];

            fixed(DataKind *prgkind = kinds)
            fixed(byte *prgbNames = nameBytes)
            fixed(byte **prgname  = names)
            fixed(int *prgkeyCard = keyCards)
            {
                // Each name pointer is an offset into the single pinned UTF-8 buffer.
                for (int iid = 0; iid < names.Length; iid++)
                {
                    names[iid] = prgbNames + nameIndices[iid];
                }

                DataViewBlock block;

                block.ccol     = allNames.Count;
                block.crow     = view.GetRowCount() ?? 0;
                block.names    = (sbyte **)prgname;
                block.kinds    = prgkind;
                block.keyCards = prgkeyCard;

                dataSink(penv, &block, out var setters, out var keyValueSetter);

                if (setters == null)
                {
                    // REVIEW: What should we do?
                    return;
                }
                ch.Assert(keyValueSetter != null);
                var kvSet = MarshalDelegate<KeyValueSetter>(keyValueSetter);

                // Hash-based membership keeps the cursor's column selection linear in the schema size.
                var activeCols = new HashSet<int>(colIndices);

                // Pass 2: send key values, create one filler per source column, then stream the rows.
                using (var cursor = view.GetRowCursor(activeCols.Contains))
                {
                    var fillers  = new BufferFillerBase[colIndices.Count];
                    var pyColumn = 0;
                    var keyIndex = 0;
                    for (int i = 0; i < colIndices.Count; i++)
                    {
                        var column = schema[colIndices[i]];
                        var type   = column.Type;
                        var width  = type.IsVector ? type.VectorSize : 1;

                        // pyColumn advances by the vector size, whereas a column that was not expanded above
                        // declared a single native column. Refuse to index kinds/setters past the declared
                        // layout instead of reading out of bounds.
                        if (pyColumn + width > kinds.Length)
                        {
                            throw ch.ExceptNotSupp("Column layout does not match the columns declared to the data sink (detected at column: " +
                                                   column.Name + "). Fixed-size vector columns must be expanded before sending to Python");
                        }

                        if (type.ItemType.IsKey && column.HasKeyValues(type.ItemType.KeyCount))
                        {
                            var keyValues = default(VBuffer<ReadOnlyMemory<char>>);
                            column.Metadata.GetValue(MetadataUtils.Kinds.KeyValues, ref keyValues);

                            // The same key values are sent once per slot; keyIndex identifies the slot.
                            for (int slot = 0; slot < type.ValueCount; slot++)
                            {
                                foreach (var kvp in keyValues.Items())
                                {
                                    if (kvp.Value.IsEmpty)
                                    {
                                        kvSet(penv, keyIndex, kvp.Key, null, 0);
                                    }
                                    else
                                    {
                                        byte[] bt = Encoding.UTF8.GetBytes(kvp.Value.ToString());

                                        fixed(byte *pt = bt)
                                        {
                                            kvSet(penv, keyIndex, kvp.Key, (sbyte *)pt, bt.Length);
                                        }
                                    }
                                }
                                keyIndex++;
                            }
                        }

                        fillers[i] = BufferFillerBase.Create(penv, cursor, pyColumn, colIndices[i], kinds[pyColumn], type, setters[pyColumn]);
                        pyColumn  += width;
                    }

                    while (cursor.MoveNext())
                    {
                        // Fill values for the current row.
                        foreach (var filler in fillers)
                        {
                            filler.Set();
                        }
                    }
                }
            }
        }
Example #2
0
        /// <summary>
        /// Describes the visible columns of <paramref name="view"/> to the native data sink referenced by
        /// <paramref name="penv"/>, then pushes the key values and every row of the view through the setters
        /// that the sink hands back.
        /// </summary>
        /// <param name="ch">Channel used for assertions and for raising exceptions.</param>
        /// <param name="penv">Native environment block. When its <c>dataSink</c> is null nothing is sent.</param>
        /// <param name="view">The data view to send. Hidden columns are skipped.</param>
        /// <param name="infos">Optional per-column info keyed by column name. A column whose entry has
        /// <c>Expand</c> set is declared as one native column per vector slot, named from <c>SlotNames</c>
        /// when provided, otherwise from the slot-name annotation or the slot index.</param>
        /// <remarks>
        /// Each declared native column also carries a value-count byte: 0 when the source column's value
        /// count is 0, and 1 otherwise. Throws for item types that are neither keys nor one of the handled
        /// standard scalar kinds, and when a column would be written past the native columns that were
        /// declared to the sink.
        /// </remarks>
        private static unsafe void SendViewToNativeAsDataFrame(IChannel ch, EnvironmentBlock *penv, IDataView view, Dictionary<string, ColumnMetadataInfo> infos = null)
        {
            Contracts.AssertValue(ch);
            Contracts.Assert(penv != null);
            Contracts.AssertValue(view);
            Contracts.AssertValueOrNull(infos);
            if (penv->dataSink == null)
            {
                // Environment doesn't want any data!
                return;
            }

            var dataSink = MarshalDelegate<DataSink>(penv->dataSink);

            var schema        = view.Schema;
            var colIndices    = new List<int>(1000);
            var kindList      = new ValueListBuilder<InternalDataKind>(INDICES_BUFFER_SIZE);
            var keyCardList   = new ValueListBuilder<int>(INDICES_BUFFER_SIZE);
            var nameUtf8Bytes = new ValueListBuilder<byte>(UTF8_BUFFER_SIZE);
            var nameIndices   = new ValueListBuilder<int>(INDICES_BUFFER_SIZE);
            var valueCounts   = new List<byte>(1000);

            // Pass 1: build the native layout (name, kind, key cardinality and value-count flag per native column).
            for (int col = 0; col < schema.Count; col++)
            {
                var column = schema[col];
                if (column.IsHidden)
                {
                    continue;
                }

                var fullType = column.Type;
                var itemType = fullType.GetItemType();
                var name     = column.Name;

                var kind = itemType.GetRawKind();
                int keyCard;

                byte valueCount = (fullType.GetValueCount() == 0) ? (byte)0 : (byte)1;

                if (itemType is KeyDataViewType)
                {
                    // Key types are returned as their signed counterparts in Python, so that -1 can be the missing value.
                    // For U1 and U2 kinds, we convert to a larger type to prevent overflow. For U4 and U8 kinds, we convert
                    // to I4 if the key count is known (since KeyCount is an I4), and to I8 otherwise.
                    switch (kind)
                    {
                    case InternalDataKind.U1:
                        kind = InternalDataKind.I2;
                        break;

                    case InternalDataKind.U2:
                        kind = InternalDataKind.I4;
                        break;

                    case InternalDataKind.U4:
                    case InternalDataKind.U8:
                        // We convert known-cardinality U4/U8 key types to I4.
                        kind = itemType.GetKeyCount() > 0 ? InternalDataKind.I4 : InternalDataKind.I8;
                        break;
                    }

                    // The count is always read first (as in the original statement order); it is only
                    // reported when the column actually carries key values.
                    keyCard = itemType.GetKeyCountAsInt32();
                    if (!column.HasKeyValues())
                    {
                        keyCard = -1;
                    }
                }
                else if (itemType.IsStandardScalar())
                {
                    switch (itemType.GetRawKind())
                    {
                    case InternalDataKind.I1:
                    case InternalDataKind.I2:
                    case InternalDataKind.I4:
                    case InternalDataKind.I8:
                    case InternalDataKind.U1:
                    case InternalDataKind.U2:
                    case InternalDataKind.U4:
                    case InternalDataKind.U8:
                    case InternalDataKind.R4:
                    case InternalDataKind.R8:
                    case InternalDataKind.BL:
                    case InternalDataKind.TX:
                    case InternalDataKind.DT:
                        break;

                    default:
                        throw Contracts.Except("Data type {0} not handled", itemType.GetRawKind());
                    }
                    keyCard = -1;
                }
                else
                {
                    throw Contracts.Except("Data type {0} not handled", itemType.GetRawKind());
                }

                int nSlots;
                if (infos != null && infos.TryGetValue(name, out var info) && info.Expand)
                {
                    Contracts.Assert(fullType.IsKnownSizeVector());
                    nSlots = fullType.GetVectorSize();
                    if (info.SlotNames != null)
                    {
                        Contracts.Assert(info.SlotNames.Length == nSlots);
                        for (int i = 0; i < nSlots; i++)
                        {
                            AddUniqueName(info.SlotNames[i], ref nameIndices, ref nameUtf8Bytes);
                        }
                    }
                    else if (column.HasSlotNames(nSlots))
                    {
                        var romNames = default(VBuffer<ReadOnlyMemory<char>>);
                        column.Annotations.GetValue(AnnotationUtils.Kinds.SlotNames, ref romNames);
                        AddUniqueName(name, romNames, ref nameIndices, ref nameUtf8Bytes);
                    }
                    else
                    {
                        for (int i = 0; i < nSlots; i++)
                        {
                            AddUniqueName(name + "." + i, ref nameIndices, ref nameUtf8Bytes);
                        }
                    }
                }
                else
                {
                    nSlots = 1;
                    AddUniqueName(name, ref nameIndices, ref nameUtf8Bytes);
                }

                colIndices.Add(col);
                for (int i = 0; i < nSlots; i++)
                {
                    kindList.Append(kind);
                    keyCardList.Append(keyCard);
                    valueCounts.Add(valueCount);
                }
            }

            ch.Assert(kindList.Length == keyCardList.Length);
            ch.Assert(kindList.Length == nameIndices.Length);

            var kinds            = kindList.AsSpan();
            var keyCards         = keyCardList.AsSpan();
            var nameBytes        = nameUtf8Bytes.AsSpan();
            var names            = new byte *[nameIndices.Length];
            var valueCountsBytes = valueCounts.ToArray();

            fixed(InternalDataKind *prgkind = kinds)
            fixed(byte *prgbNames      = nameBytes)
            fixed(byte **prgname       = names)
            fixed(int *prgkeyCard      = keyCards)
            fixed(byte *prgbValueCount = valueCountsBytes)
            {
                // Each name pointer is an offset into the single pinned UTF-8 buffer.
                for (int iid = 0; iid < names.Length; iid++)
                {
                    names[iid] = prgbNames + nameIndices[iid];
                }

                DataViewBlock block;

                block.ccol        = nameIndices.Length;
                block.crow        = view.GetRowCount() ?? 0;
                block.names       = (sbyte **)prgname;
                block.kinds       = prgkind;
                block.keyCards    = prgkeyCard;
                block.valueCounts = prgbValueCount;

                dataSink(penv, &block, out var setters, out var keyValueSetter);

                if (setters == null)
                {
                    // REVIEW: What should we do?
                    return;
                }
                ch.Assert(keyValueSetter != null);
                var kvSet = MarshalDelegate<KeyValueSetter>(keyValueSetter);

                // Hash-based membership keeps the cursor's column selection linear in the schema size.
                var activeCols = new HashSet<int>(colIndices);

                // Pass 2: send key values, create one filler per source column, then stream the rows.
                using (var cursor = view.GetRowCursor(view.Schema.Where(c => activeCols.Contains(c.Index))))
                {
                    var fillers  = new BufferFillerBase[colIndices.Count];
                    var pyColumn = 0;
                    var keyIndex = 0;
                    for (int i = 0; i < colIndices.Count; i++)
                    {
                        var column   = schema[colIndices[i]];
                        var type     = column.Type;
                        var itemType = type.GetItemType();
                        var width    = ((type is VectorDataViewType) && (type.GetVectorSize() > 0)) ? type.GetVectorSize() : 1;

                        // pyColumn advances by the vector size, whereas a column that was not expanded above
                        // declared a single native column. prgkind and setters are raw pointers, so an overrun
                        // would not be detected; refuse to index past the declared layout.
                        if (pyColumn + width > kinds.Length)
                        {
                            throw ch.ExceptNotSupp("Column layout does not match the columns declared to the data sink (detected at column: " +
                                                   column.Name + "). Fixed-size vector columns must be expanded before sending to Python");
                        }

                        if ((itemType is KeyDataViewType) && column.HasKeyValues())
                        {
                            var keyValues = default(VBuffer<ReadOnlyMemory<char>>);
                            column.Annotations.GetValue(AnnotationUtils.Kinds.KeyValues, ref keyValues);

                            // The same key values are sent once per slot; keyIndex identifies the slot.
                            for (int slot = 0; slot < type.GetValueCount(); slot++)
                            {
                                foreach (var kvp in keyValues.Items())
                                {
                                    if (kvp.Value.IsEmpty)
                                    {
                                        kvSet(penv, keyIndex, kvp.Key, null, 0);
                                    }
                                    else
                                    {
                                        byte[] bt = Encoding.UTF8.GetBytes(kvp.Value.ToString());

                                        fixed(byte *pt = bt)
                                        {
                                            kvSet(penv, keyIndex, kvp.Key, (sbyte *)pt, bt.Length);
                                        }
                                    }
                                }
                                keyIndex++;
                            }
                        }

                        fillers[i] = BufferFillerBase.Create(penv, cursor, pyColumn, colIndices[i], prgkind[pyColumn], type, setters[pyColumn]);
                        pyColumn  += width;
                    }

                    while (cursor.MoveNext())
                    {
                        // Fill values for the current row.
                        foreach (var filler in fillers)
                        {
                            filler.Set();
                        }
                    }
                }
            }
        }