/// <summary>
/// Binds the native setters for the four CSR output columns and emits the first
/// indptr entry (index 0, value 0).
/// </summary>
/// <param name="penv">Environment block passed back to every setter invocation.</param>
/// <param name="setters">Setter function pointers, indexed by DataCol, IndicesCol, IndPtrCol and ShapeCol.</param>
/// <param name="outputDataKind">
/// Kind of the data column. Only R4 and R8 bind a data setter; for any other kind
/// both data setters are left unassigned.
/// </param>
public CsrData(EnvironmentBlock* penv, void** setters, InternalDataKind outputDataKind)
{
    _penv = penv;
    col = 0;
    _row = 0;
    _index = 0;

    // Exactly one of the two data setters is bound, depending on the output precision.
    switch (outputDataKind)
    {
        case InternalDataKind.R4:
            _r4DataSetter = MarshalDelegate<R4Setter>(setters[DataCol]);
            _r8DataSetter = null;
            break;

        case InternalDataKind.R8:
            _r4DataSetter = null;
            _r8DataSetter = MarshalDelegate<R8Setter>(setters[DataCol]);
            break;
    }

    _indicesSetter = MarshalDelegate<I4Setter>(setters[IndicesCol]);
    _indptrSetter = MarshalDelegate<I4Setter>(setters[IndPtrCol]);
    _shapeSetter = MarshalDelegate<I4Setter>(setters[ShapeCol]);

    // Seed the row-pointer column with its leading zero.
    _indptrSetter(_penv, IndPtrCol, 0, 0);
}
/// <summary>
/// Asserts the shape of the arguments and forwards the graph to RunGraphCore.
/// </summary>
/// <param name="penv">Environment block forwarded unchanged.</param>
/// <param name="host">Host forwarded as the environment for the graph run.</param>
/// <param name="ch">Channel used only for the assertions below.</param>
/// <param name="graph">Graph text; asserted to be non-empty.</param>
/// <param name="cdata">Number of data source blocks; asserted to be non-negative.</param>
/// <param name="ppdata">Data source blocks; may be null only when <paramref name="cdata"/> is zero.</param>
private static void ExecCore(EnvironmentBlock* penv, IHost host, IChannel ch, string graph, int cdata, DataSourceBlock** ppdata)
{
    Contracts.AssertValue(ch);
    ch.AssertValue(host);
    ch.AssertNonEmpty(graph);
    ch.Assert(cdata >= 0);
    ch.Assert(cdata == 0 || ppdata != null);

    RunGraphCore(penv, host, graph, cdata, ppdata);
}
/// <summary>
/// The generic entry point. The specific behavior is indicated in a string argument.
/// </summary>
/// <param name="penv">Environment block supplying the cancellation callback, seed, verbosity and message sink.</param>
/// <param name="psz">Bytes of the graph to execute; converted with BytesToString.</param>
/// <param name="cdata">Number of entries in <paramref name="ppdata"/>; must be non-negative.</param>
/// <param name="ppdata">Data source blocks; may be null only when <paramref name="cdata"/> is zero.</param>
/// <returns>0 on success, -1 when <paramref name="penv"/> is null or when execution throws.</returns>
private static unsafe int GenericExec(EnvironmentBlock* penv, sbyte* psz, int cdata, DataSourceBlock** ppdata)
{
    // The environment is built from fields of penv, so a null block has to be rejected
    // before anything dereferences it. No channel exists yet to report through, hence
    // the bare failure code.
    if (penv == null)
    {
        return -1;
    }

    var env = new RmlEnvironment(MarshalDelegate<CheckCancelled>(penv->checkCancel), penv->seed, verbose: penv->verbosity > 3);
    var host = env.Register("ML.NET_Execution");

    env.ComponentCatalog.RegisterAssembly(typeof(TextLoader).Assembly); // ML.Data
    env.ComponentCatalog.RegisterAssembly(typeof(LinearModelParameters).Assembly); // ML.StandardLearners
    env.ComponentCatalog.RegisterAssembly(typeof(CategoricalCatalog).Assembly); // ML.Transforms
    env.ComponentCatalog.RegisterAssembly(typeof(FastTreeRegressionTrainer).Assembly); // ML.FastTree

    //env.ComponentCatalog.RegisterAssembly(typeof(EnsembleModelParameters).Assembly); // ML.Ensemble
    env.ComponentCatalog.RegisterAssembly(typeof(KMeansModelParameters).Assembly); // ML.KMeansClustering
    env.ComponentCatalog.RegisterAssembly(typeof(PcaModelParameters).Assembly); // ML.PCA
    env.ComponentCatalog.RegisterAssembly(typeof(CVSplit).Assembly); // ML.EntryPoints

    env.ComponentCatalog.RegisterAssembly(typeof(OlsModelParameters).Assembly);
    env.ComponentCatalog.RegisterAssembly(typeof(LightGbmBinaryModelParameters).Assembly);
    env.ComponentCatalog.RegisterAssembly(typeof(TensorFlowTransformer).Assembly);
    //env.ComponentCatalog.RegisterAssembly(typeof(SymSgdClassificationTrainer).Assembly);
    //env.ComponentCatalog.RegisterAssembly(typeof(AutoInference).Assembly); // ML.PipelineInference
    env.ComponentCatalog.RegisterAssembly(typeof(DataViewReference).Assembly);
    env.ComponentCatalog.RegisterAssembly(typeof(ImageLoadingTransformer).Assembly);
    //env.ComponentCatalog.RegisterAssembly(typeof(SaveOnnxCommand).Assembly);
    //env.ComponentCatalog.RegisterAssembly(typeof(TimeSeriesProcessingEntryPoints).Assembly);
    //env.ComponentCatalog.RegisterAssembly(typeof(ParquetLoader).Assembly);
    env.ComponentCatalog.RegisterAssembly(typeof(ForecastExtensions).Assembly);

    using (var ch = host.Start("Executing"))
    {
        var sw = System.Diagnostics.Stopwatch.StartNew();
        try
        {
            ch.Trace("Checking parameters");
            host.CheckParam(penv->messageSink != null, "penv->message");
            host.CheckParam(psz != null, nameof(psz));

            ch.Trace("Converting graph operands");
            var graph = BytesToString(psz);

            ch.Trace("Wiring message sink");
            var message = MarshalDelegate<MessageSink>(penv->messageSink);
            var messageValidator = new MessageValidator(host);
            var lk = new object();
            Action<IMessageSource, ChannelMessage> listener =
                (sender, msg) =>
                {
                    byte[] bs = StringToNullTerminatedBytes(sender.FullName);
                    string m = messageValidator.Validate(msg);
                    if (!string.IsNullOrEmpty(m))
                    {
                        byte[] bm = StringToNullTerminatedBytes(m);
                        // Calls into the native sink are serialized on lk.
                        lock (lk)
                        {
                            fixed (byte* ps = bs)
                            fixed (byte* pm = bm)
                            {
                                message(penv, msg.Kind, (sbyte*)ps, (sbyte*)pm);
                            }
                        }
                    }
                };
            env.AddListener(listener);

            host.CheckParam(cdata >= 0, nameof(cdata), "must be non-negative");
            host.CheckParam(ppdata != null || cdata == 0, nameof(ppdata));
            for (int i = 0; i < cdata; i++)
            {
                var pdata = ppdata[i];
                host.CheckParam(pdata != null, "pdata");
                host.CheckParam(0 <= pdata->ccol && pdata->ccol <= int.MaxValue, "ccol");
                host.CheckParam(0 <= pdata->crow && pdata->crow <= long.MaxValue, "crow");
                if (pdata->ccol > 0)
                {
                    host.CheckParam(pdata->names != null, "names");
                    host.CheckParam(pdata->kinds != null, "kinds");
                    host.CheckParam(pdata->keyCards != null, "keyCards");
                    host.CheckParam(pdata->vecCards != null, "vecCards");
                    host.CheckParam(pdata->getters != null, "getters");
                }
            }

            ch.Trace("Validating number of data sources");

            // Wrap the data sets.
            ch.Trace("Wrapping native data sources");
            ch.Trace("Executing");
            ExecCore(penv, host, ch, graph, cdata, ppdata);
        }
        catch (Exception e)
        {
            // Report only the innermost exception of the chain.
            var ex = e;
            while (ex.InnerException != null)
            {
                ex = ex.InnerException;
            }
            ch.Error("*** {1}: '{0}'", ex.Message, ex.GetType());
            return -1;
        }
        finally
        {
            sw.Stop();
            // Elapsed time is promoted from trace to info when verbosity is above zero.
            if (penv->verbosity > 0)
            {
                ch.Info("Elapsed time: {0}", sw.Elapsed);
            }
            else
            {
                ch.Trace("Elapsed time: {0}", sw.Elapsed);
            }
        }
    }

    return 0;
}
/// <summary>
/// Sends the visible columns of <paramref name="view"/> to the native data sink.
/// </summary>
/// <remarks>
/// The output columns (names, kinds, key cardinalities) are described to the sink first.
/// The sink answers with one setter per output column, after which key values are sent
/// and every cursor row is pushed through the setters.
/// </remarks>
/// <param name="ch">Channel used for assertions and for the not-supported exception.</param>
/// <param name="penv">Environment block; nothing is sent when its dataSink is null.</param>
/// <param name="view">Data to send. Hidden columns are skipped.</param>
/// <param name="infos">
/// Optional per-column info. A column whose entry has Expand set contributes one output
/// column per vector slot; every other column contributes exactly one.
/// </param>
private static unsafe void SendViewToNative(IChannel ch, EnvironmentBlock* penv, IDataView view, Dictionary<string, ColumnMetadataInfo> infos = null)
{
    Contracts.AssertValue(ch);
    Contracts.Assert(penv != null);
    Contracts.AssertValue(view);
    Contracts.AssertValueOrNull(infos);
    if (penv->dataSink == null)
    {
        // Environment doesn't want any data!
        return;
    }

    var dataSink = MarshalDelegate<DataSink>(penv->dataSink);

    var schema = view.Schema;
    var colIndices = new List<int>();
    var kindList = new List<DataKind>();
    var keyCardList = new List<int>();
    var nameUtf8Bytes = new List<Byte>();
    var nameIndices = new List<int>();
    var allNames = new HashSet<string>();

    for (int col = 0; col < schema.Count; col++)
    {
        if (schema[col].IsHidden)
        {
            continue;
        }

        var fullType = schema[col].Type;
        var itemType = fullType.ItemType;
        var name = schema[col].Name;

        DataKind kind = itemType.RawKind;
        int keyCard;

        if (fullType.ValueCount == 0)
        {
            throw ch.ExceptNotSupp("Column has variable length vector: " + name + ". Not supported in python. Drop column before sending to Python");
        }

        if (itemType.IsKey)
        {
            // Key types are returned as their signed counterparts in Python, so that -1 can be the missing value.
            // For U1 and U2 kinds, we convert to a larger type to prevent overflow. For U4 and U8 kinds, we convert
            // to I4 if the key count is known (since KeyCount is an I4), and to I8 otherwise.
            switch (kind)
            {
                case DataKind.U1:
                    kind = DataKind.I2;
                    break;
                case DataKind.U2:
                    kind = DataKind.I4;
                    break;
                case DataKind.U4:
                    // We convert known-cardinality U4 key types to I4.
                    kind = itemType.KeyCount > 0 ? DataKind.I4 : DataKind.I8;
                    break;
                case DataKind.U8:
                    // We convert known-cardinality U8 key types to I4.
                    kind = itemType.KeyCount > 0 ? DataKind.I4 : DataKind.I8;
                    break;
            }

            // A key column without key-value metadata is reported with cardinality -1.
            keyCard = itemType.KeyCount;
            if (!schema[col].HasKeyValues(keyCard))
            {
                keyCard = -1;
            }
        }
        else if (itemType.IsStandardScalar())
        {
            switch (itemType.RawKind)
            {
                default:
                    throw Contracts.Except("Data type {0} not handled", itemType.RawKind);

                case DataKind.I1:
                case DataKind.I2:
                case DataKind.I4:
                case DataKind.I8:
                case DataKind.U1:
                case DataKind.U2:
                case DataKind.U4:
                case DataKind.U8:
                case DataKind.R4:
                case DataKind.R8:
                case DataKind.BL:
                case DataKind.TX:
                    break;
            }
            keyCard = -1;
        }
        else
        {
            throw Contracts.Except("Data type {0} not handled", itemType.RawKind);
        }

        int nSlots;
        ColumnMetadataInfo info;
        if (infos != null && infos.TryGetValue(name, out info) && info.Expand)
        {
            Contracts.Assert(fullType.IsKnownSizeVector);
            nSlots = fullType.VectorSize;
            if (info.SlotNames != null)
            {
                Contracts.Assert(info.SlotNames.Length == nSlots);
                for (int i = 0; i < nSlots; i++)
                {
                    AddUniqueName(info.SlotNames[i], allNames, nameIndices, nameUtf8Bytes);
                }
            }
            else if (schema[col].HasSlotNames(nSlots))
            {
                var romNames = default(VBuffer<ReadOnlyMemory<char>>);
                schema[col].Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref romNames);
                foreach (var kvp in romNames.Items(true))
                {
                    // REVIEW: Add the proper number of zeros to the slot index to make them sort in the right order.
                    var slotName = name + "." +
                        (!kvp.Value.IsEmpty ? kvp.Value.ToString() : kvp.Key.ToString(CultureInfo.InvariantCulture));
                    AddUniqueName(slotName, allNames, nameIndices, nameUtf8Bytes);
                }
            }
            else
            {
                for (int i = 0; i < nSlots; i++)
                {
                    AddUniqueName(name + "." + i, allNames, nameIndices, nameUtf8Bytes);
                }
            }
        }
        else
        {
            nSlots = 1;
            AddUniqueName(name, allNames, nameIndices, nameUtf8Bytes);
        }

        colIndices.Add(col);
        for (int i = 0; i < nSlots; i++)
        {
            kindList.Add(kind);
            keyCardList.Add(keyCard);
        }
    }

    ch.Assert(allNames.Count == kindList.Count);
    ch.Assert(allNames.Count == keyCardList.Count);
    ch.Assert(allNames.Count == nameIndices.Count);

    var kinds = kindList.ToArray();
    var keyCards = keyCardList.ToArray();
    var nameBytes = nameUtf8Bytes.ToArray();
    var names = new byte*[allNames.Count];

    // Set view of the selected columns, so the cursor predicate is a hash lookup
    // instead of a linear scan of the list for every schema column.
    var activeCols = new HashSet<int>(colIndices);

    fixed (DataKind* prgkind = kinds)
    fixed (byte* prgbNames = nameBytes)
    fixed (byte** prgname = names)
    fixed (int* prgkeyCard = keyCards)
    {
        // Each name pointer addresses its start offset inside the shared UTF-8 buffer.
        for (int iid = 0; iid < names.Length; iid++)
        {
            names[iid] = prgbNames + nameIndices[iid];
        }

        DataViewBlock block;
        block.ccol = allNames.Count;
        block.crow = view.GetRowCount() ?? 0;
        block.names = (sbyte**)prgname;
        block.kinds = prgkind;
        block.keyCards = prgkeyCard;

        dataSink(penv, &block, out var setters, out var keyValueSetter);

        if (setters == null)
        {
            // REVIEW: What should we do?
            return;
        }
        ch.Assert(keyValueSetter != null);
        var kvSet = MarshalDelegate<KeyValueSetter>(keyValueSetter);
        using (var cursor = view.GetRowCursor(activeCols.Contains))
        {
            var fillers = new BufferFillerBase[colIndices.Count];
            var pyColumn = 0;
            var keyIndex = 0;
            for (int i = 0; i < colIndices.Count; i++)
            {
                var type = schema[colIndices[i]].Type;
                if (type.ItemType.IsKey && schema[colIndices[i]].HasKeyValues(type.ItemType.KeyCount))
                {
                    var keyValues = default(VBuffer<ReadOnlyMemory<char>>);
                    schema[colIndices[i]].Metadata.GetValue(MetadataUtils.Kinds.KeyValues, ref keyValues);
                    // The same key values are sent once per slot, each under its own key index.
                    for (int slot = 0; slot < type.ValueCount; slot++)
                    {
                        foreach (var kvp in keyValues.Items())
                        {
                            if (kvp.Value.IsEmpty)
                            {
                                kvSet(penv, keyIndex, kvp.Key, null, 0);
                            }
                            else
                            {
                                byte[] bt = Encoding.UTF8.GetBytes(kvp.Value.ToString());
                                fixed (byte* pt = bt)
                                {
                                    kvSet(penv, keyIndex, kvp.Key, (sbyte*)pt, bt.Length);
                                }
                            }
                        }
                        keyIndex++;
                    }
                }
                fillers[i] = BufferFillerBase.Create(penv, cursor, pyColumn, colIndices[i], kinds[pyColumn], type, setters[pyColumn]);
                // NOTE(review): this advances by the full vector size even when the column was not
                // expanded above — presumably every vector column reaching here is marked Expand; confirm.
                pyColumn += type.IsVector ? type.VectorSize : 1;
            }

            while (cursor.MoveNext())
            {
                // Fill values for the current row.
                for (int i = 0; i < fillers.Length; i++)
                {
                    fillers[i].Set();
                }
            }
        }
    }
}
/// <summary>
/// Creates the filler that copies one data view column into the native buffer behind
/// <paramref name="setter"/>.
/// </summary>
/// <remarks>
/// Key columns are written as signed values with -1 for missing (a zero key, or a key above
/// the known count). The setter for a key column is marshalled according to
/// <paramref name="dataKind"/>, i.e. the signed kind the caller announced to the native side,
/// rather than the key's unsigned raw kind, so the value is never narrowed below the width
/// of the destination column.
/// </remarks>
/// <param name="penv">Environment block passed to every setter call.</param>
/// <param name="input">Row the values are read from.</param>
/// <param name="pyCol">Index of the destination column.</param>
/// <param name="idvCol">Index of the source column in the data view.</param>
/// <param name="dataKind">Kind of the destination column.</param>
/// <param name="type">Type of the source column.</param>
/// <param name="setter">Native setter function pointer for the destination column.</param>
/// <returns>A filler for the column; never null.</returns>
public static BufferFillerBase Create(EnvironmentBlock* penv, Row input, int pyCol, int idvCol, DataKind dataKind, ColumnType type, void* setter)
{
    var itemType = type.ItemType;

    if (itemType.KeyCount > 0 || itemType.IsKey)
    {
        // With an unknown key count there is no upper bound to test against.
        ulong keyMax = itemType.KeyCount > 0 ? (ulong)itemType.KeyCount : ulong.MaxValue;
        var write = CreateSignedKeyWriter(penv, dataKind, setter);

        switch (itemType.RawKind)
        {
            case DataKind.U1:
                ValuePoker<byte> pokeU1 =
                    (byte value, int col, long index) => write(col, index, ToSignedKey(value, keyMax));
                return new Impl<byte>(input, pyCol, idvCol, type, pokeU1);
            case DataKind.U2:
                ValuePoker<ushort> pokeU2 =
                    (ushort value, int col, long index) => write(col, index, ToSignedKey(value, keyMax));
                return new Impl<ushort>(input, pyCol, idvCol, type, pokeU2);
            case DataKind.U4:
                ValuePoker<uint> pokeU4 =
                    (uint value, int col, long index) => write(col, index, ToSignedKey(value, keyMax));
                return new Impl<uint>(input, pyCol, idvCol, type, pokeU4);
            case DataKind.U8:
                ValuePoker<ulong> pokeU8 =
                    (ulong value, int col, long index) => write(col, index, ToSignedKey(value, keyMax));
                return new Impl<ulong>(input, pyCol, idvCol, type, pokeU8);
            default:
                // Previously this fell through to "return null" in release builds.
                throw Contracts.Except("Data type not handled");
        }
    }

    switch (dataKind)
    {
        case DataKind.R4:
            var fnR4 = MarshalDelegate<R4Setter>(setter);
            ValuePoker<float> pokeR4 =
                (float value, int col, long index) => fnR4(penv, col, index, value);
            return new Impl<float>(input, pyCol, idvCol, type, pokeR4);
        case DataKind.R8:
            var fnR8 = MarshalDelegate<R8Setter>(setter);
            ValuePoker<double> pokeR8 =
                (double value, int col, long index) => fnR8(penv, col, index, value);
            return new Impl<double>(input, pyCol, idvCol, type, pokeR8);
        case DataKind.BL:
            var fnBl = MarshalDelegate<BLSetter>(setter);
            // A bool has only two states, so the former third (0xFF) arm could never be taken.
            ValuePoker<bool> pokeBl =
                (bool value, int col, long index) => fnBl(penv, col, index, value ? (byte)1 : (byte)0);
            return new Impl<bool>(input, pyCol, idvCol, type, pokeBl);
        case DataKind.I1:
            var fnI1 = MarshalDelegate<I1Setter>(setter);
            ValuePoker<sbyte> pokeI1 =
                (sbyte value, int col, long index) => fnI1(penv, col, index, value);
            return new Impl<sbyte>(input, pyCol, idvCol, type, pokeI1);
        case DataKind.I2:
            var fnI2 = MarshalDelegate<I2Setter>(setter);
            ValuePoker<short> pokeI2 =
                (short value, int col, long index) => fnI2(penv, col, index, value);
            return new Impl<short>(input, pyCol, idvCol, type, pokeI2);
        case DataKind.I4:
            var fnI4 = MarshalDelegate<I4Setter>(setter);
            ValuePoker<int> pokeI4 =
                (int value, int col, long index) => fnI4(penv, col, index, value);
            return new Impl<int>(input, pyCol, idvCol, type, pokeI4);
        case DataKind.I8:
            var fnI8 = MarshalDelegate<I8Setter>(setter);
            ValuePoker<long> pokeI8 =
                (long value, int col, long index) => fnI8(penv, col, index, value);
            return new Impl<long>(input, pyCol, idvCol, type, pokeI8);
        case DataKind.U1:
            var fnU1 = MarshalDelegate<U1Setter>(setter);
            ValuePoker<byte> pokeByte =
                (byte value, int col, long index) => fnU1(penv, col, index, value);
            return new Impl<byte>(input, pyCol, idvCol, type, pokeByte);
        case DataKind.U2:
            var fnU2 = MarshalDelegate<U2Setter>(setter);
            ValuePoker<ushort> pokeUShort =
                (ushort value, int col, long index) => fnU2(penv, col, index, value);
            return new Impl<ushort>(input, pyCol, idvCol, type, pokeUShort);
        case DataKind.U4:
            var fnU4 = MarshalDelegate<U4Setter>(setter);
            ValuePoker<uint> pokeUInt =
                (uint value, int col, long index) => fnU4(penv, col, index, value);
            return new Impl<uint>(input, pyCol, idvCol, type, pokeUInt);
        case DataKind.U8:
            var fnU8 = MarshalDelegate<U8Setter>(setter);
            ValuePoker<ulong> pokeULong =
                (ulong value, int col, long index) => fnU8(penv, col, index, value);
            return new Impl<ulong>(input, pyCol, idvCol, type, pokeULong);
        case DataKind.TX:
            var fnTX = MarshalDelegate<TXSetter>(setter);
            ValuePoker<ReadOnlyMemory<char>> pokeTX =
                (ReadOnlyMemory<char> value, int col, long index) =>
                {
                    if (value.IsEmpty)
                    {
                        fnTX(penv, col, index, null, 0);
                    }
                    else
                    {
                        // Text is handed over as UTF-8 bytes plus an explicit length.
                        byte[] bt = Encoding.UTF8.GetBytes(value.ToString());
                        fixed (byte* pt = bt)
                        {
                            fnTX(penv, col, index, (sbyte*)pt, bt.Length);
                        }
                    }
                };
            return new Impl<ReadOnlyMemory<char>>(input, pyCol, idvCol, type, pokeTX);
        default:
            throw Contracts.Except("Data type not handled");
    }
}

/// <summary>
/// Maps a one-based key to its zero-based signed form; zero and anything above
/// <paramref name="keyMax"/> map to -1.
/// </summary>
private static long ToSignedKey(ulong value, ulong keyMax)
{
    return value == 0 || value > keyMax ? -1L : unchecked((long)(value - 1));
}

/// <summary>
/// Wraps <paramref name="setter"/> as a writer of signed key values, marshalled at the width of
/// <paramref name="dataKind"/> (I1, I2, I4 or I8) and narrowing each value to that width.
/// </summary>
private static unsafe Action<int, long, long> CreateSignedKeyWriter(EnvironmentBlock* penv, DataKind dataKind, void* setter)
{
    switch (dataKind)
    {
        case DataKind.I1:
            var fnI1 = MarshalDelegate<I1Setter>(setter);
            return (col, index, value) => fnI1(penv, col, index, unchecked((sbyte)value));
        case DataKind.I2:
            var fnI2 = MarshalDelegate<I2Setter>(setter);
            return (col, index, value) => fnI2(penv, col, index, unchecked((short)value));
        case DataKind.I4:
            var fnI4 = MarshalDelegate<I4Setter>(setter);
            return (col, index, value) => fnI4(penv, col, index, unchecked((int)value));
        case DataKind.I8:
            var fnI8 = MarshalDelegate<I8Setter>(setter);
            return (col, index, value) => fnI8(penv, col, index, value);
        default:
            throw Contracts.Except("Data type not handled");
    }
}
/// <summary>
/// Parses the JSON graph, binds its inputs, runs every node and delivers the outputs.
/// </summary>
/// <remarks>
/// Each entry of "inputs" and "outputs" maps a port name to a path string. For data view
/// inputs a blank path selects the next native data source (wrapped in a cache over all of
/// its columns); otherwise the file is loaded by extension (.txt, .dprep, or the binary loader).
/// Data view outputs go to the CSR sink when the path equals CSR_MATRIX, to a file when the
/// path is non-blank, and to the data frame sink otherwise. Models are only read from and
/// written to files. Native data views are disposed before returning, whether or not the
/// run succeeded.
/// </remarks>
/// <param name="penv">Environment block supplying the seed, python path, slot limit and data sink.</param>
/// <param name="env">Environment the "RunGraph" host is registered on.</param>
/// <param name="graphStr">JSON text of the graph.</param>
/// <param name="cdata">Number of entries in <paramref name="ppdata"/>.</param>
/// <param name="ppdata">Native data source blocks.</param>
private static void RunGraphCore(EnvironmentBlock* penv, IHostEnvironment env, string graphStr, int cdata, DataSourceBlock** ppdata)
{
    Contracts.AssertValue(env);

    var host = env.Register("RunGraph", penv->seed, null);

    JObject graph;
    try
    {
        graph = JObject.Parse(graphStr);
    }
    catch (JsonReaderException ex)
    {
        throw host.Except(ex, "Failed to parse experiment graph: {0}", ex.Message);
    }

    var runner = new GraphRunner(host, graph["nodes"] as JArray);

    var dvNative = new IDataView[cdata];
    try
    {
        for (int i = 0; i < cdata; i++)
        {
            dvNative[i] = new NativeDataView(host, ppdata[i]);
        }

        // Setting inputs.
        var jInputs = graph["inputs"] as JObject;
        if (graph["inputs"] != null && jInputs == null)
        {
            throw host.Except("Unexpected value for 'inputs': {0}", graph["inputs"]);
        }

        int iDv = 0;
        if (jInputs != null)
        {
            foreach (var kvp in jInputs)
            {
                var pathValue = kvp.Value as JValue;
                if (pathValue == null)
                {
                    throw host.Except("Invalid value for input: {0}", kvp.Value);
                }

                var path = pathValue.Value<string>();
                var varName = kvp.Key;
                var type = runner.GetPortDataKind(varName);

                switch (type)
                {
                    case TlcModule.DataKind.FileHandle:
                        var fh = new SimpleFileHandle(host, path, false, false);
                        runner.SetInput(varName, fh);
                        break;

                    case TlcModule.DataKind.DataView:
                        IDataView dv;
                        if (!string.IsNullOrWhiteSpace(path))
                        {
                            var extension = Path.GetExtension(path);
                            if (extension == ".txt")
                            {
                                dv = TextLoader.LoadFile(host, new TextLoader.Options(), new MultiFileSource(path));
                            }
                            else if (extension == ".dprep")
                            {
                                dv = LoadDprepFile(BytesToString(penv->pythonPath), path);
                            }
                            else
                            {
                                dv = new BinaryLoader(host, new BinaryLoader.Arguments(), path);
                            }
                        }
                        else
                        {
                            // A graph that asks for more in-memory inputs than were supplied must fail
                            // with a clear error in every build, not only trip a debug assertion.
                            if (iDv >= dvNative.Length)
                            {
                                throw host.Except("Input '{0}' needs an in-memory data view, but only {1} were supplied", varName, dvNative.Length);
                            }

                            // prefetch all columns
                            dv = dvNative[iDv++];
                            var prefetch = new int[dv.Schema.Count];
                            for (int i = 0; i < prefetch.Length; i++)
                            {
                                prefetch[i] = i;
                            }
                            dv = new CacheDataView(host, dv, prefetch);
                        }
                        runner.SetInput(varName, dv);
                        break;

                    case TlcModule.DataKind.PredictorModel:
                        PredictorModel pm;
                        if (!string.IsNullOrWhiteSpace(path))
                        {
                            using (var fs = File.OpenRead(path))
                            {
                                pm = new PredictorModelImpl(host, fs);
                            }
                        }
                        else
                        {
                            throw host.Except("Model must be loaded from a file");
                        }
                        runner.SetInput(varName, pm);
                        break;

                    case TlcModule.DataKind.TransformModel:
                        TransformModel tm;
                        if (!string.IsNullOrWhiteSpace(path))
                        {
                            using (var fs = File.OpenRead(path))
                            {
                                tm = new TransformModelImpl(host, fs);
                            }
                        }
                        else
                        {
                            throw host.Except("Model must be loaded from a file");
                        }
                        runner.SetInput(varName, tm);
                        break;

                    default:
                        throw host.Except("Port type {0} not supported", type);
                }
            }
        }

        runner.RunAll();

        // Reading outputs.
        using (var ch = host.Start("Reading outputs"))
        {
            var jOutputs = graph["outputs"] as JObject;
            if (jOutputs != null)
            {
                foreach (var kvp in jOutputs)
                {
                    var pathValue = kvp.Value as JValue;
                    if (pathValue == null)
                    {
                        throw host.Except("Invalid value for output: {0}", kvp.Value);
                    }

                    var path = pathValue.Value<string>();
                    var varName = kvp.Key;
                    var type = runner.GetPortDataKind(varName);

                    switch (type)
                    {
                        case TlcModule.DataKind.FileHandle:
                            // The output is still fetched so that a failure from the runner surfaces first.
                            runner.GetOutput<IFileHandle>(varName);
                            throw host.ExceptNotSupp("File handle outputs not yet supported.");

                        case TlcModule.DataKind.DataView:
                            var idv = runner.GetOutput<IDataView>(varName);
                            if (path == CSR_MATRIX)
                            {
                                SendViewToNativeAsCsr(ch, penv, idv);
                            }
                            else if (!string.IsNullOrWhiteSpace(path))
                            {
                                SaveIdvToFile(idv, path, host);
                            }
                            else
                            {
                                var infos = ProcessColumns(ref idv, penv->maxSlots, host);
                                SendViewToNativeAsDataFrame(ch, penv, idv, infos);
                            }
                            break;

                        case TlcModule.DataKind.PredictorModel:
                            var pm = runner.GetOutput<PredictorModel>(varName);
                            if (!string.IsNullOrWhiteSpace(path))
                            {
                                SavePredictorModelToFile(pm, path, host);
                            }
                            else
                            {
                                throw host.Except("Returning in-memory models is not supported");
                            }
                            break;

                        case TlcModule.DataKind.TransformModel:
                            var tm = runner.GetOutput<TransformModel>(varName);
                            if (!string.IsNullOrWhiteSpace(path))
                            {
                                // File.Create truncates an existing file. File.OpenWrite would leave the tail
                                // of a longer, older model behind the newly written bytes.
                                using (var fs = File.Create(path))
                                {
                                    tm.Save(host, fs);
                                }
                            }
                            else
                            {
                                throw host.Except("Returning in-memory models is not supported");
                            }
                            break;

                        case TlcModule.DataKind.Array:
                            var objArray = runner.GetOutput<object[]>(varName);
                            var modelArray = objArray as PredictorModel[];
                            if (modelArray != null)
                            {
                                // Save each model separately
                                for (var i = 0; i < modelArray.Length; i++)
                                {
                                    var modelPath = string.Format(CultureInfo.InvariantCulture, path, i);
                                    SavePredictorModelToFile(modelArray[i], modelPath, host);
                                }
                            }
                            else
                            {
                                // Report the first element's type when there is one; fall back to the array's
                                // element type so an empty or null array cannot mask this error with another.
                                object first = objArray != null && objArray.Length > 0 ? objArray[0] : null;
                                var reported = first != null
                                    ? first.GetType()
                                    : (objArray != null ? objArray.GetType().GetElementType() : null);
                                throw host.Except("DataKind.Array type {0} not supported", reported);
                            }
                            break;

                        default:
                            throw host.Except("Port type {0} not supported", type);
                    }
                }
            }
        }
    }
    finally
    {
        // The raw data view is disposable so it lets go of unmanaged raw pointers before we return.
        for (int i = 0; i < dvNative.Length; i++)
        {
            var view = dvNative[i];
            if (view == null)
            {
                continue;
            }
            host.Assert(view is IDisposable);
            var disp = (IDisposable)dvNative[i];
            disp.Dispose();
        }
    }
}
/// <summary>
/// Creates the filler that appends one data view column to <paramref name="csrData"/>,
/// converting each value to the output precision.
/// </summary>
/// <param name="penv">Environment block (not used by this factory itself).</param>
/// <param name="input">Row the values are read from.</param>
/// <param name="idvCol">Index of the source column.</param>
/// <param name="idvColType">Type of the source column; its item raw kind selects the filler.</param>
/// <param name="outputDataKind">R4 or R8; anything else is rejected.</param>
/// <param name="csrData">Destination the values are appended to.</param>
/// <returns>A filler typed to the source column's raw kind.</returns>
public static CsrFillerBase Create(EnvironmentBlock* penv, DataViewRow input, int idvCol, DataViewType idvColType, InternalDataKind outputDataKind, CsrData csrData)
{
    bool singlePrecision = outputDataKind == InternalDataKind.R4;
    if (!singlePrecision && outputDataKind != InternalDataKind.R8)
    {
        throw Contracts.Except("Target data type not supported.");
    }

    var sourceKind = idvColType.GetItemType().GetRawKind();

    if (singlePrecision)
    {
        // Single-precision output accepts only the narrow source kinds.
        switch (sourceKind)
        {
            case InternalDataKind.I1:
                DataAppender<sbyte> fromI1 = (sbyte value, int slot) => csrData.AppendR4((float)value, slot);
                return new CsrFiller<sbyte>(input, idvCol, idvColType, fromI1, csrData);
            case InternalDataKind.I2:
                DataAppender<short> fromI2 = (short value, int slot) => csrData.AppendR4((float)value, slot);
                return new CsrFiller<short>(input, idvCol, idvColType, fromI2, csrData);
            case InternalDataKind.U1:
                DataAppender<byte> fromU1 = (byte value, int slot) => csrData.AppendR4((float)value, slot);
                return new CsrFiller<byte>(input, idvCol, idvColType, fromU1, csrData);
            case InternalDataKind.U2:
                DataAppender<ushort> fromU2 = (ushort value, int slot) => csrData.AppendR4((float)value, slot);
                return new CsrFiller<ushort>(input, idvCol, idvColType, fromU2, csrData);
            case InternalDataKind.R4:
                DataAppender<float> fromR4 = (float value, int slot) => csrData.AppendR4((float)value, slot);
                return new CsrFiller<float>(input, idvCol, idvColType, fromR4, csrData);
            default:
                throw Contracts.Except("Source data type not supported");
        }
    }

    // Double-precision output additionally accepts I4, U4 and R8 sources.
    switch (sourceKind)
    {
        case InternalDataKind.I1:
            DataAppender<sbyte> fromI1 = (sbyte value, int slot) => csrData.AppendR8((double)value, slot);
            return new CsrFiller<sbyte>(input, idvCol, idvColType, fromI1, csrData);
        case InternalDataKind.I2:
            DataAppender<short> fromI2 = (short value, int slot) => csrData.AppendR8((double)value, slot);
            return new CsrFiller<short>(input, idvCol, idvColType, fromI2, csrData);
        case InternalDataKind.I4:
            DataAppender<int> fromI4 = (int value, int slot) => csrData.AppendR8((double)value, slot);
            return new CsrFiller<int>(input, idvCol, idvColType, fromI4, csrData);
        case InternalDataKind.U1:
            DataAppender<byte> fromU1 = (byte value, int slot) => csrData.AppendR8((double)value, slot);
            return new CsrFiller<byte>(input, idvCol, idvColType, fromU1, csrData);
        case InternalDataKind.U2:
            DataAppender<ushort> fromU2 = (ushort value, int slot) => csrData.AppendR8((double)value, slot);
            return new CsrFiller<ushort>(input, idvCol, idvColType, fromU2, csrData);
        case InternalDataKind.U4:
            DataAppender<uint> fromU4 = (uint value, int slot) => csrData.AppendR8((double)value, slot);
            return new CsrFiller<uint>(input, idvCol, idvColType, fromU4, csrData);
        case InternalDataKind.R4:
            DataAppender<float> fromR4 = (float value, int slot) => csrData.AppendR8((double)value, slot);
            return new CsrFiller<float>(input, idvCol, idvColType, fromR4, csrData);
        case InternalDataKind.R8:
            DataAppender<double> fromR8 = (double value, int slot) => csrData.AppendR8((double)value, slot);
            return new CsrFiller<double>(input, idvCol, idvColType, fromR8, csrData);
        default:
            throw Contracts.Except("Source data type not supported");
    }
}
/// <summary>
/// Sends the visible numeric columns of <paramref name="view"/> to the native data sink
/// as the four arrays of a CSR matrix ("data", "indices", "indptr", "shape").
/// </summary>
/// <remarks>
/// The data array is R4 unless some column has raw kind I4, U4 or R8, in which case it is R8.
/// Variable-length vectors and all other item kinds are rejected.
/// </remarks>
/// <param name="ch">Channel used for the not-supported exception.</param>
/// <param name="penv">Environment block; nothing is sent when its dataSink is null.</param>
/// <param name="view">Data to send. Hidden columns are skipped.</param>
private static unsafe void SendViewToNativeAsCsr(IChannel ch, EnvironmentBlock* penv, IDataView view)
{
    Contracts.AssertValue(ch);
    Contracts.Assert(penv != null);
    Contracts.AssertValue(view);
    if (penv->dataSink == null)
    {
        // Environment doesn't want any data!
        return;
    }

    var dataSink = MarshalDelegate<DataSink>(penv->dataSink);

    var schema = view.Schema;
    var colIndices = new List<int>();
    var outputDataKind = InternalDataKind.R4;

    int numOutputRows = 0;
    int numOutputCols = 0;

    for (int col = 0; col < schema.Count; col++)
    {
        if (schema[col].IsHidden)
        {
            continue;
        }

        var fullType = schema[col].Type;
        var itemType = fullType.GetItemType();
        int valueCount = fullType.GetValueCount();

        if (valueCount == 0)
        {
            throw ch.ExceptNotSupp("Column has variable length vector: " + schema[col].Name + ". Not supported in python. Drop column before sending to Python");
        }

        if (itemType.IsStandardScalar())
        {
            switch (itemType.GetRawKind())
            {
                default:
                    throw Contracts.Except("Data type {0} not supported", itemType.GetRawKind());

                case InternalDataKind.I1:
                case InternalDataKind.I2:
                case InternalDataKind.U1:
                case InternalDataKind.U2:
                case InternalDataKind.R4:
                    break;

                case InternalDataKind.I4:
                case InternalDataKind.U4:
                case InternalDataKind.R8:
                    // One such column is enough to switch the whole matrix to double precision.
                    outputDataKind = InternalDataKind.R8;
                    break;
            }
        }
        else
        {
            throw Contracts.Except("Data type {0} not supported", itemType.GetRawKind());
        }

        colIndices.Add(col);
        numOutputCols += valueCount;
    }

    var allNames = new HashSet<string>();
    var nameIndices = new List<int>();
    var nameUtf8Bytes = new List<Byte>();

    AddUniqueName("data", allNames, nameIndices, nameUtf8Bytes);
    AddUniqueName("indices", allNames, nameIndices, nameUtf8Bytes);
    AddUniqueName("indptr", allNames, nameIndices, nameUtf8Bytes);
    AddUniqueName("shape", allNames, nameIndices, nameUtf8Bytes);

    var kindList = new List<InternalDataKind> { outputDataKind, InternalDataKind.I4, InternalDataKind.I4, InternalDataKind.I4 };

    var kinds = kindList.ToArray();
    var nameBytes = nameUtf8Bytes.ToArray();
    var names = new byte*[allNames.Count];

    // Set view of the selected columns, so filtering the schema is a hash lookup per column
    // instead of a linear scan of the list.
    var activeCols = new HashSet<int>(colIndices);

    fixed (InternalDataKind* prgkind = kinds)
    fixed (byte* prgbNames = nameBytes)
    fixed (byte** prgname = names)
    {
        // Each name pointer addresses its start offset inside the shared UTF-8 buffer.
        for (int iid = 0; iid < names.Length; iid++)
        {
            names[iid] = prgbNames + nameIndices[iid];
        }

        DataViewBlock block;
        block.ccol = allNames.Count;
        block.crow = view.GetRowCount() ?? 0;
        block.names = (sbyte**)prgname;
        block.kinds = prgkind;
        block.keyCards = null;

        dataSink(penv, &block, out var setters, out var keyValueSetter);

        if (setters == null)
        {
            return;
        }

        using (var cursor = view.GetRowCursor(view.Schema.Where(c => activeCols.Contains(c.Index))))
        {
            CsrData csrData = new CsrData(penv, setters, outputDataKind);
            var fillers = new CsrFillerBase[colIndices.Count];

            for (int i = 0; i < colIndices.Count; i++)
            {
                var type = schema[colIndices[i]].Type;
                fillers[i] = CsrFillerBase.Create(penv, cursor, colIndices[i], type, outputDataKind, csrData);
            }

            while (cursor.MoveNext())
            {
                for (int i = 0; i < fillers.Length; i++)
                {
                    fillers[i].Set();
                }

                csrData.IncrementRow();
                numOutputRows++;
            }

            // The shape is only known once the cursor is exhausted.
            csrData.SetShape(numOutputRows, numOutputCols);
        }
    }
}
/// <summary>
/// Sends the visible columns of <paramref name="view"/> to the native data sink as data frame columns.
/// </summary>
/// <remarks>
/// The output columns (names, kinds, key cardinalities, value-count flags) are described to
/// the sink first. The sink answers with one setter per output column, after which key values
/// are sent and every cursor row is pushed through the setters.
/// </remarks>
/// <param name="ch">Channel used for assertions.</param>
/// <param name="penv">Environment block; nothing is sent when its dataSink is null.</param>
/// <param name="view">Data to send. Hidden columns are skipped.</param>
/// <param name="infos">
/// Optional per-column info. A column whose entry has Expand set contributes one output
/// column per vector slot; every other column contributes exactly one.
/// </param>
private static unsafe void SendViewToNativeAsDataFrame(IChannel ch, EnvironmentBlock* penv, IDataView view, Dictionary<string, ColumnMetadataInfo> infos = null)
{
    Contracts.AssertValue(ch);
    Contracts.Assert(penv != null);
    Contracts.AssertValue(view);
    Contracts.AssertValueOrNull(infos);
    if (penv->dataSink == null)
    {
        // Environment doesn't want any data!
        return;
    }

    var dataSink = MarshalDelegate<DataSink>(penv->dataSink);

    var schema = view.Schema;
    var colIndices = new List<int>(1000);
    var kindList = new ValueListBuilder<InternalDataKind>(INDICES_BUFFER_SIZE);
    var keyCardList = new ValueListBuilder<int>(INDICES_BUFFER_SIZE);
    var nameUtf8Bytes = new ValueListBuilder<byte>(UTF8_BUFFER_SIZE);
    var nameIndices = new ValueListBuilder<int>(INDICES_BUFFER_SIZE);
    var valueCounts = new List<byte>(1000);

    for (int col = 0; col < schema.Count; col++)
    {
        if (schema[col].IsHidden)
        {
            continue;
        }

        var fullType = schema[col].Type;
        var itemType = fullType.GetItemType();
        var name = schema[col].Name;

        var kind = itemType.GetRawKind();
        int keyCard;

        // Flag passed to the sink: 0 when the type reports a value count of zero, 1 otherwise.
        byte valueCount = (fullType.GetValueCount() == 0) ? (byte)0 : (byte)1;

        if (itemType is KeyDataViewType)
        {
            // Key types are returned as their signed counterparts in Python, so that -1 can be the missing value.
            // For U1 and U2 kinds, we convert to a larger type to prevent overflow. For U4 and U8 kinds, we convert
            // to I4 if the key count is known (since KeyCount is an I4), and to I8 otherwise.
            switch (kind)
            {
                case InternalDataKind.U1:
                    kind = InternalDataKind.I2;
                    break;
                case InternalDataKind.U2:
                    kind = InternalDataKind.I4;
                    break;
                case InternalDataKind.U4:
                    // We convert known-cardinality U4 key types to I4.
                    kind = itemType.GetKeyCount() > 0 ? InternalDataKind.I4 : InternalDataKind.I8;
                    break;
                case InternalDataKind.U8:
                    // We convert known-cardinality U8 key types to I4.
                    kind = itemType.GetKeyCount() > 0 ? InternalDataKind.I4 : InternalDataKind.I8;
                    break;
            }

            // A key column without key-value annotations is reported with cardinality -1.
            keyCard = itemType.GetKeyCountAsInt32();
            if (!schema[col].HasKeyValues())
            {
                keyCard = -1;
            }
        }
        else if (itemType.IsStandardScalar())
        {
            switch (itemType.GetRawKind())
            {
                default:
                    throw Contracts.Except("Data type {0} not handled", itemType.GetRawKind());

                case InternalDataKind.I1:
                case InternalDataKind.I2:
                case InternalDataKind.I4:
                case InternalDataKind.I8:
                case InternalDataKind.U1:
                case InternalDataKind.U2:
                case InternalDataKind.U4:
                case InternalDataKind.U8:
                case InternalDataKind.R4:
                case InternalDataKind.R8:
                case InternalDataKind.BL:
                case InternalDataKind.TX:
                case InternalDataKind.DT:
                    break;
            }
            keyCard = -1;
        }
        else
        {
            throw Contracts.Except("Data type {0} not handled", itemType.GetRawKind());
        }

        int nSlots;
        ColumnMetadataInfo info;
        if (infos != null && infos.TryGetValue(name, out info) && info.Expand)
        {
            Contracts.Assert(fullType.IsKnownSizeVector());
            nSlots = fullType.GetVectorSize();
            if (info.SlotNames != null)
            {
                Contracts.Assert(info.SlotNames.Length == nSlots);
                for (int i = 0; i < nSlots; i++)
                {
                    AddUniqueName(info.SlotNames[i], ref nameIndices, ref nameUtf8Bytes);
                }
            }
            else if (schema[col].HasSlotNames(nSlots))
            {
                var romNames = default(VBuffer<ReadOnlyMemory<char>>);
                schema[col].Annotations.GetValue(AnnotationUtils.Kinds.SlotNames, ref romNames);
                AddUniqueName(name, romNames, ref nameIndices, ref nameUtf8Bytes);
            }
            else
            {
                for (int i = 0; i < nSlots; i++)
                {
                    AddUniqueName(name + "." + i, ref nameIndices, ref nameUtf8Bytes);
                }
            }
        }
        else
        {
            nSlots = 1;
            AddUniqueName(name, ref nameIndices, ref nameUtf8Bytes);
        }

        colIndices.Add(col);
        for (int i = 0; i < nSlots; i++)
        {
            kindList.Append(kind);
            keyCardList.Append(keyCard);
            valueCounts.Add(valueCount);
        }
    }

    ch.Assert(kindList.Length == keyCardList.Length);
    ch.Assert(kindList.Length == nameIndices.Length);

    var kinds = kindList.AsSpan();
    var keyCards = keyCardList.AsSpan();
    var nameBytes = nameUtf8Bytes.AsSpan();
    var names = new byte*[nameIndices.Length];
    var valueCountsBytes = valueCounts.ToArray();

    // Set view of the selected columns, so filtering the schema is a hash lookup per column
    // instead of a linear scan of the list.
    var activeCols = new HashSet<int>(colIndices);

    fixed (InternalDataKind* prgkind = kinds)
    fixed (byte* prgbNames = nameBytes)
    fixed (byte** prgname = names)
    fixed (int* prgkeyCard = keyCards)
    fixed (byte* prgbValueCount = valueCountsBytes)
    {
        // Each name pointer addresses its start offset inside the shared UTF-8 buffer.
        for (int iid = 0; iid < names.Length; iid++)
        {
            names[iid] = prgbNames + nameIndices[iid];
        }

        DataViewBlock block;
        block.ccol = nameIndices.Length;
        block.crow = view.GetRowCount() ?? 0;
        block.names = (sbyte**)prgname;
        block.kinds = prgkind;
        block.keyCards = prgkeyCard;
        block.valueCounts = prgbValueCount;

        dataSink(penv, &block, out var setters, out var keyValueSetter);

        if (setters == null)
        {
            // REVIEW: What should we do?
            return;
        }
        ch.Assert(keyValueSetter != null);
        var kvSet = MarshalDelegate<KeyValueSetter>(keyValueSetter);
        using (var cursor = view.GetRowCursor(view.Schema.Where(c => activeCols.Contains(c.Index))))
        {
            var fillers = new BufferFillerBase[colIndices.Count];
            var pyColumn = 0;
            var keyIndex = 0;
            for (int i = 0; i < colIndices.Count; i++)
            {
                var type = schema[colIndices[i]].Type;
                var itemType = type.GetItemType();
                if ((itemType is KeyDataViewType) && schema[colIndices[i]].HasKeyValues())
                {
                    var keyValues = default(VBuffer<ReadOnlyMemory<char>>);
                    schema[colIndices[i]].Annotations.GetValue(AnnotationUtils.Kinds.KeyValues, ref keyValues);
                    // The same key values are sent once per slot, each under its own key index.
                    for (int slot = 0; slot < type.GetValueCount(); slot++)
                    {
                        foreach (var kvp in keyValues.Items())
                        {
                            if (kvp.Value.IsEmpty)
                            {
                                kvSet(penv, keyIndex, kvp.Key, null, 0);
                            }
                            else
                            {
                                byte[] bt = Encoding.UTF8.GetBytes(kvp.Value.ToString());
                                fixed (byte* pt = bt)
                                {
                                    kvSet(penv, keyIndex, kvp.Key, (sbyte*)pt, bt.Length);
                                }
                            }
                        }
                        keyIndex++;
                    }
                }
                fillers[i] = BufferFillerBase.Create(penv, cursor, pyColumn, colIndices[i], prgkind[pyColumn], type, setters[pyColumn]);

                // NOTE(review): a known-size vector advances by its full size even when the column was
                // not expanded above — presumably every such column is marked Expand; confirm.
                if ((type is VectorDataViewType) && (type.GetVectorSize() > 0))
                {
                    pyColumn += type.GetVectorSize();
                }
                else
                {
                    pyColumn++;
                }
            }

            while (cursor.MoveNext())
            {
                // Fill values for the current row.
                for (int i = 0; i < fillers.Length; i++)
                {
                    fillers[i].Set();
                }
            }
        }
    }
}