Beispiel #1
0
            /// <summary>
            /// Binds the native setter callbacks for CSR output and invokes the indptr setter once
            /// with the arguments (IndPtrCol, 0, 0).
            /// </summary>
            /// <param name="penv">Native environment block; stored and passed to the setter call.</param>
            /// <param name="setters">Native function pointers, indexed by DataCol, IndicesCol, IndPtrCol and ShapeCol.</param>
            /// <param name="outputDataKind">
            /// Element kind of the data column. R4 binds only the R4 data setter and R8 binds only
            /// the R8 data setter; for any other kind neither data setter field is assigned here.
            /// </param>
            public CsrData(EnvironmentBlock *penv, void **setters, InternalDataKind outputDataKind)
            {
                _penv  = penv;
                _row   = 0;
                _index = 0;
                col    = 0;

                // Exactly one of the two data setters is bound, depending on the element kind.
                switch (outputDataKind)
                {
                case InternalDataKind.R4:
                    _r4DataSetter = MarshalDelegate <R4Setter>(setters[DataCol]);
                    _r8DataSetter = null;
                    break;

                case InternalDataKind.R8:
                    _r4DataSetter = null;
                    _r8DataSetter = MarshalDelegate <R8Setter>(setters[DataCol]);
                    break;
                }

                // The three structural setters all share the I4 signature.
                _indicesSetter = MarshalDelegate <I4Setter>(setters[IndicesCol]);
                _indptrSetter  = MarshalDelegate <I4Setter>(setters[IndPtrCol]);
                _shapeSetter   = MarshalDelegate <I4Setter>(setters[ShapeCol]);

                // Presumably the leading zero of the CSR row-pointer array - TODO confirm.
                _indptrSetter(_penv, IndPtrCol, 0, 0);
            }
Beispiel #2
0
        /// <summary>
        /// Asserts that the arguments are usable and then forwards them to RunGraphCore.
        /// </summary>
        /// <param name="penv">Native environment block, passed through unchanged.</param>
        /// <param name="host">Host passed on as the environment for the graph run.</param>
        /// <param name="ch">Channel through which the argument assertions are raised.</param>
        /// <param name="graph">Graph description; asserted to be non-empty.</param>
        /// <param name="cdata">Number of entries in <paramref name="ppdata"/>; asserted to be non-negative.</param>
        /// <param name="ppdata">Native data source blocks; may be null only when <paramref name="cdata"/> is zero.</param>
        private static void ExecCore(EnvironmentBlock *penv, IHost host, IChannel ch, string graph, int cdata, DataSourceBlock **ppdata)
        {
            // The channel is validated first because every later assertion is raised through it.
            Contracts.AssertValue(ch);

            ch.AssertValue(host);
            ch.AssertNonEmpty(graph);
            ch.Assert(0 <= cdata);
            ch.Assert(cdata == 0 || ppdata != null);

            RunGraphCore(penv, host, graph, cdata, ppdata);
        }
Beispiel #3
0
        /// <summary>
        /// The generic entry point. The specific behavior is indicated in a string argument.
        /// </summary>
        /// <param name="penv">Native environment block supplying the cancellation callback, seed, verbosity and message sink.</param>
        /// <param name="psz">Bytes of the graph description; decoded with BytesToString.</param>
        /// <param name="cdata">Number of entries in <paramref name="ppdata"/>; must be non-negative.</param>
        /// <param name="ppdata">Native data source blocks; may be null only when <paramref name="cdata"/> is zero.</param>
        /// <returns>
        /// 0 on success; -1 when <paramref name="penv"/> is null or when parameter validation or
        /// execution throws.
        /// </returns>
        private static unsafe int GenericExec(EnvironmentBlock *penv, sbyte *psz, int cdata, DataSourceBlock **ppdata)
        {
            // penv is dereferenced while the environment is constructed, so a null block must be
            // rejected before that point. Without it there is no message sink to report through,
            // which leaves the return code as the only way to signal the failure.
            if (penv == null)
            {
                return(-1);
            }

            var env  = new RmlEnvironment(MarshalDelegate <CheckCancelled>(penv->checkCancel), penv->seed, verbose: penv->verbosity > 3);
            var host = env.Register("ML.NET_Execution");

            env.ComponentCatalog.RegisterAssembly(typeof(TextLoader).Assembly);                // ML.Data
            env.ComponentCatalog.RegisterAssembly(typeof(LinearModelParameters).Assembly);     // ML.StandardLearners
            env.ComponentCatalog.RegisterAssembly(typeof(CategoricalCatalog).Assembly);        // ML.Transforms
            env.ComponentCatalog.RegisterAssembly(typeof(FastTreeRegressionTrainer).Assembly); // ML.FastTree

            //env.ComponentCatalog.RegisterAssembly(typeof(EnsembleModelParameters).Assembly); // ML.Ensemble
            env.ComponentCatalog.RegisterAssembly(typeof(KMeansModelParameters).Assembly); // ML.KMeansClustering
            env.ComponentCatalog.RegisterAssembly(typeof(PcaModelParameters).Assembly);    // ML.PCA
            env.ComponentCatalog.RegisterAssembly(typeof(CVSplit).Assembly);               // ML.EntryPoints

            env.ComponentCatalog.RegisterAssembly(typeof(OlsModelParameters).Assembly);
            env.ComponentCatalog.RegisterAssembly(typeof(LightGbmBinaryModelParameters).Assembly);
            env.ComponentCatalog.RegisterAssembly(typeof(TensorFlowTransformer).Assembly);
            //env.ComponentCatalog.RegisterAssembly(typeof(SymSgdClassificationTrainer).Assembly);
            //env.ComponentCatalog.RegisterAssembly(typeof(AutoInference).Assembly); // ML.PipelineInference
            env.ComponentCatalog.RegisterAssembly(typeof(DataViewReference).Assembly);
            env.ComponentCatalog.RegisterAssembly(typeof(ImageLoadingTransformer).Assembly);
            //env.ComponentCatalog.RegisterAssembly(typeof(SaveOnnxCommand).Assembly);
            //env.ComponentCatalog.RegisterAssembly(typeof(TimeSeriesProcessingEntryPoints).Assembly);
            //env.ComponentCatalog.RegisterAssembly(typeof(ParquetLoader).Assembly);
            env.ComponentCatalog.RegisterAssembly(typeof(ForecastExtensions).Assembly);

            using (var ch = host.Start("Executing"))
            {
                var sw = System.Diagnostics.Stopwatch.StartNew();
                try
                {
                    ch.Trace("Checking parameters");

                    host.CheckParam(penv->messageSink != null, "penv->message");
                    host.CheckParam(psz != null, nameof(psz));

                    ch.Trace("Converting graph operands");
                    var graph = BytesToString(psz);

                    ch.Trace("Wiring message sink");
                    var message          = MarshalDelegate <MessageSink>(penv->messageSink);
                    var messageValidator = new MessageValidator(host);
                    var lk = new object();
                    Action <IMessageSource, ChannelMessage> listener =
                        (sender, msg) =>
                    {
                        byte[] bs = StringToNullTerminatedBytes(sender.FullName);
                        string m  = messageValidator.Validate(msg);
                        if (!string.IsNullOrEmpty(m))
                        {
                            byte[] bm = StringToNullTerminatedBytes(m);

                            // Calls into the native sink are serialized, and both buffers stay
                            // pinned for the duration of the call.
                            lock (lk)
                            {
                                fixed(byte *ps = bs)
                                fixed(byte *pm = bm)
                                message(penv, msg.Kind, (sbyte *)ps, (sbyte *)pm);
                            }
                        }
                    };
                    env.AddListener(listener);

                    host.CheckParam(cdata >= 0, nameof(cdata), "must be non-negative");
                    host.CheckParam(ppdata != null || cdata == 0, nameof(ppdata));
                    for (int i = 0; i < cdata; i++)
                    {
                        var pdata = ppdata[i];
                        host.CheckParam(pdata != null, "pdata");
                        host.CheckParam(0 <= pdata->ccol && pdata->ccol <= int.MaxValue, "ccol");
                        host.CheckParam(0 <= pdata->crow && pdata->crow <= long.MaxValue, "crow");

                        // The per-column arrays are only required when there is at least one column.
                        if (pdata->ccol > 0)
                        {
                            host.CheckParam(pdata->names != null, "names");
                            host.CheckParam(pdata->kinds != null, "kinds");
                            host.CheckParam(pdata->keyCards != null, "keyCards");
                            host.CheckParam(pdata->vecCards != null, "vecCards");
                            host.CheckParam(pdata->getters != null, "getters");
                        }
                    }

                    ch.Trace("Validating number of data sources");

                    // Wrap the data sets.
                    ch.Trace("Wrapping native data sources");
                    ch.Trace("Executing");
                    ExecCore(penv, host, ch, graph, cdata, ppdata);
                }
                catch (Exception e)
                {
                    // Report the innermost exception of the chain; it is never rethrown because
                    // failures are signalled through the return code.
                    var ex = e;
                    while (ex.InnerException != null)
                    {
                        ex = ex.InnerException;
                    }
                    ch.Error("*** {1}: '{0}'", ex.Message, ex.GetType());
                    return(-1);
                }
                finally
                {
                    // The elapsed time is always logged; verbosity only selects the level.
                    sw.Stop();
                    if (penv->verbosity > 0)
                    {
                        ch.Info("Elapsed time: {0}", sw.Elapsed);
                    }
                    else
                    {
                        ch.Trace("Elapsed time: {0}", sw.Elapsed);
                    }
                }
            }
            return(0);
        }
Beispiel #4
0
        /// <summary>
        /// Sends the non-hidden columns of <paramref name="view"/> to the native data sink: first the
        /// column layout (names, kinds, key cardinalities), then the key value names of key columns,
        /// and finally every row through per-column buffer fillers.
        /// </summary>
        /// <param name="ch">Channel used for assertions and for raising the not-supported error.</param>
        /// <param name="penv">Native environment block; nothing is sent when its dataSink is null.</param>
        /// <param name="view">The data view to send.</param>
        /// <param name="infos">
        /// Optional per-column information keyed by column name. A column whose entry has Expand set
        /// is sent as one native column per vector slot; every other column is sent as one column.
        /// </param>
        private static unsafe void SendViewToNative(IChannel ch, EnvironmentBlock *penv, IDataView view, Dictionary <string, ColumnMetadataInfo> infos = null)
        {
            Contracts.AssertValue(ch);
            Contracts.Assert(penv != null);
            Contracts.AssertValue(view);
            Contracts.AssertValueOrNull(infos);
            if (penv->dataSink == null)
            {
                // Environment doesn't want any data!
                return;
            }

            var dataSink = MarshalDelegate <DataSink>(penv->dataSink);

            var schema        = view.Schema;
            var colIndices    = new List <int>();
            // Number of native columns emitted for each entry of colIndices (parallel list).
            var slotCounts    = new List <int>();
            var kindList      = new List <DataKind>();
            var keyCardList   = new List <int>();
            var nameUtf8Bytes = new List <Byte>();
            var nameIndices   = new List <int>();
            var allNames      = new HashSet <string>();

            // Pass 1: describe the layout. Each visible view column contributes nSlots native columns.
            for (int col = 0; col < schema.Count; col++)
            {
                if (schema[col].IsHidden)
                {
                    continue;
                }

                var fullType = schema[col].Type;
                var itemType = fullType.ItemType;
                var name     = schema[col].Name;

                DataKind kind = itemType.RawKind;
                int      keyCard;

                if (fullType.ValueCount == 0)
                {
                    throw ch.ExceptNotSupp("Column has variable length vector: " +
                                           name + ". Not supported in python. Drop column before sending to Python");
                }

                if (itemType.IsKey)
                {
                    // Key types are returned as their signed counterparts in Python, so that -1 can be the missing value.
                    // For U1 and U2 kinds, we convert to a larger type to prevent overflow. For U4 and U8 kinds, we convert
                    // to I4 if the key count is known (since KeyCount is an I4), and to I8 otherwise.
                    switch (kind)
                    {
                    case DataKind.U1:
                        kind = DataKind.I2;
                        break;

                    case DataKind.U2:
                        kind = DataKind.I4;
                        break;

                    case DataKind.U4:
                    case DataKind.U8:
                        // We convert known-cardinality U4 and U8 key types to I4.
                        kind = itemType.KeyCount > 0 ? DataKind.I4 : DataKind.I8;
                        break;
                    }

                    // A cardinality is only reported when the column also carries key value names.
                    keyCard = itemType.KeyCount;
                    if (!schema[col].HasKeyValues(keyCard))
                    {
                        keyCard = -1;
                    }
                }
                else if (itemType.IsStandardScalar())
                {
                    switch (itemType.RawKind)
                    {
                    case DataKind.I1:
                    case DataKind.I2:
                    case DataKind.I4:
                    case DataKind.I8:
                    case DataKind.U1:
                    case DataKind.U2:
                    case DataKind.U4:
                    case DataKind.U8:
                    case DataKind.R4:
                    case DataKind.R8:
                    case DataKind.BL:
                    case DataKind.TX:
                        break;

                    default:
                        throw Contracts.Except("Data type {0} not handled", itemType.RawKind);
                    }
                    keyCard = -1;
                }
                else
                {
                    throw Contracts.Except("Data type {0} not handled", itemType.RawKind);
                }

                int nSlots;
                ColumnMetadataInfo info;
                if (infos != null && infos.TryGetValue(name, out info) && info.Expand)
                {
                    Contracts.Assert(fullType.IsKnownSizeVector);
                    nSlots = fullType.VectorSize;
                    if (info.SlotNames != null)
                    {
                        // Caller-supplied slot names take precedence over schema metadata.
                        Contracts.Assert(info.SlotNames.Length == nSlots);
                        for (int i = 0; i < nSlots; i++)
                        {
                            AddUniqueName(info.SlotNames[i], allNames, nameIndices, nameUtf8Bytes);
                        }
                    }
                    else if (schema[col].HasSlotNames(nSlots))
                    {
                        var romNames = default(VBuffer <ReadOnlyMemory <char> >);
                        schema[col].Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref romNames);
                        foreach (var kvp in romNames.Items(true))
                        {
                            // REVIEW: Add the proper number of zeros to the slot index to make them sort in the right order.
                            var slotName = name + "." +
                                           (!kvp.Value.IsEmpty ? kvp.Value.ToString() : kvp.Key.ToString(CultureInfo.InvariantCulture));
                            AddUniqueName(slotName, allNames, nameIndices, nameUtf8Bytes);
                        }
                    }
                    else
                    {
                        // No slot names available: fall back to "<column>.<slot index>".
                        for (int i = 0; i < nSlots; i++)
                        {
                            AddUniqueName(name + "." + i, allNames, nameIndices, nameUtf8Bytes);
                        }
                    }
                }
                else
                {
                    nSlots = 1;
                    AddUniqueName(name, allNames, nameIndices, nameUtf8Bytes);
                }

                colIndices.Add(col);
                slotCounts.Add(nSlots);
                for (int i = 0; i < nSlots; i++)
                {
                    kindList.Add(kind);
                    keyCardList.Add(keyCard);
                }
            }

            ch.Assert(allNames.Count == kindList.Count);
            ch.Assert(allNames.Count == keyCardList.Count);
            ch.Assert(allNames.Count == nameIndices.Count);

            var kinds     = kindList.ToArray();
            var keyCards  = keyCardList.ToArray();
            var nameBytes = nameUtf8Bytes.ToArray();
            var names     = new byte *[allNames.Count];

            // Everything handed to the native side stays pinned until the rows have been sent.
            fixed(DataKind *prgkind = kinds)
            fixed(byte *prgbNames = nameBytes)
            fixed(byte **prgname  = names)
            fixed(int *prgkeyCard = keyCards)
            {
                // nameIndices holds, per native column, the offset of its name inside nameBytes.
                for (int iid = 0; iid < names.Length; iid++)
                {
                    names[iid] = prgbNames + nameIndices[iid];
                }

                DataViewBlock block;

                block.ccol     = allNames.Count;
                block.crow     = view.GetRowCount() ?? 0;
                block.names    = (sbyte **)prgname;
                block.kinds    = prgkind;
                block.keyCards = prgkeyCard;

                dataSink(penv, &block, out var setters, out var keyValueSetter);

                if (setters == null)
                {
                    // REVIEW: What should we do?
                    return;
                }
                ch.Assert(keyValueSetter != null);
                var kvSet = MarshalDelegate <KeyValueSetter>(keyValueSetter);

                // Set-based predicate so that each column lookup by the cursor is O(1).
                var activeCols = new HashSet <int>(colIndices);
                using (var cursor = view.GetRowCursor(activeCols.Contains))
                {
                    var fillers  = new BufferFillerBase[colIndices.Count];
                    var pyColumn = 0;
                    var keyIndex = 0;

                    // Pass 2: send key value names and create one filler per view column.
                    for (int i = 0; i < colIndices.Count; i++)
                    {
                        var column = schema[colIndices[i]];
                        var type   = column.Type;
                        if (type.ItemType.IsKey && column.HasKeyValues(type.ItemType.KeyCount))
                        {
                            var keyValues = default(VBuffer <ReadOnlyMemory <char> >);
                            column.Metadata.GetValue(MetadataUtils.Kinds.KeyValues, ref keyValues);
                            for (int slot = 0; slot < type.ValueCount; slot++)
                            {
                                foreach (var kvp in keyValues.Items())
                                {
                                    if (kvp.Value.IsEmpty)
                                    {
                                        kvSet(penv, keyIndex, kvp.Key, null, 0);
                                    }
                                    else
                                    {
                                        byte[] bt = Encoding.UTF8.GetBytes(kvp.Value.ToString());

                                        fixed(byte *pt = bt)
                                        kvSet(penv, keyIndex, kvp.Key, (sbyte *)pt, bt.Length);
                                    }
                                }
                                keyIndex++;
                            }
                        }
                        fillers[i] = BufferFillerBase.Create(penv, cursor, pyColumn, colIndices[i], kinds[pyColumn], type, setters[pyColumn]);

                        // Advance by the number of native columns pass 1 emitted for this view
                        // column, so that kinds/setters stay aligned even when a vector column
                        // was not expanded and therefore occupies a single native column.
                        pyColumn += slotCounts[i];
                    }

                    // Pass 3: stream the rows; each filler pushes the current row's value(s).
                    while (cursor.MoveNext())
                    {
                        for (int i = 0; i < fillers.Length; i++)
                        {
                            fillers[i].Set();
                        }
                    }
                }
            }
        }
Beispiel #5
0
            /// <summary>
            /// Creates the buffer filler that forwards values of one view column to a native setter.
            /// </summary>
            /// <param name="penv">Native environment block passed to every setter call.</param>
            /// <param name="input">Row from which the column values are read.</param>
            /// <param name="pyCol">Index of the (first) native column the filler writes to.</param>
            /// <param name="idvCol">Index of the source column in the data view.</param>
            /// <param name="dataKind">Kind used to pick the setter signature for non-key columns; ignored for key columns.</param>
            /// <param name="type">Column type; its item type decides between the key and non-key paths.</param>
            /// <param name="setter">Native function pointer, marshalled to the setter delegate matching the chosen path.</param>
            /// <returns>
            /// The filler for the column. Null is returned (after a failed assertion) for a key type
            /// whose raw kind is not U1, U2, U4 or U8.
            /// </returns>
            public static BufferFillerBase Create(EnvironmentBlock *penv, Row input, int pyCol, int idvCol, DataKind dataKind, ColumnType type, void *setter)
            {
                var itemType = type.ItemType;

                // We convert the unsigned types to signed types, with -1 indicating missing in Python.
                // Key value 0 becomes -1 through the "value - 1" shift; with a known key count, values
                // above the count are reported as -1 as well.
                //
                // NOTE(review): SendViewToNative advertises U1/U2 key columns as I2/I4 and
                // unknown-cardinality U4/U8 key columns as I8, while the setters below are marshalled
                // as I1/I2/I4 from the raw kind. Confirm that the native setter widths match.
                if (itemType.KeyCount > 0)
                {
                    var  keyCount = itemType.KeyCount;
                    uint keyMax   = (uint)keyCount;
                    switch (itemType.RawKind)
                    {
                    case DataKind.U1:
                        var fnI1 = MarshalDelegate <I1Setter>(setter);
                        ValuePoker <byte> pokeU1 =
                            (byte value, int col, long index) => fnI1(penv, col, index, value > keyMax ? (sbyte)-1 : (sbyte)(value - 1));
                        return(new Impl <byte>(input, pyCol, idvCol, type, pokeU1));

                    case DataKind.U2:
                        var fnI2 = MarshalDelegate <I2Setter>(setter);
                        ValuePoker <ushort> pokeU2 =
                            (ushort value, int col, long index) => fnI2(penv, col, index, value > keyMax ? (short)-1 : (short)(value - 1));
                        return(new Impl <ushort>(input, pyCol, idvCol, type, pokeU2));

                    case DataKind.U4:
                        var fnI4 = MarshalDelegate <I4Setter>(setter);
                        ValuePoker <uint> pokeU4 =
                            (uint value, int col, long index) => fnI4(penv, col, index, value > keyMax ? -1 : (int)(value - 1));
                        return(new Impl <uint>(input, pyCol, idvCol, type, pokeU4));

                    case DataKind.U8:
                        // U8 keys with a known count are written through the I4 setter.
                        fnI4 = MarshalDelegate <I4Setter>(setter);
                        ValuePoker <ulong> pokeU8 =
                            (ulong value, int col, long index) => fnI4(penv, col, index, value > keyMax ? -1 : (int)(value - 1));
                        return(new Impl <ulong>(input, pyCol, idvCol, type, pokeU8));
                    }
                }
                // Key type with count=0: there is no upper bound to check, only the shift by one.
                else if (itemType.IsKey)
                {
                    switch (itemType.RawKind)
                    {
                    case DataKind.U1:
                        var fnI1 = MarshalDelegate <I1Setter>(setter);
                        ValuePoker <byte> pokeU1 =
                            (byte value, int col, long index) => fnI1(penv, col, index, (sbyte)(value - 1));
                        return(new Impl <byte>(input, pyCol, idvCol, type, pokeU1));

                    case DataKind.U2:
                        var fnI2 = MarshalDelegate <I2Setter>(setter);
                        ValuePoker <ushort> pokeU2 =
                            (ushort value, int col, long index) => fnI2(penv, col, index, (short)(value - 1));
                        return(new Impl <ushort>(input, pyCol, idvCol, type, pokeU2));

                    case DataKind.U4:
                        var fnI4 = MarshalDelegate <I4Setter>(setter);
                        ValuePoker <uint> pokeU4 =
                            (uint value, int col, long index) => fnI4(penv, col, index, (int)(value - 1));
                        return(new Impl <uint>(input, pyCol, idvCol, type, pokeU4));

                    case DataKind.U8:
                        // U8 keys without a known count are also written through the I4 setter,
                        // so the shifted value is truncated to 32 bits.
                        fnI4 = MarshalDelegate <I4Setter>(setter);
                        ValuePoker <ulong> pokeU8 =
                            (ulong value, int col, long index) => fnI4(penv, col, index, (int)(value - 1));
                        return(new Impl <ulong>(input, pyCol, idvCol, type, pokeU8));
                    }
                }
                else
                {
                    // Non-key columns: values are passed through unchanged to the setter whose
                    // signature matches dataKind.
                    switch (dataKind)
                    {
                    case DataKind.R4:
                        var fnR4 = MarshalDelegate <R4Setter>(setter);
                        ValuePoker <float> pokeR4 =
                            (float value, int col, long index) => fnR4(penv, col, index, value);
                        return(new Impl <float>(input, pyCol, idvCol, type, pokeR4));

                    case DataKind.R8:
                        var fnR8 = MarshalDelegate <R8Setter>(setter);
                        ValuePoker <double> pokeR8 =
                            (double value, int col, long index) => fnR8(penv, col, index, value);
                        return(new Impl <double>(input, pyCol, idvCol, type, pokeR8));

                    case DataKind.BL:
                        // A bool is either false or true, so it maps to exactly 0 or 1.
                        var fnBl = MarshalDelegate <BLSetter>(setter);
                        ValuePoker <bool> pokeBl =
                            (bool value, int col, long index) => fnBl(penv, col, index, value ? (byte)1 : (byte)0);
                        return(new Impl <bool>(input, pyCol, idvCol, type, pokeBl));

                    case DataKind.I1:
                        var fnI1 = MarshalDelegate <I1Setter>(setter);
                        ValuePoker <sbyte> pokeI1 =
                            (sbyte value, int col, long index) => fnI1(penv, col, index, value);
                        return(new Impl <sbyte>(input, pyCol, idvCol, type, pokeI1));

                    case DataKind.I2:
                        var fnI2 = MarshalDelegate <I2Setter>(setter);
                        ValuePoker <short> pokeI2 =
                            (short value, int col, long index) => fnI2(penv, col, index, value);
                        return(new Impl <short>(input, pyCol, idvCol, type, pokeI2));

                    case DataKind.I4:
                        var fnI4 = MarshalDelegate <I4Setter>(setter);
                        ValuePoker <int> pokeI4 =
                            (int value, int col, long index) => fnI4(penv, col, index, value);
                        return(new Impl <int>(input, pyCol, idvCol, type, pokeI4));

                    case DataKind.I8:
                        var fnI8 = MarshalDelegate <I8Setter>(setter);
                        ValuePoker <long> pokeI8 =
                            (long value, int col, long index) => fnI8(penv, col, index, value);
                        return(new Impl <long>(input, pyCol, idvCol, type, pokeI8));

                    case DataKind.U1:
                        var fnU1 = MarshalDelegate <U1Setter>(setter);
                        ValuePoker <byte> pokeU1 =
                            (byte value, int col, long index) => fnU1(penv, col, index, value);
                        return(new Impl <byte>(input, pyCol, idvCol, type, pokeU1));

                    case DataKind.U2:
                        var fnU2 = MarshalDelegate <U2Setter>(setter);
                        ValuePoker <ushort> pokeU2 =
                            (ushort value, int col, long index) => fnU2(penv, col, index, value);
                        return(new Impl <ushort>(input, pyCol, idvCol, type, pokeU2));

                    case DataKind.U4:
                        var fnU4 = MarshalDelegate <U4Setter>(setter);
                        ValuePoker <uint> pokeU4 =
                            (uint value, int col, long index) => fnU4(penv, col, index, value);
                        return(new Impl <uint>(input, pyCol, idvCol, type, pokeU4));

                    case DataKind.U8:
                        var fnU8 = MarshalDelegate <U8Setter>(setter);
                        ValuePoker <ulong> pokeU8 =
                            (ulong value, int col, long index) => fnU8(penv, col, index, value);
                        return(new Impl <ulong>(input, pyCol, idvCol, type, pokeU8));

                    case DataKind.TX:
                        var fnTX = MarshalDelegate <TXSetter>(setter);
                        ValuePoker <ReadOnlyMemory <char> > pokeTX =
                            (ReadOnlyMemory <char> value, int col, long index) =>
                        {
                            if (value.IsEmpty)
                            {
                                // Empty text is sent as a null pointer with length zero.
                                fnTX(penv, col, index, null, 0);
                            }
                            else
                            {
                                // Text is sent as UTF-8 bytes, pinned for the duration of the call.
                                byte[] bt = Encoding.UTF8.GetBytes(value.ToString());

                                fixed(byte *pt = bt)
                                fnTX(penv, col, index, (sbyte *)pt, bt.Length);
                            }
                        };
                        return(new Impl <ReadOnlyMemory <char> >(input, pyCol, idvCol, type, pokeTX));

                    default:
                        throw Contracts.Except("Data type not handled");
                    }
                }

                // Only reachable for a key type whose raw kind is none of U1, U2, U4, U8.
                Contracts.Assert(false, "Unhandled type!");
                return(null);
            }
Beispiel #6
0
        private static void RunGraphCore(EnvironmentBlock *penv, IHostEnvironment env, string graphStr, int cdata, DataSourceBlock **ppdata)
        {
            Contracts.AssertValue(env);

            var     host = env.Register("RunGraph", penv->seed, null);
            JObject graph;

            try
            {
                graph = JObject.Parse(graphStr);
            }
            catch (JsonReaderException ex)
            {
                throw host.Except(ex, "Failed to parse experiment graph: {0}", ex.Message);
            }

            var runner = new GraphRunner(host, graph["nodes"] as JArray);

            var dvNative = new IDataView[cdata];

            try
            {
                for (int i = 0; i < cdata; i++)
                {
                    dvNative[i] = new NativeDataView(host, ppdata[i]);
                }

                // Setting inputs.
                var jInputs = graph["inputs"] as JObject;
                if (graph["inputs"] != null && jInputs == null)
                {
                    throw host.Except("Unexpected value for 'inputs': {0}", graph["inputs"]);
                }
                int iDv = 0;
                if (jInputs != null)
                {
                    foreach (var kvp in jInputs)
                    {
                        var pathValue = kvp.Value as JValue;
                        if (pathValue == null)
                        {
                            throw host.Except("Invalid value for input: {0}", kvp.Value);
                        }

                        var path    = pathValue.Value <string>();
                        var varName = kvp.Key;
                        var type    = runner.GetPortDataKind(varName);

                        switch (type)
                        {
                        case TlcModule.DataKind.FileHandle:
                            var fh = new SimpleFileHandle(host, path, false, false);
                            runner.SetInput(varName, fh);
                            break;

                        case TlcModule.DataKind.DataView:
                            IDataView dv;
                            if (!string.IsNullOrWhiteSpace(path))
                            {
                                var extension = Path.GetExtension(path);
                                if (extension == ".txt")
                                {
                                    dv = TextLoader.LoadFile(host, new TextLoader.Options(), new MultiFileSource(path));
                                }
                                else if (extension == ".dprep")
                                {
                                    dv = LoadDprepFile(BytesToString(penv->pythonPath), path);
                                }
                                else
                                {
                                    dv = new BinaryLoader(host, new BinaryLoader.Arguments(), path);
                                }
                            }
                            else
                            {
                                Contracts.Assert(iDv < dvNative.Length);
                                // prefetch all columns
                                dv = dvNative[iDv++];
                                var prefetch = new int[dv.Schema.Count];
                                for (int i = 0; i < prefetch.Length; i++)
                                {
                                    prefetch[i] = i;
                                }
                                dv = new CacheDataView(host, dv, prefetch);
                            }
                            runner.SetInput(varName, dv);
                            break;

                        case TlcModule.DataKind.PredictorModel:
                            PredictorModel pm;
                            if (!string.IsNullOrWhiteSpace(path))
                            {
                                using (var fs = File.OpenRead(path))
                                    pm = new PredictorModelImpl(host, fs);
                            }
                            else
                            {
                                throw host.Except("Model must be loaded from a file");
                            }
                            runner.SetInput(varName, pm);
                            break;

                        case TlcModule.DataKind.TransformModel:
                            TransformModel tm;
                            if (!string.IsNullOrWhiteSpace(path))
                            {
                                using (var fs = File.OpenRead(path))
                                    tm = new TransformModelImpl(host, fs);
                            }
                            else
                            {
                                throw host.Except("Model must be loaded from a file");
                            }
                            runner.SetInput(varName, tm);
                            break;

                        default:
                            throw host.Except("Port type {0} not supported", type);
                        }
                    }
                }
                runner.RunAll();

                // Reading outputs.
                using (var ch = host.Start("Reading outputs"))
                {
                    var jOutputs = graph["outputs"] as JObject;
                    if (jOutputs != null)
                    {
                        foreach (var kvp in jOutputs)
                        {
                            var pathValue = kvp.Value as JValue;
                            if (pathValue == null)
                            {
                                throw host.Except("Invalid value for input: {0}", kvp.Value);
                            }
                            var path    = pathValue.Value <string>();
                            var varName = kvp.Key;
                            var type    = runner.GetPortDataKind(varName);

                            switch (type)
                            {
                            case TlcModule.DataKind.FileHandle:
                                var fh = runner.GetOutput <IFileHandle>(varName);
                                throw host.ExceptNotSupp("File handle outputs not yet supported.");

                            case TlcModule.DataKind.DataView:
                                var idv = runner.GetOutput <IDataView>(varName);
                                if (path == CSR_MATRIX)
                                {
                                    SendViewToNativeAsCsr(ch, penv, idv);
                                }
                                else if (!string.IsNullOrWhiteSpace(path))
                                {
                                    SaveIdvToFile(idv, path, host);
                                }
                                else
                                {
                                    var infos = ProcessColumns(ref idv, penv->maxSlots, host);
                                    SendViewToNativeAsDataFrame(ch, penv, idv, infos);
                                }
                                break;

                            case TlcModule.DataKind.PredictorModel:
                                var pm = runner.GetOutput <PredictorModel>(varName);
                                if (!string.IsNullOrWhiteSpace(path))
                                {
                                    SavePredictorModelToFile(pm, path, host);
                                }
                                else
                                {
                                    throw host.Except("Returning in-memory models is not supported");
                                }
                                break;

                            case TlcModule.DataKind.TransformModel:
                                var tm = runner.GetOutput <TransformModel>(varName);
                                if (!string.IsNullOrWhiteSpace(path))
                                {
                                    using (var fs = File.OpenWrite(path))
                                        tm.Save(host, fs);
                                }
                                else
                                {
                                    throw host.Except("Returning in-memory models is not supported");
                                }
                                break;

                            case TlcModule.DataKind.Array:
                                var objArray = runner.GetOutput <object[]>(varName);
                                if (objArray is PredictorModel[])
                                {
                                    var modelArray = (PredictorModel[])objArray;
                                    // Save each model separately
                                    for (var i = 0; i < modelArray.Length; i++)
                                    {
                                        var modelPath = string.Format(CultureInfo.InvariantCulture, path, i);
                                        SavePredictorModelToFile(modelArray[i], modelPath, host);
                                    }
                                }
                                else
                                {
                                    throw host.Except("DataKind.Array type {0} not supported", objArray.First().GetType());
                                }
                                break;

                            default:
                                throw host.Except("Port type {0} not supported", type);
                            }
                        }
                    }
                }
            }
            finally
            {
                // The raw data view is disposable so it lets go of unmanaged raw pointers before we return.
                for (int i = 0; i < dvNative.Length; i++)
                {
                    var view = dvNative[i];
                    if (view == null)
                    {
                        continue;
                    }
                    host.Assert(view is IDisposable);
                    var disp = (IDisposable)dvNative[i];
                    disp.Dispose();
                }
            }
        }
Beispiel #7
0
            /// <summary>
            /// Creates the filler that reads column <paramref name="idvCol"/> from <paramref name="input"/>
            /// and appends its values to <paramref name="csrData"/>, converted to the requested
            /// <paramref name="outputDataKind"/> (R4 or R8).
            /// </summary>
            /// <param name="penv">Native environment block; not referenced by this method.</param>
            /// <param name="input">Row the created filler reads from.</param>
            /// <param name="idvCol">Index of the source column.</param>
            /// <param name="idvColType">Type of the source column; the raw kind of its item type selects the filler.</param>
            /// <param name="outputDataKind">Kind the values are converted to before being appended.</param>
            /// <param name="csrData">Destination the appender delegates write to.</param>
            /// <returns>A <c>CsrFiller</c> instantiated for the source item kind.</returns>
            /// <remarks>
            /// Raises a contract exception when either the target kind or the source item kind is not handled.
            /// </remarks>
            public static CsrFillerBase Create(EnvironmentBlock *penv,
                                               DataViewRow input,
                                               int idvCol,
                                               DataViewType idvColType,
                                               InternalDataKind outputDataKind,
                                               CsrData csrData)
            {
                switch (outputDataKind)
                {
                case InternalDataKind.R4:
                    return CreateR4Filler(input, idvCol, idvColType, csrData);

                case InternalDataKind.R8:
                    return CreateR8Filler(input, idvCol, idvColType, csrData);

                default:
                    throw Contracts.Except("Target data type not supported.");
                }
            }

            // Builds a filler whose values are appended through CsrData.AppendR4.
            private static CsrFillerBase CreateR4Filler(DataViewRow input, int idvCol, DataViewType idvColType, CsrData csrData)
            {
                var sourceKind = idvColType.GetItemType().GetRawKind();

                switch (sourceKind)
                {
                case InternalDataKind.I1:
                {
                    DataAppender <sbyte> fromI1 = (value, index) => csrData.AppendR4((float)value, index);
                    return new CsrFiller <sbyte>(input, idvCol, idvColType, fromI1, csrData);
                }

                case InternalDataKind.I2:
                {
                    DataAppender <short> fromI2 = (value, index) => csrData.AppendR4((float)value, index);
                    return new CsrFiller <short>(input, idvCol, idvColType, fromI2, csrData);
                }

                case InternalDataKind.U1:
                {
                    DataAppender <byte> fromU1 = (value, index) => csrData.AppendR4((float)value, index);
                    return new CsrFiller <byte>(input, idvCol, idvColType, fromU1, csrData);
                }

                case InternalDataKind.U2:
                {
                    DataAppender <ushort> fromU2 = (value, index) => csrData.AppendR4((float)value, index);
                    return new CsrFiller <ushort>(input, idvCol, idvColType, fromU2, csrData);
                }

                case InternalDataKind.R4:
                {
                    DataAppender <float> fromR4 = (value, index) => csrData.AppendR4(value, index);
                    return new CsrFiller <float>(input, idvCol, idvColType, fromR4, csrData);
                }

                default:
                    throw Contracts.Except("Source data type not supported");
                }
            }

            // Builds a filler whose values are appended through CsrData.AppendR8.
            private static CsrFillerBase CreateR8Filler(DataViewRow input, int idvCol, DataViewType idvColType, CsrData csrData)
            {
                var sourceKind = idvColType.GetItemType().GetRawKind();

                switch (sourceKind)
                {
                case InternalDataKind.I1:
                {
                    DataAppender <sbyte> fromI1 = (value, index) => csrData.AppendR8((double)value, index);
                    return new CsrFiller <sbyte>(input, idvCol, idvColType, fromI1, csrData);
                }

                case InternalDataKind.I2:
                {
                    DataAppender <short> fromI2 = (value, index) => csrData.AppendR8((double)value, index);
                    return new CsrFiller <short>(input, idvCol, idvColType, fromI2, csrData);
                }

                case InternalDataKind.I4:
                {
                    DataAppender <int> fromI4 = (value, index) => csrData.AppendR8((double)value, index);
                    return new CsrFiller <int>(input, idvCol, idvColType, fromI4, csrData);
                }

                case InternalDataKind.U1:
                {
                    DataAppender <byte> fromU1 = (value, index) => csrData.AppendR8((double)value, index);
                    return new CsrFiller <byte>(input, idvCol, idvColType, fromU1, csrData);
                }

                case InternalDataKind.U2:
                {
                    DataAppender <ushort> fromU2 = (value, index) => csrData.AppendR8((double)value, index);
                    return new CsrFiller <ushort>(input, idvCol, idvColType, fromU2, csrData);
                }

                case InternalDataKind.U4:
                {
                    DataAppender <uint> fromU4 = (value, index) => csrData.AppendR8((double)value, index);
                    return new CsrFiller <uint>(input, idvCol, idvColType, fromU4, csrData);
                }

                case InternalDataKind.R4:
                {
                    DataAppender <float> fromR4 = (value, index) => csrData.AppendR8((double)value, index);
                    return new CsrFiller <float>(input, idvCol, idvColType, fromR4, csrData);
                }

                case InternalDataKind.R8:
                {
                    DataAppender <double> fromR8 = (value, index) => csrData.AppendR8(value, index);
                    return new CsrFiller <double>(input, idvCol, idvColType, fromR8, csrData);
                }

                default:
                    throw Contracts.Except("Source data type not supported");
                }
            }
Beispiel #8
0
        /// <summary>
        /// Sends the visible columns of <paramref name="view"/> to the native data sink as a CSR matrix,
        /// exposed to the sink as four output columns named "data", "indices", "indptr" and "shape".
        /// </summary>
        /// <param name="ch">Channel used to raise the variable-length-vector error.</param>
        /// <param name="penv">Native environment block; must not be null. Nothing is sent when its
        /// <c>dataSink</c> is null.</param>
        /// <param name="view">Data view to stream. Hidden columns are skipped.</param>
        /// <remarks>
        /// The "data" output is R4 unless at least one source column has an I4, U4 or R8 item type, in which
        /// case it is R8. Columns with a value count of zero (variable length vectors) and columns whose
        /// item type is not one of I1, I2, I4, U1, U2, U4, R4, R8 cause an exception.
        /// </remarks>
        private static unsafe void SendViewToNativeAsCsr(IChannel ch, EnvironmentBlock *penv, IDataView view)
        {
            Contracts.AssertValue(ch);
            Contracts.Assert(penv != null);
            Contracts.AssertValue(view);
            if (penv->dataSink == null)
            {
                // Environment doesn't want any data!
                return;
            }

            var dataSink = MarshalDelegate <DataSink>(penv->dataSink);

            var schema         = view.Schema;
            var colIndices     = new List <int>();
            var outputDataKind = InternalDataKind.R4;

            int numOutputRows = 0;
            int numOutputCols = 0;

            // Pass 1: pick the columns to send, widen the output kind if needed, and total the output width.
            for (int col = 0; col < schema.Count; col++)
            {
                if (schema[col].IsHidden)
                {
                    continue;
                }

                var fullType   = schema[col].Type;
                var itemType   = fullType.GetItemType();
                int valueCount = fullType.GetValueCount();

                if (valueCount == 0)
                {
                    throw ch.ExceptNotSupp("Column has variable length vector: " +
                                           schema[col].Name + ". Not supported in python. Drop column before sending to Python");
                }

                if (itemType.IsStandardScalar())
                {
                    switch (itemType.GetRawKind())
                    {
                    default:
                        throw Contracts.Except("Data type {0} not supported", itemType.GetRawKind());

                    case InternalDataKind.I1:
                    case InternalDataKind.I2:
                    case InternalDataKind.U1:
                    case InternalDataKind.U2:
                    case InternalDataKind.R4:
                        break;

                    case InternalDataKind.I4:
                    case InternalDataKind.U4:
                    case InternalDataKind.R8:
                        // One such column is enough to switch the whole "data" output to R8.
                        outputDataKind = InternalDataKind.R8;
                        break;
                    }
                }
                else
                {
                    throw Contracts.Except("Data type {0} not supported", itemType.GetRawKind());
                }

                colIndices.Add(col);
                numOutputCols += valueCount;
            }

            // Set view of the selected columns so the schema filter below is a constant-time lookup
            // per column instead of a linear scan of the list.
            var selectedCols = new HashSet <int>(colIndices);

            var allNames      = new HashSet <string>();
            var nameIndices   = new List <int>();
            var nameUtf8Bytes = new List <Byte>();

            AddUniqueName("data", allNames, nameIndices, nameUtf8Bytes);
            AddUniqueName("indices", allNames, nameIndices, nameUtf8Bytes);
            AddUniqueName("indptr", allNames, nameIndices, nameUtf8Bytes);
            AddUniqueName("shape", allNames, nameIndices, nameUtf8Bytes);

            // Kinds of the four output columns, in the same order as the names above.
            var kindList = new List <InternalDataKind> {
                outputDataKind,
                InternalDataKind.I4,
                InternalDataKind.I4,
                InternalDataKind.I4
            };

            var kinds     = kindList.ToArray();
            var nameBytes = nameUtf8Bytes.ToArray();
            var names     = new byte *[allNames.Count];

            fixed(InternalDataKind *prgkind = kinds)
            fixed(byte *prgbNames = nameBytes)
            fixed(byte **prgname  = names)
            {
                // Each name pointer is the pinned byte buffer plus that name's recorded offset.
                for (int iid = 0; iid < names.Length; iid++)
                {
                    names[iid] = prgbNames + nameIndices[iid];
                }

                DataViewBlock block;

                block.ccol     = allNames.Count;
                block.crow     = view.GetRowCount() ?? 0;
                block.names    = (sbyte **)prgname;
                block.kinds    = prgkind;
                block.keyCards = null;

                // The key value setter is not used for CSR output, so it is discarded.
                dataSink(penv, &block, out var setters, out _);

                if (setters == null)
                {
                    return;
                }

                using (var cursor = view.GetRowCursor(view.Schema.Where(col => selectedCols.Contains(col.Index))))
                {
                    CsrData csrData = new CsrData(penv, setters, outputDataKind);
                    var     fillers = new CsrFillerBase[colIndices.Count];

                    for (int i = 0; i < colIndices.Count; i++)
                    {
                        var type = schema[colIndices[i]].Type;
                        fillers[i] = CsrFillerBase.Create(penv, cursor, colIndices[i], type, outputDataKind, csrData);
                    }

                    // Pass 2: run every filler on each row, then close the row in the CSR data.
                    while (cursor.MoveNext())
                    {
                        for (int i = 0; i < fillers.Length; i++)
                        {
                            fillers[i].Set();
                        }

                        csrData.IncrementRow();
                        numOutputRows++;
                    }

                    csrData.SetShape(numOutputRows, numOutputCols);
                }
            }
        }
Beispiel #9
0
        /// <summary>
        /// Sends the visible columns of <paramref name="view"/> to the native data sink as a set of named
        /// output columns, together with the key values of key-typed columns that carry them.
        /// </summary>
        /// <param name="ch">Channel used for assertions.</param>
        /// <param name="penv">Native environment block; must not be null. Nothing is sent when its
        /// <c>dataSink</c> is null.</param>
        /// <param name="view">Data view to stream. Hidden columns are skipped.</param>
        /// <param name="infos">Optional per-column metadata keyed by column name. A column whose entry has
        /// <c>Expand</c> set is emitted as one output column per vector slot.</param>
        /// <remarks>
        /// Key-typed columns are reported with a signed kind (U1 as I2, U2 as I4, U4/U8 as I4 when the key
        /// count is positive and as I8 otherwise). Item types that are neither keys nor one of the handled
        /// standard scalar kinds cause an exception.
        /// </remarks>
        private static unsafe void SendViewToNativeAsDataFrame(IChannel ch, EnvironmentBlock *penv, IDataView view, Dictionary <string, ColumnMetadataInfo> infos = null)
        {
            Contracts.AssertValue(ch);
            Contracts.Assert(penv != null);
            Contracts.AssertValue(view);
            Contracts.AssertValueOrNull(infos);
            if (penv->dataSink == null)
            {
                // Environment doesn't want any data!
                return;
            }

            var dataSink = MarshalDelegate <DataSink>(penv->dataSink);

            var schema        = view.Schema;
            var colIndices    = new List <int>(1000);
            var kindList      = new ValueListBuilder <InternalDataKind>(INDICES_BUFFER_SIZE);
            var keyCardList   = new ValueListBuilder <int>(INDICES_BUFFER_SIZE);
            var nameUtf8Bytes = new ValueListBuilder <byte>(UTF8_BUFFER_SIZE);
            var nameIndices   = new ValueListBuilder <int>(INDICES_BUFFER_SIZE);
            var valueCounts   = new List <byte>(1000);

            // Pass 1: describe every output column (name, kind, key cardinality, value-count flag).
            for (int col = 0; col < schema.Count; col++)
            {
                if (schema[col].IsHidden)
                {
                    continue;
                }

                var fullType = schema[col].Type;
                var itemType = fullType.GetItemType();
                var name     = schema[col].Name;

                var kind = itemType.GetRawKind();
                int keyCard;

                // 0 flags a column whose value count is zero, 1 flags everything else.
                byte valueCount = (fullType.GetValueCount() == 0) ? (byte)0 : (byte)1;

                if (itemType is KeyDataViewType)
                {
                    // Key types are returned as their signed counterparts in Python, so that -1 can be the missing value.
                    // For U1 and U2 kinds, we convert to a larger type to prevent overflow. For U4 and U8 kinds, we convert
                    // to I4 if the key count is known (since KeyCount is an I4), and to I8 otherwise.
                    switch (kind)
                    {
                    case InternalDataKind.U1:
                        kind = InternalDataKind.I2;
                        break;

                    case InternalDataKind.U2:
                        kind = InternalDataKind.I4;
                        break;

                    case InternalDataKind.U4:
                        // We convert known-cardinality U4 key types to I4.
                        kind = itemType.GetKeyCount() > 0 ? InternalDataKind.I4 : InternalDataKind.I8;
                        break;

                    case InternalDataKind.U8:
                        // We convert known-cardinality U8 key types to I4.
                        kind = itemType.GetKeyCount() > 0 ? InternalDataKind.I4 : InternalDataKind.I8;
                        break;
                    }

                    // A key column without key values is reported with cardinality -1.
                    keyCard = itemType.GetKeyCountAsInt32();
                    if (!schema[col].HasKeyValues())
                    {
                        keyCard = -1;
                    }
                }
                else if (itemType.IsStandardScalar())
                {
                    // kind still holds the item type's raw kind here; it is only remapped for key types.
                    switch (kind)
                    {
                    default:
                        throw Contracts.Except("Data type {0} not handled", kind);

                    case InternalDataKind.I1:
                    case InternalDataKind.I2:
                    case InternalDataKind.I4:
                    case InternalDataKind.I8:
                    case InternalDataKind.U1:
                    case InternalDataKind.U2:
                    case InternalDataKind.U4:
                    case InternalDataKind.U8:
                    case InternalDataKind.R4:
                    case InternalDataKind.R8:
                    case InternalDataKind.BL:
                    case InternalDataKind.TX:
                    case InternalDataKind.DT:
                        break;
                    }
                    keyCard = -1;
                }
                else
                {
                    throw Contracts.Except("Data type {0} not handled", itemType.GetRawKind());
                }

                int nSlots;
                ColumnMetadataInfo info;
                if (infos != null && infos.TryGetValue(name, out info) && info.Expand)
                {
                    // Expanded column: one output name per vector slot.
                    Contracts.Assert(fullType.IsKnownSizeVector());
                    nSlots = fullType.GetVectorSize();
                    if (info.SlotNames != null)
                    {
                        Contracts.Assert(info.SlotNames.Length == nSlots);
                        for (int i = 0; i < nSlots; i++)
                        {
                            AddUniqueName(info.SlotNames[i], ref nameIndices, ref nameUtf8Bytes);
                        }
                    }
                    else if (schema[col].HasSlotNames(nSlots))
                    {
                        var romNames = default(VBuffer <ReadOnlyMemory <char> >);
                        schema[col].Annotations.GetValue(AnnotationUtils.Kinds.SlotNames, ref romNames);
                        AddUniqueName(name, romNames, ref nameIndices, ref nameUtf8Bytes);
                    }
                    else
                    {
                        for (int i = 0; i < nSlots; i++)
                        {
                            AddUniqueName(name + "." + i, ref nameIndices, ref nameUtf8Bytes);
                        }
                    }
                }
                else
                {
                    nSlots = 1;
                    AddUniqueName(name, ref nameIndices, ref nameUtf8Bytes);
                }

                colIndices.Add(col);
                for (int i = 0; i < nSlots; i++)
                {
                    kindList.Append(kind);
                    keyCardList.Append(keyCard);
                    valueCounts.Add(valueCount);
                }
            }

            ch.Assert(kindList.Length == keyCardList.Length);
            ch.Assert(kindList.Length == nameIndices.Length);

            // Set view of the selected columns so the schema filter below is a constant-time lookup
            // per column instead of a linear scan of the list.
            var selectedCols = new HashSet <int>(colIndices);

            var kinds            = kindList.AsSpan();
            var keyCards         = keyCardList.AsSpan();
            var nameBytes        = nameUtf8Bytes.AsSpan();
            var names            = new byte *[nameIndices.Length];
            var valueCountsBytes = valueCounts.ToArray();

            fixed(InternalDataKind *prgkind = kinds)
            fixed(byte *prgbNames      = nameBytes)
            fixed(byte **prgname       = names)
            fixed(int *prgkeyCard      = keyCards)
            fixed(byte *prgbValueCount = valueCountsBytes)
            {
                // Each name pointer is the pinned byte buffer plus that name's recorded offset.
                for (int iid = 0; iid < names.Length; iid++)
                {
                    names[iid] = prgbNames + nameIndices[iid];
                }

                DataViewBlock block;

                block.ccol        = nameIndices.Length;
                block.crow        = view.GetRowCount() ?? 0;
                block.names       = (sbyte **)prgname;
                block.kinds       = prgkind;
                block.keyCards    = prgkeyCard;
                block.valueCounts = prgbValueCount;

                dataSink(penv, &block, out var setters, out var keyValueSetter);

                if (setters == null)
                {
                    // REVIEW: What should we do?
                    return;
                }
                ch.Assert(keyValueSetter != null);
                var kvSet = MarshalDelegate <KeyValueSetter>(keyValueSetter);

                using (var cursor = view.GetRowCursor(view.Schema.Where(col => selectedCols.Contains(col.Index))))
                {
                    var fillers  = new BufferFillerBase[colIndices.Count];
                    var pyColumn = 0;
                    var keyIndex = 0;
                    for (int i = 0; i < colIndices.Count; i++)
                    {
                        var column   = schema[colIndices[i]];
                        var type     = column.Type;
                        var itemType = type.GetItemType();
                        if ((itemType is KeyDataViewType) && column.HasKeyValues())
                        {
                            var keyValues = default(VBuffer <ReadOnlyMemory <char> >);
                            column.Annotations.GetValue(AnnotationUtils.Kinds.KeyValues, ref keyValues);

                            // The same key values are sent once per slot, each under its own key index.
                            int slotCount = type.GetValueCount();
                            for (int slot = 0; slot < slotCount; slot++)
                            {
                                foreach (var kvp in keyValues.Items())
                                {
                                    if (kvp.Value.IsEmpty)
                                    {
                                        kvSet(penv, keyIndex, kvp.Key, null, 0);
                                    }
                                    else
                                    {
                                        byte[] bt = Encoding.UTF8.GetBytes(kvp.Value.ToString());

                                        fixed(byte *pt = bt)
                                        kvSet(penv, keyIndex, kvp.Key, (sbyte *)pt, bt.Length);
                                    }
                                }
                                keyIndex++;
                            }
                        }
                        fillers[i] = BufferFillerBase.Create(penv, cursor, pyColumn, colIndices[i], prgkind[pyColumn], type, setters[pyColumn]);

                        // NOTE(review): the output column index advances by the vector size for every
                        // known-size vector, while names/kinds above are emitted per slot only for columns
                        // marked Expand in infos. This presumably relies on callers marking every
                        // known-size vector column as expanded - confirm against the producer of infos.
                        if ((type is VectorDataViewType) && (type.GetVectorSize() > 0))
                        {
                            pyColumn += type.GetVectorSize();
                        }
                        else
                        {
                            pyColumn++;
                        }
                    }

                    // Pass 2: advance row by row and fill the values of the current row.
                    while (cursor.MoveNext())
                    {
                        for (int i = 0; i < fillers.Length; i++)
                        {
                            fillers[i].Set();
                        }
                    }
                }
            }
        }