コード例 #1
0
        /// <summary>
        /// Factory method for loading a <see cref="TransposeLoader"/> from a model repository.
        /// </summary>
        public static TransposeLoader Create(IHostEnvironment env, ModelLoadContext ctx, IMultiStreamSource files)
        {
            Contracts.CheckValue(env, nameof(env));
            IHost host = env.Register(LoadName);

            host.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel(GetVersionInfo());
            host.CheckValue(files, nameof(files));

            return host.Apply("Loading Model", ch =>
            {
                if (files.Count != 0)
                    return new TransposeLoader(host, ctx, files);

                // In the case where we have no input streams, but we have an input schema from
                // the model repository, we still want to surface ourselves as being a binary loader
                // with the existing schema. The loader "owns" this stream.
                BinaryLoader schemaView = null;
                bool hasSchema = ctx.TryLoadBinaryStream("Schema.idv",
                    r => schemaView = new BinaryLoader(host, new BinaryLoader.Arguments(),
                        HybridMemoryStream.CreateCache(r.BaseStream), leaveOpen: false));
                if (hasSchema)
                {
                    host.AssertValue(schemaView);
                    host.CheckDecode(schemaView.GetRowCount() == 0);
                    // REVIEW: Do we want to be a bit more restrictive around uninterpretable columns?
                    return new TransposeLoader(host, ctx, schemaView);
                }
                host.Assert(schemaView == null);
                // Fall through, allow the failure to be on OpenStream.
                return new TransposeLoader(host, ctx, files);
            });
        }
コード例 #2
0
        /// <summary>
        /// Deserializing constructor: restores the prediction model and the training schema
        /// from the given model-load context.
        /// </summary>
        protected PredictionTransformerBase(IHost host, ModelLoadContext ctx)
        {
            Host = host;

            // *** Binary format ***
            // model: prediction model.
            // stream: empty data view that contains train schema.
            // id of string: feature column.

            TModel model;
            ctx.LoadModel<TModel, SignatureLoadModel>(host, out model, DirModel);
            Model = model;

            // Copy the schema stream into memory so the binary loader owns a seekable stream.
            // NOTE(review): the TryLoadBinaryStream result is ignored here; presumably the
            // stream is always present in the repository — confirm against the save path.
            var schemaStream = new MemoryStream();
            ctx.TryLoadBinaryStream(DirTransSchema, reader => reader.BaseStream.CopyTo(schemaStream));
            schemaStream.Position = 0;

            TrainSchema = new BinaryLoader(host, new BinaryLoader.Arguments(), schemaStream).Schema;
        }
コード例 #3
0
            /// <summary>
            /// Factory method for deserializing a <see cref="TensorFlowMapper"/> from a model context.
            /// </summary>
            public static TensorFlowMapper Create(IHostEnvironment env, ModelLoadContext ctx, ISchema schema)
            {
                Contracts.CheckValue(env, nameof(env));
                env.CheckValue(ctx, nameof(ctx));
                ctx.CheckAtModel(GetVersionInfo());

                int numInputs = ctx.Reader.ReadInt32();
                Contracts.CheckDecode(numInputs > 0);

                var source = new string[numInputs];
                for (int i = 0; i < numInputs; i++)
                    source[i] = ctx.LoadNonEmptyString();

                // The serialized TensorFlow model lives in its own binary stream.
                byte[] data = null;
                if (!ctx.TryLoadBinaryStream("TFModel", r => data = r.ReadByteArray()))
                    throw env.ExceptDecode();

                string outputColName = ctx.LoadNonEmptyString();
                return new TensorFlowMapper(env, schema, data, source, outputColName);
            }
コード例 #4
0
        /// <summary>
        /// Deserializing constructor: rehydrates the loader state (directory count, schema,
        /// source column indices, sub-loader bytes and path parser) from <paramref name="ctx"/>.
        /// </summary>
        private PartitionedFileLoader(IHost host, ModelLoadContext ctx, IMultiStreamSource files)
        {
            Contracts.AssertValue(host);
            _host = host;
            _host.AssertValue(ctx);
            _host.AssertValue(files);

            // ** Binary format **
            // int: tailing directory count
            // Schema of the loader
            // int[]: srcColumns
            // byte[]: subloader
            // model: file path spec

            _tailingDirCount = ctx.Reader.ReadInt32();

            // Rehydrate the schema from its dedicated binary stream.
            byte[] schemaBytes = null;
            if (!ctx.TryLoadBinaryStream(SchemaCtxName, r => schemaBytes = r.ReadByteArray()))
                throw _host.ExceptDecode();

            var schemaStream = new MemoryStream(schemaBytes, writable: false);
            Schema = new BinaryLoader(_host, new BinaryLoader.Arguments(), schemaStream).Schema;

            _srcDirIndex = ctx.Reader.ReadIntArray();
            _subLoaderBytes = ctx.Reader.ReadByteArray();

            ctx.LoadModel<IPartitionedPathParser, SignatureLoadModel>(_host, out _pathParser, FilePathSpecCtxName);

            _files = files;
        }
コード例 #5
0
        /// <summary>
        /// Deserializing constructor: reads the default-map bytes from the extra stream and
        /// trains the lookup map from them.
        /// </summary>
        private TermLookupTransform(IChannel ch, ModelLoadContext ctx, IHost host, IDataView input)
            : base(host, ctx, input, TestIsText)
        {
            Host.AssertValue(ch);

            // *** Binary format ***
            // <base>
            ch.AssertNonEmpty(Infos);

            // Extra streams:
            // DefaultMap.idv
            byte[] mapBytes = null;
            if (!ctx.TryLoadBinaryStream(DefaultMapName, r => mapBytes = ReadAllBytes(ch, r)))
                throw ch.ExceptDecode();
            _bytes = mapBytes;

            // Turn the raw bytes into a loader, validate it, then train the value map from it.
            _ldr = GetLoader(Host, _bytes);
            ValidateLoader(ch, _ldr);
            _valueMap = Train(ch, _ldr);
            SetMetadata();
        }
コード例 #6
0
        // Factory method for SignatureLoadModel. Restores the ONNX model bytes and the
        // input/output column names from the model context.
        private static OnnxTransform Create(IHostEnvironment env, ModelLoadContext ctx)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel(GetVersionInfo());

            // The serialized ONNX model lives in its own binary stream.
            byte[] modelBytes = null;
            if (!ctx.TryLoadBinaryStream("OnnxModel", r => modelBytes = r.ReadByteArray()))
                throw env.ExceptDecode();

            // Models written after version 0x00010001 carry explicit input/output counts;
            // older models implicitly have exactly one of each.
            bool multiInputOutput = ctx.Header.ModelVerWritten > 0x00010001;

            int numInputs = multiInputOutput ? ctx.Reader.ReadInt32() : 1;
            env.CheckDecode(numInputs > 0);
            var inputs = new string[numInputs];
            for (int i = 0; i < numInputs; i++)
                inputs[i] = ctx.LoadNonEmptyString();

            int numOutputs = multiInputOutput ? ctx.Reader.ReadInt32() : 1;
            env.CheckDecode(numOutputs > 0);
            var outputs = new string[numOutputs];
            for (int i = 0; i < numOutputs; i++)
                outputs[i] = ctx.LoadNonEmptyString();

            var args = new Arguments { InputColumns = inputs, OutputColumns = outputs };
            return new OnnxTransform(env, args, modelBytes);
        }
コード例 #7
0
        /// <summary>
        /// Deserializing constructor: restores the model, the train schema, the feature
        /// column and its type, and creates the schema-bindable mapper.
        /// </summary>
        internal PredictionTransformerBase(IHost host, ModelLoadContext ctx)
        {
            Host = host;

            // *** Binary format ***
            // model: prediction model.
            // stream: empty data view that contains train schema.
            // id of string: feature column.

            TModel model;
            ctx.LoadModel<TModel, SignatureLoadModel>(host, out model, DirModel);
            Model = model;

            // Copy the schema stream into memory so the binary loader owns a seekable stream.
            var schemaStream = new MemoryStream();
            ctx.TryLoadBinaryStream(DirTransSchema, reader => reader.BaseStream.CopyTo(schemaStream));
            schemaStream.Position = 0;

            TrainSchema = new BinaryLoader(host, new BinaryLoader.Arguments(), schemaStream).Schema;

            // The feature column must exist in the restored train schema.
            FeatureColumn = ctx.LoadString();
            int featureCol;
            if (!TrainSchema.TryGetColumnIndex(FeatureColumn, out featureCol))
                throw Host.ExceptSchemaMismatch(nameof(FeatureColumn), RoleMappedSchema.ColumnRole.Feature.Value, FeatureColumn);
            FeatureColumnType = TrainSchema.GetColumnType(featureCol);

            BindableMapper = ScoreUtils.GetSchemaBindableMapper(Host, model);
        }
コード例 #8
0
        // Factory method for SignatureLoadModel. Restores the ONNX model bytes, the
        // input/output column names, and any custom-provided tensor shapes.
        private static OnnxTransformer Create(IHostEnvironment env, ModelLoadContext ctx)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel(GetVersionInfo());

            // The serialized ONNX model lives in its own binary stream.
            byte[] modelBytes = null;
            if (!ctx.TryLoadBinaryStream("OnnxModel", r => modelBytes = r.ReadByteArray()))
                throw env.ExceptDecode();

            // Models written after version 0x00010001 carry explicit input/output counts;
            // older models implicitly have exactly one of each.
            bool multiInputOutput = ctx.Header.ModelVerWritten > 0x00010001;

            int numInputs = multiInputOutput ? ctx.Reader.ReadInt32() : 1;
            env.CheckDecode(numInputs > 0);
            var inputs = new string[numInputs];
            for (int i = 0; i < numInputs; i++)
                inputs[i] = ctx.LoadNonEmptyString();

            int numOutputs = multiInputOutput ? ctx.Reader.ReadInt32() : 1;
            env.CheckDecode(numOutputs > 0);
            var outputs = new string[numOutputs];
            for (int i = 0; i < numOutputs; i++)
                outputs[i] = ctx.LoadNonEmptyString();

            // Custom-provided shapes overwrite shapes loaded from the ONNX model file.
            // A count of 0 means no custom shapes were saved.
            int customShapeCount = ctx.Reader.ReadInt32();
            CustomShapeInfo[] customShapes = null;
            if (customShapeCount > 0)
            {
                customShapes = new CustomShapeInfo[customShapeCount];
                for (int i = 0; i < customShapeCount; i++)
                {
                    string name = ctx.LoadNonEmptyString();
                    int[] shape = ctx.Reader.ReadIntArray();
                    customShapes[i] = new CustomShapeInfo { Name = name, Shape = shape };
                }
            }

            var options = new Options
            {
                InputColumns = inputs,
                OutputColumns = outputs,
                CustomShapeInfos = customShapes,
            };
            return new OnnxTransformer(env, options, modelBytes);
        }
コード例 #9
0
            /// <summary>
            /// Deserializes the bindings: the saved schema stream plus, for each added column,
            /// its name and type, resolved against both the input schema and the saved schema.
            /// </summary>
            public static Bindings Create(IHostEnvironment env, ModelLoadContext ctx, Schema input, OptionalColumnTransform parent)
            {
                Contracts.AssertValue(ctx);
                Contracts.AssertValue(input);

                // *** Binary format ***
                // Schema of the data view containing the optional columns
                // int: number of added columns
                // for each added column
                //   int: id of output column name
                //   ColumnType: the type of the column

                byte[] schemaBytes = null;
                if (!ctx.TryLoadBinaryStream("Schema.idv", r => schemaBytes = r.ReadByteArray()))
                    throw env.ExceptDecode();

                var schemaStream = new MemoryStream(schemaBytes, writable: false);
                var loader = new BinaryLoader(env, new BinaryLoader.Arguments(), schemaStream);

                int size = ctx.Reader.ReadInt32();
                Contracts.CheckDecode(size > 0);

                // The saver is only used here to decode the per-column type descriptions.
                var saver = new BinarySaver(env, new BinarySaver.Arguments());
                var names = new string[size];
                var columnTypes = new ColumnType[size];
                var srcCols = new int[size];
                var srcColsWithOptionalColumn = new int[size];

                for (int i = 0; i < size; i++)
                {
                    names[i] = ctx.LoadNonEmptyString();
                    columnTypes[i] = saver.LoadTypeDescriptionOrNull(ctx.Reader.BaseStream);

                    // -1 marks a column that is absent from the input schema.
                    int col;
                    srcCols[i] = input.TryGetColumnIndex(names[i], out col) ? col : -1;

                    // The saved schema, by contrast, must contain every added column.
                    int colWithOptionalColumn;
                    env.CheckDecode(loader.Schema.TryGetColumnIndex(names[i], out colWithOptionalColumn));
                    srcColsWithOptionalColumn[i] = colWithOptionalColumn;
                }

                return new Bindings(parent, columnTypes, srcCols, srcColsWithOptionalColumn, input, loader.Schema, false, names);
            }
コード例 #10
0
        // Factory method for SignatureLoadModel. Restores the TensorFlow session and
        // the input/output column names from the model context.
        private static TensorFlowTransform Create(IHostEnvironment env, ModelLoadContext ctx)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel(GetVersionInfo());

            // *** Binary format ***
            // stream: tensorFlow model.
            // int: number of input columns
            // for each input column
            //   int: id of int column name
            // int: number of output columns
            // for each output column
            //   int: id of output column name

            byte[] modelBytes = null;
            if (!ctx.TryLoadBinaryStream("TFModel", r => modelBytes = r.ReadByteArray()))
                throw env.ExceptDecode();
            var session = TensorFlowUtils.LoadTFSession(env, modelBytes);

            int numInputs = ctx.Reader.ReadInt32();
            env.CheckDecode(numInputs > 0);
            var inputs = new string[numInputs];
            for (int i = 0; i < numInputs; i++)
                inputs[i] = ctx.LoadNonEmptyString();

            // Models readable at version 0x00010002 or later may declare multiple outputs;
            // older models implicitly have exactly one.
            int numOutputs = ctx.Header.ModelVerReadable >= 0x00010002 ? ctx.Reader.ReadInt32() : 1;
            env.CheckDecode(numOutputs > 0);
            var outputs = new string[numOutputs];
            for (int i = 0; i < numOutputs; i++)
                outputs[i] = ctx.LoadNonEmptyString();

            return new TensorFlowTransform(env, session, inputs, outputs);
        }
コード例 #11
0
        // Factory method for SignatureLoadModel. Restores the ONNX model bytes and the
        // single input/output column pair from the model context.
        private static OnnxTransform Create(IHostEnvironment env, ModelLoadContext ctx)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel(GetVersionInfo());

            // The serialized ONNX model lives in its own binary stream.
            byte[] modelBytes = null;
            if (!ctx.TryLoadBinaryStream("OnnxModel", r => modelBytes = r.ReadByteArray()))
                throw env.ExceptDecode();

            // Object-initializer members are evaluated left-to-right, preserving
            // the input-then-output read order of the serialized strings.
            var args = new Arguments
            {
                InputColumn = ctx.LoadNonEmptyString(),
                OutputColumn = ctx.LoadNonEmptyString(),
            };
            return new OnnxTransform(env, args, modelBytes);
        }
コード例 #12
0
        /// <summary>
        /// Deserializing constructor: reads the raw XGBoost model stream and the feature
        /// counts, then instantiates the booster.
        /// </summary>
        protected XGBoostPredictorBase(IHostEnvironment env, string name, ModelLoadContext ctx) : base(env, name, ctx)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(ctx, nameof(ctx));

            // *** Binary format ***
            // <base>
            // int
            // int (version > 0x00010003)

            byte[] model = null;
            bool loaded = ctx.TryLoadBinaryStream("xgboost.model", br =>
            {
                using (var memStream = new MemoryStream())
                {
                    br.BaseStream.CopyTo(memStream);
                    model = memStream.ToArray();
                }
            });
            Host.CheckDecode(loaded);
            Host.CheckDecode(model != null && model.Length > 0);

            int numFeatures = ctx.Reader.ReadInt32();
            Host.CheckDecode(numFeatures > 0);

            // If booster construction fails, it probably means that the model is corrupted
            // or XGBoost library changed its format. The error message comes from XGBoost.
            _booster = new Booster(model, numFeatures);

            // Newer models persist the ML-side feature count; older ones take it from the booster.
            _numFeaturesML = ctx.Header.ModelVerWritten >= 0x00010003
                ? ctx.Reader.ReadInt32()
                : _booster.NumFeatures;
            Host.CheckDecode(_numFeaturesML >= numFeatures);
            _inputType = new VectorType(NumberType.R4, _numFeaturesML);
        }
コード例 #13
0
ファイル: ParquetLoader.cs プロジェクト: rrsc/machinelearning
        /// <summary>
        /// Deserializing constructor. Restores the chunk-read size, the big-integer flag and
        /// (for models at version 0x00010002 or later) the saved schema; when input files are
        /// present, the schema is rebuilt from the Parquet file itself and overrides the saved one.
        /// </summary>
        private ParquetLoader(IHost host, ModelLoadContext ctx, IMultiStreamSource files)
        {
            Contracts.AssertValue(host);
            _host = host;
            _host.AssertValue(ctx);
            _host.AssertValue(files);

            // *** Binary format ***
            // int: cached chunk size
            // bool: TreatBigIntegersAsDates flag
            // Schema of the loader (0x00010002)

            _columnChunkReadSize = ctx.Reader.ReadInt32();
            bool treatBigIntegersAsDates = ctx.Reader.ReadBoolean();

            if (ctx.Header.ModelVerWritten >= 0x00010002)
            {
                // Load the schema persisted with the model; models older than 0x00010002 did not save it.
                byte[] buffer = null;
                if (!ctx.TryLoadBinaryStream(SchemaCtxName, r => buffer = r.ReadByteArray()))
                {
                    throw _host.ExceptDecode();
                }
                var strm   = new MemoryStream(buffer, writable: false);
                var loader = new BinaryLoader(_host, new BinaryLoader.Arguments(), strm);
                Schema = loader.Schema;
            }

            // Only load Parquet related data if a file is present. Otherwise, just the Schema is valid.
            if (files.Count > 0)
            {
                _parquetOptions = new ParquetOptions()
                {
                    TreatByteArrayAsString  = true,
                    TreatBigIntegersAsDates = treatBigIntegersAsDates
                };

                _parquetStream = OpenStream(files);
                DataSet schemaDataSet;

                try
                {
                    // We only care about the schema so ignore the rows.
                    ReaderOptions readerOptions = new ReaderOptions()
                    {
                        Count  = 0,
                        Offset = 0
                    };
                    schemaDataSet = ParquetReader.Read(_parquetStream, _parquetOptions, readerOptions);
                    _rowCount     = schemaDataSet.TotalRowCount;
                }
                catch (Exception ex)
                {
                    throw new InvalidDataException("Cannot read Parquet file", ex);
                }

                // The schema derived from the actual file wins over the one saved in the model.
                _columnsLoaded = InitColumns(schemaDataSet);
                Schema         = CreateSchema(_host, _columnsLoaded);
            }
            else if (Schema == null)
            {
                // No files and no saved schema: nothing to surface.
                throw _host.Except("Parquet loader must be created with one file");
            }
        }
コード例 #14
0
        /// <summary>
        /// Deserializing constructor. Reads the feature/class counts, the bias vector, and the
        /// weight matrix (dense, or sparse in CSR layout), then the optional label names and
        /// model statistics from their separate streams.
        /// </summary>
        private MulticlassLogisticRegressionPredictor(IHostEnvironment env, ModelLoadContext ctx)
            : base(env, RegistrationName, ctx)
        {
            // *** Binary format ***
            // int: number of features
            // int: number of classes = number of biases
            // float[]: biases
            // (weight matrix, in CSR if sparse)
            // (see https://netlib.org/linalg/html_templates/node91.html#SECTION00931100000000000000)
            // int: number of row start indices (_numClasses + 1 if sparse, 0 if dense)
            // int[]: row start indices
            // int: total number of column indices (0 if dense)
            // int[]: column index of each non-zero weight
            // int: total number of non-zero weights  (same as number of column indices if sparse, num of classes * num of features if dense)
            // float[]: non-zero weights
            // int[]: Id of label names (optional, in a separate stream)
            // LinearModelStatistics: model statistics (optional, in a separate stream)

            _numFeatures = ctx.Reader.ReadInt32();
            Host.CheckDecode(_numFeatures >= 1);

            _numClasses = ctx.Reader.ReadInt32();
            Host.CheckDecode(_numClasses >= 1);

            _biases = ctx.Reader.ReadFloatArray(_numClasses);

            // A row-start count of 0 is the sentinel for a fully dense weight matrix.
            int numStarts = ctx.Reader.ReadInt32();

            if (numStarts == 0)
            {
                // The weights are entirely dense.
                int numIndices = ctx.Reader.ReadInt32();
                Host.CheckDecode(numIndices == 0);
                int numWeights = ctx.Reader.ReadInt32();
                Host.CheckDecode(numWeights == _numClasses * _numFeatures);
                // One dense row of _numFeatures weights per class.
                _weights = new VBuffer <float> [_numClasses];
                for (int i = 0; i < _weights.Length; i++)
                {
                    var w = ctx.Reader.ReadFloatArray(_numFeatures);
                    _weights[i] = new VBuffer <float>(_numFeatures, w);
                }
                // Already dense, so the dense view is the same array.
                _weightsDense = _weights;
            }
            else
            {
                // Read weight matrix as CSR.
                Host.CheckDecode(numStarts == _numClasses + 1);
                // Row-start offsets must begin at 0 and be non-decreasing.
                int[] starts = ctx.Reader.ReadIntArray(numStarts);
                Host.CheckDecode(starts[0] == 0);
                Host.CheckDecode(Utils.IsSorted(starts));

                // Total non-zero count must equal the final row-start offset.
                int numIndices = ctx.Reader.ReadInt32();
                Host.CheckDecode(numIndices == starts[starts.Length - 1]);

                // Per-class column indices; each row's indices must be strictly
                // increasing and within [0, _numFeatures).
                var indices = new int[_numClasses][];
                for (int i = 0; i < indices.Length; i++)
                {
                    indices[i] = ctx.Reader.ReadIntArray(starts[i + 1] - starts[i]);
                    Host.CheckDecode(Utils.IsIncreasing(0, indices[i], _numFeatures));
                }

                int numValues = ctx.Reader.ReadInt32();
                Host.CheckDecode(numValues == numIndices);

                // Build one sparse VBuffer per class from its slice of values and indices.
                _weights = new VBuffer <float> [_numClasses];
                for (int i = 0; i < _weights.Length; i++)
                {
                    float[] values = ctx.Reader.ReadFloatArray(starts[i + 1] - starts[i]);
                    _weights[i] = new VBuffer <float>(_numFeatures, Utils.Size(values), values, indices[i]);
                }
            }
            WarnOnOldNormalizer(ctx, GetType(), Host);
            InputType  = new VectorType(NumberType.R4, _numFeatures);
            OutputType = new VectorType(NumberType.R4, _numClasses);

            // REVIEW: Should not save the label names duplicately with the predictor again.
            // Get it from the label column schema metadata instead.
            string[] labelNames = null;
            if (ctx.TryLoadBinaryStream(LabelNamesSubModelFilename, r => labelNames = LoadLabelNames(ctx, r)))
            {
                _labelNames = labelNames;
            }

            // Model statistics are optional: probe the repository for the stats entry and
            // only deserialize when it exists.
            string statsDir = Path.Combine(ctx.Directory ?? "", ModelStatsSubModelFilename);

            using (var statsEntry = ctx.Repository.OpenEntryOrNull(statsDir, ModelLoadContext.ModelStreamName))
            {
                if (statsEntry == null)
                {
                    _stats = null;
                }
                else
                {
                    using (var statsCtx = new ModelLoadContext(ctx.Repository, statsEntry, statsDir))
                        _stats = LinearModelStatistics.Create(Host, statsCtx);
                }
            }
        }
コード例 #15
0
        // Factory method for SignatureLoadModel. Frozen models are restored directly from
        // their serialized byte stream; saved-model format is unpacked into a temporary
        // directory on disk and loaded from there.
        private static TensorFlowTransform Create(IHostEnvironment env, ModelLoadContext ctx)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel(GetVersionInfo());

            // *** Binary format ***
            // byte: indicator for frozen models
            // stream: tensorFlow model.
            // int: number of input columns
            // for each input column
            //   int: id of int column name
            // int: number of output columns
            // for each output column
            //   int: id of output column name
            GetModelInfo(env, ctx, out string[] inputs, out string[] outputs, out bool isFrozen);
            if (isFrozen)
            {
                // Frozen graph: the whole model is a single byte stream.
                byte[] modelBytes = null;
                if (!ctx.TryLoadBinaryStream("TFModel", r => modelBytes = r.ReadByteArray()))
                {
                    throw env.ExceptDecode();
                }
                return(new TensorFlowTransform(env, TensorFlowUtils.LoadTFSession(env, modelBytes), inputs, outputs, null, false));
            }

            // SavedModel format: extract the bundled files into a unique temp directory.
            var tempDirPath = Path.GetFullPath(Path.Combine(Path.GetTempPath(), RegistrationName + "_" + Guid.NewGuid()));

            TensorFlowUtils.CreateFolderWithAclIfNotExists(env, tempDirPath);
            try
            {
                // The stream holds a file count followed by (relative path, length, bytes)
                // records; each file is written under the temp directory.
                // NOTE(review): the returned success flag is never checked — presumably
                // the stream is always present for non-frozen models; confirm.
                var load = ctx.TryLoadBinaryStream("TFSavedModel", br =>
                {
                    int count = br.ReadInt32();
                    for (int n = 0; n < count; n++)
                    {
                        string relativeFile = br.ReadString();
                        long fileLength     = br.ReadInt64();

                        string fullFilePath = Path.Combine(tempDirPath, relativeFile);
                        string fullFileDir  = Path.GetDirectoryName(fullFilePath);
                        // Create intermediate directories for nested files.
                        if (fullFileDir != tempDirPath)
                        {
                            TensorFlowUtils.CreateFolderWithAclIfNotExists(env, fullFileDir);
                        }
                        using (var fs = new FileStream(fullFilePath, FileMode.Create, FileAccess.Write))
                        {
                            long actualRead = br.BaseStream.CopyRange(fs, fileLength);
                            env.Assert(actualRead == fileLength);
                        }
                    }
                });

                return(new TensorFlowTransform(env, TensorFlowUtils.GetSession(env, tempDirPath), inputs, outputs, tempDirPath, true));
            }
            catch (Exception)
            {
                // Best effort: do not leave a partially extracted model on disk.
                TensorFlowUtils.DeleteFolderWithRetries(env, tempDirPath);
                throw;
            }
        }