public static TransposeLoader Create(IHostEnvironment env, ModelLoadContext ctx, IMultiStreamSource files)
{
    Contracts.CheckValue(env, nameof(env));
    IHost host = env.Register(LoadName);
    host.CheckValue(ctx, nameof(ctx));
    ctx.CheckAtModel(GetVersionInfo());
    host.CheckValue(files, nameof(files));

    return host.Apply("Loading Model", ch =>
    {
        if (files.Count == 0)
        {
            // No input streams: if the model repository carries a saved schema,
            // still surface ourselves as a binary loader over that schema.
            // The loader "owns" the cached stream.
            BinaryLoader schemaLoader = null;
            bool hasSchema = ctx.TryLoadBinaryStream("Schema.idv",
                r => schemaLoader = new BinaryLoader(host, new BinaryLoader.Arguments(),
                    HybridMemoryStream.CreateCache(r.BaseStream), leaveOpen: false));
            if (hasSchema)
            {
                host.AssertValue(schemaLoader);
                host.CheckDecode(schemaLoader.GetRowCount() == 0);
                // REVIEW: Do we want to be a bit more restrictive around uninterpretable columns?
                return new TransposeLoader(host, ctx, schemaLoader);
            }
            host.Assert(schemaLoader == null);
            // Fall through and let OpenStream produce the failure.
        }
        return new TransposeLoader(host, ctx, files);
    });
}
protected PredictionTransformerBase(IHost host, ModelLoadContext ctx)
{
    Host = host;

    // *** Binary format ***
    // model: prediction model.
    // stream: empty data view that contains train schema.
    // id of string: feature column.
    ctx.LoadModel<TModel, SignatureLoadModel>(host, out TModel model, DirModel);
    Model = model;

    // Clone the stream with the schema into memory.
    var ms = new MemoryStream();
    // BUG FIX: the result of TryLoadBinaryStream was previously ignored; if the
    // schema stream is absent, BinaryLoader would be handed an empty stream and
    // fail with an obscure error. Surface the decode failure explicitly instead,
    // matching the convention used by the other loaders in this codebase.
    if (!ctx.TryLoadBinaryStream(DirTransSchema, reader => reader.BaseStream.CopyTo(ms)))
    {
        throw host.ExceptDecode();
    }
    ms.Position = 0;
    var loader = new BinaryLoader(host, new BinaryLoader.Arguments(), ms);
    TrainSchema = loader.Schema;
}
public static TensorFlowMapper Create(IHostEnvironment env, ModelLoadContext ctx, ISchema schema)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ctx, nameof(ctx));
    ctx.CheckAtModel(GetVersionInfo());

    // Deserialization order: input count, input names, model bytes, output name.
    int inputCount = ctx.Reader.ReadInt32();
    Contracts.CheckDecode(inputCount > 0);
    var inputNames = new string[inputCount];
    for (int i = 0; i < inputNames.Length; i++)
    {
        inputNames[i] = ctx.LoadNonEmptyString();
    }

    byte[] modelBytes = null;
    if (!ctx.TryLoadBinaryStream("TFModel", r => modelBytes = r.ReadByteArray()))
    {
        throw env.ExceptDecode();
    }

    string outputName = ctx.LoadNonEmptyString();
    return new TensorFlowMapper(env, schema, modelBytes, inputNames, outputName);
}
private PartitionedFileLoader(IHost host, ModelLoadContext ctx, IMultiStreamSource files)
{
    Contracts.AssertValue(host);
    _host = host;
    _host.AssertValue(ctx);
    _host.AssertValue(files);

    // ** Binary format **
    // int: tailing directory count
    // Schema of the loader
    // int[]: srcColumns
    // byte[]: subloader
    // model: file path spec
    _tailingDirCount = ctx.Reader.ReadInt32();

    // Reconstitute the schema from its saved binary IDV representation.
    byte[] schemaBytes = null;
    if (!ctx.TryLoadBinaryStream(SchemaCtxName, r => schemaBytes = r.ReadByteArray()))
    {
        throw _host.ExceptDecode();
    }
    var schemaStream = new MemoryStream(schemaBytes, writable: false);
    var schemaLoader = new BinaryLoader(_host, new BinaryLoader.Arguments(), schemaStream);
    Schema = schemaLoader.Schema;

    _srcDirIndex = ctx.Reader.ReadIntArray();
    _subLoaderBytes = ctx.Reader.ReadByteArray();
    ctx.LoadModel<IPartitionedPathParser, SignatureLoadModel>(_host, out _pathParser, FilePathSpecCtxName);
    _files = files;
}
private TermLookupTransform(IChannel ch, ModelLoadContext ctx, IHost host, IDataView input)
    : base(host, ctx, input, TestIsText)
{
    Host.AssertValue(ch);

    // *** Binary format ***
    // <base>
    ch.AssertNonEmpty(Infos);

    // Extra streams:
    // DefaultMap.idv
    byte[] mapBytes = null;
    if (!ctx.TryLoadBinaryStream(DefaultMapName, r => mapBytes = ReadAllBytes(ch, r)))
    {
        throw ch.ExceptDecode();
    }
    _bytes = mapBytes;

    // Process the bytes into the loader and map.
    _ldr = GetLoader(Host, _bytes);
    ValidateLoader(ch, _ldr);
    _valueMap = Train(ch, _ldr);
    SetMetadata();
}
// Factory method for SignatureLoadModel.
private static OnnxTransform Create(IHostEnvironment env, ModelLoadContext ctx)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ctx, nameof(ctx));
    ctx.CheckAtModel(GetVersionInfo());

    byte[] modelBytes = null;
    if (!ctx.TryLoadBinaryStream("OnnxModel", r => modelBytes = r.ReadByteArray()))
    {
        throw env.ExceptDecode();
    }

    // Models written after version 0x00010001 serialize explicit input/output
    // counts; older models always have exactly one of each and store no counts.
    bool multiIO = ctx.Header.ModelVerWritten > 0x00010001;

    int inputCount = multiIO ? ctx.Reader.ReadInt32() : 1;
    env.CheckDecode(inputCount > 0);
    var inputColumns = new string[inputCount];
    for (int i = 0; i < inputColumns.Length; i++)
    {
        inputColumns[i] = ctx.LoadNonEmptyString();
    }

    int outputCount = multiIO ? ctx.Reader.ReadInt32() : 1;
    env.CheckDecode(outputCount > 0);
    var outputColumns = new string[outputCount];
    for (int i = 0; i < outputColumns.Length; i++)
    {
        outputColumns[i] = ctx.LoadNonEmptyString();
    }

    var args = new Arguments { InputColumns = inputColumns, OutputColumns = outputColumns };
    return new OnnxTransform(env, args, modelBytes);
}
internal PredictionTransformerBase(IHost host, ModelLoadContext ctx)
{
    Host = host;

    // *** Binary format ***
    // model: prediction model.
    // stream: empty data view that contains train schema.
    // id of string: feature column.
    ctx.LoadModel<TModel, SignatureLoadModel>(host, out TModel model, DirModel);
    Model = model;

    // Clone the stream with the schema into memory.
    var ms = new MemoryStream();
    // BUG FIX: the result of TryLoadBinaryStream was previously ignored; a
    // missing schema stream would hand an empty stream to BinaryLoader and
    // fail with an obscure error. Surface the decode failure explicitly,
    // matching the convention used by the other loaders in this codebase.
    if (!ctx.TryLoadBinaryStream(DirTransSchema, reader => reader.BaseStream.CopyTo(ms)))
    {
        throw Host.ExceptDecode();
    }
    ms.Position = 0;
    var loader = new BinaryLoader(host, new BinaryLoader.Arguments(), ms);
    TrainSchema = loader.Schema;

    FeatureColumn = ctx.LoadString();
    if (!TrainSchema.TryGetColumnIndex(FeatureColumn, out int col))
    {
        throw Host.ExceptSchemaMismatch(nameof(FeatureColumn), RoleMappedSchema.ColumnRole.Feature.Value, FeatureColumn);
    }
    FeatureColumnType = TrainSchema.GetColumnType(col);

    BindableMapper = ScoreUtils.GetSchemaBindableMapper(Host, model);
}
// Factory method for SignatureLoadModel.
private static OnnxTransformer Create(IHostEnvironment env, ModelLoadContext ctx)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ctx, nameof(ctx));
    ctx.CheckAtModel(GetVersionInfo());

    byte[] modelBytes = null;
    if (!ctx.TryLoadBinaryStream("OnnxModel", r => modelBytes = r.ReadByteArray()))
    {
        throw env.ExceptDecode();
    }

    // Versions after 0x00010001 serialize explicit input/output counts;
    // earlier versions always have a single input and a single output.
    bool multiIO = ctx.Header.ModelVerWritten > 0x00010001;

    int inputCount = multiIO ? ctx.Reader.ReadInt32() : 1;
    env.CheckDecode(inputCount > 0);
    var inputColumns = new string[inputCount];
    for (int i = 0; i < inputColumns.Length; i++)
    {
        inputColumns[i] = ctx.LoadNonEmptyString();
    }

    int outputCount = multiIO ? ctx.Reader.ReadInt32() : 1;
    env.CheckDecode(outputCount > 0);
    var outputColumns = new string[outputCount];
    for (int i = 0; i < outputColumns.Length; i++)
    {
        outputColumns[i] = ctx.LoadNonEmptyString();
    }

    // Custom-provided shapes override shapes loaded from the ONNX model file.
    // A saved count of 0 means no custom shapes.
    int customShapeCount = ctx.Reader.ReadInt32();
    CustomShapeInfo[] customShapes = null;
    if (customShapeCount > 0)
    {
        customShapes = new CustomShapeInfo[customShapeCount];
        for (int i = 0; i < customShapeCount; ++i)
        {
            string name = ctx.LoadNonEmptyString();
            int[] shape = ctx.Reader.ReadIntArray();
            customShapes[i] = new CustomShapeInfo { Name = name, Shape = shape };
        }
    }

    var options = new Options
    {
        InputColumns = inputColumns,
        OutputColumns = outputColumns,
        CustomShapeInfos = customShapes
    };
    return new OnnxTransformer(env, options, modelBytes);
}
public static Bindings Create(IHostEnvironment env, ModelLoadContext ctx, Schema input, OptionalColumnTransform parent)
{
    Contracts.AssertValue(ctx);
    Contracts.AssertValue(input);

    // *** Binary format ***
    // Schema of the data view containing the optional columns
    // int: number of added columns
    // for each added column
    //   int: id of output column name
    //   ColumnType: the type of the column
    byte[] schemaBytes = null;
    if (!ctx.TryLoadBinaryStream("Schema.idv", r => schemaBytes = r.ReadByteArray()))
    {
        throw env.ExceptDecode();
    }
    var schemaStream = new MemoryStream(schemaBytes, writable: false);
    var loader = new BinaryLoader(env, new BinaryLoader.Arguments(), schemaStream);

    int count = ctx.Reader.ReadInt32();
    Contracts.CheckDecode(count > 0);

    // The saver is only used here to decode the serialized column types.
    var saver = new BinarySaver(env, new BinarySaver.Arguments());
    var names = new string[count];
    var columnTypes = new ColumnType[count];
    var srcCols = new int[count];
    var srcColsWithOptionalColumn = new int[count];
    for (int i = 0; i < count; i++)
    {
        names[i] = ctx.LoadNonEmptyString();
        columnTypes[i] = saver.LoadTypeDescriptionOrNull(ctx.Reader.BaseStream);

        // The column may legitimately be absent from the input; -1 marks it as missing.
        srcCols[i] = input.TryGetColumnIndex(names[i], out int col) ? col : -1;

        // It must always be present in the saved schema, though.
        bool found = loader.Schema.TryGetColumnIndex(names[i], out int colWithOptional);
        env.CheckDecode(found);
        srcColsWithOptionalColumn[i] = colWithOptional;
    }
    return new Bindings(parent, columnTypes, srcCols, srcColsWithOptionalColumn, input, loader.Schema, false, names);
}
// Factory method for SignatureLoadModel.
private static TensorFlowTransform Create(IHostEnvironment env, ModelLoadContext ctx)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ctx, nameof(ctx));
    ctx.CheckAtModel(GetVersionInfo());

    // *** Binary format ***
    // stream: tensorFlow model.
    // int: number of input columns
    // for each input column
    //   int: id of int column name
    // int: number of output columns
    // for each output column
    //   int: id of output column name
    byte[] modelBytes = null;
    if (!ctx.TryLoadBinaryStream("TFModel", r => modelBytes = r.ReadByteArray()))
    {
        throw env.ExceptDecode();
    }
    var session = TensorFlowUtils.LoadTFSession(env, modelBytes);

    int inputCount = ctx.Reader.ReadInt32();
    env.CheckDecode(inputCount > 0);
    var inputs = new string[inputCount];
    for (int i = 0; i < inputs.Length; i++)
    {
        inputs[i] = ctx.LoadNonEmptyString();
    }

    // Models at version 0x00010002 or later serialize an explicit output
    // count; older models always have exactly one output.
    int outputCount = 1;
    if (ctx.Header.ModelVerReadable >= 0x00010002)
    {
        outputCount = ctx.Reader.ReadInt32();
    }
    env.CheckDecode(outputCount > 0);
    var outputs = new string[outputCount];
    for (int i = 0; i < outputs.Length; i++)
    {
        outputs[i] = ctx.LoadNonEmptyString();
    }

    return new TensorFlowTransform(env, session, inputs, outputs);
}
// Factory method for SignatureLoadModel.
private static OnnxTransform Create(IHostEnvironment env, ModelLoadContext ctx)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ctx, nameof(ctx));
    ctx.CheckAtModel(GetVersionInfo());

    byte[] modelBytes = null;
    if (!ctx.TryLoadBinaryStream("OnnxModel", r => modelBytes = r.ReadByteArray()))
    {
        throw env.ExceptDecode();
    }

    // Single input/output column pair, stored as two non-empty strings.
    string input = ctx.LoadNonEmptyString();
    string output = ctx.LoadNonEmptyString();
    var args = new Arguments { InputColumn = input, OutputColumn = output };
    return new OnnxTransform(env, args, modelBytes);
}
protected XGBoostPredictorBase(IHostEnvironment env, string name, ModelLoadContext ctx)
    : base(env, name, ctx)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ctx, nameof(ctx));

    // *** Binary format ***
    // <base>
    // int
    // int (version > 0x00010003)
    byte[] rawModel = null;
    bool loaded = ctx.TryLoadBinaryStream("xgboost.model", br =>
    {
        using (var buffer = new MemoryStream())
        {
            br.BaseStream.CopyTo(buffer);
            rawModel = buffer.ToArray();
        }
    });
    Host.CheckDecode(loaded);
    Host.CheckDecode(rawModel != null && rawModel.Length > 0);

    int numFeatures = ctx.Reader.ReadInt32();
    Host.CheckDecode(numFeatures > 0);

    // The XGBoost model is loaded here; a failure usually means the model is
    // corrupted or the XGBoost library changed its format. The error message
    // comes from XGBoost itself.
    _booster = new Booster(rawModel, numFeatures);

    // Newer models persist the ML-side feature count; older ones fall back to
    // what the booster reports.
    _numFeaturesML = ctx.Header.ModelVerWritten >= 0x00010003
        ? ctx.Reader.ReadInt32()
        : _booster.NumFeatures;
    Host.CheckDecode(_numFeaturesML >= numFeatures);
    _inputType = new VectorType(NumberType.R4, _numFeaturesML);
}
private ParquetLoader(IHost host, ModelLoadContext ctx, IMultiStreamSource files)
{
    Contracts.AssertValue(host);
    _host = host;
    _host.AssertValue(ctx);
    _host.AssertValue(files);

    // *** Binary format ***
    // int: cached chunk size
    // bool: TreatBigIntegersAsDates flag
    // Schema of the loader (0x00010002)
    _columnChunkReadSize = ctx.Reader.ReadInt32();
    bool treatBigIntegersAsDates = ctx.Reader.ReadBoolean();

    if (ctx.Header.ModelVerWritten >= 0x00010002)
    {
        // Reconstitute the schema saved with the model.
        byte[] schemaBytes = null;
        if (!ctx.TryLoadBinaryStream(SchemaCtxName, r => schemaBytes = r.ReadByteArray()))
        {
            throw _host.ExceptDecode();
        }
        var schemaStream = new MemoryStream(schemaBytes, writable: false);
        var schemaLoader = new BinaryLoader(_host, new BinaryLoader.Arguments(), schemaStream);
        Schema = schemaLoader.Schema;
    }

    // Only load Parquet related data if a file is present. Otherwise, just the Schema is valid.
    if (files.Count > 0)
    {
        _parquetOptions = new ParquetOptions
        {
            TreatByteArrayAsString = true,
            TreatBigIntegersAsDates = treatBigIntegersAsDates
        };
        _parquetStream = OpenStream(files);

        DataSet schemaDataSet;
        try
        {
            // We only care about the schema, so ignore the rows.
            var readerOptions = new ReaderOptions { Count = 0, Offset = 0 };
            schemaDataSet = ParquetReader.Read(_parquetStream, _parquetOptions, readerOptions);
            _rowCount = schemaDataSet.TotalRowCount;
        }
        catch (Exception ex)
        {
            throw new InvalidDataException("Cannot read Parquet file", ex);
        }

        _columnsLoaded = InitColumns(schemaDataSet);
        Schema = CreateSchema(_host, _columnsLoaded);
    }
    else if (Schema == null)
    {
        throw _host.Except("Parquet loader must be created with one file");
    }
}
// Deserializes a multiclass logistic regression model: feature/class counts,
// biases, then the weight matrix — either fully dense (numStarts == 0) or in
// CSR form (see the format comment below). Label names and model statistics
// live in optional separate streams. The ctx.Reader reads below are strictly
// order-sensitive and must match the writer exactly.
private MulticlassLogisticRegressionPredictor(IHostEnvironment env, ModelLoadContext ctx) : base(env, RegistrationName, ctx) { // *** Binary format *** // int: number of features // int: number of classes = number of biases // float[]: biases // (weight matrix, in CSR if sparse) // (see https://netlib.org/linalg/html_templates/node91.html#SECTION00931100000000000000) // int: number of row start indices (_numClasses + 1 if sparse, 0 if dense) // int[]: row start indices // int: total number of column indices (0 if dense) // int[]: column index of each non-zero weight // int: total number of non-zero weights (same as number of column indices if sparse, num of classes * num of features if dense) // float[]: non-zero weights // int[]: Id of label names (optional, in a separate stream) // LinearModelStatistics: model statistics (optional, in a separate stream) _numFeatures = ctx.Reader.ReadInt32(); Host.CheckDecode(_numFeatures >= 1); _numClasses = ctx.Reader.ReadInt32(); Host.CheckDecode(_numClasses >= 1); _biases = ctx.Reader.ReadFloatArray(_numClasses); int numStarts = ctx.Reader.ReadInt32(); if (numStarts == 0) { // The weights are entirely dense. int numIndices = ctx.Reader.ReadInt32(); Host.CheckDecode(numIndices == 0); int numWeights = ctx.Reader.ReadInt32(); Host.CheckDecode(numWeights == _numClasses * _numFeatures); _weights = new VBuffer <float> [_numClasses]; for (int i = 0; i < _weights.Length; i++) { var w = ctx.Reader.ReadFloatArray(_numFeatures); _weights[i] = new VBuffer <float>(_numFeatures, w); } _weightsDense = _weights; } else { // Read weight matrix as CSR. 
// CSR branch: numStarts row-start offsets (starts[0] == 0, non-decreasing),
// then per-class strictly-increasing column indices, then the matching
// non-zero values; each class's weights become a sparse VBuffer.
Host.CheckDecode(numStarts == _numClasses + 1); int[] starts = ctx.Reader.ReadIntArray(numStarts); Host.CheckDecode(starts[0] == 0); Host.CheckDecode(Utils.IsSorted(starts)); int numIndices = ctx.Reader.ReadInt32(); Host.CheckDecode(numIndices == starts[starts.Length - 1]); var indices = new int[_numClasses][]; for (int i = 0; i < indices.Length; i++) { indices[i] = ctx.Reader.ReadIntArray(starts[i + 1] - starts[i]); Host.CheckDecode(Utils.IsIncreasing(0, indices[i], _numFeatures)); } int numValues = ctx.Reader.ReadInt32(); Host.CheckDecode(numValues == numIndices); _weights = new VBuffer <float> [_numClasses]; for (int i = 0; i < _weights.Length; i++) { float[] values = ctx.Reader.ReadFloatArray(starts[i + 1] - starts[i]); _weights[i] = new VBuffer <float>(_numFeatures, Utils.Size(values), values, indices[i]); } } WarnOnOldNormalizer(ctx, GetType(), Host); InputType = new VectorType(NumberType.R4, _numFeatures); OutputType = new VectorType(NumberType.R4, _numClasses); // REVIEW: Should not save the label names duplicately with the predictor again. // Get it from the label column schema metadata instead. string[] labelNames = null; if (ctx.TryLoadBinaryStream(LabelNamesSubModelFilename, r => labelNames = LoadLabelNames(ctx, r))) { _labelNames = labelNames; } string statsDir = Path.Combine(ctx.Directory ?? "", ModelStatsSubModelFilename); using (var statsEntry = ctx.Repository.OpenEntryOrNull(statsDir, ModelLoadContext.ModelStreamName)) { if (statsEntry == null) { _stats = null; } else { using (var statsCtx = new ModelLoadContext(ctx.Repository, statsEntry, statsDir)) _stats = LinearModelStatistics.Create(Host, statsCtx); } } }
// Factory method for SignatureLoadModel.
private static TensorFlowTransform Create(IHostEnvironment env, ModelLoadContext ctx)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ctx, nameof(ctx));
    ctx.CheckAtModel(GetVersionInfo());

    // *** Binary format ***
    // byte: indicator for frozen models
    // stream: tensorFlow model.
    // int: number of input columns
    // for each input column
    //   int: id of int column name
    // int: number of output columns
    // for each output column
    //   int: id of output column name
    GetModelInfo(env, ctx, out string[] inputs, out string[] outputs, out bool isFrozen);
    if (isFrozen)
    {
        byte[] modelBytes = null;
        if (!ctx.TryLoadBinaryStream("TFModel", r => modelBytes = r.ReadByteArray()))
        {
            throw env.ExceptDecode();
        }
        return new TensorFlowTransform(env, TensorFlowUtils.LoadTFSession(env, modelBytes), inputs, outputs, null, false);
    }

    // SavedModel case: extract the serialized files into a temp folder and
    // load the session from there; on any failure the folder is cleaned up.
    var tempDirPath = Path.GetFullPath(Path.Combine(Path.GetTempPath(), RegistrationName + "_" + Guid.NewGuid()));
    TensorFlowUtils.CreateFolderWithAclIfNotExists(env, tempDirPath);
    try
    {
        var load = ctx.TryLoadBinaryStream("TFSavedModel", br =>
        {
            int count = br.ReadInt32();
            for (int n = 0; n < count; n++)
            {
                string relativeFile = br.ReadString();
                long fileLength = br.ReadInt64();

                string fullFilePath = Path.Combine(tempDirPath, relativeFile);
                string fullFileDir = Path.GetDirectoryName(fullFilePath);
                if (fullFileDir != tempDirPath)
                {
                    TensorFlowUtils.CreateFolderWithAclIfNotExists(env, fullFileDir);
                }
                using (var fs = new FileStream(fullFilePath, FileMode.Create, FileAccess.Write))
                {
                    long actualRead = br.BaseStream.CopyRange(fs, fileLength);
                    env.Assert(actualRead == fileLength);
                }
            }
        });
        // BUG FIX: the result of TryLoadBinaryStream was previously assigned
        // but never checked; a missing "TFSavedModel" stream fell through to
        // GetSession on an empty directory and failed with a confusing
        // TensorFlow error instead of a decode exception.
        if (!load)
        {
            throw env.ExceptDecode();
        }
        return new TensorFlowTransform(env, TensorFlowUtils.GetSession(env, tempDirPath), inputs, outputs, tempDirPath, true);
    }
    catch (Exception)
    {
        TensorFlowUtils.DeleteFolderWithRetries(env, tempDirPath);
        throw;
    }
}