/// <summary>
/// Initializes the pipeline.
/// </summary>
/// <param name="transforms">list of transform settings, can be null</param>
/// <param name="predictor">settings of the predictor, can be null</param>
/// <param name="host">can be null too, a <see cref="ExtendedConsoleEnvironment"/> is then created</param>
public ScikitPipeline(string[] transforms = null, string predictor = null, IHostEnvironment host = null)
{
    _dispose = false;
    _env = host ?? ExtendedConsoleEnvironment();

    // One extra slot is reserved at index 0 for the "pass" step.
    _transforms = new StepTransform[transforms == null ? 1 : transforms.Length + 1];

    // We add a PassThroughTransform to be able to change the source.
    _transforms[0] = new StepTransform() { transformSettings = "pass", transform = null };
    if (transforms != null)
    {
        for (int i = 0; i < transforms.Length; ++i)
        {
            // Only the settings are stored here; the transform itself is left null.
            _transforms[i + 1] = new StepTransform() { transformSettings = transforms[i], transform = null };
        }
    }

    // No predictor step is created when no predictor settings are given.
    _predictor = predictor == null
        ? null
        : new StepPredictor()
        {
            trainerSettings = predictor,
            predictor = null,
            trainer = null,
            roleMapData = null
        };

    _loaderSettings = null;
    _roles = null;
    _fastValueMapper = null;
    _fastValueMapperObject = null;
}
/// <summary>
/// Loads a pipeline saved in zip format.
/// </summary>
/// <param name="fs">stream the pipeline, the predictor and the role mappings are read from</param>
protected void Load(Stream fs)
{
    var transformPipe = ModelFileUtils.LoadPipeline(_env, fs, new MultiFileSource(null), true);
    var pred = _env.LoadPredictorOrNull(fs);

    // Walks up the chain of sources until a PassThroughTransform
    // or something which is not a transform is reached.
    IDataView root;
    for (root = transformPipe; root is IDataTransform && !(root is PassThroughTransform); root = ((IDataTransform)root).Source)
    {
    }

    if (!(root is PassThroughTransform))
    {
        // No PassThroughTransform in the loaded chain: one is created over the root
        // and handed to ApplyAllTransformsToData together with the loaded pipeline
        // (presumably to rebuild the chain on top of it -- confirm against ApplyTransformUtils).
        var tr = new PassThroughTransform(_env, new PassThroughTransform.Arguments(), root);
        transformPipe = ApplyTransformUtils.ApplyAllTransformsToData(_env, transformPipe, tr, root);
    }

    // Collects the transforms from the last one to the first one, then reverses the order.
    var stack = new List<IDataView>();
    for (root = transformPipe; root is IDataTransform; root = ((IDataTransform)root).Source)
    {
        stack.Add(root);
    }
    stack.Reverse();

    _transforms = new StepTransform[stack.Count];
    for (int i = 0; i < _transforms.Length; ++i)
    {
        _transforms[i] = new StepTransform() { transform = stack[i] as IDataTransform, transformSettings = null };
    }

    if (pred == null)
    {
        _predictor = new StepPredictor()
        {
            predictor = null,
            roleMapData = null,
            trainer = null,
            trainerSettings = null
        };
    }
    else
    {
#pragma warning disable CS0618
        var ipred = pred.GetPredictorObject() as IPredictor;
#pragma warning restore CS0618

        // LoadRoleMappingsOrNull may return null (as its name states): _roles then stays null
        // and the data is wrapped without any role instead of failing on ToList().
        _roles = ModelFileUtils.LoadRoleMappingsOrNull(_env, fs)?.ToList();
        var data = _roles == null
            ? new RoleMappedData(transformPipe)
            : new RoleMappedData(transformPipe, _roles);

        _predictor = new StepPredictor()
        {
            predictor = ipred,
            roleMapData = data,
            trainer = null,
            trainerSettings = null
        };
    }

    _fastValueMapper = null;
}
/// <summary>
/// Loads a pipeline saved in onnx format.
/// </summary>
/// <param name="fs">stream given to <c>ConvertFromOnnx.ReadOnnx</c></param>
protected void LoadOnnx(Stream fs)
{
    // The first step is a PassThroughTransform created with a null source.
    var passThrough = new PassThroughTransform(_env, new PassThroughTransform.Arguments(), null);

    _transforms = new StepTransform[2];
    _transforms[0] = new StepTransform()
    {
        transformSettings = null,
        transform = passThrough as IDataTransform
    };

    // The second step is whatever ReadOnnx builds from the stream on top of the first step.
    _transforms[1] = new StepTransform()
    {
        transformSettings = null,
        transform = ConvertFromOnnx.ReadOnnx(fs, passThrough)
    };

    // This loader creates a predictor step with every member left null.
    _predictor = new StepPredictor()
    {
        trainerSettings = null,
        trainer = null,
        predictor = null,
        roleMapData = null
    };

    _fastValueMapper = null;
}
/// <summary>
/// Trains the pipeline with data coming from a <see cref="IDataView"/>.
/// </summary>
/// <param name="data">data the transforms are applied to</param>
/// <param name="feature">name of the column used with the Feature role</param>
/// <param name="label">name of the column used with the Label role, can be null</param>
/// <param name="weight">name of the column used with the Weight role, can be null</param>
/// <param name="groupId">name of the column used with the Group role, can be null</param>
/// <returns>this pipeline</returns>
public ScikitPipeline Train(IDataView data, string feature = "Feature", string label = null, string weight = null, string groupId = null)
{
    IDataView trans = data;
    using (var ch = _env.Start("Create transforms"))
    {
        // Each transform is created on top of the previous one.
        for (int i = 0; i < _transforms.Length; ++i)
        {
            try
            {
                trans = _env.CreateTransform(_transforms[i].transformSettings, trans);
            }
            catch (Exception e)
            {
                if (e.ToString().Contains("Unknown loadable class"))
                {
                    // Lists every registered component to help finding out what is missing.
                    var allClasses = _env.ComponentCatalog.GetAllClasses();
                    var nn = allClasses.Length;
                    var filt = allClasses
                               .Select(c => c.UserName)
                               .Where(c => c.Trim().Length > 2)
                               .OrderBy(c => c);
                    var regis = string.Join("\n", filt);
                    throw Contracts.Except(e, $"Unable to create transform '{_transforms[i].transformSettings}', assembly not registered among {nn}\n{regis}");
                }

                // 'throw;' keeps the original stack trace, 'throw e;' would reset it.
                throw;
            }
            _transforms[i].transform = trans as IDataTransform;
        }
    }

    if (_predictor != null)
    {
        using (var ch = _env.Start("Create Predictor"))
        {
            _predictor.trainer = TrainerHelper.CreateTrainer(_env, _predictor.trainerSettings);

            // The feature role is always registered, the other ones only if a column name was given.
            _roles = new List<KeyValuePair<RoleMappedSchema.ColumnRole, string>>();
            _roles.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Feature, feature));
            if (!string.IsNullOrEmpty(label))
            {
                _roles.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Label, label));
            }
            if (!string.IsNullOrEmpty(groupId))
            {
                _roles.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Group, groupId));
            }
            if (!string.IsNullOrEmpty(weight))
            {
                _roles.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Weight, weight));
            }

            var roleMap = new RoleMappedData(trans, label, feature, group: groupId, weight: weight);
            _predictor.predictor = _predictor.trainer.Train(_env, ch, roleMap);
            _predictor.roleMapData = roleMap;
        }
    }
    else
    {
        // No predictor to train: the step only keeps the transformed data.
        _predictor = new StepPredictor()
        {
            predictor = null,
            trainer = null,
            trainerSettings = null,
            roleMapData = new RoleMappedData(trans)
        };

        // We predict one to make sure everything works fine.
        using (var ch = _env.Start("Compute one prediction."))
        {
            var df = DataFrameIO.ReadView(trans, 1, keepVectors: true, env: _env);
            if (df.Length == 0)
            {
                throw _env.ExceptEmpty("Something went wrong. The pipeline does not produce any output.");
            }
        }
    }

    return this;
}