/// <summary>
/// Trains the pipeline with data coming from a <see cref="IDataView"/>.
/// The transforms are instantiated in order, each one consuming the output of the previous one,
/// then the trainer (if any) is created and trained on the transformed data.
/// </summary>
/// <param name="data">Input data fed to the first transform of the pipeline.</param>
/// <param name="feature">Name of the column used with the Feature role.</param>
/// <param name="label">Name of the column used with the Label role, ignored if null or empty.</param>
/// <param name="weight">Name of the column used with the Weight role, ignored if null or empty.</param>
/// <param name="groupId">Name of the column used with the Group role, ignored if null or empty.</param>
/// <returns>The current pipeline (<c>this</c>), which allows calls to be chained.</returns>
/// <remarks>
/// If a transform cannot be created because its class is unknown, the raised exception lists
/// the user names of the registered classes. Any other exception is rethrown untouched.
/// When the pipeline has no predictor, a small read is done through the transforms
/// to check that the pipeline produces some output.
/// </remarks>
public ScikitPipeline Train(IDataView data,
                            string feature = "Feature", string label = null,
                            string weight = null, string groupId = null)
{
    IDataView trans = data;
    using (var ch = _env.Start("Create transforms"))
    {
        for (int i = 0; i < _transforms.Length; ++i)
        {
            try
            {
                trans = _env.CreateTransform(_transforms[i].transformSettings, trans);
            }
            catch (Exception e)
            {
                if (e.ToString().Contains("Unknown loadable class"))
                {
                    // Query the catalog only once; it provides both the count and the listing.
                    var allClasses = _env.ComponentCatalog.GetAllClasses();
                    var nn = allClasses.Length;
                    // Filtering before sorting produces the same list with less work.
                    var filt = allClasses
                        .Select(c => c.UserName)
                        .Where(c => c.Trim().Length > 2)
                        .OrderBy(c => c);
                    var regis = string.Join("\n", filt);
                    throw Contracts.Except(e, $"Unable to create transform '{_transforms[i].transformSettings}', assembly not registered among {nn}\n{regis}");
                }

                // Bare 'throw' keeps the original stack trace ('throw e' would reset it).
                throw;
            }

            // 'as' yields null when the created view is not an IDataTransform.
            _transforms[i].transform = trans as IDataTransform;
        }
    }

    if (_predictor != null)
    {
        using (var ch = _env.Start("Create Predictor"))
        {
            _predictor.trainer = TrainerHelper.CreateTrainer(_env, _predictor.trainerSettings);

            // The feature role is always registered, the other roles only when a column name is given.
            _roles = new List<KeyValuePair<RoleMappedSchema.ColumnRole, string>>();
            _roles.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Feature, feature));
            if (!string.IsNullOrEmpty(label))
            {
                _roles.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Label, label));
            }
            if (!string.IsNullOrEmpty(groupId))
            {
                _roles.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Group, groupId));
            }
            if (!string.IsNullOrEmpty(weight))
            {
                _roles.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Weight, weight));
            }

            var roleMap = new RoleMappedData(trans, label, feature, group: groupId, weight: weight);
            _predictor.predictor = _predictor.trainer.Train(_env, ch, roleMap);
            _predictor.roleMapData = roleMap;
        }
    }
    else
    {
        // No trainer: the pipeline is only a chain of transforms.
        _predictor = new StepPredictor()
        {
            predictor = null,
            trainer = null,
            trainerSettings = null,
            roleMapData = new RoleMappedData(trans)
        };

        // We predict one to make sure everything works fine.
        using (var ch = _env.Start("Compute one prediction."))
        {
            var df = DataFrameIO.ReadView(trans, 1, keepVectors: true, env: _env);
            if (df.Length == 0)
            {
                throw _env.ExceptEmpty("Something went wrong. The pipeline does not produce any output.");
            }
        }
    }

    return this;
}