예제 #1
0
        /// <summary>
        /// Creates a transform from its textual settings, applied on top of <c>Source</c>,
        /// and appends it to the pipeline through the other <c>AddTransform</c> overload.
        /// </summary>
        /// <param name="transform">Settings string handed to the environment's <c>CreateTransform</c>.</param>
        /// <remarks>
        /// An exception built by <c>Contracts.ExceptNotSupp</c> is thrown when the class has no
        /// environment, or when the environment returns <c>null</c> for the requested transform.
        /// </remarks>
        public void AddTransform(string transform)
        {
            // Building a transform from a string requires the environment.
            if (_env == null)
            {
                throw Contracts.ExceptNotSupp("The class must be initialized with an environment to enable that functionality.");
            }

            var tr = _env.CreateTransform(transform, Source);
            if (tr == null)
            {
                throw Contracts.ExceptNotSupp($"Unable to create transform '{transform}'.");
            }

            AddTransform(tr);
        }
        /// <summary>
        /// Trains the pipeline with data coming from a <see cref="IDataView"/>.
        /// Every registered transform is instantiated in order, each one taking the output
        /// of the previous one as input. If a predictor step was declared, its trainer is
        /// created and trained on the transformed data; otherwise a placeholder predictor
        /// step is stored and one row is read to check that the pipeline produces output.
        /// </summary>
        /// <param name="data">Input data fed to the first transform.</param>
        /// <param name="feature">Name of the feature column.</param>
        /// <param name="label">Name of the label column; not registered as a role when null or empty.</param>
        /// <param name="weight">Name of the weight column; not registered as a role when null or empty.</param>
        /// <param name="groupId">Name of the group column; not registered as a role when null or empty.</param>
        /// <returns>This instance, so that calls can be chained.</returns>
        public ScikitPipeline Train(IDataView data,
                                    string feature = "Feature", string label = null,
                                    string weight  = null, string groupId    = null)
        {
            // Same guard as AddTransform(string): without an environment nothing below can run,
            // so fail with an explicit message instead of a NullReferenceException.
            if (_env == null)
            {
                throw Contracts.ExceptNotSupp("The class must be initialized with an environment to enable that functionality.");
            }

            IDataView trans = data;

            using (var ch = _env.Start("Create transforms"))
            {
                for (int i = 0; i < _transforms.Length; ++i)
                {
                    try
                    {
                        trans = _env.CreateTransform(_transforms[i].transformSettings, trans);
                    }
                    catch (Exception e)
                    {
                        if (e.ToString().Contains("Unknown loadable class"))
                        {
                            // List the registered components to help diagnose a missing assembly.
                            // The catalog is queried once and filtered before being sorted.
                            var classes = _env.ComponentCatalog.GetAllClasses();
                            var names   = classes.Select(c => c.UserName)
                                                 .Where(c => c.Trim().Length > 2)
                                                 .OrderBy(c => c);
                            var regis = string.Join("\n", names);
                            throw Contracts.Except(e, $"Unable to create transform '{_transforms[i].transformSettings}', assembly not registered among {classes.Length}\n{regis}");
                        }

                        // Rethrow without resetting the stack trace of the original failure.
                        throw;
                    }

                    // Stays null when the created view is not an IDataTransform.
                    _transforms[i].transform = trans as IDataTransform;
                }
            }

            if (_predictor != null)
            {
                using (var ch = _env.Start("Create Predictor"))
                {
                    _predictor.trainer = TrainerHelper.CreateTrainer(_env, _predictor.trainerSettings);

                    // Record the column roles; the feature role is always present,
                    // the others only when a column name was given.
                    _roles = new List<KeyValuePair<RoleMappedSchema.ColumnRole, string>>
                    {
                        new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Feature, feature)
                    };
                    if (!string.IsNullOrEmpty(label))
                    {
                        _roles.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Label, label));
                    }
                    if (!string.IsNullOrEmpty(groupId))
                    {
                        _roles.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Group, groupId));
                    }
                    if (!string.IsNullOrEmpty(weight))
                    {
                        _roles.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Weight, weight));
                    }

                    var roleMap = new RoleMappedData(trans, label, feature, group: groupId, weight: weight);
                    _predictor.predictor   = _predictor.trainer.Train(_env, ch, roleMap);
                    _predictor.roleMapData = roleMap;
                }
            }
            else
            {
                // No predictor declared: keep a placeholder step that only carries the transformed data.
                _predictor = new StepPredictor()
                {
                    predictor       = null,
                    trainer         = null,
                    trainerSettings = null,
                    roleMapData     = new RoleMappedData(trans)
                };

                // We predict one to make sure everything works fine.
                using (var ch = _env.Start("Compute one prediction."))
                {
                    var df = DataFrameIO.ReadView(trans, 1, keepVectors: true, env: _env);
                    if (df.Length == 0)
                    {
                        throw _env.ExceptEmpty("Something went wrong. The pipeline does not produce any output.");
                    }
                }
            }

            return this;
        }