/// <summary>
 /// Initializes the pipeline from textual step definitions.
 /// Only the settings strings are recorded here; the transform, trainer and
 /// predictor objects of every step are left null.
 /// </summary>
 /// <param name="transforms">transform definitions, one per step; can be null or empty</param>
 /// <param name="predictor">trainer definition; when null, no predictor step is created</param>
 /// <param name="host">environment to use; when null, <see cref="ExtendedConsoleEnvironment"/> provides one</param>
 public ScikitPipeline(string[] transforms   = null,
                       string predictor      = null,
                       IHostEnvironment host = null)
 {
     _dispose = false;
     // NOTE(review): invoked without 'new' — this compiles only if ExtendedConsoleEnvironment
     // is a method visible from here; confirm against the rest of the file.
     _env = host ?? ExtendedConsoleEnvironment();

     int userTransformCount = transforms == null ? 0 : transforms.Length;
     _transforms = new StepTransform[userTransformCount + 1];

     // We add a PassThroughTransform to be able to change the source.
     _transforms[0] = new StepTransform()
     {
         transformSettings = "pass", transform = null
     };

     // User-defined steps follow the leading "pass" step, hence the + 1 offset.
     for (int i = 0; i < userTransformCount; ++i)
     {
         _transforms[i + 1] = new StepTransform()
         {
             transformSettings = transforms[i], transform = null
         };
     }

     _predictor = predictor == null ? null : new StepPredictor()
     {
         trainerSettings = predictor,
         predictor       = null,
         trainer         = null,
         roleMapData     = null
     };
     _loaderSettings        = null;
     _roles                 = null;
     _fastValueMapper       = null;
     _fastValueMapperObject = null;
 }
        /// <summary>
        /// Loads a pipeline saved in zip format.
        /// The transform chain is read from the stream, a <see cref="PassThroughTransform"/>
        /// is inserted at its root when none is found, and every transform of the chain
        /// becomes one step of the pipeline (first applied transform first).
        /// The predictor step is filled from the stream when a predictor is stored in it,
        /// otherwise it is created with all its members null.
        /// </summary>
        /// <param name="fs">stream containing the saved pipeline</param>
        protected void Load(Stream fs)
        {
            var transformPipe = ModelFileUtils.LoadPipeline(_env, fs, new MultiFileSource(null), true);
            var pred          = _env.LoadPredictorOrNull(fs);

            // Walk towards the source until we reach either a PassThroughTransform
            // or a view which is not a transform.
            IDataView root = transformPipe;
            while (root is IDataTransform && !(root is PassThroughTransform))
            {
                root = ((IDataTransform)root).Source;
            }

            // No PassThroughTransform in the chain: put one on top of the root view
            // and re-apply the loaded transforms onto it.
            if (!(root is PassThroughTransform))
            {
                var tr = new PassThroughTransform(_env, new PassThroughTransform.Arguments(), root);
                transformPipe = ApplyTransformUtils.ApplyAllTransformsToData(_env, transformPipe, tr, root);
            }

            // Collect the transforms from the last one down to the first one,
            // then reverse to get them in application order.
            var stack = new List<IDataView>();
            for (root = transformPipe; root is IDataTransform; root = ((IDataTransform)root).Source)
            {
                stack.Add(root);
            }
            stack.Reverse();

            _transforms = new StepTransform[stack.Count];
            for (int i = 0; i < _transforms.Length; ++i)
            {
                _transforms[i] = new StepTransform()
                {
                    transform = stack[i] as IDataTransform, transformSettings = null
                };
            }

            if (pred == null)
            {
                _predictor = new StepPredictor()
                {
                    predictor = null, roleMapData = null, trainer = null, trainerSettings = null
                };
            }
            else
            {
#pragma warning disable CS0618
                var ipred = pred.GetPredictorObject() as IPredictor;
#pragma warning restore CS0618
                // LoadRoleMappingsOrNull may return null: fall back to an empty
                // role list instead of failing with a NullReferenceException.
                var loadedRoles = ModelFileUtils.LoadRoleMappingsOrNull(_env, fs);
                if (loadedRoles == null)
                {
                    _roles = new List<KeyValuePair<RoleMappedSchema.ColumnRole, string>>();
                }
                else
                {
                    _roles = loadedRoles.ToList();
                }
                var data = new RoleMappedData(transformPipe, _roles);
                _predictor = new StepPredictor()
                {
                    predictor = ipred, roleMapData = data, trainer = null, trainerSettings = null
                };
            }
            _fastValueMapper = null;
        }
        /// <summary>
        /// Loads a pipeline saved in onnx format.
        /// The pipeline ends up with two transform steps: a <see cref="PassThroughTransform"/>
        /// created with a null source, followed by whatever ConvertFromOnnx.ReadOnnx
        /// returns for the stream. The predictor step is created with all its members null.
        /// </summary>
        /// <param name="fs">stream handed over to ConvertFromOnnx.ReadOnnx</param>
        protected void LoadOnnx(Stream fs)
        {
            // Entry point of the chain; its source is null at this stage.
            var passThrough = new PassThroughTransform(_env, new PassThroughTransform.Arguments(), null);

            _transforms = new StepTransform[2];

            // Step 0: the pass-through transform itself.
            _transforms[0] = new StepTransform()
            {
                transform         = passThrough as IDataTransform,
                transformSettings = null
            };

            // Step 1: the transform read from the onnx stream, plugged on the pass-through.
            _transforms[1] = new StepTransform()
            {
                transform         = ConvertFromOnnx.ReadOnnx(fs, passThrough),
                transformSettings = null
            };

            // No trainer nor predictor is attached to a pipeline loaded this way.
            _predictor = new StepPredictor()
            {
                predictor       = null,
                roleMapData     = null,
                trainer         = null,
                trainerSettings = null
            };

            _fastValueMapper = null;
        }
        /// <summary>
        /// Trains the pipeline with data coming from a <see cref="IDataView"/>.
        /// Every transform step is created from its settings and chained onto the previous one.
        /// When a predictor step exists, its trainer is created from the trainer settings and
        /// trained on the transformed data. Otherwise a predictor step holding only the
        /// transformed data is created and the view is read once to check the pipeline
        /// produces some output.
        /// </summary>
        /// <param name="data">training data, source of the first transform</param>
        /// <param name="feature">name of the feature column</param>
        /// <param name="label">name of the label column; ignored in the role list when null or empty</param>
        /// <param name="weight">name of the weight column; ignored in the role list when null or empty</param>
        /// <param name="groupId">name of the group column; ignored in the role list when null or empty</param>
        /// <returns>this pipeline</returns>
        public ScikitPipeline Train(IDataView data,
                                    string feature = "Feature", string label = null,
                                    string weight  = null, string groupId    = null)
        {
            IDataView trans = data;

            using (_env.Start("Create transforms"))
            {
                for (int i = 0; i < _transforms.Length; ++i)
                {
                    try
                    {
                        trans = _env.CreateTransform(_transforms[i].transformSettings, trans);
                    }
                    catch (Exception e)
                    {
                        if (!e.ToString().Contains("Unknown loadable class"))
                        {
                            // Plain rethrow: 'throw e;' would reset the stack trace.
                            throw;
                        }

                        // The transform is unknown to the catalog: list what is registered
                        // to help finding out which assembly is missing.
                        var classes = _env.ComponentCatalog.GetAllClasses();
                        var nn      = classes.Length;
                        var filt    = classes.Select(c => c.UserName)
                                             .Where(c => c.Trim().Length > 2)
                                             .OrderBy(c => c);
                        var regis = string.Join("\n", filt);
                        throw Contracts.Except(e, $"Unable to create transform '{_transforms[i].transformSettings}', assembly not registered among {nn}\n{regis}");
                    }
                    _transforms[i].transform = trans as IDataTransform;
                }
            }

            if (_predictor != null)
            {
                using (var ch = _env.Start("Create Predictor"))
                {
                    _predictor.trainer = TrainerHelper.CreateTrainer(_env, _predictor.trainerSettings);

                    // The role list is kept in _roles; the feature role is always present,
                    // the other ones only when a column name was given.
                    _roles = new List<KeyValuePair<RoleMappedSchema.ColumnRole, string>>();
                    _roles.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Feature, feature));
                    if (!string.IsNullOrEmpty(label))
                    {
                        _roles.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Label, label));
                    }
                    if (!string.IsNullOrEmpty(groupId))
                    {
                        _roles.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Group, groupId));
                    }
                    if (!string.IsNullOrEmpty(weight))
                    {
                        _roles.Add(new KeyValuePair<RoleMappedSchema.ColumnRole, string>(RoleMappedSchema.ColumnRole.Weight, weight));
                    }

                    var roleMap = new RoleMappedData(trans, label, feature, group: groupId, weight: weight);
                    _predictor.predictor   = _predictor.trainer.Train(_env, ch, roleMap);
                    _predictor.roleMapData = roleMap;
                }
            }
            else
            {
                _predictor = new StepPredictor()
                {
                    predictor       = null,
                    trainer         = null,
                    trainerSettings = null,
                    roleMapData     = new RoleMappedData(trans)
                };

                // We predict one to make sure everything works fine.
                using (_env.Start("Compute one prediction."))
                {
                    var df = DataFrameIO.ReadView(trans, 1, keepVectors: true, env: _env);
                    if (df.Length == 0)
                    {
                        throw _env.ExceptEmpty("Something went wrong. The pipeline does not produce any output.");
                    }
                }
            }
            return this;
        }