Ejemplo n.º 1
0
        private static bool AddCacheIfWanted(IHostEnvironment env, IChannel ch, ITrainer trainer, ref RoleMappedData data, bool?cacheData)
        {
            Contracts.AssertValue(env, nameof(env));
            env.AssertValue(ch, nameof(ch));
            ch.AssertValue(trainer, nameof(trainer));
            ch.AssertValue(data, nameof(data));

            ITrainerEx trainerEx   = trainer as ITrainerEx;
            bool       shouldCache = cacheData ?? (!(data.Data is BinaryLoader) && (trainerEx == null || trainerEx.WantCaching));

            if (shouldCache)
            {
                ch.Trace("Caching");
                var prefetch  = data.Schema.GetColumnRoles().Select(kc => kc.Value.Index).ToArray();
                var cacheView = new CacheDataView(env, data.Data, prefetch);
                // Because the prefetching worked, we know that these are valid columns.
                data = RoleMappedData.Create(cacheView, data.Schema.GetColumnRoleNames());
            }
            else
            {
                ch.Trace("Not caching");
            }
            return(shouldCache);
        }
Ejemplo n.º 2
0
        public static TOut Train <TArg, TOut>(IHost host, TArg input,
                                              Func <ITrainer> createTrainer,
                                              Func <string> getLabel  = null,
                                              Func <string> getWeight = null,
                                              Func <string> getGroup  = null,
                                              Func <string> getName   = null,
                                              Func <IEnumerable <KeyValuePair <RoleMappedSchema.ColumnRole, string> > > getCustom = null,
                                              ICalibratorTrainerFactory calibrator = null,
                                              int maxCalibrationExamples           = 0)
            where TArg : LearnerInputBase
            where TOut : CommonOutputs.TrainerOutput, new()
        {
            using (var ch = host.Start("Training"))
            {
                ISchema schema  = input.TrainingData.Schema;
                var     feature = FindColumn(ch, schema, input.FeatureColumn);
                var     label   = getLabel?.Invoke();
                var     weight  = getWeight?.Invoke();
                var     group   = getGroup?.Invoke();
                var     name    = getName?.Invoke();
                var     custom  = getCustom?.Invoke();

                var trainer = createTrainer();

                IDataView view = input.TrainingData;
                TrainUtils.AddNormalizerIfNeeded(host, ch, trainer, ref view, feature, input.NormalizeFeatures);

                ch.Trace("Binding columns");
                var roleMappedData = TrainUtils.CreateExamples(view, label, feature, group, weight, name, custom);

                RoleMappedData    cachedRoleMappedData = roleMappedData;
                Cache.CachingType?cachingType          = null;
                switch (input.Caching)
                {
                case CachingOptions.Memory:
                {
                    cachingType = Cache.CachingType.Memory;
                    break;
                }

                case CachingOptions.Disk:
                {
                    cachingType = Cache.CachingType.Disk;
                    break;
                }

                case CachingOptions.Auto:
                {
                    ITrainerEx trainerEx = trainer as ITrainerEx;
                    // REVIEW: we should switch to hybrid caching in future.
                    if (!(input.TrainingData is BinaryLoader) && (trainerEx == null || trainerEx.WantCaching))
                    {
                        // default to Memory so mml is on par with maml
                        cachingType = Cache.CachingType.Memory;
                    }
                    break;
                }

                case CachingOptions.None:
                    break;

                default:
                    throw ch.ExceptParam(nameof(input.Caching), "Unknown option for caching: '{0}'", input.Caching);
                }

                if (cachingType.HasValue)
                {
                    var cacheView = Cache.CacheData(host, new Cache.CacheInput()
                    {
                        Data    = roleMappedData.Data,
                        Caching = cachingType.Value
                    }).OutputData;
                    cachedRoleMappedData = RoleMappedData.Create(cacheView, roleMappedData.Schema.GetColumnRoleNames());
                }

                var predictor = TrainUtils.Train(host, ch, cachedRoleMappedData, trainer, "Train", calibrator, maxCalibrationExamples);
                var output    = new TOut()
                {
                    PredictorModel = new PredictorModel(host, roleMappedData, input.TrainingData, predictor)
                };

                ch.Done();
                return(output);
            }
        }