/// <summary>
/// Decides whether the training data should be cached and, when it should, replaces
/// <paramref name="data"/> with a role-mapped view over a <see cref="CacheDataView"/>
/// that wraps the original data.
/// </summary>
/// <param name="env">Host environment; used for assertions and handed to the cache view.</param>
/// <param name="ch">Channel used for assertions and for the "Caching" / "Not caching" trace messages.</param>
/// <param name="trainer">Trainer being prepared; its <c>WantCaching</c> value is consulted when it is an <see cref="ITrainerEx"/>.</param>
/// <param name="data">Role-mapped training data. Reassigned to the cached equivalent when caching is applied; left untouched otherwise.</param>
/// <param name="cacheData">Explicit caching choice. When null, the decision is derived from the data and the trainer.</param>
/// <returns><c>true</c> if <paramref name="data"/> was replaced by a cached version, <c>false</c> otherwise.</returns>
private static bool AddCacheIfWanted(IHostEnvironment env, IChannel ch, ITrainer trainer, ref RoleMappedData data, bool? cacheData)
{
    Contracts.AssertValue(env, nameof(env));
    env.AssertValue(ch, nameof(ch));
    ch.AssertValue(trainer, nameof(trainer));
    ch.AssertValue(data, nameof(data));

    bool shouldCache;
    if (cacheData.HasValue)
    {
        // An explicit request always wins over the automatic decision.
        shouldCache = cacheData.Value;
    }
    else if (data.Data is BinaryLoader)
    {
        // Data coming straight from a BinaryLoader is never cached automatically.
        shouldCache = false;
    }
    else
    {
        // Trainers that are not ITrainerEx get caching; the others decide for themselves.
        var extendedTrainer = trainer as ITrainerEx;
        shouldCache = extendedTrainer == null || extendedTrainer.WantCaching;
    }

    if (!shouldCache)
    {
        ch.Trace("Not caching");
        return false;
    }

    ch.Trace("Caching");

    // Prefetch exactly the columns that have a role assigned in the schema.
    var roleColumnIndices = data.Schema.GetColumnRoles()
        .Select(roleColumn => roleColumn.Value.Index)
        .ToArray();
    var cachedView = new CacheDataView(env, data.Data, roleColumnIndices);

    // Because the prefetching worked, we know that these are valid columns.
    data = RoleMappedData.Create(cachedView, data.Schema.GetColumnRoleNames());
    return true;
}
/// <summary>
/// Trains a predictor over <c>input.TrainingData</c>: optionally adds a normalizer, binds the
/// column roles, optionally caches the data according to <c>input.Caching</c>, runs the trainer and
/// wraps the result in a new <typeparamref name="TOut"/>.
/// </summary>
/// <typeparam name="TArg">Learner input type.</typeparam>
/// <typeparam name="TOut">Trainer output type; its <c>PredictorModel</c> is set to the trained model.</typeparam>
/// <param name="host">Host used to open the "Training" channel and passed to the helper calls.</param>
/// <param name="input">Learner arguments; supplies the training data, feature column, normalization and caching options.</param>
/// <param name="createTrainer">Factory invoked once to obtain the trainer.</param>
/// <param name="getLabel">Optional provider of the label column name.</param>
/// <param name="getWeight">Optional provider of the weight column name.</param>
/// <param name="getGroup">Optional provider of the group column name.</param>
/// <param name="getName">Optional provider of the name column name.</param>
/// <param name="getCustom">Optional provider of additional (role, column name) pairs.</param>
/// <param name="calibrator">Forwarded unchanged to <c>TrainUtils.Train</c>.</param>
/// <param name="maxCalibrationExamples">Forwarded unchanged to <c>TrainUtils.Train</c>.</param>
/// <returns>A new <typeparamref name="TOut"/> whose <c>PredictorModel</c> holds the trained predictor.</returns>
/// <remarks>
/// An unrecognized <c>input.Caching</c> value results in the exception produced by <c>ch.ExceptParam</c>.
/// </remarks>
public static TOut Train<TArg, TOut>(IHost host, TArg input,
    Func<ITrainer> createTrainer,
    Func<string> getLabel = null,
    Func<string> getWeight = null,
    Func<string> getGroup = null,
    Func<string> getName = null,
    Func<IEnumerable<KeyValuePair<RoleMappedSchema.ColumnRole, string>>> getCustom = null,
    ICalibratorTrainerFactory calibrator = null,
    int maxCalibrationExamples = 0)
    where TArg : LearnerInputBase
    where TOut : CommonOutputs.TrainerOutput, new()
{
    using (var ch = host.Start("Training"))
    {
        ISchema schema = input.TrainingData.Schema;
        var featureColumn = FindColumn(ch, schema, input.FeatureColumn);

        // Optional role providers: a missing delegate simply yields a null column name.
        var labelColumn = getLabel != null ? getLabel() : null;
        var weightColumn = getWeight != null ? getWeight() : null;
        var groupColumn = getGroup != null ? getGroup() : null;
        var nameColumn = getName != null ? getName() : null;
        var customColumns = getCustom != null ? getCustom() : null;

        var trainer = createTrainer();

        // The helper receives the view by reference and may replace it.
        IDataView trainingView = input.TrainingData;
        TrainUtils.AddNormalizerIfNeeded(host, ch, trainer, ref trainingView, featureColumn, input.NormalizeFeatures);

        ch.Trace("Binding columns");
        var examples = TrainUtils.CreateExamples(trainingView, labelColumn, featureColumn, groupColumn, weightColumn, nameColumn, customColumns);

        // Translate the requested caching option into a concrete cache type (null means no caching).
        Cache.CachingType? cacheKind = null;
        switch (input.Caching)
        {
            case CachingOptions.Memory:
                cacheKind = Cache.CachingType.Memory;
                break;

            case CachingOptions.Disk:
                cacheKind = Cache.CachingType.Disk;
                break;

            case CachingOptions.Auto:
                // REVIEW: we should switch to hybrid caching in future.
                if (!(input.TrainingData is BinaryLoader))
                {
                    var extendedTrainer = trainer as ITrainerEx;
                    if (extendedTrainer == null || extendedTrainer.WantCaching)
                    {
                        // default to Memory so mml is on par with maml
                        cacheKind = Cache.CachingType.Memory;
                    }
                }
                break;

            case CachingOptions.None:
                break;

            default:
                throw ch.ExceptParam(nameof(input.Caching), "Unknown option for caching: '{0}'", input.Caching);
        }

        // Train on the cached data when a cache type was selected, otherwise on the bound examples as they are.
        RoleMappedData trainingExamples;
        if (cacheKind.HasValue)
        {
            var cacheInput = new Cache.CacheInput() { Data = examples.Data, Caching = cacheKind.Value };
            var cachedView = Cache.CacheData(host, cacheInput).OutputData;
            trainingExamples = RoleMappedData.Create(cachedView, examples.Schema.GetColumnRoleNames());
        }
        else
        {
            trainingExamples = examples;
        }

        var predictor = TrainUtils.Train(host, ch, trainingExamples, trainer, "Train", calibrator, maxCalibrationExamples);

        // The model is built from the un-cached role-mapped data and the original training data.
        var output = new TOut();
        output.PredictorModel = new PredictorModel(host, examples, input.TrainingData, predictor);

        ch.Done();
        return output;
    }
}