/// <summary>
/// Conditionally wraps the feature column of <paramref name="data"/> in a min-max normalizer.
/// The role mappings of the incoming data are carried over to the replacement
/// <see cref="RoleMappedData"/> by re-using the schema's column role names.
/// </summary>
/// <param name="env">The host environment used for validation and, if needed, to create the normalizer.</param>
/// <param name="data">The role-mapped data. It is replaced with a normalized version only when this
/// method returns <c>true</c>; otherwise it is left untouched.</param>
/// <param name="trainer">The trainer queried through <see cref="NormalizeUtils.NeedNormalization(ITrainer)"/>.
/// Unless that query yields a definite <c>true</c>, nothing is changed.</param>
/// <returns><c>true</c> when a normalizer was applied and <paramref name="data"/> was reassigned;
/// <c>false</c> when the data was left as-is.</returns>
public static bool CreateIfNeeded(IHostEnvironment env, ref RoleMappedData data, ITrainer trainer)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(data, nameof(data));
    env.CheckValue(trainer, nameof(trainer));

    // Only a definite "true" from the trainer asks for normalization;
    // "false" and "unknown" (null) both mean we leave the data alone.
    var trainerWantsNormalization = trainer.NeedNormalization();
    if (trainerWantsNormalization != true)
    {
        return false;
    }

    // Only a definite "false" from the schema tells us the features still need work;
    // "true" and "unknown" (null) both mean we leave the data alone.
    var featuresAlreadyNormalized = data.Schema.FeaturesAreNormalized();
    if (featuresAlreadyNormalized != false)
    {
        return false;
    }

    // The feature column is expected to be defined here, because
    // FeaturesAreNormalized returned a definite value.
    var featureColumn = data.Schema.Feature;
    env.AssertValue(featureColumn);

    var normalizedView = CreateMinMaxNormalizer(env, data.Data, name: featureColumn.Name);
    data = RoleMappedData.Create(normalizedView, data.Schema.GetColumnRoleNames());
    return true;
}
/// <summary>
/// Appends a min-max normalizer for <paramref name="featureColumn"/> to <paramref name="view"/>
/// when the <paramref name="autoNorm"/> option and the trainer call for it.
/// </summary>
/// <param name="env">The host environment used for validation and to create the normalizer.</param>
/// <param name="ch">The channel that receives the informational and warning messages.</param>
/// <param name="trainer">The trainer whose <c>NeedNormalization()</c> result is consulted
/// unless <paramref name="autoNorm"/> is <see cref="NormalizeOption.Yes"/>.</param>
/// <param name="view">The data view. It is reassigned to the normalized view only when this
/// method returns <c>true</c>. If it is an <see cref="IDataLoader"/>, the normalizer is applied
/// through <see cref="CompositeDataLoader"/>; otherwise it is applied to the view directly.</param>
/// <param name="featureColumn">Name of the feature column. A null or empty name, or a name
/// that is not found in the schema of <paramref name="view"/>, results in no normalizer.</param>
/// <param name="autoNorm">The normalization option. It must be a defined <see cref="NormalizeOption"/> value.</param>
/// <returns><c>true</c> if a normalizer was added; otherwise <c>false</c>.</returns>
public static bool AddNormalizerIfNeeded(IHostEnvironment env, IChannel ch, ITrainer trainer, ref IDataView view, string featureColumn, NormalizeOption autoNorm)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ch, nameof(ch));
    ch.CheckValue(trainer, nameof(trainer));
    ch.CheckValue(view, nameof(view));
    ch.CheckValueOrNull(featureColumn);
    ch.CheckUserArg(Enum.IsDefined(typeof(NormalizeOption), autoNorm), nameof(TrainCommand.Arguments.NormalizeFeatures),
        "Normalize option is invalid. Specify one of 'norm=No', 'norm=Warn', 'norm=Auto', or 'norm=Yes'.");

    if (autoNorm == NormalizeOption.No)
    {
        ch.Info("Not adding a normalizer.");
        return false;
    }

    if (string.IsNullOrEmpty(featureColumn))
    {
        return false;
    }

    // Without a matching column in the schema there is nothing to normalize.
    var schema = view.Schema;
    if (!schema.TryGetColumnIndex(featureColumn, out int featCol))
    {
        return false;
    }

    // 'Yes' forces the normalizer; every other remaining option first checks
    // whether the trainer wants it and whether the column is already normalized.
    if (autoNorm != NormalizeOption.Yes)
    {
        if (trainer.NeedNormalization() != true || schema.IsNormalized(featCol))
        {
            ch.Info("Not adding a normalizer.");
            return false;
        }

        // 'Warn' only reports that a normalizer is needed; it never adds one.
        if (autoNorm == NormalizeOption.Warn)
        {
            ch.Warning("A normalizer is needed for this trainer. Either add a normalizing transform or use the 'norm=Auto', 'norm=Yes' or 'norm=No' options.");
            return false;
        }
    }

    ch.Info("Automatically adding a MinMax normalization transform, use 'norm=Warn' or 'norm=No' to turn this behavior off.");

    IDataView ApplyNormalizer(IHostEnvironment innerEnv, IDataView input)
        => NormalizeTransform.CreateMinMaxNormalizer(innerEnv, input, featureColumn);

    if (view is IDataLoader loader)
    {
        view = CompositeDataLoader.ApplyTransform(env, loader, tag: null, creationArgs: null, ApplyNormalizer);
    }
    else
    {
        view = ApplyNormalizer(env, view);
    }

    return true;
}