// The static API groups normalizers by the kind of mapping that gets computed, whereas the
// dynamic API groups them by how they are initialized; mean-var and CDF normalizers gather
// identical statistics, so both funnel through the same core below.
/// <summary>
/// Learns an affine function from the observed per-slot mean and standard deviation. Compared to
/// <see cref="Normalize(Vector{float}, bool, long, OnFitAffine{ImmutableArray{float}})"/> this is
/// less sensitive to outliers.
/// </summary>
/// <param name="input">The column containing the vectors to apply the normalization to.</param>
/// <param name="ensureZeroUntouched">If set to <c>true</c> then the offset will always be considered zero.</param>
/// <param name="useLog">If set to true then we transform over the logarithm of the values, rather
/// than just the raw values. If this is set to <c>true</c> then <paramref name="ensureZeroUntouched"/> is ignored.</param>
/// <param name="maximumExampleCount">When gathering statistics only look at most this many examples.</param>
/// <param name="onFit">A delegate called whenever the estimator is fit, with the learned slopes
/// and, if <paramref name="ensureZeroUntouched"/> is <c>false</c>, the offsets as well.</param>
/// <remarks>Statistics gathering and normalization happen independently for each slot of the
/// vector values.</remarks>
/// <returns>The normalized column.</returns>
public static NormVector<float> NormalizeMeanVariance(
    this Vector<float> input,
    bool ensureZeroUntouched = NormalizingEstimator.Defaults.EnsureZeroUntouched,
    bool useLog = false,
    long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
    OnFitAffine<ImmutableArray<float>> onFit = null)
    => NormalizeByMVCdfCore(input, ensureZeroUntouched, useLog, false, maximumExampleCount, AffineMapper(onFit));
/// <summary>
/// Shared core for the min-max normalization overloads: validates the arguments and builds the
/// estimator-backed column wrapper.
/// </summary>
/// <param name="input">The column containing the vectors to normalize.</param>
/// <param name="fixZero">Whether zero should always map to zero (preserves sparsity).</param>
/// <param name="maxTrainingExamples">Upper bound on the number of examples used when gathering statistics; must exceed 1.</param>
/// <param name="onFit">Optional callback invoked with the learned affine parameters after fitting.</param>
/// <returns>The normalized column.</returns>
private static NormVector<T> NormalizeByMinMaxCore<T>(Vector<T> input, bool fixZero, long maxTrainingExamples, OnFitAffine<ImmutableArray<T>> onFit)
{
    Contracts.CheckValue(input, nameof(input));
    Contracts.CheckParam(maxTrainingExamples > 1, nameof(maxTrainingExamples), "Must be greater than 1");
    // Capture the fit callback once, then hand the column factory to the implementation wrapper.
    var fitMapper = AffineMapper(onFit);
    return new Impl<T>(input, (src, name) => new Normalizer.MinMaxColumn(src, name, maxTrainingExamples, fixZero), fitMapper);
}
/// <summary>
/// Shared core for the min-max normalization overloads: validates the arguments and builds the
/// estimator-backed column wrapper around <see cref="NormalizingEstimator.MinMaxColumnOptions"/>.
/// </summary>
/// <param name="input">The column containing the vectors to normalize.</param>
/// <param name="ensureZeroUntouched">Whether zero should always map to zero (preserves sparsity).</param>
/// <param name="maximumExampleCount">Upper bound on the number of examples used when gathering statistics; must exceed 1.</param>
/// <param name="onFit">Optional callback invoked with the learned affine parameters after fitting.</param>
/// <returns>The normalized column.</returns>
private static NormVector<T> NormalizeByMinMaxCore<T>(Vector<T> input, bool ensureZeroUntouched, long maximumExampleCount, OnFitAffine<ImmutableArray<T>> onFit)
{
    Contracts.CheckValue(input, nameof(input));
    Contracts.CheckParam(maximumExampleCount > 1, nameof(maximumExampleCount), "Must be greater than 1");
    // Capture the fit callback once, then hand the options factory to the implementation wrapper.
    var fitMapper = AffineMapper(onFit);
    return new Impl<T>(input, (name, src) => new NormalizingEstimator.MinMaxColumnOptions(name, src, maximumExampleCount, ensureZeroUntouched), fitMapper);
}
/// <summary>
/// Learns an affine function from the observed per-slot minimum and maximum, so that values between
/// the fitted minimum and maximum land in the range of -1 to 1.
/// </summary>
/// <param name="input">The column containing the vectors to apply the normalization to.</param>
/// <param name="ensureZeroUntouched">If set to <c>false</c>, then the observed minimum and maximum during fitting
/// will map to -1 and 1 respectively, exactly. If however set to <c>true</c>, then 0 will always map to 0.
/// This is valuable for the sake of sparsity preservation, if normalizing sparse vectors.</param>
/// <param name="maximumExampleCount">When gathering statistics only look at most this many examples.</param>
/// <param name="onFit">A delegate called whenever the estimator is fit, with the learned slopes
/// and, if <paramref name="ensureZeroUntouched"/> is <c>false</c>, the offsets as well.</param>
/// <remarks>Statistics gathering and normalization happen independently for each slot of the
/// vector values. Values transformed later that fall below the fitted minimum or above the fitted
/// maximum may produce outputs outside the range of -1 to 1.</remarks>
/// <returns>The normalized column.</returns>
public static NormVector<double> Normalize(
    this Vector<double> input,
    bool ensureZeroUntouched = NormalizingEstimator.Defaults.EnsureZeroUntouched,
    long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
    OnFitAffine<ImmutableArray<double>> onFit = null)
    => NormalizeByMinMaxCore(input, ensureZeroUntouched, maximumExampleCount, onFit);
/// <summary>
/// Adapts a user-supplied <see cref="OnFitAffine{TData}"/> callback into the
/// <see cref="IColumnFunction"/>-consuming form the implementation expects.
/// Returns <c>null</c> when no callback was supplied.
/// </summary>
/// <param name="onFit">The user callback to wrap, or <c>null</c>.</param>
/// <returns>A wrapper that extracts the affine scale/offset and forwards them, or <c>null</c>.</returns>
private static Action<IColumnFunction> AffineMapper<TData>(OnFitAffine<TData> onFit)
{
    Contracts.AssertValueOrNull(onFit);
    if (onFit is null)
        return null;
    return col =>
    {
        // The column function is expected to expose its learned parameters via IAffineData.
        var affine = (NormalizerTransformer.IAffineData<TData>)col;
        onFit(affine.Scale, affine.Offset);
    };
}
/// <summary>
/// Adapts a user-supplied <see cref="OnFitAffine{TData}"/> callback into the
/// <see cref="IColumnFunction"/>-consuming form the implementation expects.
/// Returns <c>null</c> when no callback was supplied.
/// </summary>
/// <param name="onFit">The user callback to wrap, or <c>null</c>.</param>
/// <returns>A wrapper that extracts the affine model parameters and forwards them, or <c>null</c>.</returns>
private static Action<IColumnFunction> AffineMapper<TData>(OnFitAffine<TData> onFit)
{
    Contracts.AssertValueOrNull(onFit);
    if (onFit == null)
        return null;
    return col =>
    {
        // Fix: the previous code used "col?.GetNormalizerModelParams()" but then dereferenced the
        // result unconditionally, so a null column still crashed — just one statement later and with
        // a less informative NullReferenceException. Assert non-null up front instead.
        Contracts.AssertValue(col);
        var aCol = (NormalizingTransformer.AffineNormalizerModelParameters<TData>)col.GetNormalizerModelParams();
        onFit(aCol.Scale, aCol.Offset);
    };
}
// The static API groups normalizers by the kind of mapping that gets computed, whereas the
// dynamic API groups them by how they are initialized; mean-var and CDF normalizers gather
// identical statistics, so both funnel through the same core below.
/// <summary>
/// Learns an affine function from the observed per-slot mean and standard deviation. Compared to
/// <see cref="Normalize(Vector{float}, bool, long, OnFitAffine{ImmutableArray{float}})"/> this is
/// less sensitive to outliers.
/// </summary>
/// <param name="input">The input column.</param>
/// <param name="fixZero">If set to <c>true</c> then the offset will always be considered zero.</param>
/// <param name="useLog">If set to true then we transform over the logarithm of the values, rather
/// than just the raw values. If this is set to <c>true</c> then <paramref name="fixZero"/> is ignored.</param>
/// <param name="maxTrainingExamples">When gathering statistics only look at most this many examples.</param>
/// <param name="onFit">A delegate called whenever the estimator is fit, with the learned slopes
/// and, if <paramref name="fixZero"/> is <c>false</c>, the offsets as well.</param>
/// <remarks>Statistics gathering and normalization happen independently for each slot of the
/// vector values.</remarks>
/// <returns>The normalized column.</returns>
public static NormVector<float> NormalizeByMeanVar(
    this Vector<float> input,
    bool fixZero = FZ,
    bool useLog = false,
    long maxTrainingExamples = MaxTrain,
    OnFitAffine<ImmutableArray<float>> onFit = null)
    => NormalizeByMVCdfCore(input, fixZero, useLog, false, maxTrainingExamples, AffineMapper(onFit));
/// <summary>
/// Learns an affine function from the observed per-slot minimum and maximum, so that values between
/// the fitted minimum and maximum land in the range of -1 to 1.
/// </summary>
/// <param name="input">The input column.</param>
/// <param name="fixZero">If set to <c>false</c>, then the observed minimum and maximum during fitting
/// will map to -1 and 1 respectively, exactly. If however set to <c>true</c>, then 0 will always map to 0.
/// This is valuable for the sake of sparsity preservation, if normalizing sparse vectors.</param>
/// <param name="maxTrainingExamples">When gathering statistics only look at most this many examples.</param>
/// <param name="onFit">A delegate called whenever the estimator is fit, with the learned slopes
/// and, if <paramref name="fixZero"/> is <c>false</c>, the offsets as well.</param>
/// <remarks>Statistics gathering and normalization happen independently for each slot of the
/// vector values. Values transformed later that fall below the fitted minimum or above the fitted
/// maximum may produce outputs outside the range of -1 to 1.</remarks>
/// <returns>The normalized column.</returns>
public static NormVector<double> Normalize(
    this Vector<double> input,
    bool fixZero = FZ,
    long maxTrainingExamples = MaxTrain,
    OnFitAffine<ImmutableArray<double>> onFit = null)
    => NormalizeByMinMaxCore(input, fixZero, maxTrainingExamples, onFit);