/// <summary>
/// Normalizes a vector column through the cumulative distribution function (CDF) of a normal distribution
/// whose mean and variance are estimated while fitting.
/// </summary>
/// <param name="input">The vector column to normalize.</param>
/// <param name="ensureZeroUntouched">Controls whether the learned distributional parameters are adjusted so
/// that an input of 0 maps to an output of 0, which preserves sparsity when normalizing sparse vectors.
/// Ignored when <paramref name="useLog"/> is <c>true</c>.</param>
/// <param name="useLog">When <c>true</c>, the transform is computed over the logarithm of the values rather
/// than the raw values, and <paramref name="ensureZeroUntouched"/> is ignored.</param>
/// <param name="maximumExampleCount">Upper bound on the number of examples inspected while gathering
/// statistics.</param>
/// <param name="onFit">Optional delegate invoked each time the estimator is fit, receiving the learned mean
/// and standard deviation for all slots.</param>
/// <remarks>Statistics are gathered, and normalization is applied, independently for each slot of the
/// vector values.</remarks>
/// <returns>The normalized column.</returns>
public static NormVector<double> NormalizeByCumulativeDistribution(
    this Vector<double> input,
    bool ensureZeroUntouched = NormalizingEstimator.Defaults.EnsureZeroUntouched,
    bool useLog = false,
    long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
    OnFitCumulativeDistribution<ImmutableArray<double>> onFit = null)
{
    // NOTE(review): earlier documentation described ensureZeroUntouched = false as the setting that maps
    // 0 to 0; the parameter name suggests true does. Confirm the polarity in NormalizeByMVCdfCore.

    // Wrap the user delegate (possibly null) into the column-function callback the core helper expects.
    var fitCallback = CdfMapper(onFit);

    // The literal fourth argument is fixed by this overload.
    // NOTE(review): presumably it selects the CDF variant of the shared core - confirm.
    return NormalizeByMVCdfCore(input, ensureZeroUntouched, useLog, true, maximumExampleCount, fitCallback);
}
/// <summary>
/// Adapts an optional <c>OnFitCumulativeDistribution</c> delegate into a callback that accepts an
/// <c>IColumnFunction</c>.
/// </summary>
/// <typeparam name="TData">Type argument shared by the delegate and the <c>ICdfData</c> view of the
/// column function.</typeparam>
/// <param name="onFit">The delegate to wrap; may be <c>null</c>.</param>
/// <returns><c>null</c> when <paramref name="onFit"/> is <c>null</c>; otherwise an action that casts its
/// argument to <c>NormalizerTransformer.ICdfData&lt;TData&gt;</c> and passes that object's <c>Mean</c> and
/// <c>Stddev</c> to <paramref name="onFit"/>.</returns>
private static Action<IColumnFunction> CdfMapper<TData>(OnFitCumulativeDistribution<TData> onFit)
{
    Contracts.AssertValueOrNull(onFit);

    if (onFit != null)
    {
        return columnFunction =>
        {
            var cdfData = (NormalizerTransformer.ICdfData<TData>)columnFunction;
            onFit(cdfData.Mean, cdfData.Stddev);
        };
    }

    // No delegate was supplied, so there is nothing to wrap.
    return null;
}
/// <summary>
/// Adapts an optional <c>OnFitCumulativeDistribution</c> delegate into a callback that accepts an
/// <c>IColumnFunction</c>.
/// </summary>
/// <typeparam name="TData">Type argument shared by the delegate and the
/// <c>CdfNormalizerModelParameters</c> object read from the column function.</typeparam>
/// <param name="onFit">The delegate to wrap; may be <c>null</c>.</param>
/// <returns><c>null</c> when <paramref name="onFit"/> is <c>null</c>; otherwise an action that calls
/// <c>GetNormalizerModelParams()</c> on its argument, casts the result to
/// <c>NormalizingTransformer.CdfNormalizerModelParameters&lt;TData&gt;</c>, and passes its <c>Mean</c> and
/// <c>Stddev</c> to <paramref name="onFit"/>.</returns>
/// <exception cref="ArgumentNullException">Thrown by the returned action when it is invoked with a
/// <c>null</c> column function.</exception>
private static Action<IColumnFunction> CdfMapper<TData>(OnFitCumulativeDistribution<TData> onFit)
{
    Contracts.AssertValueOrNull(onFit);
    if (onFit == null)
    {
        return null;
    }

    return col =>
    {
        // Previously `col?.` turned a null column function into null parameters, which were then
        // dereferenced on the next line. Fail here instead, naming the actual culprit.
        if (col == null)
        {
            throw new ArgumentNullException(nameof(col));
        }

        var parameters = (NormalizingTransformer.CdfNormalizerModelParameters<TData>)col.GetNormalizerModelParams();
        onFit(parameters.Mean, parameters.Stddev);
    };
}
/// <summary>
/// Normalizes a vector column through the cumulative distribution function (CDF) of a normal distribution
/// whose mean and variance are estimated while fitting.
/// </summary>
/// <param name="input">The input column.</param>
/// <param name="fixZero">Controls whether the learned distributional parameters are adjusted so that an
/// input of 0 maps to an output of 0, which preserves sparsity when normalizing sparse vectors.
/// Ignored when <paramref name="useLog"/> is <c>true</c>.</param>
/// <param name="useLog">When <c>true</c>, the transform is computed over the logarithm of the values rather
/// than the raw values, and <paramref name="fixZero"/> is ignored.</param>
/// <param name="maxTrainingExamples">Upper bound on the number of examples inspected while gathering
/// statistics.</param>
/// <param name="onFit">Optional delegate invoked each time the estimator is fit, receiving the learned mean
/// and standard deviation for all slots.</param>
/// <remarks>Statistics are gathered, and normalization is applied, independently for each slot of the
/// vector values.</remarks>
/// <returns>The normalized column.</returns>
public static NormVector<double> NormalizeByCumulativeDistribution(
    this Vector<double> input,
    bool fixZero = FZ,
    bool useLog = false,
    long maxTrainingExamples = MaxTrain,
    OnFitCumulativeDistribution<ImmutableArray<double>> onFit = null)
{
    // NOTE(review): earlier documentation described fixZero = false as the setting that maps 0 to 0;
    // the parameter name suggests true does. Confirm the polarity in NormalizeByMVCdfCore.
    return NormalizeByMVCdfCore(
        input,
        fixZero,
        useLog,
        true, // fixed by this overload; NOTE(review): presumably selects the CDF variant - confirm
        maxTrainingExamples,
        CdfMapper(onFit)); // yields null when no delegate was supplied
}