Esempio n. 1
0
        // We have a slightly different breaking up of categories of normalizers versus the dynamic API. Both the mean-var and
        // CDF normalizers are initialized in the same way because they gather exactly the same statistics, but from the point of
        // view of the static API what is more important is the type of mapping that winds up being computed.

        /// <summary>
        /// Learns an affine function based on the observed mean and standard deviation. This is less susceptible
        /// to outliers as compared to <see cref="Normalize(Vector{float}, bool, long, OnFitAffine{ImmutableArray{float}})"/>.
        /// </summary>
        /// <param name="input">The column containing the vectors to apply the normalization to.</param>
        /// <param name="ensureZeroUntouched">If set to <c>true</c> then the offset will always be considered zero.</param>
        /// <param name="useLog">If set to <c>true</c> then we transform over the logarithm of the values, rather
        /// than just the raw values. If this is set to <c>true</c> then <paramref name="ensureZeroUntouched"/> is ignored.</param>
        /// <param name="maximumExampleCount">When gathering statistics only look at most this many examples.</param>
        /// <param name="onFit">A delegate called whenever the estimator is fit, with the learned slopes
        /// and, if <paramref name="ensureZeroUntouched"/> is <c>false</c>, the offsets as well.</param>
        /// <remarks>Note that the statistics gathering and normalization is done independently per slot of the
        /// vector values.</remarks>
        /// <returns>The normalized column.</returns>
        public static NormVector<float> NormalizeMeanVariance(
            this Vector<float> input,
            bool ensureZeroUntouched = NormalizingEstimator.Defaults.EnsureZeroUntouched,
            bool useLog = false,
            long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
            OnFitAffine<ImmutableArray<float>> onFit = null)
        {
            // Adapt the caller's affine callback (may be null) into the callback shape the shared core accepts.
            var fitCallback = AffineMapper(onFit);

            // NOTE(review): the literal `false` presumably selects the affine (non-CDF) mapping in the shared
            // mean-variance/CDF core -- confirm against NormalizeByMVCdfCore's signature.
            return NormalizeByMVCdfCore(input, ensureZeroUntouched, useLog, false, maximumExampleCount, fitCallback);
        }
 /// <summary>
 /// Shared min-max normalization helper: checks the arguments and builds the <c>Impl</c> column that
 /// carries the input, a factory for the <c>Normalizer.MinMaxColumn</c> description, and the fit callback.
 /// </summary>
 /// <typeparam name="T">The element type of the vector being normalized.</typeparam>
 /// <param name="input">The input column; checked with <c>Contracts.CheckValue</c>.</param>
 /// <param name="fixZero">Forwarded unchanged to the <c>Normalizer.MinMaxColumn</c> constructor.</param>
 /// <param name="maxTrainingExamples">Forwarded to the <c>Normalizer.MinMaxColumn</c> constructor; checked to
 /// be greater than 1.</param>
 /// <param name="onFit">Optional affine callback; adapted through <c>AffineMapper</c>. May be <c>null</c>.</param>
 /// <returns>The newly constructed <c>Impl</c> column.</returns>
 private static NormVector<T> NormalizeByMinMaxCore<T>(
     Vector<T> input, bool fixZero, long maxTrainingExamples, OnFitAffine<ImmutableArray<T>> onFit)
 {
     Contracts.CheckValue(input, nameof(input));
     Contracts.CheckParam(maxTrainingExamples > 1, nameof(maxTrainingExamples), "Must be greater than 1");

     // Adapt the typed affine callback before handing it to the column implementation.
     var fitCallback = AffineMapper(onFit);

     return new Impl<T>(
         input,
         (src, name) => new Normalizer.MinMaxColumn(src, name, maxTrainingExamples, fixZero),
         fitCallback);
 }
Esempio n. 3
0
 /// <summary>
 /// Shared min-max normalization helper: checks the arguments and builds the <c>Impl</c> column that
 /// carries the input, a factory for the <c>NormalizingEstimator.MinMaxColumnOptions</c>, and the fit callback.
 /// </summary>
 /// <typeparam name="T">The element type of the vector being normalized.</typeparam>
 /// <param name="input">The input column; checked with <c>Contracts.CheckValue</c>.</param>
 /// <param name="ensureZeroUntouched">Forwarded unchanged to the <c>MinMaxColumnOptions</c> constructor.</param>
 /// <param name="maximumExampleCount">Forwarded to the <c>MinMaxColumnOptions</c> constructor; checked to
 /// be greater than 1.</param>
 /// <param name="onFit">Optional affine callback; adapted through <c>AffineMapper</c>. May be <c>null</c>.</param>
 /// <returns>The newly constructed <c>Impl</c> column.</returns>
 private static NormVector<T> NormalizeByMinMaxCore<T>(
     Vector<T> input, bool ensureZeroUntouched, long maximumExampleCount, OnFitAffine<ImmutableArray<T>> onFit)
 {
     Contracts.CheckValue(input, nameof(input));
     Contracts.CheckParam(maximumExampleCount > 1, nameof(maximumExampleCount), "Must be greater than 1");

     // Adapt the typed affine callback before handing it to the column implementation.
     var fitCallback = AffineMapper(onFit);

     // Note the factory's argument order here is (name, src).
     return new Impl<T>(
         input,
         (name, src) => new NormalizingEstimator.MinMaxColumnOptions(name, src, maximumExampleCount, ensureZeroUntouched),
         fitCallback);
 }
Esempio n. 4
0
 /// <summary>
 /// Learns an affine function based on the minimum and maximum, so that all values between the minimum and
 /// maximum observed during fitting fall into the range of -1 to 1.
 /// </summary>
 /// <param name="input">The column containing the vectors to apply the normalization to.</param>
 /// <param name="ensureZeroUntouched">If set to <c>false</c>, then the observed minimum and maximum during fitting
 /// will map to -1 and 1 respectively, exactly. If however set to <c>true</c>, then 0 will always map to 0.
 /// This is valuable for the sake of sparsity preservation, if normalizing sparse vectors.</param>
 /// <param name="maximumExampleCount">When gathering statistics only look at most this many examples.</param>
 /// <param name="onFit">A delegate called whenever the estimator is fit, with the learned slopes
 /// and, if <paramref name="ensureZeroUntouched"/> is <c>false</c>, the offsets as well.</param>
 /// <remarks>Note that the statistics gathering and normalization is done independently per slot of the
 /// vector values.
 /// Note that if values lower than the minimum, or higher than the maximum, observed during fitting are
 /// later transformed, the output values may be outside the range of -1 to 1.</remarks>
 /// <returns>The normalized column.</returns>
 public static NormVector<double> Normalize(
     this Vector<double> input,
     bool ensureZeroUntouched = NormalizingEstimator.Defaults.EnsureZeroUntouched,
     long maximumExampleCount = NormalizingEstimator.Defaults.MaximumExampleCount,
     OnFitAffine<ImmutableArray<double>> onFit = null)
     // Pure pass-through: all work happens in the shared min-max core.
     => NormalizeByMinMaxCore(input, ensureZeroUntouched, maximumExampleCount, onFit);
 /// <summary>
 /// Adapts a typed affine on-fit callback into an <c>Action</c> over <c>IColumnFunction</c>.
 /// </summary>
 /// <typeparam name="TData">The type of the scale and offset values passed to <paramref name="onFit"/>.</typeparam>
 /// <param name="onFit">The callback to adapt; may be <c>null</c>.</param>
 /// <returns><c>null</c> when <paramref name="onFit"/> is <c>null</c>; otherwise a delegate that casts its
 /// argument to <c>NormalizerTransformer.IAffineData</c> and invokes <paramref name="onFit"/> with that
 /// object's <c>Scale</c> and <c>Offset</c>.</returns>
 private static Action<IColumnFunction> AffineMapper<TData>(OnFitAffine<TData> onFit)
 {
     Contracts.AssertValueOrNull(onFit);

     if (onFit != null)
     {
         return columnFunction =>
         {
             // Direct cast: a column function of any other type fails here with an invalid-cast exception.
             var affine = (NormalizerTransformer.IAffineData<TData>)columnFunction;
             onFit(affine.Scale, affine.Offset);
         };
     }

     // No callback supplied, so there is nothing to adapt.
     return null;
 }
Esempio n. 6
0
 /// <summary>
 /// Adapts a typed affine on-fit callback into an <c>Action</c> over <c>IColumnFunction</c>.
 /// </summary>
 /// <typeparam name="TData">The type of the scale and offset values passed to <paramref name="onFit"/>.</typeparam>
 /// <param name="onFit">The callback to adapt; may be <c>null</c>.</param>
 /// <returns><c>null</c> when <paramref name="onFit"/> is <c>null</c>; otherwise a delegate that fetches the
 /// column function's normalizer model parameters, casts them to
 /// <c>NormalizingTransformer.AffineNormalizerModelParameters</c>, and invokes <paramref name="onFit"/> with
 /// their <c>Scale</c> and <c>Offset</c>.</returns>
 private static Action<IColumnFunction> AffineMapper<TData>(OnFitAffine<TData> onFit)
 {
     Contracts.AssertValueOrNull(onFit);

     if (onFit != null)
     {
         return columnFunction =>
         {
             // NOTE(review): the null-conditional tolerates a null column function, but the result is
             // dereferenced unconditionally on the next line -- confirm whether null can reach here.
             var affine = (NormalizingTransformer.AffineNormalizerModelParameters<TData>)columnFunction?.GetNormalizerModelParams();
             onFit(affine.Scale, affine.Offset);
         };
     }

     // No callback supplied, so there is nothing to adapt.
     return null;
 }
        // We have a slightly different breaking up of categories of normalizers versus the dynamic API. Both the mean-var and
        // CDF normalizers are initialized in the same way because they gather exactly the same statistics, but from the point of
        // view of the static API what is more important is the type of mapping that winds up being computed.

        /// <summary>
        /// Learns an affine function based on the observed mean and standard deviation. This is less susceptible
        /// to outliers as compared to <see cref="Normalize(Vector{float}, bool, long, OnFitAffine{ImmutableArray{float}})"/>.
        /// </summary>
        /// <param name="input">The input column.</param>
        /// <param name="fixZero">If set to <c>true</c> then the offset will always be considered zero.</param>
        /// <param name="useLog">If set to <c>true</c> then we transform over the logarithm of the values, rather
        /// than just the raw values. If this is set to <c>true</c> then <paramref name="fixZero"/> is ignored.</param>
        /// <param name="maxTrainingExamples">When gathering statistics only look at most this many examples.</param>
        /// <param name="onFit">A delegate called whenever the estimator is fit, with the learned slopes
        /// and, if <paramref name="fixZero"/> is <c>false</c>, the offsets as well.</param>
        /// <remarks>Note that the statistics gathering and normalization is done independently per slot of the
        /// vector values.</remarks>
        /// <returns>The normalized column.</returns>
        public static NormVector<float> NormalizeByMeanVar(
            this Vector<float> input,
            bool fixZero = FZ,
            bool useLog = false,
            long maxTrainingExamples = MaxTrain,
            OnFitAffine<ImmutableArray<float>> onFit = null)
        {
            // Adapt the caller's affine callback (may be null) into the callback shape the shared core accepts.
            var fitCallback = AffineMapper(onFit);

            // NOTE(review): the literal `false` presumably selects the affine (non-CDF) mapping in the shared
            // mean-variance/CDF core -- confirm against NormalizeByMVCdfCore's signature.
            return NormalizeByMVCdfCore(input, fixZero, useLog, false, maxTrainingExamples, fitCallback);
        }
 /// <summary>
 /// Learns an affine function based on the minimum and maximum, so that all values between the minimum and
 /// maximum observed during fitting fall into the range of -1 to 1.
 /// </summary>
 /// <param name="input">The input column.</param>
 /// <param name="fixZero">If set to <c>false</c>, then the observed minimum and maximum during fitting
 /// will map to -1 and 1 respectively, exactly. If however set to <c>true</c>, then 0 will always map to 0.
 /// This is valuable for the sake of sparsity preservation, if normalizing sparse vectors.</param>
 /// <param name="maxTrainingExamples">When gathering statistics only look at most this many examples.</param>
 /// <param name="onFit">A delegate called whenever the estimator is fit, with the learned slopes
 /// and, if <paramref name="fixZero"/> is <c>false</c>, the offsets as well.</param>
 /// <remarks>Note that the statistics gathering and normalization is done independently per slot of the
 /// vector values.
 /// Note that if values lower than the minimum, or higher than the maximum, observed during fitting are
 /// later transformed, the output values may be outside the range of -1 to 1.</remarks>
 /// <returns>The normalized column.</returns>
 public static NormVector<double> Normalize(
     this Vector<double> input,
     bool fixZero = FZ,
     long maxTrainingExamples = MaxTrain,
     OnFitAffine<ImmutableArray<double>> onFit = null)
     // Pure pass-through: all work happens in the shared min-max core.
     => NormalizeByMinMaxCore(input, fixZero, maxTrainingExamples, onFit);