/// <summary>
 /// Builds a <see cref="ValueToKeyMappingEstimator"/> that turns the categorical values of a single column into numerical keys.
 /// </summary>
 /// <param name="catalog">The conversion transform's catalog.</param>
 /// <param name="outputColumnName">Name of the column that will hold the keys.</param>
 /// <param name="inputColumnName">Name of the column holding the categorical values. When <see langword="null"/>, the value of <paramref name="outputColumnName"/> is used instead.
 /// Supported input data types are numeric, text, boolean, <see cref="System.DateTime"/> and <see cref="System.DateTimeOffset"/>.
 /// </param>
 /// <param name="maximumNumberOfKeys">Upper bound on the number of keys kept per column during training.</param>
 /// <param name="keyOrdinality">Controls the order in which keys are assigned.
 /// With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/>, keys follow the order in which values are encountered.
 /// With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, values are sorted first and keys follow the sort order.</param>
 /// <param name="addKeyValueAnnotationsAsText">When true, the values are stored as text
 /// irrespective of the actual input type, so the reverse mapping yields
 /// text instead of the original input type.</param>
 /// <param name="keyData">Optional pre-defined mapping between values and keys, used in place of
 /// a mapping learned from the input data during training. When supplied, it should be a single column <see cref="IDataView"/> holding the values.
 /// Keys are allocated according to <paramref name="keyOrdinality"/>.</param>
 /// <returns>A <see cref="ValueToKeyMappingEstimator"/> configured with a single set of column options.</returns>
 /// <example>
 /// <format type="text/markdown">
 /// <![CDATA[
 /// [!code-csharp[MapValueToKey](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/KeyToValueToKey.cs)]
 /// ]]>
 /// </format>
 /// </example>
 public static ValueToKeyMappingEstimator MapValueToKey(
     this TransformsCatalog.ConversionTransforms catalog,
     string outputColumnName,
     string inputColumnName = null,
     int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
     ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality,
     bool addKeyValueAnnotationsAsText = ValueToKeyMappingEstimator.Defaults.AddKeyValueAnnotationsAsText,
     IDataView keyData = null)
 {
     // Resolve the environment first, then describe the one column pair to map.
     var env = CatalogUtils.GetEnvironment(catalog);
     var singleColumn = new ValueToKeyMappingEstimator.ColumnOptions(
         outputColumnName,
         inputColumnName,
         maximumNumberOfKeys,
         keyOrdinality,
         addKeyValueAnnotationsAsText);

     // The estimator takes an array of column options; wrap the single entry.
     return new ValueToKeyMappingEstimator(env, new[] { singleColumn }, keyData);
 }
Example #2
0
 /// <summary>
 /// Builds a <see cref="OneHotEncodingEstimator"/> that converts a single text column into a one-hot encoded vector.
 /// </summary>
 /// <param name="catalog">The transform catalog.</param>
 /// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>.</param>
 /// <param name="inputColumnName">Name of the column to transform. When <see langword="null"/>, the value of <paramref name="outputColumnName"/> is used as the source.</param>
 /// <param name="outputKind">Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Binary encoded indicator vector.</param>
 /// <param name="maximumNumberOfKeys">Maximum number of terms to keep per column when auto-training.</param>
 /// <param name="keyOrdinality">How items are ordered when vectorized. With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/>, items appear in the order encountered.
 /// With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, items are sorted by their default comparison; for example, text sorting is case sensitive ('A', then 'Z', then 'a').</param>
 /// <param name="keyData">Specifies an ordering for the encoding. When supplied, it should be a single column data view
 /// from which the key-values are taken. When omitted, the ordering is determined from the input data upon fitting.</param>
 /// <returns>A <see cref="OneHotEncodingEstimator"/> configured with a single set of column options.</returns>
 /// <example>
 /// <format type="text/markdown">
 /// <![CDATA[
 ///  [!code-csharp[OneHotEncoding](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Categorical/OneHotEncoding.cs)]
 /// ]]></format>
 /// </example>
 public static OneHotEncodingEstimator OneHotEncoding(
     this TransformsCatalog.CategoricalTransforms catalog,
     string outputColumnName,
     string inputColumnName = null,
     OneHotEncodingEstimator.OutputKind outputKind = OneHotEncodingEstimator.Defaults.OutKind,
     int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
     ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality,
     IDataView keyData = null)
 {
     // Resolve the environment first, then describe the one column pair to encode.
     var env = CatalogUtils.GetEnvironment(catalog);
     var singleColumn = new OneHotEncodingEstimator.ColumnOptions(
         outputColumnName,
         inputColumnName,
         outputKind,
         maximumNumberOfKeys,
         keyOrdinality);

     // The estimator takes an array of column options; wrap the single entry.
     return new OneHotEncodingEstimator(env, new[] { singleColumn }, keyData);
 }
        /// <summary>
        /// Builds a <see cref="ValueToKeyMappingEstimator"/> that turns categorical values into keys for several column pairs at once.
        /// </summary>
        /// <remarks>A separate mapping is created for each input/output column pair; the remaining settings are shared by every pair.</remarks>
        /// <param name="catalog">The conversion transform's catalog.</param>
        /// <param name="columns">The input and output column pairs.
        /// Supported input data types are numeric, text, boolean, <see cref="System.DateTime"/> and <see cref="System.DateTimeOffset"/>.
        /// </param>
        /// <param name="maximumNumberOfKeys">Upper bound on the number of keys kept per column during training.</param>
        /// <param name="keyOrdinality">Controls the order in which keys are assigned.
        /// With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/>, keys follow the order in which values are encountered.
        /// With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, values are sorted first and keys follow the sort order.</param>
        /// <param name="addKeyValueAnnotationsAsText">When true, the values are stored as text
        /// irrespective of the actual input type, so the reverse mapping yields
        /// text instead of the original input type.</param>
        /// <param name="keyData">Optional pre-defined mapping between values and keys, used in place of
        /// a mapping learned from the input data during training. When supplied, it should be a single column <see cref="IDataView"/> holding the values.
        /// Keys are allocated according to <paramref name="keyOrdinality"/>.</param>
        /// <returns>A <see cref="ValueToKeyMappingEstimator"/> with one set of column options per entry in <paramref name="columns"/>.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        /// [!code-csharp[MapValueToKey](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToKeyMultiColumn.cs)]
        /// ]]>
        /// </format>
        /// </example>
        public static ValueToKeyMappingEstimator MapValueToKey(
            this TransformsCatalog.ConversionTransforms catalog,
            InputOutputColumnPair[] columns,
            int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
            ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality,
            bool addKeyValueAnnotationsAsText = ValueToKeyMappingEstimator.Defaults.AddKeyValueAnnotationsAsText,
            IDataView keyData = null)
        {
            var env = CatalogUtils.GetEnvironment(catalog);

            // Validate the column array through the environment before enumerating it.
            env.CheckValue(columns, nameof(columns));

            // Expand every pair into its own options object; all pairs share the same settings.
            var perColumnOptions =
                (from pair in columns
                 select new ValueToKeyMappingEstimator.ColumnOptions(
                     pair.OutputColumnName,
                     pair.InputColumnName,
                     maximumNumberOfKeys,
                     keyOrdinality,
                     addKeyValueAnnotationsAsText)).ToArray();

            return new ValueToKeyMappingEstimator(env, perColumnOptions, keyData);
        }
Example #4
0
        /// <summary>
        /// Builds a <see cref="OneHotEncodingEstimator"/> that converts several text columns into one-hot encoded vectors.
        /// </summary>
        /// <param name="catalog">The transform catalog.</param>
        /// <param name="columns">The input/output column pairs to which the transformation is applied.</param>
        /// <param name="outputKind">Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Binary encoded indicator vector.</param>
        /// <param name="maximumNumberOfKeys">Maximum number of terms to keep per column when auto-training.</param>
        /// <param name="keyOrdinality">How items are ordered when vectorized. With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/>, items appear in the order encountered.
        /// With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, items are sorted by their default comparison; for example, text sorting is case sensitive ('A', then 'Z', then 'a').</param>
        /// <param name="keyData">Specifies an ordering for the encoding. When supplied, it should be a single column data view
        /// from which the key-values are taken. When omitted, the ordering is determined from the input data upon fitting.</param>
        /// <returns>A <see cref="OneHotEncodingEstimator"/> with one set of column options per entry in <paramref name="columns"/>.</returns>
        /// <example>
        /// <format type="text/markdown">
        /// <![CDATA[
        ///  [!code-csharp[OneHotEncoding](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Categorical/OneHotEncodingMultiColumn.cs)]
        /// ]]></format>
        /// </example>
        public static OneHotEncodingEstimator OneHotEncoding(
            this TransformsCatalog.CategoricalTransforms catalog,
            InputOutputColumnPair[] columns,
            OneHotEncodingEstimator.OutputKind outputKind = OneHotEncodingEstimator.Defaults.OutKind,
            int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
            ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality,
            IDataView keyData = null)
        {
            var env = CatalogUtils.GetEnvironment(catalog);

            // Validate the column array through the environment before enumerating it.
            env.CheckValue(columns, nameof(columns));

            // Expand every pair into its own options object; all pairs share the same settings.
            var perColumnOptions =
                (from pair in columns
                 select new OneHotEncodingEstimator.ColumnOptions(
                     pair.OutputColumnName,
                     pair.InputColumnName,
                     outputKind,
                     maximumNumberOfKeys,
                     keyOrdinality)).ToArray();

            return new OneHotEncodingEstimator(env, perColumnOptions, keyData);
        }
 /// <summary>
 /// Captures the settings the transformer applies to a single input/output column pair.
 /// </summary>
 /// <param name="name">Name of the column produced by transforming <paramref name="inputColumnName"/>.</param>
 /// <param name="inputColumnName">Name of the column to transform. When <see langword="null"/>, the value of <paramref name="name"/> is used as the source.</param>
 /// <param name="outputKind">Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Binary encoded indicator vector.</param>
 /// <param name="maximumNumberOfKeys">Maximum number of terms to keep per column when auto-training.</param>
 /// <param name="keyOrdinality">How items are ordered when vectorized. With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/>, items appear in the order encountered.
 /// With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, items are sorted by their default comparison; for example, text sorting is case sensitive ('A', then 'Z', then 'a').</param>
 public ColumnOptions(
     string name,
     string inputColumnName = null,
     OutputKind outputKind = Defaults.OutKind,
     int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
     ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality)
     // A missing input column name falls back to the output name before reaching the base constructor.
     // NOTE(review): the trailing literal `true` is presumably the base class's "key value annotations as text" flag - confirm against the base constructor.
     : base(name, inputColumnName ?? name, maximumNumberOfKeys, keyOrdinality, true)
 {
     // Only the output kind is stored here; the other arguments are handed to the base constructor.
     OutputKind = outputKind;
 }
Example #6
0
 /// <summary>
 /// Builds a <see cref="ValueToKeyMappingEstimator"/> that converts value types into <see cref="KeyType"/>.
 /// </summary>
 /// <param name="catalog">The conversion transform's catalog.</param>
 /// <param name="outputColumnName">Name of the column produced by transforming <paramref name="inputColumnName"/>.</param>
 /// <param name="inputColumnName">Name of the column to transform. When <see langword="null"/>, the value of <paramref name="outputColumnName"/> is used as the source.</param>
 /// <param name="maximumNumberOfKeys">Maximum number of keys to keep per column when auto-training.</param>
 /// <param name="keyOrdinality">How items are ordered when vectorized. With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/>, items appear in the order encountered.
 /// With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, items are sorted by their default comparison; for example, text sorting is case sensitive ('A', then 'Z', then 'a').</param>
 /// <returns>A <see cref="ValueToKeyMappingEstimator"/> built from the supplied column names and key settings.</returns>
 /// <example>
 /// <format type="text/markdown">
 /// <![CDATA[
 /// [!code-csharp[ValueToKey](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValueValueToKey.cs)]
 /// ]]>
 /// </format>
 /// </example>
 public static ValueToKeyMappingEstimator MapValueToKey(
     this TransformsCatalog.ConversionTransforms catalog,
     string outputColumnName,
     string inputColumnName = null,
     int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
     ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality)
 {
     // Resolve the environment from the catalog, then forward the arguments to the estimator unchanged.
     var env = CatalogUtils.GetEnvironment(catalog);
     return new ValueToKeyMappingEstimator(env, outputColumnName, inputColumnName, maximumNumberOfKeys, keyOrdinality);
 }