/// <summary>
/// Create a <see cref="ValueToKeyMappingEstimator"/>, which converts categorical values into numerical keys.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="outputColumnName">Name of the column that will contain the keys.</param>
/// <param name="inputColumnName">Name of the column that contains the categorical values.
/// When <see langword="null"/>, the value of <paramref name="outputColumnName"/> is used instead.
/// The input data types can be numeric, text, boolean, <see cref="System.DateTime"/> or <see cref="System.DateTimeOffset"/>.
/// </param>
/// <param name="maximumNumberOfKeys">Maximum number of keys to keep per column when training.</param>
/// <param name="keyOrdinality">The order in which keys are assigned.
/// With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/>, keys are assigned in the order encountered.
/// With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, values are sorted and keys are assigned based on the sort order.</param>
/// <param name="addKeyValueAnnotationsAsText">If set to true, use text type for values, regardless of the actual input type.
/// When doing the reverse mapping, the values are then text rather than the original input type.</param>
/// <param name="keyData">A pre-defined mapping between values and keys, used instead of building the mapping
/// from the input data during training. If specified, this should be a single column <see cref="IDataView"/> containing the values.
/// The keys are allocated based on the value of <paramref name="keyOrdinality"/>.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[MapValueToKey](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/KeyToValueToKey.cs)]
/// ]]>
/// </format>
/// </example>
public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog,
    string outputColumnName,
    string inputColumnName = null,
    int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
    ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality,
    bool addKeyValueAnnotationsAsText = ValueToKeyMappingEstimator.Defaults.AddKeyValueAnnotationsAsText,
    IDataView keyData = null)
{
    // Resolve the environment first, then describe the single column pair.
    var env = CatalogUtils.GetEnvironment(catalog);
    var singleColumn = new ValueToKeyMappingEstimator.ColumnOptions(
        outputColumnName,
        inputColumnName,
        maximumNumberOfKeys,
        keyOrdinality,
        addKeyValueAnnotationsAsText);

    // The estimator takes an array of column options, so wrap the one pair.
    return new ValueToKeyMappingEstimator(env, new[] { singleColumn }, keyData);
}
/// <summary>
/// Convert text columns into one-hot encoded vectors.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform.
/// When <see langword="null"/>, the value of <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="outputKind">Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Binary encoded indicator vector.</param>
/// <param name="maximumNumberOfKeys">Maximum number of terms to keep per column when auto-training.</param>
/// <param name="keyOrdinality">How items should be ordered when vectorized.
/// If <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/> is chosen, they will be in the order encountered.
/// If <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, items are sorted according to their default comparison;
/// for example, text sorting will be case sensitive ('A' then 'Z' then 'a').</param>
/// <param name="keyData">Specifies an ordering for the encoding. If specified, this should be a single column data view,
/// and the key-values will be taken from that column.
/// If unspecified, the ordering will be determined from the input data upon fitting.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[OneHotEncoding](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Categorical/OneHotEncoding.cs)]
/// ]]></format>
/// </example>
public static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.CategoricalTransforms catalog,
    string outputColumnName,
    string inputColumnName = null,
    OneHotEncodingEstimator.OutputKind outputKind = OneHotEncodingEstimator.Defaults.OutKind,
    int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
    ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality,
    IDataView keyData = null)
{
    // Resolve the environment first, then describe the single column pair.
    var env = CatalogUtils.GetEnvironment(catalog);
    var singleColumn = new OneHotEncodingEstimator.ColumnOptions(
        outputColumnName,
        inputColumnName,
        outputKind,
        maximumNumberOfKeys,
        keyOrdinality);

    // The estimator takes an array of column options, so wrap the one pair.
    return new OneHotEncodingEstimator(env, new[] { singleColumn }, keyData);
}
/// <summary>
/// Create a <see cref="ValueToKeyMappingEstimator"/>, which converts categorical values into keys.
/// </summary>
/// <remarks>This transform can operate over multiple pairs of columns, creating a mapping for each pair.</remarks>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The input and output columns.
/// The input data types can be numeric, text, boolean, <see cref="System.DateTime"/> or <see cref="System.DateTimeOffset"/>.
/// </param>
/// <param name="maximumNumberOfKeys">Maximum number of keys to keep per column when training.</param>
/// <param name="keyOrdinality">The order in which keys are assigned.
/// With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/>, keys are assigned in the order encountered.
/// With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, values are sorted and keys are assigned based on the sort order.</param>
/// <param name="addKeyValueAnnotationsAsText">If set to true, use text type for values, regardless of the actual input type.
/// When doing the reverse mapping, the values are then text rather than the original input type.</param>
/// <param name="keyData">A pre-defined mapping between values and keys, used instead of building the mapping
/// from the input data during training. If specified, this should be a single column <see cref="IDataView"/> containing the values.
/// The keys are allocated based on the value of <paramref name="keyOrdinality"/>.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[MapValueToKey](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToKeyMultiColumn.cs)]
/// ]]>
/// </format>
/// </example>
public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog,
    InputOutputColumnPair[] columns,
    int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
    ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality,
    bool addKeyValueAnnotationsAsText = ValueToKeyMappingEstimator.Defaults.AddKeyValueAnnotationsAsText,
    IDataView keyData = null)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    env.CheckValue(columns, nameof(columns));

    // Every pair gets the same key-count, ordinality and annotation settings.
    var perColumn = new ValueToKeyMappingEstimator.ColumnOptions[columns.Length];
    for (int i = 0; i < columns.Length; i++)
    {
        var pair = columns[i];
        perColumn[i] = new ValueToKeyMappingEstimator.ColumnOptions(
            pair.OutputColumnName,
            pair.InputColumnName,
            maximumNumberOfKeys,
            keyOrdinality,
            addKeyValueAnnotationsAsText);
    }

    return new ValueToKeyMappingEstimator(env, perColumn, keyData);
}
/// <summary>
/// Convert text columns into one-hot encoded vectors.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="columns">Specifies the names of the columns on which to apply the transformation.</param>
/// <param name="outputKind">Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Binary encoded indicator vector.</param>
/// <param name="maximumNumberOfKeys">Maximum number of terms to keep per column when auto-training.</param>
/// <param name="keyOrdinality">How items should be ordered when vectorized.
/// If <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/> is chosen, they will be in the order encountered.
/// If <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, items are sorted according to their default comparison;
/// for example, text sorting will be case sensitive ('A' then 'Z' then 'a').</param>
/// <param name="keyData">Specifies an ordering for the encoding. If specified, this should be a single column data view,
/// and the key-values will be taken from that column.
/// If unspecified, the ordering will be determined from the input data upon fitting.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[OneHotEncoding](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Categorical/OneHotEncodingMultiColumn.cs)]
/// ]]></format>
/// </example>
public static OneHotEncodingEstimator OneHotEncoding(this TransformsCatalog.CategoricalTransforms catalog,
    InputOutputColumnPair[] columns,
    OneHotEncodingEstimator.OutputKind outputKind = OneHotEncodingEstimator.Defaults.OutKind,
    int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
    ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality,
    IDataView keyData = null)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    env.CheckValue(columns, nameof(columns));

    // Every pair gets the same output kind, key-count and ordinality settings.
    var perColumn = new OneHotEncodingEstimator.ColumnOptions[columns.Length];
    for (int i = 0; i < columns.Length; i++)
    {
        var pair = columns[i];
        perColumn[i] = new OneHotEncodingEstimator.ColumnOptions(
            pair.OutputColumnName,
            pair.InputColumnName,
            outputKind,
            maximumNumberOfKeys,
            keyOrdinality);
    }

    return new OneHotEncodingEstimator(env, perColumn, keyData);
}
/// <summary>
/// Describes how the transformer handles one column pair.
/// </summary>
/// <param name="name">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform.
/// When <see langword="null"/>, the value of <paramref name="name"/> is used as the source.</param>
/// <param name="outputKind">Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Binary encoded indicator vector.</param>
/// <param name="maximumNumberOfKeys">Maximum number of terms to keep per column when auto-training.</param>
/// <param name="keyOrdinality">How items should be ordered when vectorized.
/// If <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/> is chosen, they will be in the order encountered.
/// If <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, items are sorted according to their default comparison;
/// for example, text sorting will be case sensitive ('A' then 'Z' then 'a').</param>
public ColumnOptions(string name,
    string inputColumnName = null,
    OutputKind outputKind = Defaults.OutKind,
    int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
    ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality)
    // The input name falls back to the output name here, before reaching the base constructor.
    // NOTE(review): the trailing 'true' is presumably the base options' text key-value flag; confirm against the base constructor.
    : base(name, inputColumnName ?? name, maximumNumberOfKeys, keyOrdinality, true)
{
    this.OutputKind = outputKind;
}
/// <summary>
/// Converts value types into <see cref="KeyType"/>.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform.
/// When <see langword="null"/>, the value of <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="maximumNumberOfKeys">Maximum number of keys to keep per column when auto-training.</param>
/// <param name="keyOrdinality">How items should be ordered when vectorized.
/// If <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/> is chosen, they will be in the order encountered.
/// If <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, items are sorted according to their default comparison;
/// for example, text sorting will be case sensitive ('A' then 'Z' then 'a').</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[ValueToKey](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValueValueToKey.cs)]
/// ]]>
/// </format>
/// </example>
public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog,
    string outputColumnName,
    string inputColumnName = null,
    int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
    ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality)
{
    var env = CatalogUtils.GetEnvironment(catalog);

    // Column names and key settings go straight to the estimator's single-column constructor.
    return new ValueToKeyMappingEstimator(
        env,
        outputColumnName,
        inputColumnName,
        maximumNumberOfKeys,
        keyOrdinality);
}