/// <summary>
/// Creates a <see cref="OneHotHashEncodingEstimator"/> that converts a text column into a hash-based one-hot encoded vector.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="inputColumn">Name of the input column.</param>
/// <param name="outputColumn">Name of the output column. If <c>null</c>, <paramref name="inputColumn"/> is used.</param>
/// <param name="hashBits">Number of bits to hash into. Must be between 1 and 30, inclusive.</param>
/// <param name="invertHash">During hashing, mappings between the original values and the produced hash values are constructed.
/// The text representation of the original values is stored in the slot names of the metadata for the new column.
/// Hashing, as such, can map many initial values to one.
/// <paramref name="invertHash"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
/// <c>0</c> does not retain any input values. <c>-1</c> retains all input values mapping to each hash.</param>
/// <param name="outputKind">The conversion mode.</param>
/// <returns>The estimator built from the catalog's environment and the supplied settings.</returns>
public static OneHotHashEncodingEstimator OneHotHashEncoding(
    this TransformsCatalog.CategoricalTransforms catalog,
    string inputColumn,
    string outputColumn = null,
    int hashBits = OneHotHashEncodingEstimator.Defaults.HashBits,
    int invertHash = OneHotHashEncodingEstimator.Defaults.InvertHash,
    OneHotEncodingTransformer.OutputKind outputKind = OneHotEncodingTransformer.OutputKind.Ind)
{
    // Resolve the environment first, exactly as the single-expression form did.
    var environment = CatalogUtils.GetEnvironment(catalog);
    return new OneHotHashEncodingEstimator(environment, inputColumn, outputColumn, hashBits, invertHash, outputKind);
}
/// <summary>
/// Creates a <see cref="OneHotHashEncodingEstimator"/> that converts a text column into a hash-based one-hot encoded vector.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of
/// <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="numberOfBits">Number of bits to hash into. Must be between 1 and 30, inclusive.</param>
/// <param name="maximumNumberOfInverts">During hashing, mappings between the original values and the produced hash values are constructed.
/// The text representation of the original values is stored in the slot names of the metadata for the new column.
/// Hashing, as such, can map many initial values to one.
/// <paramref name="maximumNumberOfInverts"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
/// <c>0</c> does not retain any input values. <c>-1</c> retains all input values mapping to each hash.</param>
/// <param name="outputKind">The conversion mode.</param>
/// <returns>The estimator built from the catalog's environment and the supplied settings.</returns>
public static OneHotHashEncodingEstimator OneHotHashEncoding(
    this TransformsCatalog.CategoricalTransforms catalog,
    string outputColumnName,
    string inputColumnName = null,
    int numberOfBits = OneHotHashEncodingEstimator.Defaults.NumberOfBits,
    int maximumNumberOfInverts = OneHotHashEncodingEstimator.Defaults.MaximumNumberOfInverts,
    OneHotEncodingEstimator.OutputKind outputKind = OneHotEncodingEstimator.OutputKind.Indicator)
{
    var environment = CatalogUtils.GetEnvironment(catalog);

    // When no explicit source column is given, the output column name doubles as the source.
    var sourceColumnName = inputColumnName ?? outputColumnName;

    return new OneHotHashEncodingEstimator(
        environment, outputColumnName, sourceColumnName, numberOfBits, maximumNumberOfInverts, outputKind);
}
/// <summary>
/// Creates a <see cref="OneHotEncodingEstimator"/> that converts a text column into a one-hot encoded vector.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of
/// <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="outputKind">Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Binary encoded indicator vector.</param>
/// <param name="maximumNumberOfKeys">Maximum number of terms to keep per column when auto-training.</param>
/// <param name="keyOrdinality">How items should be ordered when vectorized. If <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/>
/// is chosen, they will be in the order encountered.
/// If <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, items are sorted according to their default comparison;
/// for example, text sorting will be case sensitive ('A' then 'Z' then 'a').</param>
/// <param name="keyData">Specifies an ordering for the encoding. If specified, this should be a single column data view,
/// and the key-values will be taken from that column. If unspecified, the ordering will be determined from the input data upon fitting.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[OneHotEncoding](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Categorical/OneHotEncoding.cs)]
/// ]]></format>
/// </example>
public static OneHotEncodingEstimator OneHotEncoding(
    this TransformsCatalog.CategoricalTransforms catalog,
    string outputColumnName,
    string inputColumnName = null,
    OneHotEncodingEstimator.OutputKind outputKind = OneHotEncodingEstimator.Defaults.OutKind,
    int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
    ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality,
    IDataView keyData = null)
{
    // The environment is resolved before the column options are created, matching the original evaluation order.
    var environment = CatalogUtils.GetEnvironment(catalog);

    // The estimator takes an array of column options; wrap the single column in a one-element array.
    var singleColumn = new[]
    {
        new OneHotEncodingEstimator.ColumnOptions(outputColumnName, inputColumnName, outputKind, maximumNumberOfKeys, keyOrdinality)
    };

    return new OneHotEncodingEstimator(environment, singleColumn, keyData);
}
/// <summary>
/// Creates a <see cref="OneHotHashEncodingEstimator"/> that converts a text column into a hash-based one-hot encoded vector.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of
/// <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="outputKind">The conversion mode.</param>
/// <param name="numberOfBits">Number of bits to hash into. Must be between 1 and 30, inclusive.</param>
/// <param name="seed">Hashing seed.</param>
/// <param name="useOrderedHashing">Whether the position of each term should be included in the hash.</param>
/// <param name="maximumNumberOfInverts">During hashing, mappings between the original values and the produced hash values are constructed.
/// The text representation of the original values is stored in the slot names of the metadata for the new column.
/// Hashing, as such, can map many initial values to one.
/// <paramref name="maximumNumberOfInverts"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
/// <c>0</c> does not retain any input values. <c>-1</c> retains all input values mapping to each hash.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[OneHotHashEncoding](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Categorical/OneHotHashEncoding.cs)]
/// ]]></format>
/// </example>
public static OneHotHashEncodingEstimator OneHotHashEncoding(
    this TransformsCatalog.CategoricalTransforms catalog,
    string outputColumnName,
    string inputColumnName = null,
    OneHotEncodingEstimator.OutputKind outputKind = OneHotEncodingEstimator.OutputKind.Indicator,
    int numberOfBits = OneHotHashEncodingEstimator.Defaults.NumberOfBits,
    uint seed = OneHotHashEncodingEstimator.Defaults.Seed,
    bool useOrderedHashing = OneHotHashEncodingEstimator.Defaults.UseOrderedHashing,
    int maximumNumberOfInverts = OneHotHashEncodingEstimator.Defaults.MaximumNumberOfInverts)
{
    // The environment is resolved before the column options are created, matching the original evaluation order.
    var environment = CatalogUtils.GetEnvironment(catalog);

    // The estimator takes an array of column options; wrap the single column in a one-element array.
    var singleColumn = new[]
    {
        new OneHotHashEncodingEstimator.ColumnOptions(
            outputColumnName, inputColumnName, outputKind, numberOfBits, seed, useOrderedHashing, maximumNumberOfInverts)
    };

    return new OneHotHashEncodingEstimator(environment, singleColumn);
}
/// <summary>
/// Creates a <see cref="OneHotEncodingEstimator"/> that converts text columns into one-hot encoded vectors.
/// The same settings are applied to every column pair.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="columns">Specifies the names of the columns on which to apply the transformation.</param>
/// <param name="outputKind">Output kind: Bag (multi-set vector), Ind (indicator vector), Key (index), or Binary encoded indicator vector.</param>
/// <param name="maximumNumberOfKeys">Maximum number of terms to keep per column when auto-training.</param>
/// <param name="keyOrdinality">How items should be ordered when vectorized. If <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/>
/// is chosen, they will be in the order encountered.
/// If <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, items are sorted according to their default comparison;
/// for example, text sorting will be case sensitive ('A' then 'Z' then 'a').</param>
/// <param name="keyData">Specifies an ordering for the encoding. If specified, this should be a single column data view,
/// and the key-values will be taken from that column. If unspecified, the ordering will be determined from the input data upon fitting.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[OneHotEncoding](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Categorical/OneHotEncodingMultiColumn.cs)]
/// ]]></format>
/// </example>
public static OneHotEncodingEstimator OneHotEncoding(
    this TransformsCatalog.CategoricalTransforms catalog,
    InputOutputColumnPair[] columns,
    OneHotEncodingEstimator.OutputKind outputKind = OneHotEncodingEstimator.Defaults.OutKind,
    int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
    ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality,
    IDataView keyData = null)
{
    var host = CatalogUtils.GetEnvironment(catalog);

    // Validate the column list through the environment before it is enumerated.
    host.CheckValue(columns, nameof(columns));

    // Expand each input/output pair into a full options object sharing the common settings.
    var perColumnOptions = columns
        .Select(pair => new OneHotEncodingEstimator.ColumnOptions(
            pair.OutputColumnName, pair.InputColumnName, outputKind, maximumNumberOfKeys, keyOrdinality))
        .ToArray();

    return new OneHotEncodingEstimator(host, perColumnOptions, keyData);
}
/// <summary>
/// Creates a <see cref="OneHotHashEncodingEstimator"/> that converts text columns into hash-based one-hot encoded vector columns.
/// The same settings are applied to every column pair.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="columns">Specifies the names of the columns on which to apply the transformation.</param>
/// <param name="outputKind">The conversion mode.</param>
/// <param name="numberOfBits">Number of bits to hash into. Must be between 1 and 30, inclusive.</param>
/// <param name="seed">Hashing seed.</param>
/// <param name="useOrderedHashing">Whether the position of each term should be included in the hash.</param>
/// <param name="maximumNumberOfInverts">During hashing, mappings between the original values and the produced hash values are constructed.
/// The text representation of the original values is stored in the slot names of the metadata for the new column.
/// Hashing, as such, can map many initial values to one.
/// <paramref name="maximumNumberOfInverts"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
/// <c>0</c> does not retain any input values. <c>-1</c> retains all input values mapping to each hash.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[OneHotHashEncoding](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Categorical/OneHotHashEncodingMultiColumn.cs)]
/// ]]></format>
/// </example>
public static OneHotHashEncodingEstimator OneHotHashEncoding(
    this TransformsCatalog.CategoricalTransforms catalog,
    InputOutputColumnPair[] columns,
    OneHotEncodingEstimator.OutputKind outputKind = OneHotEncodingEstimator.OutputKind.Indicator,
    int numberOfBits = OneHotHashEncodingEstimator.Defaults.NumberOfBits,
    uint seed = OneHotHashEncodingEstimator.Defaults.Seed,
    bool useOrderedHashing = OneHotHashEncodingEstimator.Defaults.UseOrderedHashing,
    int maximumNumberOfInverts = OneHotHashEncodingEstimator.Defaults.MaximumNumberOfInverts)
{
    var host = CatalogUtils.GetEnvironment(catalog);

    // Validate the column list through the environment before it is enumerated.
    host.CheckValue(columns, nameof(columns));

    // Expand each input/output pair into a full options object sharing the common settings.
    var perColumnOptions = columns
        .Select(pair => new OneHotHashEncodingEstimator.ColumnOptions(
            pair.OutputColumnName, pair.InputColumnName, outputKind, numberOfBits, seed, useOrderedHashing, maximumNumberOfInverts))
        .ToArray();

    return new OneHotHashEncodingEstimator(host, perColumnOptions);
}
/// <summary>
/// Creates a <see cref="ValueToKeyMappingEstimator"/> that loads the terms to use from <paramref name="file"/>.
/// </summary>
/// <param name="catalog">The categorical transform's catalog.</param>
/// <param name="columns">The data columns to map to keys.</param>
/// <param name="file">The path of the file containing the terms.</param>
/// <param name="termsColumn">Forwarded unchanged to the estimator. Presumably the name of the column in
/// <paramref name="file"/> that holds the terms (TODO: confirm against the estimator).</param>
/// <param name="loaderFactory">Forwarded unchanged to the estimator. Presumably the factory for the loader used to read
/// <paramref name="file"/> (TODO: confirm against the estimator).</param>
/// <returns>The estimator built from the catalog's environment and the supplied arguments.</returns>
public static ValueToKeyMappingEstimator MapValueToKey(
    this TransformsCatalog.CategoricalTransforms catalog,
    ValueToKeyMappingTransformer.ColumnInfo[] columns,
    string file = null,
    string termsColumn = null,
    IComponentFactory<IMultiStreamSource, IDataLoader> loaderFactory = null)
{
    var environment = CatalogUtils.GetEnvironment(catalog);
    return new ValueToKeyMappingEstimator(environment, columns, file, termsColumn, loaderFactory);
}
/// <summary>
/// Creates a <see cref="ValueToKeyMappingEstimator"/> for a single column.
/// </summary>
/// <param name="catalog">The categorical transform's catalog.</param>
/// <param name="inputColumn">Name of the column to be transformed.</param>
/// <param name="outputColumn">Name of the output column. If this is <c>null</c>, <paramref name="inputColumn"/> will be used.</param>
/// <param name="maxNumTerms">Maximum number of keys to keep per column when auto-training.</param>
/// <param name="sort">How items should be ordered when vectorized. By default, they will be in the order encountered.
/// If sorted by value, items are ordered according to their default comparison; for example, text sorting will be
/// case sensitive ('A' then 'Z' then 'a').</param>
/// <returns>The estimator built from the catalog's environment and the supplied arguments.</returns>
public static ValueToKeyMappingEstimator MapValueToKey(
    this TransformsCatalog.CategoricalTransforms catalog,
    string inputColumn,
    string outputColumn = null,
    int maxNumTerms = ValueToKeyMappingEstimator.Defaults.MaxNumTerms,
    ValueToKeyMappingTransformer.SortOrder sort = ValueToKeyMappingEstimator.Defaults.Sort)
{
    var environment = CatalogUtils.GetEnvironment(catalog);
    return new ValueToKeyMappingEstimator(environment, inputColumn, outputColumn, maxNumTerms, sort);
}
/// <summary>
/// Creates a <see cref="OneHotEncodingEstimator"/> from fully specified per-column options.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="columns">The per-column settings, passed to the estimator as-is.</param>
/// <param name="keyData">Optional data view passed to the estimator as-is; defaults to <c>null</c>.</param>
/// <returns>The estimator built from the catalog's environment and the supplied arguments.</returns>
internal static OneHotEncodingEstimator OneHotEncoding(
    this TransformsCatalog.CategoricalTransforms catalog,
    OneHotEncodingEstimator.ColumnOptions[] columns,
    IDataView keyData = null)
{
    var environment = CatalogUtils.GetEnvironment(catalog);
    return new OneHotEncodingEstimator(environment, columns, keyData);
}
/// <summary>
/// Creates a <see cref="OneHotEncodingEstimator"/> from fully specified per-column options.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="columns">The per-column settings, passed to the estimator as-is.</param>
/// <returns>The estimator built from the catalog's environment and the supplied column options.</returns>
internal static OneHotEncodingEstimator OneHotEncoding(
    this TransformsCatalog.CategoricalTransforms catalog,
    params OneHotEncodingEstimator.ColumnOptions[] columns)
{
    var environment = CatalogUtils.GetEnvironment(catalog);
    return new OneHotEncodingEstimator(environment, columns);
}
/// <summary>
/// Creates a <see cref="OneHotHashEncodingEstimator"/> that converts several text columns into hash-based one-hot encoded vectors.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="columns">The column settings.</param>
/// <returns>The estimator built from the catalog's environment and the supplied column settings.</returns>
public static OneHotHashEncodingEstimator OneHotHashEncoding(
    this TransformsCatalog.CategoricalTransforms catalog,
    params OneHotHashEncodingEstimator.ColumnInfo[] columns)
{
    var environment = CatalogUtils.GetEnvironment(catalog);
    return new OneHotHashEncodingEstimator(environment, columns);
}
/// <summary>
/// Creates a <see cref="OneHotHashEncodingEstimator"/> that converts a text column into a hash-based one-hot encoded vector.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="inputColumn">Name of the input column.</param>
/// <param name="outputColumn">Name of the output column. If <c>null</c>, <paramref name="inputColumn"/> is used.</param>
/// <param name="outputKind">The conversion mode.</param>
/// <returns>The estimator built from the catalog's environment and the supplied settings.</returns>
public static OneHotHashEncodingEstimator OneHotHashEncoding(
    this TransformsCatalog.CategoricalTransforms catalog,
    string inputColumn,
    string outputColumn = null,
    CategoricalTransform.OutputKind outputKind = CategoricalTransform.OutputKind.Ind)
{
    var environment = CatalogUtils.GetEnvironment(catalog);
    return new OneHotHashEncodingEstimator(environment, inputColumn, outputColumn, outputKind);
}
/// <summary>
/// Creates a <see cref="OneHotEncodingEstimator"/> that converts a text column into a one-hot encoded vector.
/// </summary>
/// <param name="catalog">The transform catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of
/// <paramref name="outputColumnName"/> is used as the source.</param>
/// <param name="outputKind">The conversion mode.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[OneHotEncoding](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Categorical/OneHotEncoding.cs)]
/// ]]></format>
/// </example>
public static OneHotEncodingEstimator OneHotEncoding(
    this TransformsCatalog.CategoricalTransforms catalog,
    string outputColumnName,
    string inputColumnName = null,
    OneHotEncodingEstimator.OutputKind outputKind = OneHotEncodingEstimator.OutputKind.Indicator)
{
    var environment = CatalogUtils.GetEnvironment(catalog);
    return new OneHotEncodingEstimator(environment, outputColumnName, inputColumnName, outputKind);
}