/// <summary>
/// Creates a <see cref="KeyToValueMappingEstimator"/> that turns key-typed columns back into their original values.
/// </summary>
/// <remarks>Several columns can be processed at once.
/// In a pipeline this transform typically follows one of the overloads of
/// <see cref="MapValueToKey(TransformsCatalog.ConversionTransforms, InputOutputColumnPair[], int, ValueToKeyMappingEstimator.KeyOrdinality, bool, IDataView)"/></remarks>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The input and output column pairs; validated with <c>CheckValue</c> before use.
/// The inputs are keys, and each new column has the data type of the original value.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[MapKeyToValue](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToValueMultiColumn.cs)]
/// ]]>
/// </format>
/// </example>
public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, InputOutputColumnPair[] columns)
{
    var host = CatalogUtils.GetEnvironment(catalog);
    host.CheckValue(columns, nameof(columns));

    // The estimator is handed (output name, input name) tuples, in that order.
    var namePairs = (from pair in columns
                     select (pair.OutputColumnName, pair.InputColumnName)).ToArray();
    return new KeyToValueMappingEstimator(host, namePairs);
}
/// <summary>
/// Creates a <see cref="ValueMappingEstimator"/> that maps each key to an array of values.
/// </summary>
/// <typeparam name="TInputType">The key type.</typeparam>
/// <typeparam name="TOutputType">The value type.</typeparam>
/// <param name="catalog">The conversion transform's catalog</param>
/// <param name="keys">The keys of the mapping. The mapping is 1-1 with <paramref name="values"/>, so this list must have the same length
/// as <paramref name="values"/>, and it cannot contain duplicate keys.</param>
/// <param name="values">The values (each a TOutputType[]) paired with the keys. This list must have the same length as <paramref name="keys"/>.</param>
/// <param name="columns">The columns to apply this transform on.</param>
/// <returns>An instance of the <see cref="ValueMappingEstimator"/></returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[ValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs)]
///  [!code-csharp[ValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs)]
///  [!code-csharp[ValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs)]
///  [!code-csharp[ValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs)]
/// ]]></format>
/// </example>
public static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputType, TOutputType>(
    this TransformsCatalog.ConversionTransforms catalog,
    IEnumerable<TInputType> keys,
    IEnumerable<TOutputType[]> values,
    params ColumnOptions[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    var columnPairs = ColumnOptions.ConvertToValueTuples(columns);
    return new ValueMappingEstimator<TInputType, TOutputType>(env, keys, values, columnPairs);
}
/// <summary>
/// Creates a <see cref="ValueMappingEstimator"/> from parallel lists of keys and values.
/// </summary>
/// <typeparam name="TInputType">The key type.</typeparam>
/// <typeparam name="TOutputType">The value type.</typeparam>
/// <param name="catalog">The conversion transform's catalog</param>
/// <param name="keys">The keys of the mapping. The mapping is 1-1 with <paramref name="values"/>, so this list must have the same length
/// as <paramref name="values"/>, and it cannot contain duplicate keys.</param>
/// <param name="values">The values paired with the keys. This list must have the same length as <paramref name="keys"/>.</param>
/// <param name="treatValuesAsKeyType">Whether to treat the values as a <see cref="KeyType"/>.</param>
/// <param name="columns">The columns to apply this transform on.</param>
/// <returns>An instance of the <see cref="ValueMappingEstimator"/></returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[ValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs)]
/// ]]></format>
/// </example>
public static ValueMappingEstimator<TInputType, TOutputType> ValueMap<TInputType, TOutputType>(
    this TransformsCatalog.ConversionTransforms catalog,
    IEnumerable<TInputType> keys,
    IEnumerable<TOutputType> values,
    bool treatValuesAsKeyType,
    params SimpleColumnInfo[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    var columnPairs = SimpleColumnInfo.ConvertToValueTuples(columns);
    return new ValueMappingEstimator<TInputType, TOutputType>(env, keys, values, treatValuesAsKeyType, columnPairs);
}
/// <summary>
/// Creates a <see cref="KeyToBinaryVectorMappingEstimator"/> for the given input/output column pairs.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The input and output column pairs; validated with <c>CheckValue</c> before use.</param>
internal static KeyToBinaryVectorMappingEstimator MapKeyToBinaryVector(this TransformsCatalog.ConversionTransforms catalog, params InputOutputColumnPair[] columns)
{
    var host = CatalogUtils.GetEnvironment(catalog);
    host.CheckValue(columns, nameof(columns));

    var namePairs = InputOutputColumnPair.ConvertToValueTuples(columns);
    return new KeyToBinaryVectorMappingEstimator(host, namePairs);
}
/// <summary>
/// Creates a <see cref="ValueToKeyMappingEstimator"/> that converts categorical values into numerical keys.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="outputColumnName">Name of the column containing the keys.</param>
/// <param name="inputColumnName">Name of the column containing the categorical values. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> is used.
/// The input data types can be numeric, text, boolean, <see cref="System.DateTime"/> or <see cref="System.DateTimeOffset"/>.
/// </param>
/// <param name="maximumNumberOfKeys">Maximum number of keys to keep per column when training.</param>
/// <param name="keyOrdinality">The order in which keys are assigned.
/// With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/>, keys are assigned in the order encountered.
/// With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, values are sorted and keys are assigned based on the sort order.</param>
/// <param name="addKeyValueAnnotationsAsText">If set to true, use text type
/// for values, regardless of the actual input type. When doing the reverse
/// mapping, the values are text rather than the original input type.</param>
/// <param name="keyData">A pre-defined mapping between values and keys, used instead of building
/// the mapping from the input data during training. If specified, this should be a single column <see cref="IDataView"/> containing the values.
/// The keys are allocated based on the value of keyOrdinality.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[MapValueToKey](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/KeyToValueToKey.cs)]
/// ]]>
/// </format>
/// </example>
public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog,
    string outputColumnName,
    string inputColumnName = null,
    int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
    ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality,
    bool addKeyValueAnnotationsAsText = ValueToKeyMappingEstimator.Defaults.AddKeyValueAnnotationsAsText,
    IDataView keyData = null)
{
    var env = CatalogUtils.GetEnvironment(catalog);

    // A single column is wrapped in a one-element array for the multi-column constructor.
    var singleColumn = new ValueToKeyMappingEstimator.ColumnOptions(
        outputColumnName, inputColumnName, maximumNumberOfKeys, keyOrdinality, addKeyValueAnnotationsAsText);
    return new ValueToKeyMappingEstimator(env, new[] { singleColumn }, keyData);
}
/// <summary>
/// Creates a <see cref="KeyToVectorMappingEstimator"/> that maps the value of a key into a floating point vector representing the value.
/// </summary>
/// <remarks>Several key columns can be processed at once.</remarks>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The input and output column pairs; validated with <c>CheckValue</c> before use.
/// Each new column is a vector of <see cref="System.Single"/> representing the original value.</param>
/// <param name="outputCountVector">Whether to combine multiple indicator vectors into a single vector of counts instead of concatenating them.
/// This is only relevant when the input column is a vector of keys. The same setting is applied to every column pair.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[MapKeyToVector](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToVectorMultiColumn.cs)]
/// ]]></format>
/// </example>
public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog,
    InputOutputColumnPair[] columns, bool outputCountVector = KeyToVectorMappingEstimator.Defaults.OutputCountVector)
{
    var host = CatalogUtils.GetEnvironment(catalog);
    host.CheckValue(columns, nameof(columns));

    var perColumn = (from pair in columns
                     select new KeyToVectorMappingEstimator.ColumnOptions(pair.OutputColumnName, pair.InputColumnName, outputCountVector)).ToArray();
    return new KeyToVectorMappingEstimator(host, perColumn);
}
/// <summary>
/// Creates a <see cref="TypeConvertingEstimator"/> that converts the data to the type specified in <paramref name="outputKind"/>.
/// </summary>
/// <remarks>Several columns can be processed at once.</remarks>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The input and output column pairs; validated with <c>CheckValue</c> before use.
/// This transform operates over numeric, boolean, text, <see cref="System.DateTime"/> and key data types.</param>
/// <param name="outputKind">The expected kind of the output column. The same kind is applied to every column pair.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[ConvertType](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertTypeMultiColumn.cs)]
/// ]]></format>
/// </example>
public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog,
    InputOutputColumnPair[] columns,
    DataKind outputKind = ConvertDefaults.DefaultOutputKind)
{
    var host = CatalogUtils.GetEnvironment(catalog);
    host.CheckValue(columns, nameof(columns));

    var perColumn = (from pair in columns
                     select new TypeConvertingEstimator.ColumnOptions(pair.OutputColumnName, outputKind, pair.InputColumnName)).ToArray();
    return new TypeConvertingEstimator(host, perColumn);
}
/// <summary>
/// Creates a <see cref="ValueMappingEstimator"/> whose mapping is taken from <paramref name="keyValuePairs"/>.
/// </summary>
/// <typeparam name="TInputType">The key type.</typeparam>
/// <typeparam name="TOutputType">The value type.</typeparam>
/// <param name="catalog">The conversion transform's catalog</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="keyValuePairs">Specifies the mapping that will be performed. The keys will be mapped to the values as specified in the <paramref name="keyValuePairs"/>.
/// Must not be <see langword="null"/>. The sequence is enumerated exactly once.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
/// <param name="treatValuesAsKeyType">Whether to treat the values as a <see cref="KeyDataViewType"/>.</param>
/// <returns>An instance of the <see cref="ValueMappingEstimator"/></returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[ValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs)]
///  [!code-csharp[ValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs)]
///  [!code-csharp[ValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs)]
///  [!code-csharp[ValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs)]
/// ]]></format>
/// </example>
public static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputType, TOutputType>(
    this TransformsCatalog.ConversionTransforms catalog,
    string outputColumnName,
    IEnumerable<KeyValuePair<TInputType, TOutputType>> keyValuePairs,
    string inputColumnName = null,
    bool treatValuesAsKeyType = false)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    // Report a null mapping against the right parameter name rather than LINQ's "source".
    env.CheckValue(keyValuePairs, nameof(keyValuePairs));

    // Snapshot the pairs once: projecting keys and values from two separate passes over a
    // lazy or non-repeatable sequence could leave them misaligned.
    var pairs = keyValuePairs.ToArray();
    var keys = pairs.Select(pair => pair.Key);
    var values = pairs.Select(pair => pair.Value);

    return new ValueMappingEstimator<TInputType, TOutputType>(env, keys, values, treatValuesAsKeyType,
        new[] { (outputColumnName, inputColumnName ?? outputColumnName) });
}
/// <summary>
/// Creates a <see cref="ValueToKeyMappingEstimator"/> that converts categorical values into keys.
/// </summary>
/// <remarks>Multiple pairs of columns can be processed at once; a mapping is created for each pair.</remarks>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The input and output column pairs; validated with <c>CheckValue</c> before use.
/// The input data types can be numeric, text, boolean, <see cref="System.DateTime"/> or <see cref="System.DateTimeOffset"/>.
/// </param>
/// <param name="maximumNumberOfKeys">Maximum number of keys to keep per column when training.</param>
/// <param name="keyOrdinality">The order in which keys are assigned.
/// With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/>, keys are assigned in the order encountered.
/// With <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, values are sorted and keys are assigned based on the sort order.</param>
/// <param name="addKeyValueAnnotationsAsText">If set to true, use text type
/// for values, regardless of the actual input type. When doing the reverse
/// mapping, the values are text rather than the original input type.</param>
/// <param name="keyData">A pre-defined mapping between values and keys, used instead of building
/// the mapping from the input data during training. If specified, this should be a single column <see cref="IDataView"/> containing the values.
/// The keys are allocated based on the value of keyOrdinality.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[MapValueToKey](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValueToKeyMultiColumn.cs)]
/// ]]>
/// </format>
/// </example>
public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog,
    InputOutputColumnPair[] columns,
    int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
    ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality,
    bool addKeyValueAnnotationsAsText = ValueToKeyMappingEstimator.Defaults.AddKeyValueAnnotationsAsText,
    IDataView keyData = null)
{
    var host = CatalogUtils.GetEnvironment(catalog);
    host.CheckValue(columns, nameof(columns));

    // The shared settings are applied to every column pair.
    var perColumn = (from pair in columns
                     select new ValueToKeyMappingEstimator.ColumnOptions(pair.OutputColumnName, pair.InputColumnName,
                         maximumNumberOfKeys, keyOrdinality, addKeyValueAnnotationsAsText)).ToArray();
    return new ValueToKeyMappingEstimator(host, perColumn, keyData);
}
/// <summary>
/// Create a <see cref="ValueMappingEstimator"/>, which converts value types into keys, loading the keys to use from <paramref name="keyValuePairs"/>.
/// </summary>
/// <typeparam name="TInputType">The key type.</typeparam>
/// <typeparam name="TOutputType">The value type.</typeparam>
/// <param name="catalog">The conversion transform's catalog</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
/// The output data types can be primitives or vectors of numeric, text, boolean, <see cref="System.DateTime"/>, <see cref="System.DateTimeOffset"/> or <see cref="DataViewRowId"/> types.</param>
/// <param name="keyValuePairs">Specifies the mapping that will be performed. The keys will be mapped to the values as specified in the <paramref name="keyValuePairs"/>.
/// Must not be <see langword="null"/>. The sequence is enumerated exactly once.</param>
/// <param name="inputColumnName">Name of the column to transform.
/// If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.
/// The input data types can be primitives or vectors of numeric, text, boolean, <see cref="System.DateTime"/>, <see cref="System.DateTimeOffset"/> or <see cref="DataViewRowId"/> types.
/// </param>
/// <param name="treatValuesAsKeyType">Whether to treat the values as a key.</param>
/// <returns>An instance of the <see cref="ValueMappingEstimator"/></returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[MapValue](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapValue.cs)]
/// ]]></format>
/// </example>
public static ValueMappingEstimator<TInputType, TOutputType> MapValue<TInputType, TOutputType>(
    this TransformsCatalog.ConversionTransforms catalog,
    string outputColumnName,
    IEnumerable<KeyValuePair<TInputType, TOutputType>> keyValuePairs,
    string inputColumnName = null,
    bool treatValuesAsKeyType = false)
{
    // Fetch the environment once and reuse it for both the lookup view and the estimator.
    var env = catalog.GetEnvironment();
    // Report a null mapping against the right parameter name rather than LINQ's "source".
    env.CheckValue(keyValuePairs, nameof(keyValuePairs));

    // Snapshot the pairs once: projecting keys and values from two separate passes over a
    // lazy or non-repeatable sequence could leave them misaligned.
    var pairs = keyValuePairs.ToArray();
    var keys = pairs.Select(pair => pair.Key);
    var values = pairs.Select(pair => pair.Value);

    // The mapping is carried as a two-column data view using the transformer's default column names.
    var lookupMap = DataViewHelper.CreateDataView(env, keys, values,
        ValueMappingTransformer.DefaultKeyColumnName,
        ValueMappingTransformer.DefaultValueColumnName, treatValuesAsKeyType);

    return new ValueMappingEstimator<TInputType, TOutputType>(env, lookupMap,
        lookupMap.Schema[ValueMappingTransformer.DefaultKeyColumnName],
        lookupMap.Schema[ValueMappingTransformer.DefaultValueColumnName],
        new[] { (outputColumnName, inputColumnName ?? outputColumnName) });
}
/// <summary>
/// Converts key-typed columns back to their original values, using the input and output
/// column names carried by each entry of <paramref name="columns"/>.
/// </summary>
/// <param name="catalog">The conversion transform's catalog</param>
/// <param name="columns">The pairs of input and output columns.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[KeyToValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs)]
/// ]]></format>
/// </example>
public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, params SimpleColumnInfo[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    var namePairs = SimpleColumnInfo.ConvertToValueTuples(columns);
    return new KeyToValueMappingEstimator(env, namePairs);
}
/// <summary>
/// Creates a <see cref="HashingEstimator"/> that hashes the values in the input column.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="inputColumn">Name of the input column.</param>
/// <param name="outputColumn">Name of the output column. If this is null '<paramref name="inputColumn"/>' will be used.</param>
/// <param name="hashBits">Number of bits to hash into. Must be between 1 and 31, inclusive.</param>
/// <param name="invertHash">Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit.</param>
public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, string inputColumn, string outputColumn = null,
    int hashBits = HashDefaults.HashBits, int invertHash = HashDefaults.InvertHash)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new HashingEstimator(env, inputColumn, outputColumn, hashBits, invertHash);
}
/// <summary>
/// Creates a <see cref="KeyToVectorMappingEstimator"/> that maps the value of a key into a floating point vector representing the value.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
/// The data type is a vector of <see cref="System.Single"/> representing the input value.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.
/// This transform operates over keys.</param>
/// <param name="outputCountVector">Whether to combine multiple indicator vectors into a single vector of counts instead of concatenating them.
/// This is only relevant when the input column is a vector of keys.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[MapKeyToVector](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/MapKeyToVector.cs)]
/// ]]></format>
/// </example>
public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog,
    string outputColumnName, string inputColumnName = null, bool outputCountVector = KeyToVectorMappingEstimator.Defaults.OutputCountVector)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new KeyToVectorMappingEstimator(env, outputColumnName, inputColumnName, outputCountVector);
}
/// <summary>
/// Creates a <see cref="KeyToVectorMappingEstimator"/> from fully specified per-column options.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The per-column options, passed to the estimator unchanged.</param>
internal static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog,
    params KeyToVectorMappingEstimator.ColumnOptions[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new KeyToVectorMappingEstimator(env, columns);
}
/// <summary>
/// Creates a <see cref="KeyToVectorMappingEstimator"/> that converts the key types back to their original vectors.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="inputColumn">The name of the input column.</param>
/// <param name="outputColumn">The name of the output column.</param>
/// <param name="bag">Whether bagging is used for the conversion.</param>
public static KeyToVectorMappingEstimator MapKeyToVector(this TransformsCatalog.ConversionTransforms catalog,
    string inputColumn, string outputColumn = null, bool bag = KeyToVectorMappingEstimator.Defaults.Bag)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new KeyToVectorMappingEstimator(env, inputColumn, outputColumn, bag);
}
/// <summary>
/// Create a <see cref="HashingEstimator"/>, which hashes the input column's data type <see cref="ColumnOptions.InputColumnName" />
/// to a new column: <see cref="ColumnOptions.Name" />.
/// </summary>
/// <remarks>This transform can operate over several columns.</remarks>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columns">Advanced options for the estimator that also contain the input and output column names.
/// Must not be <see langword="null"/>.
/// This estimator operates over text, numeric, boolean, key and <see cref="DataViewRowId"/> data types.
/// The new column's data type will be a vector of <see cref="System.UInt32"/>, or a <see cref="System.UInt32"/> based on whether the input column data types
/// are vectors or scalars.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[Hash](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/HashWithOptions.cs)]
/// ]]></format>
/// </example>
public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, params ColumnOptions[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    // Validate up front, as the other multi-column overloads do, so that a null array is
    // reported against "columns" instead of surfacing from inside LINQ.
    env.CheckValue(columns, nameof(columns));

    // Translate each public options object into the estimator's internal representation.
    var internalOptions = columns
        .Select(x => new ColumnOptionsInternal(x.Name, x.InputColumnName, x.NumberOfBits, x.Seed, x.UseOrderedHashing, x.MaximumNumberOfInverts))
        .ToArray();
    return new HashingEstimator(env, internalOptions);
}
/// <summary>
/// Creates a <see cref="KeyToBinaryVectorMappingEstimator"/> that converts the key types to a binary vector.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The column descriptions, passed to the estimator unchanged.</param>
public static KeyToBinaryVectorMappingEstimator MapKeyToBinaryVector(this TransformsCatalog.ConversionTransforms catalog,
    params KeyToBinaryVectorMappingTransformer.ColumnInfo[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new KeyToBinaryVectorMappingEstimator(env, columns);
}
/// <summary>
/// Creates a <see cref="KeyToBinaryVectorMappingEstimator"/> that converts the key types to a binary vector.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The columns to transform; converted to name tuples before being handed to the estimator.</param>
public static KeyToBinaryVectorMappingEstimator MapKeyToBinaryVector(this TransformsCatalog.ConversionTransforms catalog,
    params ColumnOptions[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    var namePairs = ColumnOptions.ConvertToValueTuples(columns);
    return new KeyToBinaryVectorMappingEstimator(env, namePairs);
}
/// <summary>
/// Creates a <see cref="HashingEstimator"/> from fully specified per-column options.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The per-column options, passed to the estimator unchanged.</param>
internal static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog,
    params HashingEstimator.ColumnOptions[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new HashingEstimator(env, columns);
}
/// <summary>
/// Creates a <see cref="ValueMappingEstimator"/> whose mapping is read from a lookup <see cref="IDataView"/>.
/// </summary>
/// <param name="catalog">The conversion transform's catalog</param>
/// <param name="lookupMap">An instance of <see cref="IDataView"/> that contains the key and value columns.</param>
/// <param name="keyColumnName">Name of the key column in <paramref name="lookupMap"/>.</param>
/// <param name="valueColumnName">Name of the value column in <paramref name="lookupMap"/>.</param>
/// <param name="columns">The columns to apply this transform on.</param>
/// <returns>An instance of the <see cref="ValueMappingEstimator"/></returns>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[ValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMapping.cs)]
///  [!code-csharp[ValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToKeyType.cs)]
///  [!code-csharp[ValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingFloatToString.cs)]
///  [!code-csharp[ValueMappingEstimator](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/ValueMappingStringToArray.cs)]
/// ]]></format>
/// </example>
public static ValueMappingEstimator MapValue(
    this TransformsCatalog.ConversionTransforms catalog,
    IDataView lookupMap,
    string keyColumnName,
    string valueColumnName,
    params ColumnOptions[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    var columnPairs = ColumnOptions.ConvertToValueTuples(columns);
    return new ValueMappingEstimator(env, lookupMap, keyColumnName, valueColumnName, columnPairs);
}
/// <summary>
/// Creates a <see cref="TypeConvertingEstimator"/> that changes the column type of the input columns.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columns">Description of dataset columns and how to process them; passed to the estimator unchanged.</param>
public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog,
    params TypeConvertingTransformer.ColumnInfo[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new TypeConvertingEstimator(env, columns);
}
/// <summary>
/// Creates a <see cref="ValueToKeyMappingEstimator"/> that converts value types into <see cref="KeyType"/>.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.</param>
/// <param name="maximumNumberOfKeys">Maximum number of keys to keep per column when auto-training.</param>
/// <param name="keyOrdinality">How items should be ordered when vectorized. If <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByOccurrence"/> is chosen, they will be in the order encountered.
/// If <see cref="ValueToKeyMappingEstimator.KeyOrdinality.ByValue"/>, items are sorted according to their default comparison, for example, text sorting will be case sensitive (for example, 'A' then 'Z' then 'a').</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[ValueToKey](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/KeyToValueValueToKey.cs)]
/// ]]>
/// </format>
/// </example>
public static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog,
    string outputColumnName,
    string inputColumnName = null,
    int maximumNumberOfKeys = ValueToKeyMappingEstimator.Defaults.MaximumNumberOfKeys,
    ValueToKeyMappingEstimator.KeyOrdinality keyOrdinality = ValueToKeyMappingEstimator.Defaults.Ordinality)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new ValueToKeyMappingEstimator(env, outputColumnName, inputColumnName, maximumNumberOfKeys, keyOrdinality);
}
/// <summary>
/// Creates a <see cref="ValueToKeyMappingEstimator"/> from fully specified per-column options.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The per-column options, passed to the estimator unchanged.</param>
/// <param name="keyData">Optional key data, passed to the estimator unchanged.</param>
internal static ValueToKeyMappingEstimator MapValueToKey(this TransformsCatalog.ConversionTransforms catalog,
    ValueToKeyMappingEstimator.ColumnOptions[] columns, IDataView keyData = null)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new ValueToKeyMappingEstimator(env, columns, keyData);
}
/// <summary>
/// Creates a <see cref="KeyToValueMappingEstimator"/> that turns a key-typed column back into its original values.
/// </summary>
/// <remarks>In a pipeline this transform typically follows one of the overloads of
/// <see cref="MapValueToKey(TransformsCatalog.ConversionTransforms, InputOutputColumnPair[], int, ValueToKeyMappingEstimator.KeyOrdinality, bool, IDataView)"/></remarks>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
/// Its type will be the original value's type.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.
/// This transform operates over keys.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[MapKeyToValue](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/KeyToValueToKey.cs)]
/// ]]>
/// </format>
/// </example>
public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, string outputColumnName, string inputColumnName = null)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new KeyToValueMappingEstimator(env, outputColumnName, inputColumnName);
}
/// <summary>
/// Creates a <see cref="HashingEstimator"/> that hashes the data from the column specified in <paramref name="inputColumnName"/>
/// to a new column: <paramref name="outputColumnName"/>.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
/// This column's data type will be a vector of keys, or a scalar of key based on whether the input column data types
/// are vectors or scalars.</param>
/// <param name="inputColumnName">Name of the column whose data will be hashed.
/// If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.
/// This estimator operates over vectors or scalars of text, numeric, boolean, key or <see cref="DataViewRowId"/> data types. </param>
/// <param name="numberOfBits">Number of bits to hash into. Must be between 1 and 31, inclusive.</param>
/// <param name="maximumNumberOfInverts">During hashing we construct mappings between original values and the produced hash values.
/// Text representation of original values are stored in the slot names of the annotations for the new column. Hashing, as such, can map many initial values to one.
/// <paramref name="maximumNumberOfInverts"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
/// <value>0</value> does not retain any input values. <value>-1</value> retains all input values mapping to each hash.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
///  [!code-csharp[Hash](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/Hash.cs)]
/// ]]></format>
/// </example>
public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, string outputColumnName, string inputColumnName = null,
    int numberOfBits = HashDefaults.NumberOfBits, int maximumNumberOfInverts = HashDefaults.MaximumNumberOfInverts)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new HashingEstimator(env, outputColumnName, inputColumnName, numberOfBits, maximumNumberOfInverts);
}
/// <summary>
/// Creates a <see cref="HashingEstimator"/> that hashes the values in the input columns.
/// </summary>
/// <param name="catalog">The transform's catalog.</param>
/// <param name="columns">Description of dataset columns and how to process them; passed to the estimator unchanged.</param>
public static HashingEstimator Hash(this TransformsCatalog.ConversionTransforms catalog, params HashingTransformer.ColumnInfo[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new HashingEstimator(env, columns);
}
/// <summary>
/// Creates a <see cref="TypeConvertingEstimator"/> that converts the data to the type specified in <paramref name="outputKind"/>.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="outputColumnName">Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.</param>
/// <param name="inputColumnName">Name of the column to transform. If set to <see langword="null"/>, the value of the <paramref name="outputColumnName"/> will be used as source.
/// This transform operates over numeric, boolean, text, <see cref="System.DateTime"/> and key data types.</param>
/// <param name="outputKind">The expected kind of the output column.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[ConvertType](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/Conversion/ConvertType.cs)]
/// ]]></format>
/// </example>
public static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog, string outputColumnName, string inputColumnName = null,
    DataKind outputKind = ConvertDefaults.DefaultOutputKind)
{
    var env = CatalogUtils.GetEnvironment(catalog);

    // A single column is wrapped in a one-element array for the multi-column constructor.
    var singleColumn = new TypeConvertingEstimator.ColumnOptions(outputColumnName, outputKind, inputColumnName);
    return new TypeConvertingEstimator(env, new[] { singleColumn });
}
/// <summary>
/// Converts key-typed columns (the column named by the first item of each tuple) back to their original values
/// (written to the column named by the second item of each tuple).
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The pairs of input and output column names.</param>
// NOTE(review): no method body follows this signature in the visible source — confirm the declaration was not truncated.
public static KeyToValueMappingEstimator MapKeyToValue(this TransformsCatalog.ConversionTransforms catalog, params (string input, string output)[] columns)
/// <summary>
/// Creates a <see cref="KeyToBinaryVectorMappingEstimator"/> that converts the key types to a binary vector.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="inputColumn">The name of the input column of the transformation.</param>
/// <param name="outputColumn">The name of the column produced by the transformation.</param>
public static KeyToBinaryVectorMappingEstimator MapKeyToBinaryVector(this TransformsCatalog.ConversionTransforms catalog,
    string inputColumn,
    string outputColumn = null)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new KeyToBinaryVectorMappingEstimator(env, inputColumn, outputColumn);
}
/// <summary>
/// Creates a <see cref="TypeConvertingEstimator"/> from fully specified per-column options.
/// </summary>
/// <param name="catalog">The conversion transform's catalog.</param>
/// <param name="columns">The per-column options, passed to the estimator unchanged.</param>
internal static TypeConvertingEstimator ConvertType(this TransformsCatalog.ConversionTransforms catalog,
    params TypeConvertingEstimator.ColumnOptions[] columns)
{
    var env = CatalogUtils.GetEnvironment(catalog);
    return new TypeConvertingEstimator(env, columns);
}