/// <summary>
/// Creates a configuration object that captures the supplied encoding settings.
/// </summary>
/// <param name="outputKind">Value assigned to <c>OutputKind</c>.</param>
/// <param name="order">Value assigned to <c>Order</c>.</param>
/// <param name="max">Value assigned to <c>Max</c>.</param>
/// <param name="onFit">Callback assigned to <c>OnFit</c>; stored as given, without validation.</param>
public Config(
    OneHotVectorOutputKind outputKind,
    KeyOrdinality order,
    int max,
    Action<ValueToKeyMappingTransformer.TermMap> onFit)
{
    // Plain capture of the arguments; nothing is checked or transformed here.
    OutputKind = outputKind;
    Order = order;
    Max = max;
    OnFit = onFit;
}
/// <summary>
/// Describes how the transformer handles a column pair.
/// </summary>
/// <param name="outputColumnName">
/// Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
/// </param>
/// <param name="inputColumnName">
/// Name of the column to transform. If set to <see langword="null"/>, the value of
/// <paramref name="outputColumnName"/> will be used as source.
/// </param>
/// <param name="maximumNumberOfKeys">
/// Maximum number of keys to keep per column when auto-training.
/// </param>
/// <param name="keyOrdinality">
/// How items should be ordered when vectorized. If <see cref="KeyOrdinality.ByOccurrence"/> is chosen,
/// they will be in the order encountered. If <see cref="KeyOrdinality.ByValue"/>, items are sorted
/// according to their default comparison; for example, text sorting will be case sensitive
/// ('A' then 'Z' then 'a').
/// </param>
/// <param name="addKeyValueAnnotationsAsText">
/// Whether key value annotations should be text, regardless of the actual input type.
/// </param>
public ColumnOptions(
    string outputColumnName,
    string inputColumnName = null,
    int maximumNumberOfKeys = Defaults.MaximumNumberOfKeys,
    KeyOrdinality keyOrdinality = Defaults.Ordinality,
    bool addKeyValueAnnotationsAsText = false)
    : base(
        outputColumnName,
        inputColumnName,
        maximumNumberOfKeys,
        keyOrdinality,
        addKeyValueAnnotationsAsText)
{
    // All work is done by the base constructor; every argument is forwarded unchanged.
}
/// <summary>
/// Initializes the column settings shared by derived option types.
/// </summary>
/// <param name="outputColumnName">
/// Name of the output column. Checked with <c>Contracts.CheckNonWhiteSpace</c> before anything is stored.
/// </param>
/// <param name="inputColumnName">
/// Name of the input column. When <see langword="null"/>, <paramref name="outputColumnName"/> is used instead.
/// </param>
/// <param name="maximumNumberOfKeys">Value assigned to <c>MaximumNumberOfKeys</c>.</param>
/// <param name="keyOrdinality">Value assigned to <c>KeyOrdinality</c>.</param>
/// <param name="addKeyValueAnnotationsAsText">Value assigned to <c>AddKeyValueAnnotationsAsText</c>.</param>
private protected ColumnOptionsBase(
    string outputColumnName,
    string inputColumnName,
    int maximumNumberOfKeys,
    KeyOrdinality keyOrdinality,
    bool addKeyValueAnnotationsAsText)
{
    // Validate first so that no member is assigned from a rejected output name.
    Contracts.CheckNonWhiteSpace(outputColumnName, nameof(outputColumnName));

    // Column names: the input name falls back to the output name when not supplied.
    OutputColumnName = outputColumnName;
    InputColumnName = inputColumnName ?? outputColumnName;

    // Remaining settings are stored as given.
    MaximumNumberOfKeys = maximumNumberOfKeys;
    KeyOrdinality = keyOrdinality;
    AddKeyValueAnnotationsAsText = addKeyValueAnnotationsAsText;
}
/// <summary>
/// Initializes a new instance of <see cref="ValueToKeyMappingEstimator"/> for a single column pair.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="outputColumnName">
/// Name of the column resulting from the transformation of <paramref name="inputColumnName"/>.
/// </param>
/// <param name="inputColumnName">
/// Name of the column to transform. If set to <see langword="null"/>, the value of
/// <paramref name="outputColumnName"/> will be used as source.
/// </param>
/// <param name="maximumNumberOfKeys">
/// Maximum number of keys to keep per column when auto-training.
/// </param>
/// <param name="keyOrdinality">
/// How items should be ordered when vectorized. If <see cref="KeyOrdinality.ByOccurrence"/> is chosen,
/// they will be in the order encountered. If <see cref="KeyOrdinality.ByValue"/>, items are sorted
/// according to their default comparison; for example, text sorting will be case sensitive
/// ('A' then 'Z' then 'a').
/// </param>
internal ValueToKeyMappingEstimator(
    IHostEnvironment env,
    string outputColumnName,
    string inputColumnName = null,
    int maximumNumberOfKeys = Defaults.MaximumNumberOfKeys,
    KeyOrdinality keyOrdinality = Defaults.Ordinality)
    : this(
        env,
        new[]
        {
            // Wrap the single column pair in a one-element array for the multi-column overload.
            new ColumnOptions(
                outputColumnName,
                inputColumnName ?? outputColumnName,
                maximumNumberOfKeys,
                keyOrdinality)
        })
{
}
/// <summary>
/// Converts the categorical value into an indicator array by building a dictionary of categories
/// based on the data and using the id in the dictionary as the index in the array.
/// </summary>
/// <param name="input">Incoming data. Checked with <c>Contracts.CheckValue</c> before use.</param>
/// <param name="outputKind">
/// Specify the output type of indicator array: Multiarray, array or binary encoded data.
/// </param>
/// <param name="keyOrdinality">How the id for each value would be assigned: by occurrence or by value.</param>
/// <param name="maximumNumberOfItems">Maximum number of ids to keep during data scanning.</param>
/// <param name="onFit">Called upon fitting with the learnt enumeration on the dataset.</param>
/// <returns>
/// A new <c>ImplVector&lt;string&gt;</c> built from <paramref name="input"/> and the assembled configuration.
/// </returns>
public static Vector<float> OneHotEncoding(
    this Vector<string> input,
    OneHotVectorOutputKind outputKind = DefOut,
    KeyOrdinality keyOrdinality = DefSort,
    int maximumNumberOfItems = DefMax,
    ToKeyFitResult<ReadOnlyMemory<char>>.OnFit onFit = null)
{
    Contracts.CheckValue(input, nameof(input));

    // The fit callback is adapted by Wrap before being stored in the configuration.
    var settings = new Config(outputKind, keyOrdinality, maximumNumberOfItems, Wrap(onFit));
    return new ImplVector<string>(input, settings);
}