/// <summary>
/// Describes how the transformer handles one column pair.
/// </summary>
/// <param name="input">Name of input column.</param>
/// <param name="output">Name of output column.</param>
/// <param name="outputKind">Kind of output to produce; stored in <c>OutputKind</c>.</param>
/// <param name="maxNumTerms">Forwarded unchanged to the base constructor.</param>
/// <param name="sort">Forwarded unchanged to the base constructor.</param>
/// <param name="term">Forwarded unchanged to the base constructor.</param>
public ColumnInfo(
    string input,
    string output,
    CategoricalTransform.OutputKind outputKind = Defaults.OutKind,
    int maxNumTerms = TermEstimator.Defaults.MaxNumTerms,
    TermTransform.SortOrder sort = TermEstimator.Defaults.Sort,
    string[] term = null)
    // NOTE(review): the trailing 'true' is a fixed flag of the base constructor;
    // its meaning is not visible from here -- confirm against the base type.
    : base(input, output, maxNumTerms, sort, term, true)
{
    OutputKind = outputKind;
}
/// <summary>
/// Describes how the transformer handles one column pair.
/// </summary>
/// <param name="input">Name of input column.</param>
/// <param name="output">Name of output column.</param>
/// <param name="outputKind">Kind of output: bag, indicator vector etc.</param>
/// <param name="hashBits">Number of bits to hash into. Must be between 1 and 31, inclusive.</param>
/// <param name="seed">Hashing seed.</param>
/// <param name="ordered">Whether the position of each term should be included in the hash.</param>
/// <param name="invertHash">Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit.</param>
public ColumnInfo(
    string input,
    string output,
    CategoricalTransform.OutputKind outputKind = Defaults.OutputKind,
    int hashBits = Defaults.HashBits,
    uint seed = Defaults.Seed,
    bool ordered = Defaults.Ordered,
    int invertHash = Defaults.InvertHash)
{
    // All hashing-related settings are carried by the nested hash column description;
    // only the output kind is kept directly on this object.
    var hashColumn = new HashTransformer.ColumnInfo(input, output, hashBits, seed, ordered, invertHash);

    HashInfo = hashColumn;
    OutputKind = outputKind;
}
/// <summary>
/// A helper method to create <see cref="CategoricalHashTransform"/>.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
/// <param name="name">Name of the output column.</param>
/// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
/// <param name="hashBits">Number of bits to hash into. Must be between 1 and 30, inclusive.</param>
/// <param name="invertHash">Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit.</param>
/// <param name="outputKind">The type of output expected.</param>
/// <returns>The result of fitting a <see cref="OneHotHashEncodingEstimator"/> on <paramref name="input"/> and applying it to that same data.</returns>
public static IDataView Create(
    IHostEnvironment env,
    IDataView input,
    string name,
    string source = null,
    int hashBits = OneHotHashEncodingEstimator.Defaults.HashBits,
    int invertHash = OneHotHashEncodingEstimator.Defaults.InvertHash,
    CategoricalTransform.OutputKind outputKind = OneHotHashEncodingEstimator.Defaults.OutputKind)
{
    // Describe the column explicitly so that:
    //  * hashBits and invertHash are actually forwarded (they were previously accepted but dropped), and
    //  * the source column is the input and 'name' is the output, as the parameter docs promise,
    //    instead of relying on the positional order of a (input, output) convenience constructor.
    var column = new OneHotHashEncodingEstimator.ColumnInfo(
        source ?? name,
        name,
        outputKind,
        hashBits: hashBits,
        invertHash: invertHash);

    var estimator = new OneHotHashEncodingEstimator(env, column);
    return estimator.Fit(input).Transform(input);
}
/// <summary>
/// Creates the estimator for the given columns: a term-mapping stage over all columns,
/// followed by a key-to-vector (or key-to-binary-vector) stage for every column whose
/// output kind is not <c>Key</c>.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="columns">Description of each column pair and its requested output kind.</param>
public CategoricalEstimator(IHostEnvironment env, params ColumnInfo[] columns)
{
    Contracts.CheckValue(env, nameof(env));
    _host = env.Register(nameof(TermEstimator));
    _term = new TermEstimator(_host, columns);

    var cols = new List<(string input, string output, bool bag)>();
    bool binaryEncoding = false;
    for (int i = 0; i < columns.Length; i++)
    {
        var column = columns[i];
        bool bag;
        switch (column.OutputKind)
        {
            default:
                throw _host.ExceptUserArg(nameof(column.OutputKind));
            case CategoricalTransform.OutputKind.Key:
                // Key output needs no second stage.
                continue;
            case CategoricalTransform.OutputKind.Bin:
                binaryEncoding = true;
                bag = false;
                break;
            case CategoricalTransform.OutputKind.Ind:
                bag = false;
                break;
            case CategoricalTransform.OutputKind.Bag:
                bag = true;
                break;
        }

        // The second stage works in place on the output column of the term stage.
        cols.Add((column.Output, column.Output, bag));
    }

    // Build the second stage once, after all columns are known. Previously this was done
    // inside the loop, re-creating the estimator from the partial list on every iteration.
    if (cols.Count == 0)
    {
        // Every column asked for Key output: no second stage is assigned.
        return;
    }

    if (binaryEncoding)
    {
        // A single Bin column switches every collected column to binary encoding.
        _keyToSomething = new KeyToBinaryVectorEstimator(
            _host,
            cols.Select(x => new KeyToBinaryVectorTransform.ColumnInfo(x.input, x.output)).ToArray());
    }
    else
    {
        _keyToSomething = new KeyToVectorEstimator(
            _host,
            cols.Select(x => new KeyToVectorTransform.ColumnInfo(x.input, x.output, x.bag)).ToArray());
    }
}
/// <summary>
/// A helper method to create <see cref="CategoricalHashTransform"/> for public facing API.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="input">Input <see cref="IDataView"/>. This is the output from previous transform or loader.</param>
/// <param name="name">Name of the output column.</param>
/// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
/// <param name="hashBits">Number of bits to hash into. Must be between 1 and 30, inclusive.</param>
/// <param name="invertHash">Limit the number of keys used to generate the slot name to this many. 0 means no invert hashing, -1 means no limit.</param>
/// <param name="outputKind">The type of output expected.</param>
/// <returns>The transform produced by the arguments-based <c>Create</c> overload.</returns>
public static IDataTransform Create(
    IHostEnvironment env,
    IDataView input,
    string name,
    string source = null,
    int hashBits = Defaults.HashBits,
    int invertHash = Defaults.InvertHash,
    CategoricalTransform.OutputKind outputKind = Defaults.OutputKind)
{
    // A single column mapping; the output name doubles as the source when none is given.
    var singleColumn = new Column()
    {
        Source = source ?? name,
        Name = name
    };

    var arguments = new Arguments()
    {
        Column = new[] { singleColumn },
        HashBits = hashBits,
        InvertHash = invertHash,
        OutputKind = outputKind
    };

    return Create(env, arguments, input);
}
/// <summary>
/// Creates the estimator for the given columns: a hashing stage over all columns, followed by
/// a key-to-vector and/or key-to-binary-vector stage for the columns whose output kind is not <c>Key</c>.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="columns">Description of each column pair, its hashing settings and requested output kind.</param>
public OneHotHashEncodingEstimator(IHostEnvironment env, params ColumnInfo[] columns)
{
    Contracts.CheckValue(env, nameof(env));
    // NOTE(review): the host is registered under the name of a different estimator -- confirm this is intended.
    _host = env.Register(nameof(ValueToKeyMappingEstimator));
    _hash = new HashingEstimator(_host, columns.Select(c => c.HashInfo).ToArray());

    using (var ch = _host.Start(nameof(OneHotHashEncodingEstimator)))
    {
        var binaryPairs = new List<(string input, string output)>();
        var vectorPairs = new List<(string input, string output, bool bag)>();

        foreach (var column in columns)
        {
            // The second stage works in place on the output column of the hashing stage.
            string hashed = column.HashInfo.Output;
            switch (column.OutputKind)
            {
                case CategoricalTransform.OutputKind.Key:
                    // Key output needs no second stage.
                    continue;

                case CategoricalTransform.OutputKind.Bag:
                    vectorPairs.Add((hashed, hashed, true));
                    break;

                case CategoricalTransform.OutputKind.Ind:
                    vectorPairs.Add((hashed, hashed, false));
                    break;

                case CategoricalTransform.OutputKind.Bin:
                    if (column.HashInfo.InvertHash != 0)
                    {
                        ch.Warning("Invert hashing is being used with binary encoding.");
                    }

                    binaryPairs.Add((hashed, hashed));
                    break;

                default:
                    throw _host.ExceptUserArg(nameof(column.OutputKind));
            }
        }

        IEstimator<ITransformer> toBinVector = null;
        if (binaryPairs.Count > 0)
        {
            var binaryInfos = binaryPairs
                .Select(p => new KeyToBinaryVectorTransform.ColumnInfo(p.input, p.output))
                .ToArray();
            toBinVector = new KeyToBinaryVectorMappingEstimator(_host, binaryInfos);
        }

        IEstimator<ITransformer> toVector = null;
        if (vectorPairs.Count > 0)
        {
            var vectorInfos = vectorPairs
                .Select(p => new KeyToVectorTransform.ColumnInfo(p.input, p.output, p.bag))
                .ToArray();
            toVector = new KeyToVectorMappingEstimator(_host, vectorInfos);
        }

        // Chain both stages when both exist; otherwise use whichever one exists
        // (or null when every column asked for Key output).
        if (toBinVector == null)
        {
            _toSomething = toVector;
        }
        else if (toVector == null)
        {
            _toSomething = toBinVector;
        }
        else
        {
            _toSomething = toVector.Append(toBinVector);
        }
    }
}
/// <summary>
/// A helper method to create <see cref="OneHotHashEncodingEstimator"/> for public facing API.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="inputColumn">Name of the input column.</param>
/// <param name="outputColumn">Name of the output column. If this is null '<paramref name="inputColumn"/>' will be used.</param>
/// <param name="outputKind">The type of output expected.</param>
public OneHotHashEncodingEstimator(
    IHostEnvironment env,
    string inputColumn,
    string outputColumn = null,
    CategoricalTransform.OutputKind outputKind = Defaults.OutputKind)
    // Single-column convenience form: wrap the one column and defer to the multi-column constructor.
    : this(env, new[] { new ColumnInfo(inputColumn, outputColumn ?? inputColumn, outputKind) })
{
}
/// <summary>
/// Creates the estimator for the given columns: a term-mapping stage over all columns, followed by
/// a key-to-vector and/or key-to-binary-vector stage for the columns whose output kind is not <c>Key</c>.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="columns">Description of each column pair and its requested output kind.</param>
public CategoricalEstimator(IHostEnvironment env, params ColumnInfo[] columns)
{
    Contracts.CheckValue(env, nameof(env));
    // NOTE(review): the host is registered under the term estimator's name -- confirm this is intended.
    _host = env.Register(nameof(TermEstimator));
    _term = new TermEstimator(_host, columns);

    var binaryPairs = new List<(string input, string output)>();
    var vectorPairs = new List<(string input, string output, bool bag)>();

    foreach (var column in columns)
    {
        // The second stage works in place on the output column of the term stage.
        string mapped = column.Output;
        switch (column.OutputKind)
        {
            case CategoricalTransform.OutputKind.Key:
                // Key output needs no second stage.
                continue;

            case CategoricalTransform.OutputKind.Bag:
                vectorPairs.Add((mapped, mapped, true));
                break;

            case CategoricalTransform.OutputKind.Ind:
                vectorPairs.Add((mapped, mapped, false));
                break;

            case CategoricalTransform.OutputKind.Bin:
                binaryPairs.Add((mapped, mapped));
                break;

            default:
                throw _host.ExceptUserArg(nameof(column.OutputKind));
        }
    }

    IEstimator<ITransformer> toBinVector = null;
    if (binaryPairs.Count > 0)
    {
        var binaryInfos = binaryPairs
            .Select(p => new KeyToBinaryVectorTransform.ColumnInfo(p.input, p.output))
            .ToArray();
        toBinVector = new KeyToBinaryVectorEstimator(_host, binaryInfos);
    }

    IEstimator<ITransformer> toVector = null;
    if (vectorPairs.Count > 0)
    {
        var vectorInfos = vectorPairs
            .Select(p => new KeyToVectorTransform.ColumnInfo(p.input, p.output, p.bag))
            .ToArray();
        toVector = new KeyToVectorEstimator(_host, vectorInfos);
    }

    // Chain both stages when both exist; otherwise use whichever one exists
    // (or null when every column asked for Key output).
    if (toBinVector == null)
    {
        _toSomething = toVector;
    }
    else if (toVector == null)
    {
        _toSomething = toBinVector;
    }
    else
    {
        _toSomething = toVector.Append(toBinVector);
    }
}
/// <summary>
/// A helper method to create <see cref="CategoricalEstimator"/> for public facing API.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="name">Name of the output column.</param>
/// <param name="source">Name of the column to be transformed. If this is null '<paramref name="name"/>' will be used.</param>
/// <param name="outputKind">The type of output expected.</param>
public CategoricalEstimator(
    IHostEnvironment env,
    string name,
    string source = null,
    CategoricalTransform.OutputKind outputKind = Defaults.OutKind)
    // Single-column convenience form: the column description takes (input, output), so the
    // source (or the name when no source is given) goes first and the name second.
    : this(env, new[] { new ColumnInfo(source ?? name, name, outputKind) })
{
}
/// <summary>
/// Convert a text column into hash-based one-hot encoded vector.
/// </summary>
/// <param name="catalog">The transform catalog</param>
/// <param name="inputColumn">The input column</param>
/// <param name="outputColumn">The output column. If <c>null</c>, <paramref name="inputColumn"/> is used.</param>
/// <param name="outputKind">The conversion mode.</param>
/// <returns>A new <see cref="OneHotHashEncodingEstimator"/> created in the environment of <paramref name="catalog"/>.</returns>
public static OneHotHashEncodingEstimator OneHotHashEncoding(
    this TransformsCatalog.CategoricalTransforms catalog,
    string inputColumn,
    string outputColumn = null,
    CategoricalTransform.OutputKind outputKind = CategoricalTransform.OutputKind.Ind)
{
    var environment = CatalogUtils.GetEnvironment(catalog);
    return new OneHotHashEncodingEstimator(environment, inputColumn, outputColumn, outputKind);
}
/// <summary>
/// A helper method to create <see cref="CategoricalEstimator"/> for public facing API.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="input">Name of the column to be transformed.</param>
/// <param name="output">Name of the output column. If this is <c>null</c>, <paramref name="input"/> is used.</param>
/// <param name="outputKind">The type of output expected.</param>
public CategoricalEstimator(
    IHostEnvironment env,
    string input,
    string output = null,
    CategoricalTransform.OutputKind outputKind = Defaults.OutKind)
    // Single-column convenience form: wrap the one column and defer to the multi-column constructor.
    : this(
        env,
        new[]
        {
            new ColumnInfo(input, output ?? input, outputKind)
        })
{
}
/// <summary>
/// Adds the key-to-vector (or key-to-binary-vector) stage on top of <paramref name="input"/>
/// for every column whose effective output kind is not <c>Key</c>.
/// </summary>
/// <param name="argsOutputKind">Output kind used for any column whose entry in <paramref name="columnOutputKinds"/> is null.</param>
/// <param name="columns">Columns to convert. Each one is passed through <c>TrySanitize</c> first.</param>
/// <param name="columnOutputKinds">Per-column output kind overrides; must contain as many entries as <paramref name="columns"/>.</param>
/// <param name="input">The transform to build upon. It is returned unchanged when no column needs conversion.</param>
/// <param name="h">Host used to open the channel, raise argument errors and create the resulting transform.</param>
/// <param name="catHashArgs">Optional arguments; only <c>InvertHash</c> is read, to warn when it is combined with binary encoding.</param>
/// <returns><paramref name="input"/> itself, or a new transform wrapping it.</returns>
private static IDataTransform CreateTransformCore(
    CategoricalTransform.OutputKind argsOutputKind,
    OneToOneColumn[] columns,
    List<CategoricalTransform.OutputKind?> columnOutputKinds,
    IDataTransform input,
    IHost h,
    Arguments catHashArgs = null)
{
    Contracts.CheckValue(columns, nameof(columns));
    Contracts.CheckValue(columnOutputKinds, nameof(columnOutputKinds));
    Contracts.CheckParam(columns.Length == columnOutputKinds.Count, nameof(columns));

    using (var ch = h.Start("Create Transform Core"))
    {
        // Collect the columns that need the KeyToVectorTransform, if any.
        var pending = new List<KeyToVectorTransform.Column>();

        // Binary encoding is chosen for ALL collected columns as soon as either the
        // global kind or any single column asks for it.
        bool useBinary = argsOutputKind == CategoricalTransform.OutputKind.Bin;

        for (int index = 0; index < columns.Length; index++)
        {
            var current = columns[index];
            if (!current.TrySanitize())
            {
                throw h.ExceptUserArg(nameof(Column.Name));
            }

            bool? asBag;
            switch (columnOutputKinds[index] ?? argsOutputKind)
            {
                case CategoricalTransform.OutputKind.Key:
                    // Key output needs no further conversion.
                    continue;

                case CategoricalTransform.OutputKind.Bag:
                    asBag = true;
                    break;

                case CategoricalTransform.OutputKind.Ind:
                    asBag = false;
                    break;

                case CategoricalTransform.OutputKind.Bin:
                    useBinary = true;
                    asBag = false;
                    break;

                default:
                    throw ch.ExceptUserArg(nameof(Column.OutputKind));
            }

            // The conversion is applied in place: source and destination are the same column.
            pending.Add(new KeyToVectorTransform.Column
            {
                Name = current.Name,
                Source = current.Name,
                Bag = asBag
            });
        }

        if (pending.Count == 0)
        {
            // NOTE(review): this early exit leaves the channel without calling ch.Done(),
            // unlike the path below -- confirm whether that is intended.
            return input;
        }

        IDataTransform result;
        if (!useBinary)
        {
            var vectorInfos = pending
                .Select(c => new KeyToVectorTransform.ColumnInfo(
                    c.Source,
                    c.Name,
                    c.Bag ?? (argsOutputKind == CategoricalTransform.OutputKind.Bag)))
                .ToArray();
            result = KeyToVectorTransform.Create(h, input, vectorInfos);
        }
        else
        {
            if ((catHashArgs?.InvertHash ?? 0) != 0)
            {
                ch.Warning("Invert hashing is being used with binary encoding.");
            }

            var binaryInfos = pending
                .Select(c => new KeyToBinaryVectorTransform.ColumnInfo(c.Source, c.Name))
                .ToArray();
            result = KeyToBinaryVectorTransform.Create(h, input, binaryInfos);
        }

        ch.Done();
        return result;
    }
}
/// <summary>
/// Creates the estimator for the given columns: a value-to-key mapping stage over all columns, followed by
/// a key-to-vector and/or key-to-binary-vector stage for the columns whose output kind is not <c>Key</c>.
/// </summary>
/// <param name="env">Host Environment.</param>
/// <param name="columns">Description of each column pair and its requested output kind.</param>
/// <param name="file">Forwarded unchanged to the <see cref="ValueToKeyMappingEstimator"/>.</param>
/// <param name="termsColumn">Forwarded unchanged to the <see cref="ValueToKeyMappingEstimator"/>.</param>
/// <param name="loaderFactory">Forwarded unchanged to the <see cref="ValueToKeyMappingEstimator"/>.</param>
public OneHotEncodingEstimator(
    IHostEnvironment env,
    ColumnInfo[] columns,
    string file = null,
    string termsColumn = null,
    IComponentFactory<IMultiStreamSource, IDataLoader> loaderFactory = null)
{
    Contracts.CheckValue(env, nameof(env));
    _host = env.Register(nameof(OneHotEncodingEstimator));
    _term = new ValueToKeyMappingEstimator(_host, columns, file, termsColumn, loaderFactory);

    var binaryPairs = new List<(string input, string output)>();
    var vectorPairs = new List<(string input, string output, bool bag)>();

    foreach (var column in columns)
    {
        // The second stage works in place on the output column of the mapping stage.
        string mapped = column.Output;
        switch (column.OutputKind)
        {
            case CategoricalTransform.OutputKind.Key:
                // Key output needs no second stage.
                continue;

            case CategoricalTransform.OutputKind.Bag:
                vectorPairs.Add((mapped, mapped, true));
                break;

            case CategoricalTransform.OutputKind.Ind:
                vectorPairs.Add((mapped, mapped, false));
                break;

            case CategoricalTransform.OutputKind.Bin:
                binaryPairs.Add((mapped, mapped));
                break;

            default:
                throw _host.ExceptUserArg(nameof(column.OutputKind));
        }
    }

    IEstimator<ITransformer> toBinVector = null;
    if (binaryPairs.Count > 0)
    {
        var binaryInfos = binaryPairs
            .Select(p => new KeyToBinaryVectorTransform.ColumnInfo(p.input, p.output))
            .ToArray();
        toBinVector = new KeyToBinaryVectorMappingEstimator(_host, binaryInfos);
    }

    IEstimator<ITransformer> toVector = null;
    if (vectorPairs.Count > 0)
    {
        var vectorInfos = vectorPairs
            .Select(p => new KeyToVectorTransform.ColumnInfo(p.input, p.output, p.bag))
            .ToArray();
        toVector = new KeyToVectorMappingEstimator(_host, vectorInfos);
    }

    // Chain both stages when both exist; otherwise use whichever one exists
    // (or null when every column asked for Key output).
    if (toBinVector == null)
    {
        _toSomething = toVector;
    }
    else if (toVector == null)
    {
        _toSomething = toBinVector;
    }
    else
    {
        _toSomething = toVector.Append(toBinVector);
    }
}