/// <summary>
/// Builds a one-hot encoding transform from command-line style <paramref name="options"/>,
/// fits it on <paramref name="input"/> and returns the transformed data.
/// </summary>
/// <param name="env">Host environment; checked for null and used to register the "Categorical" host.</param>
/// <param name="options">Transform options; must contain at least one column.</param>
/// <param name="input">Data the estimator is fitted on and then applied to.</param>
/// <returns>The result of fitting and transforming <paramref name="input"/>, cast to <see cref="IDataTransform"/>.</returns>
internal static IDataTransform Create(IHostEnvironment env, Options options, IDataView input)
{
    Contracts.CheckValue(env, nameof(env));
    var h = env.Register("Categorical");
    h.CheckValue(options, nameof(options));
    h.CheckValue(input, nameof(input));
    h.CheckUserArg(Utils.Size(options.Columns) > 0, nameof(options.Columns));

    // Translate each requested column; any per-column setting left null
    // falls back to the corresponding options-wide value.
    var columnOptions = new List<OneHotEncodingEstimator.ColumnOptions>();
    foreach (var requested in options.Columns)
    {
        var outputName = requested.Name;
        var sourceName = requested.Source ?? requested.Name;
        var outputKind = requested.OutputKind ?? options.OutputKind;
        var maxNumTerms = requested.MaxNumTerms ?? options.MaxNumTerms;
        var sortOrder = requested.Sort ?? options.Sort;
        var terms = requested.Terms ?? options.Terms;

        var converted = new OneHotEncodingEstimator.ColumnOptions(
            outputName,
            sourceName,
            outputKind,
            maxNumTerms,
            sortOrder,
            terms);
        converted.SetTerms(requested.Term ?? options.Term);
        columnOptions.Add(converted);
    }

    // Key data is only requested when a data file was specified; otherwise it stays null.
    IDataView keyData = null;
    bool hasDataFile = !string.IsNullOrEmpty(options.DataFile);
    if (hasDataFile)
    {
        using (var ch = h.Start("Load term data"))
        {
            // The "auto loaded" out flag is not needed here, so it is discarded.
            keyData = ValueToKeyMappingTransformer.GetKeyDataViewOrNull(
                env, ch, options.DataFile, options.TermsColumn, options.Loader, out bool _);
        }
        h.AssertValue(keyData);
    }

    var estimator = new OneHotEncodingEstimator(env, columnOptions.ToArray(), keyData);
    var fitted = estimator.Fit(input);
    var transformed = fitted.Transform(input);
    return (IDataTransform)transformed;
}
/// <summary>
/// Entry point that creates a <see cref="ValueToKeyMappingTransformer"/> over
/// <c>input.Data</c> and packages it as a transform output.
/// </summary>
/// <param name="env">Host environment; checked for null and used to register the "Term" host.</param>
/// <param name="input">Transformer options, including the input data (<c>input.Data</c>).</param>
/// <returns>
/// A <see cref="CommonOutputs.TransformOutput"/> whose <c>OutputData</c> is the created transform
/// and whose <c>Model</c> wraps that transform together with <c>input.Data</c>.
/// </returns>
public static CommonOutputs.TransformOutput TextToKey(IHostEnvironment env, ValueToKeyMappingTransformer.Options input)
{
    Contracts.CheckValue(env, nameof(env));
    var termHost = env.Register("Term");
    termHost.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(termHost, input);

    var transform = ValueToKeyMappingTransformer.Create(termHost, input, input.Data);

    // The model is constructed with the caller's environment, not the registered host.
    var model = new TransformModelImpl(env, transform, input.Data);
    return new CommonOutputs.TransformOutput
    {
        Model = model,
        OutputData = transform
    };
}