/// <summary>
/// Public constructor corresponding to SignatureTokenizeTransform. Takes the shared tokenizer
/// arguments together with a separately supplied array of one-to-one columns (constructed by
/// the caller -WordBag/WordHashBag- from its own arguments).
/// </summary>
/// <param name="env">Host environment forwarded to the base constructor.</param>
/// <param name="args">Tokenizer settings; Language, LanguagesColumn and CreateTypesColumn are copied from it.</param>
/// <param name="input">Input data view forwarded to the base constructor.</param>
/// <param name="columns">Non-empty array of columns; each entry needs a non-blank Name and Source.</param>
public NltTokenizeTransform(IHostEnvironment env, TokenizeArguments args, IDataView input, OneToOneColumn[] columns)
    : base(env, RegistrationName, input)
{
    Host.CheckValue(args, nameof(args));
    Host.CheckUserArg(Utils.Size(columns) > 0, nameof(Arguments.Column));

    // Translate every one-to-one column into this transform's own column shape:
    // the one-to-one Name becomes the tokens (output) column, Source is carried over.
    var tokenizerColumns = new Column[columns.Length];
    int slot = 0;
    foreach (var oneToOne in columns)
    {
        Host.CheckUserArg(!string.IsNullOrWhiteSpace(oneToOne.Name), nameof(OneToOneColumn.Name));
        Host.CheckUserArg(!string.IsNullOrWhiteSpace(oneToOne.Source), nameof(OneToOneColumn.Source));

        var mapped = new Column();
        mapped.Source = oneToOne.Source;
        mapped.TokensColumn = oneToOne.Name;
        tokenizerColumns[slot] = mapped;
        slot++;
    }

    // Assemble the full argument object expected by Bindings.Create.
    var extendedArgs = new Arguments();
    extendedArgs.Column = tokenizerColumns;
    extendedArgs.Language = args.Language;
    extendedArgs.LanguagesColumn = args.LanguagesColumn;
    extendedArgs.CreateTypesColumn = args.CreateTypesColumn;

    using (var ch = Host.Start("Construction"))
    {
        _bindings = Bindings.Create(extendedArgs, Source.Schema, ch);
        CheckResources(ch);
        ch.Done();
    }
}
/// <summary>
/// Public constructor corresponding to SignatureTokenizeTransform. Takes the shared tokenizer
/// arguments together with a separately supplied array of one-to-one columns (constructed by
/// the caller -WordBag/WordHashBag- from its own arguments).
/// </summary>
/// <param name="env">Host environment forwarded to the base constructor.</param>
/// <param name="args">Tokenizer settings; the same instance is used to build every per-column ColInfoEx.</param>
/// <param name="input">Input data view forwarded to the base constructor.</param>
/// <param name="columns">Non-empty array of columns, forwarded to the base constructor.</param>
public DelimitedTokenizeTransform(IHostEnvironment env, TokenizeArguments args, IDataView input, OneToOneColumn[] columns)
    : base(env, RegistrationName, columns, input, TestIsTextItem)
{
    Host.CheckValue(args, nameof(args));
    Host.CheckUserArg(Utils.Size(columns) > 0, nameof(Arguments.Column));

    // REVIEW: Need to decide whether to inject an NA token between slots in ReadOnlyMemory inputs.

    // Sanity checks: there must be exactly one info entry per requested column.
    Host.AssertNonEmpty(Infos);
    Host.Assert(Infos.Length == Utils.Size(columns));

    // One extra-info record per column, all derived from the shared arguments.
    _exes = new ColInfoEx[Infos.Length];
    for (int col = 0; col < _exes.Length; ++col)
    {
        _exes[col] = new ColInfoEx(args);
    }

    // The output column type is a vector of text.
    _columnType = new VectorType(TextType.Instance);
    Metadata.Seal();
}