/// <summary>
        /// Public constructor corresponding to SignatureTokenizeTransform. Takes the shared tokenizer
        /// options together with an explicit list of columns (constructed from the caller
        /// -WordBag/WordHashBag- arguments) and folds both into a single <see cref="Arguments"/> instance.
        /// </summary>
        /// <param name="env">Host environment, forwarded to the base constructor.</param>
        /// <param name="args">Tokenizer options; must not be null. Its Language, LanguagesColumn and
        /// CreateTypesColumn values are copied into the arguments used to create the bindings.</param>
        /// <param name="input">Input data view, forwarded to the base constructor.</param>
        /// <param name="columns">Non-empty array of columns. Every entry needs a non-blank Name and Source:
        /// Source is used as the tokenizer column's Source and Name as its TokensColumn.</param>
        public NltTokenizeTransform(IHostEnvironment env, TokenizeArguments args, IDataView input, OneToOneColumn[] columns)
            : base(env, RegistrationName, input)
        {
            Host.CheckValue(args, nameof(args));
            // Utils.Size is used so that a null array is rejected here, before columns.Length is read.
            Host.CheckUserArg(Utils.Size(columns) > 0, nameof(Arguments.Column));

            int count = columns.Length;
            var mapped = new Column[count];
            for (int idx = 0; idx < count; idx++)
            {
                var current = columns[idx];

                // Validate the output name first, then the source, before building the mapped column.
                Host.CheckUserArg(!string.IsNullOrWhiteSpace(current.Name), nameof(OneToOneColumn.Name));
                Host.CheckUserArg(!string.IsNullOrWhiteSpace(current.Source), nameof(OneToOneColumn.Source));

                var target = new Column();
                target.Source = current.Source;
                target.TokensColumn = current.Name;
                mapped[idx] = target;
            }

            // Combine the mapped columns with the option values carried by args.
            var fullArgs = new Arguments
            {
                Column = mapped,
                Language = args.Language,
                LanguagesColumn = args.LanguagesColumn,
                CreateTypesColumn = args.CreateTypesColumn
            };

            using (var channel = Host.Start("Construction"))
            {
                _bindings = Bindings.Create(fullArgs, Source.Schema, channel);
                CheckResources(channel);
                channel.Done();
            }
        }
예제 #2
0
        /// <summary>
        /// Public constructor corresponding to SignatureTokenizeTransform. Takes the shared tokenizer
        /// options together with an explicit list of columns (constructed from the caller
        /// -WordBag/WordHashBag- arguments).
        /// </summary>
        /// <param name="env">Host environment, forwarded to the base constructor.</param>
        /// <param name="args">Tokenizer options; must not be null. The same instance is used to build
        /// the per-column <see cref="ColInfoEx"/> entry of every column.</param>
        /// <param name="input">Input data view, forwarded to the base constructor.</param>
        /// <param name="columns">Non-empty array of columns, forwarded to the base constructor.</param>
        public DelimitedTokenizeTransform(IHostEnvironment env, TokenizeArguments args, IDataView input, OneToOneColumn[] columns)
            : base(env, RegistrationName, columns, input, TestIsTextItem)
        {
            Host.CheckValue(args, nameof(args));
            Host.CheckUserArg(Utils.Size(columns) > 0, nameof(Arguments.Column));

            // REVIEW: Need to decide whether to inject an NA token between slots in ReadOnlyMemory inputs.
            Host.AssertNonEmpty(Infos);
            Host.Assert(Infos.Length == Utils.Size(columns));

            // One ColInfoEx per entry in Infos, each created from the same args.
            int infoCount = Infos.Length;
            _exes = new ColInfoEx[infoCount];
            for (int slot = 0; slot < infoCount; slot++)
            {
                _exes[slot] = new ColInfoEx(args);
            }

            // The column type is a vector of text; it is assigned before the metadata is sealed.
            _columnType = new VectorType(TextType.Instance);
            Metadata.Seal();
        }