Пример #1
0
        /// <summary>
        /// Builds the transform from generic extractor arguments: validates the inputs, maps each
        /// extractor column to a <c>Column</c>, copies the n-gram settings into an <c>Options</c>
        /// instance, and delegates to the <c>Options</c>-based <c>Create</c> overload.
        /// </summary>
        /// <param name="env">Host environment; checked for null and used to register a host.</param>
        /// <param name="extractorArgs">N-gram settings; checked for null, and its SkipLength must be less than its NgramLength.</param>
        /// <param name="input">Input data view; checked for null.</param>
        /// <param name="cols">Columns to process; must be non-empty, and each entry's Source must have size one.</param>
        /// <param name="termLoaderArgs">Optional term loader arguments; may be null and is forwarded unchanged.</param>
        /// <returns>The result of the <c>Options</c>-based <c>Create</c> overload.</returns>
        internal static IDataTransform Create(
            IHostEnvironment env,
            NgramExtractorArguments extractorArgs,
            IDataView input,
            ExtractorColumn[] cols,
            TermLoaderArguments termLoaderArgs = null)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register(LoaderSignature);

            // Validate the remaining arguments through the registered host.
            host.CheckValue(extractorArgs, nameof(extractorArgs));
            host.CheckValue(input, nameof(input));
            host.CheckUserArg(
                extractorArgs.SkipLength < extractorArgs.NgramLength,
                nameof(extractorArgs.SkipLength),
                "Should be less than " + nameof(extractorArgs.NgramLength));
            host.CheckUserArg(Utils.Size(cols) > 0, nameof(Options.Columns), "Must be specified");
            host.CheckValueOrNull(termLoaderArgs);

            // Convert every extractor column into the transform's own column type.
            int columnCount = cols.Length;
            var columns = new Column[columnCount];
            for (int index = 0; index < columnCount; index++)
            {
                var current = cols[index];
                Contracts.Check(Utils.Size(current.Source) == 1, "too many source columns");

                columns[index] = new Column
                {
                    Name = current.Name,
                    Source = current.Source[0]
                };
            }

            return Create(
                host,
                new Options
                {
                    Columns = columns,
                    NgramLength = extractorArgs.NgramLength,
                    SkipLength = extractorArgs.SkipLength,
                    AllLengths = extractorArgs.AllLengths,
                    MaxNumTerms = extractorArgs.MaxNumTerms,
                    Weighting = extractorArgs.Weighting
                },
                input,
                termLoaderArgs);
        }
Пример #2
0
        /// <summary>
        /// Creates an n-gram extractor factory that wraps the supplied arguments.
        /// </summary>
        /// <param name="env">Host environment; checked for null and used to register a host for validation.</param>
        /// <param name="extractorArgs">N-gram settings; checked for null.</param>
        /// <param name="termLoaderArgs">Term loader arguments; may be null.</param>
        /// <returns>A new <c>NgramExtractorFactory</c> built from the two argument objects.</returns>
        public static INgramExtractorFactory Create(
            IHostEnvironment env,
            NgramExtractorArguments extractorArgs,
            TermLoaderArguments termLoaderArgs)
        {
            Contracts.CheckValue(env, nameof(env));

            // The registered host is only needed to validate the remaining arguments.
            var host = env.Register(LoaderSignature);
            host.CheckValue(extractorArgs, nameof(extractorArgs));
            host.CheckValueOrNull(termLoaderArgs);

            INgramExtractorFactory factory = new NgramExtractorFactory(extractorArgs, termLoaderArgs);
            return factory;
        }
        /// <summary>
        /// Translates generic extractor arguments and columns into an <c>Options</c> instance.
        /// </summary>
        /// <param name="extractorArgs">N-gram settings to copy into the options; must not be null.</param>
        /// <param name="cols">Columns to convert; must not be null, and each entry's Source must have size one.</param>
        /// <returns>
        /// A new <c>Options</c> whose Columns holds one <c>Column</c> per entry of <paramref name="cols"/>
        /// and whose remaining settings are copied from <paramref name="extractorArgs"/>.
        /// </returns>
        internal static Options CreateNgramExtractorOptions(NgramExtractorArguments extractorArgs, ExtractorColumn[] cols)
        {
            // Fail fast with a named-argument error instead of a bare NullReferenceException
            // raised part-way through the conversion. An empty array is still accepted.
            Contracts.CheckValue(extractorArgs, nameof(extractorArgs));
            Contracts.CheckValue(cols, nameof(cols));

            var extractorCols = new Column[cols.Length];

            for (int i = 0; i < cols.Length; i++)
            {
                var col = cols[i];
                Contracts.Check(Utils.Size(col.Source) == 1, "too many source columns");
                extractorCols[i] = new Column
                {
                    Name = col.Name,
                    Source = col.Source[0]
                };
            }

            return new Options
            {
                Columns = extractorCols,
                NgramLength = extractorArgs.NgramLength,
                SkipLength = extractorArgs.SkipLength,
                UseAllLengths = extractorArgs.UseAllLengths,
                MaxNumTerms = extractorArgs.MaxNumTerms,
                Weighting = extractorArgs.Weighting
            };
        }