Ejemplo n.º 1
0
        /// <summary>
        /// Initializes a <see cref="TextTransform"/>: validates the arguments, records the input and
        /// output column names, builds the <see cref="Settings"/> instance and sets up the default
        /// word and character n-gram extractor arguments.
        /// </summary>
        /// <param name="env">Host environment; must not be null. A host named after this type is registered on it.</param>
        /// <param name="inputColumns">Input column names. Must be non-null, contain at least one entry, and contain no null or white-space entries.</param>
        /// <param name="outputColumn">Output column name; must be non-empty.</param>
        /// <param name="advancedSettings">Optional callback invoked on the newly created <see cref="Settings"/> instance.</param>
        public TextTransform(IHostEnvironment env, IEnumerable<string> inputColumns, string outputColumn,
                             Action<Settings> advancedSettings = null)
        {
            Contracts.CheckValue(env, nameof(env));
            _host = env.Register(nameof(TextTransform));
            _host.CheckValue(inputColumns, nameof(inputColumns));

            // Materialize the sequence exactly once. Validating the lazy sequence and then calling
            // ToArray() separately would enumerate it three times, so a deferred or one-shot source
            // could pass validation and still yield different contents when stored.
            var columns = inputColumns.ToArray();
            _host.CheckParam(columns.Length > 0, nameof(inputColumns));
            _host.CheckParam(!columns.Any(string.IsNullOrWhiteSpace), nameof(inputColumns));
            _host.CheckNonEmpty(outputColumn, nameof(outputColumn));
            _host.CheckValueOrNull(advancedSettings);

            _inputColumns = columns;
            OutputColumn = outputColumn;

            // Start from default settings and let the caller adjust them, if a callback was supplied.
            AdvancedSettings = new Settings();
            advancedSettings?.Invoke(AdvancedSettings);

            _stopWordsRemover = null;
            _dictionary = null;
            _wordFeatureExtractor = new NgramExtractorTransform.NgramExtractorArguments();
            _charFeatureExtractor = new NgramExtractorTransform.NgramExtractorArguments()
            {
                NgramLength = 3,
                AllLengths = false
            };
        }
            /// <summary>
            /// Builds the applier parameters from the state held by <paramref name="parent"/>:
            /// its optional settings, stop-words remover, dictionary and feature extractor factories.
            /// </summary>
            /// <param name="parent">Estimator whose host and configuration are read.</param>
            public TransformApplierParams(TextFeaturizingEstimator parent)
            {
                var ch = parent._host;
                var settings = parent.OptionalSettings;
                var dictionary = parent._dictionary;

                // Reject enum values that are not declared members before copying anything.
                ch.Check(Enum.IsDefined(typeof(Language), settings.Language));
                ch.Check(Enum.IsDefined(typeof(CaseMode), settings.CaseMode));

                // Either extractor may be absent, in which case its factory stays null.
                WordExtractorFactory = parent._wordFeatureExtractor?.CreateComponent(ch, dictionary);
                CharExtractorFactory = parent._charFeatureExtractor?.CreateComponent(ch, dictionary);

                // Values taken from the optional settings.
                Norm = settings.Norm;
                Language = settings.Language;
                TextCase = settings.CaseMode;
                KeepDiacritics = settings.KeepDiacritics;
                KeepPunctuations = settings.KeepPunctuations;
                KeepNumbers = settings.KeepNumbers;
                OutputTextTokensColumnName = settings.OutputTokensColumnName;

                // Values taken directly from the estimator.
                StopWordsRemover = parent._stopWordsRemover;
                Dictionary = dictionary;
            }
Ejemplo n.º 3
0
            /// <summary>
            /// Builds the applier parameters from the state held by <paramref name="parent"/>:
            /// its advanced settings, stop-words remover, dictionary and feature extractor arguments.
            /// </summary>
            /// <param name="parent">Transform whose host and configuration are read.</param>
            public TransformApplierParams(TextTransform parent)
            {
                var ch = parent._host;
                var settings = parent.AdvancedSettings;
                var dictionary = parent._dictionary;

                // Reject enum values that are not declared members before copying anything.
                ch.Check(Enum.IsDefined(typeof(Language), settings.TextLanguage));
                ch.Check(Enum.IsDefined(typeof(CaseNormalizationMode), settings.TextCase));

                // Either extractor may be absent, in which case its factory stays null.
                WordExtractorFactory = parent._wordFeatureExtractor?.CreateComponent(ch, dictionary);
                CharExtractorFactory = parent._charFeatureExtractor?.CreateComponent(ch, dictionary);

                // Values taken from the advanced settings.
                VectorNormalizer = settings.VectorNormalizer;
                Language = settings.TextLanguage;
                TextCase = settings.TextCase;
                KeepDiacritics = settings.KeepDiacritics;
                KeepPunctuations = settings.KeepPunctuations;
                KeepNumbers = settings.KeepNumbers;
                OutputTextTokens = settings.OutputTokens;

                // Values taken directly from the transform.
                StopWordsRemover = parent._stopWordsRemover;
                Dictionary = dictionary;
            }
 /// <summary>
 /// Builds the applier parameters from user-supplied <paramref name="args"/>, validating them
 /// against <paramref name="host"/> first.
 /// </summary>
 /// <param name="host">Host used for the argument checks and for creating the extractor components.</param>
 /// <param name="args">Arguments to validate and copy.</param>
 public TransformApplierParams(IHost host, Arguments args)
 {
     Contracts.AssertValue(host);

     host.CheckUserArg(args.Column != null, nameof(args.Column), "Columns must be specified");

     // The transform needs something to emit: a word extractor, a char extractor, or the raw tokens.
     bool producesOutput = args.WordFeatureExtractor != null
                           || args.CharFeatureExtractor != null
                           || args.OutputTokens;
     host.CheckUserArg(producesOutput, nameof(args.WordFeatureExtractor),
                       "At least one feature extractor or OutputTokens must be specified.");

     // Reject enum values that are not declared members.
     host.Check(Enum.IsDefined(typeof(Language), args.Language));
     host.Check(Enum.IsDefined(typeof(CaseNormalizationMode), args.TextCase));

     var dictionary = args.Dictionary;

     // Either extractor may be absent, in which case its factory stays null.
     WordExtractorFactory = args.WordFeatureExtractor?.CreateComponent(host, dictionary);
     CharExtractorFactory = args.CharFeatureExtractor?.CreateComponent(host, dictionary);

     // Remaining values are copied over unchanged.
     VectorNormalizer = args.VectorNormalizer;
     Language = args.Language;
     StopWordsRemover = args.StopWordsRemover;
     TextCase = args.TextCase;
     KeepDiacritics = args.KeepDiacritics;
     KeepPunctuations = args.KeepPunctuations;
     KeepNumbers = args.KeepNumbers;
     OutputTextTokens = args.OutputTokens;
     Dictionary = dictionary;
 }
        /// <summary>
        /// Initializes a <see cref="TextFeaturizingEstimator"/>: validates the arguments, records the
        /// source and output column names, and takes the stop-words remover and feature extractor
        /// factories from the effective <see cref="Options"/>.
        /// </summary>
        /// <param name="env">Host environment; must not be null. A host named after this type is registered on it.</param>
        /// <param name="name">Output column name; must be non-empty.</param>
        /// <param name="source">Source column names. Must be non-null, contain at least one entry, and contain no null or white-space entries.</param>
        /// <param name="options">Optional settings. When null, a default <see cref="Options"/> instance is used.</param>
        internal TextFeaturizingEstimator(IHostEnvironment env, string name, IEnumerable<string> source, Options options = null)
        {
            Contracts.CheckValue(env, nameof(env));
            _host = env.Register(nameof(TextFeaturizingEstimator));
            _host.CheckValue(source, nameof(source));

            // Materialize the sequence exactly once. Validating the lazy sequence and then calling
            // ToArray() separately would enumerate it three times, so a deferred or one-shot source
            // could pass validation and still yield different contents when stored.
            var columns = source.ToArray();
            _host.CheckParam(columns.Length > 0, nameof(source));
            _host.CheckParam(!columns.Any(string.IsNullOrWhiteSpace), nameof(source));
            _host.CheckNonEmpty(name, nameof(name));
            _host.CheckValueOrNull(options);

            _inputColumns = columns;
            OutputColumn = name;

            // Fall back to defaults only when the caller supplied nothing; this avoids allocating
            // an Options instance that would be discarded immediately.
            OptionalSettings = options ?? new Options();

            _stopWordsRemover = OptionalSettings.StopWordsRemover;
            _dictionary = null;
            _wordFeatureExtractor = OptionalSettings.WordFeatureExtractorFactory;
            _charFeatureExtractor = OptionalSettings.CharFeatureExtractorFactory;
        }