Exemple #1
0
            /// <summary>
            /// Captures the configuration of <paramref name="parent"/> into this parameter object.
            /// </summary>
            /// <param name="parent">
            /// The transform whose host, advanced settings, dictionary, stop-words remover and
            /// word/char feature extractors are read.
            /// </param>
            public TransformApplierParams(TextTransform parent)
            {
                var env = parent._host;
                var options = parent.AdvancedSettings;
                var dictionary = parent._dictionary;

                // Both enum-typed settings must hold a defined member before anything is built from them.
                env.Check(Enum.IsDefined(typeof(Language), options.TextLanguage));
                env.Check(Enum.IsDefined(typeof(CaseNormalizationMode), options.TextCase));

                // Either extractor may be absent; the corresponding factory is then left null.
                WordExtractorFactory = parent._wordFeatureExtractor?.CreateComponent(env, dictionary);
                CharExtractorFactory = parent._charFeatureExtractor?.CreateComponent(env, dictionary);

                // Values taken verbatim from the advanced settings.
                Language = options.TextLanguage;
                TextCase = options.TextCase;
                KeepDiacritics = options.KeepDiacritics;
                KeepPunctuations = options.KeepPunctuations;
                KeepNumbers = options.KeepNumbers;
                OutputTextTokens = options.OutputTokens;
                VectorNormalizer = options.VectorNormalizer;

                // Values taken from the parent's own fields.
                StopWordsRemover = parent._stopWordsRemover;
                Dictionary = dictionary;
            }
Exemple #2
0
        /// <summary>
        /// Builds a <see cref="TextTransform"/> configured from <paramref name="args"/>, fits it on
        /// <paramref name="data"/>, and returns the result of applying the fitted transformer to
        /// that same data.
        /// </summary>
        /// <param name="env">Host environment handed to the <see cref="TextTransform"/> constructor.</param>
        /// <param name="args">
        /// Source of the column mapping, the text-normalization settings, and the optional
        /// stop-words remover, dictionary and word/char feature extractors.
        /// </param>
        /// <param name="data">Data view used both for fitting and for the final transform.</param>
        /// <returns>The transformed data, as an <see cref="IDataTransform"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="args"/> is null.</exception>
        /// <exception cref="InvalidCastException">
        /// The transformed data does not implement <see cref="IDataTransform"/>.
        /// </exception>
        public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView data)
        {
            // args is dereferenced directly below; fail with a descriptive exception instead of
            // a bare NullReferenceException.
            if (args == null)
            {
                throw new ArgumentNullException(nameof(args));
            }

            // Copies the normalization options from args onto the estimator's settings object.
            Action<Settings> settings = s =>
            {
                s.TextLanguage     = args.Language;
                s.TextCase         = args.TextCase;
                s.KeepDiacritics   = args.KeepDiacritics;
                s.KeepPunctuations = args.KeepPunctuations;
                s.KeepNumbers      = args.KeepNumbers;
                s.OutputTokens     = args.OutputTokens;
                s.VectorNormalizer = args.VectorNormalizer;
            };

            // When no explicit source columns are given, the output column name doubles as the input.
            var estimator = new TextTransform(env, args.Column.Source ?? new[] { args.Column.Name }, args.Column.Name, settings);

            estimator._stopWordsRemover     = args.StopWordsRemover;
            estimator._dictionary           = args.Dictionary;
            estimator._wordFeatureExtractor = args.WordFeatureExtractor;
            estimator._charFeatureExtractor = args.CharFeatureExtractor;

            // Direct cast instead of 'as': a type mismatch surfaces here as InvalidCastException
            // rather than as a silently returned null that fails later in the caller.
            return (IDataTransform)estimator.Fit(data).Transform(data);
        }