/// <summary>
/// Creates a <see cref="TextTransform"/> that reads one or more input columns and writes a single output column.
/// </summary>
/// <param name="env">Host environment; must not be null. Used to register the host for this transform.</param>
/// <param name="inputColumns">Names of the input columns. Must be non-null, contain at least one entry,
/// and contain no null, empty, or whitespace-only names.</param>
/// <param name="outputColumn">Name of the output column; must be non-empty.</param>
/// <param name="advancedSettings">Optional callback that receives a freshly created <see cref="Settings"/>
/// instance and may modify it. May be null, in which case the default settings are kept.</param>
public TextTransform(IHostEnvironment env, IEnumerable<string> inputColumns, string outputColumn, Action<Settings> advancedSettings = null)
{
    Contracts.CheckValue(env, nameof(env));
    _host = env.Register(nameof(TextTransform));
    _host.CheckValue(inputColumns, nameof(inputColumns));

    // Materialize the sequence exactly once. Validating the lazy sequence and then calling
    // ToArray() separately would enumerate it three times, which is wasteful and, for a
    // one-shot or non-deterministic enumerable, could store contents that were never validated.
    var columns = inputColumns.ToArray();
    _host.CheckParam(columns.Length > 0, nameof(inputColumns));
    _host.CheckParam(!columns.Any(string.IsNullOrWhiteSpace), nameof(inputColumns));
    _host.CheckNonEmpty(outputColumn, nameof(outputColumn));
    _host.CheckValueOrNull(advancedSettings);

    _inputColumns = columns;
    OutputColumn = outputColumn;

    // Start from default settings and let the caller customize them in place.
    AdvancedSettings = new Settings();
    advancedSettings?.Invoke(AdvancedSettings);

    // No stop-words remover and no dictionary are configured by this constructor.
    _stopWordsRemover = null;
    _dictionary = null;

    // Word n-grams use the extractor's default arguments; char n-grams use length 3 with AllLengths disabled.
    _wordFeatureExtractor = new NgramExtractorTransform.NgramExtractorArguments();
    _charFeatureExtractor = new NgramExtractorTransform.NgramExtractorArguments() { NgramLength = 3, AllLengths = false };
}
/// <summary>
/// Builds the applier parameters from the configuration held by a <see cref="TextFeaturizingEstimator"/>.
/// </summary>
/// <param name="parent">Estimator whose host, optional settings, feature extractors, stop-words remover
/// and dictionary are copied into this instance.</param>
public TransformApplierParams(TextFeaturizingEstimator parent)
{
    var host = parent._host;
    var settings = parent.OptionalSettings;

    // Reject enum values that are not declared members (e.g. an arbitrary integer cast to the enum).
    host.Check(Enum.IsDefined(typeof(Language), settings.Language));
    host.Check(Enum.IsDefined(typeof(CaseMode), settings.CaseMode));

    // Each factory stays null when the parent has no corresponding extractor configured.
    WordExtractorFactory = parent._wordFeatureExtractor?.CreateComponent(host, parent._dictionary);
    CharExtractorFactory = parent._charFeatureExtractor?.CreateComponent(host, parent._dictionary);

    // Values copied straight from the estimator's optional settings.
    Norm = settings.Norm;
    Language = settings.Language;
    TextCase = settings.CaseMode;
    KeepDiacritics = settings.KeepDiacritics;
    KeepPunctuations = settings.KeepPunctuations;
    KeepNumbers = settings.KeepNumbers;
    OutputTextTokensColumnName = settings.OutputTokensColumnName;

    // Values held directly on the estimator.
    StopWordsRemover = parent._stopWordsRemover;
    Dictionary = parent._dictionary;
}
/// <summary>
/// Builds the applier parameters from the configuration held by a <see cref="TextTransform"/>.
/// </summary>
/// <param name="parent">Transform whose host, advanced settings, feature extractors, stop-words remover
/// and dictionary are copied into this instance.</param>
public TransformApplierParams(TextTransform parent)
{
    var host = parent._host;
    var settings = parent.AdvancedSettings;

    // Reject enum values that are not declared members (e.g. an arbitrary integer cast to the enum).
    host.Check(Enum.IsDefined(typeof(Language), settings.TextLanguage));
    host.Check(Enum.IsDefined(typeof(CaseNormalizationMode), settings.TextCase));

    // Each factory stays null when the parent has no corresponding extractor configured.
    WordExtractorFactory = parent._wordFeatureExtractor?.CreateComponent(host, parent._dictionary);
    CharExtractorFactory = parent._charFeatureExtractor?.CreateComponent(host, parent._dictionary);

    // Values copied straight from the transform's advanced settings.
    VectorNormalizer = settings.VectorNormalizer;
    Language = settings.TextLanguage;
    TextCase = settings.TextCase;
    KeepDiacritics = settings.KeepDiacritics;
    KeepPunctuations = settings.KeepPunctuations;
    KeepNumbers = settings.KeepNumbers;
    OutputTextTokens = settings.OutputTokens;

    // Values held directly on the transform.
    StopWordsRemover = parent._stopWordsRemover;
    Dictionary = parent._dictionary;
}
/// <summary>
/// Builds the applier parameters from user-supplied <see cref="Arguments"/>.
/// </summary>
/// <param name="host">Host used for argument validation and for creating the extractor components.</param>
/// <param name="args">User arguments. Columns must be specified, and at least one of the word feature
/// extractor, the char feature extractor, or the output-tokens flag must be set.</param>
public TransformApplierParams(IHost host, Arguments args)
{
    Contracts.AssertValue(host);

    host.CheckUserArg(args.Column != null, nameof(args.Column), "Columns must be specified");

    // The transform must produce something: extracted features, raw tokens, or both.
    bool producesOutput = args.WordFeatureExtractor != null
        || args.CharFeatureExtractor != null
        || args.OutputTokens;
    host.CheckUserArg(producesOutput, nameof(args.WordFeatureExtractor), "At least one feature extractor or OutputTokens must be specified.");

    // Reject enum values that are not declared members (e.g. an arbitrary integer cast to the enum).
    host.Check(Enum.IsDefined(typeof(Language), args.Language));
    host.Check(Enum.IsDefined(typeof(CaseNormalizationMode), args.TextCase));

    // Each factory stays null when the corresponding extractor was not supplied.
    WordExtractorFactory = args.WordFeatureExtractor?.CreateComponent(host, args.Dictionary);
    CharExtractorFactory = args.CharFeatureExtractor?.CreateComponent(host, args.Dictionary);

    // Remaining values are copied verbatim from the arguments.
    VectorNormalizer = args.VectorNormalizer;
    Language = args.Language;
    TextCase = args.TextCase;
    KeepDiacritics = args.KeepDiacritics;
    KeepPunctuations = args.KeepPunctuations;
    KeepNumbers = args.KeepNumbers;
    OutputTextTokens = args.OutputTokens;
    StopWordsRemover = args.StopWordsRemover;
    Dictionary = args.Dictionary;
}
/// <summary>
/// Creates a <see cref="TextFeaturizingEstimator"/> that reads one or more source columns and writes a single output column.
/// </summary>
/// <param name="env">Host environment; must not be null. Used to register the host for this estimator.</param>
/// <param name="name">Name of the output column; must be non-empty.</param>
/// <param name="source">Names of the input columns. Must be non-null, contain at least one entry,
/// and contain no null, empty, or whitespace-only names.</param>
/// <param name="options">Optional settings. When null, a default <see cref="Options"/> instance is used.</param>
internal TextFeaturizingEstimator(IHostEnvironment env, string name, IEnumerable<string> source, Options options = null)
{
    Contracts.CheckValue(env, nameof(env));
    _host = env.Register(nameof(TextFeaturizingEstimator));
    _host.CheckValue(source, nameof(source));

    // Materialize the sequence exactly once. Validating the lazy sequence and then calling
    // ToArray() separately would enumerate it three times, which is wasteful and, for a
    // one-shot or non-deterministic enumerable, could store contents that were never validated.
    var columns = source.ToArray();
    _host.CheckParam(columns.Length > 0, nameof(source));
    _host.CheckParam(!columns.Any(string.IsNullOrWhiteSpace), nameof(source));
    _host.CheckNonEmpty(name, nameof(name));
    _host.CheckValueOrNull(options);

    _inputColumns = columns;
    OutputColumn = name;

    // Only allocate default options when the caller did not supply any.
    OptionalSettings = options ?? new Options();

    // The stop-words remover and both feature extractors come from the effective options;
    // no dictionary is configured by this constructor.
    _stopWordsRemover = OptionalSettings.StopWordsRemover;
    _dictionary = null;
    _wordFeatureExtractor = OptionalSettings.WordFeatureExtractorFactory;
    _charFeatureExtractor = OptionalSettings.CharFeatureExtractorFactory;
}