/// <summary>
/// Remove stop words from incoming text.
/// </summary>
/// <param name="input">The column to apply to.</param>
/// <param name="language">Language of the input text.</param>
/// <returns>A new column created from <paramref name="input"/> and <paramref name="language"/>.</returns>
public static VarVector<string> RemoveStopwords(this VarVector<string> input, Language language = Language.English)
{
    return new OutPipelineColumn(input, language);
}
/// <summary>
/// Creates the output column for <paramref name="input"/>. A <c>Reconciler</c> built from the
/// remaining arguments is handed to the base constructor together with the input column, and the
/// input column is also kept in <c>Input</c>.
/// </summary>
/// <param name="input">Input column; passed to the base constructor and stored in <c>Input</c>.</param>
/// <param name="hashBits">Forwarded to the <c>Reconciler</c>.</param>
/// <param name="ngramLength">Forwarded to the <c>Reconciler</c>.</param>
/// <param name="skipLength">Forwarded to the <c>Reconciler</c>.</param>
/// <param name="allLengths">Forwarded to the <c>Reconciler</c>.</param>
/// <param name="seed">Forwarded to the <c>Reconciler</c>.</param>
/// <param name="ordered">Forwarded to the <c>Reconciler</c>.</param>
/// <param name="invertHash">Forwarded to the <c>Reconciler</c>.</param>
public OutPipelineColumn(
    VarVector<Key<uint, string>> input,
    int hashBits,
    int ngramLength,
    int skipLength,
    bool allLengths,
    uint seed,
    bool ordered,
    int invertHash)
    : base(
        new Reconciler(hashBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash),
        input)
{
    Input = input;
}
/// <summary>
/// Creates the output column for <paramref name="input"/>. A <c>Reconciler</c> built from
/// <paramref name="customModelFile"/> is handed to the base constructor together with the input
/// column, and the input column is also kept in <c>Input</c>.
/// </summary>
/// <param name="input">Input column; passed to the base constructor and stored in <c>Input</c>.</param>
/// <param name="customModelFile">Forwarded to the <c>Reconciler</c>; defaults to <c>null</c>.</param>
public OutColumn(
    VarVector<string> input,
    string customModelFile = null)
    : base(new Reconciler(customModelFile), input)
{
    Input = input;
}
/// <summary>
/// Creates the output column for <paramref name="input"/>. A <c>Reconciler</c> built from
/// <paramref name="language"/> is handed to the base constructor together with the input column,
/// and the input column is also kept in <c>Input</c>.
/// </summary>
/// <param name="input">Input column; passed to the base constructor and stored in <c>Input</c>.</param>
/// <param name="language">Forwarded to the <c>Reconciler</c>.</param>
public OutPipelineColumn(
    VarVector<string> input,
    Language language)
    : base(new Reconciler(language), input)
{
    Input = input;
}
/// <include file='../Microsoft.ML.Transforms/Text/doc.xml' path='doc/members/member[@name="WordEmbeddings"]/*' />
/// <param name="input">Vector of tokenized text.</param>
/// <param name="customModelFile">The custom word embedding model file.</param>
/// <returns>A new float vector column created from <paramref name="input"/> and <paramref name="customModelFile"/>.</returns>
public static Vector<float> WordEmbeddings(this VarVector<string> input, string customModelFile)
{
    // Validate the receiver before any column object is constructed.
    Contracts.CheckValue(input, nameof(input));

    Vector<float> embeddings = new OutColumn(input, customModelFile);
    return embeddings;
}
/// <summary>
/// Creates the output column for <paramref name="input"/>. A <c>Reconciler</c> built from
/// <paramref name="modelKind"/> is handed to the base constructor together with the input column,
/// and the input column is also kept in <c>Input</c>.
/// </summary>
/// <param name="input">Input column; passed to the base constructor and stored in <c>Input</c>.</param>
/// <param name="modelKind">Forwarded to the <c>Reconciler</c>; defaults to <c>Sswe</c>.</param>
public OutColumn(
    VarVector<string> input,
    WordEmbeddingsExtractingTransformer.PretrainedModelKind modelKind = WordEmbeddingsExtractingTransformer.PretrainedModelKind.Sswe)
    : base(new Reconciler(modelKind), input)
{
    Input = input;
}
/// <summary>
/// Creates the output column for <paramref name="input"/>. A <c>Reconciler</c> built from the
/// remaining arguments is handed to the base constructor together with the input column, and the
/// input column is also kept in <c>Input</c>.
/// </summary>
/// <param name="input">Input column; passed to the base constructor and stored in <c>Input</c>.</param>
/// <param name="numberOfBits">Forwarded to the <c>Reconciler</c>.</param>
/// <param name="ngramLength">Forwarded to the <c>Reconciler</c>.</param>
/// <param name="skipLength">Forwarded to the <c>Reconciler</c>.</param>
/// <param name="useAllLengths">Forwarded to the <c>Reconciler</c>.</param>
/// <param name="seed">Forwarded to the <c>Reconciler</c>.</param>
/// <param name="useOrderedHashing">Forwarded to the <c>Reconciler</c>.</param>
/// <param name="maximumNumberOfInverts">Forwarded to the <c>Reconciler</c>.</param>
public OutPipelineColumn(
    VarVector<Key<uint, string>> input,
    int numberOfBits,
    int ngramLength,
    int skipLength,
    bool useAllLengths,
    uint seed,
    bool useOrderedHashing,
    int maximumNumberOfInverts)
    : base(
        new Reconciler(numberOfBits, ngramLength, skipLength, useAllLengths, seed, useOrderedHashing, maximumNumberOfInverts),
        input)
{
    Input = input;
}
/// <include file='../Microsoft.ML.Transforms/Text/doc.xml' path='doc/members/member[@name="WordEmbeddings"]/*' />
/// <param name="input">Vector of tokenized text.</param>
/// <param name="modelKind">The pretrained word embedding model.</param>
/// <returns>A new float vector column created from <paramref name="input"/> and <paramref name="modelKind"/>.</returns>
public static Vector<float> WordEmbeddings(
    this VarVector<string> input,
    WordEmbeddingsExtractingTransformer.PretrainedModelKind modelKind = WordEmbeddingsExtractingTransformer.PretrainedModelKind.Sswe)
{
    // Validate the receiver before any column object is constructed.
    Contracts.CheckValue(input, nameof(input));

    Vector<float> embeddings = new OutColumn(input, modelKind);
    return embeddings;
}
/// <summary>
/// Remove stop words from incoming text.
/// </summary>
/// <param name="input">The column to apply to.</param>
/// <param name="language">Language of the input text. It will be used to retrieve a built-in stopword list.</param>
/// <returns>A new column created from <paramref name="input"/> and <paramref name="language"/>.</returns>
public static VarVector<string> RemoveDefaultStopWords(
    this VarVector<string> input,
    StopWordsRemovingEstimator.Language language = StopWordsRemovingEstimator.Language.English)
{
    return new OutPipelineColumn(input, language);
}
/// <summary>
/// Creates the output column for <paramref name="input"/>. A <c>Reconciler</c> built from
/// <paramref name="language"/> is handed to the base constructor together with the input column,
/// and the input column is also kept in <c>Input</c>.
/// </summary>
/// <param name="input">Input column; passed to the base constructor and stored in <c>Input</c>.</param>
/// <param name="language">Forwarded to the <c>Reconciler</c>.</param>
public OutPipelineColumn(
    VarVector<string> input,
    StopWordsRemovingEstimator.Language language)
    : base(new Reconciler(language), input)
{
    Input = input;
}
/// <summary>
/// Converts the categorical value into an indicator array by building a dictionary of categories based on the data
/// and using the id in the dictionary as the index in the array.
/// </summary>
/// <param name="input">Incoming data.</param>
/// <param name="outputKind">Specify the output type of indicator array: array or binary encoded data.</param>
/// <param name="hashBits">Amount of bits to use for hashing.</param>
/// <param name="seed">Seed value used for hashing.</param>
/// <param name="ordered">Whether the position of each term should be included in the hash.</param>
/// <param name="invertHash">During hashing we construct mappings between original values and the produced hash values.
/// Text representation of original values are stored in the slot names of the metadata for the new column. Hashing, as such, can map many initial values to one.
/// <paramref name="invertHash"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
/// <value>0</value> does not retain any input values. <value>-1</value> retains all input values mapping to each hash.</param>
/// <returns>A new float vector column created from <paramref name="input"/> and the supplied settings.</returns>
public static Vector<float> OneHotHashEncoding(
    this VarVector<string> input,
    OneHotHashVectorOutputKind outputKind = DefOut,
    int hashBits = DefHashBits,
    uint seed = DefSeed,
    bool ordered = DefOrdered,
    int invertHash = DefInvertHash)
{
    // Validate the receiver before any configuration or column object is constructed.
    Contracts.CheckValue(input, nameof(input));

    // Bundle the settings first, then attach them to the input column.
    var settings = new Config(outputKind, hashBits, seed, ordered, invertHash);
    return new ImplVector<string>(input, settings);
}
/// <summary>
/// Convert to variable array of floats.
/// </summary>
/// <param name="input">The input column.</param>
/// <returns>Column with variable array of floats.</returns>
public static VarVector<float> ToFloat(this VarVector<sbyte> input)
{
    // CheckRef hands back the value it was given; the result is what gets wrapped.
    var checkedInput = Contracts.CheckRef(input, nameof(input));
    return new ImplVarVector<sbyte>(checkedInput, DataKind.R4);
}
/// <summary>
/// Convert to variable array of floats.
/// </summary>
/// <param name="input">The input column.</param>
/// <returns>Column with variable array of floats.</returns>
public static VarVector<float> ToFloat(this VarVector<string> input)
{
    // CheckRef hands back the value it was given; the result is what gets wrapped.
    var checkedInput = Contracts.CheckRef(input, nameof(input));
    return new ImplVarVector<string>(checkedInput, InternalDataKind.R4);
}