/// <summary>
 /// Removes stop words from the incoming text.
 /// </summary>
 /// <param name="input">The column to apply to.</param>
 /// <param name="language">Language of the input text.</param>
 /// <returns>A new <c>OutPipelineColumn</c> built from <paramref name="input"/> and <paramref name="language"/>.</returns>
 public static VarVector<string> RemoveStopwords(
     this VarVector<string> input,
     Language language = Language.English)
 {
     return new OutPipelineColumn(input, language);
 }
 /// <summary>
 /// Creates the output column for <paramref name="input"/>. All option arguments are forwarded,
 /// in declaration order, to a new <c>Reconciler</c> that is handed to the base constructor together
 /// with <paramref name="input"/>; the input column is also kept in <c>Input</c>.
 /// </summary>
 /// <param name="input">The source column; passed to the base constructor and stored in <c>Input</c>.</param>
 /// <param name="hashBits">Forwarded to the <c>Reconciler</c> constructor.</param>
 /// <param name="ngramLength">Forwarded to the <c>Reconciler</c> constructor.</param>
 /// <param name="skipLength">Forwarded to the <c>Reconciler</c> constructor.</param>
 /// <param name="allLengths">Forwarded to the <c>Reconciler</c> constructor.</param>
 /// <param name="seed">Forwarded to the <c>Reconciler</c> constructor.</param>
 /// <param name="ordered">Forwarded to the <c>Reconciler</c> constructor.</param>
 /// <param name="invertHash">Forwarded to the <c>Reconciler</c> constructor.</param>
 public OutPipelineColumn(
     VarVector<Key<uint, string>> input,
     int hashBits,
     int ngramLength,
     int skipLength,
     bool allLengths,
     uint seed,
     bool ordered,
     int invertHash)
     : base(
         new Reconciler(hashBits, ngramLength, skipLength, allLengths, seed, ordered, invertHash),
         input)
 {
     this.Input = input;
 }
Exemple #3
0
 /// <summary>
 /// Creates the output column for <paramref name="input"/>. A new <c>Reconciler</c> constructed from
 /// <paramref name="customModelFile"/> is passed to the base constructor along with the input column,
 /// and the input column is kept in <c>Input</c>.
 /// </summary>
 /// <param name="input">The source column; passed to the base constructor and stored in <c>Input</c>.</param>
 /// <param name="customModelFile">Forwarded to the <c>Reconciler</c> constructor; defaults to <c>null</c>.</param>
 public OutColumn(
     VarVector<string> input,
     string customModelFile = null)
     : base(new Reconciler(customModelFile), input)
 {
     this.Input = input;
 }
 /// <summary>
 /// Creates the output column for <paramref name="input"/>. A new <c>Reconciler</c> constructed from
 /// <paramref name="language"/> is passed to the base constructor along with the input column,
 /// and the input column is kept in <c>Input</c>.
 /// </summary>
 /// <param name="input">The source column; passed to the base constructor and stored in <c>Input</c>.</param>
 /// <param name="language">Forwarded to the <c>Reconciler</c> constructor.</param>
 public OutPipelineColumn(
     VarVector<string> input,
     Language language)
     : base(new Reconciler(language), input)
 {
     this.Input = input;
 }
Exemple #5
0
 /// <include file='../Microsoft.ML.Transforms/Text/doc.xml' path='doc/members/member[@name="WordEmbeddings"]/*' />
 /// <param name="input">Vector of tokenized text.</param>
 /// <param name="customModelFile">The custom word embedding model file.</param>
 /// <returns>A new <c>OutColumn</c> built from <paramref name="input"/> and <paramref name="customModelFile"/>.</returns>
 public static Vector<float> WordEmbeddings(
     this VarVector<string> input,
     string customModelFile)
 {
     // Validate the receiver before constructing the output column.
     Contracts.CheckValue(input, nameof(input));

     return new OutColumn(input, customModelFile);
 }
Exemple #6
0
 /// <summary>
 /// Creates the output column for <paramref name="input"/>. A new <c>Reconciler</c> constructed from
 /// <paramref name="modelKind"/> is passed to the base constructor along with the input column,
 /// and the input column is kept in <c>Input</c>.
 /// </summary>
 /// <param name="input">The source column; passed to the base constructor and stored in <c>Input</c>.</param>
 /// <param name="modelKind">Forwarded to the <c>Reconciler</c> constructor; defaults to <c>Sswe</c>.</param>
 public OutColumn(
     VarVector<string> input,
     WordEmbeddingsExtractingTransformer.PretrainedModelKind modelKind = WordEmbeddingsExtractingTransformer.PretrainedModelKind.Sswe)
     : base(new Reconciler(modelKind), input)
 {
     this.Input = input;
 }
 /// <summary>
 /// Creates the output column for <paramref name="input"/>. All option arguments are forwarded,
 /// in declaration order, to a new <c>Reconciler</c> that is handed to the base constructor together
 /// with <paramref name="input"/>; the input column is also kept in <c>Input</c>.
 /// </summary>
 /// <param name="input">The source column; passed to the base constructor and stored in <c>Input</c>.</param>
 /// <param name="numberOfBits">Forwarded to the <c>Reconciler</c> constructor.</param>
 /// <param name="ngramLength">Forwarded to the <c>Reconciler</c> constructor.</param>
 /// <param name="skipLength">Forwarded to the <c>Reconciler</c> constructor.</param>
 /// <param name="useAllLengths">Forwarded to the <c>Reconciler</c> constructor.</param>
 /// <param name="seed">Forwarded to the <c>Reconciler</c> constructor.</param>
 /// <param name="useOrderedHashing">Forwarded to the <c>Reconciler</c> constructor.</param>
 /// <param name="maximumNumberOfInverts">Forwarded to the <c>Reconciler</c> constructor.</param>
 public OutPipelineColumn(
     VarVector<Key<uint, string>> input,
     int numberOfBits,
     int ngramLength,
     int skipLength,
     bool useAllLengths,
     uint seed,
     bool useOrderedHashing,
     int maximumNumberOfInverts)
     : base(
         new Reconciler(numberOfBits, ngramLength, skipLength, useAllLengths, seed, useOrderedHashing, maximumNumberOfInverts),
         input)
 {
     this.Input = input;
 }
Exemple #8
0
 /// <include file='../Microsoft.ML.Transforms/Text/doc.xml' path='doc/members/member[@name="WordEmbeddings"]/*' />
 /// <param name="input">Vector of tokenized text.</param>
 /// <param name="modelKind">The pretrained word embedding model.</param>
 /// <returns>A new <c>OutColumn</c> built from <paramref name="input"/> and <paramref name="modelKind"/>.</returns>
 public static Vector<float> WordEmbeddings(
     this VarVector<string> input,
     WordEmbeddingsExtractingTransformer.PretrainedModelKind modelKind = WordEmbeddingsExtractingTransformer.PretrainedModelKind.Sswe)
 {
     // Validate the receiver before constructing the output column.
     Contracts.CheckValue(input, nameof(input));

     return new OutColumn(input, modelKind);
 }
 /// <summary>
 /// Removes stop words from the incoming text.
 /// </summary>
 /// <param name="input">The column to apply to.</param>
 /// <param name="language">Language of the input text. It will be used to retrieve a built-in stopword list.</param>
 /// <returns>A new <c>OutPipelineColumn</c> built from <paramref name="input"/> and <paramref name="language"/>.</returns>
 public static VarVector<string> RemoveDefaultStopWords(
     this VarVector<string> input,
     StopWordsRemovingEstimator.Language language = StopWordsRemovingEstimator.Language.English)
 {
     return new OutPipelineColumn(input, language);
 }
 /// <summary>
 /// Creates the output column for <paramref name="input"/>. A new <c>Reconciler</c> constructed from
 /// <paramref name="language"/> is passed to the base constructor along with the input column,
 /// and the input column is kept in <c>Input</c>.
 /// </summary>
 /// <param name="input">The source column; passed to the base constructor and stored in <c>Input</c>.</param>
 /// <param name="language">Forwarded to the <c>Reconciler</c> constructor.</param>
 public OutPipelineColumn(
     VarVector<string> input,
     StopWordsRemovingEstimator.Language language)
     : base(new Reconciler(language), input)
 {
     this.Input = input;
 }
 /// <summary>
 /// Converts the categorical value into an indicator array by building a dictionary of categories based on the data and using the id in the dictionary as the index in the array.
 /// </summary>
 /// <param name="input">Incoming data.</param>
 /// <param name="outputKind">Specify the output type of indicator array: array or binary encoded data.</param>
 /// <param name="hashBits">Amount of bits to use for hashing.</param>
 /// <param name="seed">Seed value used for hashing.</param>
 /// <param name="ordered">Whether the position of each term should be included in the hash.</param>
 /// <param name="invertHash">During hashing we construct mappings between original values and the produced hash values.
 /// Text representation of original values are stored in the slot names of the metadata for the new column. Hashing, as such, can map many initial values to one.
 /// <paramref name="invertHash"/> specifies the upper bound of the number of distinct input values mapping to a hash that should be retained.
 /// <c>0</c> does not retain any input values. <c>-1</c> retains all input values mapping to each hash.</param>
 /// <returns>A new <c>ImplVector&lt;string&gt;</c> built from <paramref name="input"/> and a <c>Config</c> holding the remaining arguments.</returns>
 public static Vector<float> OneHotHashEncoding(
     this VarVector<string> input,
     OneHotHashVectorOutputKind outputKind = DefOut,
     int hashBits = DefHashBits,
     uint seed = DefSeed,
     bool ordered = DefOrdered,
     int invertHash = DefInvertHash)
 {
     // Validate the receiver before any configuration object is built.
     Contracts.CheckValue(input, nameof(input));

     var config = new Config(outputKind, hashBits, seed, ordered, invertHash);
     return new ImplVector<string>(input, config);
 }
Exemple #12
0
 /// <summary>
 /// Convert to variable array of floats.
 /// </summary>
 /// <param name="input">The input column.</param>
 /// <returns>Column with variable array of floats.</returns>
 public static VarVector<float> ToFloat(this VarVector<sbyte> input)
 {
     // CheckRef validates the receiver and its result is what gets wrapped.
     var source = Contracts.CheckRef(input, nameof(input));
     return new ImplVarVector<sbyte>(source, DataKind.R4);
 }
Exemple #13
0
 /// <summary>
 /// Convert to variable array of floats.
 /// </summary>
 /// <param name="input">The input column.</param>
 /// <returns>Column with variable array of floats.</returns>
 public static VarVector<float> ToFloat(this VarVector<string> input)
 {
     // CheckRef validates the receiver and its result is what gets wrapped.
     var source = Contracts.CheckRef(input, nameof(input));
     return new ImplVarVector<string>(source, InternalDataKind.R4);
 }