C# (CSharp) WordTokenizeTransform.ColumnInfo Exemples

Langage de programmation: C# (CSharp)

Class/Type: WordTokenizeTransform.ColumnInfo

Exemples sur hotexamples.com : 2

C# (CSharp) WordTokenizeTransform.ColumnInfo - 2 exemples trouvés. Ce sont les exemples réels les mieux notés de WordTokenizeTransform.ColumnInfo extraits de projets open source. Vous pouvez noter les exemples pour nous aider à en améliorer la qualité.

Associées

ApiServiceReader

CertificateEntry

AgentLoginSuccessDto

FolderGalleryConfiguration

PS_tj

Repository

PS_tq

TestDynamicSpriteFontJapanese

FillVisitMediator

EmployeeSqlDAL

Associés dans d'autres langages

Config (PHP)

IconUtilityFixture (PHP)

_unix_mkdir (C++)

mass_stor_findDevice (C++)

NewCached (Go)

TrimRight (Go)

ManagingFS (Java)

HttpEntities (Java)

addListeners (Python)

group_sparse_scores (Python)

Exemple #1

0

Afficher le fichier

Fichier : WordHashBagTransform.cs Projet : mikekiwa/machinelearning-1

/// <summary>
/// Builds the word-hash-bag pipeline on top of <paramref name="input"/>: every source of every
/// requested column is tokenized into a uniquely named temporary column, the temporary columns
/// are fed to <c>NgramHashExtractorTransform</c>, and the temporaries are dropped again.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="args">Transform arguments; <c>args.Column</c> must contain at least one entry.</param>
/// <param name="input">The data view the pipeline is applied to; must not be null.</param>
/// <returns>The final <c>DropColumnsTransform</c> wrapping the composed pipeline.</returns>
public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
{
    Contracts.CheckValue(env, nameof(env));
    var h = env.Register(RegistrationName);
    h.CheckValue(args, nameof(args));
    h.CheckValue(input, nameof(input));
    h.CheckUserArg(Utils.Size(args.Column) > 0, nameof(args.Column), "Columns must be specified");

    // To each input column to the WordHashBagTransform, a tokenize transform is applied,
    // followed by applying WordHashVectorizeTransform.
    // Since WordHashBagTransform is a many-to-one column transform, for each
    // WordHashBagTransform.Column we may need to define multiple tokenize transform columns.
    // NgramHashExtractorTransform may need to define an identical number of HashTransform.Columns.
    // The intermediate columns are dropped at the end using a DropColumnsTransform.
    IDataView view = input;

    var uniqueSourceNames = NgramExtractionUtils.GenerateUniqueSourceNames(h, args.Column, view.Schema);
    Contracts.Assert(uniqueSourceNames.Length == args.Column.Length);

    var colCount = args.Column.Length;

    // One tokenizer entry is needed per (column, source) pair, not per column. A list is used
    // because the total number of sources is only known after walking every column; indexing a
    // per-column array here would keep only the last source of a multi-source column.
    var tokenizeColumns = new List<WordTokenizeTransform.ColumnInfo>();
    var extractorCols = new NgramHashExtractorTransform.Column[colCount];
    List<string> tmpColNames = new List<string>();

    for (int iinfo = 0; iinfo < colCount; iinfo++)
    {
        var column = args.Column[iinfo];
        int srcCount = column.Source.Length;
        var curTmpNames = new string[srcCount];
        Contracts.Assert(uniqueSourceNames[iinfo].Length == args.Column[iinfo].Source.Length);

        for (int isrc = 0; isrc < srcCount; isrc++)
        {
            // Pair the original source name with its generated temporary name.
            curTmpNames[isrc] = uniqueSourceNames[iinfo][isrc];
            tokenizeColumns.Add(new WordTokenizeTransform.ColumnInfo(column.Source[isrc], curTmpNames[isrc]));
        }

        tmpColNames.AddRange(curTmpNames);
        extractorCols[iinfo] = new NgramHashExtractorTransform.Column
        {
            Name = column.Name,
            Source = curTmpNames,
            HashBits = column.HashBits,
            NgramLength = column.NgramLength,
            Seed = column.Seed,
            SkipLength = column.SkipLength,
            Ordered = column.Ordered,
            InvertHash = column.InvertHash,
            // Keep the user-facing source names alongside the temporary ones.
            FriendlyNames = args.Column[iinfo].Source,
            AllLengths = column.AllLengths
        };
    }

    view = new WordTokenizeEstimator(env, tokenizeColumns.ToArray()).Fit(view).Transform(view);

    var featurizeArgs = new NgramHashExtractorTransform.Arguments
    {
        AllLengths = args.AllLengths,
        HashBits = args.HashBits,
        NgramLength = args.NgramLength,
        SkipLength = args.SkipLength,
        Ordered = args.Ordered,
        Seed = args.Seed,
        Column = extractorCols.ToArray(),
        InvertHash = args.InvertHash
    };

    view = NgramHashExtractorTransform.Create(h, featurizeArgs, view);

    // Since we added columns with new names, we need to explicitly drop them before we return the IDataTransform.
    var dropColsArgs = new DropColumnsTransform.Arguments() { Column = tmpColNames.ToArray() };
    return new DropColumnsTransform(h, dropColsArgs, view);
}

Exemple #2

0

Afficher le fichier

/// <summary>
/// Composes the word-bag pipeline: multi-source columns are concatenated, the result is
/// tokenized in place under the output column name, and <c>NgramExtractorTransform</c> is
/// applied on top.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="args">Transform arguments; <c>args.Column</c> must contain at least one entry.</param>
/// <param name="input">The data view the pipeline is applied to; must not be null.</param>
/// <returns>The transform returned by <c>NgramExtractorTransform.Create</c>.</returns>
public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
{
    Contracts.CheckValue(env, nameof(env));
    var h = env.Register(RegistrationName);
    h.CheckValue(args, nameof(args));
    h.CheckValue(input, nameof(input));
    h.CheckUserArg(Utils.Size(args.Column) > 0, nameof(args.Column), "Columns must be specified");

    // The WordBagTransform is a tokenize transform followed by an NgramExtractionTransform.
    // It is many-to-one, so any WordBagTransform.Column with several sources is first run
    // through a ConcatTransform.
    // REVIEW: To avoid ngrams that span vector slots, the tokenize transforms would need to
    // insert a special token between slots.
    // REVIEW: To output separate bags for different columns from one shared dictionary,
    // ConcatTransform would need a way to remember the boundaries.
    int columnCount = args.Column.Length;
    var tokenizeColumns = new WordTokenizeTransform.ColumnInfo[columnCount];
    var ngramColumns = new NgramExtractorTransform.Column[columnCount];
    var extractorArgs = new NgramExtractorTransform.Arguments()
    {
        MaxNumTerms = args.MaxNumTerms,
        NgramLength = args.NgramLength,
        SkipLength = args.SkipLength,
        AllLengths = args.AllLengths,
        Weighting = args.Weighting,
        Column = ngramColumns
    };

    for (int i = 0; i < columnCount; i++)
    {
        var column = args.Column[i];
        h.CheckUserArg(!string.IsNullOrWhiteSpace(column.Name), nameof(column.Name));
        h.CheckUserArg(Utils.Size(column.Source) > 0, nameof(column.Source));
        h.CheckUserArg(!column.Source.Any(src => string.IsNullOrWhiteSpace(src)), nameof(column.Source));

        // With several sources the tokenizer reads the column named column.Name (the
        // concatenation is applied before tokenizing, below); otherwise it reads the single
        // source directly.
        string tokenizerInput;
        if (column.Source.Length > 1)
        {
            tokenizerInput = column.Name;
        }
        else
        {
            tokenizerInput = column.Source[0];
        }

        tokenizeColumns[i] = new WordTokenizeTransform.ColumnInfo(tokenizerInput, column.Name);

        ngramColumns[i] = new NgramExtractorTransform.Column()
        {
            Name = column.Name,
            Source = column.Name,
            MaxNumTerms = column.MaxNumTerms,
            NgramLength = column.NgramLength,
            SkipLength = column.SkipLength,
            Weighting = column.Weighting,
            AllLengths = column.AllLengths
        };
    }

    IDataView view = NgramExtractionUtils.ApplyConcatOnSources(h, args.Column, input);
    view = new WordTokenizeEstimator(env, tokenizeColumns).Fit(view).Transform(view);
    return NgramExtractorTransform.Create(h, extractorArgs, view);
}