/// <summary>
/// Splits the data based on the splitColumn, and drops that column as it is only
/// intended to be used for splitting the data, and shouldn't be part of the output schema.
/// </summary>
/// <param name="env">Host environment used for argument checks and to create the filters.</param>
/// <param name="data">The data view to split.</param>
/// <param name="splitColumn">Name of the column whose values decide which fold a row belongs to.</param>
/// <param name="numberOfFolds">Number of train/test pairs to produce.</param>
/// <returns>
/// A lazily produced sequence with one <see cref="TrainTestData"/> per fold. For fold <c>k</c> the test
/// view keeps the rows whose split value lies in <c>[k / numberOfFolds, (k + 1) / numberOfFolds]</c>
/// (both bounds inclusive) and the train view keeps the complement of that range.
/// </returns>
/// <remarks>
/// Argument validation runs when this method is called. Only the construction of the per-fold
/// views is deferred until the returned sequence is enumerated.
/// </remarks>
internal static IEnumerable<TrainTestData> CrossValidationSplit(IHostEnvironment env, IDataView data, string splitColumn, int numberOfFolds = 5)
{
    // This outer method is intentionally not an iterator: if it contained a 'yield',
    // the check below would not execute until the caller started enumerating.
    env.CheckValue(splitColumn, nameof(splitColumn));

    return CreateFolds();

    IEnumerable<TrainTestData> CreateFolds()
    {
        for (int fold = 0; fold < numberOfFolds; fold++)
        {
            double lowerBound = (double)fold / numberOfFolds;
            double upperBound = (double)(fold + 1) / numberOfFolds;

            // Train view: everything outside the fold's range.
            var trainFilter = new RangeFilter(env, new RangeFilter.Options
            {
                Column = splitColumn,
                Min = lowerBound,
                Max = upperBound,
                Complement = true,
                IncludeMin = true,
                IncludeMax = true,
            }, data);

            // Test view: everything inside the fold's range.
            var testFilter = new RangeFilter(env, new RangeFilter.Options
            {
                Column = splitColumn,
                Min = lowerBound,
                Max = upperBound,
                Complement = false,
                IncludeMin = true,
                IncludeMax = true
            }, data);

            // The split column only exists to drive the split; remove it from both outputs.
            var trainDV = ColumnSelectingTransformer.CreateDrop(env, trainFilter, splitColumn);
            var testDV = ColumnSelectingTransformer.CreateDrop(env, testFilter, splitColumn);
            yield return new TrainTestData(trainDV, testDV);
        }
    }
}
/// <summary>
/// Entry point that partitions <c>input.Data</c> into a train view and a test view.
/// Rows whose stratification value falls in the range from 0 to <c>input.Fraction</c> go to the
/// train view; the complement of that range goes to the test view. The stratification column
/// is dropped from both outputs.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="input">Entry point arguments. <c>Fraction</c> must be strictly between 0 and 1.</param>
/// <returns>An <see cref="Output"/> holding the train and test views.</returns>
public static Output Split(IHostEnvironment env, Input input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register(ModuleName);
    host.CheckValue(input, nameof(input));
    host.Check(0 < input.Fraction && input.Fraction < 1, "The fraction must be in the interval (0,1).");
    EntryPointUtils.CheckInputArgs(host, input);

    var source = input.Data;
    var splitColumnName = SplitUtils.CreateStratificationColumn(host, ref source, input.StratificationColumn);

    // Builds one side of the split: filter on the range, then remove the helper column.
    IDataView BuildPartition(bool complement)
    {
        var rangeOptions = new RangeFilter.Options
        {
            Column = splitColumnName,
            Min = 0,
            Max = input.Fraction,
            Complement = complement
        };
        IDataView filtered = new RangeFilter(host, rangeOptions, source);
        return ColumnSelectingTransformer.CreateDrop(host, filtered, splitColumnName);
    }

    IDataView trainView = BuildPartition(complement: false);
    IDataView testView = BuildPartition(complement: true);

    return new Output()
    {
        TrainData = trainView,
        TestData = testView
    };
}
/// <summary>
/// Split the dataset into the train set and test set according to the given fraction.
/// Respects the <paramref name="samplingKeyColumnName"/> if provided.
/// </summary>
/// <param name="data">The dataset to split.</param>
/// <param name="testFraction">The fraction of data to go into the test set.</param>
/// <param name="samplingKeyColumnName">Name of a column to use for grouping rows. If two examples share the same value of the <paramref name="samplingKeyColumnName"/>,
/// they are guaranteed to appear in the same subset (train or test). This can be used to ensure no label leakage from the train to the test set.
/// Note that when performing a Ranking Experiment, the <paramref name="samplingKeyColumnName"/> must be the GroupId column.
/// If <see langword="null"/> no row grouping will be performed.</param>
/// <param name="seed">Seed for the random number generator used to select rows for the train-test split.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[TrainTestSplit](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/TrainTestSplit.cs)]
/// ]]>
/// </format>
/// </example>
public TrainTestData TrainTestSplit(IDataView data, double testFraction = 0.1, string samplingKeyColumnName = null, int? seed = null)
{
    _env.CheckValue(data, nameof(data));
    _env.CheckParam(0 < testFraction && testFraction < 1, nameof(testFraction), "Must be between 0 and 1 exclusive");
    _env.CheckValueOrNull(samplingKeyColumnName);

    // CreateSplitColumn receives 'data' by ref and may replace it; all later steps use the updated view.
    var splitColumn = CreateSplitColumn(_env, ref data, samplingKeyColumnName, seed, fallbackInEnvSeed: true);

    // Selects either the range from 0 to testFraction (test rows) or its complement (train rows).
    RangeFilter SelectRows(bool outsideTestRange)
    {
        var options = new RangeFilter.Options()
        {
            Column = splitColumn,
            Min = 0,
            Max = testFraction,
            Complement = outsideTestRange
        };
        return new RangeFilter(_env, options, data);
    }

    var trainRows = SelectRows(outsideTestRange: true);
    var testRows = SelectRows(outsideTestRange: false);

    // The split column is an implementation detail and is removed from both results.
    var trainView = ColumnSelectingTransformer.CreateDrop(_env, trainRows, splitColumn);
    var testView = ColumnSelectingTransformer.CreateDrop(_env, testRows, splitColumn);

    return new TrainTestData(trainView, testView);
}
// Factory method for SignatureDataTransform.
// Wraps 'input' with the pretrained sentiment model: the source column is fed to the model and
// the model's score column is surfaced under args.Name, while every intermediate column the
// model pipeline creates is removed again.
// NOTE: when args.Name is null/whitespace it is set to args.Source on the caller's 'args' object.
// Throws (via the host's exception helpers) when an argument is invalid or when the
// "pretrained.model" resource cannot be located.
internal static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
{
    Contracts.CheckValue(env, nameof(env));
    var h = env.Register(LoaderSignature);
    h.CheckValue(args, nameof(args));
    h.CheckValue(input, nameof(input));
    h.CheckNonWhiteSpace(args.Source, nameof(args.Source));

    // Default the destination column to the source column.
    if (string.IsNullOrWhiteSpace(args.Name))
    {
        args.Name = args.Source;
    }

    var file = Utils.FindExistentFileOrNull("pretrained.model", "Sentiment", assemblyForBasePath: typeof(SentimentAnalyzingTransformer));
    if (file == null)
    {
        // The first string argument of Except is the message itself. Passing "resourcePath" first
        // turned the real explanation into an unused format argument, so the thrown exception
        // only said "resourcePath".
        throw h.Except("Missing resource for SentimentAnalyzingTransform.");
    }

    // The logic below ensures that any columns in our input IDataView that conflict
    // with column names known to be used in the pretrained model transform pipeline we're
    // loading are aliased to temporary column names before we apply the pipeline and then
    // renamed back to their original names after. We do this to ensure the pretrained model
    // doesn't shadow or replace columns we aren't expecting it to.

    // 1. Alias any column in the input IDataView that is known to appear to the pretrained
    // model into a temporary column so that we can restore them after the pretrained model
    // is added to the pipeline.
    KeyValuePair<string, string>[] aliased;
    input = AliasIfNeeded(env, input, _modelIntermediateColumnNames, out aliased);

    // 2. Copy source column to a column with the name expected by the pretrained model featurization
    // transform pipeline.
    var copyTransformer = new ColumnCopyingTransformer(env, (args.Source, ModelInputColumnName));
    input = copyTransformer.Transform(input);

    // 3. Apply the pretrained model and its featurization transform pipeline.
    input = LoadTransforms(env, input, file);

    // 4. Copy the output column from the pretrained model to a temporary column.
    var scoreTempName = input.Schema.GetTempColumnName("sa_out");
    copyTransformer = new ColumnCopyingTransformer(env, (ModelScoreColumnName, scoreTempName));
    input = copyTransformer.Transform(input);

    // 5. Drop all the columns created by the pretrained model, including the expected input column
    // and the output column, which we have copied to a temporary column in (4).
    input = ColumnSelectingTransformer.CreateDrop(env, input, _modelIntermediateColumnNames);

    // 6. Unalias all the original columns that were originally present in the IDataView, but may have
    // been shadowed by column names in the pretrained model. This method will also drop all the temporary
    // columns that were created for them in (1).
    input = UnaliasIfNeeded(env, input, aliased);

    // 7. Copy the temporary column with the score we created in (4) to a column with the user-specified destination name.
    copyTransformer = new ColumnCopyingTransformer(env, (scoreTempName, args.Name));
    input = copyTransformer.Transform(input);

    // 8. Drop the temporary column with the score created in (4).
    return ColumnSelectingTransformer.CreateDrop(env, input, scoreTempName);
}
// Restores columns that were previously aliased to temporary names.
// Each pair in 'hiddenNames' is read as (Key: original column name, Value: temporary column name).
// The temporary column is copied back to the original name and the temporary columns are then dropped.
// Returns 'input' unchanged when 'hiddenNames' is null or empty.
private static IDataView UnaliasIfNeeded(IHostEnvironment env, IDataView input, KeyValuePair<string, string>[] hiddenNames)
{
    if (Utils.Size(hiddenNames) == 0)
    {
        return input;
    }

    // Copy temp (Value) -> original (Key). The previous code copied Key -> Value and then dropped
    // Value below, so the restore had no effect.
    // NOTE(review): relies on the tuple being (input, output), as the element names indicate —
    // confirm against the ColumnCopyingTransformer constructor in this version.
    var restorePairs = hiddenNames.Select(x => (Input: x.Value, Output: x.Key)).ToArray();
    input = new ColumnCopyingTransformer(env, restorePairs).Transform(input);

    // The temporary columns are no longer needed once their content is back under the original names.
    return ColumnSelectingTransformer.CreateDrop(env, input, hiddenNames.Select(pair => pair.Value).ToArray());
}
/// <summary>
/// Entry point that, for a binary classification predictor exposing exactly two label names,
/// renames the selected score columns to <c>&lt;column&gt;.&lt;second label name&gt;</c>. Hidden columns,
/// columns rejected by <c>ShouldAddColumn</c>, and the column whose score value kind is
/// PredictedLabel are left untouched. In every other case the data is passed through unchanged.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="input">Entry point arguments carrying the predictor model and the scored data.</param>
/// <returns>The resulting data view together with a transform model built from it.</returns>
public static CommonOutputs.TransformOutput RenameBinaryPredictionScoreColumns(IHostEnvironment env, RenameBinaryPredictionScoreColumnsInput input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register("ScoreModel");
    host.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(host, input);

    // Fallback used whenever no renaming applies.
    CommonOutputs.TransformOutput PassThrough()
    {
        var newView = NopTransform.CreateIfNeeded(env, input.Data);
        return new CommonOutputs.TransformOutput
        {
            Model = new TransformModelImpl(env, newView, input.Data),
            OutputData = newView
        };
    }

    if (input.PredictorModel.Predictor.PredictionKind != PredictionKind.BinaryClassification)
    {
        return PassThrough();
    }

    DataViewType labelType;
    var labelNames = input.PredictorModel.GetLabelInfo(host, out labelType);
    if (labelNames == null || labelNames.Length != 2)
    {
        return PassThrough();
    }

    // The second label name is used as the suffix of the renamed columns.
    var positiveClass = labelNames[1];

    // Rename all the score columns.
    int colMax;
    var maxScoreId = input.Data.Schema.GetMaxAnnotationKind(out colMax, AnnotationUtils.Kinds.ScoreColumnSetId);
    var copyCols = new List<(string name, string source)>();
    for (int col = 0; col < input.Data.Schema.Count; col++)
    {
        if (input.Data.Schema[col].IsHidden || !ShouldAddColumn(input.Data.Schema, col, null, maxScoreId))
        {
            continue;
        }

        // Do not rename the PredictedLabel column.
        ReadOnlyMemory<char> valueKind = default;
        bool isPredictedLabel =
            input.Data.Schema.TryGetAnnotation(TextDataViewType.Instance, AnnotationUtils.Kinds.ScoreValueKind, col, ref valueKind)
            && ReadOnlyMemoryUtils.EqualsStr(AnnotationUtils.Const.ScoreValueKind.PredictedLabel, valueKind);
        if (isPredictedLabel)
        {
            continue;
        }

        var source = input.Data.Schema[col].Name;
        var name = source + "." + positiveClass;
        copyCols.Add((name, source));
    }

    // A rename is a copy to the new name followed by a drop of the old name.
    var withCopies = new ColumnCopyingTransformer(env, copyCols.ToArray()).Transform(input.Data);
    var oldNames = copyCols.Select(c => c.source).ToArray();
    var renamed = ColumnSelectingTransformer.CreateDrop(env, withCopies, oldNames);

    return new CommonOutputs.TransformOutput
    {
        Model = new TransformModelImpl(env, renamed, input.Data),
        OutputData = renamed
    };
}
// Test helper: loads an SVM-light file, checks the size of its "Features" column and its values
// against 'expectedData', then saves 'expectedData' in SVM-light format, reloads it and compares
// again with the "Comment" column removed from both sides.
private void TestSvmLight(string path, string savingPath, int inputSize, int expectedInputSize, bool zeroBased, IDataView expectedData, long? numberOfRows = null)
{
    var loaded = ML.Data.LoadFromSvmLightFile(path, inputSize: inputSize, zeroBased: zeroBased, numberOfRows: numberOfRows);
    var featureCount = loaded.Schema["Features"].Type.GetValueCount();
    Assert.True(featureCount == expectedInputSize);
    CheckSameValues(loaded, expectedData, checkId: false);

    // Save, reload and compare dataviews again.
    using (var stream = File.Create(savingPath))
    {
        ML.Data.SaveInSvmLightFormat(expectedData, stream, zeroBasedIndexing: zeroBased, exampleWeightColumnName: "Weight");
    }

    // The reload does not pass numberOfRows.
    var reloaded = ML.Data.LoadFromSvmLightFile(savingPath, inputSize: inputSize, zeroBased: zeroBased);

    var reloadedWithoutComment = ColumnSelectingTransformer.CreateDrop(Env, reloaded, "Comment");
    var expectedWithoutComment = ColumnSelectingTransformer.CreateDrop(Env, expectedData, "Comment");
    CheckSameValues(reloadedWithoutComment, expectedWithoutComment, checkId: false);
}
// Restores aliased columns: for every pair, the column named by pair.Value is copied into a
// column named by pair.Key, after which all pair.Value columns are dropped.
// Returns 'input' unchanged when 'hiddenNames' is null or empty.
private static IDataView UnaliasIfNeeded(IHostEnvironment env, IDataView input, KeyValuePair<string, string>[] hiddenNames)
{
    if (Utils.Size(hiddenNames) == 0)
    {
        return input;
    }

    int pairCount = hiddenNames.Length;
    var copyColumns = new ColumnCopyingTransformer.Column[pairCount];
    var namesToDrop = new string[pairCount];
    for (int i = 0; i < pairCount; i++)
    {
        var pair = hiddenNames[i];
        copyColumns[i] = new ColumnCopyingTransformer.Column()
        {
            Name = pair.Key,
            Source = pair.Value
        };
        namesToDrop[i] = pair.Value;
    }

    var copyArgs = new ColumnCopyingTransformer.Arguments()
    {
        Column = copyColumns
    };
    input = ColumnCopyingTransformer.Create(env, copyArgs, input);

    return ColumnSelectingTransformer.CreateDrop(env, input, namesToDrop);
}
/// <summary>
/// Entry point that produces <c>input.NumFolds</c> train/test pairs for cross-validation.
/// For fold <c>i</c> the test view keeps the rows whose stratification value lies in the range
/// from <c>i / NumFolds</c> to <c>(i + 1) / NumFolds</c>, and the train view keeps the complement.
/// The stratification column is dropped from every output view.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="input">Entry point arguments (data, stratification column, number of folds).</param>
/// <returns>An <see cref="Output"/> whose <c>TrainData</c> and <c>TestData</c> arrays have one entry per fold.</returns>
public static Output Split(IHostEnvironment env, Input input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register(ModuleName);
    host.CheckValue(input, nameof(input));

    EntryPointUtils.CheckInputArgs(host, input);

    var data = input.Data;

    var stratCol = SplitUtils.CreateStratificationColumn(host, ref data, input.StratificationColumn);

    int n = input.NumFolds;
    var output = new Output
    {
        TrainData = new IDataView[n],
        TestData = new IDataView[n]
    };

    // Construct per-fold datasets.
    for (int i = 0; i < n; i++)
    {
        // Divide instead of multiplying by a precomputed 1.0 / n: the product is not exact in
        // floating point (for n = 49, 49 * (1.0 / 49) is 0.9999999999999999 rather than 1.0), which
        // would leave the last fold's upper bound short of 1. Division gives exactly 1.0 for the last fold.
        double foldMin = (double)i / n;
        double foldMax = (double)(i + 1) / n;

        var trainData = new RangeFilter(host,
            new RangeFilter.Options { Column = stratCol, Min = foldMin, Max = foldMax, Complement = true }, data);
        output.TrainData[i] = ColumnSelectingTransformer.CreateDrop(host, trainData, stratCol);

        var testData = new RangeFilter(host,
            new RangeFilter.Options { Column = stratCol, Min = foldMin, Max = foldMax, Complement = false }, data);
        output.TestData[i] = ColumnSelectingTransformer.CreateDrop(host, testData, stratCol);
    }

    return output;
}
// Exercises NormalizingEstimator with min-max, binning, supervised binning, mean-variance and
// log-mean-variance columns over the iris training file, checks it against two deliberately
// mismatched inputs, and compares the saved normalized output with the stored baseline.
public void NormalizerWorkout()
{
    string dataPath = GetDataPath(TestDatasets.iris.trainFilename);

    var loaderArgs = new TextLoader.Arguments
    {
        Column = new[]
        {
            new TextLoader.Column("float1", DataKind.R4, 1),
            new TextLoader.Column("float4", DataKind.R4, new[] { new TextLoader.Range(1, 4) }),
            new TextLoader.Column("double1", DataKind.R8, 1),
            new TextLoader.Column("double4", DataKind.R8, new[] { new TextLoader.Range(1, 4) }),
            new TextLoader.Column("int1", DataKind.I4, 0),
            new TextLoader.Column("float0", DataKind.R4, new[] { new TextLoader.Range { Min = 1, VariableEnd = true } }),
        },
        HasHeader = true
    };
    var loader = new TextLoader(Env, loaderArgs, new MultiFileSource(dataPath));

    var estimator = new NormalizingEstimator(Env,
        // Min-max normalization, written back over the source columns.
        new NormalizingEstimator.MinMaxColumn("float1"),
        new NormalizingEstimator.MinMaxColumn("float4"),
        new NormalizingEstimator.MinMaxColumn("double1"),
        new NormalizingEstimator.MinMaxColumn("double4"),
        // Binning.
        new NormalizingEstimator.BinningColumn("float1", "float1bin"),
        new NormalizingEstimator.BinningColumn("float4", "float4bin"),
        new NormalizingEstimator.BinningColumn("double1", "double1bin"),
        new NormalizingEstimator.BinningColumn("double4", "double4bin"),
        // Supervised binning, using "int1" as the label column.
        new NormalizingEstimator.SupervisedBinningColumn("float1", "float1supervisedbin", labelColumn: "int1"),
        new NormalizingEstimator.SupervisedBinningColumn("float4", "float4supervisedbin", labelColumn: "int1"),
        new NormalizingEstimator.SupervisedBinningColumn("double1", "double1supervisedbin", labelColumn: "int1"),
        new NormalizingEstimator.SupervisedBinningColumn("double4", "double4supervisedbin", labelColumn: "int1"),
        // Mean-variance.
        new NormalizingEstimator.MeanVarColumn("float1", "float1mv"),
        new NormalizingEstimator.MeanVarColumn("float4", "float4mv"),
        new NormalizingEstimator.MeanVarColumn("double1", "double1mv"),
        new NormalizingEstimator.MeanVarColumn("double4", "double4mv"),
        // Log-mean-variance.
        new NormalizingEstimator.LogMeanVarColumn("float1", "float1lmv"),
        new NormalizingEstimator.LogMeanVarColumn("float4", "float4lmv"),
        new NormalizingEstimator.LogMeanVarColumn("double1", "double1lmv"),
        new NormalizingEstimator.LogMeanVarColumn("double4", "double4lmv"));

    var data = loader.Read(dataPath);

    // Invalid inputs: "float1" replaced by a copy of "int1", and "float4" replaced by a copy of "float0".
    var intAsFloat1 = new ColumnCopyingTransformer(Env, ("int1", "float1")).Transform(data);
    var float0AsFloat4 = new ColumnCopyingTransformer(Env, ("float0", "float4")).Transform(data);

    TestEstimatorCore(estimator, data, null, intAsFloat1);
    TestEstimatorCore(estimator, data, null, float0AsFloat4);

    var outputPath = GetOutputPath("NormalizerEstimator", "normalized.tsv");
    using (var ch = Env.Start("save"))
    {
        var saver = new TextSaver(Env, new TextSaver.Arguments { Silent = true });
        using (var fs = File.Create(outputPath))
        {
            // "float0" is dropped before the output is saved for the baseline comparison.
            var normalized = est_Transform(estimator, data);
            var dataView = ColumnSelectingTransformer.CreateDrop(Env, normalized, "float0");
            DataSaverUtils.SaveDataView(ch, saver, dataView, fs, keepHidden: true);
        }
    }

    CheckEquality("NormalizerEstimator", "normalized.tsv");

    Done();

    // Fits the estimator on the data and applies the fitted transformer to the same data.
    IDataView est_Transform(NormalizingEstimator fitted, IDataView view)
    {
        return fitted.Fit(view).Transform(view);
    }
}
// Builds the word-hash-bag pipeline on top of 'input': every source column is tokenized into a
// uniquely named temporary column, the temporary columns are fed to the n-gram hash extractor
// (one extractor column per requested output column), and the temporary columns are dropped
// from the returned transform.
public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
{
    Contracts.CheckValue(env, nameof(env));
    var h = env.Register(RegistrationName);
    h.CheckValue(args, nameof(args));
    h.CheckValue(input, nameof(input));
    h.CheckUserArg(Utils.Size(args.Column) > 0, nameof(args.Column), "Columns must be specified");

    // Each output column may combine several source columns, so one output column can require
    // several tokenizer columns. The tokenizer outputs are intermediate and are removed at the end.
    IDataView view = input;

    var uniqueSourceNames = NgramExtractionUtils.GenerateUniqueSourceNames(h, args.Column, view.Schema);
    Contracts.Assert(uniqueSourceNames.Length == args.Column.Length);

    int columnCount = args.Column.Length;
    var tokenizeColumns = new List<WordTokenizingTransformer.ColumnInfo>();
    var extractorCols = new NgramHashExtractingTransformer.Column[columnCount];
    var tmpColNames = new List<string>();

    for (int outIndex = 0; outIndex < columnCount; outIndex++)
    {
        var column = args.Column[outIndex];
        int sourceCount = column.Source.Length;
        var tokenizedNames = new string[sourceCount];
        Contracts.Assert(uniqueSourceNames[outIndex].Length == args.Column[outIndex].Source.Length);

        for (int srcIndex = 0; srcIndex < sourceCount; srcIndex++)
        {
            // Tokenize source -> unique temporary name, and remember that name for the extractor.
            string tokenizedName = uniqueSourceNames[outIndex][srcIndex];
            tokenizedNames[srcIndex] = tokenizedName;
            tokenizeColumns.Add(new WordTokenizingTransformer.ColumnInfo(args.Column[outIndex].Source[srcIndex], tokenizedName));
        }

        tmpColNames.AddRange(tokenizedNames);

        extractorCols[outIndex] = new NgramHashExtractingTransformer.Column
        {
            Name = column.Name,
            Source = tokenizedNames,
            HashBits = column.HashBits,
            NgramLength = column.NgramLength,
            Seed = column.Seed,
            SkipLength = column.SkipLength,
            Ordered = column.Ordered,
            InvertHash = column.InvertHash,
            // The original source names are passed along as friendly names instead of the temporary ones.
            FriendlyNames = args.Column[outIndex].Source,
            AllLengths = column.AllLengths
        };
    }

    var tokenizer = new WordTokenizingEstimator(env, tokenizeColumns.ToArray());
    view = tokenizer.Fit(view).Transform(view);

    var featurizeArgs = new NgramHashExtractingTransformer.Arguments
    {
        AllLengths = args.AllLengths,
        HashBits = args.HashBits,
        NgramLength = args.NgramLength,
        SkipLength = args.SkipLength,
        Ordered = args.Ordered,
        Seed = args.Seed,
        Column = extractorCols.ToArray(),
        InvertHash = args.InvertHash
    };

    view = NgramHashExtractingTransformer.Create(h, featurizeArgs, view);

    // Since we added columns with new names, we need to explicitly drop them before we return the IDataTransform.
    return ColumnSelectingTransformer.CreateDrop(h, view, tmpColNames.ToArray());
}
IDataTransform AppendToPipeline(IDataView input) { IDataView current = input; if (_shuffleInput) { var args1 = new RowShufflingTransformer.Arguments() { ForceShuffle = false, ForceShuffleSeed = _seedShuffle, PoolRows = _poolRows, PoolOnly = false, }; current = new RowShufflingTransformer(Host, args1, current); } // We generate a random number. var columnName = current.Schema.GetTempColumnName(); var args2 = new GenerateNumberTransform.Arguments() { Column = new GenerateNumberTransform.Column[] { new GenerateNumberTransform.Column() { Name = columnName } }, Seed = _seed ?? 42 }; IDataTransform currentTr = new GenerateNumberTransform(Host, args2, current); // We convert this random number into a part. var cRatios = new float[_ratios.Length]; cRatios[0] = 0; for (int i = 1; i < _ratios.Length; ++i) { cRatios[i] = cRatios[i - 1] + _ratios[i - 1]; } ValueMapper <float, int> mapper = (in float src, ref int dst) => { for (int i = cRatios.Length - 1; i > 0; --i) { if (src >= cRatios[i]) { dst = i; return; } } dst = 0; }; // Get location of columnName int index; currentTr.Schema.TryGetColumnIndex(columnName, out index); var ct = currentTr.Schema.GetColumnType(index); var view = LambdaColumnMapper.Create(Host, "Key to part mapper", currentTr, columnName, _newColumn, ct, NumberType.I4, mapper); // We cache the result to avoid the pipeline to change the random number. var args3 = new ExtendedCacheTransform.Arguments() { inDataFrame = string.IsNullOrEmpty(_cacheFile), numTheads = _numThreads, cacheFile = _cacheFile, reuse = _reuse, }; currentTr = new ExtendedCacheTransform(Host, args3, view); // Removing the temporary column. var finalTr = ColumnSelectingTransformer.CreateDrop(Host, currentTr, new string[] { columnName }); var taggedViews = new List <Tuple <string, ITaggedDataView> >(); // filenames if (_filenames != null || _tags != null) { int nbf = _filenames == null ? 
0 : _filenames.Length; if (nbf > 0 && nbf != _ratios.Length) { throw Host.Except("Differen number of filenames and ratios."); } int nbt = _tags == null ? 0 : _tags.Length; if (nbt > 0 && nbt != _ratios.Length) { throw Host.Except("Differen number of filenames and ratios."); } int nb = Math.Max(nbf, nbt); using (var ch = Host.Start("Split the datasets and stores each part.")) { for (int i = 0; i < nb; ++i) { if (_filenames == null || !_filenames.Any()) { ch.Info("Create part {0}: {1} (tag: {2})", i + 1, _ratios[i], _tags[i]); } else { ch.Info("Create part {0}: {1} (file: {2})", i + 1, _ratios[i], _filenames[i]); } var ar1 = new RangeFilter.Arguments() { Column = _newColumn, Min = i, Max = i, IncludeMax = true }; int pardId = i; var filtView = LambdaFilter.Create <int>(Host, string.Format("Select part {0}", i), currentTr, _newColumn, NumberType.I4, (in int part) => { return(part.Equals(pardId)); });
// Builds the n-gram hash extraction pipeline on top of 'input'. Every source column is mapped to
// a temporary column: first through a value-to-key mapping when 'termLoaderArgs' is supplied
// (optionally followed by dropping missing values), then through a hashing transform. The
// temporary columns feed the n-gram hashing transform and are dropped from the returned transform.
public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input, TermLoaderArguments termLoaderArgs = null)
{
    Contracts.CheckValue(env, nameof(env));
    var h = env.Register(LoaderSignature);
    h.CheckValue(args, nameof(args));
    h.CheckValue(input, nameof(input));
    h.CheckUserArg(Utils.Size(args.Column) > 0, nameof(args.Column), "Columns must be specified");

    // A hashing transform is applied to every input column first, followed by the n-gram hashing transform.
    // NOTE(review): the transform-level arguments below request 31 hash bits, but every hashing
    // column is configured with HashBits = 30 — confirm which value is intended.
    IDataView view = input;

    bool useTermLoader = termLoaderArgs != null;
    List<ValueToKeyMappingTransformer.Column> termCols = null;
    if (useTermLoader)
    {
        termCols = new List<ValueToKeyMappingTransformer.Column>();
    }

    var hashColumns = new List<HashingTransformer.Column>();
    var ngramHashColumns = new NgramHashingTransformer.Column[args.Column.Length];

    var colCount = args.Column.Length;

    // The NGramHashExtractor has a ManyToOne column type. To avoid stepping over the source
    // column name when a 'name' destination column name was specified, we use temporary column names.
    string[][] tmpColNames = new string[colCount][];
    for (int outIndex = 0; outIndex < colCount; outIndex++)
    {
        var column = args.Column[outIndex];
        h.CheckUserArg(!string.IsNullOrWhiteSpace(column.Name), nameof(column.Name));
        h.CheckUserArg(Utils.Size(column.Source) > 0 &&
            column.Source.All(src => !string.IsNullOrWhiteSpace(src)), nameof(column.Source));

        int srcCount = column.Source.Length;
        var tmpNames = new string[srcCount];
        tmpColNames[outIndex] = tmpNames;

        for (int srcIndex = 0; srcIndex < srcCount; srcIndex++)
        {
            var tmpName = input.Schema.GetTempColumnName(column.Source[srcIndex]);
            tmpNames[srcIndex] = tmpName;

            if (useTermLoader)
            {
                termCols.Add(new ValueToKeyMappingTransformer.Column
                {
                    Name = tmpName,
                    Source = column.Source[srcIndex]
                });
            }

            // With a term loader the key-mapped temporary column is hashed in place;
            // otherwise the original source column is hashed into the temporary column.
            string hashSource;
            if (useTermLoader)
            {
                hashSource = tmpName;
            }
            else
            {
                hashSource = column.Source[srcIndex];
            }

            hashColumns.Add(new HashingTransformer.Column
            {
                Name = tmpName,
                Source = hashSource,
                HashBits = 30,
                Seed = column.Seed,
                Ordered = false,
                InvertHash = column.InvertHash
            });
        }

        ngramHashColumns[outIndex] = new NgramHashingTransformer.Column
        {
            Name = column.Name,
            Source = tmpNames,
            AllLengths = column.AllLengths,
            HashBits = column.HashBits,
            NgramLength = column.NgramLength,
            RehashUnigrams = false,
            Seed = column.Seed,
            SkipLength = column.SkipLength,
            Ordered = column.Ordered,
            InvertHash = column.InvertHash,
            // REVIEW: This is an ugly internal hack to get around
            // the problem that we want the *original* source names surfacing
            // in the descriptions where appropriate, rather than _tmp000 and
            // what have you. The alternative is we do something elaborate
            // with metadata or something but I'm not sure that's better.
            FriendlyNames = column.FriendlyNames
        };
    }

    if (useTermLoader)
    {
        h.Assert(Utils.Size(termCols) == hashColumns.Count);
        var termArgs = new ValueToKeyMappingTransformer.Arguments()
        {
            MaxNumTerms = int.MaxValue,
            Terms = termLoaderArgs.Terms,
            Term = termLoaderArgs.Term,
            DataFile = termLoaderArgs.DataFile,
            Loader = termLoaderArgs.Loader,
            TermsColumn = termLoaderArgs.TermsColumn,
            Sort = termLoaderArgs.Sort,
            Column = termCols.ToArray()
        };
        view = ValueToKeyMappingTransformer.Create(h, termArgs, view);

        if (termLoaderArgs.DropUnknowns)
        {
            // Drop missing values in place on every key-mapped temporary column.
            var naDropColumns = new MissingValueDroppingTransformer.Column[termCols.Count];
            for (int termIndex = 0; termIndex < naDropColumns.Length; termIndex++)
            {
                var termName = termCols[termIndex].Name;
                naDropColumns[termIndex] = new MissingValueDroppingTransformer.Column { Name = termName, Source = termName };
            }

            var naDropArgs = new MissingValueDroppingTransformer.Arguments { Column = naDropColumns };
            view = new MissingValueDroppingTransformer(h, naDropArgs, view);
        }
    }

    // Args for the Hash function with multiple columns
    var hashArgs = new HashingTransformer.Arguments
    {
        HashBits = 31,
        Seed = args.Seed,
        Ordered = false,
        Column = hashColumns.ToArray(),
        InvertHash = args.InvertHash
    };
    view = HashingTransformer.Create(h, hashArgs, view);

    // creating the NgramHash function
    var ngramHashArgs = new NgramHashingTransformer.Arguments
    {
        AllLengths = args.AllLengths,
        HashBits = args.HashBits,
        NgramLength = args.NgramLength,
        SkipLength = args.SkipLength,
        RehashUnigrams = false,
        Ordered = args.Ordered,
        Seed = args.Seed,
        Column = ngramHashColumns,
        InvertHash = args.InvertHash
    };
    view = new NgramHashingTransformer(h, ngramHashArgs, view);

    // Flatten the per-output-column temporary names and drop them all from the result.
    var allTmpNames = tmpColNames.SelectMany(cols => cols).ToArray();
    return ColumnSelectingTransformer.CreateDrop(h, view, allTmpNames);
}