/// <summary>
/// Entry point that applies a <see cref="DropColumnsTransform"/>, configured by
/// <paramref name="input"/>, to <c>input.Data</c>.
/// </summary>
/// <param name="env">Host environment; validated with <c>Contracts.CheckValue</c>.</param>
/// <param name="input">Transform arguments, which also carry the data view to transform.</param>
/// <returns>
/// An output whose <c>OutputData</c> is the new transform and whose <c>Model</c> is a
/// <see cref="TransformModel"/> built from that transform and the original input data.
/// </returns>
public static CommonOutputs.TransformOutput DropColumns(IHostEnvironment env, DropColumnsTransform.Arguments input)
{
    Contracts.CheckValue(env, nameof(env));

    var entryHost = env.Register("DropColumns");
    entryHost.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(entryHost, input);

    var dropTransform = new DropColumnsTransform(env, input, input.Data);

    var result = new CommonOutputs.TransformOutput();
    result.Model = new TransformModel(env, dropTransform, input.Data);
    result.OutputData = dropTransform;
    return result;
}
/// <summary>
/// Entry point that builds a <see cref="DropColumnsTransform"/> from "keep" arguments and
/// applies it to <c>input.Data</c>.
/// </summary>
/// <param name="env">Host environment; validated with <c>Contracts.CheckValue</c>.</param>
/// <param name="input">Keep-style arguments, which also carry the data view to transform.</param>
/// <returns>
/// An output whose <c>OutputData</c> is the new transform and whose <c>Model</c> is a
/// <see cref="TransformModel"/> built from that transform and the original input data.
/// </returns>
public static CommonOutputs.TransformOutput SelectColumns(IHostEnvironment env, DropColumnsTransform.KeepArguments input)
{
    Contracts.CheckValue(env, nameof(env));

    var entryHost = env.Register("SelectColumns");
    entryHost.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(entryHost, input);

    // An empty Columns array is accepted on purpose: it means that every column
    // should be dropped.
    var selectTransform = new DropColumnsTransform(env, input, input.Data);

    var result = new CommonOutputs.TransformOutput();
    result.Model = new TransformModel(env, selectTransform, input.Data);
    result.OutputData = selectTransform;
    return result;
}
/// <summary>
/// Splits <c>input.Data</c> into a train and a test view by range-filtering on a
/// stratification column and then dropping that column from both results.
/// </summary>
/// <param name="env">Host environment; validated with <c>Contracts.CheckValue</c>.</param>
/// <param name="input">
/// Split settings. <c>Fraction</c> must lie strictly between 0 and 1; it is used as the upper
/// bound of the range filter for both views.
/// </param>
/// <returns>
/// An <see cref="Output"/> whose <c>TrainData</c> comes from the filter with
/// <c>Complement = false</c> and whose <c>TestData</c> comes from the same filter with
/// <c>Complement = true</c>.
/// </returns>
public static Output Split(IHostEnvironment env, Input input)
{
    Contracts.CheckValue(env, nameof(env));

    var host = env.Register(ModuleName);
    host.CheckValue(input, nameof(input));
    host.Check(input.Fraction > 0 && input.Fraction < 1, "The fraction must be in the interval (0,1).");
    EntryPointUtils.CheckInputArgs(host, input);

    // CreateStratificationColumn receives the view by reference, so the view used below
    // is whatever it leaves in 'source'.
    var source = input.Data;
    var stratCol = SplitUtils.CreateStratificationColumn(host, ref source, input.StratificationColumn);

    // Train view: filter with Complement = false, then remove the stratification column.
    var trainRange = new RangeFilter.Arguments
    {
        Column = stratCol,
        Min = 0,
        Max = input.Fraction,
        Complement = false
    };
    IDataView trainView = new RangeFilter(host, trainRange, source);
    var trainDrop = new DropColumnsTransform.Arguments { Column = new[] { stratCol } };
    trainView = new DropColumnsTransform(host, trainDrop, trainView);

    // Test view: identical bounds with Complement = true, then the same column removal.
    var testRange = new RangeFilter.Arguments
    {
        Column = stratCol,
        Min = 0,
        Max = input.Fraction,
        Complement = true
    };
    IDataView testView = new RangeFilter(host, testRange, source);
    var testDrop = new DropColumnsTransform.Arguments { Column = new[] { stratCol } };
    testView = new DropColumnsTransform(host, testDrop, testView);

    return new Output()
    {
        TrainData = trainView,
        TestData = testView
    };
}
/// <summary>
/// Builds the sentiment-analysis transform chain on top of <paramref name="input"/>: it loads
/// the pretrained model file, feeds it <c>args.Source</c>, and exposes the model's score
/// column under <c>args.Name</c>.
/// </summary>
/// <param name="env">Host environment; validated with <c>Contracts.CheckValue</c>.</param>
/// <param name="args">
/// Transform arguments. <c>Source</c> must be non-whitespace. When <c>Name</c> is null or
/// whitespace it is overwritten with <c>Source</c> (the passed object is modified).
/// </param>
/// <param name="input">The data view to extend.</param>
/// <returns>The final <see cref="DropColumnsTransform"/> at the end of the chain.</returns>
/// <remarks>
/// Input columns whose names collide with names used inside the pretrained pipeline are
/// aliased to temporary columns before the pipeline is applied and restored afterwards, so
/// that the pretrained model does not shadow or replace columns unexpectedly.
/// </remarks>
public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
{
    Contracts.CheckValue(env, nameof(env));

    var h = env.Register(LoaderSignature);
    h.CheckValue(args, nameof(args));
    h.CheckValue(input, nameof(input));
    h.CheckNonWhiteSpace(args.Source, nameof(args.Source));

    // Fall back to the source column name when no destination name was supplied.
    if (string.IsNullOrWhiteSpace(args.Name))
    {
        args.Name = args.Source;
    }

    var modelPath = Utils.FindExistentFileOrNull("pretrained.model", "Sentiment", assemblyForBasePath: typeof(SentimentAnalyzingTransform));
    if (modelPath == null)
    {
        // NOTE(review): "resourcePath" is passed as the first string here; if h.Except treats
        // its first string as the message/format, the descriptive text would end up as a
        // format argument. Confirm against the Except overloads before changing.
        throw h.Except("resourcePath", "Missing resource for SentimentAnalyzingTransform.");
    }

    // Step 1: move any input column that clashes with a name used by the pretrained
    // pipeline into a temporary column, remembering the mapping for step 6.
    input = AliasIfNeeded(env, input, _modelIntermediateColumnNames, out KeyValuePair<string, string>[] aliased);

    // Step 2: copy the source column into the column name the pretrained featurization
    // pipeline expects as its input.
    var toModelInput = new CopyColumnsTransform.Arguments()
    {
        Column = new[]
        {
            new CopyColumnsTransform.Column() { Source = args.Source, Name = ModelInputColumnName }
        }
    };
    input = new CopyColumnsTransform(env, toModelInput, input);

    // Step 3: apply the pretrained model together with its featurization pipeline.
    input = LoadTransforms(env, input, modelPath);

    // Step 4: copy the model's score column into a temporary column.
    var tempScoreColumn = input.Schema.GetTempColumnName("sa_out");
    var toTempScore = new CopyColumnsTransform.Arguments()
    {
        Column = new[]
        {
            new CopyColumnsTransform.Column() { Name = tempScoreColumn, Source = ModelScoreColumnName }
        }
    };
    input = new CopyColumnsTransform(env, toTempScore, input);

    // Step 5: drop every column the pretrained pipeline created, including its expected
    // input column and its score column (already preserved by step 4).
    var dropModelColumns = new DropColumnsTransform.Arguments() { Column = _modelIntermediateColumnNames };
    input = new DropColumnsTransform(env, dropModelColumns, input);

    // Step 6: restore the original columns aliased in step 1; this also removes the
    // temporary columns created for them.
    input = UnaliasIfNeeded(env, input, aliased);

    // Step 7: publish the temporary score column under the user-specified name.
    var toDestination = new CopyColumnsTransform.Arguments()
    {
        Column = new[]
        {
            new CopyColumnsTransform.Column() { Name = args.Name, Source = tempScoreColumn }
        }
    };
    input = new CopyColumnsTransform(env, toDestination, input);

    // Step 8: drop the temporary score column from step 4.
    var dropTempScore = new DropColumnsTransform.Arguments() { Column = new[] { tempScoreColumn } };
    return new DropColumnsTransform(env, dropTempScore, input);
}
/// <summary>
/// Entry point that, for binary classification predictors with exactly two label names,
/// renames the score columns of <c>input.Data</c> by appending <c>"." + labelNames[1]</c>.
/// </summary>
/// <param name="env">Host environment; validated with <c>Contracts.CheckValue</c>.</param>
/// <param name="input">Carries the predictor model and the scored data view.</param>
/// <returns>
/// When renaming applies: the data with each selected score column copied to its new name
/// and the original dropped. Otherwise: the result of <c>NopTransform.CreateIfNeeded</c> on
/// the input data. In both cases <c>Model</c> wraps the returned view.
/// </returns>
/// <remarks>
/// A column is selected when it is not hidden, is accepted by <c>ShouldAddColumn</c> for the
/// maximum score-column-set id, and is not tagged with the PredictedLabel score value kind.
/// </remarks>
public static CommonOutputs.TransformOutput RenameBinaryPredictionScoreColumns(IHostEnvironment env, RenameBinaryPredictionScoreColumnsInput input)
{
    Contracts.CheckValue(env, nameof(env));

    var host = env.Register("ScoreModel");
    host.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(host, input);

    if (input.PredictorModel.Predictor.PredictionKind == PredictionKind.BinaryClassification)
    {
        var labelNames = input.PredictorModel.GetLabelInfo(host, out ColumnType labelType);
        if (labelNames?.Length == 2)
        {
            // The second label name is used as the suffix for the renamed columns.
            var positiveClass = labelNames[1];
            var schema = input.Data.Schema;

            var maxScoreId = schema.GetMaxMetadataKind(out int colMax, MetadataUtils.Kinds.ScoreColumnSetId);

            // Collect (old name, new name) pairs for every score column to rename.
            var renames = new List<(string Source, string Name)>();
            for (int col = 0; col < schema.ColumnCount; col++)
            {
                if (schema.IsHidden(col) || !ShouldAddColumn(schema, col, null, maxScoreId))
                {
                    continue;
                }

                // The PredictedLabel column keeps its name.
                ReadOnlyMemory<char> valueKind = default;
                bool isPredictedLabel =
                    schema.TryGetMetadata(TextType.Instance, MetadataUtils.Kinds.ScoreValueKind, col, ref valueKind)
                    && ReadOnlyMemoryUtils.EqualsStr(MetadataUtils.Const.ScoreValueKind.PredictedLabel, valueKind);
                if (!isPredictedLabel)
                {
                    var source = schema.GetColumnName(col);
                    renames.Add((source, source + "." + positiveClass));
                }
            }

            // Copy each column to its new name, then drop the originals.
            var copier = new CopyColumnsTransform(env, renames.ToArray());
            var copied = copier.Transform(input.Data);

            var dropArgs = new DropColumnsTransform.Arguments()
            {
                Column = renames.Select(pair => pair.Source).ToArray()
            };
            var renamed = new DropColumnsTransform(env, dropArgs, copied);

            var renamedOutput = new CommonOutputs.TransformOutput();
            renamedOutput.Model = new TransformModel(env, renamed, input.Data);
            renamedOutput.OutputData = renamed;
            return renamedOutput;
        }
    }

    // Not a binary classifier, or label names unavailable / not exactly two: pass through.
    var passThrough = NopTransform.CreateIfNeeded(env, input.Data);

    var output = new CommonOutputs.TransformOutput();
    output.Model = new TransformModel(env, passThrough, input.Data);
    output.OutputData = passThrough;
    return output;
}