Example #1
0
        /// <summary>
        /// Entry point that wraps the data view carried by <paramref name="input"/> in a
        /// <see cref="DropColumnsTransform"/>.
        /// </summary>
        /// <param name="env">Host environment; validated to be non-null.</param>
        /// <param name="input">Transform arguments, which also carry the data view to transform.</param>
        /// <returns>
        /// An output whose data is the new transform and whose model is a
        /// <see cref="TransformModel"/> built from that transform and the original data view.
        /// </returns>
        public static CommonOutputs.TransformOutput DropColumns(IHostEnvironment env, DropColumnsTransform.Arguments input)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register("DropColumns");
            host.CheckValue(input, nameof(input));
            EntryPointUtils.CheckInputArgs(host, input);

            var sourceData = input.Data;
            var dropTransform = new DropColumnsTransform(env, input, sourceData);

            var result = new CommonOutputs.TransformOutput();
            result.Model = new TransformModel(env, dropTransform, sourceData);
            result.OutputData = dropTransform;
            return result;
        }
Example #2
0
        /// <summary>
        /// Entry point that builds a <see cref="DropColumnsTransform"/> from "keep" style arguments
        /// over the data view carried by <paramref name="input"/>.
        /// </summary>
        /// <param name="env">Host environment; validated to be non-null.</param>
        /// <param name="input">Keep-arguments, which also carry the data view to transform.</param>
        /// <returns>
        /// An output whose data is the new transform and whose model is a
        /// <see cref="TransformModel"/> built from that transform and the original data view.
        /// </returns>
        public static CommonOutputs.TransformOutput SelectColumns(IHostEnvironment env, DropColumnsTransform.KeepArguments input)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register("SelectColumns");
            host.CheckValue(input, nameof(input));
            EntryPointUtils.CheckInputArgs(host, input);

            // Note: an empty Columns array is allowed here; it indicates that
            // all the columns should be dropped.
            var sourceData = input.Data;
            var selectTransform = new DropColumnsTransform(env, input, sourceData);

            var result = new CommonOutputs.TransformOutput();
            result.Model = new TransformModel(env, selectTransform, sourceData);
            result.OutputData = selectTransform;
            return result;
        }
Example #3
0
        /// <summary>
        /// Splits the input data into a train and a test view using a range filter over a
        /// stratification column, then removes that column from both views.
        /// </summary>
        /// <param name="env">Host environment; validated to be non-null.</param>
        /// <param name="input">
        /// Split settings. <c>Fraction</c> must lie strictly between 0 and 1 and is used as the
        /// upper bound of the range filter.
        /// </param>
        /// <returns>
        /// An <see cref="Output"/> where <c>TrainData</c> is the filtered range and <c>TestData</c>
        /// is its complement.
        /// </returns>
        public static Output Split(IHostEnvironment env, Input input)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register(ModuleName);
            host.CheckValue(input, nameof(input));

            bool fractionInRange = input.Fraction > 0 && input.Fraction < 1;
            host.Check(fractionInRange, "The fraction must be in the interval (0,1).");
            EntryPointUtils.CheckInputArgs(host, input);

            // The helper receives the view by reference, so it may replace it.
            var view = input.Data;
            var stratificationName = SplitUtils.CreateStratificationColumn(host, ref view, input.StratificationColumn);

            // Train partition: rows inside [0, Fraction] on the stratification column.
            var trainFilterArgs = new RangeFilter.Arguments
            {
                Column = stratificationName,
                Min = 0,
                Max = input.Fraction,
                Complement = false
            };
            var trainFiltered = new RangeFilter(host, trainFilterArgs, view);
            var trainDropArgs = new DropColumnsTransform.Arguments
            {
                Column = new[] { stratificationName }
            };
            IDataView trainPartition = new DropColumnsTransform(host, trainDropArgs, trainFiltered);

            // Test partition: the complement of the same range.
            var testFilterArgs = new RangeFilter.Arguments
            {
                Column = stratificationName,
                Min = 0,
                Max = input.Fraction,
                Complement = true
            };
            var testFiltered = new RangeFilter(host, testFilterArgs, view);
            var testDropArgs = new DropColumnsTransform.Arguments
            {
                Column = new[] { stratificationName }
            };
            IDataView testPartition = new DropColumnsTransform(host, testDropArgs, testFiltered);

            var result = new Output();
            result.TrainData = trainPartition;
            result.TestData = testPartition;
            return result;
        }
Example #4
0
        /// <summary>
        /// Builds the sentiment-analysis transform pipeline on top of <paramref name="input"/>:
        /// the source column is fed through the pretrained model loaded from disk and the model's
        /// score is exposed under the requested output column name.
        /// </summary>
        /// <param name="env">Host environment; validated to be non-null.</param>
        /// <param name="args">
        /// Transform arguments. <c>Source</c> must be non-whitespace. When <c>Name</c> is null or
        /// whitespace it is overwritten with <c>Source</c> (the arguments object is modified in place).
        /// </param>
        /// <param name="input">The data view to apply the pipeline to; validated to be non-null.</param>
        /// <returns>
        /// The final <see cref="DropColumnsTransform"/> of the pipeline, which removes the temporary
        /// score column.
        /// </returns>
        /// <remarks>
        /// Throws the exception produced by the host's <c>Except</c> helper when the
        /// "pretrained.model" resource cannot be located.
        /// </remarks>
        public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
        {
            Contracts.CheckValue(env, nameof(env));
            var h = env.Register(LoaderSignature);

            h.CheckValue(args, nameof(args));
            h.CheckValue(input, nameof(input));
            h.CheckNonWhiteSpace(args.Source, nameof(args.Source));

            // Default the output column name to the source column name.
            if (string.IsNullOrWhiteSpace(args.Name))
            {
                args.Name = args.Source;
            }

            var file = Utils.FindExistentFileOrNull("pretrained.model", "Sentiment", assemblyForBasePath: typeof(SentimentAnalyzingTransform));

            if (file == null)
            {
                // The description is the exception message itself.
                throw h.Except("Missing resource for SentimentAnalyzingTransform.");
            }

            // The logic below ensures that any columns in our input IDataView that conflict
            // with column names known to be used in the pretrained model transform pipeline we're
            // loading are aliased to temporary column names before we apply the pipeline and then
            // renamed back to their original names after. We do this to ensure the pretrained model
            // doesn't shadow or replace columns we aren't expecting it to.

            // 1. Alias any column in the input IDataView that is known to appear to the pretrained
            // model into a temporary column so that we can restore them after the pretrained model
            // is added to the pipeline.
            KeyValuePair<string, string>[] aliased;
            input = AliasIfNeeded(env, input, _modelIntermediateColumnNames, out aliased);

            // 2. Copy source column to a column with the name expected by the pretrained model featurization
            // transform pipeline.
            input = new CopyColumnsTransform(env, new CopyColumnsTransform.Arguments()
            {
                Column = new[]
                {
                    new CopyColumnsTransform.Column() { Source = args.Source, Name = ModelInputColumnName }
                }
            }, input);

            // 3. Apply the pretrained model and its featurization transform pipeline.
            input = LoadTransforms(env, input, file);

            // 4. Copy the output column from the pretrained model to a temporary column.
            var scoreTempName = input.Schema.GetTempColumnName("sa_out");

            input = new CopyColumnsTransform(env, new CopyColumnsTransform.Arguments()
            {
                Column = new[]
                {
                    new CopyColumnsTransform.Column() { Name = scoreTempName, Source = ModelScoreColumnName }
                }
            }, input);

            // 5. Drop all the columns created by the pretrained model, including the expected input column
            // and the output column, which we have copied to a temporary column in (4).
            input = new DropColumnsTransform(env, new DropColumnsTransform.Arguments()
            {
                Column = _modelIntermediateColumnNames
            }, input);

            // 6. Unalias all the original columns that were originally present in the IDataView, but may have
            // been shadowed by column names in the pretrained model. This method will also drop all the temporary
            // columns that were created for them in (1).
            input = UnaliasIfNeeded(env, input, aliased);

            // 7. Copy the temporary column with the score we created in (4) to a column with the user-specified destination name.
            input = new CopyColumnsTransform(env, new CopyColumnsTransform.Arguments()
            {
                Column = new[]
                {
                    new CopyColumnsTransform.Column() { Name = args.Name, Source = scoreTempName }
                }
            }, input);

            // 8. Drop the temporary column with the score created in (4).
            return new DropColumnsTransform(env, new DropColumnsTransform.Arguments()
            {
                Column = new[] { scoreTempName }
            }, input);
        }
        /// <summary>
        /// For a binary-classification predictor that reports exactly two label names, copies each
        /// eligible score column to a column named "<c>source.positiveClass</c>" (the second label
        /// name) and drops the original. Hidden columns, columns rejected by
        /// <c>ShouldAddColumn</c>, and the predicted-label column are left untouched.
        /// In every other case the data is passed through <see cref="NopTransform"/>.
        /// </summary>
        /// <param name="env">Host environment; validated to be non-null.</param>
        /// <param name="input">Carries the predictor model and the scored data view.</param>
        /// <returns>The resulting data view together with a <see cref="TransformModel"/> for it.</returns>
        public static CommonOutputs.TransformOutput RenameBinaryPredictionScoreColumns(IHostEnvironment env,
                                                                                       RenameBinaryPredictionScoreColumnsInput input)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register("ScoreModel");
            host.CheckValue(input, nameof(input));
            EntryPointUtils.CheckInputArgs(host, input);

            // Shared fallback: leave the columns as they are.
            CommonOutputs.TransformOutput PassThrough()
            {
                var unchanged = NopTransform.CreateIfNeeded(env, input.Data);
                return new CommonOutputs.TransformOutput
                {
                    Model = new TransformModel(env, unchanged, input.Data),
                    OutputData = unchanged
                };
            }

            if (input.PredictorModel.Predictor.PredictionKind != PredictionKind.BinaryClassification)
            {
                return PassThrough();
            }

            var classNames = input.PredictorModel.GetLabelInfo(host, out ColumnType labelType);
            if (classNames == null || classNames.Length != 2)
            {
                return PassThrough();
            }

            var positiveClass = classNames[1];
            var schema = input.Data.Schema;

            // Rename all the score columns.
            var maxScoreId = schema.GetMaxMetadataKind(out int colMax, MetadataUtils.Kinds.ScoreColumnSetId);
            var renames = new List<(string Source, string Name)>();
            for (int col = 0; col < schema.ColumnCount; col++)
            {
                if (schema.IsHidden(col) || !ShouldAddColumn(schema, col, null, maxScoreId))
                {
                    continue;
                }

                // Do not rename the PredictedLabel column.
                ReadOnlyMemory<char> valueKind = default;
                bool isPredictedLabel =
                    schema.TryGetMetadata(TextType.Instance, MetadataUtils.Kinds.ScoreValueKind, col, ref valueKind) &&
                    ReadOnlyMemoryUtils.EqualsStr(MetadataUtils.Const.ScoreValueKind.PredictedLabel, valueKind);
                if (isPredictedLabel)
                {
                    continue;
                }

                var originalName = schema.GetColumnName(col);
                renames.Add((originalName, originalName + "." + positiveClass));
            }

            // Copy to the new names first, then drop the originals.
            var copied = new CopyColumnsTransform(env, renames.ToArray()).Transform(input.Data);
            var dropArgs = new DropColumnsTransform.Arguments()
            {
                Column = renames.Select(pair => pair.Source).ToArray()
            };
            var renamed = new DropColumnsTransform(env, dropArgs, copied);

            return new CommonOutputs.TransformOutput
            {
                Model = new TransformModel(env, renamed, input.Data),
                OutputData = renamed
            };
        }