// Factory method for SignatureDataTransform.
        internal static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
        {
            Contracts.CheckValue(env, nameof(env));
            var h = env.Register(LoaderSignature);
            h.CheckValue(args, nameof(args));
            h.CheckValue(input, nameof(input));
            h.CheckNonWhiteSpace(args.Source, nameof(args.Source));

            if (string.IsNullOrWhiteSpace(args.Name))
                args.Name = args.Source;

            var file = Utils.FindExistentFileOrNull("pretrained.model", "Sentiment", assemblyForBasePath: typeof(SentimentAnalyzingTransformer));
            if (file == null)
            {
                throw h.Except("resourcePath", "Missing resource for SentimentAnalyzingTransform.");
            }

            // The logic below ensures that any columns in our input IDataView that conflict
            // with column names known to be used in the pretrained model transform pipeline we're
            // loading are aliased to temporary column names before we apply the pipeline and then
            // renamed back to their original names after. We do this to ensure the pretrained model
            // doesn't shadow or replace columns we aren't expecting it to.

            // 1. Alias any column in the input IDataView that is known to appear to the pretrained
            // model into a temporary column so that we can restore them after the pretrained model
            // is added to the pipeline.
            KeyValuePair<string, string>[] aliased;
            input = AliasIfNeeded(env, input, _modelIntermediateColumnNames, out aliased);

            // 2. Copy source column to a column with the name expected by the pretrained model featurization
            // transform pipeline.
            var copyTransformer = new ColumnCopyingTransformer(env, (args.Source, ModelInputColumnName));

            input = copyTransformer.Transform(input);

            // 3. Apply the pretrained model and its featurization transform pipeline.
            input = LoadTransforms(env, input, file);

            // 4. Copy the output column from the pretrained model to a temporary column.
            var scoreTempName = input.Schema.GetTempColumnName("sa_out");
            copyTransformer = new ColumnCopyingTransformer(env, (ModelScoreColumnName, scoreTempName));
            input = copyTransformer.Transform(input);

            // 5. Drop all the columns created by the pretrained model, including the expected input column
            // and the output column, which we have copied to a temporary column in (4).
            input = ColumnSelectingTransformer.CreateDrop(env, input, _modelIntermediateColumnNames);

            // 6. Unalias all the original columns that were originally present in the IDataView, but may have
            // been shadowed by column names in the pretrained model. This method will also drop all the temporary
            // columns that were created for them in (1).
            input = UnaliasIfNeeded(env, input, aliased);

            // 7. Copy the temporary column with the score we created in (4) to a column with the user-specified destination name.
            copyTransformer = new ColumnCopyingTransformer(env, (scoreTempName, args.Name));
            input = copyTransformer.Transform(input);

            // 8. Drop the temporary column with the score created in (4).
            return ColumnSelectingTransformer.CreateDrop(env, input, scoreTempName);
        }
Ejemplo n.º 2
0
        public void KeyToValueWorkout()
        {
            string dataPath = GetDataPath("iris.txt");

            var reader = new TextLoader(Env, new TextLoader.Arguments
            {
                Column = new[]
                {
                    new TextLoader.Column("ScalarString", DataKind.TX, 1),
                    new TextLoader.Column("VectorString", DataKind.TX, new[] { new TextLoader.Range(1, 4) }),
                    new TextLoader.Column
                    {
                        Name     = "BareKey",
                        Source   = new[] { new TextLoader.Range(0) },
                        Type     = DataKind.U4,
                        KeyCount = new KeyCount(6),
                    }
                }
            });

            var data = reader.Read(dataPath);

            data = new ValueToKeyMappingEstimator(Env, new[] {
                new ValueToKeyMappingTransformer.ColumnInfo("A", "ScalarString"),
                new ValueToKeyMappingTransformer.ColumnInfo("B", "VectorString")
            }).Fit(data).Transform(data);

            var badData1 = new ColumnCopyingTransformer(Env, ("A", "BareKey")).Transform(data);
            var badData2 = new ColumnCopyingTransformer(Env, ("B", "VectorString")).Transform(data);

            var est = new KeyToValueMappingEstimator(Env, ("A_back", "A"), ("B_back", "B"));

            TestEstimatorCore(est, data, invalidInput: badData1);
            TestEstimatorCore(est, data, invalidInput: badData2);


            var outputPath = GetOutputPath("KeyToValue", "featurized.tsv");

            using (var ch = Env.Start("save"))
            {
                var saver = new TextSaver(Env, new TextSaver.Arguments {
                    Silent = true
                });
                IDataView savedData = est.Fit(data).Transform(data);
                using (var fs = File.Create(outputPath))
                    DataSaverUtils.SaveDataView(ch, saver, savedData, fs, keepHidden: true);
            }

            CheckEquality("KeyToValue", "featurized.tsv");
            Done();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// If any column names in <param name="colNames" /> are present in <param name="input" />, this
        /// method will create a transform that copies them to temporary columns. It will populate <param name="hiddenNames" />
        /// with an array of string pairs containing the original name and the generated temporary column name, respectively.
        /// </summary>
        /// <param name="env"></param>
        private static IDataView AliasIfNeeded(IHostEnvironment env, IDataView input, string[] colNames, out KeyValuePair <string, string>[] hiddenNames)
        {
            hiddenNames = null;
            var toHide = new List <string>(colNames.Length);

            foreach (var name in colNames)
            {
                int discard;
                if (input.Schema.TryGetColumnIndex(name, out discard))
                {
                    toHide.Add(name);
                }
            }

            if (toHide.Count == 0)
            {
                return(input);
            }

            hiddenNames = toHide.Select(colName =>
                                        new KeyValuePair <string, string>(colName, input.Schema.GetTempColumnName(colName))).ToArray();
            return(ColumnCopyingTransformer.Create(env, new ColumnCopyingTransformer.Arguments()
            {
                Column = hiddenNames.Select(pair => new ColumnCopyingTransformer.Column()
                {
                    Name = pair.Value, Source = pair.Key
                }).ToArray()
            }, input));
        }
Ejemplo n.º 4
0
        public static CommonOutputs.TransformOutput RenameBinaryPredictionScoreColumns(IHostEnvironment env,
                                                                                       RenameBinaryPredictionScoreColumnsInput input)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register("ScoreModel");

            host.CheckValue(input, nameof(input));
            EntryPointUtils.CheckInputArgs(host, input);

            if (input.PredictorModel.Predictor.PredictionKind == PredictionKind.BinaryClassification)
            {
                DataViewType labelType;
                var          labelNames = input.PredictorModel.GetLabelInfo(host, out labelType);
                if (labelNames != null && labelNames.Length == 2)
                {
                    var positiveClass = labelNames[1];

                    // Rename all the score columns.
                    int colMax;
                    var maxScoreId = input.Data.Schema.GetMaxAnnotationKind(out colMax, AnnotationUtils.Kinds.ScoreColumnSetId);
                    var copyCols   = new List <(string name, string source)>();
                    for (int i = 0; i < input.Data.Schema.Count; i++)
                    {
                        if (input.Data.Schema[i].IsHidden)
                        {
                            continue;
                        }
                        if (!ShouldAddColumn(input.Data.Schema, i, null, maxScoreId))
                        {
                            continue;
                        }
                        // Do not rename the PredictedLabel column.
                        ReadOnlyMemory <char> tmp = default;
                        if (input.Data.Schema.TryGetAnnotation(TextDataViewType.Instance, AnnotationUtils.Kinds.ScoreValueKind, i,
                                                               ref tmp) &&
                            ReadOnlyMemoryUtils.EqualsStr(AnnotationUtils.Const.ScoreValueKind.PredictedLabel, tmp))
                        {
                            continue;
                        }
                        var source = input.Data.Schema[i].Name;
                        var name   = source + "." + positiveClass;
                        copyCols.Add((name, source));
                    }

                    var copyColumn = new ColumnCopyingTransformer(env, copyCols.ToArray()).Transform(input.Data);
                    var dropColumn = ColumnSelectingTransformer.CreateDrop(env, copyColumn, copyCols.Select(c => c.source).ToArray());
                    return(new CommonOutputs.TransformOutput {
                        Model = new TransformModelImpl(env, dropColumn, input.Data), OutputData = dropColumn
                    });
                }
            }

            var newView = NopTransform.CreateIfNeeded(env, input.Data);

            return(new CommonOutputs.TransformOutput {
                Model = new TransformModelImpl(env, newView, input.Data), OutputData = newView
            });
        }
Ejemplo n.º 5
0
        public static CommonOutputs.TransformOutput CopyColumns(IHostEnvironment env, ColumnCopyingTransformer.Arguments input)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register("CopyColumns");

            host.CheckValue(input, nameof(input));
            EntryPointUtils.CheckInputArgs(host, input);
            var xf = ColumnCopyingTransformer.Create(env, input, input.Data);

            return(new CommonOutputs.TransformOutput {
                Model = new TransformModelImpl(env, xf, input.Data), OutputData = xf
            });
        }
Ejemplo n.º 6
0
        private IDataView WrapPerInstance(RoleMappedData perInst)
        {
            var idv = perInst.Data;

            // Make a list of column names that Maml outputs as part of the per-instance data view, and then wrap
            // the per-instance data computed by the evaluator in a SelectColumnsTransform.
            var cols       = new List <(string name, string source)>();
            var colsToKeep = new List <string>();

            // If perInst is the result of cross-validation and contains a fold Id column, include it.
            int foldCol;

            if (perInst.Schema.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.FoldIndex, out foldCol))
            {
                colsToKeep.Add(MetricKinds.ColumnNames.FoldIndex);
            }

            // Maml always outputs a name column, if it doesn't exist add a GenerateNumberTransform.
            if (perInst.Schema.Name?.Name is string nameName)
            {
                cols.Add(("Instance", nameName));
                colsToKeep.Add("Instance");
            }
            else
            {
                var args = new GenerateNumberTransform.Arguments();
                args.Columns = new[] { new GenerateNumberTransform.Column()
                                       {
                                           Name = "Instance"
                                       } };
                args.UseCounter = true;
                idv             = new GenerateNumberTransform(Host, args, idv);
                colsToKeep.Add("Instance");
            }

            // Maml outputs the weight column if it exists.
            if (perInst.Schema.Weight?.Name is string weightName)
            {
                colsToKeep.Add(weightName);
            }

            // Get the other columns from the evaluator.
            foreach (var col in GetPerInstanceColumnsToSave(perInst.Schema))
            {
                colsToKeep.Add(col);
            }

            idv = new ColumnCopyingTransformer(Host, cols.ToArray()).Transform(idv);
            idv = ColumnSelectingTransformer.CreateKeep(Host, idv, colsToKeep.ToArray());
            return(GetPerInstanceMetricsCore(idv, perInst.Schema));
        }
Ejemplo n.º 7
0
        public static void Initialize()
        {
            Context = new MLContext(seed: 0);

            IDataView data_view_full = null;

            data_view_full = Context
                             .Data
                             .LoadFromTextFile <SomatotypeInputData>
                             (
                Data.File,
                hasHeader: true,
                separatorChar: ','
                             );

            Data.DataViewSchema   = data_view_full.Schema;;
            Data.DataViewSplit    = Context.Data.TrainTestSplit(data_view_full, testFraction: 0.25);
            Data.DataViewTraining = Data.DataViewSplit.TrainSet;
            Data.DataViewTesting  = Data.DataViewSplit.TestSet;


            Data.TransformPipeline = Context.Transforms.CopyColumns
                                     (
                outputColumnName: "Label",
                inputColumnName: "EndomorphicComponent"
                                     );

            Data.TransformPipeline.Append
            (
                Context.Transforms.Concatenate
                (
                    "Features",
                    "Height",
                    "Mass",
                    "BreadthHumerus",
                    "BreadthFemur",
                    "GirthArmUpper",
                    "GirthCalfStanding",
                    "SkinfoldSubscapular",
                    "SkinfoldTriceps",
                    "SkinfoldSupraspinale",
                    "SkinfoldMedialCalf"
                )
            );

            ColumnCopyingTransformer model_endomorphic = null;

            Data.Transformer = model_endomorphic;

            return;
        }
Ejemplo n.º 8
0
        private static IDataView UnaliasIfNeeded(IHostEnvironment env, IDataView input, KeyValuePair <string, string>[] hiddenNames)
        {
            if (Utils.Size(hiddenNames) == 0)
            {
                return(input);
            }

            input = ColumnCopyingTransformer.Create(env, new ColumnCopyingTransformer.Arguments()
            {
                Column = hiddenNames.Select(pair => new ColumnCopyingTransformer.Column()
                {
                    Name = pair.Key, Source = pair.Value
                }).ToArray()
            }, input);

            return(ColumnSelectingTransformer.CreateDrop(env, input, hiddenNames.Select(pair => pair.Value).ToArray()));
        }
Ejemplo n.º 9
0
        public void NormalizerWorkout()
        {
            string dataPath = GetDataPath(TestDatasets.iris.trainFilename);

            var loader = new TextLoader(Env, new TextLoader.Arguments
            {
                Column = new[] {
                    new TextLoader.Column("float1", DataKind.R4, 1),
                    new TextLoader.Column("float4", DataKind.R4, new[] { new TextLoader.Range(1, 4) }),
                    new TextLoader.Column("double1", DataKind.R8, 1),
                    new TextLoader.Column("double4", DataKind.R8, new[] { new TextLoader.Range(1, 4) }),
                    new TextLoader.Column("int1", DataKind.I4, 0),
                    new TextLoader.Column("float0", DataKind.R4, new[] { new TextLoader.Range {
                                                                             Min = 1, VariableEnd = true
                                                                         } }),
                },
                HasHeader = true
            }, new MultiFileSource(dataPath));

            var est = new NormalizingEstimator(Env,
                                               new NormalizingEstimator.MinMaxColumn("float1"),
                                               new NormalizingEstimator.MinMaxColumn("float4"),
                                               new NormalizingEstimator.MinMaxColumn("double1"),
                                               new NormalizingEstimator.MinMaxColumn("double4"),
                                               new NormalizingEstimator.BinningColumn("float1", "float1bin"),
                                               new NormalizingEstimator.BinningColumn("float4", "float4bin"),
                                               new NormalizingEstimator.BinningColumn("double1", "double1bin"),
                                               new NormalizingEstimator.BinningColumn("double4", "double4bin"),
                                               new NormalizingEstimator.SupervisedBinningColumn("float1", "float1supervisedbin", labelColumn: "int1"),
                                               new NormalizingEstimator.SupervisedBinningColumn("float4", "float4supervisedbin", labelColumn: "int1"),
                                               new NormalizingEstimator.SupervisedBinningColumn("double1", "double1supervisedbin", labelColumn: "int1"),
                                               new NormalizingEstimator.SupervisedBinningColumn("double4", "double4supervisedbin", labelColumn: "int1"),
                                               new NormalizingEstimator.MeanVarColumn("float1", "float1mv"),
                                               new NormalizingEstimator.MeanVarColumn("float4", "float4mv"),
                                               new NormalizingEstimator.MeanVarColumn("double1", "double1mv"),
                                               new NormalizingEstimator.MeanVarColumn("double4", "double4mv"),
                                               new NormalizingEstimator.LogMeanVarColumn("float1", "float1lmv"),
                                               new NormalizingEstimator.LogMeanVarColumn("float4", "float4lmv"),
                                               new NormalizingEstimator.LogMeanVarColumn("double1", "double1lmv"),
                                               new NormalizingEstimator.LogMeanVarColumn("double4", "double4lmv"));

            var data = loader.Read(dataPath);

            var badData1 = new ColumnCopyingTransformer(Env, ("int1", "float1")).Transform(data);
            var badData2 = new ColumnCopyingTransformer(Env, ("float0", "float4")).Transform(data);

            TestEstimatorCore(est, data, null, badData1);
            TestEstimatorCore(est, data, null, badData2);

            var outputPath = GetOutputPath("NormalizerEstimator", "normalized.tsv");

            using (var ch = Env.Start("save"))
            {
                var saver = new TextSaver(Env, new TextSaver.Arguments {
                    Silent = true
                });
                using (var fs = File.Create(outputPath))
                {
                    var dataView = ColumnSelectingTransformer.CreateDrop(Env, est.Fit(data).Transform(data), "float0");
                    DataSaverUtils.SaveDataView(ch, saver, dataView, fs, keepHidden: true);
                }
            }

            CheckEquality("NormalizerEstimator", "normalized.tsv");

            Done();
        }