/// <summary>
/// Factory method for SignatureDataTransform. Applies the pretrained sentiment model found in the
/// "pretrained.model" resource to the <c>args.Source</c> column of <paramref name="input"/> and exposes
/// the model score under <c>args.Name</c>.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="args">
/// Transform arguments. <c>Source</c> must be non-blank. When <c>Name</c> is null or white space it is
/// set to <c>Source</c> on this same object.
/// </param>
/// <param name="input">Data view the transform is applied to; must not be null.</param>
/// <returns>The data transform produced by the final column-drop step.</returns>
/// <remarks>
/// Throws through the host's <c>Except</c> helper when the pretrained model resource cannot be found.
/// </remarks>
internal static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
{
    Contracts.CheckValue(env, nameof(env));
    var h = env.Register(LoaderSignature);
    h.CheckValue(args, nameof(args));
    h.CheckValue(input, nameof(input));
    h.CheckNonWhiteSpace(args.Source, nameof(args.Source));

    // Default the destination column to the source column.
    if (string.IsNullOrWhiteSpace(args.Name))
    {
        args.Name = args.Source;
    }

    var file = Utils.FindExistentFileOrNull("pretrained.model", "Sentiment", assemblyForBasePath: typeof(SentimentAnalyzingTransformer));
    if (file == null)
    {
        // Pass the descriptive text as the message itself. The previous call supplied "resourcePath"
        // as the first string, which made that token the message and left the real text as an unused
        // format argument.
        // NOTE(review): assumes the (string msg, params object[] args) overload of Except was being
        // selected - confirm against Contracts.
        throw h.Except("Missing resource for SentimentAnalyzingTransform.");
    }

    // The logic below ensures that any columns in our input IDataView that conflict
    // with column names known to be used in the pretrained model transform pipeline we're
    // loading are aliased to temporary column names before we apply the pipeline and then
    // renamed back to their original names after. We do this to ensure the pretrained model
    // doesn't shadow or replace columns we aren't expecting it to.

    // 1. Alias any column in the input IDataView that is known to appear in the pretrained
    //    model into a temporary column so that we can restore it after the pretrained model
    //    is added to the pipeline.
    KeyValuePair<string, string>[] aliased;
    input = AliasIfNeeded(env, input, _modelIntermediateColumnNames, out aliased);

    // 2. Copy the source column to a column with the name expected by the pretrained model
    //    featurization transform pipeline.
    var copyTransformer = new ColumnCopyingTransformer(env, (args.Source, ModelInputColumnName));
    input = copyTransformer.Transform(input);

    // 3. Apply the pretrained model and its featurization transform pipeline.
    input = LoadTransforms(env, input, file);

    // 4. Copy the output column from the pretrained model to a temporary column.
    var scoreTempName = input.Schema.GetTempColumnName("sa_out");
    copyTransformer = new ColumnCopyingTransformer(env, (ModelScoreColumnName, scoreTempName));
    input = copyTransformer.Transform(input);

    // 5. Drop all the columns created by the pretrained model, including the expected input column
    //    and the output column, which we have copied to a temporary column in (4).
    input = ColumnSelectingTransformer.CreateDrop(env, input, _modelIntermediateColumnNames);

    // 6. Unalias all the columns that were originally present in the IDataView but may have
    //    been shadowed by column names in the pretrained model. This also drops the temporary
    //    columns that were created for them in (1).
    input = UnaliasIfNeeded(env, input, aliased);

    // 7. Copy the temporary score column created in (4) to a column with the user-specified
    //    destination name.
    copyTransformer = new ColumnCopyingTransformer(env, (scoreTempName, args.Name));
    input = copyTransformer.Transform(input);

    // 8. Drop the temporary score column created in (4).
    return ColumnSelectingTransformer.CreateDrop(env, input, scoreTempName);
}
/// <summary>
/// Workout for <c>KeyToValueMappingEstimator</c>: loads "iris.txt", maps two text columns to keys
/// ("A" and "B"), runs the estimator through <c>TestEstimatorCore</c> with two data views passed as
/// invalid input, then saves the transformed data and calls <c>CheckEquality</c> on the saved file.
/// </summary>
public void KeyToValueWorkout()
{
    string irisPath = GetDataPath("iris.txt");

    // Column layout: one scalar text column, one text vector and a bare key column.
    var loaderColumns = new[]
    {
        new TextLoader.Column("ScalarString", DataKind.TX, 1),
        new TextLoader.Column("VectorString", DataKind.TX, new[] { new TextLoader.Range(1, 4) }),
        new TextLoader.Column
        {
            Name = "BareKey",
            Source = new[] { new TextLoader.Range(0) },
            Type = DataKind.U4,
            KeyCount = new KeyCount(6),
        }
    };
    var reader = new TextLoader(Env, new TextLoader.Arguments { Column = loaderColumns });

    // Load, then turn the two text columns into key columns "A" and "B".
    var irisData = reader.Read(irisPath);
    var toKey = new ValueToKeyMappingEstimator(Env, new[]
    {
        new ValueToKeyMappingTransformer.ColumnInfo("A", "ScalarString"),
        new ValueToKeyMappingTransformer.ColumnInfo("B", "VectorString")
    });
    irisData = toKey.Fit(irisData).Transform(irisData);

    // Two views handed to TestEstimatorCore as invalidInput.
    var invalidScalar = new ColumnCopyingTransformer(Env, ("A", "BareKey")).Transform(irisData);
    var invalidVector = new ColumnCopyingTransformer(Env, ("B", "VectorString")).Transform(irisData);

    var keyToValue = new KeyToValueMappingEstimator(Env, ("A_back", "A"), ("B_back", "B"));
    TestEstimatorCore(keyToValue, irisData, invalidInput: invalidScalar);
    TestEstimatorCore(keyToValue, irisData, invalidInput: invalidVector);

    // Persist the transformed data (hidden columns included) for the equality check below.
    var featurizedPath = GetOutputPath("KeyToValue", "featurized.tsv");
    using (var channel = Env.Start("save"))
    {
        var textSaver = new TextSaver(Env, new TextSaver.Arguments { Silent = true });
        IDataView transformed = keyToValue.Fit(irisData).Transform(irisData);
        using (var stream = File.Create(featurizedPath))
        {
            DataSaverUtils.SaveDataView(channel, textSaver, transformed, stream, keepHidden: true);
        }
    }

    CheckEquality("KeyToValue", "featurized.tsv");
    Done();
}
/// <summary>
/// Copies every column of <paramref name="input"/> whose name is listed in <paramref name="colNames"/>
/// into a temporary column, so the original can be restored later.
/// </summary>
/// <param name="env">Environment handed to the copy transform that is created.</param>
/// <param name="input">Data view whose schema is searched for the listed column names.</param>
/// <param name="colNames">Column names to look for in <paramref name="input"/>.</param>
/// <param name="hiddenNames">
/// On return, one pair per column found: the key is the original column name and the value is the
/// generated temporary column name. Set to <c>null</c> when none of the names is present.
/// </param>
/// <returns>
/// <paramref name="input"/> itself when no listed column is present; otherwise the result of a
/// <c>ColumnCopyingTransformer</c> that copies each found column to its temporary name.
/// </returns>
private static IDataView AliasIfNeeded(IHostEnvironment env, IDataView input, string[] colNames, out KeyValuePair<string, string>[] hiddenNames)
{
    // Collect the requested names that actually exist in the input schema.
    var present = new List<string>(colNames.Length);
    for (int i = 0; i < colNames.Length; i++)
    {
        if (input.Schema.TryGetColumnIndex(colNames[i], out int _))
        {
            present.Add(colNames[i]);
        }
    }

    if (present.Count == 0)
    {
        hiddenNames = null;
        return input;
    }

    // Pair each original name with a temporary name and build the matching copy instructions.
    var aliases = new KeyValuePair<string, string>[present.Count];
    for (int i = 0; i < aliases.Length; i++)
    {
        aliases[i] = new KeyValuePair<string, string>(present[i], input.Schema.GetTempColumnName(present[i]));
    }

    var copies = new ColumnCopyingTransformer.Column[aliases.Length];
    for (int i = 0; i < aliases.Length; i++)
    {
        copies[i] = new ColumnCopyingTransformer.Column() { Name = aliases[i].Value, Source = aliases[i].Key };
    }

    hiddenNames = aliases;
    var copyArgs = new ColumnCopyingTransformer.Arguments() { Column = copies };
    return ColumnCopyingTransformer.Create(env, copyArgs, input);
}
/// <summary>
/// For a binary classification predictor with exactly two label names, copies each selected score
/// column to "&lt;column&gt;.&lt;second label name&gt;" and drops the original column. In every other
/// case the data is passed through <c>NopTransform.CreateIfNeeded</c>.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="input">Entry-point input carrying the predictor model and the scored data.</param>
/// <returns>A transform output whose model and output data wrap the resulting view.</returns>
public static CommonOutputs.TransformOutput RenameBinaryPredictionScoreColumns(IHostEnvironment env, RenameBinaryPredictionScoreColumnsInput input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register("ScoreModel");
    host.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(host, input);

    if (input.PredictorModel.Predictor.PredictionKind == PredictionKind.BinaryClassification)
    {
        var labelNames = input.PredictorModel.GetLabelInfo(host, out DataViewType labelType);
        bool hasTwoLabels = labelNames != null && labelNames.Length == 2;
        if (hasTwoLabels)
        {
            // The second label name becomes the suffix of the renamed columns.
            var positiveClass = labelNames[1];
            var schema = input.Data.Schema;

            // Rename all the score columns.
            var maxScoreId = schema.GetMaxAnnotationKind(out int colMax, AnnotationUtils.Kinds.ScoreColumnSetId);
            var copyCols = new List<(string name, string source)>();
            for (int col = 0; col < schema.Count; col++)
            {
                if (schema[col].IsHidden || !ShouldAddColumn(schema, col, null, maxScoreId))
                {
                    continue;
                }

                // Do not rename the PredictedLabel column.
                ReadOnlyMemory<char> scoreValueKind = default;
                bool isPredictedLabel =
                    schema.TryGetAnnotation(TextDataViewType.Instance, AnnotationUtils.Kinds.ScoreValueKind, col, ref scoreValueKind)
                    && ReadOnlyMemoryUtils.EqualsStr(AnnotationUtils.Const.ScoreValueKind.PredictedLabel, scoreValueKind);
                if (isPredictedLabel)
                {
                    continue;
                }

                var source = schema[col].Name;
                copyCols.Add((source + "." + positiveClass, source));
            }

            // Copy to the new names first, then remove the columns that were copied from.
            var copyColumn = new ColumnCopyingTransformer(env, copyCols.ToArray()).Transform(input.Data);
            var sourcesToDrop = copyCols.Select(c => c.source).ToArray();
            var dropColumn = ColumnSelectingTransformer.CreateDrop(env, copyColumn, sourcesToDrop);
            return new CommonOutputs.TransformOutput
            {
                Model = new TransformModelImpl(env, dropColumn, input.Data),
                OutputData = dropColumn
            };
        }
    }

    // Not a two-label binary classifier: leave the columns untouched.
    var newView = NopTransform.CreateIfNeeded(env, input.Data);
    return new CommonOutputs.TransformOutput
    {
        Model = new TransformModelImpl(env, newView, input.Data),
        OutputData = newView
    };
}
/// <summary>
/// Entry point that builds a <c>ColumnCopyingTransformer</c> from <paramref name="input"/> over
/// <c>input.Data</c> and returns it as both the transform model and the output data.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="input">Column copying arguments, including the data to transform.</param>
/// <returns>A transform output wrapping the created transform.</returns>
public static CommonOutputs.TransformOutput CopyColumns(IHostEnvironment env, ColumnCopyingTransformer.Arguments input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register("CopyColumns");
    host.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(host, input);

    var xf = ColumnCopyingTransformer.Create(env, input, input.Data);
    var output = new CommonOutputs.TransformOutput();
    output.Model = new TransformModelImpl(env, xf, input.Data);
    output.OutputData = xf;
    return output;
}
/// <summary>
/// Narrows the per-instance data to the columns that should be output: the fold index (if present),
/// an "Instance" column, the weight column (if any) and the columns reported by
/// <c>GetPerInstanceColumnsToSave</c>. The result is passed to <c>GetPerInstanceMetricsCore</c>.
/// </summary>
/// <param name="perInst">Role-mapped per-instance data produced by the evaluator.</param>
/// <returns>The view returned by <c>GetPerInstanceMetricsCore</c> for the narrowed data.</returns>
private IDataView WrapPerInstance(RoleMappedData perInst)
{
    var idv = perInst.Data;

    // Make a list of column names that Maml outputs as part of the per-instance data view, and then
    // wrap the per-instance data computed by the evaluator in a column-selecting transform.
    var copies = new List<(string name, string source)>();
    var keep = new List<string>();

    // If perInst is the result of cross-validation and contains a fold Id column, include it.
    if (perInst.Schema.Schema.TryGetColumnIndex(MetricKinds.ColumnNames.FoldIndex, out int _))
    {
        keep.Add(MetricKinds.ColumnNames.FoldIndex);
    }

    // Maml always outputs a name column: copy the existing one to "Instance", or generate a counter.
    if (perInst.Schema.Name?.Name is string nameName)
    {
        copies.Add(("Instance", nameName));
    }
    else
    {
        var args = new GenerateNumberTransform.Arguments
        {
            Columns = new[] { new GenerateNumberTransform.Column() { Name = "Instance" } },
            UseCounter = true
        };
        idv = new GenerateNumberTransform(Host, args, idv);
    }
    keep.Add("Instance");

    // Maml outputs the weight column if it exists.
    if (perInst.Schema.Weight?.Name is string weightName)
    {
        keep.Add(weightName);
    }

    // Get the other columns from the evaluator.
    foreach (var evaluatorColumn in GetPerInstanceColumnsToSave(perInst.Schema))
    {
        keep.Add(evaluatorColumn);
    }

    idv = new ColumnCopyingTransformer(Host, copies.ToArray()).Transform(idv);
    idv = ColumnSelectingTransformer.CreateKeep(Host, idv, keep.ToArray());
    return GetPerInstanceMetricsCore(idv, perInst.Schema);
}
/// <summary>
/// Creates the ML context (seed 0), loads the somatotype data from <c>Data.File</c>, stores its schema
/// and a train/test split (test fraction 0.25), and sets up the transform pipeline fields on
/// <c>Data</c>. <c>Data.Transformer</c> is left as null.
/// </summary>
public static void Initialize()
{
    Context = new MLContext(seed: 0);

    // Load the comma-separated input file, which has a header row.
    IDataView fullView = Context.Data.LoadFromTextFile<SomatotypeInputData>(
        Data.File,
        hasHeader: true,
        separatorChar: ',');
    Data.DataViewSchema = fullView.Schema;

    // Split into training and testing views.
    Data.DataViewSplit = Context.Data.TrainTestSplit(fullView, testFraction: 0.25);
    Data.DataViewTraining = Data.DataViewSplit.TrainSet;
    Data.DataViewTesting = Data.DataViewSplit.TestSet;

    // "EndomorphicComponent" is copied to the "Label" column.
    Data.TransformPipeline = Context.Transforms.CopyColumns(
        outputColumnName: "Label",
        inputColumnName: "EndomorphicComponent");

    // NOTE(review): the value returned by Append is discarded. If Append returns a new pipeline
    // rather than modifying Data.TransformPipeline in place, the Concatenate step below is never
    // stored - confirm the intent and the declared type of Data.TransformPipeline.
    Data.TransformPipeline.Append(
        Context.Transforms.Concatenate(
            "Features",
            "Height",
            "Mass",
            "BreadthHumerus",
            "BreadthFemur",
            "GirthArmUpper",
            "GirthCalfStanding",
            "SkinfoldSubscapular",
            "SkinfoldTriceps",
            "SkinfoldSupraspinale",
            "SkinfoldMedialCalf"));

    // No model is produced here; the transformer is explicitly set to null.
    ColumnCopyingTransformer endomorphicModel = null;
    Data.Transformer = endomorphicModel;
}
/// <summary>
/// Reverses an earlier aliasing step: copies each temporary column back to its original name and
/// then drops the temporary columns.
/// </summary>
/// <param name="env">Environment handed to the transforms that are created.</param>
/// <param name="input">Data view containing the temporary columns.</param>
/// <param name="hiddenNames">
/// Pairs whose key is the original column name and whose value is the temporary column name. May be
/// null or empty, in which case <paramref name="input"/> is returned unchanged.
/// </param>
/// <returns>The data view with original names restored and temporary columns dropped.</returns>
private static IDataView UnaliasIfNeeded(IHostEnvironment env, IDataView input, KeyValuePair<string, string>[] hiddenNames)
{
    if (Utils.Size(hiddenNames) == 0)
    {
        return input;
    }

    // Build the copy-back instructions and the list of temporary names in one pass.
    var restoreColumns = new ColumnCopyingTransformer.Column[hiddenNames.Length];
    var tempNames = new string[hiddenNames.Length];
    for (int i = 0; i < hiddenNames.Length; i++)
    {
        restoreColumns[i] = new ColumnCopyingTransformer.Column() { Name = hiddenNames[i].Key, Source = hiddenNames[i].Value };
        tempNames[i] = hiddenNames[i].Value;
    }

    var copyArgs = new ColumnCopyingTransformer.Arguments() { Column = restoreColumns };
    IDataView restored = ColumnCopyingTransformer.Create(env, copyArgs, input);
    return ColumnSelectingTransformer.CreateDrop(env, restored, tempNames);
}
/// <summary>
/// Workout for <c>NormalizingEstimator</c>: loads the iris training file, builds an estimator with
/// min-max, binning, supervised binning, mean-variance and log-mean-variance columns, runs it through
/// <c>TestEstimatorCore</c> with two extra data views, then saves the normalized data (without
/// "float0") and calls <c>CheckEquality</c> on the saved file.
/// </summary>
public void NormalizerWorkout()
{
    string irisPath = GetDataPath(TestDatasets.iris.trainFilename);

    var loaderArgs = new TextLoader.Arguments
    {
        Column = new[]
        {
            new TextLoader.Column("float1", DataKind.R4, 1),
            new TextLoader.Column("float4", DataKind.R4, new[] { new TextLoader.Range(1, 4) }),
            new TextLoader.Column("double1", DataKind.R8, 1),
            new TextLoader.Column("double4", DataKind.R8, new[] { new TextLoader.Range(1, 4) }),
            new TextLoader.Column("int1", DataKind.I4, 0),
            new TextLoader.Column("float0", DataKind.R4, new[] { new TextLoader.Range { Min = 1, VariableEnd = true } }),
        },
        HasHeader = true
    };
    var loader = new TextLoader(Env, loaderArgs, new MultiFileSource(irisPath));

    // One estimator covering every normalizer kind for scalar/vector float and double columns.
    var normalizer = new NormalizingEstimator(Env,
        new NormalizingEstimator.MinMaxColumn("float1"),
        new NormalizingEstimator.MinMaxColumn("float4"),
        new NormalizingEstimator.MinMaxColumn("double1"),
        new NormalizingEstimator.MinMaxColumn("double4"),
        new NormalizingEstimator.BinningColumn("float1", "float1bin"),
        new NormalizingEstimator.BinningColumn("float4", "float4bin"),
        new NormalizingEstimator.BinningColumn("double1", "double1bin"),
        new NormalizingEstimator.BinningColumn("double4", "double4bin"),
        new NormalizingEstimator.SupervisedBinningColumn("float1", "float1supervisedbin", labelColumn: "int1"),
        new NormalizingEstimator.SupervisedBinningColumn("float4", "float4supervisedbin", labelColumn: "int1"),
        new NormalizingEstimator.SupervisedBinningColumn("double1", "double1supervisedbin", labelColumn: "int1"),
        new NormalizingEstimator.SupervisedBinningColumn("double4", "double4supervisedbin", labelColumn: "int1"),
        new NormalizingEstimator.MeanVarColumn("float1", "float1mv"),
        new NormalizingEstimator.MeanVarColumn("float4", "float4mv"),
        new NormalizingEstimator.MeanVarColumn("double1", "double1mv"),
        new NormalizingEstimator.MeanVarColumn("double4", "double4mv"),
        new NormalizingEstimator.LogMeanVarColumn("float1", "float1lmv"),
        new NormalizingEstimator.LogMeanVarColumn("float4", "float4lmv"),
        new NormalizingEstimator.LogMeanVarColumn("double1", "double1lmv"),
        new NormalizingEstimator.LogMeanVarColumn("double4", "double4lmv"));

    var irisData = loader.Read(irisPath);

    // Two views handed to TestEstimatorCore as its fourth argument.
    var badScalar = new ColumnCopyingTransformer(Env, ("int1", "float1")).Transform(irisData);
    var badVector = new ColumnCopyingTransformer(Env, ("float0", "float4")).Transform(irisData);

    TestEstimatorCore(normalizer, irisData, null, badScalar);
    TestEstimatorCore(normalizer, irisData, null, badVector);

    // Save the normalized data, minus "float0", for the equality check below.
    var normalizedPath = GetOutputPath("NormalizerEstimator", "normalized.tsv");
    using (var channel = Env.Start("save"))
    {
        var textSaver = new TextSaver(Env, new TextSaver.Arguments { Silent = true });
        using (var stream = File.Create(normalizedPath))
        {
            var withoutFloat0 = ColumnSelectingTransformer.CreateDrop(Env, normalizer.Fit(irisData).Transform(irisData), "float0");
            DataSaverUtils.SaveDataView(channel, textSaver, withoutFloat0, stream, keepHidden: true);
        }
    }

    CheckEquality("NormalizerEstimator", "normalized.tsv");
    Done();
}