/// <summary>
/// Entry point that makes sure a regression label column is of type <c>NumberDataViewType.Single</c>.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="input">Entry-point input carrying the data view and the label column name.</param>
/// <returns>
/// A transform output wrapping either the result of <c>NopTransform.CreateIfNeeded</c> (the label is
/// already Single, or is not a numeric type at all) or the data produced by a
/// <c>TypeConvertingTransformer</c> that converts the label to Single under the same column name.
/// </returns>
/// <remarks>Throws the exception built by <c>host.Except</c> when the label column is not in the schema.</remarks>
public static CommonOutputs.TransformOutput PrepareRegressionLabel(IHostEnvironment env, RegressionLabelInput input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register("PrepareRegressionLabel");
    host.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(host, input);

    var labelColumn = input.Data.Schema.GetColumnOrNull(input.LabelColumn);
    if (!labelColumn.HasValue)
    {
        throw host.Except($"Column '{input.LabelColumn}' not found.");
    }

    var labelType = labelColumn.Value.Type;

    // Only numeric labels that are not already Single get converted; everything else is left alone.
    bool leaveUntouched = labelType == NumberDataViewType.Single || !(labelType is NumberDataViewType);
    if (!leaveUntouched)
    {
        var columnOptions = new TypeConvertingEstimator.ColumnOptions(input.LabelColumn, DataKind.Single, input.LabelColumn);
        var converter = new TypeConvertingTransformer(host, columnOptions);
        var converted = converter.Transform(input.Data);

        var convertedOutput = new CommonOutputs.TransformOutput();
        convertedOutput.Model = new TransformModelImpl(env, converted, input.Data);
        convertedOutput.OutputData = converted;
        return convertedOutput;
    }

    var passThrough = NopTransform.CreateIfNeeded(env, input.Data);
    var passThroughOutput = new CommonOutputs.TransformOutput();
    passThroughOutput.Model = new TransformModelImpl(env, passThrough, input.Data);
    passThroughOutput.OutputData = passThrough;
    return passThroughOutput;
}
/// <summary>
/// Factory method for SignatureDataTransform.
/// Validates the arguments, runs <c>TrainCore</c> to fill the score buffer, and then either returns
/// the result of <c>NopTransform.CreateIfNeeded</c> (when <c>CreateDropSlotsColumn</c> yields null)
/// or applies a <c>SlotsDroppingTransformer</c> built from the returned column description.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="options">Transform options; validated through <c>options.Check</c>.</param>
/// <param name="input">Input data view; must not be null.</param>
/// <returns>The resulting data transform.</returns>
private static IDataTransform Create(IHostEnvironment env, Options options, IDataView input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register(RegistrationName);
    host.CheckValue(options, nameof(options));
    host.CheckValue(input, nameof(input));
    options.Check(host);

    VBuffer<float> scores = default(VBuffer<float>);
    TrainCore(host, input, options, ref scores);

    using (var channel = host.Start("Dropping Slots"))
    {
        var column = CreateDropSlotsColumn(options, in scores, out int selectedCount);
        if (column == null)
        {
            channel.Info("No features are being dropped.");
            return NopTransform.CreateIfNeeded(host, input);
        }

        channel.Info(MessageSensitivity.Schema, "Selected {0} slots out of {1} in column '{2}'", selectedCount, scores.Length, options.FeatureColumn);

        var dropper = new SlotsDroppingTransformer(host, column);
        var dropped = dropper.Transform(input);

        // NOTE(review): 'as' yields null if the transformed view does not implement IDataTransform;
        // confirm callers tolerate that or that it cannot happen.
        return dropped as IDataTransform;
    }
}
/// <summary>
/// Create method corresponding to SignatureDataTransform.
/// Validates the arguments, runs <c>TrainCore</c> to fill the score buffer, and then either returns
/// the result of <c>NopTransform.CreateIfNeeded</c> (when <c>CreateDropSlotsColumn</c> yields null)
/// or a <c>DropSlotsTransform</c> configured with the single returned column.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="args">Transform arguments; validated through <c>args.Check</c>.</param>
/// <param name="input">Input data view; must not be null.</param>
/// <returns>The resulting data transform.</returns>
public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register(RegistrationName);
    host.CheckValue(args, nameof(args));
    host.CheckValue(input, nameof(input));
    args.Check(host);

    VBuffer<float> scores = default(VBuffer<float>);
    TrainCore(host, input, args, ref scores);

    using (var channel = host.Start("Dropping Slots"))
    {
        var column = CreateDropSlotsColumn(args, ref scores, out int selectedCount);
        if (column == null)
        {
            channel.Info("No features are being dropped.");
            return NopTransform.CreateIfNeeded(host, input);
        }

        channel.Info(MessageSensitivity.Schema, "Selected {0} slots out of {1} in column '{2}'", selectedCount, scores.Length, args.FeatureColumn);

        var dropArgs = new DropSlotsTransform.Arguments { Column = new[] { column } };
        return new DropSlotsTransform(host, dropArgs, input);
    }
}
/// <summary>
/// Entry point that maps a predicted-label column back to values through a
/// <c>KeyToValueMappingTransformer</c>, unless the column is already numeric or boolean.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="input">Entry-point input carrying the data view and the predicted-label column name.</param>
/// <returns>
/// A transform output wrapping either the result of <c>NopTransform.CreateIfNeeded</c> (numeric or
/// boolean column) or the data produced by the key-to-value mapping.
/// </returns>
/// <remarks>
/// Throws the exception built by <c>host.ExceptSchemaMismatch</c> when the predicted-label column
/// is not in the schema.
/// </remarks>
public static CommonOutputs.TransformOutput ConvertPredictedLabel(IHostEnvironment env, PredictedLabelInput input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register("ConvertPredictedLabel");
    host.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(host, input);

    var column = input.Data.Schema.GetColumnOrNull(input.PredictedLabelColumn);
    if (!column.HasValue)
    {
        throw host.ExceptSchemaMismatch(nameof(input), "label", input.PredictedLabelColumn);
    }

    var columnType = column.Value.Type;
    bool leaveUntouched = columnType is NumberDataViewType || columnType is BooleanDataViewType;
    if (!leaveUntouched)
    {
        var mapper = new KeyToValueMappingTransformer(host, input.PredictedLabelColumn);
        var mapped = mapper.Transform(input.Data);

        var mappedOutput = new CommonOutputs.TransformOutput();
        mappedOutput.Model = new TransformModelImpl(env, mapped, input.Data);
        mappedOutput.OutputData = mapped;
        return mappedOutput;
    }

    var passThrough = NopTransform.CreateIfNeeded(env, input.Data);
    var passThroughOutput = new CommonOutputs.TransformOutput();
    passThroughOutput.Model = new TransformModelImpl(env, passThrough, input.Data);
    passThroughOutput.OutputData = passThrough;
    return passThroughOutput;
}
/// <summary>
/// Entry point that maps a predicted-label column back to values through a
/// <c>KeyToValueMappingTransformer</c>, unless the column type reports itself as a number or a bool.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="input">Entry-point input carrying the data view and the predicted-label column name.</param>
/// <returns>
/// A transform output wrapping either the result of <c>NopTransform.CreateIfNeeded</c> (number or
/// bool column) or the data produced by the key-to-value mapping.
/// </returns>
/// <remarks>Throws the exception built by <c>host.Except</c> when the column is not in the schema.</remarks>
public static CommonOutputs.TransformOutput ConvertPredictedLabel(IHostEnvironment env, PredictedLabelInput input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register("ConvertPredictedLabel");
    host.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(host, input);

    if (!input.Data.Schema.TryGetColumnIndex(input.PredictedLabelColumn, out int columnIndex))
    {
        throw host.Except($"Column '{input.PredictedLabelColumn}' not found.");
    }

    var columnType = input.Data.Schema[columnIndex].Type;
    bool leaveUntouched = columnType.IsNumber || columnType.IsBool;
    if (!leaveUntouched)
    {
        var mapper = new KeyToValueMappingTransformer(host, input.PredictedLabelColumn);
        var mapped = mapper.Transform(input.Data);

        var mappedOutput = new CommonOutputs.TransformOutput();
        mappedOutput.Model = new TransformModelImpl(env, mapped, input.Data);
        mappedOutput.OutputData = mapped;
        return mappedOutput;
    }

    var passThrough = NopTransform.CreateIfNeeded(env, input.Data);
    var passThroughOutput = new CommonOutputs.TransformOutput();
    passThroughOutput.Model = new TransformModelImpl(env, passThrough, input.Data);
    passThroughOutput.OutputData = passThrough;
    return passThroughOutput;
}
/// <summary>
/// Entry point that makes sure a regression label column is of type <c>NumberType.R4</c>.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="input">Entry-point input carrying the data view and the label column name.</param>
/// <returns>
/// A transform output wrapping either the result of <c>NopTransform.CreateIfNeeded</c> (the label is
/// already R4, or is not a <c>NumberType</c> at all) or the data produced by a
/// <c>TypeConvertingTransformer</c> that converts the label to R4 under the same column name.
/// </returns>
/// <remarks>Throws the exception built by <c>host.Except</c> when the label column is not in the schema.</remarks>
public static CommonOutputs.TransformOutput PrepareRegressionLabel(IHostEnvironment env, RegressionLabelInput input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register("PrepareRegressionLabel");
    host.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(host, input);

    var labelCol = input.Data.Schema.GetColumnOrNull(input.LabelColumn);
    if (!labelCol.HasValue)
    {
        throw host.Except($"Column '{input.LabelColumn}' not found.");
    }

    var labelType = labelCol.Value.Type;
    if (labelType == NumberType.R4 || !(labelType is NumberType))
    {
        // Already R4, or not numeric: there is nothing to convert.
        var nop = NopTransform.CreateIfNeeded(env, input.Data);
        return new CommonOutputs.TransformOutput { Model = new TransformModelImpl(env, nop, input.Data), OutputData = nop };
    }

    // A single ColumnInfo describes the whole conversion: the label column is converted to R4
    // and written back under the same name.
    var labelConversion = new TypeConvertingTransformer.ColumnInfo(input.LabelColumn, DataKind.R4, input.LabelColumn);
    var xf = new TypeConvertingTransformer(host, labelConversion).Transform(input.Data);
    return new CommonOutputs.TransformOutput { Model = new TransformModelImpl(env, xf, input.Data), OutputData = xf };
}
/// <summary>
/// Entry point that converts a classification label column to a key type through
/// <c>ValueToKeyMappingTransformer</c>, unless the label is already a <c>KeyType</c> or <c>BoolType</c>.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="input">
/// Entry-point input carrying the data view, the label column name and the <c>TextKeyValues</c> setting.
/// </param>
/// <returns>
/// A transform output wrapping either the result of <c>NopTransform.CreateIfNeeded</c> (key or bool
/// label) or the value-to-key mapping transform applied to the label column in place.
/// </returns>
/// <remarks>
/// Throws the exception built by <c>host.ExceptSchemaMismatch</c> when the label column is not in the schema.
/// </remarks>
public static CommonOutputs.TransformOutput PrepareClassificationLabel(IHostEnvironment env, ClassificationLabelInput input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register("PrepareClassificationLabel");
    host.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(host, input);

    var labelColumn = input.Data.Schema.GetColumnOrNull(input.LabelColumn);
    if (!labelColumn.HasValue)
    {
        throw host.ExceptSchemaMismatch(nameof(input), "Label", input.LabelColumn);
    }

    var labelType = labelColumn.Value.Type;
    bool leaveUntouched = labelType is KeyType || labelType is BoolType;
    if (!leaveUntouched)
    {
        // Map the label onto itself, with keys sorted by value.
        var mappingArgs = new ValueToKeyMappingTransformer.Arguments();
        var labelMapping = new ValueToKeyMappingTransformer.Column();
        labelMapping.Name = input.LabelColumn;
        labelMapping.Source = input.LabelColumn;
        labelMapping.TextKeyValues = input.TextKeyValues;
        labelMapping.Sort = ValueToKeyMappingTransformer.SortOrder.Value;
        mappingArgs.Column = new[] { labelMapping };

        var mapped = ValueToKeyMappingTransformer.Create(host, mappingArgs, input.Data);

        var mappedOutput = new CommonOutputs.TransformOutput();
        mappedOutput.Model = new TransformModelImpl(env, mapped, input.Data);
        mappedOutput.OutputData = mapped;
        return mappedOutput;
    }

    var passThrough = NopTransform.CreateIfNeeded(env, input.Data);
    var passThroughOutput = new CommonOutputs.TransformOutput();
    passThroughOutput.Model = new TransformModelImpl(env, passThrough, input.Data);
    passThroughOutput.OutputData = passThrough;
    return passThroughOutput;
}
/// <summary>
/// Entry point that, for a binary classification predictor with exactly two label names, renames
/// the score columns by appending <c>"." + positive class name</c>, where the positive class is the
/// second label name. The renaming is done by copying each column to the new name and then
/// dropping the original.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="input">Entry-point input carrying the predictor model and the scored data view.</param>
/// <returns>
/// A transform output wrapping the copy-then-drop transform chain, or the result of
/// <c>NopTransform.CreateIfNeeded</c> when the predictor is not a binary classifier or the label
/// names are unavailable or not exactly two.
/// </returns>
public static CommonOutputs.TransformOutput RenameBinaryPredictionScoreColumns(IHostEnvironment env, RenameBinaryPredictionScoreColumnsInput input)
{
    Contracts.CheckValue(env, nameof(env));
    // NOTE(review): the host is registered as "ScoreModel", not under this method's name - confirm intentional.
    var host = env.Register("ScoreModel");
    host.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(host, input);

    bool isBinary = input.PredictorModel.Predictor.PredictionKind == PredictionKind.BinaryClassification;
    if (!isBinary)
    {
        return PassThrough();
    }

    var labelNames = input.PredictorModel.GetLabelInfo(host, out DataViewType labelType);
    bool hasTwoLabels = labelNames != null && labelNames.Length == 2;
    if (!hasTwoLabels)
    {
        return PassThrough();
    }

    var positiveClass = labelNames[1];

    // Rename all the score columns.
    var schema = input.Data.Schema;
    var maxScoreId = schema.GetMaxAnnotationKind(out int colMax, AnnotationUtils.Kinds.ScoreColumnSetId);
    var renames = new List<(string name, string source)>();
    for (int col = 0; col < schema.Count; col++)
    {
        // Hidden columns, and columns rejected by ShouldAddColumn, are skipped.
        if (schema[col].IsHidden || !ShouldAddColumn(schema, col, null, maxScoreId))
        {
            continue;
        }

        // Do not rename the PredictedLabel column.
        var scoreKind = default(ReadOnlyMemory<char>);
        bool isPredictedLabel =
            schema.TryGetAnnotation(TextDataViewType.Instance, AnnotationUtils.Kinds.ScoreValueKind, col, ref scoreKind)
            && ReadOnlyMemoryUtils.EqualsStr(AnnotationUtils.Const.ScoreValueKind.PredictedLabel, scoreKind);
        if (isPredictedLabel)
        {
            continue;
        }

        var source = schema[col].Name;
        var name = source + "." + positiveClass;
        renames.Add((name, source));
    }

    var copier = new ColumnCopyingTransformer(env, renames.ToArray());
    var copied = copier.Transform(input.Data);
    var originals = renames.Select(pair => pair.source).ToArray();
    var renamed = ColumnSelectingTransformer.CreateDrop(env, copied, originals);

    var renamedOutput = new CommonOutputs.TransformOutput();
    renamedOutput.Model = new TransformModelImpl(env, renamed, input.Data);
    renamedOutput.OutputData = renamed;
    return renamedOutput;

    // Shared fallback for every case in which no column is renamed.
    CommonOutputs.TransformOutput PassThrough()
    {
        var newView = NopTransform.CreateIfNeeded(env, input.Data);
        var output = new CommonOutputs.TransformOutput();
        output.Model = new TransformModelImpl(env, newView, input.Data);
        output.OutputData = newView;
        return output;
    }
}
/// <summary>
/// Create method corresponding to SignatureDataTransform.
/// Validates the arguments, de-duplicates the requested columns, computes scores with
/// <c>MutualInformationFeatureSelectionUtils.TrainCore</c>, derives a threshold from
/// <c>args.SlotsInOutput</c>, and returns a <c>DropSlotsTransform</c> for the slots to drop, or
/// the result of <c>NopTransform.CreateIfNeeded</c> when there is nothing to drop.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="args">
/// Transform arguments. At least one column is required, <c>SlotsInOutput</c> must be positive,
/// <c>LabelColumn</c> must not be white space and <c>NumBins</c> must be greater than 1.
/// </param>
/// <param name="input">Input data view; must not be null.</param>
/// <returns>The resulting data transform.</returns>
public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register(RegistrationName);
    host.CheckValue(args, nameof(args));
    host.CheckValue(input, nameof(input));
    host.CheckUserArg(Utils.Size(args.Column) > 0, nameof(args.Column));
    host.CheckUserArg(args.SlotsInOutput > 0, nameof(args.SlotsInOutput));
    host.CheckNonWhiteSpace(args.LabelColumn, nameof(args.LabelColumn));
    host.Check(args.NumBins > 1, "numBins must be greater than 1.");

    using (var channel = host.Start("Selecting Slots"))
    {
        channel.Info("Computing mutual information");
        var timer = Stopwatch.StartNew();

        // Duplicate column names are reported once each and otherwise ignored.
        var uniqueColumns = new HashSet<string>();
        foreach (var columnName in args.Column)
        {
            bool isNew = uniqueColumns.Add(columnName);
            if (!isNew)
            {
                channel.Warning("Column '{0}' specified multiple time.", columnName);
            }
        }

        var columnNames = uniqueColumns.ToArray();
        var columnSizes = new int[columnNames.Length];
        var scores = MutualInformationFeatureSelectionUtils.TrainCore(host, input, args.LabelColumn, columnNames, args.NumBins, columnSizes);
        timer.Stop();
        channel.Info("Finished mutual information computation in {0}", timer.Elapsed);

        channel.Info("Selecting features to drop");
        var threshold = ComputeThreshold(scores, args.SlotsInOutput, out int tiedScoresToKeep);
        var dropColumns = CreateDropSlotsColumns(columnNames, columnNames.Length, scores, threshold, tiedScoresToKeep, out int[] selectedCount);

        if (columns_AreEmpty(dropColumns.Count))
        {
            channel.Info("No features are being dropped.");
            return NopTransform.CreateIfNeeded(host, input);
        }

        for (int idx = 0; idx < selectedCount.Length; idx++)
        {
            channel.Info("Selected {0} slots out of {1} in column '{2}'", selectedCount[idx], columnSizes[idx], columnNames[idx]);
        }

        channel.Info("Total number of slots selected: {0}", selectedCount.Sum());

        var dropArgs = new DropSlotsTransform.Arguments { Column = dropColumns.ToArray() };
        return new DropSlotsTransform(host, dropArgs, input);
    }

    // Same test as the original inline check: a count of zero (or less) means nothing to drop.
    bool columns_AreEmpty(int count)
    {
        return count <= 0;
    }
}
/// <summary>
/// Entry point that converts a classification label column to a key type through
/// <c>ValueToKeyMappingTransformer</c>, unless the label type reports itself as a key or a bool.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="input">
/// Entry-point input carrying the data view, the label column name and the <c>TextKeyValues</c> setting.
/// </param>
/// <returns>
/// A transform output wrapping either the result of <c>NopTransform.CreateIfNeeded</c> (key or bool
/// label) or the value-to-key mapping transform applied to the label column in place.
/// </returns>
/// <remarks>Throws the exception built by <c>host.Except</c> when the label column is not in the schema.</remarks>
public static CommonOutputs.TransformOutput PrepareClassificationLabel(IHostEnvironment env, ClassificationLabelInput input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register("PrepareClassificationLabel");
    host.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(host, input);

    if (!input.Data.Schema.TryGetColumnIndex(input.LabelColumn, out int labelIndex))
    {
        throw host.Except($"Column '{input.LabelColumn}' not found.");
    }

    var labelType = input.Data.Schema[labelIndex].Type;
    bool leaveUntouched = labelType.IsKey || labelType.IsBool;
    if (!leaveUntouched)
    {
        // Map the label onto itself, with keys sorted by value.
        var mappingArgs = new ValueToKeyMappingTransformer.Arguments();
        var labelMapping = new ValueToKeyMappingTransformer.Column();
        labelMapping.Name = input.LabelColumn;
        labelMapping.Source = input.LabelColumn;
        labelMapping.TextKeyValues = input.TextKeyValues;
        labelMapping.Sort = ValueToKeyMappingTransformer.SortOrder.Value;
        mappingArgs.Column = new[] { labelMapping };

        var mapped = ValueToKeyMappingTransformer.Create(host, mappingArgs, input.Data);

        var mappedOutput = new CommonOutputs.TransformOutput();
        mappedOutput.Model = new TransformModelImpl(env, mapped, input.Data);
        mappedOutput.OutputData = mapped;
        return mappedOutput;
    }

    var passThrough = NopTransform.CreateIfNeeded(env, input.Data);
    var passThroughOutput = new CommonOutputs.TransformOutput();
    passThroughOutput.Model = new TransformModelImpl(env, passThrough, input.Data);
    passThroughOutput.OutputData = passThrough;
    return passThroughOutput;
}
/// <summary>
/// Entry point that makes sure a regression label column is of type <c>NumberType.R4</c>, using a
/// <c>ConvertTransform</c> when a conversion is required.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="input">Entry-point input carrying the data view and the label column name.</param>
/// <returns>
/// A transform output wrapping either the result of <c>NopTransform.CreateIfNeeded</c> (the label is
/// already R4, or its type does not report itself as a number) or a <c>ConvertTransform</c> that
/// converts the label to R4 under the same column name.
/// </returns>
/// <remarks>Throws the exception built by <c>host.Except</c> when the label column is not in the schema.</remarks>
public static CommonOutputs.TransformOutput PrepareRegressionLabel(IHostEnvironment env, RegressionLabelInput input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register("PrepareRegressionLabel");
    host.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(host, input);

    if (!input.Data.Schema.TryGetColumnIndex(input.LabelColumn, out int labelIndex))
    {
        throw host.Except($"Column '{input.LabelColumn}' not found.");
    }

    var labelType = input.Data.Schema.GetColumnType(labelIndex);
    bool leaveUntouched = labelType == NumberType.R4 || !labelType.IsNumber;
    if (!leaveUntouched)
    {
        // Convert the label in place: same source and destination name, result type R4.
        var convertArgs = new ConvertTransform.Arguments();
        var labelConversion = new ConvertTransform.Column();
        labelConversion.Name = input.LabelColumn;
        labelConversion.Source = input.LabelColumn;
        labelConversion.ResultType = DataKind.R4;
        convertArgs.Column = new[] { labelConversion };

        var converted = new ConvertTransform(host, convertArgs, input.Data);

        var convertedOutput = new CommonOutputs.TransformOutput();
        convertedOutput.Model = new TransformModel(env, converted, input.Data);
        convertedOutput.OutputData = converted;
        return convertedOutput;
    }

    var passThrough = NopTransform.CreateIfNeeded(env, input.Data);
    var passThroughOutput = new CommonOutputs.TransformOutput();
    passThroughOutput.Model = new TransformModel(env, passThrough, input.Data);
    passThroughOutput.OutputData = passThrough;
    return passThroughOutput;
}
/// <summary>
/// Create method corresponding to SignatureDataTransform.
/// Validates the arguments, computes per-column scores with <c>CountFeatureSelectionUtils.Train</c>,
/// and returns a <c>DropSlotsTransform</c> for the slots chosen by <c>CreateDropSlotsColumns</c>, or
/// the result of <c>NopTransform.CreateIfNeeded</c> when there is nothing to drop.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="args">Transform arguments. At least one column is required and <c>Count</c> must be positive.</param>
/// <param name="input">Input data view; must not be null.</param>
/// <returns>The resulting data transform.</returns>
public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register(RegistrationName);
    host.CheckValue(args, nameof(args));
    host.CheckValue(input, nameof(input));
    host.CheckUserArg(Utils.Size(args.Column) > 0, nameof(args.Column));
    host.CheckUserArg(args.Count > 0, nameof(args.Count));

    var scores = CountFeatureSelectionUtils.Train(host, input, args.Column, out int[] columnSizes);
    var columnCount = args.Column.Length;

    using (var channel = host.Start("Dropping Slots"))
    {
        var dropColumns = CreateDropSlotsColumns(args, columnCount, scores, out int[] selectedCount);
        if (dropColumns.Count <= 0)
        {
            channel.Info("No features are being dropped.");
            return NopTransform.CreateIfNeeded(host, input);
        }

        // Report the selection per column, then the grand total.
        for (int idx = 0; idx < selectedCount.Length; idx++)
        {
            channel.Info(MessageSensitivity.Schema, "Selected {0} slots out of {1} in column '{2}'", selectedCount[idx], columnSizes[idx], args.Column[idx]);
        }

        channel.Info("Total number of slots selected: {0}", selectedCount.Sum());

        var dropArgs = new DropSlotsTransform.Arguments { Column = dropColumns.ToArray() };
        return new DropSlotsTransform(host, dropArgs, input);
    }
}
/// <summary>
/// Entry point that, for a binary classification predictor with exactly two label names, renames
/// the score columns by appending <c>"." + positive class name</c>, where the positive class is the
/// second label name. The renaming is done with a <c>CopyColumnsTransform</c> followed by a
/// <c>DropColumnsTransform</c> that removes the original columns.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="input">Entry-point input carrying the predictor model and the scored data view.</param>
/// <returns>
/// A transform output wrapping the copy-then-drop transform chain, or the result of
/// <c>NopTransform.CreateIfNeeded</c> when the predictor is not a binary classifier or the label
/// names are unavailable or not exactly two.
/// </returns>
public static CommonOutputs.TransformOutput RenameBinaryPredictionScoreColumns(IHostEnvironment env, RenameBinaryPredictionScoreColumnsInput input)
{
    Contracts.CheckValue(env, nameof(env));
    // NOTE(review): the host is registered as "ScoreModel", not under this method's name - confirm intentional.
    var host = env.Register("ScoreModel");
    host.CheckValue(input, nameof(input));
    EntryPointUtils.CheckInputArgs(host, input);

    bool isBinary = input.PredictorModel.Predictor.PredictionKind == PredictionKind.BinaryClassification;
    if (!isBinary)
    {
        return PassThrough();
    }

    var labelNames = input.PredictorModel.GetLabelInfo(host, out ColumnType labelType);
    bool hasTwoLabels = labelNames != null && labelNames.Length == 2;
    if (!hasTwoLabels)
    {
        return PassThrough();
    }

    var positiveClass = labelNames[1];

    // Rename all the score columns.
    var schema = input.Data.Schema;
    var maxScoreId = schema.GetMaxMetadataKind(out int colMax, MetadataUtils.Kinds.ScoreColumnSetId);
    var renames = new List<CopyColumnsTransform.Column>();
    for (int col = 0; col < schema.ColumnCount; col++)
    {
        // Hidden columns, and columns rejected by ShouldAddColumn, are skipped.
        if (schema.IsHidden(col) || !ShouldAddColumn(schema, col, null, maxScoreId))
        {
            continue;
        }

        // Do not rename the PredictedLabel column.
        var scoreKind = default(DvText);
        bool isPredictedLabel =
            schema.TryGetMetadata(TextType.Instance, MetadataUtils.Kinds.ScoreValueKind, col, ref scoreKind)
            && scoreKind.EqualsStr(MetadataUtils.Const.ScoreValueKind.PredictedLabel);
        if (isPredictedLabel)
        {
            continue;
        }

        var source = schema.GetColumnName(col);
        var name = source + "." + positiveClass;

        var rename = new CopyColumnsTransform.Column();
        rename.Name = name;
        rename.Source = source;
        renames.Add(rename);
    }

    var copyArgs = new CopyColumnsTransform.Arguments();
    copyArgs.Column = renames.ToArray();
    var copied = new CopyColumnsTransform(env, copyArgs, input.Data);

    var dropArgs = new DropColumnsTransform.Arguments();
    dropArgs.Column = renames.Select(r => r.Source).ToArray();
    var renamed = new DropColumnsTransform(env, dropArgs, copied);

    var renamedOutput = new CommonOutputs.TransformOutput();
    renamedOutput.Model = new TransformModel(env, renamed, input.Data);
    renamedOutput.OutputData = renamed;
    return renamedOutput;

    // Shared fallback for every case in which no column is renamed.
    CommonOutputs.TransformOutput PassThrough()
    {
        var newView = NopTransform.CreateIfNeeded(env, input.Data);
        var output = new CommonOutputs.TransformOutput();
        output.Model = new TransformModel(env, newView, input.Data);
        output.OutputData = newView;
        return output;
    }
}