// Deserialization constructor: rehydrates a BindableMapper from a saved model context.
// The reads below must match the write order documented in the binary-format comment.
public BindableMapper(IHostEnvironment env, ModelLoadContext ctx)
{
    Contracts.CheckValue(env, nameof(env));
    _env = env;
    _env.CheckValue(ctx, nameof(ctx));
    ctx.CheckAtModel(GetVersionInfo());

    // *** Binary format ***
    // IFeatureContributionMapper: Predictor
    // int: topContributionsCount
    // int: bottomContributionsCount
    // bool: normalize
    // bool: stringify

    // Load the wrapped predictor from its model sub-directory, then build a generic
    // schema-bindable mapper around it for ordinary scoring.
    ctx.LoadModel<IFeatureContributionMapper, SignatureLoadModel>(env, out Predictor, ModelFileUtils.DirPredictor);
    GenericMapper = ScoreUtils.GetSchemaBindableMapper(_env, Predictor, null);
    _topContributionsCount = ctx.Reader.ReadInt32();
    // Persisted counts must be non-negative; a negative value means a corrupt model.
    Contracts.CheckDecode(0 <= _topContributionsCount);
    _bottomContributionsCount = ctx.Reader.ReadInt32();
    Contracts.CheckDecode(0 <= _bottomContributionsCount);
    _normalize = ctx.Reader.ReadBoolByte();
    Stringify = ctx.Reader.ReadBoolByte();
}
/// <summary>
/// This method takes a <see cref="RoleMappedData"/> as input, saves it as an in-memory <see cref="ZipArchive"/>
/// and returns two arrays indexed by the entries in the zip:
/// 1. An array of byte arrays, containing the byte sequences of each entry.
/// 2. An array of strings, containing the name of each entry.
///
/// This method is used for comparing pipelines. Its outputs can be passed to <see cref="CheckSamePipeline"/>
/// to check if this pipeline is identical to another pipeline.
/// </summary>
public static void SerializeRoleMappedData(IHostEnvironment env, IChannel ch, RoleMappedData data,
    out byte[][] dataSerialized, out string[] dataZipEntryNames)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ch, nameof(ch));
    ch.CheckValue(data, nameof(data));

    using (var ms = new MemoryStream())
    {
        // Serialize the pipeline into the memory stream, then re-open it as a zip archive.
        TrainUtils.SaveModel(env, ch, ms, null, data);
        var zip = new ZipArchive(ms);
        // Sort entries by name so the resulting arrays are deterministic and comparable.
        var entries = zip.Entries.OrderBy(e => e.FullName).ToArray();
        int count = Utils.Size(entries);
        dataSerialized = new byte[count][];
        dataZipEntryNames = new string[count];
        for (int i = 0; i < count; i++)
        {
            dataZipEntryNames[i] = entries[i].FullName;
            int length = (int)entries[i].Length;
            dataSerialized[i] = new byte[length];
            using (var s = entries[i].Open())
            {
                // Stream.Read is allowed to return fewer bytes than requested, so loop
                // until the entire entry has been consumed. A single unchecked Read here
                // could silently leave the tail of the buffer zero-filled.
                int total = 0;
                while (total < length)
                {
                    int read = s.Read(dataSerialized[i], total, length - total);
                    if (read <= 0)
                        throw ch.Except("Unexpected end of stream while reading zip entry '{0}'", entries[i].FullName);
                    total += read;
                }
            }
        }
    }
}
// Constructs a Nelder-Mead sweeper, validating all simplex coefficients and collecting
// the numeric parameters to sweep over. The first simplex is produced by a separate
// "first batch" sweeper supplied via the arguments.
public NelderMeadSweeper(IHostEnvironment env, Arguments args)
{
    Contracts.CheckValue(env, nameof(env));
    // The inside-contraction coefficient must lie strictly in (-1, 0).
    env.CheckUserArg(-1 < args.DeltaInsideContraction, nameof(args.DeltaInsideContraction), "Must be greater than -1");
    env.CheckUserArg(args.DeltaInsideContraction < 0, nameof(args.DeltaInsideContraction), "Must be less than 0");
    env.CheckUserArg(0 < args.DeltaOutsideContraction, nameof(args.DeltaOutsideContraction), "Must be greater than 0");
    // Coefficients must be strictly ordered: outside-contraction < reflection < expansion.
    env.CheckUserArg(args.DeltaReflection > args.DeltaOutsideContraction, nameof(args.DeltaReflection), "Must be greater than " + nameof(args.DeltaOutsideContraction));
    env.CheckUserArg(args.DeltaExpansion > args.DeltaReflection, nameof(args.DeltaExpansion), "Must be greater than " + nameof(args.DeltaReflection));
    // Shrink factor must lie strictly in (0, 1).
    env.CheckUserArg(0 < args.GammaShrink && args.GammaShrink < 1, nameof(args.GammaShrink), "Must be between 0 and 1");
    env.CheckValue(args.FirstBatchSweeper, nameof(args.FirstBatchSweeper), "First Batch Sweeper Contains Null Value");

    _args = args;

    _sweepParameters = new List<IValueGenerator>();
    foreach (var sweptParameter in args.SweptParameters)
    {
        var parameter = sweptParameter.CreateComponent(env);
        // REVIEW: ideas about how to support discrete values:
        // 1. assign each discrete value a random number (1-n) to make mirroring possible
        // 2. each time we need to mirror a discrete value, sample from the remaining value
        // 2.1. make the sampling non-uniform by learning "weights" for the different discrete values based on
        // the metric values that we get when using them. (For example, if, for a given discrete value, we get a bad result,
        // we lower its weight, but if we get a good result we increase its weight).
        var parameterNumeric = parameter as INumericValueGenerator;
        // Discrete parameters are not supported (see REVIEW above); only numeric ones are accepted.
        env.CheckUserArg(parameterNumeric != null, nameof(args.SweptParameters), "Nelder-Mead sweeper can only sweep over numeric parameters");
        _sweepParameters.Add(parameterNumeric);
    }

    _initSweeper = args.FirstBatchSweeper.CreateComponent(env, _sweepParameters.ToArray());
    _dim = _sweepParameters.Count;
    // A simplex needs at least two dimensions to be meaningful.
    env.CheckUserArg(_dim > 1, nameof(args.SweptParameters), "Nelder-Mead sweeper needs at least two parameters to sweep over.");

    // Initialize the optimization state machine: vertices sorted by run result,
    // starting in the reflection stage with no pending sweeps.
    _simplexVertices = new SortedList<IRunResult, Float[]>(new SimplexVertexComparer());
    _stage = OptimizationStage.NeedReflectionPoint;
    _pendingSweeps = new List<KeyValuePair<ParameterSet, Float[]>>();
    _pendingSweepsNotSubmitted = new Queue<KeyValuePair<ParameterSet, Float[]>>();
}
// Factory for SignatureLoadModel: reconstructs a custom-mapping transformer from a
// persisted contract name, loading and registering its defining assembly when the
// model was written by a version that recorded it.
private static ITransformer Create(IHostEnvironment env, ModelLoadContext ctx)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ctx, nameof(ctx));
    ctx.CheckAtModel(GetVersionInfo());

    var contractName = ctx.LoadString();

    // Newer models also persist the assembly name of the factory, so it can be
    // loaded and registered with the component catalog before lookup.
    bool hasAssemblyName = ctx.Header.ModelVerWritten >= VerAssemblyNameSaved;
    if (hasAssemblyName)
    {
        var assemblyName = ctx.LoadString();
        var loaded = Assembly.Load(assemblyName);
        env.ComponentCatalog.RegisterAssembly(loaded);
    }

    // Resolve the factory registered under this contract; it must implement ICustomMappingFactory.
    var candidate = env.ComponentCatalog.GetExtensionValue(env, typeof(CustomMappingFactoryAttributeAttribute), contractName);
    if (candidate is ICustomMappingFactory factory)
        return factory.CreateTransformer(env, contractName);

    throw env.Except($"The class with contract '{contractName}' must derive from '{typeof(CustomMappingFactory<,>).FullName}' or from '{typeof(StatefulCustomMappingFactory<,,>).FullName}'.");
}
/// <summary>
/// This function performs a number of checks on the inputs and, if appropriate and possible, will produce
/// a mapper with slots names on the output score column properly mapped. If this is not possible for any
/// reason, it will just return the input bound mapper.
/// </summary>
private static ISchemaBoundMapper WrapIfNeeded(IHostEnvironment env, ISchemaBoundMapper mapper, RoleMappedSchema trainSchema)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(mapper, nameof(mapper));
    env.CheckValueOrNull(trainSchema);

    // The idea is to surface the key values of the training label as slot-name
    // metadata on the score column. Each guard below is a condition required
    // for that to be possible; failing any of them just returns the input mapper.

    // No label identified in the training schema — nothing to propagate.
    if (trainSchema?.Label == null)
        return mapper;

    // The label must carry key-value metadata of a vector type.
    var labelMetadataSchema = trainSchema.Label.Value.Metadata.Schema;
    var keyValuesType = labelMetadataSchema.GetColumnOrNull(MetadataUtils.Kinds.KeyValues)?.Type as VectorType;
    if (keyValuesType == null)
        return mapper;

    // The mapper itself must support wrapping for this key-value type.
    if (!CanWrap(mapper, keyValuesType))
        return mapper;

    // All checks pass: wrap with the generic helper instantiated at the item's raw type.
    return Utils.MarshalInvoke(WrapCore<int>, keyValuesType.ItemType.RawType, env, mapper, trainSchema);
}
// Loads a pipeline-ensemble from a model context, dispatching on the persisted
// score-column kind to the matching concrete implementation.
public static SchemaBindablePipelineEnsembleBase Create(IHostEnvironment env, ModelLoadContext ctx)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ctx, nameof(ctx));
    ctx.CheckAtModel(GetVersionInfo());

    var scoreColumnKind = ctx.LoadNonEmptyString();

    // Binary classification carries a calibrator; regression/anomaly share the plain
    // single-output implementation; multiclass produces a vector of scores.
    if (scoreColumnKind == MetadataUtils.Const.ScoreColumnKind.BinaryClassification)
        return new ImplOneWithCalibrator(env, ctx, scoreColumnKind);
    if (scoreColumnKind == MetadataUtils.Const.ScoreColumnKind.Regression ||
        scoreColumnKind == MetadataUtils.Const.ScoreColumnKind.AnomalyDetection)
        return new ImplOne(env, ctx, scoreColumnKind);
    if (scoreColumnKind == MetadataUtils.Const.ScoreColumnKind.MultiClassClassification)
        return new ImplVec(env, ctx, scoreColumnKind);

    throw env.Except("Unknown score kind");
}
/// <summary>
/// Load a transform model.
/// </summary>
public TransformModel(IHostEnvironment env, Stream stream)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(stream, nameof(stream));

    // REVIEW: Should this preserve the "tags" for the transforms?
    using (var ch = env.Start("Loading transform model"))
    {
        _chain = ModelFileUtils.LoadPipeline(env, stream, new MultiFileSource(null), extractInnerPipe: true);
    }

    // Walk backwards through the transform chain until we reach a view that is not
    // itself a transform; that view's schema is the root schema of the pipeline.
    IDataView view = _chain;
    while (view is IDataTransform xf)
    {
        view = xf.Source;
        env.AssertValue(view);
    }
    _schemaRoot = view.Schema;
}
// Resolves this node's data view and binds it as the input of the experiment's
// data-view entry point. Throws if GetDataView yields null.
public void SetInput(IHostEnvironment environment, Experiment experiment)
{
    _dataView = GetDataView(environment);
    environment.CheckValue(_dataView, nameof(_dataView));
    experiment.SetInput(_dataViewEntryPoint.Data, _dataView);
}
// Re-applies this stateful filter transform to a new source data view,
// producing a fresh transform bound to that source.
public IDataTransform ApplyToData(IHostEnvironment env, IDataView newSource)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(newSource, nameof(newSource));

    var reapplied = new StatefulFilterTransform<TSrc, TDst, TState>(env, this, newSource);
    return reapplied;
}
/// <summary>
/// Returns the feature selection scores for each slot of each column.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="input">The input dataview.</param>
/// <param name="columns">The columns for which to compute the feature selection scores.</param>
/// <param name="colSizes">Outputs an array containing the vector sizes of the input columns</param>
/// <returns>A list of scores.</returns>
public static long[][] Train(IHostEnvironment env, IDataView input, string[] columns, out int[] colSizes)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(input, nameof(input));
    env.CheckParam(Utils.Size(columns) > 0, nameof(columns));

    var schema = input.Schema;
    var size = columns.Length;
    // activeInput marks which source columns the cursor must materialize.
    var activeInput = new bool[schema.ColumnCount];
    var colSrcs = new int[size];
    var colTypes = new ColumnType[size];
    colSizes = new int[size];
    // Resolve each requested column: it must exist and, if a vector, have a known size.
    for (int i = 0; i < size; i++)
    {
        int colSrc;
        var colName = columns[i];
        if (!schema.TryGetColumnIndex(colName, out colSrc))
        {
            throw env.ExceptUserArg(nameof(CountFeatureSelectionTransform.Arguments.Column), "Source column '{0}' not found", colName);
        }
        var colType = schema.GetColumnType(colSrc);
        if (colType.IsVector && !colType.IsKnownSizeVector)
        {
            throw env.ExceptUserArg(nameof(CountFeatureSelectionTransform.Arguments.Column), "Variable length column '{0}' is not allowed", colName);
        }
        activeInput[colSrc] = true;
        colSrcs[i] = colSrc;
        colTypes[i] = colType;
        colSizes[i] = colType.ValueCount;
    }

    var aggregators = new CountAggregator[size];
    long rowCur = 0;
    // Row count is only used for progress reporting; NaN when unknown.
    double rowCount = input.GetRowCount(true) ?? double.NaN;
    using (var pch = env.StartProgressChannel("Aggregating counts"))
    using (var cursor = input.GetRowCursor(col => activeInput[col]))
    {
        var header = new ProgressHeader(new[] { "rows" });
        pch.SetHeader(header, e => { e.SetProgress(0, rowCur, rowCount); });
        // One aggregator per column, vector vs. scalar chosen by the column type.
        for (int i = 0; i < size; i++)
        {
            if (colTypes[i].IsVector)
            {
                aggregators[i] = GetVecAggregator(cursor, colTypes[i], colSrcs[i]);
            }
            else
            {
                aggregators[i] = GetOneAggregator(cursor, colTypes[i], colSrcs[i]);
            }
        }
        // Single pass over the data: every aggregator consumes the current row.
        while (cursor.MoveNext())
        {
            for (int i = 0; i < size; i++)
            {
                aggregators[i].ProcessValue();
            }
            rowCur++;
        }
        pch.Checkpoint(rowCur);
    }
    return (aggregators.Select(a => a.Count).ToArray());
}
// Entry-point macro that expands a cross-validation request into a graph of nodes:
// a dataset splitter, one TrainTest subgraph per fold, converters that gather the
// per-fold outputs into arrays, and a final metrics combiner.
public static CommonOutputs.MacroOutput<Output> CrossValidate(
    IHostEnvironment env,
    Arguments input,
    EntryPointNode node)
{
    env.CheckValue(input, nameof(input));

    // This will be the final resulting list of nodes that is returned from the macro.
    var subGraphNodes = new List<EntryPointNode>();

    //the input transform model
    VariableBinding transformModelVarName = null;
    if (input.TransformModel != null)
    {
        transformModelVarName = node.GetInputVariable(nameof(input.TransformModel));
    }

    // Split the input data into folds.
    var exp = new Experiment(env);
    var cvSplit = new Models.CrossValidatorDatasetSplitter();
    cvSplit.Data.VarName = node.GetInputVariable("Data").ToJson();
    cvSplit.NumFolds = input.NumFolds;
    cvSplit.StratificationColumn = input.StratificationColumn;
    var cvSplitOutput = exp.Add(cvSplit);
    subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));

    // Per-fold output variables, filled in as each fold's subgraph is instantiated.
    var predModelVars = new Var<IPredictorModel>[input.NumFolds];
    var transformModelVars = new Var<ITransformModel>[input.NumFolds];
    var inputTransformModelVars = new Var<IPredictorModel>[input.NumFolds];
    var warningsVars = new Var<IDataView>[input.NumFolds];
    var overallMetricsVars = new Var<IDataView>[input.NumFolds];
    var instanceMetricsVars = new Var<IDataView>[input.NumFolds];
    var confusionMatrixVars = new Var<IDataView>[input.NumFolds];

    // Instantiate the subgraph for each fold.
    for (int k = 0; k < input.NumFolds; k++)
    {
        // Parse the nodes in input.Nodes into a temporary run context.
        var context = new RunContext(env);
        var graph = EntryPointNode.ValidateNodes(env, context, input.Nodes, node.Catalog);

        // Rename all the variables such that they don't conflict with the ones in the outer run context.
        var mapping = new Dictionary<string, string>();
        foreach (var entryPointNode in graph)
        {
            entryPointNode.RenameAllVariables(mapping);
        }

        // Instantiate a TrainTest entry point for this fold.
        var args = new TrainTestMacro.Arguments
        {
            Nodes = new JArray(graph.Select(n => n.ToJson()).ToArray()),
            TransformModel = null,
            LabelColumn = input.LabelColumn,
            GroupColumn = input.GroupColumn,
            WeightColumn = input.WeightColumn
        };

        if (transformModelVarName != null)
        {
            args.TransformModel = new Var<ITransformModel> { VarName = transformModelVarName.VariableName };
        }

        args.Inputs.Data = new Var<IDataView>
        {
            VarName = mapping[input.Inputs.Data.VarName]
        };

        // Wire the fold's requested outputs through the renamed variable mapping;
        // outputs not requested (or not produced by the subgraph) are nulled out.
        if (input.Outputs.PredictorModel != null && mapping.ContainsKey(input.Outputs.PredictorModel.VarName))
        {
            args.Outputs.PredictorModel = new Var<IPredictorModel>
            {
                VarName = mapping[input.Outputs.PredictorModel.VarName]
            };
        }
        else
        {
            args.Outputs.PredictorModel = null;
        }

        if (input.Outputs.TransformModel != null && mapping.ContainsKey(input.Outputs.TransformModel.VarName))
        {
            args.Outputs.TransformModel = new Var<ITransformModel>
            {
                VarName = mapping[input.Outputs.TransformModel.VarName]
            };
        }
        else
        {
            args.Outputs.TransformModel = null;
        }

        // Set train/test trainer kind to match.
        args.Kind = input.Kind;

        // Set the input bindings for the TrainTest entry point.
        var inputBindingMap = new Dictionary<string, List<ParameterBinding>>();
        var inputMap = new Dictionary<ParameterBinding, VariableBinding>();
        var trainingData = new SimpleParameterBinding(nameof(args.TrainingData));
        inputBindingMap.Add(nameof(args.TrainingData), new List<ParameterBinding> { trainingData });
        inputMap.Add(trainingData, new ArrayIndexVariableBinding(cvSplitOutput.TrainData.VarName, k));
        var testingData = new SimpleParameterBinding(nameof(args.TestingData));
        inputBindingMap.Add(nameof(args.TestingData), new List<ParameterBinding> { testingData });
        inputMap.Add(testingData, new ArrayIndexVariableBinding(cvSplitOutput.TestData.VarName, k));

        var outputMap = new Dictionary<string, string>();
        var transformModelVar = new Var<ITransformModel>();
        var predModelVar = new Var<IPredictorModel>();
        if (input.Outputs.PredictorModel == null)
        {
            // Transform-model path: combine the incoming transform model (if any)
            // with this fold's transform model.
            outputMap.Add(nameof(TrainTestMacro.Output.TransformModel), transformModelVar.VarName);
            transformModelVars[k] = transformModelVar;
            ML.Transforms.ModelCombiner.Output modelCombineOutput = null;
            if (transformModelVarName != null && transformModelVarName.VariableName != null)
            {
                var modelCombine = new ML.Transforms.ModelCombiner
                {
                    Models = new ArrayVar<ITransformModel>(
                        new Var<ITransformModel>[]
                        {
                            new Var<ITransformModel> { VarName = transformModelVarName.VariableName },
                            transformModelVar
                        })
                };

                exp.Reset();
                modelCombineOutput = exp.Add(modelCombine);
                subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
                transformModelVars[k] = modelCombineOutput.OutputModel;
            }
        }
        else
        {
            // Predictor-model path: combine the incoming transform model (if any)
            // with this fold's predictor model.
            outputMap.Add(nameof(TrainTestMacro.Output.PredictorModel), predModelVar.VarName);
            predModelVars[k] = predModelVar;
            ML.Transforms.TwoHeterogeneousModelCombiner.Output modelCombineOutput = null;
            if (transformModelVarName != null && transformModelVarName.VariableName != null)
            {
                var modelCombine = new ML.Transforms.TwoHeterogeneousModelCombiner
                {
                    TransformModel = { VarName = transformModelVarName.VariableName },
                    PredictorModel = predModelVar
                };
                exp.Reset();
                modelCombineOutput = exp.Add(modelCombine);
                subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
                predModelVars[k] = modelCombineOutput.PredictorModel;
            }
        }

        // Register this fold's evaluation outputs (warnings, metrics, confusion matrix).
        var warningVar = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestMacro.Output.Warnings), warningVar.VarName);
        warningsVars[k] = warningVar;
        var overallMetric = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestMacro.Output.OverallMetrics), overallMetric.VarName);
        overallMetricsVars[k] = overallMetric;
        var instanceMetric = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestMacro.Output.PerInstanceMetrics), instanceMetric.VarName);
        instanceMetricsVars[k] = instanceMetric;
        var confusionMatrix = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), confusionMatrix.VarName);
        confusionMatrixVars[k] = confusionMatrix;
        const string trainTestEvaluatorMacroEntryPoint = "Models.TrainTestEvaluator";
        subGraphNodes.Add(EntryPointNode.Create(env, trainTestEvaluatorMacroEntryPoint, args, node.Catalog, node.Context, inputBindingMap, inputMap, outputMap));
    }

    exp.Reset();

    // Convert predictors from all folds into an array of predictors.
    if (input.Outputs.PredictorModel == null)
    {
        var outModels = new ML.Data.TransformModelArrayConverter
        {
            TransformModel = new ArrayVar<ITransformModel>(transformModelVars)
        };
        var outModelsOutput = new ML.Data.TransformModelArrayConverter.Output();
        outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.TransformModel));
        exp.Add(outModels, outModelsOutput);
    }
    else
    {
        var outModels = new ML.Data.PredictorModelArrayConverter
        {
            Model = new ArrayVar<IPredictorModel>(predModelVars)
        };
        var outModelsOutput = new ML.Data.PredictorModelArrayConverter.Output();
        outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.PredictorModel));
        exp.Add(outModels, outModelsOutput);
    }

    // Convert warnings data views from all folds into an array of data views.
    var warnings = new ML.Data.IDataViewArrayConverter
    {
        Data = new ArrayVar<IDataView>(warningsVars)
    };
    var warningsOutput = new ML.Data.IDataViewArrayConverter.Output();
    exp.Add(warnings, warningsOutput);

    // Convert overall metrics data views from all folds into an array of data views.
    var overallMetrics = new ML.Data.IDataViewArrayConverter
    {
        Data = new ArrayVar<IDataView>(overallMetricsVars)
    };
    var overallMetricsOutput = new ML.Data.IDataViewArrayConverter.Output();
    exp.Add(overallMetrics, overallMetricsOutput);

    // Convert per instance data views from all folds into an array of data views.
    var instanceMetrics = new ML.Data.IDataViewArrayConverter
    {
        Data = new ArrayVar<IDataView>(instanceMetricsVars)
    };
    var instanceMetricsOutput = new ML.Data.IDataViewArrayConverter.Output();
    exp.Add(instanceMetrics, instanceMetricsOutput);

    // Confusion matrices only exist for classification trainers.
    ML.Data.IDataViewArrayConverter.Output confusionMatricesOutput = null;
    if (input.Kind == MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer ||
        input.Kind == MacroUtils.TrainerKinds.SignatureMultiClassClassifierTrainer)
    {
        // Convert confusion matrix data views from all folds into an array of data views.
        var confusionMatrices = new ML.Data.IDataViewArrayConverter
        {
            Data = new ArrayVar<IDataView>(confusionMatrixVars)
        };
        confusionMatricesOutput = new ML.Data.IDataViewArrayConverter.Output();
        exp.Add(confusionMatrices, confusionMatricesOutput);
    }

    var combineArgs = new CombineMetricsInput();
    combineArgs.Kind = input.Kind;
    combineArgs.LabelColumn = input.LabelColumn;
    combineArgs.WeightColumn = input.WeightColumn;
    combineArgs.GroupColumn = input.GroupColumn;

    // Set the input bindings for the CombineMetrics entry point.
    var combineInputBindingMap = new Dictionary<string, List<ParameterBinding>>();
    var combineInputMap = new Dictionary<ParameterBinding, VariableBinding>();
    var overallArray = new SimpleParameterBinding(nameof(combineArgs.OverallMetrics));
    combineInputBindingMap.Add(nameof(combineArgs.OverallMetrics), new List<ParameterBinding> { overallArray });
    combineInputMap.Add(overallArray, new SimpleVariableBinding(overallMetricsOutput.OutputData.VarName));
    var combinePerInstArray = new SimpleParameterBinding(nameof(combineArgs.PerInstanceMetrics));
    combineInputBindingMap.Add(nameof(combineArgs.PerInstanceMetrics), new List<ParameterBinding> { combinePerInstArray });
    combineInputMap.Add(combinePerInstArray, new SimpleVariableBinding(instanceMetricsOutput.OutputData.VarName));
    if (confusionMatricesOutput != null)
    {
        var combineConfArray = new SimpleParameterBinding(nameof(combineArgs.ConfusionMatrix));
        combineInputBindingMap.Add(nameof(combineArgs.ConfusionMatrix), new List<ParameterBinding> { combineConfArray });
        combineInputMap.Add(combineConfArray, new SimpleVariableBinding(confusionMatricesOutput.OutputData.VarName));
    }

    // Map the combiner's outputs onto this macro node's declared output variables.
    var combineOutputMap = new Dictionary<string, string>();
    var combineWarningVar = new Var<IDataView>();
    combineWarningVar.VarName = node.GetOutputVariableName(nameof(Output.Warnings));
    combineOutputMap.Add(nameof(Output.Warnings), combineWarningVar.VarName);
    var combineOverallMetric = new Var<IDataView>();
    combineOverallMetric.VarName = node.GetOutputVariableName(nameof(Output.OverallMetrics));
    combineOutputMap.Add(nameof(Output.OverallMetrics), combineOverallMetric.VarName);
    var combineInstanceMetric = new Var<IDataView>();
    combineInstanceMetric.VarName = node.GetOutputVariableName(nameof(Output.PerInstanceMetrics));
    combineOutputMap.Add(nameof(Output.PerInstanceMetrics), combineInstanceMetric.VarName);
    if (confusionMatricesOutput != null)
    {
        var combineConfusionMatrix = new Var<IDataView>();
        combineConfusionMatrix.VarName = node.GetOutputVariableName(nameof(Output.ConfusionMatrix));
        combineOutputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), combineConfusionMatrix.VarName);
    }

    subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
    subGraphNodes.Add(EntryPointNode.Create(env, "Models.CrossValidationResultsCombiner", combineArgs, node.Catalog, node.Context, combineInputBindingMap, combineInputMap, combineOutputMap));
    return (new CommonOutputs.MacroOutput<Output>() { Nodes = subGraphNodes });
}
// REVIEW: It would be nice to support propagation of select metadata.
// Creates a data view that applies a value mapper to one column, producing a new
// destination column. If the source column's type differs from the mapper's input
// type, a standard conversion is inserted via a reflected generic Impl instantiation.
public static IDataView Create<TSrc, TDst>(IHostEnvironment env, string name, IDataView input,
    string src, string dst, ColumnType typeSrc, ColumnType typeDst, ValueMapper<TSrc, TDst> mapper,
    ValueGetter<VBuffer<ReadOnlyMemory<char>>> keyValueGetter = null,
    ValueGetter<VBuffer<ReadOnlyMemory<char>>> slotNamesGetter = null)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckNonEmpty(name, nameof(name));
    env.CheckValue(input, nameof(input));
    env.CheckNonEmpty(src, nameof(src));
    env.CheckNonEmpty(dst, nameof(dst));
    env.CheckValue(typeSrc, nameof(typeSrc));
    env.CheckValue(typeDst, nameof(typeDst));
    env.CheckValue(mapper, nameof(mapper));
    // Key-value metadata only makes sense for key outputs; slot names only for known-size vectors.
    env.Check(keyValueGetter == null || typeDst.GetItemType() is KeyType);
    env.Check(slotNamesGetter == null || typeDst.IsKnownSizeVector());

    // The declared column types must agree with the mapper's generic type arguments.
    if (typeSrc.RawType != typeof(TSrc))
    {
        throw env.ExceptParam(nameof(mapper),
            "The source column type '{0}' doesn't match the input type of the mapper", typeSrc);
    }
    if (typeDst.RawType != typeof(TDst))
    {
        throw env.ExceptParam(nameof(mapper),
            "The destination column type '{0}' doesn't match the output type of the mapper", typeDst);
    }

    bool tmp = input.Schema.TryGetColumnIndex(src, out int colSrc);
    if (!tmp)
    {
        throw env.ExceptParam(nameof(src), "The input data doesn't have a column named '{0}'", src);
    }
    var typeOrig = input.Schema[colSrc].Type;

    // REVIEW: Ideally this should support vector-type conversion. It currently doesn't.
    bool ident;
    Delegate conv;
    if (typeOrig.SameSizeAndItemType(typeSrc))
    {
        // Source already has the mapper's input type: no conversion needed.
        ident = true;
        conv = null;
    }
    else if (!Conversions.Instance.TryGetStandardConversion(typeOrig, typeSrc, out conv, out ident))
    {
        throw env.ExceptParam(nameof(mapper), "The type of column '{0}', '{1}', cannot be converted to the input type of the mapper '{2}'", src, typeOrig, typeSrc);
    }

    var col = new Column(src, dst);
    IDataView impl;
    if (ident)
    {
        impl = new Impl<TSrc, TDst, TDst>(env, name, input, col, typeDst, mapper, keyValueGetter: keyValueGetter, slotNamesGetter: slotNamesGetter);
    }
    else
    {
        // Conversion required: instantiate Impl<TOrig, TSrc, TDst> via reflection, since
        // the original column's raw type is only known at runtime. The Func signature below
        // (instantiated at int) is only a template used to locate CreateImpl's generic definition.
        Func<IHostEnvironment, string, IDataView, Column, ColumnType,
            ValueMapper<int, int>, ValueMapper<int, int>,
            ValueGetter<VBuffer<ReadOnlyMemory<char>>>,
            ValueGetter<VBuffer<ReadOnlyMemory<char>>>,
            Impl<int, int, int>> del = CreateImpl<int, int, int>;
        var meth = del.GetMethodInfo().GetGenericMethodDefinition()
            .MakeGenericMethod(typeOrig.RawType, typeof(TSrc), typeof(TDst));
        impl = (IDataView)meth.Invoke(null, new object[] { env, name, input, col, typeDst, conv, mapper, keyValueGetter, slotNamesGetter });
    }

    // Wrap in an opaque view so downstream code cannot peek through to the implementation.
    return (new OpaqueDataView(impl));
}
// Entry-point macro that expands a one-versus-all training request: one binary
// training subgraph per class, followed by an OVA model combiner that merges the
// per-class binary models into a single multiclass predictor model.
public static CommonOutputs.MacroOutput<Output> OneVersusAll(
    IHostEnvironment env,
    Arguments input,
    EntryPointNode node)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(input, nameof(input));
    env.Assert(input.Nodes.Count > 0);

    var numClasses = GetNumberOfClasses(env, input, out var label);
    var predModelVars = new Var<PredictorModel>[numClasses];

    // This will be the final resulting list of nodes that is returned from the macro.
    var macroNodes = new List<EntryPointNode>();

    // Instantiate the subgraph for each label value.
    for (int k = 0; k < numClasses; k++)
    {
        predModelVars[k] = ProcessClass(env, macroNodes, k, label, input, node);
    }

    // Convert the predictor models to an array of predictor models.
    var modelsArray = new Var<PredictorModel[]>();
    MacroUtils.ConvertIPredictorModelsToArray(env, node.Context, macroNodes, predModelVars, modelsArray.VarName);

    // Use OVA model combiner to combine these models into one.
    // Takes in array of models that are binary predictor models and
    // produces single multiclass predictor model.
    var combineArgs = new ModelOperations.CombineOvaPredictorModelsInput();
    combineArgs.Caching = input.Caching;
    combineArgs.FeatureColumnName = input.FeatureColumnName;
    combineArgs.LabelColumnName = input.LabelColumnName;
    combineArgs.NormalizeFeatures = input.NormalizeFeatures;
    combineArgs.UseProbabilities = input.UseProbabilities;

    // Bind the model array and the training data as inputs of the combiner node.
    var inputBindingMap = new Dictionary<string, List<ParameterBinding>>();
    var inputMap = new Dictionary<ParameterBinding, VariableBinding>();
    var combineNodeModelArrayInput = new SimpleVariableBinding(modelsArray.VarName);
    var paramBinding = new SimpleParameterBinding(nameof(combineArgs.ModelArray));
    inputBindingMap.Add(nameof(combineArgs.ModelArray), new List<ParameterBinding>() { paramBinding });
    inputMap.Add(paramBinding, combineNodeModelArrayInput);
    paramBinding = new SimpleParameterBinding(nameof(combineArgs.TrainingData));
    inputBindingMap.Add(nameof(combineArgs.TrainingData), new List<ParameterBinding>() { paramBinding });
    inputMap.Add(paramBinding, node.GetInputVariable(nameof(input.TrainingData)));

    // The combiner's predictor model is exposed as this macro node's output.
    var outputMap = new Dictionary<string, string>();
    outputMap.Add(nameof(Output.PredictorModel), node.GetOutputVariableName(nameof(Output.PredictorModel)));
    var combineModelsNode = EntryPointNode.Create(env, "Models.OvaModelCombiner",
        combineArgs, node.Context, inputBindingMap, inputMap, outputMap);
    macroNodes.Add(combineModelsNode);

    return (new CommonOutputs.MacroOutput<Output>() { Nodes = macroNodes });
}
/// <summary>
/// Extract all values of one column of the data view in a form of an <see cref="IEnumerable{T}"/>.
/// </summary>
/// <typeparam name="T">The type of the values. This must match the actual column type.</typeparam>
/// <param name="data">The data view to get the column from.</param>
/// <param name="env">The current host environment.</param>
/// <param name="columnName">The name of the column to extract.</param>
public static IEnumerable<T> GetColumn<T>(this IDataView data, IHostEnvironment env, string columnName)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(data, nameof(data));
    env.CheckNonEmpty(columnName, nameof(columnName));

    if (!data.Schema.TryGetColumnIndex(columnName, out int col))
    {
        throw env.ExceptSchemaMismatch(nameof(columnName), "input", columnName);
    }

    // There are two decisions that we make here:
    // - Is the T an array type?
    //     - If yes, we need to map VBuffer to array and densify.
    //     - If no, this is not needed.
    // - Does T (or item type of T if it's an array) equal to the data view type?
    //     - If this is the same type, we can map directly.
    //     - Otherwise, we need a conversion delegate.
    var colType = data.Schema.GetColumnType(col);
    if (colType.RawType == typeof(T))
    {
        // Direct mapping is possible.
        return (GetColumnDirect<T>(data, col));
    }
    else if (typeof(T) == typeof(string) && colType.IsText)
    {
        // Special case of ROM<char> to string conversion.
        Delegate convert = (Func<ReadOnlyMemory<char>, string>)((ReadOnlyMemory<char> txt) => txt.ToString());
        // The Func type below is only a template: its generic definition is re-instantiated
        // at (T, actual column raw type) via reflection.
        Func<IDataView, int, Func<int, T>, IEnumerable<T>> del = GetColumnConvert;
        var meth = del.Method.GetGenericMethodDefinition().MakeGenericMethod(typeof(T), colType.RawType);
        return ((IEnumerable<T>)(meth.Invoke(null, new object[] { data, col, convert })));
    }
    else if (typeof(T).IsArray)
    {
        // Output is an array type.
        if (!colType.IsVector)
        {
            throw env.ExceptSchemaMismatch(nameof(columnName), "input", columnName, "vector", "scalar");
        }
        var elementType = typeof(T).GetElementType();
        if (elementType == colType.ItemType.RawType)
        {
            // Direct mapping of items.
            Func<IDataView, int, IEnumerable<int[]>> del = GetColumnArrayDirect<int>;
            var meth = del.Method.GetGenericMethodDefinition().MakeGenericMethod(elementType);
            return ((IEnumerable<T>)meth.Invoke(null, new object[] { data, col }));
        }
        else if (elementType == typeof(string) && colType.ItemType.IsText)
        {
            // Conversion of DvText items to string items.
            Delegate convert = (Func<ReadOnlyMemory<char>, string>)((ReadOnlyMemory<char> txt) => txt.ToString());
            Func<IDataView, int, Func<int, long>, IEnumerable<long[]>> del = GetColumnArrayConvert;
            var meth = del.Method.GetGenericMethodDefinition().MakeGenericMethod(elementType, colType.ItemType.RawType);
            return ((IEnumerable<T>)meth.Invoke(null, new object[] { data, col, convert }));
        }
        // Fall through to the failure.
    }
    throw env.Except($"Could not map a data view column '{columnName}' of type {colType} to {typeof(T)}.");
}
// Trains a predictor on the input data (optionally with a calibrator), scores the
// input with it, and tags the resulting view so downstream components can retrieve it.
// Side effects: assigns _predictor and _scorer, and may write a model file to disk.
// NOTE(review): the CheckValue calls below pass literal strings instead of nameof(...),
// and "tag is empty" is being passed in the parameter-name position — worth confirming
// against the Check* overloads in use.
IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input, out IDataView sourceCtx)
{
    sourceCtx = input;
    Contracts.CheckValue(env, "env");
    env.CheckValue(args, "args");
    env.CheckValue(input, "input");
    env.CheckValue(args.tag, "tag is empty");
    env.CheckValue(args.trainer, "trainer", "Trainer cannot be null. If your model is already trained, please use ScoreTransform instead.");

    // The requested tag must not already be in use on any upstream tagged view.
    var views = TagHelper.EnumerateTaggedView(true, input).Where(c => c.Item1 == args.tag);
    if (views.Any())
    {
        throw env.Except("Tag '{0}' is already used.", args.tag);
    }

    // NOTE(review): this method mixes env, host and the _host field when constructing
    // components; presumably intentional, but verify the hosts are interchangeable here.
    var host = env.Register("TagTrainOrScoreTransform");
    using (var ch = host.Start("Train"))
    {
        ch.Trace("Constructing trainer");
        var trainerSett = ScikitSubComponent<ITrainer, SignatureTrainer>.AsSubComponent(args.trainer);
        ITrainer trainer = trainerSett.CreateInstance(host);
        var customCols = TrainUtils.CheckAndGenerateCustomColumns(env, args.CustomColumn);
        string feat;
        string group;
        var data = CreateDataFromArgs(_host, ch, new OpaqueDataView(input), args, out feat, out group);
        // Calibrator is optional; only instantiate it when one was specified.
        ICalibratorTrainer calibrator = args.calibrator == null
            ? null
            : ScikitSubComponent<ICalibratorTrainer, SignatureCalibrator>.AsSubComponent(args.calibrator).CreateInstance(host);
        // Build a sanitized trainer name (settings string stripped of punctuation) for the extended trainer.
        var nameTrainer = args.trainer.ToString().Replace("{", "").Replace("}", "").Replace(" ", "").Replace("=", "").Replace("+", "Y").Replace("-", "N");
        var extTrainer = new ExtendedTrainer(trainer, nameTrainer);
        _predictor = extTrainer.Train(host, ch, data, null, calibrator, args.maxCalibrationExamples);

        // Optionally persist the freshly trained model.
        if (!string.IsNullOrEmpty(args.outputModel))
        {
            ch.Info("Saving model into '{0}'", args.outputModel);
            using (var fs = File.Create(args.outputModel))
                TrainUtils.SaveModel(env, ch, fs, _predictor, data);
            ch.Info("Done.");
        }

        if (_cali != null)
        {
            throw ch.ExceptNotImpl("Calibrator is not implemented yet.");
        }

        ch.Trace("Scoring");
        if (_args.scorer != null)
        {
            // A custom scorer was requested: bind the predictor and instantiate it.
            var mapper = new SchemaBindablePredictorWrapper(_predictor);
            var roles = new RoleMappedSchema(input.Schema, null, feat, group: group);
            var bound = mapper.Bind(_host, roles);
            var scorPars = ScikitSubComponent<IDataScorerTransform, SignatureDataScorer>.AsSubComponent(_args.scorer);
            _scorer = scorPars.CreateInstance(_host, input, bound, roles);
        }
        else
        {
            // Fall back to the default scorer for this predictor.
            _scorer = PredictorHelper.CreateDefaultScorer(_host, input, feat, group, _predictor);
        }

        ch.Info("Tagging with tag '{0}'.", args.tag);
        var ar = new TagViewTransform.Arguments { tag = args.tag };
        var res = new TagViewTransform(env, ar, _scorer, _predictor);
        return (res);
    }
}
// Computes permutation feature importance for the prediction transformer found in the
// model, dispatching to the strongly-typed PermutationFeatureImportance<...> implementation
// via reflection, and returns the per-slot results keyed by slot name.
// NOTE(review): the access modifier and return type of this method are not visible in this
// chunk; the final statement returns an ImmutableDictionary built from the output map.
PermutationFeatureImportance<TMetric, TResult>(
    IHostEnvironment env,
    ITransformer model,
    IDataView data,
    Func<TResult> resultInitializer,
    Func<IDataView, TMetric> evaluationFunc,
    Func<TMetric, TMetric, TMetric> deltaFunc,
    int permutationCount,
    bool useFeatureWeightFilter,
    int? numberOfExamplesToUse) where TResult : IMetricsStatistics<TMetric>
{
    env.CheckValue(data, nameof(data));
    env.CheckValue(model, nameof(model));

    // Locate the last single-feature prediction transformer: either scan a transformer
    // chain from the end, or use the model directly if it is one itself.
    ISingleFeaturePredictionTransformer lastTransformer = null;
    if (model is ITransformerChainAccessor chain)
    {
        foreach (var transformer in chain.Transformers.Reverse())
        {
            if (transformer is ISingleFeaturePredictionTransformer singlePredictionTransformer)
            {
                lastTransformer = singlePredictionTransformer;
                break;
            }
        }
    }
    else
    {
        lastTransformer = model as ISingleFeaturePredictionTransformer;
    }
    env.CheckValue(lastTransformer, nameof(lastTransformer), "The model provided does not have a compatible predictor");

    string featureColumnName = lastTransformer.FeatureColumnName;

    // Close the generic PermutationFeatureImportance<,,> over the transformer's label type
    // plus TMetric/TResult, then invoke its static GetImportanceMetricsMatrix by reflection.
    var predictionTransformerGenericType = GetImplementedIPredictionTransformer(lastTransformer.GetType());
    Type[] types = { predictionTransformerGenericType.GenericTypeArguments[0], typeof(TMetric), typeof(TResult) };
    Type pfiGenericType = typeof(PermutationFeatureImportance<,,>).MakeGenericType(types);
    object[] param = { env, lastTransformer, data, resultInitializer, evaluationFunc, deltaFunc, featureColumnName, permutationCount, useFeatureWeightFilter, numberOfExamplesToUse };
    MethodInfo mi = pfiGenericType.GetMethod("GetImportanceMetricsMatrix", BindingFlags.Static | BindingFlags.Public);
    var permutationFeatureImportance = (ImmutableArray<TResult>)mi.Invoke(null, param);

    // Read the slot names of the feature column so results can be keyed by name.
    VBuffer<ReadOnlyMemory<char>> nameBuffer = default;
    data.Schema[featureColumnName].Annotations.GetValue("SlotNames", ref nameBuffer);
    var featureColumnNames = nameBuffer.DenseValues().ToList();
    var output = new Dictionary<string, TResult>();
    for (int i = 0; i < permutationFeatureImportance.Length; i++)
    {
        var name = featureColumnNames[i].ToString();
        // If the slot wasn't given a name, default to just the slot number.
        if (string.IsNullOrEmpty(name))
        {
            name = $"Slot {i}";
        }
        output.Add(name, permutationFeatureImportance[i]);
    }

    return (output.ToImmutableDictionary());
}
/// <summary> /// Create a new data view which is obtained by appending all columns of all the source data views. /// If the data views are of different length, the resulting data view will have the length equal to the /// length of the shortest source. /// </summary> /// <param name="env">The host environment to use.</param> /// <param name="sources">A non-empty collection of data views to zip together.</param> /// <returns>The resulting data view.</returns> public static IDataView Zip(this IHostEnvironment env, IEnumerable <IDataView> sources) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(sources, nameof(sources)); return(ZipDataView.Create(env, sources)); }
/// <summary>
/// This method detects this predictable interval (or period) by adopting techniques of fourier analysis.
/// Returns -1 if no such pattern is found, that is, the input values do not follow a seasonal fluctuation.
/// </summary>
/// <param name="host">The detect seasonality host environment.</param>
/// <param name="input">Input DataView. The data is an instance of <see cref="Microsoft.ML.IDataView"/>.</param>
/// <param name="inputColumnName">Name of column to process. The column data must be <see cref="System.Double"/>.</param>
/// <param name="seasonalityWindowSize">An upper bound on the number of values to be considered in the input values.
/// When set to -1, use the whole input to fit model; when set to a positive integer, use this number as batch size.
/// Default value is -1.</param>
/// <param name="randomessThreshold">Randomness threshold, ranging from [0, 1]. It specifies how confident the input
/// follows a predictable pattern recurring as seasonal data. By default, it is set as 0.95.
/// </param>
/// <returns>The detected period if seasonality period exists, otherwise return -1.</returns>
public int DetectSeasonality(
    IHostEnvironment host,
    IDataView input,
    string inputColumnName,
    int seasonalityWindowSize,
    double randomessThreshold)
{
    host.CheckValue(input, nameof(input));
    host.CheckValue(inputColumnName, nameof(inputColumnName));
    host.CheckUserArg(seasonalityWindowSize == -1 || seasonalityWindowSize >= 0, nameof(seasonalityWindowSize));

    var column = input.Schema.GetColumnOrNull(inputColumnName);
    host.CheckUserArg(column.HasValue, nameof(inputColumnName));

    int length = 0;
    var valueCache = seasonalityWindowSize == -1
        ? new List<double>()
        : new List<double>(seasonalityWindowSize);

    // Fix: the row cursor is disposable and was previously never disposed (resource
    // leak); scope it with `using` so it is released once the values are read.
    using (var rowCursor = input.GetRowCursor(new List<DataViewSchema.Column>() { column.Value }))
    {
        var valueDelegate = rowCursor.GetGetter<double>(column.Value);
        double valueRef = 0;
        while (rowCursor.MoveNext())
        {
            valueDelegate.Invoke(ref valueRef);
            length++;
            valueCache.Add(valueRef);

            // Stop early once the batch window is full.
            if (seasonalityWindowSize != -1 && length >= seasonalityWindowSize)
            {
                break;
            }
        }
    }

    double[] fftRe = new double[length];
    double[] fftIm = new double[length];
    double[] inputRe = valueCache.ToArray();
    // The imaginary part of the input signal is identically zero; a freshly
    // allocated array is simpler than Enumerable.Repeat(0.0, length).ToArray().
    FftUtils.ComputeForwardFft(inputRe, new double[length], fftRe, fftIm, length);
    var energies = fftRe.Select((m, i) => new Complex(m, fftIm[i])).ToArray();

    /* Periodogram indicates the square of "energy" on the frequency domain.
     * Specifically, periodogram[j] = a[j]^2+b[j]^2, where a and b are Fourier Coefficients for cosine and sine,
     * x(t) = a0+sum(a[j]cos(2Pi * f[j]t)+b[j]sin(2Pi * f[j]t)
     */
    var periodogram = energies.Select(t => t * Complex.Conjugate(t)).ToArray();
    FindBestTwoFrequencies(periodogram, length, out var bestFreq, out var secondFreq);

    // The inverse FFT of the periodogram yields the (circular) auto-correlation,
    // which is used to validate the candidate frequencies.
    double[] ifftRe = new double[length];
    double[] ifftIm = new double[length];
    FftUtils.ComputeBackwardFft(
        periodogram.Select(t => t.Real).ToArray(),
        periodogram.Select(t => t.Imaginary).ToArray(),
        ifftRe,
        ifftIm,
        length);
    var values = ifftRe.Select((t, i) => new Complex(t, ifftIm[i])).ToArray();

    int period = FindActualPeriod(values, bestFreq, secondFreq, length, randomessThreshold);
    return period < 0 ? -1 : period;
}
// Factory method for SignatureLoadModel. private static OnnxTransformer Create(IHostEnvironment env, ModelLoadContext ctx) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(ctx, nameof(ctx)); ctx.CheckAtModel(GetVersionInfo()); byte[] modelBytes = null; if (!ctx.TryLoadBinaryStream("OnnxModel", r => modelBytes = r.ReadByteArray())) { throw env.ExceptDecode(); } bool supportsMultiInputOutput = ctx.Header.ModelVerWritten > 0x00010001; var numInputs = (supportsMultiInputOutput) ? ctx.Reader.ReadInt32() : 1; env.CheckDecode(numInputs > 0); var inputs = new string[numInputs]; for (int j = 0; j < inputs.Length; j++) { inputs[j] = ctx.LoadNonEmptyString(); } var numOutputs = (supportsMultiInputOutput) ? ctx.Reader.ReadInt32() : 1; env.CheckDecode(numOutputs > 0); var outputs = new string[numOutputs]; for (int j = 0; j < outputs.Length; j++) { outputs[j] = ctx.LoadNonEmptyString(); } // Save custom-provided shapes. Those shapes overwrite shapes loaded from the ONNX model file. int customShapeInfosLength = ctx.Reader.ReadInt32(); // 0 means no custom shape. Non-zero means count of custom shapes. CustomShapeInfo[] loadedCustomShapeInfos = null; if (customShapeInfosLength > 0) { loadedCustomShapeInfos = new CustomShapeInfo[customShapeInfosLength]; for (int i = 0; i < customShapeInfosLength; ++i) { var name = ctx.LoadNonEmptyString(); var shape = ctx.Reader.ReadIntArray(); loadedCustomShapeInfos[i] = new CustomShapeInfo() { Name = name, Shape = shape }; } } int recursionLimit; // Recursion limit change if (ctx.Header.ModelVerWritten >= 0x00010003) { recursionLimit = ctx.Reader.ReadInt32(); } else { // Default if not written inside ONNX model recursionLimit = 100; } var options = new Options() { InputColumns = inputs, OutputColumns = outputs, CustomShapeInfos = loadedCustomShapeInfos, RecursionLimit = recursionLimit }; return(new OnnxTransformer(env, options, modelBytes)); }
protected virtual IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input, out IDataView sourceCtx, IPredictor overwritePredictor) { sourceCtx = input; Contracts.CheckValue(env, "env"); env.CheckValue(args, "args"); env.CheckValue(input, "input"); IPredictor predictor; if (overwritePredictor == null) { throw env.Except("No defined predictor."); } else { predictor = overwritePredictor; } // The function is returning something and modifying a member of the class. Not very fancy. _predictor = predictor; string feat = TrainUtils.MatchNameOrDefaultOrNull(env, input.Schema, "featureColumn", args.featureColumn, DefaultColumnNames.Features); int index = SchemaHelper.GetColumnIndex(input.Schema, feat); var type = input.Schema[index].Type; if (!type.IsVector() || type.AsVector().ItemType().RawKind() != DataKind.Single) { throw env.Except("Features must a vector of floats"); } if (args.useProb) { var valueMapper = predictor as IValueMapperDist; if (valueMapper == null) { throw env.Except("Predictor must be a IValueMapper."); } var output = valueMapper.DistType; if (output.IsVector()) { return(CreateTransformValueMapperDist <VBuffer <float>, VBuffer <float>, VBuffer <float> >(valueMapper, feat, args.outputColumn)); } else { return(CreateTransformValueMapperDist <VBuffer <float>, VBuffer <float>, float>(valueMapper, feat, args.outputColumn)); } } else { var valueMapper = predictor as IValueMapper; if (valueMapper == null) { throw env.Except("Predictor must be a IValueMapper."); } var output = valueMapper.OutputType; if (output.IsVector()) { return(CreateTransformValueMapper <VBuffer <float>, VBuffer <float> >(valueMapper, feat, args.outputColumn)); } else { return(CreateTransformValueMapper <VBuffer <float>, float>(valueMapper, feat, args.outputColumn)); } } }
// Factory method for SignatureLoadModel. private static TensorFlowTransform Create(IHostEnvironment env, ModelLoadContext ctx) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(ctx, nameof(ctx)); ctx.CheckAtModel(GetVersionInfo()); // *** Binary format *** // byte: indicator for frozen models // stream: tensorFlow model. // int: number of input columns // for each input column // int: id of int column name // int: number of output columns // for each output column // int: id of output column name GetModelInfo(env, ctx, out string[] inputs, out string[] outputs, out bool isFrozen); if (isFrozen) { byte[] modelBytes = null; if (!ctx.TryLoadBinaryStream("TFModel", r => modelBytes = r.ReadByteArray())) { throw env.ExceptDecode(); } return(new TensorFlowTransform(env, TensorFlowUtils.LoadTFSession(env, modelBytes), inputs, outputs, null, false)); } var tempDirPath = Path.GetFullPath(Path.Combine(Path.GetTempPath(), RegistrationName + "_" + Guid.NewGuid())); TensorFlowUtils.CreateFolderWithAclIfNotExists(env, tempDirPath); try { var load = ctx.TryLoadBinaryStream("TFSavedModel", br => { int count = br.ReadInt32(); for (int n = 0; n < count; n++) { string relativeFile = br.ReadString(); long fileLength = br.ReadInt64(); string fullFilePath = Path.Combine(tempDirPath, relativeFile); string fullFileDir = Path.GetDirectoryName(fullFilePath); if (fullFileDir != tempDirPath) { TensorFlowUtils.CreateFolderWithAclIfNotExists(env, fullFileDir); } using (var fs = new FileStream(fullFilePath, FileMode.Create, FileAccess.Write)) { long actualRead = br.BaseStream.CopyRange(fs, fileLength); env.Assert(actualRead == fileLength); } } }); return(new TensorFlowTransform(env, TensorFlowUtils.GetSession(env, tempDirPath), inputs, outputs, tempDirPath, true)); } catch (Exception) { TensorFlowUtils.DeleteFolderWithRetries(env, tempDirPath); throw; } }
// Entry-point macro: expands an n-fold cross-validation into a subgraph of
// entry-point nodes (dataset split, one TrainTest evaluation per fold, optional
// model combination, and final metric combination), returning the node list.
public static CommonOutputs.MacroOutput<Output> CrossValidate(
    IHostEnvironment env,
    Arguments input,
    EntryPointNode node)
{
    env.CheckValue(input, nameof(input));

    // This will be the final resulting list of nodes that is returned from the macro.
    var subGraphNodes = new List<EntryPointNode>();

    // The input transform model, if one was supplied to the macro.
    VariableBinding transformModelVarName = null;
    if (input.TransformModel != null)
    {
        transformModelVarName = node.GetInputVariable(nameof(input.TransformModel));
    }

    // Split the input data into folds.
    var splitArgs = new CVSplit.Input();
    splitArgs.NumFolds = input.NumFolds;
    splitArgs.StratificationColumn = input.StratificationColumn;
    var inputBindingMap = new Dictionary<string, List<ParameterBinding>>();
    var inputMap = new Dictionary<ParameterBinding, VariableBinding>();
    var inputData = node.GetInputVariable(nameof(splitArgs.Data));
    ParameterBinding paramBinding = new SimpleParameterBinding(nameof(splitArgs.Data));
    inputBindingMap.Add(nameof(splitArgs.Data), new List<ParameterBinding>() { paramBinding });
    inputMap.Add(paramBinding, inputData);
    var outputMap = new Dictionary<string, string>();
    var splitOutputTrainData = new ArrayVar<IDataView>();
    var splitOutputTestData = new ArrayVar<IDataView>();
    outputMap.Add(nameof(CVSplit.Output.TrainData), splitOutputTrainData.VarName);
    outputMap.Add(nameof(CVSplit.Output.TestData), splitOutputTestData.VarName);
    var splitNode = EntryPointNode.Create(env, "Models.CrossValidatorDatasetSplitter", splitArgs, node.Context, inputBindingMap, inputMap, outputMap);
    subGraphNodes.Add(splitNode);

    // Per-fold output variables, filled in by the loop below.
    // NOTE(review): inputTransformModelVars is declared but never assigned in the
    // visible code — possibly leftover; confirm before removing.
    var predModelVars = new Var<PredictorModel>[input.NumFolds];
    var inputTransformModelVars = new Var<PredictorModel>[input.NumFolds];
    var warningsVars = new Var<IDataView>[input.NumFolds];
    var overallMetricsVars = new Var<IDataView>[input.NumFolds];
    var instanceMetricsVars = new Var<IDataView>[input.NumFolds];
    var confusionMatrixVars = new Var<IDataView>[input.NumFolds];

    // Instantiate the subgraph for each fold.
    for (int k = 0; k < input.NumFolds; k++)
    {
        // Parse the nodes in input.Nodes into a temporary run context.
        var context = new RunContext(env);
        var graph = EntryPointNode.ValidateNodes(env, context, input.Nodes);

        // Rename all the variables such that they don't conflict with the ones in the outer run context.
        var mapping = new Dictionary<string, string>();
        foreach (var entryPointNode in graph)
        {
            entryPointNode.RenameAllVariables(mapping);
        }

        // Instantiate a TrainTest entry point for this fold.
        var args = new TrainTestMacro.Arguments
        {
            Nodes = new JArray(graph.Select(n => n.ToJson()).ToArray()),
            TransformModel = null,
            LabelColumn = input.LabelColumn,
            GroupColumn = input.GroupColumn,
            WeightColumn = input.WeightColumn,
            NameColumn = input.NameColumn
        };

        if (transformModelVarName != null)
        {
            args.TransformModel = new Var<TransformModel> { VarName = transformModelVarName.VariableName };
        }

        args.Inputs.Data = new Var<IDataView>
        {
            VarName = mapping[input.Inputs.Data.VarName]
        };
        args.Outputs.PredictorModel = new Var<PredictorModel>
        {
            VarName = mapping[input.Outputs.PredictorModel.VarName]
        };

        // Set train/test trainer kind to match.
        args.Kind = input.Kind;

        // Set the input bindings for the TrainTest entry point.
        inputBindingMap = new Dictionary<string, List<ParameterBinding>>();
        inputMap = new Dictionary<ParameterBinding, VariableBinding>();
        var trainingData = new SimpleParameterBinding(nameof(args.TrainingData));
        inputBindingMap.Add(nameof(args.TrainingData), new List<ParameterBinding> { trainingData });
        inputMap.Add(trainingData, new ArrayIndexVariableBinding(splitOutputTrainData.VarName, k));
        var testingData = new SimpleParameterBinding(nameof(args.TestingData));
        inputBindingMap.Add(nameof(args.TestingData), new List<ParameterBinding> { testingData });
        inputMap.Add(testingData, new ArrayIndexVariableBinding(splitOutputTestData.VarName, k));
        outputMap = new Dictionary<string, string>();
        var transformModelVar = new Var<TransformModel>();
        var predModelVar = new Var<PredictorModel>();
        outputMap.Add(nameof(TrainTestMacro.Output.PredictorModel), predModelVar.VarName);
        predModelVars[k] = predModelVar;

        if (transformModelVarName != null && transformModelVarName.VariableName != null)
        {
            // Combine the incoming transform model with this fold's predictor model.
            // NOTE(review): inputBindingMap/inputMap/outputMap are reassigned here for
            // the combiner node, and the TrainTestEvaluator node below is then created
            // with these same maps — confirm this wiring is intended.
            var combineModelsArgs = new ModelOperations.SimplePredictorModelInput();
            inputBindingMap = new Dictionary<string, List<ParameterBinding>>();
            inputMap = new Dictionary<ParameterBinding, VariableBinding>();
            var inputTransformModel = new SimpleVariableBinding(transformModelVarName.VariableName);
            var inputPredictorModel = new SimpleVariableBinding(predModelVar.VarName);
            paramBinding = new SimpleParameterBinding(nameof(combineModelsArgs.TransformModel));
            inputBindingMap.Add(nameof(combineModelsArgs.TransformModel), new List<ParameterBinding>() { paramBinding });
            inputMap.Add(paramBinding, inputTransformModel);
            paramBinding = new SimpleParameterBinding(nameof(combineModelsArgs.PredictorModel));
            inputBindingMap.Add(nameof(combineModelsArgs.PredictorModel), new List<ParameterBinding>() { paramBinding });
            inputMap.Add(paramBinding, inputPredictorModel);
            outputMap = new Dictionary<string, string>();
            var combineNodeOutputPredictorModel = new Var<PredictorModel>();
            predModelVars[k] = combineNodeOutputPredictorModel;
            outputMap.Add(nameof(ModelOperations.PredictorModelOutput.PredictorModel), combineNodeOutputPredictorModel.VarName);
            EntryPointNode combineNode = EntryPointNode.Create(env, "Transforms.TwoHeterogeneousModelCombiner", combineModelsArgs, node.Context, inputBindingMap, inputMap, outputMap);
            subGraphNodes.Add(combineNode);
        }

        // Wire up this fold's metric outputs.
        var warningVar = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestMacro.Output.Warnings), warningVar.VarName);
        warningsVars[k] = warningVar;
        var overallMetric = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestMacro.Output.OverallMetrics), overallMetric.VarName);
        overallMetricsVars[k] = overallMetric;
        var instanceMetric = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestMacro.Output.PerInstanceMetrics), instanceMetric.VarName);
        instanceMetricsVars[k] = instanceMetric;
        var confusionMatrix = new Var<IDataView>();
        outputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), confusionMatrix.VarName);
        confusionMatrixVars[k] = confusionMatrix;
        const string trainTestEvaluatorMacroEntryPoint = "Models.TrainTestEvaluator";
        subGraphNodes.Add(EntryPointNode.Create(env, trainTestEvaluatorMacroEntryPoint, args, node.Context, inputBindingMap, inputMap, outputMap));
    }

    // Convert the predictor models to an array of predictor models.
    MacroUtils.ConvertIPredictorModelsToArray(env, node.Context, subGraphNodes, predModelVars, node.GetOutputVariableName(nameof(Output.PredictorModel)));

    // Convert the warnings, overall, per instance and confusion matrix data views into an array.
    var warningsArrayVar = new ArrayVar<IDataView>();
    var overallArrayVar = new ArrayVar<IDataView>();
    var instanceArrayVar = new ArrayVar<IDataView>();
    ArrayVar<IDataView> confusionMatrixArrayVar = null;
    MacroUtils.ConvertIdataViewsToArray(env, node.Context, subGraphNodes, warningsVars, warningsArrayVar.VarName);
    MacroUtils.ConvertIdataViewsToArray(env, node.Context, subGraphNodes, overallMetricsVars, overallArrayVar.VarName);
    MacroUtils.ConvertIdataViewsToArray(env, node.Context, subGraphNodes, instanceMetricsVars, instanceArrayVar.VarName);
    // Confusion matrices only exist for the classification trainer kinds.
    if (input.Kind == MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer ||
        input.Kind == MacroUtils.TrainerKinds.SignatureMultiClassClassifierTrainer)
    {
        confusionMatrixArrayVar = new ArrayVar<IDataView>();
        MacroUtils.ConvertIdataViewsToArray(env, node.Context, subGraphNodes, confusionMatrixVars, confusionMatrixArrayVar.VarName);
    }

    var combineArgs = new CombineMetricsInput();
    combineArgs.Kind = input.Kind;
    combineArgs.LabelColumn = input.LabelColumn;
    combineArgs.WeightColumn = input.WeightColumn;
    combineArgs.GroupColumn = input.GroupColumn;
    combineArgs.NameColumn = input.NameColumn;

    // Set the input bindings for the CombineMetrics entry point.
    var combineInputBindingMap = new Dictionary<string, List<ParameterBinding>>();
    var combineInputMap = new Dictionary<ParameterBinding, VariableBinding>();
    var warningsArray = new SimpleParameterBinding(nameof(combineArgs.Warnings));
    combineInputBindingMap.Add(nameof(combineArgs.Warnings), new List<ParameterBinding> { warningsArray });
    combineInputMap.Add(warningsArray, new SimpleVariableBinding(warningsArrayVar.VarName));
    var overallArray = new SimpleParameterBinding(nameof(combineArgs.OverallMetrics));
    combineInputBindingMap.Add(nameof(combineArgs.OverallMetrics), new List<ParameterBinding> { overallArray });
    combineInputMap.Add(overallArray, new SimpleVariableBinding(overallArrayVar.VarName));
    var combinePerInstArray = new SimpleParameterBinding(nameof(combineArgs.PerInstanceMetrics));
    combineInputBindingMap.Add(nameof(combineArgs.PerInstanceMetrics), new List<ParameterBinding> { combinePerInstArray });
    combineInputMap.Add(combinePerInstArray, new SimpleVariableBinding(instanceArrayVar.VarName));
    if (confusionMatrixArrayVar != null)
    {
        var combineConfArray = new SimpleParameterBinding(nameof(combineArgs.ConfusionMatrix));
        combineInputBindingMap.Add(nameof(combineArgs.ConfusionMatrix), new List<ParameterBinding> { combineConfArray });
        combineInputMap.Add(combineConfArray, new SimpleVariableBinding(confusionMatrixArrayVar.VarName));
    }

    // Map the combined outputs back onto the macro's declared output variables.
    var combineOutputMap = new Dictionary<string, string>();
    var combineWarningVar = new Var<IDataView>();
    combineWarningVar.VarName = node.GetOutputVariableName(nameof(Output.Warnings));
    combineOutputMap.Add(nameof(Output.Warnings), combineWarningVar.VarName);
    var combineOverallMetric = new Var<IDataView>();
    combineOverallMetric.VarName = node.GetOutputVariableName(nameof(Output.OverallMetrics));
    combineOutputMap.Add(nameof(Output.OverallMetrics), combineOverallMetric.VarName);
    var combineInstanceMetric = new Var<IDataView>();
    combineInstanceMetric.VarName = node.GetOutputVariableName(nameof(Output.PerInstanceMetrics));
    combineOutputMap.Add(nameof(Output.PerInstanceMetrics), combineInstanceMetric.VarName);
    if (confusionMatrixArrayVar != null)
    {
        var combineConfusionMatrix = new Var<IDataView>();
        combineConfusionMatrix.VarName = node.GetOutputVariableName(nameof(Output.ConfusionMatrix));
        combineOutputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), combineConfusionMatrix.VarName);
    }
    var combineMetricsNode = EntryPointNode.Create(env, "Models.CrossValidationResultsCombiner", combineArgs, node.Context, combineInputBindingMap, combineInputMap, combineOutputMap);
    subGraphNodes.Add(combineMetricsNode);
    return (new CommonOutputs.MacroOutput<Output>() { Nodes = subGraphNodes });
}
/// <summary> /// Create a new <see cref="IDataView"/> over an enumerable of the items of user-defined type. /// The user maintains ownership of the <paramref name="data"/> and the resulting data view will /// never alter the contents of the <paramref name="data"/>. /// Since <see cref="IDataView"/> is assumed to be immutable, the user is expected to support /// multiple enumerations of the <paramref name="data"/> that would return the same results, unless /// the user knows that the data will only be cursored once. /// /// One typical usage for streaming data view could be: create the data view that lazily loads data /// as needed, then apply pre-trained transformations to it and cursor through it for transformation /// results. /// </summary> /// <typeparam name="TRow">The user-defined item type.</typeparam> /// <param name="data">The enumerable data containing type <typeparamref name="TRow"/> to convert to an<see cref="IDataView"/>.</param> /// <param name="schemaDefinition">The optional schema definition of the data view to create. If <c>null</c>, /// the schema definition is inferred from <typeparamref name="TRow"/>.</param> /// <returns>The constructed <see cref="IDataView"/>.</returns> /// <example> /// <format type="text/markdown"> /// <![CDATA[ /// [!code-csharp[LoadFromEnumerable](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/LoadFromEnumerable.cs)] /// ]]> /// </format> /// </example> public IDataView LoadFromEnumerable <TRow>(IEnumerable <TRow> data, SchemaDefinition schemaDefinition = null) where TRow : class { _env.CheckValue(data, nameof(data)); _env.CheckValueOrNull(schemaDefinition); return(DataViewConstructionUtils.CreateFromEnumerable(_env, data, schemaDefinition)); }
// Entry-point macro: expands a one-versus-all multiclass training into one binary
// training subgraph per class plus an OVA combiner node that merges the resulting
// binary models into a single multiclass predictor model.
public static CommonOutputs.MacroOutput<Output> OneVersusAll(
    IHostEnvironment env,
    Arguments input,
    EntryPointNode node)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(input, nameof(input));
    env.Assert(input.Nodes.Count > 0);

    var numClasses = GetNumberOfClasses(env, input, out var label);
    var predModelVars = new Var<IPredictorModel>[numClasses];

    // This will be the final resulting list of nodes that is returned from the macro.
    var macroNodes = new List<EntryPointNode>();

    // Instantiate the subgraph for each label value.
    for (int k = 0; k < numClasses; k++)
    {
        var result = ProcessClass(env, k, label, input, node);
        predModelVars[k] = result.Item2;
        macroNodes.AddRange(result.Item1);
    }

    // Use OVA model combiner to combine these models into one.
    // Takes in array of models that are binary predictor models and
    // produces single multiclass predictor model.
    var macroExperiment = new Experiment(env);
    var combinerNode = new Legacy.Models.OvaModelCombiner
    {
        ModelArray = new ArrayVar<IPredictorModel>(predModelVars),
        TrainingData = new Var<IDataView> { VarName = node.GetInputVariable(nameof(input.TrainingData)).VariableName },
        Caching = (Legacy.Models.CachingOptions)input.Caching,
        FeatureColumn = input.FeatureColumn,
        NormalizeFeatures = (Legacy.Models.NormalizeOption)input.NormalizeFeatures,
        LabelColumn = input.LabelColumn,
        UseProbabilities = input.UseProbabilities
    };

    // Get output model variable.
    if (!node.OutputMap.TryGetValue(nameof(Output.PredictorModel), out var outVariableName))
    {
        // Fix: raise through the host environment rather than throwing a bare
        // System.Exception, consistent with error handling elsewhere in this file.
        throw env.Except("Cannot find OVA model output.");
    }

    // Map macro's output back to OVA combiner (so OVA combiner will set the value on our output variable).
    var combinerOutput = new Legacy.Models.OvaModelCombiner.Output
    {
        PredictorModel = new Var<IPredictorModel> { VarName = outVariableName }
    };

    // Add to experiment (must be done AFTER we assign variable name to output).
    macroExperiment.Add(combinerNode, combinerOutput);

    // Add nodes to main experiment.
    var nodes = macroExperiment.GetNodes();
    var expNodes = EntryPointNode.ValidateNodes(env, node.Context, nodes);
    macroNodes.AddRange(expNodes);

    return (new CommonOutputs.MacroOutput<Output>() { Nodes = macroNodes });
}
public void Save(ModelSaveContext ctx) { Contracts.AssertValue(_env); _env.CheckValue(ctx, nameof(ctx)); SaveCore(ctx); }
// Adds a MinMax normalization transform over the feature column when the trainer
// requires normalization and the data is not already normalized.
// Returns true if a normalizer was added.
public static bool AddNormalizerIfNeeded(IHostEnvironment env, IChannel ch, ITrainer trainer, ref IDataView view, string featureColumn, NormalizeOption autoNorm)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ch, nameof(ch));
    ch.CheckValue(trainer, nameof(trainer));
    ch.CheckValue(view, nameof(view));
    ch.CheckValueOrNull(featureColumn);
    ch.CheckUserArg(Enum.IsDefined(typeof(NormalizeOption), autoNorm), nameof(TrainCommand.Arguments.NormalizeFeatures), "Normalize option is invalid. Specify one of 'norm=No', 'norm=Warn', 'norm=Auto', or 'norm=Yes'.");

    if (autoNorm == NormalizeOption.No)
    {
        ch.Info("Not adding a normalizer.");
        return (false);
    }
    // Without a feature column there is nothing to normalize.
    if (string.IsNullOrEmpty(featureColumn))
    {
        return (false);
    }

    int featCol;
    var schema = view.Schema;
    if (schema.TryGetColumnIndex(featureColumn, out featCol))
    {
        if (autoNorm != NormalizeOption.Yes)
        {
            // Skip when the trainer does not need normalization, or the feature
            // column's metadata already flags it as normalized.
            var nn = trainer as ITrainerEx;
            DvBool isNormalized = DvBool.False;
            if (nn == null || !nn.NeedNormalization ||
                (schema.TryGetMetadata(BoolType.Instance, MetadataUtils.Kinds.IsNormalized, featCol, ref isNormalized) && isNormalized.IsTrue))
            {
                ch.Info("Not adding a normalizer.");
                return (false);
            }
            if (autoNorm == NormalizeOption.Warn)
            {
                // 'Warn' mode: report the need but do not modify the pipeline.
                ch.Warning("A normalizer is needed for this trainer. Either add a normalizing transform or use the 'norm=Auto', 'norm=Yes' or 'norm=No' options.");
                return (false);
            }
        }
        ch.Info("Automatically adding a MinMax normalization transform, use 'norm=Warn' or 'norm=No' to turn this behavior off.");
        // Quote the feature column name in case it contains characters that need
        // escaping in the command-line component syntax.
        string quotedFeatureColumnName = featureColumn;
        StringBuilder sb = new StringBuilder();
        if (CmdQuoter.QuoteValue(quotedFeatureColumnName, sb))
        {
            quotedFeatureColumnName = sb.ToString();
        }
        var component = new SubComponent<IDataTransform, SignatureDataTransform>("MinMax", string.Format("col={{ name={0} source={0} }}", quotedFeatureColumnName));
        var loader = view as IDataLoader;
        if (loader != null)
        {
            // Compose onto the loader so the normalizer is persisted with the data model.
            view = CompositeDataLoader.Create(env, loader,
                new KeyValuePair<string, SubComponent<IDataTransform, SignatureDataTransform>>(null, component));
        }
        else
        {
            view = component.CreateInstance(env, view);
        }
        return (true);
    }
    return (false);
}
/// <summary> /// Apply this transform model to the given input data. /// </summary> public IDataView Apply(IHostEnvironment env, IDataView input) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(input, nameof(input)); return(ApplyTransformUtils.ApplyAllTransformsToData(env, _chain, input)); }
private static Session LoadTFSession(IHostEnvironment env, string exportDirSavedModel) { Contracts.Check(env != null, nameof(env)); env.CheckValue(exportDirSavedModel, nameof(exportDirSavedModel)); return(Session.LoadFromSavedModel(exportDirSavedModel)); }
// Factory method for SignatureDataTransform: either selects an existing tagged view
// (when no filename is given) or loads a view from a file, then cross-links the
// selected/loaded view and the input under each other's tags.
static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input, out IDataView sourceCtx)
{
    sourceCtx = input;
    // NOTE(review): CheckValue's second argument is normally a parameter name, not a
    // message; these two calls pass messages — confirm the intended overload.
    env.CheckValue(args.tag, "Tag cannot be empty.");
    if (TagHelper.EnumerateTaggedView(true, input).Where(c => c.Item1 == args.tag).Any())
    {
        throw env.Except("Tag '{0}' is already used.", args.tag);
    }
    env.CheckValue(args.selectTag, "Selected tag cannot be empty.");

    if (string.IsNullOrEmpty(args.filename))
    {
        // No file given: select a previously tagged view; it must exist and be unique.
        var selected = TagHelper.EnumerateTaggedView(true, input).Where(c => c.Item1 == args.selectTag);
        if (!selected.Any())
        {
            throw env.Except("Unable to find a view to select with tag '{0}'. Did you forget to specify a filename?", args.selectTag);
        }
        var first = selected.First();
        if (selected.Skip(1).Any())
        {
            throw env.Except("Tag '{0}' is ambiguous, {1} views were found.", args.selectTag, selected.Count());
        }
        // Ensure the input is a tagged view, wrapping it if necessary.
        var tagged = input as ITaggedDataView;
        if (tagged == null)
        {
            var ag = new TagViewTransform.Arguments { tag = args.tag };
            tagged = new TagViewTransform(env, ag, input);
        }
        // Cross-link the two views under each other's tags.
        first.Item2.AddRange(new[] { new Tuple<string, ITaggedDataView>(args.tag, tagged) });
        tagged.AddRange(new[] { new Tuple<string, ITaggedDataView>(args.selectTag, first.Item2) });
#if (DEBUG_TIP)
        // Debug-only sanity checks: neither view should be empty.
        long count = DataViewUtils.ComputeRowCount(tagged);
        if (count == 0)
        {
            throw env.Except("Replaced view is empty.");
        }
        count = DataViewUtils.ComputeRowCount(first.Item2);
        if (count == 0)
        {
            throw env.Except("Selected view is empty.");
        }
#endif
        var tr = first.Item2 as IDataTransform;
        env.AssertValue(tr);
        return (tr);
    }
    else
    {
        // File given: load it with the configured loader and tag the result.
        if (!File.Exists(args.filename))
        {
            throw env.Except("Unable to find file '{0}'.", args.filename);
        }
        var selected = TagHelper.EnumerateTaggedView(true, input).Where(c => c.Item1 == args.selectTag);
        if (selected.Any())
        {
            throw env.Except("Tag '{0}' was already given. It cannot be assigned to the new file.", args.selectTag);
        }
        // NOTE(review): loaderArgs appears unused below — possibly leftover.
        var loaderArgs = new BinaryLoader.Arguments();
        var file = new MultiFileSource(args.filename);
        var loadSettings = ScikitSubComponent<ILegacyDataLoader, SignatureDataLoader>.AsSubComponent(args.loaderSettings);
        IDataView loader = loadSettings.CreateInstance(env, file);
        var ag = new TagViewTransform.Arguments { tag = args.selectTag };
        var newInput = new TagViewTransform(env, ag, loader);
        // Ensure the input is a tagged view, wrapping it if necessary.
        var tagged = input as ITaggedDataView;
        if (tagged == null)
        {
            ag = new TagViewTransform.Arguments { tag = args.tag };
            tagged = new TagViewTransform(env, ag, input);
        }
        // Cross-link the loaded view and the input under each other's tags.
        newInput.AddRange(new[] { new Tuple<string, ITaggedDataView>(args.tag, tagged) });
        tagged.AddRange(new[] { new Tuple<string, ITaggedDataView>(args.selectTag, newInput) });
        var schema = loader.Schema;
        if (schema.Count == 0)
        {
            throw env.Except("The loaded view '{0}' is empty (empty schema).", args.filename);
        }
        return (newInput);
    }
}
/// <summary> /// Creates a data transform from the 'LoadName{settings}' string. /// </summary> public static IDataTransform CreateTransform(this IHostEnvironment env, string settings, IDataView source) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(source, nameof(source)); return(CreateCore <IDataTransform, SignatureDataTransform>(env, settings, source)); }