/// <summary>
/// Attempts to load a model from the given repository entry using the default loader(s)
/// named in the entry's header. Returns false iff no default loader could be bound to a
/// compatible loadable class.
/// </summary>
private static bool TryLoadModel<TRes, TSig>(IHostEnvironment env, out TRes result, RepositoryReader rep, Repository.Entry ent, string dir, params object[] extra)
    where TRes : class
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(rep, nameof(rep));

    long entryStart = ent.Stream.Position;
    bool loaded;
    using (var ctx = new ModelLoadContext(rep, ent, dir))
    {
        // The load context's minimum file position should coincide with where the entry stream sits.
        env.Assert(entryStart == ctx.FpMin);
        loaded = ctx.TryLoadModelCore<TRes, TSig>(env, out result, extra);
    }
    if (!loaded)
    {
        // TryLoadModelCore should rewind on failure.
        Contracts.Assert(entryStart == ent.Stream.Position);
    }
    return loaded;
}
/// <summary>
/// Loads and returns the loader and transforms from the specified repository reader.
/// </summary>
/// <param name="env">The host environment to use.</param>
/// <param name="rep">The repository reader.</param>
/// <param name="files">The data source to initialize the loader with.</param>
/// <param name="extractInnerPipe">Whether to extract the transforms and loader from the wrapped CompositeDataLoader.</param>
/// <returns>The created data view.</returns>
public static IDataView LoadPipeline(IHostEnvironment env, RepositoryReader rep, IMultiStreamSource files, bool extractInnerPipe = false)
{
    // REVIEW: Should not duplicate loading loader/transforms code. This method should call LoadLoader.
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(rep, nameof(rep));
    env.CheckValue(files, nameof(files));

    using (var ent = rep.OpenEntry(DirDataLoaderModel, ModelLoadContext.ModelStreamName))
    {
        env.Assert(ent.Stream.Position == 0);
        ModelLoadContext.LoadModel<IDataLoader, SignatureLoadDataLoader>(env, out IDataLoader loader, rep, ent, DirDataLoaderModel, files);

        // When asked to unwrap a composite loader, hand back its inner view;
        // otherwise (or when the loader is not composite) the loader itself is the view.
        if (extractInnerPipe && loader is CompositeDataLoader composite)
            return composite.View;
        return loader;
    }
}
// Constructs a row mapper that binds the parent's generic mapper to the given schema and
// prepares the output schema for feature contributions — either a single stringified text
// column or a numeric vector column, depending on parent.Stringify.
public RowMapper(IHostEnvironment env, BindableMapper parent, RoleMappedSchema schema)
{
    Contracts.AssertValue(env);
    _env = env;
    _env.AssertValue(schema);
    _env.AssertValue(parent);
    _env.Assert(schema.Feature.HasValue);
    _parent = parent;
    InputRoleMappedSchema = schema;

    // Bind the underlying generic mapper; it supplies the non-contribution output columns.
    var genericMapper = parent.GenericMapper.Bind(_env, schema);
    _genericRowMapper = genericMapper as ISchemaBoundRowMapper;

    if (parent.Stringify)
    {
        // Stringified output: one text column holding the formatted contributions.
        var builder = new SchemaBuilder();
        builder.AddColumn(DefaultColumnNames.FeatureContributions, TextType.Instance, null);
        _outputSchema = builder.GetSchema();
        // Cache the feature slot names (if present) for use when formatting the text;
        // otherwise fall back to an empty buffer sized to the feature vector.
        if (FeatureColumn.HasSlotNames(FeatureColumn.Type.VectorSize))
        {
            FeatureColumn.Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref _slotNames);
        }
        else
        {
            _slotNames = VBufferUtils.CreateEmpty<ReadOnlyMemory<char>>(FeatureColumn.Type.VectorSize);
        }
    }
    else
    {
        // Numeric output: a vector of R4 contributions parallel to the feature vector.
        _outputSchema = Schema.Create(new FeatureContributionSchema(_env, DefaultColumnNames.FeatureContributions,
            new VectorType(NumberType.R4, FeatureColumn.Type as VectorType),
            InputSchema, FeatureColumn.Index));
    }

    // Final output schema: the generic mapper's columns followed by the contribution column.
    _outputGenericSchema = _genericRowMapper.OutputSchema;
    OutputSchema = new ZipBinding(new Schema[] { _outputGenericSchema, _outputSchema, }).OutputSchema;
}
/// <summary>
/// Asks the sweeper for a fresh hyperparameter configuration (informed by prior runs in
/// <paramref name="history"/>) and writes the sampled values back onto the learner's
/// sweepable parameters.
/// </summary>
protected void SampleHyperparameters(RecipeInference.SuggestedRecipe.SuggestedLearner learner, ISweeper sweeper, bool isMaximizingMetric, PipelinePattern[] history)
{
    var sweepParams = learner.PipelineNode.SweepParams;
    // Nothing to sweep over.
    if (sweepParams.Length == 0)
        return;

    // Get new set of hyperparameter values.
    var runResults = AutoMlUtils.ConvertToRunResults(history, isMaximizingMetric);
    var proposedParamSet = sweeper.ProposeSweeps(1, runResults).First();
    Env.Assert(proposedParamSet != null && proposedParamSet.All(ps => sweepParams.Any(hp => hp.Name == ps.Name)));

    // Associate proposed param set with learner, so that smart hyperparam
    // sweepers (like KDO) can map them back.
    learner.PipelineNode.HyperSweeperParamSet = proposedParamSet;

    var generatorSet = sweepParams.Select(AutoMlUtils.ToIValueGenerator).ToArray();
    var values = SweeperProbabilityUtils.ParameterSetAsFloatArray(Host, generatorSet, proposedParamSet, false);

    // Update hyperparameters. Discrete parameters store an integer index
    // rather than the raw float.
    for (int i = 0; i < sweepParams.Length; i++)
    {
        var param = sweepParams[i];
        if (param is TlcModule.SweepableDiscreteParamAttribute)
            param.RawValue = (int)values[i];
        else
            param.RawValue = values[i];
    }
}
// Deserializing constructor: reads the random Fourier feature transform state from the
// model load context and rebuilds the (aligned) coefficient matrices.
public TransformInfo(IHostEnvironment env, ModelLoadContext ctx, int colValueCount, string directoryName)
{
    // BUGFIX: the original asserted env against itself (env.AssertValue(env)), which
    // validates nothing; validate env via Contracts and assert ctx instead.
    Contracts.AssertValue(env);
    env.AssertValue(ctx);
    env.Assert(colValueCount > 0);

    // *** Binary format ***
    // int: d (number of untransformed features)
    // int: NewDim (number of transformed features)
    // bool: UseSin
    // uint[4]: the seeds for the pseudo random number generator.

    SrcDim = ctx.Reader.ReadInt32();
    env.CheckDecode(SrcDim == colValueCount);

    NewDim = ctx.Reader.ReadInt32();
    env.CheckDecode(NewDim > 0);

    _useSin = ctx.Reader.ReadBoolByte();

    // Exactly four seed words are expected for the Tausworthe generator state.
    var length = ctx.Reader.ReadInt32();
    env.CheckDecode(length == 4);
    _state = TauswortheHybrid.State.Load(ctx.Reader);
    _rand = new TauswortheHybrid(_state);

    // The matrix-generator sub-model must be present in the repository.
    env.CheckDecode(ctx.Repository != null &&
        ctx.LoadModelOrNull<IFourierDistributionSampler, SignatureLoadModel>(env, out _matrixGenerator, directoryName));

    // Initialize the transform matrix; dimensions are rounded up for SIMD alignment.
    int roundedUpD = RoundUp(NewDim, _cfltAlign);
    int roundedUpNumFeatures = RoundUp(SrcDim, _cfltAlign);
    RndFourierVectors = new AlignedArray(roundedUpD * roundedUpNumFeatures, CpuMathUtils.GetVectorAlignment());
    RotationTerms = _useSin ? null : new AlignedArray(roundedUpD, CpuMathUtils.GetVectorAlignment());

    InitializeFourierCoefficients(roundedUpNumFeatures, roundedUpD);
}
// Combines per-instance metrics, overall metrics, confusion matrices, and warnings
// produced by per-fold (or per-model) evaluations into a single CombinedOutput.
public static CombinedOutput CombineMetrics(IHostEnvironment env, CombineMetricsInput input)
{
    var eval = GetEvaluator(env, input.Kind);

    // Concatenate the per-instance data views, mapping label/weight/group roles onto each.
    var perInst = EvaluateUtils.ConcatenatePerInstanceDataViews(env, eval, true, true, input.PerInstanceMetrics.Select(
        idv => RoleMappedData.CreateOpt(idv, new[]
        {
            RoleMappedSchema.CreatePair(RoleMappedSchema.ColumnRole.Label, input.LabelColumn),
            RoleMappedSchema.CreatePair(RoleMappedSchema.ColumnRole.Weight, input.WeightColumn.Value),
            RoleMappedSchema.CreatePair(RoleMappedSchema.ColumnRole.Group, input.GroupColumn.Value)
        })).ToArray(),
        out var variableSizeVectorColumnNames);

    var warnings = input.Warnings != null ? new List<IDataView>(input.Warnings) : new List<IDataView>();
    // Columns of variable vector length cannot be meaningfully concatenated; surface a warning row.
    if (variableSizeVectorColumnNames.Length > 0)
    {
        var dvBldr = new ArrayDataViewBuilder(env);
        var warn = $"Detected columns of variable length: {string.Join(", ", variableSizeVectorColumnNames)}." +
            $" Consider setting collateMetrics- for meaningful per-Folds results.";
        dvBldr.AddColumn(MetricKinds.ColumnNames.WarningText, TextType.Instance, new DvText(warn));
        warnings.Add(dvBldr.GetDataView());
    }

    env.Assert(Utils.Size(perInst) == 1);

    // Fold the per-run overall metrics into a single combined view.
    var overall = eval.GetOverallResults(input.OverallMetrics);
    overall = EvaluateUtils.CombineFoldMetricsDataViews(env, overall, input.OverallMetrics.Length);

    IDataView conf = null;
    if (Utils.Size(input.ConfusionMatrix) > 0)
    {
        // Align the slot names of the Count column across the folds' confusion matrices.
        EvaluateUtils.ReconcileSlotNames<double>(env, input.ConfusionMatrix, MetricKinds.ColumnNames.Count, NumberType.R8);

        for (int i = 0; i < input.ConfusionMatrix.Length; i++)
        {
            var idv = input.ConfusionMatrix[i];

            // Find the old Count column and drop it.
            for (int col = 0; col < idv.Schema.ColumnCount; col++)
            {
                if (idv.Schema.IsHidden(col) &&
                    idv.Schema.GetColumnName(col).Equals(MetricKinds.ColumnNames.Count))
                {
                    input.ConfusionMatrix[i] = new ChooseColumnsByIndexTransform(env,
                        new ChooseColumnsByIndexTransform.Arguments() { Drop = true, Index = new[] { col } }, idv);
                    break;
                }
            }
        }

        conf = EvaluateUtils.ConcatenateOverallMetrics(env, input.ConfusionMatrix);
    }

    // Stack all warning views into one view, if any were produced.
    var warningsIdv = warnings.Count > 0 ? AppendRowsDataView.Create(env, warnings[0].Schema, warnings.ToArray()) : null;
    return (new CombinedOutput() { PerInstanceMetrics = perInst[0], OverallMetrics = overall, ConfusionMatrix = conf, Warnings = warningsIdv });
}
// Entry-point macro that expands a binary-classifier subgraph into a one-versus-all
// multiclass training graph: one copy of the subgraph per class, followed by a node
// that combines the per-class binary models into one multiclass predictor model.
public static CommonOutputs.MacroOutput<Output> OneVersusAll(
    IHostEnvironment env,
    Arguments input,
    EntryPointNode node)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(input, nameof(input));
    env.Assert(input.Nodes.Count > 0);

    var numClasses = GetNumberOfClasses(env, input, out var label);
    var predModelVars = new Var<PredictorModel>[numClasses];

    // This will be the final resulting list of nodes that is returned from the macro.
    var macroNodes = new List<EntryPointNode>();

    // Instantiate the subgraph for each label value.
    for (int k = 0; k < numClasses; k++)
    {
        predModelVars[k] = ProcessClass(env, macroNodes, k, label, input, node);
    }

    // Convert the predictor models to an array of predictor models.
    var modelsArray = new Var<PredictorModel[]>();
    MacroUtils.ConvertIPredictorModelsToArray(env, node.Context, macroNodes, predModelVars, modelsArray.VarName);

    // Use OVA model combiner to combine these models into one.
    // Takes in array of models that are binary predictor models and
    // produces single multiclass predictor model.
    var combineArgs = new ModelOperations.CombineOvaPredictorModelsInput();
    combineArgs.Caching = input.Caching;
    combineArgs.FeatureColumnName = input.FeatureColumnName;
    combineArgs.LabelColumnName = input.LabelColumnName;
    combineArgs.NormalizeFeatures = input.NormalizeFeatures;
    combineArgs.UseProbabilities = input.UseProbabilities;

    // Bind the assembled model array and the original training data as inputs to the combiner node.
    var inputBindingMap = new Dictionary<string, List<ParameterBinding>>();
    var inputMap = new Dictionary<ParameterBinding, VariableBinding>();
    var combineNodeModelArrayInput = new SimpleVariableBinding(modelsArray.VarName);
    var paramBinding = new SimpleParameterBinding(nameof(combineArgs.ModelArray));
    inputBindingMap.Add(nameof(combineArgs.ModelArray), new List<ParameterBinding>() { paramBinding });
    inputMap.Add(paramBinding, combineNodeModelArrayInput);
    // The same paramBinding local is reused for the TrainingData parameter.
    paramBinding = new SimpleParameterBinding(nameof(combineArgs.TrainingData));
    inputBindingMap.Add(nameof(combineArgs.TrainingData), new List<ParameterBinding>() { paramBinding });
    inputMap.Add(paramBinding, node.GetInputVariable(nameof(input.TrainingData)));

    // Route the combiner's output to the macro's declared output variable.
    var outputMap = new Dictionary<string, string>();
    outputMap.Add(nameof(Output.PredictorModel), node.GetOutputVariableName(nameof(Output.PredictorModel)));
    var combineModelsNode = EntryPointNode.Create(env, "Models.OvaModelCombiner",
        combineArgs, node.Context, inputBindingMap, inputMap, outputMap);
    macroNodes.Add(combineModelsNode);

    return (new CommonOutputs.MacroOutput<Output>() { Nodes = macroNodes });
}
/// <summary>
/// Produces the estimator. Note that this is made out of <see cref="ReconcileCore(IHostEnvironment, string[])"/>'s
/// return value, plus whatever usages of <see cref="ColumnCopyingEstimator"/> are necessary to avoid collisions with
/// the output names fed to the constructor. This class provides the implementation, and subclasses should instead
/// override <see cref="ReconcileCore(IHostEnvironment, string[])"/>.
/// </summary>
public sealed override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
    PipelineColumn[] toOutput,
    IReadOnlyDictionary<PipelineColumn, string> inputNames,
    IReadOnlyDictionary<PipelineColumn, string> outputNames,
    IReadOnlyCollection<string> usedNames)
{
    Contracts.AssertValue(env);
    env.AssertValue(toOutput);
    env.AssertValue(inputNames);
    env.AssertValue(outputNames);
    env.AssertValue(usedNames);

    // The reconciler should have been called with all the input columns having names.
    env.Assert(inputNames.Keys.All(Inputs.Contains) && Inputs.All(inputNames.Keys.Contains));
    // The output name map should contain only outputs as their keys. Yet, it is possible not all
    // outputs will be required in which case these will both be subsets of those outputs indicated
    // at construction.
    env.Assert(outputNames.Keys.All(Outputs.Contains));
    env.Assert(toOutput.All(Outputs.Contains));
    env.Assert(Outputs.Count() == _outputNames.Length);

    IEstimator<ITransformer> result = null;

    // In the case where we have names used that conflict with the fixed output names, we must have some
    // renaming logic.
    var collisions = new HashSet<string>(_outputNames);
    collisions.IntersectWith(usedNames);
    var old2New = new Dictionary<string, string>();

    if (collisions.Count > 0)
    {
        // First get the old names to some temporary names.
        int tempNum = 0;
        foreach (var c in collisions)
        {
            old2New[c] = $"#TrainTemp{tempNum++}";
        }
        // In the case where the input names have anything that is used, we must reconstitute the input mapping.
        if (inputNames.Values.Any(old2New.ContainsKey))
        {
            var newInputNames = new Dictionary<PipelineColumn, string>();
            foreach (var p in inputNames)
            {
                // Substitute the temporary name wherever an input referenced a colliding name.
                newInputNames[p.Key] = old2New.ContainsKey(p.Value) ? old2New[p.Value] : p.Value;
            }
            inputNames = newInputNames;
        }
        result = new ColumnCopyingEstimator(env, old2New.Select(p => (p.Value, p.Key)).ToArray());
    }

    // Map the inputs to the names.
    string[] mappedInputNames = Inputs.Select(c => inputNames[c]).ToArray();
    // Finally produce the trainer.
    var trainerEst = ReconcileCore(env, mappedInputNames);
    if (result == null)
    {
        result = trainerEst;
    }
    else
    {
        result = result.Append(trainerEst);
    }

    // OK. Now handle the final renamings from the fixed names, to the desired names, in the case
    // where the output was desired, and a renaming is even necessary.
    var toRename = new List<(string outputColumnName, string inputColumnName)>();
    foreach ((PipelineColumn outCol, string fixedName) in Outputs.Zip(_outputNames, (c, n) => (c, n)))
    {
        if (outputNames.TryGetValue(outCol, out string desiredName))
        {
            toRename.Add((desiredName, fixedName));
        }
        else
        {
            // Output not requested by the caller; it must not appear in toOutput either.
            env.Assert(!toOutput.Contains(outCol));
        }
    }
    // Finally if applicable handle the renaming back from the temp names to the original names.
    foreach (var p in old2New)
    {
        toRename.Add((p.Key, p.Value));
    }
    if (toRename.Count > 0)
    {
        result = result.Append(new ColumnCopyingEstimator(env, toRename.ToArray()));
    }

    return (result);
}
// Builds the trainer estimator from the mapped input column names; the third
// (weight) column is optional and may be absent.
protected override IEstimator<ITransformer> ReconcileCore(IHostEnvironment env, string[] inputNames)
{
    Contracts.AssertValue(env);
    env.Assert(Utils.Size(inputNames) == Inputs.Length);

    string optionalThirdName = inputNames.Length > 2 ? inputNames[2] : null;
    return _estFact(env, inputNames[0], inputNames[1], optionalThirdName);
}
// Chains to the primary constructor, then records the per-parameter standard errors
// of the coefficient estimates.
internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance, ref VBuffer<Single> coeffStdError)
    : this(env, trainingExampleCount, paramCount, deviance, nullDeviance)
{
    // One standard error per parameter; _env and _paramCount are initialized by the chained constructor.
    _env.Assert(coeffStdError.Count == _paramCount);
    _coeffStdError = coeffStdError;
}
/// <summary>
/// Entry-point macro that expands a binary-classifier subgraph into a one-versus-all
/// multiclass training graph (legacy Experiment-based variant): one copy of the subgraph
/// per class, followed by an OVA model combiner that produces a single multiclass model.
/// </summary>
public static CommonOutputs.MacroOutput<Output> OneVersusAll(
    IHostEnvironment env,
    Arguments input,
    EntryPointNode node)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(input, nameof(input));
    env.Assert(input.Nodes.Count > 0);

    var numClasses = GetNumberOfClasses(env, input, out var label);
    var predModelVars = new Var<IPredictorModel>[numClasses];

    // This will be the final resulting list of nodes that is returned from the macro.
    var macroNodes = new List<EntryPointNode>();

    // Instantiate the subgraph for each label value.
    for (int k = 0; k < numClasses; k++)
    {
        var result = ProcessClass(env, k, label, input, node);
        predModelVars[k] = result.Item2;
        macroNodes.AddRange(result.Item1);
    }

    // Use OVA model combiner to combine these models into one.
    // Takes in array of models that are binary predictor models and
    // produces single multiclass predictor model.
    var macroExperiment = new Experiment(env);
    var combinerNode = new Legacy.Models.OvaModelCombiner
    {
        ModelArray = new ArrayVar<IPredictorModel>(predModelVars),
        TrainingData = new Var<IDataView> { VarName = node.GetInputVariable(nameof(input.TrainingData)).VariableName },
        Caching = (Legacy.Models.CachingOptions)input.Caching,
        FeatureColumn = input.FeatureColumn,
        NormalizeFeatures = (Legacy.Models.NormalizeOption)input.NormalizeFeatures,
        LabelColumn = input.LabelColumn,
        UseProbabilities = input.UseProbabilities
    };

    // Get output model variable.
    if (!node.OutputMap.TryGetValue(nameof(Output.PredictorModel), out var outVariableName))
    {
        // BUGFIX: previously threw a bare System.Exception, which callers cannot catch
        // specifically and which carries no ML.NET context; use the environment's
        // exception helper instead (message unchanged).
        throw env.Except("Cannot find OVA model output.");
    }

    // Map macro's output back to OVA combiner (so OVA combiner will set the value on our output variable).
    var combinerOutput = new Legacy.Models.OvaModelCombiner.Output
    {
        PredictorModel = new Var<IPredictorModel> { VarName = outVariableName }
    };

    // Add to experiment (must be done AFTER we assign variable name to output).
    macroExperiment.Add(combinerNode, combinerOutput);

    // Add nodes to main experiment.
    var nodes = macroExperiment.GetNodes();
    var expNodes = EntryPointNode.ValidateNodes(env, node.Context, nodes);
    macroNodes.AddRange(expNodes);

    return new CommonOutputs.MacroOutput<Output>() { Nodes = macroNodes };
}
// Writes detailed help for the component named in _component, optionally filtered to the
// signature kind in _kind. If nothing matches, reports the component as unknown; otherwise
// serializes the matched components after printing their usage.
private void ShowHelp(IndentingTextWriter writer, int? columns = null)
{
    _env.AssertValue(_component);

    string name = _component.Trim();
    string sig = _kind?.ToLowerInvariant();

    // Note that we don't check IsHidden here. The current policy is when IsHidden is true, we don't
    // show the item in "list all" functionality, but will still show help when explicitly requested.
    var infos = _env.ComponentCatalog.FindLoadableClasses(name)
        .OrderBy(x => ComponentCatalog.SignatureToString(x.SignatureTypes[0]).ToLowerInvariant());

    var kinds = new StringBuilder();
    var components = new List<Component>();
    foreach (var info in infos)
    {
        _env.AssertValue(info.SignatureTypes);
        kinds.Clear();
        bool foundSig = false;
        foreach (var signature in info.SignatureTypes)
        {
            _env.Assert(signature.BaseType == typeof(MulticastDelegate));
            string kind;
            if (signature == typeof(SignatureDefault))
            {
                // The default signature is displayed under the generic "Component" kind.
                kind = "Component";
                if (sig == null || "default".StartsWithInvariantCulture(sig))
                {
                    foundSig = true;
                }
            }
            else
            {
                kind = ComponentCatalog.SignatureToString(signature);
                // A prefix match on the kind name is enough to satisfy the filter.
                if (sig == null || kind.StartsWithInvariantCultureIgnoreCase(sig))
                {
                    foundSig = true;
                }
            }
            // Accumulate a comma-separated list of all signature kinds for this component.
            if (kinds.Length > 0)
            {
                kinds.Append(", ");
            }
            kinds.Append(kind);
        }
        if (foundSig)
        {
            string kindsStr = kinds.ToString();
            var args = info.CreateArguments();

            ShowUsage(writer, kindsStr, info.Summary, info.LoadNames[0], info.LoadNames, args, columns);
            components.Add(new Component(kindsStr, info, args));
        }
    }

    if (components.Count == 0)
    {
        writer.WriteLine("Unknown component: '{0}'", name);
    }
    else
    {
        Serialize(components);
    }
}
// Factory method for SignatureLoadModel. Rehydrates a TensorFlowTransform from the model
// context: frozen graphs are loaded from a single byte stream; SavedModel bundles are
// extracted to a temporary directory before the TF session is created.
private static TensorFlowTransform Create(IHostEnvironment env, ModelLoadContext ctx)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(ctx, nameof(ctx));
    ctx.CheckAtModel(GetVersionInfo());

    // *** Binary format ***
    // byte: indicator for frozen models
    // stream: tensorFlow model.
    // int: number of input columns
    // for each input column
    //   int: id of int column name
    // int: number of output columns
    // for each output column
    //   int: id of output column name
    GetModelInfo(env, ctx, out string[] inputs, out string[] outputs, out bool isFrozen);
    if (isFrozen)
    {
        // Frozen graph: the entire model is one serialized byte stream.
        byte[] modelBytes = null;
        if (!ctx.TryLoadBinaryStream("TFModel", r => modelBytes = r.ReadByteArray()))
        {
            throw env.ExceptDecode();
        }
        return (new TensorFlowTransform(env, TensorFlowUtils.LoadTFSession(env, modelBytes), inputs, outputs, null, false));
    }

    // SavedModel: the serialized stream packs a set of files that must be materialized
    // on disk (under a unique temp directory) before TensorFlow can load them.
    var tempDirPath = Path.GetFullPath(Path.Combine(Path.GetTempPath(), RegistrationName + "_" + Guid.NewGuid()));
    TensorFlowUtils.CreateFolderWithAclIfNotExists(env, tempDirPath);
    try
    {
        var load = ctx.TryLoadBinaryStream("TFSavedModel", br =>
        {
            // The stream encodes a file count followed by (relative path, length, bytes) records.
            int count = br.ReadInt32();
            for (int n = 0; n < count; n++)
            {
                string relativeFile = br.ReadString();
                long fileLength = br.ReadInt64();

                string fullFilePath = Path.Combine(tempDirPath, relativeFile);
                string fullFileDir = Path.GetDirectoryName(fullFilePath);
                if (fullFileDir != tempDirPath)
                {
                    // Recreate nested directories from the bundle on demand.
                    TensorFlowUtils.CreateFolderWithAclIfNotExists(env, fullFileDir);
                }
                using (var fs = new FileStream(fullFilePath, FileMode.Create, FileAccess.Write))
                {
                    long actualRead = br.BaseStream.CopyRange(fs, fileLength);
                    env.Assert(actualRead == fileLength);
                }
            }
        });
        return (new TensorFlowTransform(env, TensorFlowUtils.GetSession(env, tempDirPath), inputs, outputs, tempDirPath, true));
    }
    catch (Exception)
    {
        // Best-effort cleanup of the extracted files on any failure; 'throw;' preserves the stack.
        TensorFlowUtils.DeleteFolderWithRetries(env, tempDirPath);
        throw;
    }
}