        /// <summary>
        /// Try to load from the given repository entry using the default loader(s) specified in the header.
        /// Returns false iff the default loader(s) could not be bound to a compatible loadable class.
        /// </summary>
        private static bool TryLoadModel<TRes, TSig>(IHostEnvironment env, out TRes result, RepositoryReader rep, Repository.Entry ent, string dir, params object[] extra)
            where TRes : class
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(rep, nameof(rep));
            long fp = ent.Stream.Position;

            using (var ctx = new ModelLoadContext(rep, ent, dir))
            {
                env.Assert(fp == ctx.FpMin);
                if (ctx.TryLoadModelCore<TRes, TSig>(env, out result, extra))
                {
                    return true;
                }
            }

            // TryLoadModelCore should rewind on failure.
            Contracts.Assert(fp == ent.Stream.Position);

            return false;
        }
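
// Illustrative sketch, not part of the snippet above: the position assert after
// the using block depends on a rewind contract, where a failed load restores the
// stream to where it started so the next candidate loader can try. A minimal,
// self-contained demonstration of that pattern using only System.IO; the method
// name and the magic constant are assumptions for illustration.
using System.IO;

internal static class RewindContractSketch
{
    private const uint Magic = 0x4D4C4D44; // hypothetical header value

    public static bool TryParseHeader(Stream stream)
    {
        long fp = stream.Position;
        var reader = new BinaryReader(stream);
        try
        {
            if (reader.ReadUInt32() == Magic)
                return true; // success: leave the stream positioned past the header
        }
        catch (EndOfStreamException)
        {
        }
        stream.Position = fp; // failure: rewind so another loader can try
        return false;
    }
}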
Example 2
        /// <summary>
        /// Loads and returns the loader and transforms from the specified repository reader.
        /// </summary>
        /// <param name="env">The host environment to use.</param>
        /// <param name="rep">The repository reader.</param>
        /// <param name="files">The data source to initialize the loader with.</param>
        /// <param name="extractInnerPipe">Whether to extract the transforms and loader from the wrapped CompositeDataLoader.</param>
        /// <returns>The created data view.</returns>
        public static IDataView LoadPipeline(IHostEnvironment env, RepositoryReader rep, IMultiStreamSource files, bool extractInnerPipe = false)
        {
            // REVIEW: Should not duplicate loading loader/transforms code. This method should call LoadLoader.
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(rep, nameof(rep));
            env.CheckValue(files, nameof(files));
            using (var ent = rep.OpenEntry(DirDataLoaderModel, ModelLoadContext.ModelStreamName))
            {
                IDataLoader loader;
                env.Assert(ent.Stream.Position == 0);
                ModelLoadContext.LoadModel<IDataLoader, SignatureLoadDataLoader>(env, out loader, rep, ent, DirDataLoaderModel, files);
                IDataView result = loader;
                if (extractInnerPipe)
                {
                    var cdl = loader as CompositeDataLoader;
                    result = cdl == null ? loader : cdl.View;
                }

                return result;
            }
        }
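
// Hypothetical usage sketch for LoadPipeline; everything here except the call
// itself (the environment, the opened repository, and the data source) is an
// assumption about what a caller would already have in hand:
//
//     IHostEnvironment env = ...;        // hosting environment
//     RepositoryReader rep = ...;        // opened over a saved model archive
//     IMultiStreamSource files = ...;    // new data to initialize the loader with
//
//     IDataView data = LoadPipeline(env, rep, files, extractInnerPipe: true);
//     // 'data' exposes the saved loader plus transforms, unwrapped from any
//     // CompositeDataLoader because extractInnerPipe is true.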
            public RowMapper(IHostEnvironment env, BindableMapper parent, RoleMappedSchema schema)
            {
                Contracts.AssertValue(env);
                _env = env;
                _env.AssertValue(schema);
                _env.AssertValue(parent);
                _env.Assert(schema.Feature.HasValue);
                _parent = parent;
                InputRoleMappedSchema = schema;
                var genericMapper = parent.GenericMapper.Bind(_env, schema);

                _genericRowMapper = genericMapper as ISchemaBoundRowMapper;

                if (parent.Stringify)
                {
                    var builder = new SchemaBuilder();
                    builder.AddColumn(DefaultColumnNames.FeatureContributions, TextType.Instance, null);
                    _outputSchema = builder.GetSchema();
                    if (FeatureColumn.HasSlotNames(FeatureColumn.Type.VectorSize))
                    {
                        FeatureColumn.Metadata.GetValue(MetadataUtils.Kinds.SlotNames, ref _slotNames);
                    }
                    else
                    {
                        _slotNames = VBufferUtils.CreateEmpty<ReadOnlyMemory<char>>(FeatureColumn.Type.VectorSize);
                    }
                }
                else
                {
                    _outputSchema = Schema.Create(new FeatureContributionSchema(_env, DefaultColumnNames.FeatureContributions,
                                                                                new VectorType(NumberType.R4, FeatureColumn.Type as VectorType),
                                                                                InputSchema, FeatureColumn.Index));
                }

                _outputGenericSchema = _genericRowMapper.OutputSchema;
                OutputSchema         = new ZipBinding(new Schema[] { _outputGenericSchema, _outputSchema }).OutputSchema;
            }
        protected void SampleHyperparameters(RecipeInference.SuggestedRecipe.SuggestedLearner learner, ISweeper sweeper,
                                             bool isMaximizingMetric, PipelinePattern[] history)
        {
            // Make sure there are hyperparameters to sweep over.
            var hyperParams = learner.PipelineNode.SweepParams;

            if (hyperParams.Length == 0)
            {
                return;
            }

            // Get new set of hyperparameter values.
            var proposedParamSet = sweeper.ProposeSweeps(1, AutoMlUtils.ConvertToRunResults(history, isMaximizingMetric)).First();

            Env.Assert(proposedParamSet != null && proposedParamSet.All(ps => hyperParams.Any(hp => hp.Name == ps.Name)));

            // Associate proposed param set with learner, so that smart hyperparam
            // sweepers (like KDO) can map them back.
            learner.PipelineNode.HyperSweeperParamSet = proposedParamSet;

            var generatorSet = hyperParams.Select(AutoMlUtils.ToIValueGenerator).ToArray();
            var values       = SweeperProbabilityUtils.ParameterSetAsFloatArray(Host, generatorSet, proposedParamSet, false);

            // Update hyperparameters.
            for (int i = 0; i < hyperParams.Length; i++)
            {
                if (hyperParams[i] is TlcModule.SweepableDiscreteParamAttribute)
                {
                    // Discrete parameters take the sweeper's output as an index into their value list.
                    hyperParams[i].RawValue = (int)values[i];
                }
                else
                {
                    hyperParams[i].RawValue = values[i];
                }
            }
        }
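
// Simplified stand-alone version of the update loop above: the sweeper returns
// one float per hyperparameter, which is either an index into a discrete value
// list or a literal continuous value. The types below are stand-ins for
// TlcModule's sweepable-parameter attributes, assumed for illustration only.
internal abstract class SweepParamSketch
{
    public object RawValue;
}

internal sealed class DiscreteParamSketch : SweepParamSketch
{
    public string[] Values; // RawValue is interpreted as an index into this list
}

internal sealed class ContinuousParamSketch : SweepParamSketch
{
}

internal static class HyperparamUpdateSketch
{
    public static void Apply(SweepParamSketch[] hyperParams, float[] values)
    {
        for (int i = 0; i < hyperParams.Length; i++)
        {
            if (hyperParams[i] is DiscreteParamSketch)
                hyperParams[i].RawValue = (int)values[i]; // discrete: value index
            else
                hyperParams[i].RawValue = values[i];      // continuous: value itself
        }
    }
}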
Example 5
            public TransformInfo(IHostEnvironment env, ModelLoadContext ctx, int colValueCount, string directoryName)
            {
                env.AssertValue(ctx);
                env.Assert(colValueCount > 0);

                // *** Binary format ***
                // int: d (number of untransformed features)
                // int: NewDim (number of transformed features)
                // bool: UseSin
                // uint[4]: the seeds for the pseudo random number generator.

                SrcDim = ctx.Reader.ReadInt32();
                env.CheckDecode(SrcDim == colValueCount);

                NewDim = ctx.Reader.ReadInt32();
                env.CheckDecode(NewDim > 0);

                _useSin = ctx.Reader.ReadBoolByte();

                var length = ctx.Reader.ReadInt32();

                env.CheckDecode(length == 4);
                _state = TauswortheHybrid.State.Load(ctx.Reader);
                _rand  = new TauswortheHybrid(_state);

                env.CheckDecode(ctx.Repository != null &&
                                ctx.LoadModelOrNull<IFourierDistributionSampler, SignatureLoadModel>(env, out _matrixGenerator, directoryName));

                // initialize the transform matrix
                int roundedUpD           = RoundUp(NewDim, _cfltAlign);
                int roundedUpNumFeatures = RoundUp(SrcDim, _cfltAlign);

                RndFourierVectors = new AlignedArray(roundedUpD * roundedUpNumFeatures, CpuMathUtils.GetVectorAlignment());
                RotationTerms     = _useSin ? null : new AlignedArray(roundedUpD, CpuMathUtils.GetVectorAlignment());
                InitializeFourierCoefficients(roundedUpNumFeatures, roundedUpD);
            }
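
// RoundUp, as used above, pads a dimension to the next multiple of the float
// alignment so AlignedArray rows start on SIMD boundaries. The implementation
// below is the standard formulation, assumed to match; the real helper lives
// elsewhere in the codebase.
internal static class AlignmentSketch
{
    public static int RoundUp(int value, int align)
    {
        // Assumes value >= 0 and align > 0.
        return ((value + align - 1) / align) * align;
    }
}
// Example: with an alignment of 4 floats, RoundUp(10, 4) == 12, so a
// 10-dimensional output gets 2 floats of padding per row.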
        public static CombinedOutput CombineMetrics(IHostEnvironment env, CombineMetricsInput input)
        {
            var eval = GetEvaluator(env, input.Kind);

            var perInst = EvaluateUtils.ConcatenatePerInstanceDataViews(env, eval, true, true,
                input.PerInstanceMetrics.Select(idv => RoleMappedData.CreateOpt(idv, new[]
                {
                    RoleMappedSchema.CreatePair(RoleMappedSchema.ColumnRole.Label, input.LabelColumn),
                    RoleMappedSchema.CreatePair(RoleMappedSchema.ColumnRole.Weight, input.WeightColumn.Value),
                    RoleMappedSchema.CreatePair(RoleMappedSchema.ColumnRole.Group, input.GroupColumn.Value)
                })).ToArray(),
                out var variableSizeVectorColumnNames);

            var warnings = input.Warnings != null ? new List<IDataView>(input.Warnings) : new List<IDataView>();

            if (variableSizeVectorColumnNames.Length > 0)
            {
                var dvBldr = new ArrayDataViewBuilder(env);
                var warn   = $"Detected columns of variable length: {string.Join(", ", variableSizeVectorColumnNames)}." +
                             " Consider setting collateMetrics- for meaningful per-Folds results.";
                dvBldr.AddColumn(MetricKinds.ColumnNames.WarningText, TextType.Instance, new DvText(warn));
                warnings.Add(dvBldr.GetDataView());
            }

            env.Assert(Utils.Size(perInst) == 1);

            var overall = eval.GetOverallResults(input.OverallMetrics);

            overall = EvaluateUtils.CombineFoldMetricsDataViews(env, overall, input.OverallMetrics.Length);

            IDataView conf = null;

            if (Utils.Size(input.ConfusionMatrix) > 0)
            {
                EvaluateUtils.ReconcileSlotNames<double>(env, input.ConfusionMatrix, MetricKinds.ColumnNames.Count, NumberType.R8);

                for (int i = 0; i < input.ConfusionMatrix.Length; i++)
                {
                    var idv = input.ConfusionMatrix[i];
                    // Find the old Count column and drop it.
                    for (int col = 0; col < idv.Schema.ColumnCount; col++)
                    {
                        if (idv.Schema.IsHidden(col) &&
                            idv.Schema.GetColumnName(col).Equals(MetricKinds.ColumnNames.Count))
                        {
                            input.ConfusionMatrix[i] = new ChooseColumnsByIndexTransform(env,
                                new ChooseColumnsByIndexTransform.Arguments()
                                {
                                    Drop = true,
                                    Index = new[] { col }
                                }, idv);
                            break;
                        }
                    }
                }

                conf = EvaluateUtils.ConcatenateOverallMetrics(env, input.ConfusionMatrix);
            }

            var warningsIdv = warnings.Count > 0 ? AppendRowsDataView.Create(env, warnings[0].Schema, warnings.ToArray()) : null;

            return new CombinedOutput()
            {
                PerInstanceMetrics = perInst[0],
                OverallMetrics = overall,
                ConfusionMatrix = conf,
                Warnings = warningsIdv
            };
        }
Example 7
        public static CommonOutputs.MacroOutput <Output> OneVersusAll(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(input, nameof(input));
            env.Assert(input.Nodes.Count > 0);

            var numClasses    = GetNumberOfClasses(env, input, out var label);
            var predModelVars = new Var<PredictorModel>[numClasses];

            // This will be the final resulting list of nodes that is returned from the macro.
            var macroNodes = new List<EntryPointNode>();

            // Instantiate the subgraph for each label value.
            for (int k = 0; k < numClasses; k++)
            {
                predModelVars[k] = ProcessClass(env, macroNodes, k, label, input, node);
            }

            // Convert the predictor models to an array of predictor models.
            var modelsArray = new Var<PredictorModel[]>();

            MacroUtils.ConvertIPredictorModelsToArray(env, node.Context, macroNodes, predModelVars, modelsArray.VarName);

            // Use OVA model combiner to combine these models into one.
            // Takes in array of models that are binary predictor models and
            // produces single multiclass predictor model.
            var combineArgs = new ModelOperations.CombineOvaPredictorModelsInput();

            combineArgs.Caching           = input.Caching;
            combineArgs.FeatureColumnName = input.FeatureColumnName;
            combineArgs.LabelColumnName   = input.LabelColumnName;
            combineArgs.NormalizeFeatures = input.NormalizeFeatures;
            combineArgs.UseProbabilities  = input.UseProbabilities;

            var inputBindingMap            = new Dictionary<string, List<ParameterBinding>>();
            var inputMap                   = new Dictionary<ParameterBinding, VariableBinding>();
            var combineNodeModelArrayInput = new SimpleVariableBinding(modelsArray.VarName);
            var paramBinding               = new SimpleParameterBinding(nameof(combineArgs.ModelArray));

            inputBindingMap.Add(nameof(combineArgs.ModelArray), new List<ParameterBinding>() { paramBinding });
            inputMap.Add(paramBinding, combineNodeModelArrayInput);
            paramBinding = new SimpleParameterBinding(nameof(combineArgs.TrainingData));
            inputBindingMap.Add(nameof(combineArgs.TrainingData), new List<ParameterBinding>() { paramBinding });
            inputMap.Add(paramBinding, node.GetInputVariable(nameof(input.TrainingData)));

            var outputMap = new Dictionary<string, string>();

            outputMap.Add(nameof(Output.PredictorModel), node.GetOutputVariableName(nameof(Output.PredictorModel)));
            var combineModelsNode = EntryPointNode.Create(env, "Models.OvaModelCombiner",
                                                          combineArgs, node.Context, inputBindingMap, inputMap, outputMap);

            macroNodes.Add(combineModelsNode);

            return new CommonOutputs.MacroOutput<Output>()
            {
                Nodes = macroNodes
            };
        }
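
// The shape of the macro, stripped of entry-point plumbing: train one binary
// model per class, then hand the array of models to a combiner. The delegates
// below are stand-ins for the per-class subgraph (ProcessClass) and the OVA
// model combiner; this illustrates the control flow, not the entry-point API.
using System;

internal static class OvaMacroSketch
{
    public static TCombined Run<TModel, TCombined>(
        int numClasses,
        Func<int, TModel> trainBinaryForClass, // analogue of ProcessClass
        Func<TModel[], TCombined> combine)     // analogue of Models.OvaModelCombiner
    {
        var models = new TModel[numClasses];
        for (int k = 0; k < numClasses; k++)
            models[k] = trainBinaryForClass(k); // one binary subgraph per label value
        return combine(models);                 // single multiclass predictor model out
    }
}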
Example 8
        /// <summary>
        /// Produces the estimator. Note that this is made out of <see cref="ReconcileCore(IHostEnvironment, string[])"/>'s
        /// return value, plus whatever usages of <see cref="ColumnCopyingEstimator"/> are necessary to avoid collisions with
        /// the output names fed to the constructor. This class provides the implementation, and subclasses should instead
        /// override <see cref="ReconcileCore(IHostEnvironment, string[])"/>.
        /// </summary>
        public sealed override IEstimator<ITransformer> Reconcile(IHostEnvironment env,
                                                                   PipelineColumn[] toOutput,
                                                                   IReadOnlyDictionary<PipelineColumn, string> inputNames,
                                                                   IReadOnlyDictionary<PipelineColumn, string> outputNames,
                                                                   IReadOnlyCollection<string> usedNames)
        {
            Contracts.AssertValue(env);
            env.AssertValue(toOutput);
            env.AssertValue(inputNames);
            env.AssertValue(outputNames);
            env.AssertValue(usedNames);

            // The reconciler should have been called with all the input columns having names.
            env.Assert(inputNames.Keys.All(Inputs.Contains) && Inputs.All(inputNames.Keys.Contains));
            // The output name map should contain only outputs as their keys. Yet, it is possible not all
            // outputs will be required in which case these will both be subsets of those outputs indicated
            // at construction.
            env.Assert(outputNames.Keys.All(Outputs.Contains));
            env.Assert(toOutput.All(Outputs.Contains));
            env.Assert(Outputs.Count() == _outputNames.Length);

            IEstimator<ITransformer> result = null;

            // In the case where we have names used that conflict with the fixed output names, we must have some
            // renaming logic.
            var collisions = new HashSet<string>(_outputNames);

            collisions.IntersectWith(usedNames);
            var old2New = new Dictionary <string, string>();

            if (collisions.Count > 0)
            {
                // First get the old names to some temporary names.
                int tempNum = 0;
                foreach (var c in collisions)
                {
                    old2New[c] = $"#TrainTemp{tempNum++}";
                }
                // In the case where the input names have anything that is used, we must reconstitute the input mapping.
                if (inputNames.Values.Any(old2New.ContainsKey))
                {
                    var newInputNames = new Dictionary<PipelineColumn, string>();
                    foreach (var p in inputNames)
                    {
                        newInputNames[p.Key] = old2New.ContainsKey(p.Value) ? old2New[p.Value] : p.Value;
                    }
                    inputNames = newInputNames;
                }
                result = new ColumnCopyingEstimator(env, old2New.Select(p => (p.Value, p.Key)).ToArray());
            }

            // Map the inputs to the names.
            string[] mappedInputNames = Inputs.Select(c => inputNames[c]).ToArray();
            // Finally produce the trainer.
            var trainerEst = ReconcileCore(env, mappedInputNames);

            if (result == null)
            {
                result = trainerEst;
            }
            else
            {
                result = result.Append(trainerEst);
            }

            // OK. Now handle the final renamings from the fixed names, to the desired names, in the case
            // where the output was desired, and a renaming is even necessary.
            var toRename = new List <(string outputColumnName, string inputColumnName)>();

            foreach ((PipelineColumn outCol, string fixedName) in Outputs.Zip(_outputNames, (c, n) => (c, n)))
            {
                if (outputNames.TryGetValue(outCol, out string desiredName))
                {
                    toRename.Add((desiredName, fixedName));
                }
                else
                {
                    env.Assert(!toOutput.Contains(outCol));
                }
            }
            // Finally if applicable handle the renaming back from the temp names to the original names.
            foreach (var p in old2New)
            {
                toRename.Add((p.Key, p.Value));
            }
            if (toRename.Count > 0)
            {
                result = result.Append(new ColumnCopyingEstimator(env, toRename.ToArray()));
            }

            return result;
        }
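
// The collision-avoidance scheme above, in isolation: fixed intermediate names
// that clash with names already in use are first moved to temporaries, and the
// temporaries are renamed back after the trainer runs. A stand-alone sketch of
// the rename planning; the names and types are illustrative assumptions.
using System.Collections.Generic;
using System.Linq;

internal static class RenamePlanSketch
{
    public static (List<(string from, string to)> before, List<(string from, string to)> after)
        Plan(string[] fixedNames, HashSet<string> usedNames)
    {
        var before = new List<(string from, string to)>();
        var after = new List<(string from, string to)>();
        int tempNum = 0;
        foreach (var name in fixedNames.Where(usedNames.Contains))
        {
            string temp = $"#TrainTemp{tempNum++}";
            before.Add((name, temp)); // move the colliding column out of the way
            after.Add((temp, name));  // restore it once the trainer has produced its outputs
        }
        return (before, after);
    }
}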
Example 9
        protected override IEstimator<ITransformer> ReconcileCore(IHostEnvironment env, string[] inputNames)
        {
            Contracts.AssertValue(env);
            env.Assert(Utils.Size(inputNames) == Inputs.Length);
            return _estFact(env, inputNames[0], inputNames[1], inputNames.Length > 2 ? inputNames[2] : null);
        }
        internal LinearModelStatistics(IHostEnvironment env, long trainingExampleCount, int paramCount, Single deviance, Single nullDeviance, ref VBuffer<Single> coeffStdError)
            : this(env, trainingExampleCount, paramCount, deviance, nullDeviance)
        {
            _env.Assert(coeffStdError.Count == _paramCount);
            _coeffStdError = coeffStdError;
        }
        public static CommonOutputs.MacroOutput <Output> OneVersusAll(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(input, nameof(input));
            env.Assert(input.Nodes.Count > 0);

            var numClasses    = GetNumberOfClasses(env, input, out var label);
            var predModelVars = new Var<IPredictorModel>[numClasses];

            // This will be the final resulting list of nodes that is returned from the macro.
            var macroNodes = new List<EntryPointNode>();

            // Instantiate the subgraph for each label value.
            for (int k = 0; k < numClasses; k++)
            {
                var result = ProcessClass(env, k, label, input, node);
                predModelVars[k] = result.Item2;
                macroNodes.AddRange(result.Item1);
            }

            // Use OVA model combiner to combine these models into one.
            // Takes in array of models that are binary predictor models and
            // produces single multiclass predictor model.
            var macroExperiment = new Experiment(env);
            var combinerNode    = new Legacy.Models.OvaModelCombiner
            {
                ModelArray   = new ArrayVar<IPredictorModel>(predModelVars),
                TrainingData = new Var<IDataView> {
                    VarName = node.GetInputVariable(nameof(input.TrainingData)).VariableName
                },
                Caching           = (Legacy.Models.CachingOptions)input.Caching,
                FeatureColumn     = input.FeatureColumn,
                NormalizeFeatures = (Legacy.Models.NormalizeOption)input.NormalizeFeatures,
                LabelColumn       = input.LabelColumn,
                UseProbabilities  = input.UseProbabilities
            };

            // Get output model variable.
            if (!node.OutputMap.TryGetValue(nameof(Output.PredictorModel), out var outVariableName))
            {
                throw new Exception("Cannot find OVA model output.");
            }

            // Map macro's output back to OVA combiner (so OVA combiner will set the value on our output variable).
            var combinerOutput = new Legacy.Models.OvaModelCombiner.Output {
                PredictorModel = new Var<IPredictorModel> {
                    VarName = outVariableName
                }
            };

            // Add to experiment (must be done AFTER we assign variable name to output).
            macroExperiment.Add(combinerNode, combinerOutput);

            // Add nodes to main experiment.
            var nodes    = macroExperiment.GetNodes();
            var expNodes = EntryPointNode.ValidateNodes(env, node.Context, nodes);

            macroNodes.AddRange(expNodes);

            return new CommonOutputs.MacroOutput<Output>()
            {
                Nodes = macroNodes
            };
        }
        private void ShowHelp(IndentingTextWriter writer, int? columns = null)
        {
            _env.AssertValue(_component);

            string name = _component.Trim();

            string sig = _kind?.ToLowerInvariant();

            // Note that we don't check IsHidden here. The current policy is when IsHidden is true, we don't
            // show the item in "list all" functionality, but will still show help when explicitly requested.

            var infos = _env.ComponentCatalog.FindLoadableClasses(name)
                        .OrderBy(x => ComponentCatalog.SignatureToString(x.SignatureTypes[0]).ToLowerInvariant());
            var kinds      = new StringBuilder();
            var components = new List<Component>();

            foreach (var info in infos)
            {
                _env.AssertValue(info.SignatureTypes);
                kinds.Clear();
                bool foundSig = false;
                foreach (var signature in info.SignatureTypes)
                {
                    _env.Assert(signature.BaseType == typeof(MulticastDelegate));

                    string kind;
                    if (signature == typeof(SignatureDefault))
                    {
                        kind = "Component";
                        if (sig == null || "default".StartsWithInvariantCulture(sig))
                        {
                            foundSig = true;
                        }
                    }
                    else
                    {
                        kind = ComponentCatalog.SignatureToString(signature);
                        if (sig == null || kind.StartsWithInvariantCultureIgnoreCase(sig))
                        {
                            foundSig = true;
                        }
                    }

                    if (kinds.Length > 0)
                    {
                        kinds.Append(", ");
                    }
                    kinds.Append(kind);
                }
                if (foundSig)
                {
                    string kindsStr = kinds.ToString();
                    var    args     = info.CreateArguments();

                    ShowUsage(writer, kindsStr, info.Summary, info.LoadNames[0], info.LoadNames, args, columns);
                    components.Add(new Component(kindsStr, info, args));
                }
            }

            if (components.Count == 0)
            {
                writer.WriteLine("Unknown component: '{0}'", name);
            }
            else
            {
                Serialize(components);
            }
        }
        // Factory method for SignatureLoadModel.
        private static TensorFlowTransform Create(IHostEnvironment env, ModelLoadContext ctx)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel(GetVersionInfo());

            // *** Binary format ***
            // byte: indicator for frozen models
            // stream: tensorFlow model.
            // int: number of input columns
            // for each input column
            //   int: id of input column name
            // int: number of output columns
            // for each output column
            //   int: id of output column name
            GetModelInfo(env, ctx, out string[] inputs, out string[] outputs, out bool isFrozen);
            if (isFrozen)
            {
                byte[] modelBytes = null;
                if (!ctx.TryLoadBinaryStream("TFModel", r => modelBytes = r.ReadByteArray()))
                {
                    throw env.ExceptDecode();
                }
                return new TensorFlowTransform(env, TensorFlowUtils.LoadTFSession(env, modelBytes), inputs, outputs, null, false);
            }

            var tempDirPath = Path.GetFullPath(Path.Combine(Path.GetTempPath(), RegistrationName + "_" + Guid.NewGuid()));

            TensorFlowUtils.CreateFolderWithAclIfNotExists(env, tempDirPath);
            try
            {
                var load = ctx.TryLoadBinaryStream("TFSavedModel", br =>
                {
                    int count = br.ReadInt32();
                    for (int n = 0; n < count; n++)
                    {
                        string relativeFile = br.ReadString();
                        long fileLength     = br.ReadInt64();

                        string fullFilePath = Path.Combine(tempDirPath, relativeFile);
                        string fullFileDir  = Path.GetDirectoryName(fullFilePath);
                        if (fullFileDir != tempDirPath)
                        {
                            TensorFlowUtils.CreateFolderWithAclIfNotExists(env, fullFileDir);
                        }
                        using (var fs = new FileStream(fullFilePath, FileMode.Create, FileAccess.Write))
                        {
                            long actualRead = br.BaseStream.CopyRange(fs, fileLength);
                            env.Assert(actualRead == fileLength);
                        }
                    }
                });
                if (!load)
                    throw env.ExceptDecode();

                return new TensorFlowTransform(env, TensorFlowUtils.GetSession(env, tempDirPath), inputs, outputs, tempDirPath, true);
            }
            catch (Exception)
            {
                TensorFlowUtils.DeleteFolderWithRetries(env, tempDirPath);
                throw;
            }
        }
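
// The "TFSavedModel" stream layout decoded above, expressed as the matching
// writer: an int file count, then for each file its relative path, length, and
// raw bytes. This sketch is inferred from the reader loop in the snippet; it is
// an assumption for illustration, not an official format specification.
using System.Collections.Generic;
using System.IO;

internal static class SavedModelStreamSketch
{
    public static void Write(BinaryWriter bw, IReadOnlyList<(string relativePath, byte[] bytes)> files)
    {
        bw.Write(files.Count);            // int: number of files
        foreach (var (relativePath, bytes) in files)
        {
            bw.Write(relativePath);       // string: path relative to the model directory
            bw.Write((long)bytes.Length); // long: file length in bytes
            bw.Write(bytes);              // raw file contents
        }
    }
}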