/// <summary>
/// Deserializes a BindableMapper from a model load context: loads the wrapped
/// predictor, then reads the contribution-count and normalization options in the
/// binary format documented below.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="ctx">Model load context to read from; must not be null.</param>
public BindableMapper(IHostEnvironment env, ModelLoadContext ctx)
            {
                Contracts.CheckValue(env, nameof(env));
                _env = env;
                _env.CheckValue(ctx, nameof(ctx));
                // Verify the stream's version header matches what this loader supports.
                ctx.CheckAtModel(GetVersionInfo());

                // *** Binary format ***
                // IFeatureContributionMapper: Predictor
                // int: topContributionsCount
                // int: bottomContributionsCount
                // bool: normalize
                // bool: stringify

                // Load the nested predictor model from its own directory in the model archive.
                ctx.LoadModel <IFeatureContributionMapper, SignatureLoadModel>(env, out Predictor, ModelFileUtils.DirPredictor);
                GenericMapper          = ScoreUtils.GetSchemaBindableMapper(_env, Predictor, null);
                _topContributionsCount = ctx.Reader.ReadInt32();
                // CheckDecode: a negative count indicates a corrupt model stream.
                Contracts.CheckDecode(0 <= _topContributionsCount);
                _bottomContributionsCount = ctx.Reader.ReadInt32();
                Contracts.CheckDecode(0 <= _bottomContributionsCount);
                _normalize = ctx.Reader.ReadBoolByte();
                Stringify  = ctx.Reader.ReadBoolByte();
            }
예제 #2
0
        /// <summary>
        /// This method takes a <see cref="RoleMappedData"/> as input, saves it as an in-memory <see cref="ZipArchive"/>
        /// and returns two arrays indexed by the entries in the zip:
        /// 1. An array of byte arrays, containing the byte sequences of each entry.
        /// 2. An array of strings, containing the name of each entry.
        ///
        /// This method is used for comparing pipelines. Its outputs can be passed to <see cref="CheckSamePipeline"/>
        /// to check if this pipeline is identical to another pipeline.
        /// </summary>
        public static void SerializeRoleMappedData(IHostEnvironment env, IChannel ch, RoleMappedData data,
                                                   out byte[][] dataSerialized, out string[] dataZipEntryNames)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(ch, nameof(ch));
            ch.CheckValue(data, nameof(data));

            using (var ms = new MemoryStream())
            {
                TrainUtils.SaveModel(env, ch, ms, null, data);
                // Dispose the archive deterministically; entries are sorted by name so
                // two serializations of the same pipeline yield arrays in the same order.
                using (var zip = new ZipArchive(ms))
                {
                    var entries = zip.Entries.OrderBy(e => e.FullName).ToArray();
                    dataSerialized    = new byte[Utils.Size(entries)][];
                    dataZipEntryNames = new string[Utils.Size(entries)];
                    for (int i = 0; i < Utils.Size(entries); i++)
                    {
                        dataZipEntryNames[i] = entries[i].FullName;
                        var buffer = new byte[entries[i].Length];
                        dataSerialized[i] = buffer;
                        using (var s = entries[i].Open())
                        {
                            // Stream.Read may return fewer bytes than requested, so loop
                            // until the entire entry has been read; a single Read call
                            // could silently truncate the serialized bytes.
                            int offset = 0;
                            while (offset < buffer.Length)
                            {
                                int read = s.Read(buffer, offset, buffer.Length - offset);
                                ch.Check(read > 0, "Unexpected end of stream while reading zip entry");
                                offset += read;
                            }
                        }
                    }
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Constructs a Nelder-Mead sweeper, validating the simplex hyper-parameters and
        /// materializing the numeric parameters to sweep over.
        /// </summary>
        /// <param name="env">Host environment used for validation and component creation.</param>
        /// <param name="args">Sweeper arguments. The deltas must satisfy
        /// -1 &lt; DeltaInsideContraction &lt; 0 &lt; DeltaOutsideContraction &lt; DeltaReflection &lt; DeltaExpansion,
        /// and 0 &lt; GammaShrink &lt; 1.</param>
        public NelderMeadSweeper(IHostEnvironment env, Arguments args)
        {
            Contracts.CheckValue(env, nameof(env));
            // Validate the simplex coefficients; each check throws a user error naming
            // the offending argument, so the order below determines which error surfaces first.
            env.CheckUserArg(-1 < args.DeltaInsideContraction, nameof(args.DeltaInsideContraction), "Must be greater than -1");
            env.CheckUserArg(args.DeltaInsideContraction < 0, nameof(args.DeltaInsideContraction), "Must be less than 0");
            env.CheckUserArg(0 < args.DeltaOutsideContraction, nameof(args.DeltaOutsideContraction), "Must be greater than 0");
            env.CheckUserArg(args.DeltaReflection > args.DeltaOutsideContraction, nameof(args.DeltaReflection), "Must be greater than " + nameof(args.DeltaOutsideContraction));
            env.CheckUserArg(args.DeltaExpansion > args.DeltaReflection, nameof(args.DeltaExpansion), "Must be greater than " + nameof(args.DeltaReflection));
            env.CheckUserArg(0 < args.GammaShrink && args.GammaShrink < 1, nameof(args.GammaShrink), "Must be between 0 and 1");
            env.CheckValue(args.FirstBatchSweeper, nameof(args.FirstBatchSweeper), "First Batch Sweeper Contains Null Value");

            _args = args;

            _sweepParameters = new List <IValueGenerator>();
            foreach (var sweptParameter in args.SweptParameters)
            {
                var parameter = sweptParameter.CreateComponent(env);
                // REVIEW: ideas about how to support discrete values:
                // 1. assign each discrete value a random number (1-n) to make mirroring possible
                // 2. each time we need to mirror a discrete value, sample from the remaining value
                // 2.1. make the sampling non-uniform by learning "weights" for the different discrete values based on
                // the metric values that we get when using them. (For example, if, for a given discrete value, we get a bad result,
                // we lower its weight, but if we get a good result we increase its weight).
                var parameterNumeric = parameter as INumericValueGenerator;
                // Only numeric parameters are supported; reject anything else as a user error.
                env.CheckUserArg(parameterNumeric != null, nameof(args.SweptParameters), "Nelder-Mead sweeper can only sweep over numeric parameters");
                _sweepParameters.Add(parameterNumeric);
            }

            _initSweeper = args.FirstBatchSweeper.CreateComponent(env, _sweepParameters.ToArray());
            _dim         = _sweepParameters.Count;
            // The simplex method needs a dimension of at least two to be meaningful.
            env.CheckUserArg(_dim > 1, nameof(args.SweptParameters), "Nelder-Mead sweeper needs at least two parameters to sweep over.");

            // Initialize the optimization state: an empty simplex and no pending sweeps.
            _simplexVertices           = new SortedList <IRunResult, Float[]>(new SimplexVertexComparer());
            _stage                     = OptimizationStage.NeedReflectionPoint;
            _pendingSweeps             = new List <KeyValuePair <ParameterSet, Float[]> >();
            _pendingSweepsNotSubmitted = new Queue <KeyValuePair <ParameterSet, Float[]> >();
        }
        // Factory for SignatureLoadModel.
        private static ITransformer Create(IHostEnvironment env, ModelLoadContext ctx)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel(GetVersionInfo());

            // The contract name identifies which user-defined mapping factory to instantiate.
            string contractName = ctx.LoadString();

            // Newer models also persist the name of the assembly declaring the factory;
            // load and register it so the component catalog can resolve the contract below.
            if (ctx.Header.ModelVerWritten >= VerAssemblyNameSaved)
            {
                string assemblyName = ctx.LoadString();
                env.ComponentCatalog.RegisterAssembly(Assembly.Load(assemblyName));
            }

            var factory = env.ComponentCatalog.GetExtensionValue(env, typeof(CustomMappingFactoryAttributeAttribute), contractName)
                          as ICustomMappingFactory;
            if (factory == null)
            {
                throw env.Except($"The class with contract '{contractName}' must derive from '{typeof(CustomMappingFactory<,>).FullName}' or from '{typeof(StatefulCustomMappingFactory<,,>).FullName}'.");
            }

            return factory.CreateTransformer(env, contractName);
        }
        /// <summary>
        /// This function performs a number of checks on the inputs and, if appropriate and possible, will produce
        /// a mapper with slots names on the output score column properly mapped. If this is not possible for any
        /// reason, it will just return the input bound mapper.
        /// </summary>
        private static ISchemaBoundMapper WrapIfNeeded(IHostEnvironment env, ISchemaBoundMapper mapper, RoleMappedSchema trainSchema)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(mapper, nameof(mapper));
            env.CheckValueOrNull(trainSchema);

            // The idea is that we will take the key values from the train schema label, and present
            // them as slot name metadata. But there are a number of conditions for this to actually
            // happen, so we test those here. If any of them fail, we simply return the input
            // mapper unchanged.

            if (trainSchema?.Label == null)
            {
                return(mapper); // We don't even have a label identified in a training schema.
            }
            // The label must carry key-value metadata of a vector type for slot names to be derivable.
            var keyType = trainSchema.Label.Value.Metadata.Schema.GetColumnOrNull(MetadataUtils.Kinds.KeyValues)?.Type as VectorType;

            if (keyType == null || !CanWrap(mapper, keyType))
            {
                return(mapper);
            }

            // Great!! All checks pass.
            // Invoke WrapCore<T> with T bound to the raw item type of the key values.
            return(Utils.MarshalInvoke(WrapCore <int>, keyType.ItemType.RawType, env, mapper, trainSchema));
        }
예제 #6
0
        /// <summary>
        /// Loads a pipeline ensemble from the model context, choosing the concrete
        /// implementation from the persisted score column kind.
        /// </summary>
        public static SchemaBindablePipelineEnsembleBase Create(IHostEnvironment env, ModelLoadContext ctx)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel(GetVersionInfo());

            string kind = ctx.LoadNonEmptyString();

            // Binary classification carries a calibrator; regression/anomaly detection
            // produce a single uncalibrated score; multi-class produces a score vector.
            if (kind == MetadataUtils.Const.ScoreColumnKind.BinaryClassification)
            {
                return new ImplOneWithCalibrator(env, ctx, kind);
            }
            if (kind == MetadataUtils.Const.ScoreColumnKind.Regression ||
                kind == MetadataUtils.Const.ScoreColumnKind.AnomalyDetection)
            {
                return new ImplOne(env, ctx, kind);
            }
            if (kind == MetadataUtils.Const.ScoreColumnKind.MultiClassClassification)
            {
                return new ImplVec(env, ctx, kind);
            }
            throw env.Except("Unknown score kind");
        }
예제 #7
0
        /// <summary>
        /// Load a transform model.
        /// </summary>
        public TransformModel(IHostEnvironment env, Stream stream)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(stream, nameof(stream));

            // REVIEW: Should this preserve the "tags" for the transforms?
            using (var ch = env.Start("Loading transform model"))
            {
                _chain = ModelFileUtils.LoadPipeline(env, stream, new MultiFileSource(null), extractInnerPipe: true);
            }

            // Walk backwards through the transform chain until we reach a view that is
            // not itself a transform; that view carries the root (input) schema.
            IDataView current = _chain;
            while (current is IDataTransform transform)
            {
                current = transform.Source;
                env.AssertValue(current);
            }
            _schemaRoot = current.Schema;
        }
 /// <summary>
 /// Creates the data view for this input and binds it to the data-view entry
 /// point of the given <paramref name="experiment"/>.
 /// </summary>
 /// <param name="environment">Host environment; must not be null.</param>
 /// <param name="experiment">Experiment to bind the data view into; must not be null.</param>
 public void SetInput(IHostEnvironment environment, Experiment experiment)
 {
     // Guard the inputs up front, consistent with the validation style used elsewhere.
     Contracts.CheckValue(environment, nameof(environment));
     environment.CheckValue(experiment, nameof(experiment));
     _dataView = GetDataView(environment);
     environment.CheckValue(_dataView, nameof(_dataView));
     experiment.SetInput(_dataViewEntryPoint.Data, _dataView);
 }
 /// <summary>
 /// Rebinds this stateful filter transform over a new source data view,
 /// preserving its current configuration.
 /// </summary>
 public IDataTransform ApplyToData(IHostEnvironment env, IDataView newSource)
 {
     Contracts.CheckValue(env, nameof(env));
     env.CheckValue(newSource, nameof(newSource));

     var rebound = new StatefulFilterTransform <TSrc, TDst, TState>(env, this, newSource);
     return rebound;
 }
        /// <summary>
        /// Returns the feature selection scores for each slot of each column.
        /// </summary>
        /// <param name="env">The host environment.</param>
        /// <param name="input">The input dataview.</param>
        /// <param name="columns">The columns for which to compute the feature selection scores.</param>
        /// <param name="colSizes">Outputs an array containing the vector sizes of the input columns</param>
        /// <returns>A list of scores.</returns>
        public static long[][] Train(IHostEnvironment env, IDataView input, string[] columns, out int[] colSizes)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(input, nameof(input));
            env.CheckParam(Utils.Size(columns) > 0, nameof(columns));

            var schema      = input.Schema;
            var size        = columns.Length;
            var activeInput = new bool[schema.ColumnCount];
            var colSrcs     = new int[size];
            var colTypes    = new ColumnType[size];

            colSizes = new int[size];
            // Resolve each requested column to its index and type, rejecting columns that
            // do not exist or that have variable-length vector types.
            for (int i = 0; i < size; i++)
            {
                int colSrc;
                var colName = columns[i];
                if (!schema.TryGetColumnIndex(colName, out colSrc))
                {
                    throw env.ExceptUserArg(nameof(CountFeatureSelectionTransform.Arguments.Column), "Source column '{0}' not found", colName);
                }

                var colType = schema.GetColumnType(colSrc);
                if (colType.IsVector && !colType.IsKnownSizeVector)
                {
                    throw env.ExceptUserArg(nameof(CountFeatureSelectionTransform.Arguments.Column), "Variable length column '{0}' is not allowed", colName);
                }

                // Mark the column as active so the cursor below materializes it.
                activeInput[colSrc] = true;
                colSrcs[i]          = colSrc;
                colTypes[i]         = colType;
                colSizes[i]         = colType.ValueCount;
            }

            var    aggregators = new CountAggregator[size];
            long   rowCur      = 0;
            // GetRowCount may not know the count up front; NaN makes progress reporting indeterminate.
            double rowCount    = input.GetRowCount(true) ?? double.NaN;

            // Single pass over the data: create one aggregator per column (vector or
            // scalar variant, per its type), then feed every row to every aggregator.
            using (var pch = env.StartProgressChannel("Aggregating counts"))
                using (var cursor = input.GetRowCursor(col => activeInput[col]))
                {
                    var header = new ProgressHeader(new[] { "rows" });
                    pch.SetHeader(header, e => { e.SetProgress(0, rowCur, rowCount); });
                    for (int i = 0; i < size; i++)
                    {
                        if (colTypes[i].IsVector)
                        {
                            aggregators[i] = GetVecAggregator(cursor, colTypes[i], colSrcs[i]);
                        }
                        else
                        {
                            aggregators[i] = GetOneAggregator(cursor, colTypes[i], colSrcs[i]);
                        }
                    }

                    while (cursor.MoveNext())
                    {
                        for (int i = 0; i < size; i++)
                        {
                            aggregators[i].ProcessValue();
                        }
                        rowCur++;
                    }
                    pch.Checkpoint(rowCur);
                }
            return(aggregators.Select(a => a.Count).ToArray());
        }
예제 #11
0
        /// <summary>
        /// Implements the cross-validation macro: splits the input data into
        /// <c>input.NumFolds</c> folds, instantiates the user-provided training sub-graph
        /// once per fold via the TrainTestEvaluator entry point, and combines the
        /// per-fold models, warnings and metrics into the macro outputs.
        /// </summary>
        public static CommonOutputs.MacroOutput <Output> CrossValidate(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            // Guard the environment first, consistent with the other entry points.
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(input, nameof(input));

            // This will be the final resulting list of nodes that is returned from the macro.
            var subGraphNodes = new List <EntryPointNode>();

            //the input transform model
            VariableBinding transformModelVarName = null;

            if (input.TransformModel != null)
            {
                transformModelVarName = node.GetInputVariable(nameof(input.TransformModel));
            }

            // Split the input data into folds.
            var exp     = new Experiment(env);
            var cvSplit = new Models.CrossValidatorDatasetSplitter();

            cvSplit.Data.VarName         = node.GetInputVariable("Data").ToJson();
            cvSplit.NumFolds             = input.NumFolds;
            cvSplit.StratificationColumn = input.StratificationColumn;
            var cvSplitOutput = exp.Add(cvSplit);

            subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));

            // One output variable of each kind per fold.
            var predModelVars       = new Var <IPredictorModel> [input.NumFolds];
            var transformModelVars  = new Var <ITransformModel> [input.NumFolds];
            var warningsVars        = new Var <IDataView> [input.NumFolds];
            var overallMetricsVars  = new Var <IDataView> [input.NumFolds];
            var instanceMetricsVars = new Var <IDataView> [input.NumFolds];
            var confusionMatrixVars = new Var <IDataView> [input.NumFolds];

            // Instantiate the subgraph for each fold.
            for (int k = 0; k < input.NumFolds; k++)
            {
                // Parse the nodes in input.Nodes into a temporary run context.
                var context = new RunContext(env);
                var graph   = EntryPointNode.ValidateNodes(env, context, input.Nodes, node.Catalog);

                // Rename all the variables such that they don't conflict with the ones in the outer run context.
                var mapping = new Dictionary <string, string>();
                foreach (var entryPointNode in graph)
                {
                    entryPointNode.RenameAllVariables(mapping);
                }

                // Instantiate a TrainTest entry point for this fold.
                var args = new TrainTestMacro.Arguments
                {
                    Nodes          = new JArray(graph.Select(n => n.ToJson()).ToArray()),
                    TransformModel = null,
                    LabelColumn    = input.LabelColumn,
                    GroupColumn    = input.GroupColumn,
                    WeightColumn   = input.WeightColumn
                };

                if (transformModelVarName != null)
                {
                    args.TransformModel = new Var <ITransformModel> {
                        VarName = transformModelVarName.VariableName
                    };
                }

                args.Inputs.Data = new Var <IDataView>
                {
                    VarName = mapping[input.Inputs.Data.VarName]
                };

                if (input.Outputs.PredictorModel != null && mapping.ContainsKey(input.Outputs.PredictorModel.VarName))
                {
                    args.Outputs.PredictorModel = new Var <IPredictorModel>
                    {
                        VarName = mapping[input.Outputs.PredictorModel.VarName]
                    };
                }
                else
                {
                    args.Outputs.PredictorModel = null;
                }

                if (input.Outputs.TransformModel != null && mapping.ContainsKey(input.Outputs.TransformModel.VarName))
                {
                    args.Outputs.TransformModel = new Var <ITransformModel>
                    {
                        VarName = mapping[input.Outputs.TransformModel.VarName]
                    };
                }
                else
                {
                    args.Outputs.TransformModel = null;
                }

                // Set train/test trainer kind to match.
                args.Kind = input.Kind;

                // Set the input bindings for the TrainTest entry point.
                var inputBindingMap = new Dictionary <string, List <ParameterBinding> >();
                var inputMap        = new Dictionary <ParameterBinding, VariableBinding>();
                var trainingData    = new SimpleParameterBinding(nameof(args.TrainingData));
                inputBindingMap.Add(nameof(args.TrainingData), new List <ParameterBinding> {
                    trainingData
                });
                inputMap.Add(trainingData, new ArrayIndexVariableBinding(cvSplitOutput.TrainData.VarName, k));
                var testingData = new SimpleParameterBinding(nameof(args.TestingData));
                inputBindingMap.Add(nameof(args.TestingData), new List <ParameterBinding> {
                    testingData
                });
                inputMap.Add(testingData, new ArrayIndexVariableBinding(cvSplitOutput.TestData.VarName, k));
                var outputMap         = new Dictionary <string, string>();
                var transformModelVar = new Var <ITransformModel>();
                var predModelVar      = new Var <IPredictorModel>();
                if (input.Outputs.PredictorModel == null)
                {
                    // No predictor output requested: collect the fold's transform model,
                    // combining it with the input transform model when one was provided.
                    outputMap.Add(nameof(TrainTestMacro.Output.TransformModel), transformModelVar.VarName);
                    transformModelVars[k] = transformModelVar;
                    ML.Transforms.ModelCombiner.Output modelCombineOutput = null;
                    if (transformModelVarName != null && transformModelVarName.VariableName != null)
                    {
                        var modelCombine = new ML.Transforms.ModelCombiner
                        {
                            Models = new ArrayVar <ITransformModel>(
                                new Var <ITransformModel>[] {
                                new Var <ITransformModel> {
                                    VarName = transformModelVarName.VariableName
                                },
                                transformModelVar
                            }
                                )
                        };

                        exp.Reset();
                        modelCombineOutput = exp.Add(modelCombine);
                        subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
                        transformModelVars[k] = modelCombineOutput.OutputModel;
                    }
                }
                else
                {
                    // Predictor output requested: collect the fold's predictor model,
                    // combining it with the input transform model when one was provided.
                    outputMap.Add(nameof(TrainTestMacro.Output.PredictorModel), predModelVar.VarName);
                    predModelVars[k] = predModelVar;
                    ML.Transforms.TwoHeterogeneousModelCombiner.Output modelCombineOutput = null;
                    if (transformModelVarName != null && transformModelVarName.VariableName != null)
                    {
                        var modelCombine = new ML.Transforms.TwoHeterogeneousModelCombiner
                        {
                            TransformModel = { VarName = transformModelVarName.VariableName },
                            PredictorModel = predModelVar
                        };

                        exp.Reset();
                        modelCombineOutput = exp.Add(modelCombine);
                        subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
                        predModelVars[k] = modelCombineOutput.PredictorModel;
                    }
                }

                // Route this fold's diagnostic outputs to fresh variables.
                var warningVar = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.Warnings), warningVar.VarName);
                warningsVars[k] = warningVar;
                var overallMetric = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.OverallMetrics), overallMetric.VarName);
                overallMetricsVars[k] = overallMetric;
                var instanceMetric = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.PerInstanceMetrics), instanceMetric.VarName);
                instanceMetricsVars[k] = instanceMetric;
                var confusionMatrix = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), confusionMatrix.VarName);
                confusionMatrixVars[k] = confusionMatrix;
                const string trainTestEvaluatorMacroEntryPoint = "Models.TrainTestEvaluator";
                subGraphNodes.Add(EntryPointNode.Create(env, trainTestEvaluatorMacroEntryPoint, args, node.Catalog, node.Context, inputBindingMap, inputMap, outputMap));
            }

            exp.Reset();

            // Convert predictors from all folds into an array of predictors.

            if (input.Outputs.PredictorModel == null)
            {
                var outModels = new ML.Data.TransformModelArrayConverter
                {
                    TransformModel = new ArrayVar <ITransformModel>(transformModelVars)
                };
                var outModelsOutput = new ML.Data.TransformModelArrayConverter.Output();
                outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.TransformModel));
                exp.Add(outModels, outModelsOutput);
            }
            else
            {
                var outModels = new ML.Data.PredictorModelArrayConverter
                {
                    Model = new ArrayVar <IPredictorModel>(predModelVars)
                };
                var outModelsOutput = new ML.Data.PredictorModelArrayConverter.Output();
                outModelsOutput.OutputModel.VarName = node.GetOutputVariableName(nameof(Output.PredictorModel));
                exp.Add(outModels, outModelsOutput);
            }

            // Convert warnings data views from all folds into an array of data views.
            var warnings = new ML.Data.IDataViewArrayConverter
            {
                Data = new ArrayVar <IDataView>(warningsVars)
            };
            var warningsOutput = new ML.Data.IDataViewArrayConverter.Output();

            exp.Add(warnings, warningsOutput);

            // Convert overall metrics data views from all folds into an array of data views.
            var overallMetrics = new ML.Data.IDataViewArrayConverter
            {
                Data = new ArrayVar <IDataView>(overallMetricsVars)
            };
            var overallMetricsOutput = new ML.Data.IDataViewArrayConverter.Output();

            exp.Add(overallMetrics, overallMetricsOutput);

            // Convert per instance data views from all folds into an array of data views.
            var instanceMetrics = new ML.Data.IDataViewArrayConverter
            {
                Data = new ArrayVar <IDataView>(instanceMetricsVars)
            };
            var instanceMetricsOutput = new ML.Data.IDataViewArrayConverter.Output();

            exp.Add(instanceMetrics, instanceMetricsOutput);

            // Confusion matrices only exist for classification trainer kinds.
            ML.Data.IDataViewArrayConverter.Output confusionMatricesOutput = null;
            if (input.Kind == MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer ||
                input.Kind == MacroUtils.TrainerKinds.SignatureMultiClassClassifierTrainer)
            {
                // Convert confusion matrix data views from all folds into an array of data views.
                var confusionMatrices = new ML.Data.IDataViewArrayConverter
                {
                    Data = new ArrayVar <IDataView>(confusionMatrixVars)
                };
                confusionMatricesOutput = new ML.Data.IDataViewArrayConverter.Output();
                exp.Add(confusionMatrices, confusionMatricesOutput);
            }

            var combineArgs = new CombineMetricsInput();

            combineArgs.Kind         = input.Kind;
            combineArgs.LabelColumn  = input.LabelColumn;
            combineArgs.WeightColumn = input.WeightColumn;
            combineArgs.GroupColumn  = input.GroupColumn;

            // Set the input bindings for the CombineMetrics entry point.
            var combineInputBindingMap = new Dictionary <string, List <ParameterBinding> >();
            var combineInputMap        = new Dictionary <ParameterBinding, VariableBinding>();
            var overallArray           = new SimpleParameterBinding(nameof(combineArgs.OverallMetrics));

            combineInputBindingMap.Add(nameof(combineArgs.OverallMetrics), new List <ParameterBinding> {
                overallArray
            });
            combineInputMap.Add(overallArray, new SimpleVariableBinding(overallMetricsOutput.OutputData.VarName));
            var combinePerInstArray = new SimpleParameterBinding(nameof(combineArgs.PerInstanceMetrics));

            combineInputBindingMap.Add(nameof(combineArgs.PerInstanceMetrics), new List <ParameterBinding> {
                combinePerInstArray
            });
            combineInputMap.Add(combinePerInstArray, new SimpleVariableBinding(instanceMetricsOutput.OutputData.VarName));
            if (confusionMatricesOutput != null)
            {
                var combineConfArray = new SimpleParameterBinding(nameof(combineArgs.ConfusionMatrix));
                combineInputBindingMap.Add(nameof(combineArgs.ConfusionMatrix), new List <ParameterBinding> {
                    combineConfArray
                });
                combineInputMap.Add(combineConfArray, new SimpleVariableBinding(confusionMatricesOutput.OutputData.VarName));
            }

            // Map the combiner's outputs onto this macro's declared output variables.
            var combineOutputMap  = new Dictionary <string, string>();
            var combineWarningVar = new Var <IDataView>();

            combineWarningVar.VarName = node.GetOutputVariableName(nameof(Output.Warnings));
            combineOutputMap.Add(nameof(Output.Warnings), combineWarningVar.VarName);
            var combineOverallMetric = new Var <IDataView>();

            combineOverallMetric.VarName = node.GetOutputVariableName(nameof(Output.OverallMetrics));
            combineOutputMap.Add(nameof(Output.OverallMetrics), combineOverallMetric.VarName);
            var combineInstanceMetric = new Var <IDataView>();

            combineInstanceMetric.VarName = node.GetOutputVariableName(nameof(Output.PerInstanceMetrics));
            combineOutputMap.Add(nameof(Output.PerInstanceMetrics), combineInstanceMetric.VarName);
            if (confusionMatricesOutput != null)
            {
                var combineConfusionMatrix = new Var <IDataView>();
                combineConfusionMatrix.VarName = node.GetOutputVariableName(nameof(Output.ConfusionMatrix));
                combineOutputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), combineConfusionMatrix.VarName);
            }
            subGraphNodes.AddRange(EntryPointNode.ValidateNodes(env, node.Context, exp.GetNodes(), node.Catalog));
            subGraphNodes.Add(EntryPointNode.Create(env, "Models.CrossValidationResultsCombiner", combineArgs, node.Catalog, node.Context, combineInputBindingMap, combineInputMap, combineOutputMap));
            return(new CommonOutputs.MacroOutput <Output>()
            {
                Nodes = subGraphNodes
            });
        }
예제 #12
0
        // REVIEW: It would be nice to support propagation of select metadata.
        /// <summary>
        /// Creates a data view that applies <paramref name="mapper"/> to column <paramref name="src"/>
        /// of <paramref name="input"/>, producing column <paramref name="dst"/>. If the actual source
        /// column type differs from <paramref name="typeSrc"/>, a standard conversion is inserted
        /// before the mapper; the result is wrapped in an <c>OpaqueDataView</c>.
        /// </summary>
        public static IDataView Create <TSrc, TDst>(IHostEnvironment env, string name, IDataView input,
                                                    string src, string dst, ColumnType typeSrc, ColumnType typeDst, ValueMapper <TSrc, TDst> mapper,
                                                    ValueGetter <VBuffer <ReadOnlyMemory <char> > > keyValueGetter = null, ValueGetter <VBuffer <ReadOnlyMemory <char> > > slotNamesGetter = null)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckNonEmpty(name, nameof(name));
            env.CheckValue(input, nameof(input));
            env.CheckNonEmpty(src, nameof(src));
            env.CheckNonEmpty(dst, nameof(dst));
            env.CheckValue(typeSrc, nameof(typeSrc));
            env.CheckValue(typeDst, nameof(typeDst));
            env.CheckValue(mapper, nameof(mapper));
            // Key-value metadata only makes sense for key-typed outputs; slot names only
            // for fixed-size vector outputs.
            env.Check(keyValueGetter == null || typeDst.GetItemType() is KeyType);
            env.Check(slotNamesGetter == null || typeDst.IsKnownSizeVector());

            // The declared column types must agree with the mapper's generic type arguments.
            if (typeSrc.RawType != typeof(TSrc))
            {
                throw env.ExceptParam(nameof(mapper),
                                      "The source column type '{0}' doesn't match the input type of the mapper", typeSrc);
            }
            if (typeDst.RawType != typeof(TDst))
            {
                throw env.ExceptParam(nameof(mapper),
                                      "The destination column type '{0}' doesn't match the output type of the mapper", typeDst);
            }

            bool tmp = input.Schema.TryGetColumnIndex(src, out int colSrc);

            if (!tmp)
            {
                throw env.ExceptParam(nameof(src), "The input data doesn't have a column named '{0}'", src);
            }
            var typeOrig = input.Schema[colSrc].Type;

            // REVIEW: Ideally this should support vector-type conversion. It currently doesn't.
            bool     ident;
            Delegate conv;

            if (typeOrig.SameSizeAndItemType(typeSrc))
            {
                // The actual column type already matches; no conversion needed.
                ident = true;
                conv  = null;
            }
            else if (!Conversions.Instance.TryGetStandardConversion(typeOrig, typeSrc, out conv, out ident))
            {
                throw env.ExceptParam(nameof(mapper),
                                      "The type of column '{0}', '{1}', cannot be converted to the input type of the mapper '{2}'",
                                      src, typeOrig, typeSrc);
            }

            var       col = new Column(src, dst);
            IDataView impl;

            if (ident)
            {
                // No conversion: instantiate Impl directly with the mapper's types.
                impl = new Impl <TSrc, TDst, TDst>(env, name, input, col, typeDst, mapper, keyValueGetter: keyValueGetter, slotNamesGetter: slotNamesGetter);
            }
            else
            {
                // A conversion is required. The original column's raw type is only known at
                // runtime, so build Impl<TOrig, TSrc, TDst> via reflection: take a delegate
                // to CreateImpl, rebind its generic parameters, and invoke it.
                Func <IHostEnvironment, string, IDataView, Column, ColumnType, ValueMapper <int, int>,
                      ValueMapper <int, int>, ValueGetter <VBuffer <ReadOnlyMemory <char> > >, ValueGetter <VBuffer <ReadOnlyMemory <char> > >,
                      Impl <int, int, int> > del = CreateImpl <int, int, int>;
                var meth = del.GetMethodInfo().GetGenericMethodDefinition()
                           .MakeGenericMethod(typeOrig.RawType, typeof(TSrc), typeof(TDst));
                impl = (IDataView)meth.Invoke(null, new object[] { env, name, input, col, typeDst, conv, mapper, keyValueGetter, slotNamesGetter });
            }

            // Wrap in an opaque view so downstream consumers cannot see through to the implementation.
            return(new OpaqueDataView(impl));
        }
예제 #13
0
        /// <summary>
        /// Entry-point macro that expands a one-versus-all (OVA) multiclass training graph: the subgraph in
        /// <c>input.Nodes</c> is instantiated once per class label, and the resulting per-class binary predictor
        /// models are combined into a single multiclass predictor model via the "Models.OvaModelCombiner" node.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="input">Macro arguments; <c>Nodes</c> must be non-empty.</param>
        /// <param name="node">The entry-point node being expanded; supplies variable bindings and context.</param>
        /// <returns>A macro output whose <c>Nodes</c> are the expanded per-class and combiner nodes.</returns>
        public static CommonOutputs.MacroOutput <Output> OneVersusAll(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(input, nameof(input));
            env.Assert(input.Nodes.Count > 0);

            var numClasses    = GetNumberOfClasses(env, input, out var label);
            var predModelVars = new Var <PredictorModel> [numClasses];

            // This will be the final resulting list of nodes that is returned from the macro.
            var macroNodes = new List <EntryPointNode>();

            // Instantiate the subgraph for each label value. Each instantiation produces one binary
            // predictor model variable (class k vs. the rest).
            for (int k = 0; k < numClasses; k++)
            {
                predModelVars[k] = ProcessClass(env, macroNodes, k, label, input, node);
            }

            // Convert the predictor models to an array of predictor models.
            var modelsArray = new Var <PredictorModel[]>();

            MacroUtils.ConvertIPredictorModelsToArray(env, node.Context, macroNodes, predModelVars, modelsArray.VarName);

            // Use OVA model combiner to combine these models into one.
            // Takes in array of models that are binary predictor models and
            // produces single multiclass predictor model.
            var combineArgs = new ModelOperations.CombineOvaPredictorModelsInput();

            combineArgs.Caching           = input.Caching;
            combineArgs.FeatureColumnName = input.FeatureColumnName;
            combineArgs.LabelColumnName   = input.LabelColumnName;
            combineArgs.NormalizeFeatures = input.NormalizeFeatures;
            combineArgs.UseProbabilities  = input.UseProbabilities;

            // Bind the models array and the training data as inputs of the combiner node.
            var inputBindingMap            = new Dictionary <string, List <ParameterBinding> >();
            var inputMap                   = new Dictionary <ParameterBinding, VariableBinding>();
            var combineNodeModelArrayInput = new SimpleVariableBinding(modelsArray.VarName);
            var paramBinding               = new SimpleParameterBinding(nameof(combineArgs.ModelArray));

            inputBindingMap.Add(nameof(combineArgs.ModelArray), new List <ParameterBinding>()
            {
                paramBinding
            });
            inputMap.Add(paramBinding, combineNodeModelArrayInput);
            paramBinding = new SimpleParameterBinding(nameof(combineArgs.TrainingData));
            inputBindingMap.Add(nameof(combineArgs.TrainingData), new List <ParameterBinding>()
            {
                paramBinding
            });
            inputMap.Add(paramBinding, node.GetInputVariable(nameof(input.TrainingData)));

            // Route the combiner's predictor model to this macro node's declared output variable.
            var outputMap = new Dictionary <string, string>();

            outputMap.Add(nameof(Output.PredictorModel), node.GetOutputVariableName(nameof(Output.PredictorModel)));
            var combineModelsNode = EntryPointNode.Create(env, "Models.OvaModelCombiner",
                                                          combineArgs, node.Context, inputBindingMap, inputMap, outputMap);

            macroNodes.Add(combineModelsNode);

            return(new CommonOutputs.MacroOutput <Output>()
            {
                Nodes = macroNodes
            });
        }
예제 #14
0
        /// <summary>
        /// Extract all values of one column of the data view in a form of an <see cref="IEnumerable{T}"/>.
        /// </summary>
        /// <typeparam name="T">The type of the values. This must match the actual column type, be
        /// <see cref="string"/> for a text column, or be an array type whose element type matches
        /// (or is <see cref="string"/> for) the item type of a vector column.</typeparam>
        /// <param name="data">The data view to get the column from.</param>
        /// <param name="env">The current host environment.</param>
        /// <param name="columnName">The name of the column to extract.</param>
        /// <returns>A lazily-evaluated sequence of the column's values.</returns>
        public static IEnumerable <T> GetColumn <T>(this IDataView data, IHostEnvironment env, string columnName)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(data, nameof(data));
            env.CheckNonEmpty(columnName, nameof(columnName));

            if (!data.Schema.TryGetColumnIndex(columnName, out int col))
            {
                throw env.ExceptSchemaMismatch(nameof(columnName), "input", columnName);
            }

            // There are two decisions that we make here:
            // - Is the T an array type?
            //     - If yes, we need to map VBuffer to array and densify.
            //     - If no, this is not needed.
            // - Does T (or item type of T if it's an array) equal to the data view type?
            //     - If this is the same type, we can map directly.
            //     - Otherwise, we need a conversion delegate.

            var colType = data.Schema.GetColumnType(col);

            if (colType.RawType == typeof(T))
            {
                // Direct mapping is possible.
                return(GetColumnDirect <T>(data, col));
            }
            else if (typeof(T) == typeof(string) && colType.IsText)
            {
                // Special case of ROM<char> to string conversion.
                // The generic type arguments used on the delegate below are dummies: the closed method is
                // rebuilt via MakeGenericMethod with the actual (T, raw column) types.
                Delegate convert = (Func <ReadOnlyMemory <char>, string>)((ReadOnlyMemory <char> txt) => txt.ToString());
                Func <IDataView, int, Func <int, T>, IEnumerable <T> > del = GetColumnConvert;
                var meth = del.Method.GetGenericMethodDefinition().MakeGenericMethod(typeof(T), colType.RawType);
                return((IEnumerable <T>)(meth.Invoke(null, new object[] { data, col, convert })));
            }
            else if (typeof(T).IsArray)
            {
                // Output is an array type.
                if (!colType.IsVector)
                {
                    throw env.ExceptSchemaMismatch(nameof(columnName), "input", columnName, "vector", "scalar");
                }
                var elementType = typeof(T).GetElementType();
                if (elementType == colType.ItemType.RawType)
                {
                    // Direct mapping of items. The <int> argument is a dummy replaced by MakeGenericMethod.
                    Func <IDataView, int, IEnumerable <int[]> > del = GetColumnArrayDirect <int>;
                    var meth = del.Method.GetGenericMethodDefinition().MakeGenericMethod(elementType);
                    return((IEnumerable <T>)meth.Invoke(null, new object[] { data, col }));
                }
                else if (elementType == typeof(string) && colType.ItemType.IsText)
                {
                    // Conversion of DvText items to string items. Again, <long> arguments are dummies.
                    Delegate convert = (Func <ReadOnlyMemory <char>, string>)((ReadOnlyMemory <char> txt) => txt.ToString());
                    Func <IDataView, int, Func <int, long>, IEnumerable <long[]> > del = GetColumnArrayConvert;
                    var meth = del.Method.GetGenericMethodDefinition().MakeGenericMethod(elementType, colType.ItemType.RawType);
                    return((IEnumerable <T>)meth.Invoke(null, new object[] { data, col, convert }));
                }
                // Fall through to the failure.
            }
            throw env.Except($"Could not map a data view column '{columnName}' of type {colType} to {typeof(T)}.");
        }
예제 #15
0
        /// <summary>
        /// Trains the trainer specified in <paramref name="args"/> on <paramref name="input"/>, optionally saves
        /// the trained model to disk, builds a scorer over the trained predictor, and returns the scored view
        /// wrapped in a <see cref="TagViewTransform"/> registered under <c>args.tag</c>.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="args">Arguments; <c>tag</c> and <c>trainer</c> are required.</param>
        /// <param name="input">Input data view to train and score on.</param>
        /// <param name="sourceCtx">Set to <paramref name="input"/> (the untransformed source view).</param>
        /// <returns>The tagged, scored transform.</returns>
        IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input, out IDataView sourceCtx)
        {
            sourceCtx = input;
            // Fix: use nameof for refactor-safe parameter names instead of string literals, and pass the
            // "tag is empty" text as the message argument rather than the paramName argument of CheckValue.
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(args, nameof(args));
            env.CheckValue(input, nameof(input));
            env.CheckValue(args.tag, nameof(args.tag), "tag is empty");
            env.CheckValue(args.trainer, nameof(args.trainer),
                           "Trainer cannot be null. If your model is already trained, please use ScoreTransform instead.");

            // A tag must be unique among the views already tagged upstream of this transform.
            var views = TagHelper.EnumerateTaggedView(true, input).Where(c => c.Item1 == args.tag);

            if (views.Any())
            {
                throw env.Except("Tag '{0}' is already used.", args.tag);
            }

            var host = env.Register("TagTrainOrScoreTransform");

            using (var ch = host.Start("Train"))
            {
                ch.Trace("Constructing trainer");
                var trainerSett = ScikitSubComponent <ITrainer, SignatureTrainer> .AsSubComponent(args.trainer);

                ITrainer trainer    = trainerSett.CreateInstance(host);
                var      customCols = TrainUtils.CheckAndGenerateCustomColumns(env, args.CustomColumn);

                string             feat;
                string             group;
                // NOTE(review): this method mixes 'args' with the '_args' field and 'host'/'env' with '_host';
                // presumably they alias state set by the constructor — confirm before unifying.
                var                data       = CreateDataFromArgs(_host, ch, new OpaqueDataView(input), args, out feat, out group);
                ICalibratorTrainer calibrator = args.calibrator == null
                                    ? null
                                    : ScikitSubComponent <ICalibratorTrainer, SignatureCalibrator> .AsSubComponent(args.calibrator).CreateInstance(host);

                // Derive a legal component name from the trainer settings string.
                var nameTrainer = args.trainer.ToString().Replace("{", "").Replace("}", "").Replace(" ", "").Replace("=", "").Replace("+", "Y").Replace("-", "N");
                var extTrainer  = new ExtendedTrainer(trainer, nameTrainer);
                _predictor = extTrainer.Train(host, ch, data, null, calibrator, args.maxCalibrationExamples);

                if (!string.IsNullOrEmpty(args.outputModel))
                {
                    ch.Info("Saving model into '{0}'", args.outputModel);
                    using (var fs = File.Create(args.outputModel))
                        TrainUtils.SaveModel(env, ch, fs, _predictor, data);
                    ch.Info("Done.");
                }

                if (_cali != null)
                {
                    throw ch.ExceptNotImpl("Calibrator is not implemented yet.");
                }

                ch.Trace("Scoring");
                if (_args.scorer != null)
                {
                    // An explicit scorer sub-component was requested: bind the predictor and instantiate it.
                    var mapper   = new SchemaBindablePredictorWrapper(_predictor);
                    var roles    = new RoleMappedSchema(input.Schema, null, feat, group: group);
                    var bound    = mapper.Bind(_host, roles);
                    var scorPars = ScikitSubComponent <IDataScorerTransform, SignatureDataScorer> .AsSubComponent(_args.scorer);

                    _scorer = scorPars.CreateInstance(_host, input, bound, roles);
                }
                else
                {
                    // Otherwise fall back to the default scorer for this predictor type.
                    _scorer = PredictorHelper.CreateDefaultScorer(_host, input, feat, group, _predictor);
                }

                ch.Info("Tagging with tag '{0}'.", args.tag);

                var ar = new TagViewTransform.Arguments {
                    tag = args.tag
                };
                var res = new TagViewTransform(env, ar, _scorer, _predictor);
                return(res);
            }
        }
예제 #16
0
        // NOTE(review): the access modifier and return type of this method sit on a line outside this excerpt;
        // from the final statement it returns an ImmutableDictionary<string, TResult> keyed by feature slot name.
        //
        // Computes permutation feature importance (PFI) for the last prediction transformer in the model chain:
        // it locates that transformer, closes the generic PermutationFeatureImportance<,,> helper type over the
        // transformer's model type via reflection, invokes its static GetImportanceMetricsMatrix, and maps the
        // per-slot results to the feature column's slot names.
        PermutationFeatureImportance <TMetric, TResult>(
            IHostEnvironment env,
            ITransformer model,
            IDataView data,
            Func <TResult> resultInitializer,
            Func <IDataView, TMetric> evaluationFunc,
            Func <TMetric, TMetric, TMetric> deltaFunc,
            int permutationCount,
            bool useFeatureWeightFilter,
            int?numberOfExamplesToUse) where TResult : IMetricsStatistics <TMetric>
        {
            env.CheckValue(data, nameof(data));
            env.CheckValue(model, nameof(model));

            ISingleFeaturePredictionTransformer lastTransformer = null;

            // Find the last single-feature prediction transformer: search a transformer chain from the end,
            // or use the model itself when it is not a chain.
            if (model is ITransformerChainAccessor chain)
            {
                foreach (var transformer in chain.Transformers.Reverse())
                {
                    if (transformer is ISingleFeaturePredictionTransformer singlePredictionTransformer)
                    {
                        lastTransformer = singlePredictionTransformer;
                        break;
                    }
                }
            }
            else
            {
                lastTransformer = model as ISingleFeaturePredictionTransformer;
            }

            env.CheckValue(lastTransformer, nameof(lastTransformer), "The model provided does not have a compatible predictor");

            string featureColumnName = lastTransformer.FeatureColumnName;
            var    predictionTransformerGenericType = GetImplementedIPredictionTransformer(lastTransformer.GetType());

            // Close PermutationFeatureImportance<TModel, TMetric, TResult> over the transformer's model type.
            Type[] types          = { predictionTransformerGenericType.GenericTypeArguments[0], typeof(TMetric), typeof(TResult) };
            Type   pfiGenericType = typeof(PermutationFeatureImportance <, ,>).MakeGenericType(types);

            object[] param = { env,
                               lastTransformer,
                               data,
                               resultInitializer,
                               evaluationFunc,
                               deltaFunc,
                               featureColumnName,
                               permutationCount,
                               useFeatureWeightFilter,
                               numberOfExamplesToUse };

            MethodInfo mi = pfiGenericType.GetMethod("GetImportanceMetricsMatrix", BindingFlags.Static | BindingFlags.Public);
            var        permutationFeatureImportance = (ImmutableArray <TResult>)mi.Invoke(null, param);

            // Read the "SlotNames" annotation of the feature column to label each importance entry.
            VBuffer <ReadOnlyMemory <char> > nameBuffer = default;

            data.Schema[featureColumnName].Annotations.GetValue("SlotNames", ref nameBuffer);
            var featureColumnNames = nameBuffer.DenseValues().ToList();

            var output = new Dictionary <string, TResult>();

            for (int i = 0; i < permutationFeatureImportance.Length; i++)
            {
                var name = featureColumnNames[i].ToString();

                // If the slot wasn't given a name, default to just the slot number.
                if (string.IsNullOrEmpty(name))
                {
                    name = $"Slot {i}";
                }
                output.Add(name, permutationFeatureImportance[i]);
            }

            return(output.ToImmutableDictionary());
        }
예제 #17
0
 /// <summary>
 /// Builds a data view that appends the columns of every view in <paramref name="sources"/> side by side.
 /// When the sources differ in length, the zipped view is truncated to the shortest one.
 /// </summary>
 /// <param name="env">The host environment to use.</param>
 /// <param name="sources">A non-empty collection of data views to zip together.</param>
 /// <returns>The zipped data view.</returns>
 public static IDataView Zip(this IHostEnvironment env, IEnumerable <IDataView> sources)
 {
     Contracts.CheckValue(env, nameof(env));
     env.CheckValue(sources, nameof(sources));
     var zipped = ZipDataView.Create(env, sources);
     return(zipped);
 }
        /// <summary>
        /// This method detects this predictable interval (or period) by adopting techniques of fourier analysis.
        /// Returns -1 if no such pattern is found, that is, the input values do not follow a seasonal fluctuation.
        /// </summary>
        /// <param name="host">The detect seasonality host environment.</param>
        /// <param name="input">Input DataView.The data is an instance of <see cref="Microsoft.ML.IDataView"/>.</param>
        /// <param name="inputColumnName">Name of column to process. The column data must be <see cref="System.Double"/>.</param>
        /// <param name="seasonalityWindowSize">An upper bound on the number of values to be considered in the input values.
        /// When set to -1, use the whole input to fit model; when set to a positive integer, use this number as batch size.
        /// Default value is -1.</param>
        /// <param name="randomessThreshold">Randomness threshold, ranging from [0, 1]. It specifies how confident the input
        /// follows a predictable pattern recurring as seasonal data. By default, it is set as 0.95.
        /// </param>
        /// <returns>The detected period if seasonality period exists, otherwise return -1.</returns>
        public int DetectSeasonality(
            IHostEnvironment host,
            IDataView input,
            string inputColumnName,
            int seasonalityWindowSize,
            double randomessThreshold)
        {
            host.CheckValue(input, nameof(input));
            host.CheckValue(inputColumnName, nameof(inputColumnName));
            host.CheckUserArg(seasonalityWindowSize == -1 || seasonalityWindowSize >= 0, nameof(seasonalityWindowSize));

            var column = input.Schema.GetColumnOrNull(inputColumnName);

            host.CheckUserArg(column.HasValue, nameof(inputColumnName));

            int length     = 0;
            var valueCache = seasonalityWindowSize == -1 ? new List <double>() : new List <double>(seasonalityWindowSize);

            // Fix: the row cursor is IDisposable and was previously never disposed (resource leak);
            // scope it with a using statement so it is released as soon as the values are read.
            using (var rowCursor = input.GetRowCursor(new List <DataViewSchema.Column>()
            {
                column.Value
            }))
            {
                var    valueDelegate = rowCursor.GetGetter <double>(column.Value);
                double valueRef      = 0;

                // Read up to seasonalityWindowSize values (or everything when the window is -1).
                while (rowCursor.MoveNext())
                {
                    valueDelegate.Invoke(ref valueRef);
                    length++;
                    valueCache.Add(valueRef);
                    if (seasonalityWindowSize != -1 && length >= seasonalityWindowSize)
                    {
                        break;
                    }
                }
            }

            // Forward FFT of the (real-valued) series; the imaginary input is all zeros.
            double[] fftRe   = new double[length];
            double[] fftIm   = new double[length];
            double[] inputRe = valueCache.ToArray();

            FftUtils.ComputeForwardFft(inputRe, Enumerable.Repeat(0.0, length).ToArray(), fftRe, fftIm, length);

            var energies = fftRe.Select((m, i) => new Complex(m, fftIm[i])).ToArray();

            /* Periodogram indicates the square of "energy" on the  frequency domain.
             * Specifically, periodogram[j] = a[j]^2+b[j]^2, where a and b are Fourier Coefficients for cosine and sine,
             * x(t) = a0+sum(a[j]cos(2Pi * f[j]t)+b[j]sin(2Pi * f[j]t)
             */
            var periodogram = energies.Select(t => t * Complex.Conjugate(t)).ToArray();

            FindBestTwoFrequencies(periodogram, length, out var bestFreq, out var secondFreq);

            // Inverse FFT of the periodogram yields the (circular) autocorrelation used to validate the period.
            double[] ifftRe = new double[length];
            double[] ifftIm = new double[length];
            FftUtils.ComputeBackwardFft(
                periodogram.Select(t => t.Real).ToArray(),
                periodogram.Select(t => t.Imaginary).ToArray(),
                ifftRe,
                ifftIm,
                length);
            var values = ifftRe.Select((t, i) => new Complex(t, ifftIm[i])).ToArray();

            int period = FindActualPeriod(values, bestFreq, secondFreq, length, randomessThreshold);

            // Normalize any negative sentinel to -1.
            return(period < 0 ? -1 : period);
        }
예제 #19
0
        // Factory method for SignatureLoadModel.
        // Deserializes an OnnxTransformer from the model archive. Decode order is fixed by the binary format:
        // the "OnnxModel" byte stream, then (for model versions > 0x00010001) the input column names,
        // the output column names, optional custom shape overrides, and (for versions >= 0x00010003)
        // the parser recursion limit.
        private static OnnxTransformer Create(IHostEnvironment env, ModelLoadContext ctx)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel(GetVersionInfo());

            byte[] modelBytes = null;
            if (!ctx.TryLoadBinaryStream("OnnxModel", r => modelBytes = r.ReadByteArray()))
            {
                throw env.ExceptDecode();
            }

            // Older models (version 0x00010001) always had exactly one input and one output column.
            bool supportsMultiInputOutput = ctx.Header.ModelVerWritten > 0x00010001;

            var numInputs = (supportsMultiInputOutput) ? ctx.Reader.ReadInt32() : 1;

            env.CheckDecode(numInputs > 0);
            var inputs = new string[numInputs];

            for (int j = 0; j < inputs.Length; j++)
            {
                inputs[j] = ctx.LoadNonEmptyString();
            }

            var numOutputs = (supportsMultiInputOutput) ? ctx.Reader.ReadInt32() : 1;

            env.CheckDecode(numOutputs > 0);
            var outputs = new string[numOutputs];

            for (int j = 0; j < outputs.Length; j++)
            {
                outputs[j] = ctx.LoadNonEmptyString();
            }

            // Save custom-provided shapes. Those shapes overwrite shapes loaded from the ONNX model file.
            int customShapeInfosLength = ctx.Reader.ReadInt32(); // 0 means no custom shape. Non-zero means count of custom shapes.

            CustomShapeInfo[] loadedCustomShapeInfos = null;
            if (customShapeInfosLength > 0)
            {
                loadedCustomShapeInfos = new CustomShapeInfo[customShapeInfosLength];
                for (int i = 0; i < customShapeInfosLength; ++i)
                {
                    var name  = ctx.LoadNonEmptyString();
                    var shape = ctx.Reader.ReadIntArray();
                    loadedCustomShapeInfos[i] = new CustomShapeInfo()
                    {
                        Name = name, Shape = shape
                    };
                }
            }

            int recursionLimit;

            // Recursion limit change
            if (ctx.Header.ModelVerWritten >= 0x00010003)
            {
                recursionLimit = ctx.Reader.ReadInt32();
            }
            else
            {
                // Default if not written inside ONNX model
                recursionLimit = 100;
            }

            var options = new Options()
            {
                InputColumns     = inputs,
                OutputColumns    = outputs,
                CustomShapeInfos = loadedCustomShapeInfos,
                RecursionLimit   = recursionLimit
            };

            return(new OnnxTransformer(env, options, modelBytes));
        }
예제 #20
0
        /// <summary>
        /// Creates a scoring transform from an already-trained predictor: resolves the feature column,
        /// validates that it is a vector of floats, and wires a value-mapping transform through either
        /// the distribution interface (<c>args.useProb</c>) or the plain <c>IValueMapper</c> interface.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="args">Arguments controlling feature/output columns and probability output.</param>
        /// <param name="input">Input data view to score.</param>
        /// <param name="sourceCtx">Set to <paramref name="input"/> (the untransformed source view).</param>
        /// <param name="overwritePredictor">The predictor to score with; must not be null.</param>
        protected virtual IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input, out IDataView sourceCtx, IPredictor overwritePredictor)
        {
            sourceCtx = input;
            // Fix: use nameof instead of string literals for refactor-safe parameter names.
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(args, nameof(args));
            env.CheckValue(input, nameof(input));

            IPredictor predictor;

            if (overwritePredictor == null)
            {
                throw env.Except("No defined predictor.");
            }
            else
            {
                predictor = overwritePredictor;
            }

            // The function is returning something and modifying a member of the class. Not very fancy.
            _predictor = predictor;

            string feat = TrainUtils.MatchNameOrDefaultOrNull(env, input.Schema,
                                                              "featureColumn", args.featureColumn, DefaultColumnNames.Features);
            int index = SchemaHelper.GetColumnIndex(input.Schema, feat);
            var type  = input.Schema[index].Type;

            if (!type.IsVector() || type.AsVector().ItemType().RawKind() != DataKind.Single)
            {
                // Fix: grammar in error message ("must a" -> "must be a").
                throw env.Except("Features must be a vector of floats");
            }

            if (args.useProb)
            {
                var valueMapper = predictor as IValueMapperDist;
                if (valueMapper == null)
                {
                    // Fix: this branch requires IValueMapperDist, not IValueMapper.
                    throw env.Except("Predictor must be an IValueMapperDist.");
                }
                var output = valueMapper.DistType;
                if (output.IsVector())
                {
                    return(CreateTransformValueMapperDist <VBuffer <float>, VBuffer <float>, VBuffer <float> >(valueMapper, feat, args.outputColumn));
                }
                else
                {
                    return(CreateTransformValueMapperDist <VBuffer <float>, VBuffer <float>, float>(valueMapper, feat, args.outputColumn));
                }
            }
            else
            {
                var valueMapper = predictor as IValueMapper;
                if (valueMapper == null)
                {
                    throw env.Except("Predictor must be an IValueMapper.");
                }
                var output = valueMapper.OutputType;
                if (output.IsVector())
                {
                    return(CreateTransformValueMapper <VBuffer <float>, VBuffer <float> >(valueMapper, feat, args.outputColumn));
                }
                else
                {
                    return(CreateTransformValueMapper <VBuffer <float>, float>(valueMapper, feat, args.outputColumn));
                }
            }
        }
        // Factory method for SignatureLoadModel.
        // Deserializes a TensorFlowTransform: frozen models are loaded directly from the "TFModel" byte
        // stream; saved models are unpacked from the "TFSavedModel" stream into a temporary directory
        // (cleaned up on failure) and loaded from disk.
        private static TensorFlowTransform Create(IHostEnvironment env, ModelLoadContext ctx)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel(GetVersionInfo());

            // *** Binary format ***
            // byte: indicator for frozen models
            // stream: tensorFlow model.
            // int: number of input columns
            // for each input column
            //   int: id of int column name
            // int: number of output columns
            // for each output column
            //   int: id of output column name
            GetModelInfo(env, ctx, out string[] inputs, out string[] outputs, out bool isFrozen);
            if (isFrozen)
            {
                byte[] modelBytes = null;
                if (!ctx.TryLoadBinaryStream("TFModel", r => modelBytes = r.ReadByteArray()))
                {
                    throw env.ExceptDecode();
                }
                return(new TensorFlowTransform(env, TensorFlowUtils.LoadTFSession(env, modelBytes), inputs, outputs, null, false));
            }

            var tempDirPath = Path.GetFullPath(Path.Combine(Path.GetTempPath(), RegistrationName + "_" + Guid.NewGuid()));

            TensorFlowUtils.CreateFolderWithAclIfNotExists(env, tempDirPath);
            try
            {
                var load = ctx.TryLoadBinaryStream("TFSavedModel", br =>
                {
                    int count = br.ReadInt32();
                    for (int n = 0; n < count; n++)
                    {
                        string relativeFile = br.ReadString();
                        long fileLength     = br.ReadInt64();

                        string fullFilePath = Path.Combine(tempDirPath, relativeFile);
                        string fullFileDir  = Path.GetDirectoryName(fullFilePath);
                        if (fullFileDir != tempDirPath)
                        {
                            TensorFlowUtils.CreateFolderWithAclIfNotExists(env, fullFileDir);
                        }
                        using (var fs = new FileStream(fullFilePath, FileMode.Create, FileAccess.Write))
                        {
                            long actualRead = br.BaseStream.CopyRange(fs, fileLength);
                            env.Assert(actualRead == fileLength);
                        }
                    }
                });

                // Fix: the result of TryLoadBinaryStream was previously ignored, so a missing
                // "TFSavedModel" stream silently proceeded with an empty directory. Mirror the
                // frozen-model path and fail the decode; the catch below cleans up the temp folder.
                if (!load)
                {
                    throw env.ExceptDecode();
                }

                return(new TensorFlowTransform(env, TensorFlowUtils.GetSession(env, tempDirPath), inputs, outputs, tempDirPath, true));
            }
            catch (Exception)
            {
                TensorFlowUtils.DeleteFolderWithRetries(env, tempDirPath);
                throw;
            }
        }
        public static CommonOutputs.MacroOutput <Output> CrossValidate(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            env.CheckValue(input, nameof(input));

            // This will be the final resulting list of nodes that is returned from the macro.
            var subGraphNodes = new List <EntryPointNode>();

            //the input transform model
            VariableBinding transformModelVarName = null;

            if (input.TransformModel != null)
            {
                transformModelVarName = node.GetInputVariable(nameof(input.TransformModel));
            }

            // Split the input data into folds.
            var splitArgs = new CVSplit.Input();

            splitArgs.NumFolds             = input.NumFolds;
            splitArgs.StratificationColumn = input.StratificationColumn;
            var inputBindingMap           = new Dictionary <string, List <ParameterBinding> >();
            var inputMap                  = new Dictionary <ParameterBinding, VariableBinding>();
            var inputData                 = node.GetInputVariable(nameof(splitArgs.Data));
            ParameterBinding paramBinding = new SimpleParameterBinding(nameof(splitArgs.Data));

            inputBindingMap.Add(nameof(splitArgs.Data), new List <ParameterBinding>()
            {
                paramBinding
            });
            inputMap.Add(paramBinding, inputData);
            var outputMap            = new Dictionary <string, string>();
            var splitOutputTrainData = new ArrayVar <IDataView>();
            var splitOutputTestData  = new ArrayVar <IDataView>();

            outputMap.Add(nameof(CVSplit.Output.TrainData), splitOutputTrainData.VarName);
            outputMap.Add(nameof(CVSplit.Output.TestData), splitOutputTestData.VarName);
            var splitNode = EntryPointNode.Create(env, "Models.CrossValidatorDatasetSplitter", splitArgs,
                                                  node.Context, inputBindingMap, inputMap, outputMap);

            subGraphNodes.Add(splitNode);

            var predModelVars           = new Var <PredictorModel> [input.NumFolds];
            var inputTransformModelVars = new Var <PredictorModel> [input.NumFolds];
            var warningsVars            = new Var <IDataView> [input.NumFolds];
            var overallMetricsVars      = new Var <IDataView> [input.NumFolds];
            var instanceMetricsVars     = new Var <IDataView> [input.NumFolds];
            var confusionMatrixVars     = new Var <IDataView> [input.NumFolds];

            // Instantiate the subgraph for each fold.
            for (int k = 0; k < input.NumFolds; k++)
            {
                // Parse the nodes in input.Nodes into a temporary run context.
                var context = new RunContext(env);
                var graph   = EntryPointNode.ValidateNodes(env, context, input.Nodes);

                // Rename all the variables such that they don't conflict with the ones in the outer run context.
                var mapping = new Dictionary <string, string>();
                foreach (var entryPointNode in graph)
                {
                    entryPointNode.RenameAllVariables(mapping);
                }

                // Instantiate a TrainTest entry point for this fold.
                var args = new TrainTestMacro.Arguments
                {
                    Nodes          = new JArray(graph.Select(n => n.ToJson()).ToArray()),
                    TransformModel = null,
                    LabelColumn    = input.LabelColumn,
                    GroupColumn    = input.GroupColumn,
                    WeightColumn   = input.WeightColumn,
                    NameColumn     = input.NameColumn
                };

                if (transformModelVarName != null)
                {
                    args.TransformModel = new Var <TransformModel> {
                        VarName = transformModelVarName.VariableName
                    }
                }
                ;

                args.Inputs.Data = new Var <IDataView>
                {
                    VarName = mapping[input.Inputs.Data.VarName]
                };
                args.Outputs.PredictorModel = new Var <PredictorModel>
                {
                    VarName = mapping[input.Outputs.PredictorModel.VarName]
                };

                // Set train/test trainer kind to match.
                args.Kind = input.Kind;

                // Set the input bindings for the TrainTest entry point.
                inputBindingMap = new Dictionary <string, List <ParameterBinding> >();
                inputMap        = new Dictionary <ParameterBinding, VariableBinding>();
                var trainingData = new SimpleParameterBinding(nameof(args.TrainingData));
                inputBindingMap.Add(nameof(args.TrainingData), new List <ParameterBinding> {
                    trainingData
                });
                inputMap.Add(trainingData, new ArrayIndexVariableBinding(splitOutputTrainData.VarName, k));
                var testingData = new SimpleParameterBinding(nameof(args.TestingData));
                inputBindingMap.Add(nameof(args.TestingData), new List <ParameterBinding> {
                    testingData
                });
                inputMap.Add(testingData, new ArrayIndexVariableBinding(splitOutputTestData.VarName, k));
                outputMap = new Dictionary <string, string>();
                var transformModelVar = new Var <TransformModel>();
                var predModelVar      = new Var <PredictorModel>();
                outputMap.Add(nameof(TrainTestMacro.Output.PredictorModel), predModelVar.VarName);
                predModelVars[k] = predModelVar;
                if (transformModelVarName != null && transformModelVarName.VariableName != null)
                {
                    var combineModelsArgs = new ModelOperations.SimplePredictorModelInput();
                    inputBindingMap = new Dictionary <string, List <ParameterBinding> >();
                    inputMap        = new Dictionary <ParameterBinding, VariableBinding>();

                    var inputTransformModel = new SimpleVariableBinding(transformModelVarName.VariableName);
                    var inputPredictorModel = new SimpleVariableBinding(predModelVar.VarName);
                    paramBinding = new SimpleParameterBinding(nameof(combineModelsArgs.TransformModel));
                    inputBindingMap.Add(nameof(combineModelsArgs.TransformModel), new List <ParameterBinding>()
                    {
                        paramBinding
                    });
                    inputMap.Add(paramBinding, inputTransformModel);
                    paramBinding = new SimpleParameterBinding(nameof(combineModelsArgs.PredictorModel));
                    inputBindingMap.Add(nameof(combineModelsArgs.PredictorModel), new List <ParameterBinding>()
                    {
                        paramBinding
                    });
                    inputMap.Add(paramBinding, inputPredictorModel);
                    outputMap = new Dictionary <string, string>();

                    var combineNodeOutputPredictorModel = new Var <PredictorModel>();
                    predModelVars[k] = combineNodeOutputPredictorModel;
                    outputMap.Add(nameof(ModelOperations.PredictorModelOutput.PredictorModel), combineNodeOutputPredictorModel.VarName);
                    EntryPointNode combineNode = EntryPointNode.Create(env, "Transforms.TwoHeterogeneousModelCombiner", combineModelsArgs,
                                                                       node.Context, inputBindingMap, inputMap, outputMap);
                    subGraphNodes.Add(combineNode);
                }

                var warningVar = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.Warnings), warningVar.VarName);
                warningsVars[k] = warningVar;
                var overallMetric = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.OverallMetrics), overallMetric.VarName);
                overallMetricsVars[k] = overallMetric;
                var instanceMetric = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.PerInstanceMetrics), instanceMetric.VarName);
                instanceMetricsVars[k] = instanceMetric;
                var confusionMatrix = new Var <IDataView>();
                outputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), confusionMatrix.VarName);
                confusionMatrixVars[k] = confusionMatrix;
                const string trainTestEvaluatorMacroEntryPoint = "Models.TrainTestEvaluator";
                subGraphNodes.Add(EntryPointNode.Create(env, trainTestEvaluatorMacroEntryPoint, args, node.Context, inputBindingMap, inputMap, outputMap));
            }

            // Convert the predictor models to an array of predictor models.
            MacroUtils.ConvertIPredictorModelsToArray(env, node.Context, subGraphNodes, predModelVars, node.GetOutputVariableName(nameof(Output.PredictorModel)));

            // Convert the warnings, overall, per instance and confusion matrix data views into an array.
            var warningsArrayVar = new ArrayVar <IDataView>();
            var overallArrayVar  = new ArrayVar <IDataView>();
            var instanceArrayVar = new ArrayVar <IDataView>();
            ArrayVar <IDataView> confusionMatrixArrayVar = null;

            MacroUtils.ConvertIdataViewsToArray(env, node.Context, subGraphNodes, warningsVars, warningsArrayVar.VarName);
            MacroUtils.ConvertIdataViewsToArray(env, node.Context, subGraphNodes, overallMetricsVars, overallArrayVar.VarName);
            MacroUtils.ConvertIdataViewsToArray(env, node.Context, subGraphNodes, instanceMetricsVars, instanceArrayVar.VarName);
            if (input.Kind == MacroUtils.TrainerKinds.SignatureBinaryClassifierTrainer ||
                input.Kind == MacroUtils.TrainerKinds.SignatureMultiClassClassifierTrainer)
            {
                confusionMatrixArrayVar = new ArrayVar <IDataView>();
                MacroUtils.ConvertIdataViewsToArray(env, node.Context, subGraphNodes, confusionMatrixVars, confusionMatrixArrayVar.VarName);
            }

            var combineArgs = new CombineMetricsInput();

            combineArgs.Kind         = input.Kind;
            combineArgs.LabelColumn  = input.LabelColumn;
            combineArgs.WeightColumn = input.WeightColumn;
            combineArgs.GroupColumn  = input.GroupColumn;
            combineArgs.NameColumn   = input.NameColumn;

            // Set the input bindings for the CombineMetrics entry point.
            var combineInputBindingMap = new Dictionary <string, List <ParameterBinding> >();
            var combineInputMap        = new Dictionary <ParameterBinding, VariableBinding>();

            var warningsArray = new SimpleParameterBinding(nameof(combineArgs.Warnings));

            combineInputBindingMap.Add(nameof(combineArgs.Warnings), new List <ParameterBinding> {
                warningsArray
            });
            combineInputMap.Add(warningsArray, new SimpleVariableBinding(warningsArrayVar.VarName));
            var overallArray = new SimpleParameterBinding(nameof(combineArgs.OverallMetrics));

            combineInputBindingMap.Add(nameof(combineArgs.OverallMetrics), new List <ParameterBinding> {
                overallArray
            });
            combineInputMap.Add(overallArray, new SimpleVariableBinding(overallArrayVar.VarName));
            var combinePerInstArray = new SimpleParameterBinding(nameof(combineArgs.PerInstanceMetrics));

            combineInputBindingMap.Add(nameof(combineArgs.PerInstanceMetrics), new List <ParameterBinding> {
                combinePerInstArray
            });
            combineInputMap.Add(combinePerInstArray, new SimpleVariableBinding(instanceArrayVar.VarName));
            if (confusionMatrixArrayVar != null)
            {
                var combineConfArray = new SimpleParameterBinding(nameof(combineArgs.ConfusionMatrix));
                combineInputBindingMap.Add(nameof(combineArgs.ConfusionMatrix), new List <ParameterBinding> {
                    combineConfArray
                });
                combineInputMap.Add(combineConfArray, new SimpleVariableBinding(confusionMatrixArrayVar.VarName));
            }

            var combineOutputMap  = new Dictionary <string, string>();
            var combineWarningVar = new Var <IDataView>();

            combineWarningVar.VarName = node.GetOutputVariableName(nameof(Output.Warnings));
            combineOutputMap.Add(nameof(Output.Warnings), combineWarningVar.VarName);
            var combineOverallMetric = new Var <IDataView>();

            combineOverallMetric.VarName = node.GetOutputVariableName(nameof(Output.OverallMetrics));
            combineOutputMap.Add(nameof(Output.OverallMetrics), combineOverallMetric.VarName);
            var combineInstanceMetric = new Var <IDataView>();

            combineInstanceMetric.VarName = node.GetOutputVariableName(nameof(Output.PerInstanceMetrics));
            combineOutputMap.Add(nameof(Output.PerInstanceMetrics), combineInstanceMetric.VarName);
            if (confusionMatrixArrayVar != null)
            {
                var combineConfusionMatrix = new Var <IDataView>();
                combineConfusionMatrix.VarName = node.GetOutputVariableName(nameof(Output.ConfusionMatrix));
                combineOutputMap.Add(nameof(TrainTestMacro.Output.ConfusionMatrix), combineConfusionMatrix.VarName);
            }
            var combineMetricsNode = EntryPointNode.Create(env, "Models.CrossValidationResultsCombiner",
                                                           combineArgs, node.Context, combineInputBindingMap, combineInputMap, combineOutputMap);

            subGraphNodes.Add(combineMetricsNode);
            return(new CommonOutputs.MacroOutput <Output>()
            {
                Nodes = subGraphNodes
            });
        }
예제 #23
0
 /// <summary>
 /// Create a new <see cref="IDataView"/> over an enumerable of the items of user-defined type.
 /// The user maintains ownership of the <paramref name="data"/> and the resulting data view will
 /// never alter the contents of the <paramref name="data"/>.
 /// Since <see cref="IDataView"/> is assumed to be immutable, the user is expected to support
 /// multiple enumerations of the <paramref name="data"/> that would return the same results, unless
 /// the user knows that the data will only be cursored once.
 ///
 /// One typical usage for streaming data view could be: create the data view that lazily loads data
 /// as needed, then apply pre-trained transformations to it and cursor through it for transformation
 /// results.
 /// </summary>
 /// <typeparam name="TRow">The user-defined item type.</typeparam>
 /// <param name="data">The enumerable data containing type <typeparamref name="TRow"/> to convert to an <see cref="IDataView"/>. Must not be null.</param>
 /// <param name="schemaDefinition">The optional schema definition of the data view to create. If <c>null</c>,
 /// the schema definition is inferred from <typeparamref name="TRow"/>.</param>
 /// <returns>The constructed <see cref="IDataView"/>.</returns>
 /// <example>
 /// <format type="text/markdown">
 /// <![CDATA[
 /// [!code-csharp[LoadFromEnumerable](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/DataOperations/LoadFromEnumerable.cs)]
 /// ]]>
 /// </format>
 /// </example>
 public IDataView LoadFromEnumerable <TRow>(IEnumerable <TRow> data, SchemaDefinition schemaDefinition = null)
     where TRow : class
 {
     _env.CheckValue(data, nameof(data));
     // schemaDefinition may legitimately be null: the schema is then inferred from TRow.
     _env.CheckValueOrNull(schemaDefinition);
     return(DataViewConstructionUtils.CreateFromEnumerable(_env, data, schemaDefinition));
 }
        /// <summary>
        /// Expands the one-versus-all (OVA) macro: instantiates the training subgraph once per
        /// class label, then combines the resulting binary predictor models into a single
        /// multiclass predictor model via the OVA model combiner entry point.
        /// </summary>
        /// <param name="env">The host environment; must not be null.</param>
        /// <param name="input">The macro arguments; must contain at least one subgraph node.</param>
        /// <param name="node">The entry-point node being expanded.</param>
        /// <returns>The macro output whose <c>Nodes</c> list contains all expanded entry-point nodes.</returns>
        public static CommonOutputs.MacroOutput <Output> OneVersusAll(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(input, nameof(input));
            env.Assert(input.Nodes.Count > 0);

            var numClasses    = GetNumberOfClasses(env, input, out var label);
            var predModelVars = new Var <IPredictorModel> [numClasses];

            // This will be the final resulting list of nodes that is returned from the macro.
            var macroNodes = new List <EntryPointNode>();

            // Instantiate the subgraph for each label value.
            for (int k = 0; k < numClasses; k++)
            {
                var result = ProcessClass(env, k, label, input, node);
                predModelVars[k] = result.Item2;
                macroNodes.AddRange(result.Item1);
            }

            // Use OVA model combiner to combine these models into one.
            // Takes in array of models that are binary predictor models and
            // produces single multiclass predictor model.
            var macroExperiment = new Experiment(env);
            var combinerNode    = new Legacy.Models.OvaModelCombiner
            {
                ModelArray   = new ArrayVar <IPredictorModel>(predModelVars),
                TrainingData = new Var <IDataView> {
                    VarName = node.GetInputVariable(nameof(input.TrainingData)).VariableName
                },
                Caching           = (Legacy.Models.CachingOptions)input.Caching,
                FeatureColumn     = input.FeatureColumn,
                NormalizeFeatures = (Legacy.Models.NormalizeOption)input.NormalizeFeatures,
                LabelColumn       = input.LabelColumn,
                UseProbabilities  = input.UseProbabilities
            };

            // Get output model variable.
            if (!node.OutputMap.TryGetValue(nameof(Output.PredictorModel), out var outVariableName))
            {
                // Use the environment's exception factory (consistent with the rest of this
                // code base) rather than throwing a bare System.Exception.
                throw env.Except("Cannot find OVA model output.");
            }

            // Map macro's output back to OVA combiner (so OVA combiner will set the value on our output variable).
            var combinerOutput = new Legacy.Models.OvaModelCombiner.Output {
                PredictorModel = new Var <IPredictorModel> {
                    VarName = outVariableName
                }
            };

            // Add to experiment (must be done AFTER we assign variable name to output).
            macroExperiment.Add(combinerNode, combinerOutput);

            // Add nodes to main experiment.
            var nodes    = macroExperiment.GetNodes();
            var expNodes = EntryPointNode.ValidateNodes(env, node.Context, nodes);

            macroNodes.AddRange(expNodes);

            return(new CommonOutputs.MacroOutput <Output>()
            {
                Nodes = macroNodes
            });
        }
예제 #25
0
 /// <summary>
 /// Saves this object to the given model save context by delegating to <c>SaveCore</c>.
 /// </summary>
 /// <param name="ctx">The save context; must not be null.</param>
 public void Save(ModelSaveContext ctx)
 {
     // _env is an internal invariant (set at construction), so assert rather than check.
     Contracts.AssertValue(_env);
     _env.CheckValue(ctx, nameof(ctx));
     SaveCore(ctx);
 }
예제 #26
0
        /// <summary>
        /// Adds a MinMax normalization transform on top of <paramref name="view"/> when the
        /// trainer requires normalized features and the feature column is not already normalized,
        /// subject to the <paramref name="autoNorm"/> policy.
        /// </summary>
        /// <returns>True if a normalizer was added; false otherwise.</returns>
        public static bool AddNormalizerIfNeeded(IHostEnvironment env, IChannel ch, ITrainer trainer, ref IDataView view, string featureColumn, NormalizeOption autoNorm)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(ch, nameof(ch));
            ch.CheckValue(trainer, nameof(trainer));
            ch.CheckValue(view, nameof(view));
            ch.CheckValueOrNull(featureColumn);
            ch.CheckUserArg(Enum.IsDefined(typeof(NormalizeOption), autoNorm), nameof(TrainCommand.Arguments.NormalizeFeatures),
                            "Normalize option is invalid. Specify one of 'norm=No', 'norm=Warn', 'norm=Auto', or 'norm=Yes'.");

            if (autoNorm == NormalizeOption.No)
            {
                ch.Info("Not adding a normalizer.");
                return false;
            }

            if (string.IsNullOrEmpty(featureColumn))
            {
                return false;
            }

            var schema = view.Schema;
            if (!schema.TryGetColumnIndex(featureColumn, out int featureIndex))
            {
                // No column with that name: nothing to normalize.
                return false;
            }

            if (autoNorm != NormalizeOption.Yes)
            {
                var    trainerEx    = trainer as ITrainerEx;
                DvBool isNormalized = DvBool.False;
                // Skip when the trainer does not need normalization, or the column's
                // metadata already marks it as normalized.
                if (trainerEx == null || !trainerEx.NeedNormalization ||
                    (schema.TryGetMetadata(BoolType.Instance, MetadataUtils.Kinds.IsNormalized, featureIndex, ref isNormalized) &&
                     isNormalized.IsTrue))
                {
                    ch.Info("Not adding a normalizer.");
                    return false;
                }
                if (autoNorm == NormalizeOption.Warn)
                {
                    ch.Warning("A normalizer is needed for this trainer. Either add a normalizing transform or use the 'norm=Auto', 'norm=Yes' or 'norm=No' options.");
                    return false;
                }
            }

            ch.Info("Automatically adding a MinMax normalization transform, use 'norm=Warn' or 'norm=No' to turn this behavior off.");

            // Quote the feature column name before embedding it in the settings string.
            var    quoteBuilder            = new StringBuilder();
            string quotedFeatureColumnName = CmdQuoter.QuoteValue(featureColumn, quoteBuilder)
                ? quoteBuilder.ToString()
                : featureColumn;
            var minMaxComponent = new SubComponent <IDataTransform, SignatureDataTransform>(
                "MinMax", string.Format("col={{ name={0} source={0} }}", quotedFeatureColumnName));

            if (view is IDataLoader loader)
            {
                // Compose the transform into the loader so the result remains a loader.
                view = CompositeDataLoader.Create(env, loader,
                                                  new KeyValuePair <string, SubComponent <IDataTransform, SignatureDataTransform> >(null, minMaxComponent));
            }
            else
            {
                view = minMaxComponent.CreateInstance(env, view);
            }
            return true;
        }
예제 #27
0
 /// <summary>
 /// Apply this transform model to the given input data.
 /// </summary>
 /// <param name="env">The host environment; must not be null.</param>
 /// <param name="input">The input data view; must not be null.</param>
 /// <returns>The data view produced by applying every transform in the chain to <paramref name="input"/>.</returns>
 public IDataView Apply(IHostEnvironment env, IDataView input)
 {
     Contracts.CheckValue(env, nameof(env));
     env.CheckValue(input, nameof(input));
     return(ApplyTransformUtils.ApplyAllTransformsToData(env, _chain, input));
 }
예제 #28
0
 /// <summary>
 /// Loads a TensorFlow session from a SavedModel export directory.
 /// </summary>
 /// <param name="env">The host environment; must not be null.</param>
 /// <param name="exportDirSavedModel">Path to the SavedModel export directory; must be non-empty.</param>
 /// <returns>The loaded TensorFlow session.</returns>
 private static Session LoadTFSession(IHostEnvironment env, string exportDirSavedModel)
 {
     // CheckValue throws a proper ArgumentNullException carrying the parameter name;
     // the previous Contracts.Check(env != null, nameof(env)) misused the message
     // argument and would fail with the unhelpful message "env".
     Contracts.CheckValue(env, nameof(env));
     // A null or empty path can never name a SavedModel directory, so reject both up front.
     env.CheckNonEmpty(exportDirSavedModel, nameof(exportDirSavedModel));
     return(Session.LoadFromSavedModel(exportDirSavedModel));
 }
        /// <summary>
        /// Creates the transform. When <c>args.filename</c> is empty, an existing view tagged
        /// with <c>args.selectTag</c> is looked up among the tagged views and returned; otherwise
        /// the file is loaded with the configured loader and tagged with <c>args.selectTag</c>.
        /// In both cases the current input is tagged with <c>args.tag</c>.
        /// </summary>
        /// <param name="env">The host environment; must not be null.</param>
        /// <param name="args">The transform arguments; <c>tag</c> and <c>selectTag</c> must be non-empty.</param>
        /// <param name="input">The input data view.</param>
        /// <param name="sourceCtx">On return, set to the original <paramref name="input"/>.</param>
        static IDataTransform Create(IHostEnvironment env, Arguments args, IDataView input, out IDataView sourceCtx)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(args, nameof(args));
            env.CheckValue(input, nameof(input));
            sourceCtx = input;
            // CheckNonEmpty enforces the non-empty contract the message promises; the previous
            // CheckValue only rejected null and passed the message where a parameter name belongs.
            env.CheckNonEmpty(args.tag, nameof(args.tag), "Tag cannot be empty.");
            if (TagHelper.EnumerateTaggedView(true, input).Where(c => c.Item1 == args.tag).Any())
            {
                throw env.Except("Tag '{0}' is already used.", args.tag);
            }
            env.CheckNonEmpty(args.selectTag, nameof(args.selectTag), "Selected tag cannot be empty.");

            if (string.IsNullOrEmpty(args.filename))
            {
                // No file: select an already-tagged view. It must exist and be unambiguous.
                var selected = TagHelper.EnumerateTaggedView(true, input).Where(c => c.Item1 == args.selectTag);
                if (!selected.Any())
                {
                    throw env.Except("Unable to find a view to select with tag '{0}'. Did you forget to specify a filename?", args.selectTag);
                }
                var first = selected.First();
                if (selected.Skip(1).Any())
                {
                    throw env.Except("Tag '{0}' is ambiguous, {1} views were found.", args.selectTag, selected.Count());
                }
                // Ensure the input participates in tagging so it can be cross-referenced below.
                var tagged = input as ITaggedDataView;
                if (tagged == null)
                {
                    var ag = new TagViewTransform.Arguments {
                        tag = args.tag
                    };
                    tagged = new TagViewTransform(env, ag, input);
                }
                // Cross-link the selected view and the (tagged) input under each other's tags.
                first.Item2.AddRange(new[] { new Tuple <string, ITaggedDataView>(args.tag, tagged) });
                tagged.AddRange(new[] { new Tuple <string, ITaggedDataView>(args.selectTag, first.Item2) });
#if (DEBUG_TIP)
                long count = DataViewUtils.ComputeRowCount(tagged);
                if (count == 0)
                {
                    throw env.Except("Replaced view is empty.");
                }
                count = DataViewUtils.ComputeRowCount(first.Item2);
                if (count == 0)
                {
                    throw env.Except("Selected view is empty.");
                }
#endif
                var tr = first.Item2 as IDataTransform;
                env.AssertValue(tr);
                return(tr);
            }
            else
            {
                if (!File.Exists(args.filename))
                {
                    throw env.Except("Unable to find file '{0}'.", args.filename);
                }
                // The selected tag must not already exist: it will name the freshly loaded view.
                var selected = TagHelper.EnumerateTaggedView(true, input).Where(c => c.Item1 == args.selectTag);
                if (selected.Any())
                {
                    throw env.Except("Tag '{0}' was already given. It cannot be assigned to the new file.", args.selectTag);
                }
                var loaderArgs   = new BinaryLoader.Arguments();
                var file         = new MultiFileSource(args.filename);
                var loadSettings = ScikitSubComponent <ILegacyDataLoader, SignatureDataLoader> .AsSubComponent(args.loaderSettings);

                IDataView loader = loadSettings.CreateInstance(env, file);

                var ag = new TagViewTransform.Arguments {
                    tag = args.selectTag
                };
                var newInput = new TagViewTransform(env, ag, loader);
                var tagged   = input as ITaggedDataView;
                if (tagged == null)
                {
                    ag = new TagViewTransform.Arguments {
                        tag = args.tag
                    };
                    tagged = new TagViewTransform(env, ag, input);
                }

                // Cross-link the loaded view and the (tagged) input under each other's tags.
                newInput.AddRange(new[] { new Tuple <string, ITaggedDataView>(args.tag, tagged) });
                tagged.AddRange(new[] { new Tuple <string, ITaggedDataView>(args.selectTag, newInput) });

                var schema = loader.Schema;
                if (schema.Count == 0)
                {
                    throw env.Except("The loaded view '{0}' is empty (empty schema).", args.filename);
                }
                return(newInput);
            }
        }
예제 #30
0
 /// <summary>
 /// Creates a data transform from the 'LoadName{settings}' string.
 /// </summary>
 /// <param name="env">The host environment; must not be null.</param>
 /// <param name="settings">The 'LoadName{settings}' specification of the transform to create.</param>
 /// <param name="source">The source data view the transform is applied to; must not be null.</param>
 /// <returns>The created <see cref="IDataTransform"/>.</returns>
 public static IDataTransform CreateTransform(this IHostEnvironment env, string settings, IDataView source)
 {
     Contracts.CheckValue(env, nameof(env));
     env.CheckValue(source, nameof(source));
     return(CreateCore <IDataTransform, SignatureDataTransform>(env, settings, source));
 }