/// <summary>
/// Entry point macro that performs one expansion step of an AutoML pipeline sweep:
/// harvests metrics from the previous batch of candidate pipelines, asks the AutoML
/// state for the next batch, and emits train/test sub-graph nodes for each candidate.
/// Returns an empty node set once the sweep has no further candidates.
/// </summary>
/// <param name="env">Host environment used for checks, exceptions and sub-experiments.</param>
/// <param name="input">Macro arguments: the AutoML state (or arguments to create one), data and batch size.</param>
/// <param name="node">The entry point node for this macro invocation, giving access to the graph context.</param>
public static CommonOutputs.MacroOutput<Output> PipelineSweep(
    IHostEnvironment env,
    Arguments input,
    EntryPointNode node)
{
    // Either a usable state must already exist, or we must have arguments to build one.
    env.Check(input.StateArguments != null || input.State is AutoInference.AutoMlMlState,
        "Must have a valid AutoML State, or pass arguments to create one.");
    env.Check(input.BatchSize > 0, "Batch size must be > 0.");

    // No state supplied: create it from the arguments, attach the data,
    // and register it as an input variable of the graph.
    if (input.State == null)
    {
        input.State = input.StateArguments?.CreateComponent(env);

        if (input.State is AutoInference.AutoMlMlState createdState)
            createdState.SetTrainTestData(input.TrainingData, input.TestingData);
        else
            throw env.Except($"Incompatible type. Expecting type {typeof(AutoInference.AutoMlMlState)}, received type {input.State?.GetType()}.");

        var stateVariable = node.AddNewVariable("State", input.State);
        node.Context.AddInputVariable(stateVariable.Item2, typeof(IMlState));
    }
    var amlState = (AutoInference.AutoMlMlState)input.State;

    // The indicators are just so the macro knows those pipelines need to
    // be run before performing next expansion. If we add them as inputs
    // to the next iteration, the next iteration cannot run until they have
    // their values set. Thus, indicators are needed.
    var indicators = new List<Var<IDataView>>();
    var expandedNodes = new List<EntryPointNode>();

    // Keep references to the training and testing variable names.
    var trainingVar = new Var<IDataView> { VarName = node.GetInputVariable("TrainingData").VariableName };
    var testingVar = new Var<IDataView> { VarName = node.GetInputVariable("TestingData").VariableName };
    var stateVarRef = new Var<IMlState>() { VarName = node.GetInputVariable(nameof(input.State)).VariableName };

    // Make sure search space is defined. If not, infer it
    // with the default number of transform levels.
    if (!amlState.IsSearchSpaceDefined())
        amlState.InferSearchSpace(numTransformLevels: 1);

    // Extract performance summaries and assign them to the previous batch of candidates.
    foreach (var pipeline in amlState.BatchCandidates)
    {
        if (node.Context.TryGetVariable(ExperimentUtils.GenerateOverallMetricVarName(pipeline.UniqueId), out var metricVar))
        {
            pipeline.PerformanceSummary = AutoMlUtils.ExtractRunSummary(env, (IDataView)metricVar.Value, amlState.Metric.Name);
            amlState.AddEvaluated(pipeline);
        }
    }

    node.OutputMap.TryGetValue("Results", out string resultsVarName);
    var resultsVar = new Var<IDataView>() { VarName = resultsVarName };
    node.OutputMap.TryGetValue("State", out string stateOutName);
    var stateOutVar = new Var<IMlState>() { VarName = stateOutName };

    // Get next set of candidates.
    var nextCandidates = amlState.GetNextCandidates(input.BatchSize);

    // Termination condition: no more candidates were returned,
    // so end the expansion with no further nodes.
    if (nextCandidates == null || nextCandidates.Length == 0)
        return new CommonOutputs.MacroOutput<Output>() { Nodes = expandedNodes };

    // Prep all returned candidates.
    foreach (var candidate in nextCandidates)
    {
        // Add a train/test experiment to the current graph for this candidate pipeline.
        var candidateGraph = new Experiment(env);
        var trainTestOutput = candidate.AddAsTrainTest(trainingVar, testingVar, amlState.TrainerKind, candidateGraph);

        // Rename the metrics variable so it references the pipeline ID
        // in the output map, context and entry point output.
        var metricVarName = ExperimentUtils.GenerateOverallMetricVarName(candidate.UniqueId);
        var validatedNode = EntryPointNode.ValidateNodes(env, node.Context,
            new JArray(candidateGraph.GetNodes().Last()), node.Catalog).Last();
        validatedNode.RenameOutputVariable(trainTestOutput.OverallMetrics.VarName, metricVarName, cascadeChanges: true);
        trainTestOutput.OverallMetrics.VarName = metricVarName;
        expandedNodes.Add(validatedNode);

        // Store indicators, to pass to the next iteration of the macro.
        indicators.Add(trainTestOutput.OverallMetrics);
    }

    return new CommonOutputs.MacroOutput<Output>() { Nodes = expandedNodes };
}