/// <summary> /// Given a predictor type returns a set of all permissible learners (with their sweeper params, if defined). /// </summary> /// <returns>Array of viable learners.</returns> public static SuggestedRecipe.SuggestedLearner[] AllowedLearners(IHostEnvironment env, MacroUtils.TrainerKinds trainerKind) { //not all learners advertised in the API are available in CORE. var catalog = ModuleCatalog.CreateInstance(env); var availableLearnersList = catalog.AllEntryPoints().Where( x => x.InputKinds?.FirstOrDefault(i => i == typeof(CommonInputs.ITrainerInput)) != null); var learners = new List <SuggestedRecipe.SuggestedLearner>(); var type = typeof(CommonInputs.ITrainerInput); var trainerTypes = typeof(Experiment).Assembly.GetTypes() .Where(p => type.IsAssignableFrom(p) && MacroUtils.IsTrainerOfKind(p, trainerKind)); foreach (var tt in trainerTypes) { var sweepParams = AutoMlUtils.GetSweepRanges(tt); var epInputObj = (CommonInputs.ITrainerInput)tt.GetConstructor(Type.EmptyTypes)?.Invoke(new object[] { }); var sl = new SuggestedRecipe.SuggestedLearner { PipelineNode = new TrainerPipelineNode(epInputObj, sweepParams), LearnerName = tt.Name }; if (sl.PipelineNode != null && availableLearnersList.FirstOrDefault(l => l.Name.Equals(sl.PipelineNode.GetEpName())) != null) { learners.Add(sl); } } return(learners.ToArray()); }
public void Run() { JObject graph; try { graph = JObject.Parse(File.ReadAllText(_path)); } catch (JsonReaderException ex) { throw _host.Except(ex, "Failed to parse experiment graph: {0}", ex.Message); } var catalog = ModuleCatalog.CreateInstance(_host); var runner = new GraphRunner(_host, catalog, graph[FieldNames.Nodes] as JArray); // Setting inputs. var jInputs = graph[FieldNames.Inputs] as JObject; if (graph[FieldNames.Inputs] != null && jInputs == null) { throw _host.Except("Unexpected value for '{0}': {1}", FieldNames.Inputs, graph[FieldNames.Inputs]); } if (jInputs != null) { foreach (var kvp in jInputs) { var path = kvp.Value as JValue; if (path == null) { throw _host.Except("Invalid value for input: {0}", kvp.Value); } var varName = kvp.Key; var type = runner.GetPortDataKind(varName); SetInputFromPath(runner, varName, path.Value <string>(), type); } } runner.RunAll(); // Reading outputs. var jOutputs = graph[FieldNames.Outputs] as JObject; if (jOutputs != null) { foreach (var kvp in jOutputs) { var path = kvp.Value as JValue; if (path == null) { throw _host.Except("Invalid value for output: {0}", kvp.Value); } var varName = kvp.Key; var type = runner.GetPortDataKind(varName); GetOutputToPath(runner, varName, path.Value <string>(), type); } } }
/// <summary> /// Creates what might be considered a good "default" server factory, if possible, /// or <c>null</c> if no good default was possible. A <c>null</c> value could be returned, /// for example, if a user opted to remove all implementations of <see cref="IServer"/> and /// the associated <see cref="IServerFactory"/> for security reasons. /// </summary> public static IServerFactory CreateDefaultServerFactoryOrNull(IExceptionContext ectx) { Contracts.CheckValue(ectx, nameof(ectx)); // REVIEW: There should be a better way. There currently isn't, // but there should be. This is pretty horrifying, but it is preferable to // the alternative of having core components depend on an actual server // implementation, since we want those to be removable because of security // concerns in certain environments (since not everyone will be wild about // web servers popping up everywhere). var cat = ModuleCatalog.CreateInstance(ectx); ModuleCatalog.ComponentInfo component; if (!cat.TryFindComponent(typeof(IServerFactory), "mini", out component)) { return(null); } IServerFactory factory = (IServerFactory)Activator.CreateInstance(component.ArgumentType); var field = factory.GetType().GetField("Port"); if (field?.FieldType != typeof(int)) { return(null); } field.SetValue(factory, 12345); return(factory); }
public Experiment(Runtime.IHostEnvironment env) { _env = env; _catalog = ModuleCatalog.CreateInstance(_env); _jsonNodes = new List <string>(); _serializer = new JsonSerializer(); _serializer.Converters.Add(new StringEnumConverter()); _helper = new SerializationHelper(); }
public void Generate(IEnumerable <HelpCommand.Component> infos) { var catalog = ModuleCatalog.CreateInstance(_host); using (var sw = new StreamWriter(_csFilename)) { var writer = IndentingTextWriter.Wrap(sw, " "); // Generate header CSharpGeneratorUtils.GenerateHeader(writer); foreach (var entryPointInfo in catalog.AllEntryPoints().Where(x => !_excludedSet.Contains(x.Name)).OrderBy(x => x.Name)) { // Generate method CSharpGeneratorUtils.GenerateMethod(writer, entryPointInfo.Name, _defaultNamespace); } // Generate footer CSharpGeneratorUtils.GenerateFooter(writer); CSharpGeneratorUtils.GenerateFooter(writer); foreach (var entryPointInfo in catalog.AllEntryPoints().Where(x => !_excludedSet.Contains(x.Name)).OrderBy(x => x.Name)) { // Generate input and output classes GenerateInputOutput(writer, entryPointInfo, catalog); } writer.WriteLine("namespace Runtime"); writer.WriteLine("{"); writer.Indent(); foreach (var kind in catalog.GetAllComponentKinds()) { // Generate kind base class GenerateComponentKind(writer, kind); foreach (var component in catalog.GetAllComponents(kind)) { // Generate component GenerateComponent(writer, component, catalog); } } CSharpGeneratorUtils.GenerateFooter(writer); CSharpGeneratorUtils.GenerateFooter(writer); writer.WriteLine("#pragma warning restore"); } }
public static void Main() { var env = new TlcEnvironment(); var catalog = ModuleCatalog.CreateInstance(env); var jObj = JsonManifestUtils.BuildAllManifests(env, catalog); var jPath = "manifest.json"; using (var file = File.OpenWrite(jPath)) using (var writer = new StreamWriter(file)) using (var jw = new JsonTextWriter(writer)) { jw.Formatting = Formatting.Indented; jObj.WriteTo(jw); } }
public void TestRequestedLearners() { // Get datasets var pathData = GetDataPath("adult.train"); var pathDataTest = GetDataPath("adult.test"); const int numOfSampleRows = 100; const string schema = "sep=, col=Features:R4:0,2,4,10-12 col=workclass:TX:1 col=education:TX:3 col=marital_status:TX:5 col=occupation:TX:6 " + "col=relationship:TX:7 col=race:TX:8 col=sex:TX:9 col=native_country:TX:13 col=label_IsOver50K_:R4:14 header=+"; var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); #pragma warning disable 0618 var datasetTrain = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); var datasetTest = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); var requestedLearners = new[] { $"LogisticRegressionBinaryClassifier", $"FastTreeBinaryClassifier" }; #pragma warning restore 0618 // Define entrypoint graph string inputGraph = @" { 'Nodes': [ { 'Name': 'Models.PipelineSweeper', 'Inputs': { 'TrainingData': '$TrainingData', 'TestingData': '$TestingData', 'StateArguments': { 'Name': 'AutoMlState', 'Settings': { 'Metric': 'Auc', 'Engine': { 'Name': 'Rocket', 'Settings' : { 'TopKLearners' : 2, 'SecondRoundTrialsPerLearner' : 0 }, }, 'TerminatorArgs': { 'Name': 'IterationLimited', 'Settings': { 'FinalHistoryLength': 35 } }, 'TrainerKind': 'SignatureBinaryClassifierTrainer', 'RequestedLearners' : [ 'LogisticRegressionBinaryClassifier', 'FastTreeBinaryClassifier' ] } }, 'BatchSize': 5 }, 'Outputs': { 'State': '$StateOut', 'Results': '$ResultsOut' } }, ] }"; JObject graph = JObject.Parse(inputGraph); var catalog = ModuleCatalog.CreateInstance(Env); var runner = new GraphRunner(Env, catalog, graph[FieldNames.Nodes] as JArray); runner.SetInput("TrainingData", datasetTrain); runner.SetInput("TestingData", datasetTest); runner.RunAll(); var autoMlState = runner.GetOutput <AutoInference.AutoMlMlState>("StateOut"); Assert.NotNull(autoMlState); var space = autoMlState.GetSearchSpace(); // Make sure only learners left are those retained. Assert.Equal(requestedLearners.Length, space.Item2.Length); Assert.True(space.Item2.All(l => requestedLearners.Any(r => r == l.LearnerName))); }
public void TestRocketPipelineEngine() { // Get datasets var pathData = GetDataPath("adult.train"); var pathDataTest = GetDataPath("adult.test"); const int numOfSampleRows = 1000; int numIterations = 35; const string schema = "sep=, col=Features:R4:0,2,4,10-12 col=workclass:TX:1 col=education:TX:3 col=marital_status:TX:5 col=occupation:TX:6 " + "col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=native_country:TX:13 col=label_IsOver50K_:R4:14 header=+"; var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); #pragma warning disable 0618 var datasetTrain = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); var datasetTest = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); #pragma warning restore 0618 // Define entrypoint graph string inputGraph = @" { 'Nodes': [ { 'Name': 'Models.PipelineSweeper', 'Inputs': { 'TrainingData': '$TrainingData', 'TestingData': '$TestingData', 'StateArguments': { 'Name': 'AutoMlState', 'Settings': { 'Metric': 'Auc', 'Engine': { 'Name': 'Rocket', 'Settings' : { 'TopKLearners' : 2, 'SecondRoundTrialsPerLearner' : 5 }, }, 'TerminatorArgs': { 'Name': 'IterationLimited', 'Settings': { 'FinalHistoryLength': 35 } }, 'TrainerKind': 'SignatureBinaryClassifierTrainer' } }, 'BatchSize': 5 }, 'Outputs': { 'State': '$StateOut', 'Results': '$ResultsOut' } }, ] }"; JObject graph = JObject.Parse(inputGraph); var catalog = ModuleCatalog.CreateInstance(Env); var runner = new GraphRunner(Env, catalog, graph[FieldNames.Nodes] as JArray); runner.SetInput("TrainingData", datasetTrain); runner.SetInput("TestingData", datasetTest); runner.RunAll(); var autoMlState = runner.GetOutput <AutoInference.AutoMlMlState>("StateOut"); Assert.NotNull(autoMlState); var allPipelines = autoMlState.GetAllEvaluatedPipelines(); var bestPipeline = autoMlState.GetBestPipeline(); Assert.Equal(allPipelines.Length, numIterations); Assert.True(bestPipeline.PerformanceSummary.MetricValue > 0.1); var results = runner.GetOutput <IDataView>("ResultsOut"); Assert.NotNull(results); var rows = PipelinePattern.ExtractResults(Env, results, "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel"); Assert.True(rows.Length == numIterations); }
public void EntryPointPipelineSweep() { // Get datasets var pathData = GetDataPath("adult.tiny.with-schema.txt"); var pathDataTest = GetDataPath("adult.tiny.with-schema.txt"); const int numOfSampleRows = 1000; int numIterations = 4; var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); #pragma warning disable 0618 var datasetTrain = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTrain }).Data.Take(numOfSampleRows); var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); var datasetTest = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTest }).Data.Take(numOfSampleRows); #pragma warning restore 0618 // Define entrypoint graph string inputGraph = @" { 'Nodes': [ { 'Name': 'Models.PipelineSweeper', 'Inputs': { 'TrainingData': '$TrainingData', 'TestingData': '$TestingData', 'StateArguments': { 'Name': 'AutoMlState', 'Settings': { 'Metric': 'Auc', 'Engine': { 'Name': 'UniformRandom' }, 'TerminatorArgs': { 'Name': 'IterationLimited', 'Settings': { 'FinalHistoryLength': 4 } }, 'TrainerKind': 'SignatureBinaryClassifierTrainer' } }, 'BatchSize': 2 }, 'Outputs': { 'State': '$StateOut', 'Results': '$ResultsOut' } }, ] }"; JObject graph = JObject.Parse(inputGraph); var catalog = ModuleCatalog.CreateInstance(Env); var runner = new GraphRunner(Env, catalog, graph[FieldNames.Nodes] as JArray); runner.SetInput("TrainingData", datasetTrain); runner.SetInput("TestingData", datasetTest); runner.RunAll(); var autoMlState = runner.GetOutput <AutoInference.AutoMlMlState>("StateOut"); Assert.NotNull(autoMlState); var allPipelines = autoMlState.GetAllEvaluatedPipelines(); var bestPipeline = autoMlState.GetBestPipeline(); Assert.Equal(allPipelines.Length, numIterations); Assert.True(bestPipeline.PerformanceSummary.MetricValue > 0.1); var results = runner.GetOutput <IDataView>("ResultsOut"); Assert.NotNull(results); var rows = PipelinePattern.ExtractResults(Env, results, "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel"); Assert.True(rows.Length == numIterations); Assert.True(rows.All(r => r.TrainingMetricValue > 0.1)); }
public void EntryPointPipelineSweepSerialization() { // Get datasets var pathData = GetDataPath("adult.train"); var pathDataTest = GetDataPath("adult.test"); const int numOfSampleRows = 1000; int numIterations = 10; const string schema = "sep=, col=Features:R4:0,2,4,10-12 col=workclass:TX:1 col=education:TX:3 col=marital_status:TX:5 col=occupation:TX:6 " + "col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=native_country:TX:13 col=label_IsOver50K_:R4:14 header=+"; var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); #pragma warning disable 0618 var datasetTrain = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); var datasetTest = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); #pragma warning restore 0618 // Define entrypoint graph string inputGraph = @" { 'Nodes': [ { 'Name': 'Models.PipelineSweeper', 'Inputs': { 'TrainingData': '$TrainingData', 'TestingData': '$TestingData', 'StateArguments': { 'Name': 'AutoMlState', 'Settings': { 'Metric': 'Auc', 'Engine': { 'Name': 'UniformRandom' }, 'TerminatorArgs': { 'Name': 'IterationLimited', 'Settings': { 'FinalHistoryLength': 10 } }, 'TrainerKind': 'SignatureBinaryClassifierTrainer' } }, 'BatchSize': 5 }, 'Outputs': { 'State': '$StateOut', 'Results': '$ResultsOut' } }, ] }"; JObject graphJson = JObject.Parse(inputGraph); var catalog = ModuleCatalog.CreateInstance(Env); var graph = new EntryPointGraph(Env, catalog, graphJson[FieldNames.Nodes] as JArray); // Test if ToJson() works properly. var nodes = new JArray(graph.AllNodes.Select(node => node.ToJson())); var runner = new GraphRunner(Env, catalog, nodes); runner.SetInput("TrainingData", datasetTrain); runner.SetInput("TestingData", datasetTest); runner.RunAll(); var results = runner.GetOutput <IDataView>("ResultsOut"); Assert.NotNull(results); var rows = PipelinePattern.ExtractResults(Env, results, "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel"); Assert.True(rows.Length == numIterations); }
public void PipelineSweeperMultiClassClassification() { // Get datasets // TODO (agoswami) : For now we use the same dataset for train and test since the repo does not have a separate test file for the iris dataset. // In the future the PipelineSweeper Macro will have an option to take just one dataset as input, and do the train-test split internally. var pathData = GetDataPath(@"iris.txt"); var pathDataTest = GetDataPath(@"iris.txt"); int numIterations = 2; const string schema = "col=Species:R4:0 col=SepalLength:R4:1 col=SepalWidth:R4:2 col=PetalLength:R4:3 col=PetalWidth:R4:4"; var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); #pragma warning disable 0618 var datasetTrain = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data; var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); var datasetTest = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data; #pragma warning restore 0618 // Define entrypoint graph string inputGraph = @" { 'Nodes': [ { 'Name': 'Models.PipelineSweeper', 'Inputs': { 'TrainingData': '$TrainingData', 'TestingData': '$TestingData', 'LabelColumns': ['Species'], 'StateArguments': { 'Name': 'AutoMlState', 'Settings': { 'Metric': 'AccuracyMicro', 'Engine': { 'Name': 'Defaults' }, 'TerminatorArgs': { 'Name': 'IterationLimited', 'Settings': { 'FinalHistoryLength': 2 } }, 'TrainerKind': 'SignatureMultiClassClassifierTrainer', 'RequestedLearners' : [ 'LogisticRegressionClassifier', 'StochasticDualCoordinateAscentClassifier' ] } }, 'BatchSize': 1 }, 'Outputs': { 'State': '$StateOut', 'Results': '$ResultsOut' } }, ] }"; JObject graphJson = JObject.Parse(inputGraph); var catalog = ModuleCatalog.CreateInstance(Env); var runner = new GraphRunner(Env, catalog, graphJson[FieldNames.Nodes] as JArray); runner.SetInput("TrainingData", datasetTrain); runner.SetInput("TestingData", datasetTest); runner.RunAll(); var autoMlState = runner.GetOutput <AutoInference.AutoMlMlState>("StateOut"); Assert.NotNull(autoMlState); var allPipelines = autoMlState.GetAllEvaluatedPipelines(); var bestPipeline = autoMlState.GetBestPipeline(); Assert.Equal(allPipelines.Length, numIterations); var bestMicroAccuracyTrain = bestPipeline.PerformanceSummary.TrainingMetricValue; var bestMicroAccuracyTest = bestPipeline.PerformanceSummary.MetricValue; Assert.True((0.97 < bestMicroAccuracyTrain) && (bestMicroAccuracyTrain < 0.99)); Assert.True((0.97 < bestMicroAccuracyTest) && (bestMicroAccuracyTest < 0.99)); var results = runner.GetOutput <IDataView>("ResultsOut"); Assert.NotNull(results); var rows = PipelinePattern.ExtractResults(Env, results, "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel"); Assert.True(rows.Length == numIterations); Assert.True(rows.All(r => r.MetricValue > 0.9)); }
public void PipelineSweeperRoles() { // Get datasets var pathData = GetDataPath("adult.train"); var pathDataTest = GetDataPath("adult.test"); const int numOfSampleRows = 100; int numIterations = 2; const string schema = "sep=, col=age:R4:0 col=workclass:TX:1 col=fnlwgt:R4:2 col=education:TX:3 col=education_num:R4:4 col=marital_status:TX:5 col=occupation:TX:6 " + "col=relationship:TX:7 col=ethnicity:TX:8 col=sex:TX:9 col=Features:R4:10-12 col=native_country:TX:13 col=IsOver50K_:R4:14 header=+"; var inputFileTrain = new SimpleFileHandle(Env, pathData, false, false); #pragma warning disable 0618 var datasetTrain = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTrain, CustomSchema = schema }).Data.Take(numOfSampleRows); var inputFileTest = new SimpleFileHandle(Env, pathDataTest, false, false); var datasetTest = ImportTextData.ImportText(Env, new ImportTextData.Input { InputFile = inputFileTest, CustomSchema = schema }).Data.Take(numOfSampleRows); #pragma warning restore 0618 // Define entrypoint graph string inputGraph = @" { 'Nodes': [ { 'Name': 'Models.PipelineSweeper', 'Inputs': { 'TrainingData': '$TrainingData', 'TestingData': '$TestingData', 'LabelColumns': ['IsOver50K_'], 'WeightColumns': ['education_num'], 'NameColumns': ['education'], 'TextFeatureColumns': ['workclass', 'marital_status', 'occupation'], 'StateArguments': { 'Name': 'AutoMlState', 'Settings': { 'Metric': 'Auc', 'Engine': { 'Name': 'Defaults' }, 'TerminatorArgs': { 'Name': 'IterationLimited', 'Settings': { 'FinalHistoryLength': 2 } }, 'TrainerKind': 'SignatureBinaryClassifierTrainer', 'RequestedLearners' : [ 'LogisticRegressionBinaryClassifier', 'FastTreeBinaryClassifier' ] } }, 'BatchSize': 1 }, 'Outputs': { 'State': '$StateOut', 'Results': '$ResultsOut' } }, ] }"; JObject graphJson = JObject.Parse(inputGraph); var catalog = ModuleCatalog.CreateInstance(Env); var runner = new GraphRunner(Env, catalog, graphJson[FieldNames.Nodes] as JArray); runner.SetInput("TrainingData", datasetTrain); runner.SetInput("TestingData", datasetTest); runner.RunAll(); var autoMlState = runner.GetOutput <AutoInference.AutoMlMlState>("StateOut"); Assert.NotNull(autoMlState); var allPipelines = autoMlState.GetAllEvaluatedPipelines(); var bestPipeline = autoMlState.GetBestPipeline(); Assert.Equal(allPipelines.Length, numIterations); var trainAuc = bestPipeline.PerformanceSummary.TrainingMetricValue; var testAuc = bestPipeline.PerformanceSummary.MetricValue; Assert.True((0.94 < trainAuc) && (trainAuc < 0.95)); Assert.True((0.815 < testAuc) && (testAuc < 0.825)); var results = runner.GetOutput <IDataView>("ResultsOut"); Assert.NotNull(results); var rows = PipelinePattern.ExtractResults(Env, results, "Graph", "MetricValue", "PipelineId", "TrainingMetricValue", "FirstInput", "PredictorModel"); Assert.True(rows.Length == numIterations); Assert.True(rows.All(r => r.TrainingMetricValue > 0.1)); }