public static AutoMlMlState InferPipelines(IHostEnvironment env, PipelineOptimizerBase autoMlEngine, string trainDataPath, string schemaDefinitionFile, out string schemaDefinition, int numTransformLevels, int batchSize, SupportedMetric metric, out PipelinePattern bestPipeline, int numOfSampleRows, ITerminator terminator, MacroUtils.TrainerKinds trainerKind) { Contracts.CheckValue(env, nameof(env)); // REVIEW: Should be able to infer schema by itself, without having to // infer recipes. Look into this. // Set loader settings through inference RecipeInference.InferRecipesFromData(env, trainDataPath, schemaDefinitionFile, out var _, out schemaDefinition, out var _, true); #pragma warning disable 0618 var data = ImportTextData.ImportText(env, new ImportTextData.Input { InputFile = new SimpleFileHandle(env, trainDataPath, false, false), CustomSchema = schemaDefinition }).Data; #pragma warning restore 0618 var splitOutput = TrainTestSplit.Split(env, new TrainTestSplit.Input { Data = data, Fraction = 0.8f }); AutoMlMlState amls = new AutoMlMlState(env, metric, autoMlEngine, terminator, trainerKind, splitOutput.TrainData.Take(numOfSampleRows), splitOutput.TestData.Take(numOfSampleRows)); bestPipeline = amls.InferPipelines(numTransformLevels, batchSize, numOfSampleRows); return(amls); }
/// <summary> /// Given a predictor type returns a set of all permissible learners (with their sweeper params, if defined). /// </summary> /// <returns>Array of viable learners.</returns> public static SuggestedRecipe.SuggestedLearner[] AllowedLearners(IHostEnvironment env, MacroUtils.TrainerKinds trainerKind) { //not all learners advertised in the API are available in CORE. var catalog = env.ComponentCatalog; var availableLearnersList = catalog.AllEntryPoints().Where( x => x.InputKinds?.FirstOrDefault(i => i == typeof(CommonInputs.ITrainerInput)) != null); var learners = new List <SuggestedRecipe.SuggestedLearner>(); var type = typeof(CommonInputs.ITrainerInput); var trainerTypes = typeof(Experiment).Assembly.GetTypes() .Where(p => type.IsAssignableFrom(p) && MacroUtils.IsTrainerOfKind(p, trainerKind)); foreach (var tt in trainerTypes) { var sweepParams = AutoMlUtils.GetSweepRanges(tt); var epInputObj = (CommonInputs.ITrainerInput)tt.GetConstructor(Type.EmptyTypes)?.Invoke(new object[] { }); var sl = new SuggestedRecipe.SuggestedLearner { PipelineNode = new TrainerPipelineNode(epInputObj, sweepParams), LearnerName = tt.Name }; if (sl.PipelineNode != null && availableLearnersList.FirstOrDefault(l => l.Name.Equals(sl.PipelineNode.GetEpName())) != null) { learners.Add(sl); } } return(learners.ToArray()); }
/// <summary> /// The InferPipelines methods are just public portals to the internal function that handle different /// types of data being passed in: training IDataView, path to training file, or train and test files. /// </summary> public static AutoMlMlState InferPipelines(IHostEnvironment env, PipelineOptimizerBase autoMlEngine, IDataView trainData, IDataView testData, int numTransformLevels, int batchSize, SupportedMetric metric, out PipelinePattern bestPipeline, ITerminator terminator, MacroUtils.TrainerKinds trainerKind) { Contracts.CheckValue(env, nameof(env)); env.CheckValue(trainData, nameof(trainData)); env.CheckValue(testData, nameof(testData)); int numOfRows = (int)(trainData.GetRowCount(false) ?? 1000); AutoMlMlState amls = new AutoMlMlState(env, metric, autoMlEngine, terminator, trainerKind, trainData, testData); bestPipeline = amls.InferPipelines(numTransformLevels, batchSize, numOfRows); return(amls); }