/// <summary>
/// Runs pipeline inference starting from a training data file on disk.
/// The loader schema is obtained via <c>RecipeInference.InferRecipesFromData</c>, the file is
/// imported as text, split with <c>TrainTestSplit</c>, and both resulting parts are passed through
/// <c>Take(numOfSampleRows)</c> before being handed to a new <see cref="AutoMlMlState"/>.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="autoMlEngine">Pipeline optimizer forwarded to the <see cref="AutoMlMlState"/>.</param>
/// <param name="trainDataPath">Path of the training data file.</param>
/// <param name="schemaDefinitionFile">Schema definition file forwarded to recipe inference.</param>
/// <param name="schemaDefinition">Receives the schema definition produced by recipe inference.</param>
/// <param name="numTransformLevels">Forwarded to <c>AutoMlMlState.InferPipelines</c>.</param>
/// <param name="batchSize">Forwarded to <c>AutoMlMlState.InferPipelines</c>.</param>
/// <param name="metric">Metric forwarded to the <see cref="AutoMlMlState"/>.</param>
/// <param name="bestPipeline">Receives the pipeline returned by <c>AutoMlMlState.InferPipelines</c>.</param>
/// <param name="numOfSampleRows">Row count passed to <c>Take</c> for both split parts and to the inference call.</param>
/// <param name="terminator">Terminator forwarded to the <see cref="AutoMlMlState"/>.</param>
/// <param name="trainerKind">Trainer kind forwarded to the <see cref="AutoMlMlState"/>.</param>
/// <returns>The <see cref="AutoMlMlState"/> instance that performed the inference.</returns>
public static AutoMlMlState InferPipelines(
    IHostEnvironment env,
    PipelineOptimizerBase autoMlEngine,
    string trainDataPath,
    string schemaDefinitionFile,
    out string schemaDefinition,
    int numTransformLevels,
    int batchSize,
    SupportedMetric metric,
    out PipelinePattern bestPipeline,
    int numOfSampleRows,
    ITerminator terminator,
    MacroUtils.TrainerKinds trainerKind)
{
    Contracts.CheckValue(env, nameof(env));

    // REVIEW: Schema inference should not require a full recipe inference pass. Look into this.
    // For now the loader settings (schema definition) come out of recipe inference.
    RecipeInference.InferRecipesFromData(
        env, trainDataPath, schemaDefinitionFile, out var _, out schemaDefinition, out var _, true);

    // The text import statement is kept entirely inside the 0618 (obsolete member) suppression.
#pragma warning disable 0618
    var importInput = new ImportTextData.Input
    {
        InputFile = new SimpleFileHandle(env, trainDataPath, false, false),
        CustomSchema = schemaDefinition
    };
    var data = ImportTextData.ImportText(env, importInput).Data;
#pragma warning restore 0618

    const float splitFraction = 0.8f;
    var splitInput = new TrainTestSplit.Input { Data = data, Fraction = splitFraction };
    var splitOutput = TrainTestSplit.Split(env, splitInput);

    // Both parts of the split go through Take(numOfSampleRows) before inference.
    var sampledTrain = splitOutput.TrainData.Take(numOfSampleRows);
    var sampledTest = splitOutput.TestData.Take(numOfSampleRows);

    var state = new AutoMlMlState(env, metric, autoMlEngine, terminator, trainerKind, sampledTrain, sampledTest);
    bestPipeline = state.InferPipelines(numTransformLevels, batchSize, numOfSampleRows);
    return state;
}
/// <summary>
/// The InferPipelines methods are public portals to the internal inference routine; each overload
/// handles a different form of input data: a single training IDataView, a path to a training file,
/// or separate train and test IDataViews. This overload takes separate train and test data views
/// and uses them as-is (no splitting or sampling is done here).
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="autoMlEngine">Pipeline optimizer forwarded to the <see cref="AutoMlMlState"/>.</param>
/// <param name="trainData">Training data; must not be null.</param>
/// <param name="testData">Test data; must not be null.</param>
/// <param name="numTransformLevels">Forwarded to <c>AutoMlMlState.InferPipelines</c>.</param>
/// <param name="batchSize">Forwarded to <c>AutoMlMlState.InferPipelines</c>.</param>
/// <param name="metric">Metric forwarded to the <see cref="AutoMlMlState"/>.</param>
/// <param name="bestPipeline">Receives the pipeline returned by <c>AutoMlMlState.InferPipelines</c>.</param>
/// <param name="terminator">Terminator forwarded to the <see cref="AutoMlMlState"/>.</param>
/// <param name="trainerKind">Trainer kind forwarded to the <see cref="AutoMlMlState"/>.</param>
/// <returns>The <see cref="AutoMlMlState"/> instance that performed the inference.</returns>
public static AutoMlMlState InferPipelines(IHostEnvironment env, PipelineOptimizerBase autoMlEngine,
    IDataView trainData, IDataView testData, int numTransformLevels, int batchSize, SupportedMetric metric,
    out PipelinePattern bestPipeline, ITerminator terminator, MacroUtils.TrainerKinds trainerKind)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(trainData, nameof(trainData));
    env.CheckValue(testData, nameof(testData));

    // Row count handed to inference when the data view does not report one.
    const int defaultRowCount = 1000;

    // The reported row count is wider than int (the original code needed an explicit cast).
    // A plain (int) cast wraps silently for counts above int.MaxValue and can yield a negative
    // number, so clamp instead of truncating.
    long reportedRows = trainData.GetRowCount(false) ?? defaultRowCount;
    int numOfRows = reportedRows > int.MaxValue ? int.MaxValue : (int)reportedRows;

    AutoMlMlState amls = new AutoMlMlState(env, metric, autoMlEngine, terminator, trainerKind, trainData, testData);
    bestPipeline = amls.InferPipelines(numTransformLevels, batchSize, numOfRows);
    return amls;
}
/// <summary>
/// Runs pipeline inference from a single data view. The data is split with <c>TrainTestSplit</c>,
/// and both resulting parts are passed through <c>Take(numOfSampleRows)</c> before being handed
/// to a new <see cref="AutoMlMlState"/>.
/// </summary>
/// <param name="env">Host environment; must not be null.</param>
/// <param name="autoMlEngine">Pipeline optimizer forwarded to the <see cref="AutoMlMlState"/>.</param>
/// <param name="data">Data view to split into train and test parts; must not be null.</param>
/// <param name="numTransformLevels">Forwarded to <c>AutoMlMlState.InferPipelines</c>.</param>
/// <param name="batchSize">Forwarded to <c>AutoMlMlState.InferPipelines</c>.</param>
/// <param name="metric">Metric forwarded to the <see cref="AutoMlMlState"/>.</param>
/// <param name="bestPipeline">Receives the pipeline returned by <c>AutoMlMlState.InferPipelines</c>.</param>
/// <param name="numOfSampleRows">Row count passed to <c>Take</c> for both split parts and to the inference call.</param>
/// <param name="terminator">Terminator forwarded to the <see cref="AutoMlMlState"/>.</param>
/// <param name="trainerKind">Trainer kind forwarded to the <see cref="AutoMlMlState"/>.</param>
/// <returns>The <see cref="AutoMlMlState"/> instance that performed the inference.</returns>
public static AutoMlMlState InferPipelines(
    IHostEnvironment env,
    PipelineOptimizerBase autoMlEngine,
    IDataView data,
    int numTransformLevels,
    int batchSize,
    SupportedMetric metric,
    out PipelinePattern bestPipeline,
    int numOfSampleRows,
    ITerminator terminator,
    MacroUtils.TrainerKinds trainerKind)
{
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(data, nameof(data));

    const float splitFraction = 0.8f;
    var splitInput = new TrainTestSplit.Input { Data = data, Fraction = splitFraction };
    var splitOutput = TrainTestSplit.Split(env, splitInput);

    // Both parts of the split go through Take(numOfSampleRows) before inference.
    var sampledTrain = splitOutput.TrainData.Take(numOfSampleRows);
    var sampledTest = splitOutput.TestData.Take(numOfSampleRows);

    var state = new AutoMlMlState(env, metric, autoMlEngine, terminator, trainerKind, sampledTrain, sampledTest);
    bestPipeline = state.InferPipelines(numTransformLevels, batchSize, numOfSampleRows);
    return state;
}