// Verifies how AutoMlUtils.ProcessWeight transforms raw values for a minimizing
// metric. The expected values are consistent with each value v mapping to
// (max - v), so the best (largest) raw value becomes 0 — presumably flipping
// "lower is better" into "higher is better"; confirm against ProcessWeight.
public void TestMinimizingMetricTransformations()
{
    var rawValues = new[] { 100d, 10d, -2d, -1d, 5.8d, -3.1d };
    var expected = new[] { 0d, 90d, 102d, 101d, 94.2d, 103.1d };
    var ceiling = rawValues.Max();

    var transformed = rawValues
        .Select(v => AutoMlUtils.ProcessWeight(v, ceiling, false))
        .ToArray();

    // Compare with a small tolerance — exact floating-point equality is unreliable.
    for (var i = 0; i < expected.Length; i++)
    {
        Assert.True(System.Math.Abs(transformed[i] - expected[i]) < 0.001);
    }
}
/// <summary>
/// Builds a one-versus-all multiclass estimator whose underlying binary learner is a
/// fast-forest classifier configured from <paramref name="param"/>.
/// </summary>
/// <param name="context">The <see cref="MLContext"/> used to create the trainers.</param>
/// <param name="param">Search-space values (tree count, feature fraction, column names).</param>
/// <returns>A OneVersusAll estimator wrapping the fast-forest binary trainer.</returns>
public override IEstimator <ITransformer> BuildFromOption(MLContext context, FastForestOption param)
{
    // Thread count is taken from the environment helper, not from the search space.
    var threadCount = AutoMlUtils.GetNumberOfThreadFromEnvrionment();

    var option = new FastForestBinaryTrainer.Options()
    {
        LabelColumnName = param.LabelColumnName,
        FeatureColumnName = param.FeatureColumnName,
        ExampleWeightColumnName = param.ExampleWeightColumnName,
        NumberOfTrees = param.NumberOfTrees,
        FeatureFraction = param.FeatureFraction,
        NumberOfThreads = threadCount,
    };

    var binaryTrainer = context.BinaryClassification.Trainers.FastForest(option);
    return context.MulticlassClassification.Trainers.OneVersusAll(binaryTrainer, labelColumnName: param.LabelColumnName);
}
/// <summary>
/// Builds a fast-forest regression estimator configured from <paramref name="param"/>.
/// </summary>
/// <param name="context">The <see cref="MLContext"/> used to create the trainer.</param>
/// <param name="param">Search-space values (tree count, feature fraction, column names).</param>
/// <returns>The configured fast-forest regression estimator.</returns>
public override IEstimator <ITransformer> BuildFromOption(MLContext context, FastForestOption param)
{
    // Thread count is taken from the environment helper, not from the search space.
    var threadCount = AutoMlUtils.GetNumberOfThreadFromEnvrionment();

    var option = new FastForestRegressionTrainer.Options()
    {
        LabelColumnName = param.LabelColumnName,
        FeatureColumnName = param.FeatureColumnName,
        ExampleWeightColumnName = param.ExampleWeightColumnName,
        NumberOfTrees = param.NumberOfTrees,
        FeatureFraction = param.FeatureFraction,
        NumberOfThreads = threadCount,
    };

    return context.Regression.Trainers.FastForest(option);
}
/// <summary>
/// Builds an SDCA logistic-regression binary classification estimator configured
/// from <paramref name="param"/>.
/// </summary>
/// <param name="context">The <see cref="MLContext"/> used to create the trainer.</param>
/// <param name="param">Search-space values (L1/L2 regularization, column names).</param>
/// <returns>The configured SDCA logistic-regression estimator.</returns>
public override IEstimator <ITransformer> BuildFromOption(MLContext context, SdcaOption param)
{
    // Thread count is taken from the environment helper, not from the search space.
    var threadCount = AutoMlUtils.GetNumberOfThreadFromEnvrionment();

    var option = new SdcaLogisticRegressionBinaryTrainer.Options()
    {
        LabelColumnName = param.LabelColumnName,
        FeatureColumnName = param.FeatureColumnName,
        ExampleWeightColumnName = param.ExampleWeightColumnName,
        L1Regularization = param.L1Regularization,
        L2Regularization = param.L2Regularization,
        NumberOfThreads = threadCount,
    };

    return context.BinaryClassification.Trainers.SdcaLogisticRegression(option);
}
/// <summary>
/// Builds an L-BFGS Poisson regression estimator configured from <paramref name="param"/>.
/// </summary>
/// <param name="context">The <see cref="MLContext"/> used to create the trainer.</param>
/// <param name="param">Search-space values (L1/L2 regularization, column names).</param>
/// <returns>The configured Poisson regression estimator.</returns>
public override IEstimator <ITransformer> BuildFromOption(MLContext context, LbfgsOption param)
{
    // Thread count is taken from the environment helper, not from the search space.
    var threadCount = AutoMlUtils.GetNumberOfThreadFromEnvrionment();

    var option = new LbfgsPoissonRegressionTrainer.Options()
    {
        LabelColumnName = param.LabelColumnName,
        FeatureColumnName = param.FeatureColumnName,
        ExampleWeightColumnName = param.ExampleWeightColumnName,
        L1Regularization = param.L1Regularization,
        L2Regularization = param.L2Regularization,
        NumberOfThreads = threadCount,
    };

    return context.Regression.Trainers.LbfgsPoissonRegression(option);
}
/// <summary>
/// Builds a one-versus-all multiclass estimator whose underlying binary learner is an
/// L-BFGS logistic-regression classifier configured from <paramref name="param"/>.
/// </summary>
/// <param name="context">The <see cref="MLContext"/> used to create the trainers.</param>
/// <param name="param">Search-space values (L1/L2 regularization, column names).</param>
/// <returns>A OneVersusAll estimator wrapping the logistic-regression binary trainer.</returns>
public override IEstimator <ITransformer> BuildFromOption(MLContext context, LbfgsOption param)
{
    // Thread count is taken from the environment helper, not from the search space.
    var threadCount = AutoMlUtils.GetNumberOfThreadFromEnvrionment();

    var option = new LbfgsLogisticRegressionBinaryTrainer.Options()
    {
        LabelColumnName = param.LabelColumnName,
        FeatureColumnName = param.FeatureColumnName,
        ExampleWeightColumnName = param.ExampleWeightColumnName,
        L1Regularization = param.L1Regularization,
        L2Regularization = param.L2Regularization,
        NumberOfThreads = threadCount,
    };

    var binaryTrainer = context.BinaryClassification.Trainers.LbfgsLogisticRegression(option);
    return context.MulticlassClassification.Trainers.OneVersusAll(binaryTrainer, param.LabelColumnName);
}
/// <summary>
/// Builds a fast-tree binary classification estimator configured from <paramref name="param"/>.
/// </summary>
/// <param name="context">The <see cref="MLContext"/> used to create the trainer.</param>
/// <param name="param">Search-space values (tree/leaf counts, learning rate, binning, column names).</param>
/// <returns>The configured fast-tree binary estimator.</returns>
public override IEstimator <ITransformer> BuildFromOption(MLContext context, FastTreeOption param)
{
    // Thread count is taken from the environment helper, not from the search space.
    var threadCount = AutoMlUtils.GetNumberOfThreadFromEnvrionment();

    var option = new FastTreeBinaryTrainer.Options()
    {
        LabelColumnName = param.LabelColumnName,
        FeatureColumnName = param.FeatureColumnName,
        ExampleWeightColumnName = param.ExampleWeightColumnName,
        NumberOfLeaves = param.NumberOfLeaves,
        NumberOfTrees = param.NumberOfTrees,
        MinimumExampleCountPerLeaf = param.MinimumExampleCountPerLeaf,
        LearningRate = param.LearningRate,
        MaximumBinCountPerFeature = param.MaximumBinCountPerFeature,
        FeatureFraction = param.FeatureFraction,
        NumberOfThreads = threadCount,
    };

    return context.BinaryClassification.Trainers.FastTree(option);
}
/// <summary>
/// Builds a LightGBM regression estimator configured from <paramref name="param"/>.
/// Note that the search space's tree count maps onto LightGBM's iteration count.
/// </summary>
/// <param name="context">The <see cref="MLContext"/> used to create the trainer.</param>
/// <param name="param">Search-space values (leaves, iterations, learning rate, booster knobs, column names).</param>
/// <returns>The configured LightGBM regression estimator.</returns>
public override IEstimator <ITransformer> BuildFromOption(MLContext context, LgbmOption param)
{
    // Thread count is taken from the environment helper, not from the search space.
    var threadCount = AutoMlUtils.GetNumberOfThreadFromEnvrionment();

    // Regularization and sampling knobs live on the gradient booster, not the trainer itself.
    var booster = new GradientBooster.Options()
    {
        SubsampleFraction = param.SubsampleFraction,
        FeatureFraction = param.FeatureFraction,
        L1Regularization = param.L1Regularization,
        L2Regularization = param.L2Regularization,
    };

    var option = new LightGbmRegressionTrainer.Options()
    {
        LabelColumnName = param.LabelColumnName,
        FeatureColumnName = param.FeatureColumnName,
        ExampleWeightColumnName = param.ExampleWeightColumnName,
        NumberOfLeaves = param.NumberOfLeaves,
        NumberOfIterations = param.NumberOfTrees,
        MinimumExampleCountPerLeaf = param.MinimumExampleCountPerLeaf,
        LearningRate = param.LearningRate,
        MaximumBinCountPerFeature = param.MaximumBinCountPerFeature,
        Booster = booster,
        NumberOfThreads = threadCount,
    };

    return context.Regression.Trainers.LightGbm(option);
}
/// <summary>
/// Entry-point macro performing one expansion step of an AutoML pipeline sweep:
/// validates or creates the sweep state, folds completed candidates' metrics back
/// into the state, then emits one train/test subgraph node per next candidate.
/// Returns an empty node set when the state produces no more candidates.
/// </summary>
/// <param name="env">Host environment used for checks, exceptions and subgraph creation.</param>
/// <param name="input">Macro arguments: state (or arguments to create one), data, batch size.</param>
/// <param name="node">The entry-point node being expanded; its context/output map are mutated.</param>
/// <returns>The subgraph nodes to run before the next macro iteration.</returns>
public static CommonOutputs.MacroOutput <Output> PipelineSweep(
    IHostEnvironment env,
    Arguments input,
    EntryPointNode node)
{
    env.Check(input.StateArguments != null || input.State is AutoInference.AutoMlMlState,
        "Must have a valid AutoML State, or pass arguments to create one.");
    env.Check(input.BatchSize > 0, "Batch size must be > 0.");

    // If no current state, create object and set data.
    if (input.State == null)
    {
        input.State = input.StateArguments?.CreateComponent(env);
        if (input.State is AutoInference.AutoMlMlState inState)
        {
            inState.SetTrainTestData(input.TrainingData, input.TestingData);
        }
        else
        {
            throw env.Except($"Incompatible type. Expecting type {typeof(AutoInference.AutoMlMlState)}, received type {input.State?.GetType()}.");
        }

        // Register the freshly created state as a graph variable so later
        // iterations of the macro can receive it as an input.
        var result = node.AddNewVariable("State", input.State);
        node.Context.AddInputVariable(result.Item2, typeof(IMlState));
    }
    var autoMlState = (AutoInference.AutoMlMlState)input.State;

    // The indicators are just so the macro knows those pipelines need to
    // be run before performing next expansion. If we add them as inputs
    // to the next iteration, the next iteration cannot run until they have
    // their values set. Thus, indicators are needed.
    var pipelineIndicators = new List <Var <IDataView> >();
    var expNodes = new List <EntryPointNode>();

    // Keep versions of the training and testing var names
    var training = new Var <IDataView> { VarName = node.GetInputVariable("TrainingData").VariableName };
    var testing = new Var <IDataView> { VarName = node.GetInputVariable("TestingData").VariableName };
    // NOTE(review): amlsVarObj (and pipelineIndicators above) are populated but not
    // visibly consumed in this block — presumably read by graph machinery elsewhere; confirm.
    var amlsVarObj = new Var <IMlState>() { VarName = node.GetInputVariable(nameof(input.State)).VariableName };

    // Make sure search space is defined. If not, infer,
    // with default number of transform levels.
    if (!autoMlState.IsSearchSpaceDefined())
    {
        autoMlState.InferSearchSpace(numTransformLevels: 1);
    }

    // Extract performance summaries and assign to previous candidate pipelines.
    foreach (var pipeline in autoMlState.BatchCandidates)
    {
        // A candidate only has a metric variable once its train/test subgraph has run.
        if (node.Context.TryGetVariable(ExperimentUtils.GenerateOverallMetricVarName(pipeline.UniqueId), out var v))
        {
            pipeline.PerformanceSummary = AutoMlUtils.ExtractRunSummary(env, (IDataView)v.Value, autoMlState.Metric.Name);
            autoMlState.AddEvaluated(pipeline);
        }
    }

    // Resolve the macro's declared output variable names (may be absent from the map).
    node.OutputMap.TryGetValue("Results", out string outDvName);
    var outDvVar = new Var <IDataView>() { VarName = outDvName };
    node.OutputMap.TryGetValue("State", out string outStateName);
    var outStateVar = new Var <IMlState>() { VarName = outStateName };

    // Get next set of candidates.
    var candidatePipelines = autoMlState.GetNextCandidates(input.BatchSize);

    // Check if termination condition was met, i.e. no more candidates were returned.
    // If so, end expansion and add a node to extract the sweep result.
    if (candidatePipelines == null || candidatePipelines.Length == 0)
    {
        // Add a node to extract the sweep result.
        return(new CommonOutputs.MacroOutput <Output>() { Nodes = expNodes });
    }

    // Prep all returned candidates
    foreach (var p in candidatePipelines)
    {
        // Add train test experiments to current graph for candidate pipeline
        var subgraph = new Experiment(env);
        var trainTestOutput = p.AddAsTrainTest(training, testing, autoMlState.TrainerKind, subgraph);

        // Change variable name to reference pipeline ID in output map, context and entrypoint output.
        var uniqueName = ExperimentUtils.GenerateOverallMetricVarName(p.UniqueId);
        // Only the last node of the candidate's subgraph is validated and emitted.
        var sgNode = EntryPointNode.ValidateNodes(env, node.Context,
            new JArray(subgraph.GetNodes().Last()), node.Catalog).Last();
        sgNode.RenameOutputVariable(trainTestOutput.OverallMetrics.VarName, uniqueName, cascadeChanges: true);
        trainTestOutput.OverallMetrics.VarName = uniqueName;
        expNodes.Add(sgNode);

        // Store indicators, to pass to next iteration of macro.
        pipelineIndicators.Add(trainTestOutput.OverallMetrics);
    }

    return(new CommonOutputs.MacroOutput <Output>() { Nodes = expNodes });
}
/// <summary>
/// Builds a missing-value replacement transform over the input/output column pairs
/// described by <paramref name="param"/>.
/// </summary>
/// <param name="context">The <see cref="MLContext"/> used to create the transform.</param>
/// <param name="param">Parallel arrays of input and output column names.</param>
/// <returns>The configured missing-value replacement estimator.</returns>
public override IEstimator <ITransformer> BuildFromOption(MLContext context, ReplaceMissingValueOption param)
{
    var columnPairs = AutoMlUtils.CreateInputOutputColumnPairsFromStrings(param.InputColumnNames, param.OutputColumnNames);
    return context.Transforms.ReplaceMissingValues(columnPairs);
}
/// <summary>
/// Builds a type-conversion transform over the input/output column pairs
/// described by <paramref name="param"/>.
/// </summary>
/// <param name="context">The <see cref="MLContext"/> used to create the transform.</param>
/// <param name="param">Parallel arrays of input and output column names.</param>
/// <returns>The configured type-conversion estimator.</returns>
public override IEstimator <ITransformer> BuildFromOption(MLContext context, ConvertTypeOption param)
{
    var columnPairs = AutoMlUtils.CreateInputOutputColumnPairsFromStrings(param.InputColumnNames, param.OutputColumnNames);
    return context.Transforms.Conversion.ConvertType(columnPairs);
}
/// <summary>
/// Builds a one-hot encoding transform over the input/output column pairs
/// described by <paramref name="param"/>.
/// </summary>
/// <param name="context">The <see cref="MLContext"/> used to create the transform.</param>
/// <param name="param">Parallel arrays of input and output column names.</param>
/// <returns>The configured one-hot encoding estimator.</returns>
public override IEstimator <ITransformer> BuildFromOption(MLContext context, OneHotOption param)
{
    var columnPairs = AutoMlUtils.CreateInputOutputColumnPairsFromStrings(param.InputColumnNames, param.OutputColumnNames);
    return context.Transforms.Categorical.OneHotEncoding(columnPairs);
}