Beispiel #1
0
        /// <summary>
        /// Feeds a fixed set of values through <c>AutoMlUtils.ProcessWeight</c>, passing the largest
        /// input as the second argument and <c>false</c> as the third, and asserts that every result
        /// is within 0.001 of its expected value (each expected value equals the maximum minus the input).
        /// </summary>
        public void TestMinimizingMetricTransformations()
        {
            double[] inputs   = { 100d, 10d, -2d, -1d, 5.8d, -3.1d };
            double[] expected = { 0d, 90d, 102d, 101d, 94.2d, 103.1d };
            var largest       = inputs.Max();

            // Stop at the first value outside the tolerance, just like a short-circuiting All().
            var allWithinTolerance = true;
            for (var i = 0; i < inputs.Length; i++)
            {
                var actual = AutoMlUtils.ProcessWeight(inputs[i], largest, false);
                if (!(System.Math.Abs(actual - expected[i]) < 0.001))
                {
                    allWithinTolerance = false;
                    break;
                }
            }

            Assert.True(allWithinTolerance);
        }
Beispiel #2
0
        /// <summary>
        /// Creates a one-versus-all multiclass estimator whose underlying binary learner is a
        /// FastForest trainer configured from <paramref name="param"/>.
        /// </summary>
        /// <param name="context">ML context whose trainer catalogs create the estimators.</param>
        /// <param name="param">Values copied onto the FastForest binary trainer options.</param>
        /// <returns>The one-versus-all estimator, using <c>param.LabelColumnName</c> as its label column.</returns>
        public override IEstimator <ITransformer> BuildFromOption(MLContext context, FastForestOption param)
        {
            var forestOptions = new FastForestBinaryTrainer.Options();
            forestOptions.NumberOfTrees = param.NumberOfTrees;
            forestOptions.LabelColumnName = param.LabelColumnName;
            forestOptions.FeatureColumnName = param.FeatureColumnName;
            forestOptions.ExampleWeightColumnName = param.ExampleWeightColumnName;
            forestOptions.FeatureFraction = param.FeatureFraction;
            forestOptions.NumberOfThreads = AutoMlUtils.GetNumberOfThreadFromEnvrionment();

            var binaryForest = context.BinaryClassification.Trainers.FastForest(forestOptions);

            return context.MulticlassClassification.Trainers.OneVersusAll(binaryForest, labelColumnName: param.LabelColumnName);
        }
Beispiel #3
0
        /// <summary>
        /// Creates a FastForest regression estimator configured from <paramref name="param"/>.
        /// </summary>
        /// <param name="context">ML context whose regression trainer catalog creates the estimator.</param>
        /// <param name="param">Values copied onto the FastForest regression trainer options.</param>
        /// <returns>The configured FastForest regression estimator.</returns>
        public override IEstimator <ITransformer> BuildFromOption(MLContext context, FastForestOption param)
        {
            var forestOptions = new FastForestRegressionTrainer.Options();
            forestOptions.NumberOfTrees = param.NumberOfTrees;
            forestOptions.FeatureFraction = param.FeatureFraction;
            forestOptions.LabelColumnName = param.LabelColumnName;
            forestOptions.FeatureColumnName = param.FeatureColumnName;
            forestOptions.ExampleWeightColumnName = param.ExampleWeightColumnName;
            forestOptions.NumberOfThreads = AutoMlUtils.GetNumberOfThreadFromEnvrionment();

            var regressionTrainers = context.Regression.Trainers;

            return regressionTrainers.FastForest(forestOptions);
        }
Beispiel #4
0
        /// <summary>
        /// Creates an SDCA logistic regression binary classification estimator configured from
        /// <paramref name="param"/>.
        /// </summary>
        /// <param name="context">ML context whose binary classification trainer catalog creates the estimator.</param>
        /// <param name="param">Column names and L1/L2 regularization values copied onto the trainer options.</param>
        /// <returns>The configured SDCA logistic regression estimator.</returns>
        public override IEstimator <ITransformer> BuildFromOption(MLContext context, SdcaOption param)
        {
            var sdcaOptions = new SdcaLogisticRegressionBinaryTrainer.Options();
            sdcaOptions.LabelColumnName = param.LabelColumnName;
            sdcaOptions.FeatureColumnName = param.FeatureColumnName;
            sdcaOptions.ExampleWeightColumnName = param.ExampleWeightColumnName;
            sdcaOptions.L1Regularization = param.L1Regularization;
            sdcaOptions.L2Regularization = param.L2Regularization;
            sdcaOptions.NumberOfThreads = AutoMlUtils.GetNumberOfThreadFromEnvrionment();

            var binaryTrainers = context.BinaryClassification.Trainers;

            return binaryTrainers.SdcaLogisticRegression(sdcaOptions);
        }
Beispiel #5
0
        /// <summary>
        /// Creates an L-BFGS Poisson regression estimator configured from <paramref name="param"/>.
        /// </summary>
        /// <param name="context">ML context whose regression trainer catalog creates the estimator.</param>
        /// <param name="param">Regularization values and column names copied onto the trainer options.</param>
        /// <returns>The configured L-BFGS Poisson regression estimator.</returns>
        public override IEstimator <ITransformer> BuildFromOption(MLContext context, LbfgsOption param)
        {
            var poissonOptions = new LbfgsPoissonRegressionTrainer.Options();
            poissonOptions.L1Regularization = param.L1Regularization;
            poissonOptions.L2Regularization = param.L2Regularization;
            poissonOptions.LabelColumnName = param.LabelColumnName;
            poissonOptions.FeatureColumnName = param.FeatureColumnName;
            poissonOptions.ExampleWeightColumnName = param.ExampleWeightColumnName;
            poissonOptions.NumberOfThreads = AutoMlUtils.GetNumberOfThreadFromEnvrionment();

            var regressionTrainers = context.Regression.Trainers;

            return regressionTrainers.LbfgsPoissonRegression(poissonOptions);
        }
Beispiel #6
0
        /// <summary>
        /// Creates a one-versus-all multiclass estimator whose underlying binary learner is an
        /// L-BFGS logistic regression trainer configured from <paramref name="param"/>.
        /// </summary>
        /// <param name="context">ML context whose trainer catalogs create the estimators.</param>
        /// <param name="param">Regularization values and column names copied onto the binary trainer options.</param>
        /// <returns>The one-versus-all estimator, using <c>param.LabelColumnName</c> as its label column.</returns>
        public override IEstimator <ITransformer> BuildFromOption(MLContext context, LbfgsOption param)
        {
            var logisticOptions = new LbfgsLogisticRegressionBinaryTrainer.Options();
            logisticOptions.L1Regularization = param.L1Regularization;
            logisticOptions.L2Regularization = param.L2Regularization;
            logisticOptions.LabelColumnName = param.LabelColumnName;
            logisticOptions.FeatureColumnName = param.FeatureColumnName;
            logisticOptions.ExampleWeightColumnName = param.ExampleWeightColumnName;
            logisticOptions.NumberOfThreads = AutoMlUtils.GetNumberOfThreadFromEnvrionment();

            // Build the binary learner first, then wrap it for multiclass use.
            var binaryLearner = context.BinaryClassification.Trainers.LbfgsLogisticRegression(logisticOptions);
            var multiclassTrainers = context.MulticlassClassification.Trainers;

            return multiclassTrainers.OneVersusAll(binaryLearner, param.LabelColumnName);
        }
Beispiel #7
0
        /// <summary>
        /// Creates a FastTree binary classification estimator configured from <paramref name="param"/>.
        /// </summary>
        /// <param name="context">ML context whose binary classification trainer catalog creates the estimator.</param>
        /// <param name="param">Tree-shape, learning-rate and column-name values copied onto the trainer options.</param>
        /// <returns>The configured FastTree binary classification estimator.</returns>
        public override IEstimator <ITransformer> BuildFromOption(MLContext context, FastTreeOption param)
        {
            var treeOptions = new FastTreeBinaryTrainer.Options();

            // Tree structure and learning settings.
            treeOptions.NumberOfLeaves = param.NumberOfLeaves;
            treeOptions.NumberOfTrees = param.NumberOfTrees;
            treeOptions.MinimumExampleCountPerLeaf = param.MinimumExampleCountPerLeaf;
            treeOptions.LearningRate = param.LearningRate;

            // Column bindings.
            treeOptions.LabelColumnName = param.LabelColumnName;
            treeOptions.FeatureColumnName = param.FeatureColumnName;
            treeOptions.ExampleWeightColumnName = param.ExampleWeightColumnName;

            treeOptions.NumberOfThreads = AutoMlUtils.GetNumberOfThreadFromEnvrionment();
            treeOptions.MaximumBinCountPerFeature = param.MaximumBinCountPerFeature;
            treeOptions.FeatureFraction = param.FeatureFraction;

            var binaryTrainers = context.BinaryClassification.Trainers;

            return binaryTrainers.FastTree(treeOptions);
        }
Beispiel #8
0
        /// <summary>
        /// Creates a LightGBM regression estimator configured from <paramref name="param"/>.
        /// </summary>
        /// <remarks>
        /// <c>param.NumberOfTrees</c> is mapped onto the trainer's <c>NumberOfIterations</c> option, and
        /// the subsample/feature fractions plus L1/L2 regularization are set on a gradient booster
        /// options object assigned to <c>Booster</c>.
        /// </remarks>
        /// <param name="context">ML context whose regression trainer catalog creates the estimator.</param>
        /// <param name="param">Values copied onto the LightGBM trainer and booster options.</param>
        /// <returns>The configured LightGBM regression estimator.</returns>
        public override IEstimator <ITransformer> BuildFromOption(MLContext context, LgbmOption param)
        {
            var lgbmOptions = new LightGbmRegressionTrainer.Options();
            lgbmOptions.NumberOfLeaves = param.NumberOfLeaves;
            lgbmOptions.NumberOfIterations = param.NumberOfTrees;
            lgbmOptions.MinimumExampleCountPerLeaf = param.MinimumExampleCountPerLeaf;
            lgbmOptions.LearningRate = param.LearningRate;
            lgbmOptions.NumberOfThreads = AutoMlUtils.GetNumberOfThreadFromEnvrionment();
            lgbmOptions.LabelColumnName = param.LabelColumnName;
            lgbmOptions.FeatureColumnName = param.FeatureColumnName;
            lgbmOptions.ExampleWeightColumnName = param.ExampleWeightColumnName;

            var boosterOptions = new GradientBooster.Options();
            boosterOptions.SubsampleFraction = param.SubsampleFraction;
            boosterOptions.FeatureFraction = param.FeatureFraction;
            boosterOptions.L1Regularization = param.L1Regularization;
            boosterOptions.L2Regularization = param.L2Regularization;
            lgbmOptions.Booster = boosterOptions;

            lgbmOptions.MaximumBinCountPerFeature = param.MaximumBinCountPerFeature;

            var regressionTrainers = context.Regression.Trainers;

            return regressionTrainers.LightGbm(lgbmOptions);
        }
Beispiel #9
0
        /// <summary>
        /// Macro expansion step for an AutoML pipeline sweep. It validates the input, creates the
        /// AutoML state when none is supplied, records run summaries for the previous batch of
        /// candidate pipelines whose overall-metric variables are present in the node context,
        /// requests the next batch of candidates, and emits one train/test entry point node per
        /// candidate.
        /// </summary>
        /// <param name="env">Host environment used for checks, exceptions and experiment creation.</param>
        /// <param name="input">Macro arguments: state (or arguments to create it), batch size, and training/testing data.</param>
        /// <param name="node">The macro's entry point node; its context, catalog and input/output maps are read and updated.</param>
        /// <returns>
        /// A macro output whose <c>Nodes</c> holds the train/test nodes for the new candidates; the list
        /// is empty when no further candidates are returned.
        /// </returns>
        /// <remarks>
        /// An exception created via <c>env.Except</c> is thrown when the state had to be created from
        /// <c>input.StateArguments</c> and the result is not an <c>AutoInference.AutoMlMlState</c>.
        /// NOTE(review): <c>pipelineIndicators</c>, <c>amlsVarObj</c>, <c>outDvVar</c> and <c>outStateVar</c>
        /// are built but never read inside this method; it looks like wiring for a follow-up macro
        /// iteration is missing here. Confirm whether that is intentional.
        /// </remarks>
        public static CommonOutputs.MacroOutput <Output> PipelineSweep(
            IHostEnvironment env,
            Arguments input,
            EntryPointNode node)
        {
            // Preconditions: either an AutoML state or the arguments to build one, and a positive batch size.
            env.Check(input.StateArguments != null || input.State is AutoInference.AutoMlMlState,
                      "Must have a valid AutoML State, or pass arguments to create one.");
            env.Check(input.BatchSize > 0, "Batch size must be > 0.");

            // If no current state, create object and set data.
            if (input.State == null)
            {
                input.State = input.StateArguments?.CreateComponent(env);

                if (input.State is AutoInference.AutoMlMlState inState)
                {
                    inState.SetTrainTestData(input.TrainingData, input.TestingData);
                }
                else
                {
                    throw env.Except($"Incompatible type. Expecting type {typeof(AutoInference.AutoMlMlState)}, received type {input.State?.GetType()}.");
                }

                // Register the freshly created state as a "State" variable on the node and
                // expose it as an input variable of the node's context.
                var result = node.AddNewVariable("State", input.State);
                node.Context.AddInputVariable(result.Item2, typeof(IMlState));
            }
            // NOTE(review): a non-null input.State of another IMlState type would fail this cast
            // when StateArguments is also non-null (the first check passes in that case).
            var autoMlState = (AutoInference.AutoMlMlState)input.State;

            // The indicators are just so the macro knows those pipelines need to
            // be run before performing next expansion. If we add them as inputs
            // to the next iteration, the next iteration cannot run until they have
            // their values set. Thus, indicators are needed.
            var pipelineIndicators = new List <Var <IDataView> >();

            // Nodes produced by this expansion; returned to the caller in the macro output.
            var expNodes = new List <EntryPointNode>();

            // Keep versions of the training and testing var names
            var training = new Var <IDataView> {
                VarName = node.GetInputVariable("TrainingData").VariableName
            };
            var testing = new Var <IDataView> {
                VarName = node.GetInputVariable("TestingData").VariableName
            };
            // NOTE(review): amlsVarObj is not used after this point in the method.
            var amlsVarObj =
                new Var <IMlState>()
            {
                VarName = node.GetInputVariable(nameof(input.State)).VariableName
            };

            // Make sure search space is defined. If not, infer,
            // with default number of transform levels.
            if (!autoMlState.IsSearchSpaceDefined())
            {
                autoMlState.InferSearchSpace(numTransformLevels: 1);
            }

            // Extract performance summaries and assign to previous candidate pipelines.
            // Only pipelines whose overall-metric variable is found in the context are marked evaluated.
            foreach (var pipeline in autoMlState.BatchCandidates)
            {
                if (node.Context.TryGetVariable(ExperimentUtils.GenerateOverallMetricVarName(pipeline.UniqueId),
                                                out var v))
                {
                    pipeline.PerformanceSummary =
                        AutoMlUtils.ExtractRunSummary(env, (IDataView)v.Value, autoMlState.Metric.Name);
                    autoMlState.AddEvaluated(pipeline);
                }
            }

            // Look up the output variable names for "Results" and "State"; the TryGetValue results
            // are not checked, so the names may be null if the keys are absent.
            // NOTE(review): outDvVar and outStateVar are not used after they are created.
            node.OutputMap.TryGetValue("Results", out string outDvName);
            var outDvVar = new Var <IDataView>()
            {
                VarName = outDvName
            };

            node.OutputMap.TryGetValue("State", out string outStateName);
            var outStateVar = new Var <IMlState>()
            {
                VarName = outStateName
            };

            // Get next set of candidates.
            var candidatePipelines = autoMlState.GetNextCandidates(input.BatchSize);

            // Check if termination condition was met, i.e. no more candidates were returned.
            // If so, end expansion and add a node to extract the sweep result.
            if (candidatePipelines == null || candidatePipelines.Length == 0)
            {
                // Add a node to extract the sweep result.
                // NOTE(review): no extraction node is actually added here; expNodes is still empty
                // at this point and is returned as-is.
                return(new CommonOutputs.MacroOutput <Output>()
                {
                    Nodes = expNodes
                });
            }

            // Prep all returned candidates
            foreach (var p in candidatePipelines)
            {
                // Add train test experiments to current graph for candidate pipeline
                var subgraph        = new Experiment(env);
                var trainTestOutput = p.AddAsTrainTest(training, testing, autoMlState.TrainerKind, subgraph);

                // Change variable name to reference pipeline ID in output map, context and entrypoint output.
                // Only the last node of the subgraph is validated and kept.
                var uniqueName = ExperimentUtils.GenerateOverallMetricVarName(p.UniqueId);
                var sgNode     = EntryPointNode.ValidateNodes(env, node.Context,
                                                              new JArray(subgraph.GetNodes().Last()), node.Catalog).Last();
                sgNode.RenameOutputVariable(trainTestOutput.OverallMetrics.VarName, uniqueName, cascadeChanges: true);
                trainTestOutput.OverallMetrics.VarName = uniqueName;
                expNodes.Add(sgNode);

                // Store indicators, to pass to next iteration of macro.
                pipelineIndicators.Add(trainTestOutput.OverallMetrics);
            }

            return(new CommonOutputs.MacroOutput <Output>()
            {
                Nodes = expNodes
            });
        }
        /// <summary>
        /// Creates a missing-value replacement estimator for the input/output column pairs
        /// described by <paramref name="param"/>.
        /// </summary>
        /// <param name="context">ML context whose transforms catalog creates the estimator.</param>
        /// <param name="param">Supplies the input and output column names to pair up.</param>
        /// <returns>The missing-value replacement estimator for the paired columns.</returns>
        public override IEstimator <ITransformer> BuildFromOption(MLContext context, ReplaceMissingValueOption param)
        {
            var sourceColumns = param.InputColumnNames;
            var targetColumns = param.OutputColumnNames;
            var columnPairs   = AutoMlUtils.CreateInputOutputColumnPairsFromStrings(sourceColumns, targetColumns);

            var transforms = context.Transforms;

            return transforms.ReplaceMissingValues(columnPairs);
        }
Beispiel #11
0
        /// <summary>
        /// Creates a type-conversion estimator for the input/output column pairs described by
        /// <paramref name="param"/>.
        /// </summary>
        /// <param name="context">ML context whose conversion transforms catalog creates the estimator.</param>
        /// <param name="param">Supplies the input and output column names to pair up.</param>
        /// <returns>The type-conversion estimator for the paired columns.</returns>
        public override IEstimator <ITransformer> BuildFromOption(MLContext context, ConvertTypeOption param)
        {
            var sourceColumns = param.InputColumnNames;
            var targetColumns = param.OutputColumnNames;
            var columnPairs   = AutoMlUtils.CreateInputOutputColumnPairsFromStrings(sourceColumns, targetColumns);

            var conversions = context.Transforms.Conversion;

            return conversions.ConvertType(columnPairs);
        }
Beispiel #12
0
        /// <summary>
        /// Creates a one-hot encoding estimator for the input/output column pairs described by
        /// <paramref name="param"/>.
        /// </summary>
        /// <param name="context">ML context whose categorical transforms catalog creates the estimator.</param>
        /// <param name="param">Supplies the input and output column names to pair up.</param>
        /// <returns>The one-hot encoding estimator for the paired columns.</returns>
        public override IEstimator <ITransformer> BuildFromOption(MLContext context, OneHotOption param)
        {
            var sourceColumns = param.InputColumnNames;
            var targetColumns = param.OutputColumnNames;
            var columnPairs   = AutoMlUtils.CreateInputOutputColumnPairsFromStrings(sourceColumns, targetColumns);

            var categorical = context.Transforms.Categorical;

            return categorical.OneHotEncoding(columnPairs);
        }