/// <summary>
/// Trains and tests a single candidate pipeline on a randomly sized prefix of the
/// training data, times the run, and records the outcome in both
/// <c>_sortedSampledElements</c> (keyed by test metric value) and <c>_history</c>.
/// </summary>
/// <param name="utils">Source of the normally distributed draw used to pick the row count.</param>
/// <param name="stopwatch">Timer that is restarted for this run and stopped when the experiment returns.</param>
/// <param name="candidate">Pipeline to evaluate; its <c>PerformanceSummary</c> is overwritten.</param>
/// <param name="numOfTrainingRows">Upper bound on the number of training rows handed to the experiment.</param>
/// <exception cref="InvalidOperationException">
/// The test metric is NaN or infinite and the sorted collection already reports that
/// value as a key, so no distinct key can be produced for this candidate.
/// </exception>
private void ProcessPipeline(Sweeper.Algorithms.SweeperProbabilityUtils utils, Stopwatch stopwatch, PipelinePattern candidate, int numOfTrainingRows)
{
    // Create a randomized number of rows to do train/test with.
    // NOTE(review): NormalRVs arguments are presumably (count, mean, standard deviation) - confirm.
    int randomizedNumberOfRows =
        (int)Math.Floor(utils.NormalRVs(1, numOfTrainingRows, (double)numOfTrainingRows / 10).First());

    // Draws above the available row count are reflected back below it.
    if (randomizedNumberOfRows > numOfTrainingRows)
    {
        randomizedNumberOfRows = numOfTrainingRows - (randomizedNumberOfRows - numOfTrainingRows);
    }

    // Flooring a low draw (or reflecting a very high one) can leave zero or a negative
    // count, e.g. when numOfTrainingRows is 1. Keep at least one row whenever one exists.
    int minimumRows = Math.Min(1, numOfTrainingRows);
    if (randomizedNumberOfRows < minimumRows)
    {
        randomizedNumberOfRows = minimumRows;
    }

    // Run pipeline, and time how long it takes.
    stopwatch.Restart();
    candidate.RunTrainTestExperiment(_trainData.Take(randomizedNumberOfRows),
        _testData, Metric, TrainerKind, out var testMetricVal, out var trainMetricVal);
    stopwatch.Stop();

    // Handle key collisions on the sorted list by nudging the key upward until it is unique.
    double nudge = 1e-10;
    while (_sortedSampledElements.ContainsKey(testMetricVal))
    {
        // Adding to NaN or an infinity never yields a different key, so bail out
        // instead of spinning forever.
        if (double.IsNaN(testMetricVal) || double.IsInfinity(testMetricVal))
        {
            throw new InvalidOperationException(
                "Cannot record the pipeline result: the test metric value is not finite and is already present as a key.");
        }

        // For large magnitudes 1e-10 is below the resolution of a double and the sum
        // rounds back to the same value; widen the nudge until the key actually changes.
        double bumped = testMetricVal + nudge;
        while (bumped == testMetricVal)
        {
            nudge *= 2;
            bumped = testMetricVal + nudge;
        }

        testMetricVal = bumped;
    }

    // Save performance score.
    candidate.PerformanceSummary =
        new RunSummary(testMetricVal, randomizedNumberOfRows, stopwatch.ElapsedMilliseconds, trainMetricVal);
    _sortedSampledElements.Add(candidate.PerformanceSummary.MetricValue, candidate);
    _history.Add(candidate);
}
/// <summary>
/// Repeatedly asks <c>AutoMlEngine</c> for a batch of candidate pipelines and evaluates
/// each one through <c>ProcessPipeline</c> until <c>_terminator</c> signals termination,
/// no candidates are returned, or an evaluation throws.
/// </summary>
/// <param name="batchSize">Maximum number of candidates requested per round.</param>
/// <param name="numOfTrainingRows">Row-count bound forwarded to each pipeline evaluation.</param>
private void MainLearningLoop(int batchSize, int numOfTrainingRows)
{
    var timer = new Stopwatch();
    var rvUtils = new Sweeper.Algorithms.SweeperProbabilityUtils(_host);

    while (!_terminator.ShouldTerminate(_history))
    {
        // An iteration-based terminator caps the request at the iterations it has left;
        // any other terminator gets the full batch size.
        int requested = _terminator is IterationTerminator iterationTerminator
            ? Math.Min(iterationTerminator.RemainingIterations(_history), batchSize)
            : batchSize;

        var batch = AutoMlEngine.GetNextCandidates(_sortedSampledElements.Values, requested);

        // An empty batch means no valid pipeline is available, so stop searching.
        if (batch.Length == 0)
        {
            break;
        }

        // Evaluate each candidate on a subset of the data.
        foreach (var pipeline in batch)
        {
            try
            {
                ProcessPipeline(rvUtils, timer, pipeline, numOfTrainingRows);
            }
            catch (Exception)
            {
                // Any failure ends the whole learning loop; the exception is not propagated.
                timer.Stop();
                return;
            }
        }
    }
}