/// <summary>
/// Selects the transforms for a pipeline and reports them as a bitmask.
/// Currently no sampling takes place: every entry of <c>AvailableTransforms</c> is selected.
/// </summary>
/// <param name="transformsBitMask">
/// Receives the value of <c>AutoMlUtils.TransformsToBitmask</c> for the selected transforms.
/// It is computed before the final feature-concat entries are appended, so those entries
/// are not part of the mask.
/// </param>
/// <returns>
/// The selected transforms followed by whatever <c>AutoMlUtils.GetFinalFeatureConcat</c> returns.
/// </returns>
private TransformInference.SuggestedTransform[] SampleTransforms(out long transformsBitMask)
{
    // Placeholder strategy: take a copy of everything that is available.
    var chosen = AvailableTransforms.ToList();

    // The mask describes only the sampled transforms, so compute it before appending anything.
    transformsBitMask = AutoMlUtils.TransformsToBitmask(chosen.ToArray());

    // Build the trailing feature-concat step(s) from a snapshot of the current selection.
    // NOTE(review): GetRandomPipelines passes DataRoles as an additional argument to this
    // same helper; confirm that omitting it here is intentional.
    var finalConcat = AutoMlUtils.GetFinalFeatureConcat(
        Env,
        FullyTransformedData,
        DependencyMapping,
        chosen.ToArray(),
        AvailableTransforms);
    chosen.AddRange(finalConcat);

    return chosen.ToArray();
}
/// <summary>
/// Generates up to <paramref name="numOfPipelines"/> random pipelines. Each candidate pairs a
/// randomly chosen learner with a random subset of transform atomic groups (plus the
/// mandatory transforms and the final feature-concat step), with sweepable hyperparameters
/// randomly perturbed.
/// </summary>
/// <param name="numOfPipelines">Number of valid pipelines requested.</param>
/// <returns>
/// The valid pipelines that were generated. The array can hold fewer than
/// <paramref name="numOfPipelines"/> entries (possibly none): generation stops once more than
/// <c>numOfPipelines * 10</c> candidates have been examined.
/// </returns>
private PipelinePattern[] GetRandomPipelines(int numOfPipelines)
{
    Host.Check(AvailableLearners.All(l => l.PipelineNode != null));
    Host.Check(AvailableTransforms.All(t => t.PipelineNode != null));

    // One past the highest atomic group ID; 0 when there are no transforms at all.
    int atomicGroupLimit = AvailableTransforms.Select(t => t.AtomicGroupId)
        .DefaultIfEmpty(-1).Max() + 1;
    var pipelines = new List<PipelinePattern>();
    int collisions = 0;
    int totalCount = 0;

    while (pipelines.Count < numOfPipelines)
    {
        // Generate random bitmask (set of transform atomic group IDs).
        long transformsBitMask = NextRandomTransformsBitMask(atomicGroupLimit);

        // Include all "always on" transforms, such as autolabel.
        transformsBitMask |= AutoMlUtils.IncludeMandatoryTransforms(AvailableTransforms.ToList());

        // Get actual learner and transforms for pipeline.
        var selectedLearner = AvailableLearners[Host.Rand.Next(AvailableLearners.Length)];
        var selectedTransforms = AvailableTransforms
            .Where(t => AutoMlUtils.AtomicGroupPresent(transformsBitMask, t.AtomicGroupId))
            .ToList();

        // Randomly change transform sweepable hyperparameter settings.
        selectedTransforms.ForEach(t => RandomlyPerturbSweepableHyperparameters(t.PipelineNode));

        // Randomly change learner sweepable hyperparameter settings.
        RandomlyPerturbSweepableHyperparameters(selectedLearner.PipelineNode);

        // Always include features concat transform.
        selectedTransforms.AddRange(AutoMlUtils.GetFinalFeatureConcat(Env, FullyTransformedData,
            DependencyMapping, selectedTransforms.ToArray(), AvailableTransforms, DataRoles));

        // Compute hash key for checking if we've already seen this pipeline.
        // However, if we keep missing, we don't want to get stuck in an infinite loop.
        // Try a good number of times (numOfPipelines * 4 consecutive collisions), then
        // accept the duplicate anyway to get out of the rut.
        string hashKey = GetHashKey(transformsBitMask, selectedLearner);
        if (collisions < numOfPipelines * 4 && VisitedPipelines.Contains(hashKey))
        {
            collisions++;
            continue;
        }

        VisitedPipelines.Add(hashKey);
        collisions = 0;
        totalCount++;

        // Keep pipeline if valid. The verdict is cached per transform bitmask, so the
        // verifier runs at most once for each distinct mask.
        var pipeline = new PipelinePattern(selectedTransforms.ToArray(), selectedLearner, "", Env);
        bool isValid;
        if (!TransformsMaskValidity.TryGetValue(transformsBitMask, out isValid))
        {
            isValid = PipelineVerifier(pipeline, transformsBitMask);
            TransformsMaskValidity.Add(transformsBitMask, isValid);
        }

        if (isValid)
        {
            pipelines.Add(pipeline);
        }

        // Only invalid pipelines available, stuck in loop.
        // Break out and return whatever has been collected (possibly nothing).
        if (totalCount > numOfPipelines * 10)
        {
            break;
        }
    }

    return pipelines.ToArray();
}

/// <summary>
/// Draws a random, non-negative bitmask whose low <paramref name="atomicGroupLimit"/> bits
/// are each chosen at random.
/// </summary>
/// <remarks>
/// A single <c>Host.Rand.Next(2^limit)</c> call only works while <c>2^limit</c> fits in an
/// <see cref="int"/>, i.e. for at most 30 bits. Wider masks are assembled from several
/// 30-bit draws so that 31 or more atomic groups no longer overflow the bound.
/// NOTE(review): assumes bit <c>i</c> of the mask stands for atomic group <c>i</c>, as the
/// original single draw over <c>[0, 2^limit)</c> implied - confirm against
/// <c>AutoMlUtils.AtomicGroupPresent</c>.
/// </remarks>
/// <param name="atomicGroupLimit">Number of low-order bits to randomize.</param>
/// <returns>The random mask.</returns>
private long NextRandomTransformsBitMask(int atomicGroupLimit)
{
    // Widest draw for which the exclusive bound (1 << width) is still a positive int.
    const int maxBitsPerDraw = 30;

    // Capped at 63 bits so the resulting long stays non-negative, like every mask the
    // single int-based draw could produce.
    const int maxMaskBits = 63;

    int bitsLeft = Math.Min(Math.Max(atomicGroupLimit, 0), maxMaskBits);
    if (bitsLeft <= maxBitsPerDraw)
    {
        // Same single draw as before for small group counts.
        return Host.Rand.Next(1 << bitsLeft);
    }

    long mask = 0;
    int shift = 0;
    while (bitsLeft > 0)
    {
        int width = Math.Min(bitsLeft, maxBitsPerDraw);
        mask |= ((long)Host.Rand.Next(1 << width)) << shift;
        shift += width;
        bitsLeft -= width;
    }

    return mask;
}