/// <summary>
/// Verifies that each included transform consumes only columns that are known at its own
/// level of the dependency mapping and that, whenever such a column has candidate producer
/// transforms, at least one of those producers is also included. A producer may live on the
/// same level as its consumer, as with multipart (atomic group) transforms.
/// </summary>
/// <param name="includedTransforms">Transforms selected for the pipeline being checked.</param>
/// <param name="dependencyMapping">Per-level map from column to the transforms that can produce it.</param>
/// <returns><c>true</c> if every consumed column is accounted for; otherwise <c>false</c>.</returns>
public static bool AreColumnsConsistent(TransformInference.SuggestedTransform[] includedTransforms,
    AutoInference.DependencyMap dependencyMapping)
{
    foreach (var candidate in includedTransforms)
    {
        var routing = candidate.RoutingStructure;

        foreach (var consumed in routing.ColumnsConsumed)
        {
            // Looked up per consumed column (not per transform) so that a transform which
            // consumes nothing never touches the mapping.
            AutoInference.LevelDependencyMap levelMap = dependencyMapping[routing.Level];
            var match = levelMap.Keys.FirstOrDefault(key => key.Name == consumed.Name);

            // A null name means no key matched: the consumed column is absent at this level.
            // Columns are never dropped, so it cannot exist at a lower level either.
            if (match.Name == null)
            {
                return false;
            }

            // An empty producer list means no transform is needed for this column. Otherwise
            // at least one of the possible producers has to be part of the included set.
            var producers = levelMap[match];
            if (producers.Count > 0 && producers.All(producer => !includedTransforms.Contains(producer)))
            {
                return false;
            }
        }
    }

    // No inconsistency found.
    return true;
}
/// <summary>
/// Using the dependencyMapping and included transforms, computes the indices of the columns
/// in dataSample that must be excluded from the final feature set when only the included
/// transforms are applied. A column is kept (not excluded) only if it is visible, numeric,
/// and either present in the initial dataset (level 0, no producers) or producible by at
/// least one included transform; every other column index is returned.
/// </summary>
/// <param name="includedTransforms">Transforms used in the current pipeline.</param>
/// <param name="dataSample">Data view whose schema columns are classified.</param>
/// <param name="dependencyMapping">Per-level map from column to the transforms that can produce it.</param>
/// <returns>Indices of the excluded columns, in ascending order.</returns>
private static int[] GetExcludedColumnIndices(TransformInference.SuggestedTransform[] includedTransforms,
    IDataView dataSample, AutoInference.DependencyMap dependencyMapping)
{
    int columnCount = dataSample.Schema.ColumnCount;

    // A set: each column index is recorded at most once.
    var includedColumnIndices = new HashSet<int>();

    for (int columnIndex = 0; columnIndex < columnCount; columnIndex++)
    {
        // Create ColumnInfo object for indexing the per-level dictionaries.
        var colInfo = new AutoInference.ColumnInfo
        {
            Name = dataSample.Schema.GetColumnName(columnIndex),
            ItemType = dataSample.Schema.GetColumnType(columnIndex).ItemType,
            IsHidden = dataSample.Schema.IsHidden(columnIndex)
        };

        // Hidden and non-numeric columns are always excluded.
        if (colInfo.IsHidden || !colInfo.ItemType.IsNumber)
        {
            continue;
        }

        // Walk the levels in reverse key order, as the original code did.
        foreach (var level in dependencyMapping.Keys.Reverse())
        {
            var levelResponsibilities = dependencyMapping[level];
            if (!levelResponsibilities.ContainsKey(colInfo))
            {
                continue;
            }

            var producers = levelResponsibilities[colInfo];

            if (producers.Count == 0)
            {
                // No transform produces the column at this level. At level 0 that means it
                // is part of the initial dataset and needs no transform to be present.
                if (level == 0)
                {
                    includedColumnIndices.Add(columnIndex);
                    break;
                }

                // Otherwise move on to the next level.
                continue;
            }

            // The column is included as soon as one possible producer is in this pipeline;
            // there is no need to inspect further levels once that is established.
            if (producers.Any(t => includedTransforms.Contains(t)))
            {
                includedColumnIndices.Add(columnIndex);
                break;
            }
        }
    }

    // Exclude all columns not discovered by the inclusion process.
    return Enumerable.Range(0, columnCount).Except(includedColumnIndices).ToArray();
}
/// <summary>
/// Public entry point that derives the level, the atomic group limit and the excluded column
/// indices from the supplied transforms, then delegates to the overload that takes those
/// values explicitly.
/// </summary>
/// <param name="env">Host environment passed through to the delegate overload.</param>
/// <param name="data">Data view whose columns are examined.</param>
/// <param name="dependencyMapping">Per-level map from column to the transforms that can produce it.</param>
/// <param name="selectedTransforms">Transforms used to decide which columns are excluded.</param>
/// <param name="allTransforms">Transforms from which the level and atomic group limit are derived.</param>
/// <param name="dataRoles">Role-mapped data passed through to the delegate overload.</param>
/// <returns>The result of the delegate overload.</returns>
public static TransformInference.SuggestedTransform[] GetFinalFeatureConcat(IHostEnvironment env, IDataView data,
    AutoInference.DependencyMap dependencyMapping, TransformInference.SuggestedTransform[] selectedTransforms,
    TransformInference.SuggestedTransform[] allTransforms, RoleMappedData dataRoles)
{
    bool hasTransforms = allTransforms.Length != 0;

    // One past the highest level / atomic group id in use; defaults of 1 and 0 when there
    // are no transforms at all.
    int level = hasTransforms ? allTransforms.Max(t => t.RoutingStructure.Level) + 1 : 1;
    int atomicGroupLimit = hasTransforms ? allTransforms.Max(t => t.AtomicGroupId) + 1 : 0;

    var excludedColumnIndices = GetExcludedColumnIndices(selectedTransforms, data, dependencyMapping);

    return GetFinalFeatureConcat(env, data, excludedColumnIndices, level, atomicGroupLimit, dataRoles);
}
/// <summary>
/// Reports whether level 0 of the dependency mapping contains a visible, numeric column
/// named <see cref="DefaultColumnNames.Features"/> that has no producer transforms.
/// </summary>
/// <param name="dependencyMapping">Per-level map from column to the transforms that can produce it.</param>
/// <returns><c>true</c> if such a column exists; <c>false</c> otherwise, including for an empty mapping.</returns>
private static bool HasInitialNumericFeatures(AutoInference.DependencyMap dependencyMapping)
{
    // An empty mapping has no level 0 to inspect.
    if (dependencyMapping.Count == 0)
    {
        return false;
    }

    return dependencyMapping[0].Any(entry =>
        entry.Key.Name == DefaultColumnNames.Features
        && !entry.Key.IsHidden
        && entry.Key.ItemType.IsNumber
        && entry.Value.Count == 0);
}
/// <summary>
/// Stores the search-space inputs on this instance and then calls
/// <c>AutoMlUtils.PopulateSweepableParams</c> on each available learner.
/// </summary>
/// <param name="availableTransforms">Stored as <c>AvailableTransforms</c>.</param>
/// <param name="availableLearners">Stored as <c>AvailableLearners</c>; each one is passed to <c>PopulateSweepableParams</c>.</param>
/// <param name="pipelineVerifier">Stored as <c>PipelineVerifier</c>.</param>
/// <param name="originalData">Stored as <c>OriginalData</c>.</param>
/// <param name="fullyTransformedData">Stored as <c>FullyTransformedData</c>.</param>
/// <param name="dependencyMapping">Stored as <c>DependencyMapping</c>.</param>
/// <param name="isMaximizingMetric">Stored as <c>IsMaximizingMetric</c>.</param>
public virtual void SetSpace(
    TransformInference.SuggestedTransform[] availableTransforms,
    RecipeInference.SuggestedRecipe.SuggestedLearner[] availableLearners,
    Func<PipelinePattern, long, bool> pipelineVerifier,
    IDataView originalData,
    IDataView fullyTransformedData,
    AutoInference.DependencyMap dependencyMapping,
    bool isMaximizingMetric)
{
    // Candidate components.
    AvailableLearners = availableLearners;
    AvailableTransforms = availableTransforms;

    // Validation callback and data.
    PipelineVerifier = pipelineVerifier;
    OriginalData = originalData;
    FullyTransformedData = fullyTransformedData;

    // Column dependency information and optimization direction.
    DependencyMapping = dependencyMapping;
    IsMaximizingMetric = isMaximizingMetric;

    foreach (var candidateLearner in AvailableLearners)
    {
        AutoMlUtils.PopulateSweepableParams(candidateLearner);
    }
}
/// <summary>
/// Forwards the search-space inputs to every engine in <c>_secondaryEngines</c> and then to
/// the base implementation, passing the same arguments to each.
/// </summary>
public override void SetSpace(
    TransformInference.SuggestedTransform[] availableTransforms,
    RecipeInference.SuggestedRecipe.SuggestedLearner[] availableLearners,
    Func<PipelinePattern, long, bool> pipelineVerifier,
    IDataView originalData,
    IDataView fullyTransformedData,
    AutoInference.DependencyMap dependencyMapping,
    bool isMaximizingMetric)
{
    // Secondary engines are configured first, the base class last.
    foreach (var secondary in _secondaryEngines.Values)
    {
        secondary.SetSpace(
            availableTransforms,
            availableLearners,
            pipelineVerifier,
            originalData,
            fullyTransformedData,
            dependencyMapping,
            isMaximizingMetric);
    }

    base.SetSpace(
        availableTransforms,
        availableLearners,
        pipelineVerifier,
        originalData,
        fullyTransformedData,
        dependencyMapping,
        isMaximizingMetric);
}
/// <summary>
/// Adapts <see cref="IsValidTransformsPipeline"/> to the
/// <c>Func&lt;PipelinePattern, long, bool&gt;</c> shape. The returned delegate validates a
/// pipeline pattern against the given transforms bit mask, treating the union of
/// <paramref name="allTransforms"/> and the pattern's own transforms as the full transform set.
/// </summary>
/// <param name="allTransforms">Transforms captured by the returned delegate.</param>
/// <param name="dependencyMapping">Dependency mapping captured by the returned delegate.</param>
/// <returns>A delegate taking a pipeline pattern and a transforms bit mask.</returns>
public static Func<PipelinePattern, long, bool> ValidationWrapper(
    TransformInference.SuggestedTransform[] allTransforms, AutoInference.DependencyMap dependencyMapping)
{
    return (pattern, transformsBitMask) =>
    {
        var selectedAndFinal = pattern.Transforms;
        var everyTransform = allTransforms.Union(pattern.Transforms).ToArray();

        return IsValidTransformsPipeline(transformsBitMask, selectedAndFinal, everyTransform, dependencyMapping);
    };
}
/// <summary>
/// Reports whether a features column is available, either through a final-features column
/// transform among <paramref name="transforms"/> or as an initial numeric features column in
/// <paramref name="dependencyMapping"/>.
/// </summary>
private static bool HasFinalFeatures(TransformInference.SuggestedTransform[] transforms,
    AutoInference.DependencyMap dependencyMapping)
{
    // The transform check runs first; the mapping is consulted only when it fails.
    if (HasFinalFeaturesColumnTransform(transforms))
    {
        return true;
    }

    return HasInitialNumericFeatures(dependencyMapping);
}
/// <summary>
/// Decides whether the transforms selected by <paramref name="transformsBitMask"/> form a
/// valid pipeline. The checks are, in order: every selected atomic group has all of the
/// atomic groups it depends on selected as well; every selected transform consumes only
/// available columns; and a final features column is available.
/// </summary>
/// <param name="transformsBitMask">Bit mask identifying which atomic groups are selected.</param>
/// <param name="selectedAndFinalTransforms">Transforms checked for a final features column.</param>
/// <param name="allTransforms">All candidate transforms the mask selects from.</param>
/// <param name="dependencyMapping">Per-level map from column to the transforms that can produce it.</param>
/// <returns><c>true</c> if the pipeline passes every check; otherwise <c>false</c>.</returns>
public static bool IsValidTransformsPipeline(long transformsBitMask,
    TransformInference.SuggestedTransform[] selectedAndFinalTransforms,
    TransformInference.SuggestedTransform[] allTransforms,
    AutoInference.DependencyMap dependencyMapping)
{
    // With no transforms available, the pipeline is valid only if the mask selects nothing.
    // The mask is compared against zero rather than tested with "> 0": it is a signed long,
    // so a mask with its highest bit set is negative and must still be rejected as
    // "including transforms that aren't there".
    if (allTransforms.Length == 0)
    {
        return transformsBitMask == 0;
    }

    var graph = BuildAtomicIdDependencyGraph(allTransforms);
    var selectedInitialTransforms = allTransforms
        .Where(t => AtomicGroupPresent(transformsBitMask, t.AtomicGroupId))
        .ToArray();

    // Make sure all necessary atomic groups are present, beginning with the last level.
    // allTransforms is non-empty here, so Max() is safe.
    int maxLevel = allTransforms.Max(t => t.RoutingStructure.Level);
    for (int l = maxLevel; l > 0; l--)
    {
        int level = l; // Copy, so the lambdas below do not capture the loop variable.
        var atomicIdsForLevel = allTransforms
            .Where(t => t.RoutingStructure.Level == level)
            .Select(t => t.AtomicGroupId)
            .Distinct()
            .ToArray();

        // A selected group whose required groups are not all selected invalidates the pipeline.
        if (atomicIdsForLevel.Any(a => AtomicGroupPresent(transformsBitMask, a) &&
                                       !graph[a].All(r => AtomicGroupPresent(transformsBitMask, r))))
        {
            return false;
        }
    }

    // Make sure each transform only consumes columns actually produced by
    // a lower-level transform, or that existed in the original dataset.
    if (!AreColumnsConsistent(selectedInitialTransforms, dependencyMapping))
    {
        return false;
    }

    // Make sure the pipeline has a numeric vector Features column.
    if (!HasFinalFeatures(selectedAndFinalTransforms, dependencyMapping))
    {
        return false;
    }

    // Passed all tests.
    return true;
}