/// <summary>
/// Using the dependency mapping and the included transforms, computes which columns of
/// <paramref name="dataSample"/> should be excluded, i.e. every column that is hidden,
/// non-numeric, or neither part of the initial dataset nor produced by one of the
/// transforms in <paramref name="includedTransforms"/>.
/// </summary>
/// <param name="includedTransforms">Transforms used in the current pipeline.</param>
/// <param name="dataSample">Data view whose schema columns are classified.</param>
/// <param name="dependencyMapping">
/// Per-level lookup from a column to the transforms that could have produced it at that level.
/// </param>
/// <returns>
/// Indices (in schema order) of all columns that were not selected for inclusion.
/// </returns>
private static int[] GetExcludedColumnIndices(TransformInference.SuggestedTransform[] includedTransforms,
    IDataView dataSample, AutoInference.DependencyMap dependencyMapping)
{
    var schema = dataSample.Schema;
    var includedColumnIndices = new List<int>();

    // For every column, see if it is either present in the initial dataset, or
    // produced by a transform used in the current pipeline.
    for (int columnIndex = 0; columnIndex < schema.ColumnCount; columnIndex++)
    {
        // Create ColumnInfo object for indexing the per-level dictionaries.
        var colInfo = new AutoInference.ColumnInfo
        {
            Name = schema.GetColumnName(columnIndex),
            ItemType = schema.GetColumnType(columnIndex).ItemType,
            IsHidden = schema.IsHidden(columnIndex)
        };

        // Exclude all hidden and non-numeric columns.
        if (colInfo.IsHidden || !colInfo.ItemType.IsNumber)
        {
            continue;
        }

        foreach (var level in dependencyMapping.Keys.Reverse())
        {
            var levelResponsibilities = dependencyMapping[level];
            if (!levelResponsibilities.ContainsKey(colInfo))
            {
                continue;
            }

            // Fetch the candidate producers once instead of re-indexing for every check.
            var producers = levelResponsibilities[colInfo];

            if (producers.Count == 0)
            {
                // A column at level 0 with no producing transforms is present in the
                // initial dataset and needs no transform to reach the final dataset.
                // (It is already known to be numeric because of the guard above.)
                if (level == 0)
                {
                    includedColumnIndices.Add(columnIndex);
                    break;
                }

                // Could not have been produced by transforms at this level; try the next one.
                continue;
            }

            // Check whether any transform in this pipeline could have produced the column.
            if (producers.Any(t => includedTransforms.Contains(t)))
            {
                includedColumnIndices.Add(columnIndex);

                // The column is settled; scanning further levels could only add the
                // same index again, which does not change the result.
                break;
            }
        }
    }

    // Exclude all columns not discovered by the inclusion process.
    return Enumerable.Range(0, schema.ColumnCount).Except(includedColumnIndices).ToArray();
}
/// <summary>
/// Builds a lookup that associates every non-hidden column of <paramref name="transformedData"/>
/// with the applied transforms that could have produced it. A transform qualifies when one of
/// its produced columns has the same name and the same numeric/non-numeric kind as the column.
/// </summary>
/// <param name="transformedData">Data view whose schema columns are inspected.</param>
/// <param name="appliedTransforms">Transforms considered as possible producers.</param>
/// <returns>
/// A map with one entry per non-hidden column; the entry's list is empty when no transform matches.
/// </returns>
public static AutoInference.LevelDependencyMap ComputeColumnResponsibilities(IDataView transformedData,
    TransformInference.SuggestedTransform[] appliedTransforms)
{
    var schema = transformedData.Schema;
    var responsibilities = new AutoInference.LevelDependencyMap();

    for (int col = 0; col < schema.ColumnCount; col++)
    {
        // Hidden columns get no entry in the map.
        if (!schema.IsHidden(col))
        {
            var itemType = schema.GetColumnType(col).ItemType;
            var isNumeric = itemType.IsNumber;

            var colInfo = new AutoInference.ColumnInfo
            {
                Name = schema.GetColumnName(col),
                ItemType = itemType,
                IsHidden = false
            };

            // Collect, in their original order, the transforms that list a matching output column.
            var producers = new List<TransformInference.SuggestedTransform>();
            foreach (var transform in appliedTransforms)
            {
                bool producesColumn = transform.RoutingStructure.ColumnsProduced
                    .Any(produced => produced.Name == colInfo.Name && produced.IsNumeric == isNumeric);
                if (producesColumn)
                {
                    producers.Add(transform);
                }
            }

            responsibilities.Add(colInfo, producers);
        }
    }

    return responsibilities;
}