/// <summary>
/// Verifies, using the dependency map, that each included transform only consumes columns
/// that are known at that transform's own level, and that any such column which could have
/// been produced by a transform has at least one of its possible producers among the
/// included transforms. A producer may sit on the same level as its consumer, as happens
/// with multipart (atomic group) transforms.
/// </summary>
/// <param name="includedTransforms">The transforms selected for inclusion.</param>
/// <param name="dependencyMapping">
/// Per-level lookup from a column to the transforms that could have produced it.
/// </param>
/// <returns>
/// <c>true</c> when every consumed column passes both checks; otherwise <c>false</c>.
/// </returns>
public static bool AreColumnsConsistent(TransformInference.SuggestedTransform[] includedTransforms,
    AutoInference.DependencyMap dependencyMapping)
{
    foreach (var candidate in includedTransforms)
    {
        var routing = candidate.RoutingStructure;

        foreach (var consumed in routing.ColumnsConsumed)
        {
            AutoInference.LevelDependencyMap levelMap = dependencyMapping[routing.Level];
            var matchedColumn = levelMap.Keys.FirstOrDefault(key => key.Name == consumed.Name);

            // A null name means no key matched, i.e. the consumed column is unknown at this
            // level. Columns are never dropped, so it cannot exist at a lower level either.
            // NOTE(review): this relies on FirstOrDefault yielding a default value (not a null
            // reference) when nothing matches - confirm ColumnInfo is a value type.
            if (matchedColumn.Name == null)
            {
                return false;
            }

            var possibleProducers = levelMap[matchedColumn];

            // No candidate producers: nothing further to verify for this column.
            if (possibleProducers.Count == 0)
            {
                continue;
            }

            // The column could have been produced by a transform, so at least one of the
            // possible producers must be in the included transforms list.
            if (!possibleProducers.Any(producer => includedTransforms.Contains(producer)))
            {
                return false;
            }
        }
    }

    // Every consumed column passed both checks.
    return true;
}
/// <summary>
/// Builds a map from each non-hidden column of the transformed data to the applied
/// transforms which could have produced it.
/// </summary>
/// <param name="transformedData">Data view whose schema columns are enumerated.</param>
/// <param name="appliedTransforms">Transforms considered as possible producers.</param>
/// <returns>
/// A map with one entry per non-hidden schema column; the value is the (possibly empty)
/// list of transforms declaring a produced column with the same name and the same
/// numeric-ness as the schema column's item type.
/// </returns>
public static AutoInference.LevelDependencyMap ComputeColumnResponsibilities(IDataView transformedData,
    TransformInference.SuggestedTransform[] appliedTransforms)
{
    var responsibilities = new AutoInference.LevelDependencyMap();
    var schema = transformedData.Schema;

    for (int columnIndex = 0; columnIndex < schema.ColumnCount; columnIndex++)
    {
        // Hidden columns get no entry in the map.
        if (schema.IsHidden(columnIndex))
        {
            continue;
        }

        var itemType = schema.GetColumnType(columnIndex).ItemType;
        var columnName = schema.GetColumnName(columnIndex);

        var column = new AutoInference.ColumnInfo
        {
            IsHidden = false,
            ItemType = itemType,
            Name = columnName
        };

        // A transform is a possible producer when one of its produced columns matches this
        // column by name and agrees with it on whether the item type is numeric.
        var possibleProducers = appliedTransforms
            .Where(transform => transform.RoutingStructure.ColumnsProduced.Any(
                produced => produced.Name == column.Name && produced.IsNumeric == itemType.IsNumber))
            .ToList();

        responsibilities.Add(column, possibleProducers);
    }

    return responsibilities;
}