예제 #1
0
        /// <summary>
        /// Computes the indices of the columns in <paramref name="dataSample"/> that should be excluded,
        /// i.e. every column that is NOT (a) visible, (b) numeric, and (c) either listed at level 0 with no
        /// producing transforms or producible by at least one of <paramref name="includedTransforms"/>.
        /// </summary>
        /// <param name="includedTransforms">The transforms that are part of the current pipeline.</param>
        /// <param name="dataSample">The data view whose schema columns are classified.</param>
        /// <param name="dependencyMapping">
        /// Per-level lookup from a column description to the transforms that could have produced that column.
        /// </param>
        /// <returns>
        /// The indices in [0, ColumnCount) of <paramref name="dataSample"/>'s schema that were not selected
        /// for inclusion.
        /// </returns>
        private static int[] GetExcludedColumnIndices(TransformInference.SuggestedTransform[] includedTransforms, IDataView dataSample,
                                                      AutoInference.DependencyMap dependencyMapping)
        {
            var schema = dataSample.Schema;
            int columnCount = schema.ColumnCount;
            List<int> includedColumnIndices = new List<int>();

            // For every column, see if it is either present in the initial dataset, or
            // produced by a transform used in the current pipeline.
            for (int columnIndex = 0; columnIndex < columnCount; columnIndex++)
            {
                // Build the ColumnInfo used as the key into each level's dictionary.
                var colInfo = new AutoInference.ColumnInfo
                {
                    Name     = schema.GetColumnName(columnIndex),
                    ItemType = schema.GetColumnType(columnIndex).ItemType,
                    IsHidden = schema.IsHidden(columnIndex)
                };

                // Hidden and non-numeric columns are never included (so they always end up excluded).
                if (colInfo.IsHidden || !colInfo.ItemType.IsNumber)
                {
                    continue;
                }

                // Levels are visited in the reverse of the order Keys enumerates them
                // (presumably deepest level first -- confirm against how the map is populated).
                foreach (var level in dependencyMapping.Keys.Reverse())
                {
                    var levelResponsibilities = dependencyMapping[level];

                    if (!levelResponsibilities.ContainsKey(colInfo))
                    {
                        continue;
                    }

                    // Fetch the candidate producers once instead of re-indexing for every check.
                    var producers = levelResponsibilities[colInfo];

                    if (producers.Count == 0)
                    {
                        // A numeric column listed at level 0 with no producers needs no transform
                        // to be present in the final dataset, so it is included as-is.
                        if (level == 0)
                        {
                            includedColumnIndices.Add(columnIndex);
                            break;
                        }

                        // Could not have been produced by transforms at this level; try the next level.
                        continue;
                    }

                    // Include the column if any transform in this pipeline could have produced it.
                    if (producers.Any(t => includedTransforms.Contains(t)))
                    {
                        includedColumnIndices.Add(columnIndex);

                        // The column is already known to be included; stop here so the index is not
                        // added again by another level and no further levels are scanned needlessly.
                        break;
                    }
                }
            }

            // Exclude all columns not discovered by the inclusion process above.
            return Enumerable.Range(0, columnCount).Except(includedColumnIndices).ToArray();
        }
예제 #2
0
        /// <summary>
        /// Builds a lookup from each visible column of <paramref name="transformedData"/> to the subset of
        /// <paramref name="appliedTransforms"/> that could have produced it.
        /// </summary>
        /// <param name="transformedData">The data view whose schema columns are inspected.</param>
        /// <param name="appliedTransforms">The candidate transforms to attribute columns to.</param>
        /// <returns>
        /// A map keyed by column description (name, item type, not hidden). A transform is listed for a column
        /// when one of the columns it produces has the same name and the same numeric-ness. The list is empty
        /// when no transform matches.
        /// </returns>
        public static AutoInference.LevelDependencyMap ComputeColumnResponsibilities(IDataView transformedData,
                                                                                     TransformInference.SuggestedTransform[] appliedTransforms)
        {
            var responsibilities = new AutoInference.LevelDependencyMap();
            var schema = transformedData.Schema;

            for (int col = 0; col < schema.ColumnCount; col++)
            {
                // Hidden columns get no entry in the map.
                if (schema.IsHidden(col))
                {
                    continue;
                }

                var itemType = schema.GetColumnType(col).ItemType;
                var key = new AutoInference.ColumnInfo
                {
                    IsHidden = false,
                    ItemType = itemType,
                    Name     = schema.GetColumnName(col)
                };

                // A transform is a possible producer when one of its output columns matches
                // this column by name and by whether it is numeric.
                var producers = appliedTransforms
                    .Where(t => t.RoutingStructure.ColumnsProduced.Any(
                               o => o.Name == key.Name && o.IsNumeric == itemType.IsNumber))
                    .ToList();

                responsibilities.Add(key, producers);
            }

            return responsibilities;
        }