Ejemplo n.º 1
0
        /// <summary>
        /// Checks that every column consumed by the included transforms is accounted for.
        /// For each transform, every consumed column must be listed in the dependency map at
        /// that transform's level and, when that column has candidate producer transforms, at
        /// least one of those producers must itself be among the included transforms.
        /// A producer may sit on the same level as its consumer, as happens with multipart
        /// (atomic group) transforms.
        /// </summary>
        /// <param name="includedTransforms">Transforms selected for the pipeline being checked.</param>
        /// <param name="dependencyMapping">Per-level map from column to the transforms that could produce it.</param>
        /// <returns><c>true</c> when every consumed column is accounted for; otherwise <c>false</c>.</returns>
        public static bool AreColumnsConsistent(TransformInference.SuggestedTransform[] includedTransforms,
                                                AutoInference.DependencyMap dependencyMapping)
        {
            foreach (var candidate in includedTransforms)
            {
                foreach (var consumed in candidate.RoutingStructure.ColumnsConsumed)
                {
                    var levelMap = dependencyMapping[candidate.RoutingStructure.Level];

                    // Match by name only; a result with a null Name means no key matched.
                    var matchedColumn = levelMap.Keys.FirstOrDefault(key => key.Name == consumed.Name);
                    if (matchedColumn.Name == null)
                    {
                        // The consumed column does not exist at this level. Columns are never
                        // dropped, so it cannot exist at any lower level either.
                        return false;
                    }

                    // If this column could have been produced by a transform, at least one
                    // of the possible producer transforms must be in the included list.
                    var producers = levelMap[matchedColumn];
                    if (producers.Count > 0 && producers.All(producer => !includedTransforms.Contains(producer)))
                    {
                        return false;
                    }
                }
            }

            // No inconsistency found.
            return true;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Using the dependencyMapping and included transforms, computes the indices of the columns in
        /// dataSample that must be excluded when only the included transforms are applied.
        /// A column is kept (not excluded) only if it is visible, numeric, and either appears at
        /// level 0 with no producer transforms or can be produced by one of the included transforms.
        /// </summary>
        /// <param name="includedTransforms">Transforms used in the current pipeline.</param>
        /// <param name="dataSample">Data view whose schema columns are classified.</param>
        /// <param name="dependencyMapping">Per-level map from column to the transforms that could produce it.</param>
        /// <returns>Indices, in ascending order, of every schema column that was not selected for inclusion.</returns>
        private static int[] GetExcludedColumnIndices(TransformInference.SuggestedTransform[] includedTransforms, IDataView dataSample,
                                                      AutoInference.DependencyMap dependencyMapping)
        {
            // Only membership matters for the final set difference, so a set avoids duplicates.
            var includedColumnIndices = new HashSet<int>();

            // For every column, see if it is either present in the initial dataset or
            // produced by a transform used in the current pipeline.
            for (int columnIndex = 0; columnIndex < dataSample.Schema.ColumnCount; columnIndex++)
            {
                // Create ColumnInfo object for indexing the per-level dictionaries.
                var colInfo = new AutoInference.ColumnInfo
                {
                    Name     = dataSample.Schema.GetColumnName(columnIndex),
                    ItemType = dataSample.Schema.GetColumnType(columnIndex).ItemType,
                    IsHidden = dataSample.Schema.IsHidden(columnIndex)
                };

                // Exclude all hidden and non-numeric columns.
                if (colInfo.IsHidden || !colInfo.ItemType.IsNumber)
                {
                    continue;
                }

                // Walk the levels in reverse key order.
                foreach (var level in dependencyMapping.Keys.Reverse())
                {
                    var levelResponsibilities = dependencyMapping[level];

                    if (!levelResponsibilities.ContainsKey(colInfo))
                    {
                        continue;
                    }

                    var producers = levelResponsibilities[colInfo];

                    if (producers.Count == 0)
                    {
                        // A column at level 0 with no producers needs no transform to be
                        // present in the final dataset. (It is already known to be numeric here.)
                        if (level == 0)
                        {
                            includedColumnIndices.Add(columnIndex);
                            break;
                        }

                        // Could not have been produced at this level; try the next one.
                        continue;
                    }

                    // Included as soon as any producer at this level is part of the pipeline;
                    // further levels could only re-add the same index.
                    if (producers.Any(t => includedTransforms.Contains(t)))
                    {
                        includedColumnIndices.Add(columnIndex);
                        break;
                    }
                }
            }

            // Exclude all columns not discovered by the inclusion process.
            return Enumerable.Range(0, dataSample.Schema.ColumnCount).Except(includedColumnIndices).ToArray();
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Public entry point for building the final feature concatenation. Derives the level and
        /// atomic group limit from <paramref name="allTransforms"/> (one past the respective maxima,
        /// or 1 and 0 when there are no transforms), computes the excluded column indices from
        /// <paramref name="selectedTransforms"/>, and delegates to the overload taking those values.
        /// </summary>
        public static TransformInference.SuggestedTransform[] GetFinalFeatureConcat(IHostEnvironment env, IDataView data,
                                                                                    AutoInference.DependencyMap dependencyMapping, TransformInference.SuggestedTransform[] selectedTransforms,
                                                                                    TransformInference.SuggestedTransform[] allTransforms, RoleMappedData dataRoles)
        {
            bool hasTransforms = allTransforms.Length != 0;

            int level = hasTransforms
                ? allTransforms.Max(t => t.RoutingStructure.Level) + 1
                : 1;
            int atomicGroupLimit = hasTransforms
                ? allTransforms.Max(t => t.AtomicGroupId) + 1
                : 0;

            var excludedColumnIndices = GetExcludedColumnIndices(selectedTransforms, data, dependencyMapping);

            return GetFinalFeatureConcat(env, data, excludedColumnIndices, level, atomicGroupLimit, dataRoles);
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Reports whether level 0 of the dependency map contains a visible, numeric column named
 /// <c>DefaultColumnNames.Features</c> that has no producer transforms.
 /// </summary>
 /// <param name="dependencyMapping">Per-level map from column to the transforms that could produce it.</param>
 /// <returns><c>true</c> if such a column exists; <c>false</c> otherwise, including when the map is empty.</returns>
 private static bool HasInitialNumericFeatures(AutoInference.DependencyMap dependencyMapping)
 {
     if (dependencyMapping.Count == 0)
     {
         return false;
     }

     foreach (var entry in dependencyMapping[0])
     {
         // Reject the entry at the first failed criterion, in the same order as before.
         if (entry.Key.Name != DefaultColumnNames.Features)
         {
             continue;
         }
         if (entry.Key.IsHidden)
         {
             continue;
         }
         if (!entry.Key.ItemType.IsNumber)
         {
             continue;
         }
         if (entry.Value.Count != 0)
         {
             continue;
         }

         return true;
     }

     return false;
 }
        /// <summary>
        /// Stores the supplied search-space inputs on this instance, then calls
        /// <c>AutoMlUtils.PopulateSweepableParams</c> on every available learner.
        /// </summary>
        public virtual void SetSpace(
            TransformInference.SuggestedTransform[] availableTransforms,
            RecipeInference.SuggestedRecipe.SuggestedLearner[] availableLearners,
            Func<PipelinePattern, long, bool> pipelineVerifier,
            IDataView originalData,
            IDataView fullyTransformedData,
            AutoInference.DependencyMap dependencyMapping,
            bool isMaximizingMetric)
        {
            // Record the inputs (assignment order kept as-is).
            AvailableLearners = availableLearners;
            AvailableTransforms = availableTransforms;
            PipelineVerifier = pipelineVerifier;
            OriginalData = originalData;
            FullyTransformedData = fullyTransformedData;
            DependencyMapping = dependencyMapping;
            IsMaximizingMetric = isMaximizingMetric;

            // Populate sweepable parameters for each stored learner.
            foreach (var candidateLearner in AvailableLearners)
            {
                AutoMlUtils.PopulateSweepableParams(candidateLearner);
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Forwards the search-space inputs to every secondary engine first, then to the base
        /// implementation.
        /// </summary>
        public override void SetSpace(
            TransformInference.SuggestedTransform[] availableTransforms,
            RecipeInference.SuggestedRecipe.SuggestedLearner[] availableLearners,
            Func<PipelinePattern, long, bool> pipelineVerifier,
            IDataView originalData,
            IDataView fullyTransformedData,
            AutoInference.DependencyMap dependencyMapping,
            bool isMaximizingMetric)
        {
            foreach (var secondary in _secondaryEngines.Values)
            {
                secondary.SetSpace(
                    availableTransforms,
                    availableLearners,
                    pipelineVerifier,
                    originalData,
                    fullyTransformedData,
                    dependencyMapping,
                    isMaximizingMetric);
            }

            base.SetSpace(
                availableTransforms,
                availableLearners,
                pipelineVerifier,
                originalData,
                fullyTransformedData,
                dependencyMapping,
                isMaximizingMetric);
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Builds a pipeline-verifier delegate with the signature required by the PipelineOptimizerBase
 /// interface. The delegate validates a pipeline's transforms against the union of
 /// <paramref name="allTransforms"/> and the pipeline's own transforms by calling
 /// <c>IsValidTransformsPipeline</c>.
 /// </summary>
 public static Func<PipelinePattern, long, bool> ValidationWrapper(TransformInference.SuggestedTransform[] allTransforms, AutoInference.DependencyMap dependencyMapping)
 {
     return (pipeline, transformsBitMask) =>
     {
         var selectedAndFinal = pipeline.Transforms;
         var combinedTransforms = allTransforms.Union(pipeline.Transforms).ToArray();

         return IsValidTransformsPipeline(transformsBitMask, selectedAndFinal, combinedTransforms, dependencyMapping);
     };
 }
Ejemplo n.º 8
0
 /// <summary>
 /// Returns <c>true</c> when <c>HasFinalFeaturesColumnTransform</c> accepts the given transforms
 /// or, failing that, when <c>HasInitialNumericFeatures</c> accepts the dependency map.
 /// </summary>
 private static bool HasFinalFeatures(TransformInference.SuggestedTransform[] transforms,
                                      AutoInference.DependencyMap dependencyMapping)
 {
     if (HasFinalFeaturesColumnTransform(transforms))
     {
         return true;
     }

     return HasInitialNumericFeatures(dependencyMapping);
 }
Ejemplo n.º 9
0
        /// <summary>
        /// Decides whether the selection of atomic groups encoded in <paramref name="transformsBitMask"/>
        /// forms a valid transforms pipeline. The selection is valid when every selected atomic group
        /// has all the groups it depends on selected as well, every selected transform only consumes
        /// columns that are accounted for, and a final Features column is available.
        /// </summary>
        /// <param name="transformsBitMask">Bit mask identifying which atomic groups are selected.</param>
        /// <param name="selectedAndFinalTransforms">Transforms checked for the final Features column.</param>
        /// <param name="allTransforms">Every transform that may be selected.</param>
        /// <param name="dependencyMapping">Per-level map from column to the transforms that could produce it.</param>
        /// <returns><c>true</c> if the pipeline passes all checks; otherwise <c>false</c>.</returns>
        public static bool IsValidTransformsPipeline(long transformsBitMask, TransformInference.SuggestedTransform[] selectedAndFinalTransforms,
                                                     TransformInference.SuggestedTransform[] allTransforms, AutoInference.DependencyMap dependencyMapping)
        {
            // With no transforms available, only the empty selection is valid. The mask is
            // compared against zero rather than tested for positivity: a mask with its highest
            // bit set is negative as a signed long but is still a non-empty selection.
            if (allTransforms.Length == 0)
            {
                return transformsBitMask == 0;
            }

            var graph = BuildAtomicIdDependencyGraph(allTransforms);
            var selectedInitialTransforms =
                allTransforms.Where(t => AtomicGroupPresent(transformsBitMask, t.AtomicGroupId)).ToArray();

            // Make sure all necessary atomic groups are present, beginning with the last level.
            // allTransforms is non-empty here, so Max is well defined.
            int maxLevel = allTransforms.Max(t => t.RoutingStructure.Level);
            for (int l = maxLevel; l > 0; l--)
            {
                int level             = l; // To avoid complaint about access to modified closure
                var atomicIdsForLevel = allTransforms.Where(t => t.RoutingStructure.Level == level)
                                                     .Select(t => t.AtomicGroupId)
                                                     .Distinct()
                                                     .ToArray();

                // A selected group whose prerequisites are not all selected invalidates the pipeline.
                if (atomicIdsForLevel.Any(a =>
                                          AtomicGroupPresent(transformsBitMask, a) &&
                                          !graph[a].All(r => AtomicGroupPresent(transformsBitMask, r))))
                {
                    return false;
                }
            }

            // Make sure each transform only consumes columns actually produced by
            // a lower-level transform, or that existed in the original dataset.
            if (!AreColumnsConsistent(selectedInitialTransforms, dependencyMapping))
            {
                return false;
            }

            // Make sure the pipeline has a numeric vector Features column.
            if (!HasFinalFeatures(selectedAndFinalTransforms, dependencyMapping))
            {
                return false;
            }

            // Passed all tests
            return true;
        }