/// <summary>
            /// Lazily produces the recipes suggested for the given transform inference result and predictor type.
            /// </summary>
            /// <param name="transformInferenceResult">Transform inference result forwarded to <c>GetSuggestedTransforms</c>.</param>
            /// <param name="predictorType">Predictor type forwarded to <c>GetSuggestedTransforms</c> and <c>ApplyCore</c>.</param>
            /// <param name="ch">Channel supplied by the caller; this implementation does not use it.</param>
            /// <returns>
            /// The recipes yielded by <c>ApplyCore</c>, or an empty sequence when
            /// <c>GetSuggestedTransforms</c> returns null or an empty array.
            /// </returns>
            public virtual IEnumerable<SuggestedRecipe> Apply(
                TransformInference.InferenceResult transformInferenceResult, Type predictorType, IChannel ch)
            {
                var selected = GetSuggestedTransforms(transformInferenceResult, predictorType);

                // Nothing was selected: end the sequence without calling ApplyCore.
                if (selected == null || selected.Length == 0)
                {
                    yield break;
                }

                foreach (var suggestion in ApplyCore(predictorType, selected))
                {
                    yield return suggestion;
                }
            }
            /// <summary>
            /// Selects the suggested transforms this recipe can work with.
            /// </summary>
            /// <param name="transformInferenceResult">Inference result whose <c>SuggestedTransforms</c> are filtered.</param>
            /// <param name="predictorType">Predictor type that must appear in <c>AllowedPredictorTypes()</c>.</param>
            /// <returns>
            /// The suggested transforms whose <c>ExpertType</c> is listed in either <c>AllowedTransforms()</c> or
            /// <c>QualifierTransforms()</c>; null when the predictor type is not allowed or when no suggested
            /// transform has a qualifier expert type.
            /// </returns>
            protected virtual TransformInference.SuggestedTransform[] GetSuggestedTransforms(
                TransformInference.InferenceResult transformInferenceResult, Type predictorType)
            {
                List<Type> allowed = AllowedTransforms();
                List<Type> qualifiers = QualifierTransforms();

                // The predictor type is checked first; the suggested transforms are not inspected when it fails.
                bool predictorSupported = AllowedPredictorTypes().Any(candidate => candidate == predictorType);
                if (!predictorSupported)
                {
                    return null;
                }

                // At least one qualifier transform must be present for the recipe to apply.
                bool hasQualifier = transformInferenceResult.SuggestedTransforms
                    .Any(candidate => qualifiers.Contains(candidate.ExpertType));
                if (!hasQualifier)
                {
                    return null;
                }

                return transformInferenceResult.SuggestedTransforms
                    .Where(candidate => allowed.Contains(candidate.ExpertType) || qualifiers.Contains(candidate.ExpertType))
                    .ToArray();
            }
        /// <summary>
        /// Applies every recipe returned by <c>GetRecipes</c> to the supplied transform inference result and
        /// collects all recipes they suggest.
        /// </summary>
        /// <param name="env">Host environment; must not be null. Used to register the "InferRecipes" host.</param>
        /// <param name="transformInferenceResult">Transform inference result passed to each recipe's <c>Apply</c>.</param>
        /// <param name="predictorType">Predictor type passed to each recipe's <c>Apply</c>.</param>
        /// <returns>An <see cref="InferenceResult"/> wrapping the collected recipes (possibly none).</returns>
        public static InferenceResult InferRecipes(IHostEnvironment env, TransformInference.InferenceResult transformInferenceResult,
                                                   Type predictorType)
        {
            Contracts.CheckValue(env, nameof(env));
            var h = env.Register("InferRecipes");

            using (var ch = h.Start("InferRecipes"))
            {
                var list = new List<SuggestedRecipe>();
                foreach (var recipe in GetRecipes(h))
                {
                    // AddRange enumerates the lazy sequence returned by Apply right away.
                    list.AddRange(recipe.Apply(transformInferenceResult, predictorType, ch));
                }

                if (list.Count == 0)
                {
                    ch.Info("No recipes are needed for the data.");
                }

                // Mark the channel as done on the success path before the using block disposes it,
                // matching what InferRecipesFromData does with its own channel.
                // NOTE(review): presumably a channel disposed without Done() is reported as not completed - confirm.
                ch.Done();

                return new InferenceResult(list.ToArray());
            }
        }
        /// <summary>
        /// Loads a sample of a text data file, determines its separator and column purposes (from a schema
        /// file when one is usable, otherwise by inference), infers transforms and a predictor type, and
        /// returns the recipes suggested for that data.
        /// </summary>
        /// <param name="env">Host environment; must not be null.</param>
        /// <param name="dataFile">Path of the data file to sample.</param>
        /// <param name="schemaDefinitionFile">
        /// Optional path of a JSON file that deserializes to an array of
        /// <c>ColumnGroupingInference.GroupingColumn</c>. When it is null or empty, cannot be read, has no
        /// content, or does not yield a column array, the column purposes are inferred from the sample instead.
        /// </param>
        /// <param name="predictorType">Receives the predictor type returned by <c>InferenceUtils.InferPredictorCategoryType</c>.</param>
        /// <param name="settingsString">Receives the loader settings string produced by <c>CmdParser.GetSettings</c>.</param>
        /// <param name="inferenceResult">Receives the transform inference result the recipes were derived from.</param>
        /// <param name="excludeFeaturesConcatTransforms">Forwarded to <c>TransformInference.Arguments.ExcludeFeaturesConcatTransforms</c>.</param>
        /// <returns>The recipes suggested for the data.</returns>
        /// <remarks>Throws the exception created by <c>ch.ExceptDecode</c> when the separator cannot be detected.</remarks>
        public static SuggestedRecipe[] InferRecipesFromData(IHostEnvironment env, string dataFile, string schemaDefinitionFile,
                                                             out Type predictorType, out string settingsString, out TransformInference.InferenceResult inferenceResult,
                                                             bool excludeFeaturesConcatTransforms = false)
        {
            Contracts.CheckValue(env, nameof(env));
            var h = env.Register("InferRecipesFromData", seed: 0, verbose: false);

            using (var ch = h.Start("InferRecipesFromData"))
            {
                // Validate the schema file has content if provided.
                // Warn the user early if that is provided but being skipped.
                string schemaJson = null;
                if (!string.IsNullOrEmpty(schemaDefinitionFile))
                {
                    try
                    {
                        schemaJson = File.ReadAllText(schemaDefinitionFile);
                    }
                    catch (Exception ex)
                    {
                        ch.Warning($"Unable to read the schema file. Proceeding to infer the schema :{ex.Message}");
                    }
                }

                ch.Info("Loading file sample into memory.");
                var sample = TextFileSample.CreateFromFullFile(h, dataFile);

                ch.Info("Detecting separator and columns");
                var splitResult = TextFileContents.TrySplitColumns(h, sample, TextFileContents.DefaultSeparators);

                // initialize to clustering if we're not successful?
                predictorType  = typeof(SignatureClusteringTrainer);
                settingsString = "";
                if (!splitResult.IsSuccess)
                {
                    throw ch.ExceptDecode("Couldn't detect separator.");
                }

                ch.Info($"Separator detected as '{splitResult.Separator}', there's {splitResult.ColumnCount} columns.");

                ColumnGroupingInference.GroupingColumn[] columns = null;
                bool hasHeader = false;
                if (!string.IsNullOrWhiteSpace(schemaJson))
                {
                    try
                    {
                        columns = JsonConvert.DeserializeObject<ColumnGroupingInference.GroupingColumn[]>(schemaJson);
                    }
                    catch (Exception)
                    {
                        // Best effort: any failure to parse the schema falls back to inference below.
                        columns = null;
                    }

                    // A null result (for example from the JSON literal "null") is unusable as well,
                    // so it is treated the same way as malformed JSON.
                    if (columns != null)
                    {
                        ch.Info("Using the provided schema file.");
                    }
                    else
                    {
                        ch.Warning("Invalid json in the schema file. Proceeding to infer the schema.");
                    }
                }
                else if (schemaJson != null)
                {
                    // Only reached when a schema file was read successfully but held no usable content.
                    // A missing file path or a failed read (already warned about above) leaves schemaJson null.
                    ch.Warning("Empty schema file. Proceeding to infer the schema.");
                }

                if (columns == null)
                {
                    columns = InferenceUtils.InferColumnPurposes(ch, h, sample, splitResult, out hasHeader);
                }

                var finalLoaderArgs = new TextLoader.Arguments
                {
                    Column       = ColumnGroupingInference.GenerateLoaderColumns(columns),
                    HasHeader    = hasHeader,
                    Separator    = splitResult.Separator,
                    AllowSparse  = splitResult.AllowSparse,
                    AllowQuoting = splitResult.AllowQuote
                };

                settingsString = CommandLine.CmdParser.GetSettings(ch, finalLoaderArgs, new TextLoader.Arguments());
                ch.Info($"Loader options: {settingsString}");

                ch.Info("Inferring recipes");
                var finalData = TextLoader.ReadFile(h, finalLoaderArgs, sample);
                var cached    = new CacheDataView(h, finalData,
                                                  Enumerable.Range(0, finalLoaderArgs.Column.Length).ToArray());

                var purposeColumns = columns.Select((x, i) => new PurposeInference.Column(i, x.Purpose, x.ItemKind)).ToArray();

                // The lifted comparison is false for a null size, so an unknown size and a zero size
                // both use a fraction of 1.0 instead of dividing by zero.
                var fraction = sample.FullFileSize > 0
                    ? (double)sample.SampleSize / sample.FullFileSize.Value
                    : 1.0;
                var transformInferenceResult = TransformInference.InferTransforms(h, cached, purposeColumns,
                    new TransformInference.Arguments
                    {
                        EstimatedSampleFraction         = fraction,
                        ExcludeFeaturesConcatTransforms = excludeFeaturesConcatTransforms
                    });
                predictorType = InferenceUtils.InferPredictorCategoryType(cached, purposeColumns);
                var recipeInferenceResult = InferRecipes(h, transformInferenceResult, predictorType);

                ch.Done();

                inferenceResult = transformInferenceResult;
                return recipeInferenceResult.SuggestedRecipes;
            }
        }