/// <summary>
/// Applies this recipe to the given transform inference result. Yields suggested recipes
/// only when <see cref="GetSuggestedTransforms"/> produces a non-empty set of transforms
/// for the given predictor type; otherwise yields nothing.
/// </summary>
public virtual IEnumerable<SuggestedRecipe> Apply(
    TransformInference.InferenceResult transformInferenceResult, Type predictorType, IChannel ch)
{
    var suggested = GetSuggestedTransforms(transformInferenceResult, predictorType);
    // Guard clause: nothing to suggest for this predictor type.
    if (suggested == null || suggested.Length == 0)
        yield break;

    foreach (var recipe in ApplyCore(predictorType, suggested))
        yield return recipe;
}
/// <summary>
/// Returns the subset of suggested transforms that are allowed for this recipe, provided
/// the predictor type is supported and at least one qualifying transform is present.
/// Returns null when either precondition fails.
/// </summary>
protected virtual TransformInference.SuggestedTransform[] GetSuggestedTransforms(
    TransformInference.InferenceResult transformInferenceResult, Type predictorType)
{
    List<Type> allowed = AllowedTransforms();
    List<Type> qualifiers = QualifierTransforms();

    // Early-out: this recipe does not apply to the given predictor type.
    if (!AllowedPredictorTypes().Any(type => type == predictorType))
        return null;

    // Early-out: none of the inferred transforms qualifies this recipe.
    if (!transformInferenceResult.SuggestedTransforms.Any(t => qualifiers.Contains(t.ExpertType)))
        return null;

    return transformInferenceResult.SuggestedTransforms
        .Where(t => allowed.Contains(t.ExpertType) || qualifiers.Contains(t.ExpertType))
        .ToArray();
}
/// <summary>
/// Runs every known recipe against the transform inference result and collects all
/// suggested recipes into a single <see cref="InferenceResult"/>. Logs an informational
/// message when no recipe produced a suggestion.
/// </summary>
public static InferenceResult InferRecipes(IHostEnvironment env, TransformInference.InferenceResult transformInferenceResult, Type predictorType)
{
    Contracts.CheckValue(env, nameof(env));
    var host = env.Register("InferRecipes");
    using (var ch = host.Start("InferRecipes"))
    {
        var suggestions = GetRecipes(host)
            .SelectMany(recipe => recipe.Apply(transformInferenceResult, predictorType, ch))
            .ToArray();

        if (suggestions.Length == 0)
            ch.Info("No recipes are needed for the data.");

        return new InferenceResult(suggestions);
    }
}
/// <summary>
/// Infers text-loader settings, transform recipes, and a predictor category from a data file.
/// A JSON schema file may be supplied; if it is missing, unreadable, or invalid, the schema
/// is inferred from a sample of the data instead (a warning is logged in each case).
/// </summary>
/// <param name="env">Host environment. Must not be null.</param>
/// <param name="dataFile">Path of the data file to sample.</param>
/// <param name="schemaDefinitionFile">Optional path of a JSON column-schema file.</param>
/// <param name="predictorType">Inferred predictor category; initialized to clustering before detection runs.</param>
/// <param name="settingsString">Command-line settings string for the inferred text loader.</param>
/// <param name="inferenceResult">The transform inference result computed along the way.</param>
/// <param name="excludeFeaturesConcatTransforms">True to skip feature-concatenation transforms.</param>
/// <returns>The suggested recipes for the data.</returns>
/// <exception cref="System.Exception">Thrown via <c>ch.ExceptDecode</c> when the separator cannot be detected.</exception>
public static SuggestedRecipe[] InferRecipesFromData(IHostEnvironment env, string dataFile, string schemaDefinitionFile,
    out Type predictorType, out string settingsString, out TransformInference.InferenceResult inferenceResult,
    bool excludeFeaturesConcatTransforms = false)
{
    Contracts.CheckValue(env, nameof(env));
    var h = env.Register("InferRecipesFromData", seed: 0, verbose: false);
    using (var ch = h.Start("InferRecipesFromData"))
    {
        // Validate the schema file has content if provided.
        // Warn the user early if that is provided but being skipped.
        string schemaJson = null;
        if (!string.IsNullOrEmpty(schemaDefinitionFile))
        {
            try
            {
                schemaJson = File.ReadAllText(schemaDefinitionFile);
            }
            catch (Exception ex)
            {
                // Best-effort: an unreadable schema file falls back to schema inference below.
                ch.Warning($"Unable to read the schema file. Proceeding to infer the schema :{ex.Message}");
            }
        }

        ch.Info("Loading file sample into memory.");
        var sample = TextFileSample.CreateFromFullFile(h, dataFile);

        ch.Info("Detecting separator and columns");
        var splitResult = TextFileContents.TrySplitColumns(h, sample, TextFileContents.DefaultSeparators);

        // Initialize the out parameters up front; clustering is the fallback predictor
        // category if inference never reassigns it.
        predictorType = typeof(SignatureClusteringTrainer);
        settingsString = "";
        if (!splitResult.IsSuccess)
            throw ch.ExceptDecode("Couldn't detect separator.");

        ch.Info($"Separator detected as '{splitResult.Separator}', there's {splitResult.ColumnCount} columns.");

        ColumnGroupingInference.GroupingColumn[] columns;
        bool hasHeader = false;
        if (string.IsNullOrEmpty(schemaJson))
        {
            ch.Warning("Empty schema file. Proceeding to infer the schema.");
            columns = InferenceUtils.InferColumnPurposes(ch, h, sample, splitResult, out hasHeader);
        }
        else
        {
            try
            {
                columns = JsonConvert.DeserializeObject<ColumnGroupingInference.GroupingColumn[]>(schemaJson);
                ch.Info("Using the provided schema file.");
            }
            catch
            {
                // Deliberate broad catch: any deserialization failure falls back to inference.
                // BUGFIX: this warning string previously contained a raw line break, which is
                // invalid in a non-verbatim string literal; it is now a single line.
                ch.Warning("Invalid json in the schema file. Proceeding to infer the schema.");
                columns = InferenceUtils.InferColumnPurposes(ch, h, sample, splitResult, out hasHeader);
            }
        }

        var finalLoaderArgs = new TextLoader.Arguments
        {
            Column = ColumnGroupingInference.GenerateLoaderColumns(columns),
            HasHeader = hasHeader,
            Separator = splitResult.Separator,
            AllowSparse = splitResult.AllowSparse,
            AllowQuoting = splitResult.AllowQuote
        };
        settingsString = CommandLine.CmdParser.GetSettings(ch, finalLoaderArgs, new TextLoader.Arguments());
        ch.Info($"Loader options: {settingsString}");

        ch.Info("Inferring recipes");
        var finalData = TextLoader.ReadFile(h, finalLoaderArgs, sample);
        var cached = new CacheDataView(h, finalData, Enumerable.Range(0, finalLoaderArgs.Column.Length).ToArray());

        var purposeColumns = columns.Select((x, i) => new PurposeInference.Column(i, x.Purpose, x.ItemKind)).ToArray();
        // Estimate what fraction of the full file the in-memory sample represents
        // (1.0 when the full file size is unknown).
        var fraction = sample.FullFileSize == null ? 1.0 : (double)sample.SampleSize / sample.FullFileSize.Value;
        var transformInferenceResult = TransformInference.InferTransforms(h, cached, purposeColumns,
            new TransformInference.Arguments
            {
                EstimatedSampleFraction = fraction,
                ExcludeFeaturesConcatTransforms = excludeFeaturesConcatTransforms
            });
        predictorType = InferenceUtils.InferPredictorCategoryType(cached, purposeColumns);
        var recipeInferenceResult = InferRecipes(h, transformInferenceResult, predictorType);
        ch.Done();

        inferenceResult = transformInferenceResult;
        return recipeInferenceResult.SuggestedRecipes;
    }
}