/// <summary>
/// Runs the column inference pipeline on a text file sample: split inference,
/// column type inference, purpose inference and column grouping, then packages
/// the outcome into a <see cref="ColumnInferenceResult"/>.
/// </summary>
/// <param name="context">ML context handed to each inference step and used to create the text reader.</param>
/// <param name="sample">File sample passed to split inference and column type inference.</param>
/// <param name="createDataView">Callback invoked with the typed text loader to obtain the data view used for purpose inference.</param>
/// <param name="label">Label value forwarded to <c>PurposeInference.InferPurposes</c>.</param>
/// <param name="hasHeader">Header flag set on the loader arguments, forwarded to grouping inference and stored in the result.</param>
/// <param name="separator">Separator value forwarded to <c>InferSplit</c>.</param>
/// <param name="isQuoted">Quoting flag forwarded to <c>InferSplit</c>.</param>
/// <param name="isSparse">Sparsity flag forwarded to <c>InferSplit</c>.</param>
/// <returns>
/// A result holding the inferred loader columns paired with their purposes, together with the
/// quoting, sparsity and separator settings reported by split inference and the header flag.
/// </returns>
private static ColumnInferenceResult InferColumns(
    MLContext context,
    TextFileSample sample,
    Func<TextLoader, IDataView> createDataView,
    string label,
    bool hasHeader,
    string separator,
    bool? isQuoted,
    bool? isSparse)
{
    // Step 1: work out how the file is split, then the type of each column.
    var split = InferSplit(sample, separator, isQuoted, isSparse);
    var columnTypes = InferColumnTypes(context, sample, split);

    // Step 2: build a loader configured with the inferred columns and split settings.
    var loaderArguments = new TextLoader.Arguments
    {
        Column = ColumnTypeInference.GenerateLoaderColumns(columnTypes.Columns),
        Separator = split.Separator,
        AllowSparse = split.AllowSparse,
        AllowQuoting = split.AllowQuote,
        HasHeader = hasHeader
    };
    var typedLoader = context.Data.CreateTextReader(loaderArguments);

    // Step 3: let the caller materialize the data view, then infer column purposes from it.
    var typedData = createDataView(typedLoader);
    var purposes = PurposeInference.InferPurposes(context, typedData, label);

    // Step 4: infer column grouping and generate column names.
    var groupedColumns = ColumnGroupingInference.InferGroupingAndNames(
        context,
        hasHeader,
        columnTypes.Columns,
        purposes);

    // Step 5: pair each grouped column's loader column with its purpose and return.
    var columnsWithPurpose =
        (from grouped in groupedColumns
         select (grouped.GenerateTextLoaderColumn(), grouped.Purpose)).ToArray();

    return new ColumnInferenceResult(
        columnsWithPurpose,
        split.AllowQuote,
        split.AllowSparse,
        split.Separator,
        hasHeader);
}
/// <summary>
/// Infers the column types of a text file sample using the settings found by split inference.
/// </summary>
/// <param name="context">ML context forwarded to <c>ColumnTypeInference.InferTextFileColumnTypes</c>.</param>
/// <param name="sample">File sample whose column types are inferred.</param>
/// <param name="splitInference">Split result supplying the column count, separator, sparsity and quoting settings.</param>
/// <returns>The type inference result, returned only when it reports success.</returns>
/// <exception cref="InferenceException">Thrown when the type inference result does not report success.</exception>
private static ColumnTypeInference.InferenceResult InferColumnTypes(
    MLContext context,
    TextFileSample sample,
    TextFileContents.ColumnSplitResult splitInference)
{
    // Carry the split settings over into the type inference arguments.
    var inferenceArguments = new ColumnTypeInference.Arguments
    {
        ColumnCount = splitInference.ColumnCount,
        Separator = splitInference.Separator,
        AllowSparse = splitInference.AllowSparse,
        AllowQuote = splitInference.AllowQuote,
    };

    var outcome = ColumnTypeInference.InferTextFileColumnTypes(context, sample, inferenceArguments);
    if (outcome.IsSuccess)
    {
        return outcome;
    }

    throw new InferenceException(InferenceType.ColumnDataKind, "Unable to infer column types of the file provided.");
}