/// <summary>
/// Loads <paramref name="path"/> with a text loader built from the inferred column types and
/// split settings, infers a purpose for each column, and returns the final loader options
/// together with the column information built from those purposes.
/// </summary>
/// <param name="context">ML context used to create the text loader and passed to the inference helpers.</param>
/// <param name="path">Path handed to the text loader's <c>Load</c> call.</param>
/// <param name="columnInfo">Caller-specified column information; checked against the loaded data view and passed to purpose inference.</param>
/// <param name="hasHeader">Forwarded to the loader options and to grouping inference.</param>
/// <param name="splitInference">Source of the separator, sparse, quoting and multiline settings.</param>
/// <param name="typeInference">Type inference result whose columns drive the loader columns.</param>
/// <param name="trimWhitespace">Forwarded to the loader options.</param>
/// <param name="groupColumns">
/// When <c>true</c>, columns and their names come from <c>ColumnGroupingInference</c>;
/// otherwise the per-column loader columns are used and names are read from the data view schema.
/// </param>
/// <returns>The text loader options and column information describing the file.</returns>
public static ColumnInferenceResults InferColumns(
    MLContext context,
    string path,
    ColumnInformation columnInfo,
    bool hasHeader,
    TextFileContents.ColumnSplitResult splitInference,
    ColumnTypeInference.InferenceResult typeInference,
    bool trimWhitespace,
    bool groupColumns)
{
    var typedColumns = ColumnTypeInference.GenerateLoaderColumns(typeInference.Columns);
    var separator = splitInference.Separator.Value;

    // First pass: load the file with one loader column per inferred column.
    var initialOptions = new TextLoader.Options
    {
        Columns = typedColumns,
        Separators = new char[] { separator },
        AllowSparse = splitInference.AllowSparse,
        AllowQuoting = splitInference.AllowQuote,
        ReadMultilines = splitInference.ReadMultilines,
        HasHeader = hasHeader,
        TrimWhitespace = trimWhitespace
    };
    var loadedData = context.Data.CreateTextLoader(initialOptions).Load(path);

    // Validate all columns specified in column info exist in inferred data view.
    ColumnInferenceValidationUtil.ValidateSpecifiedColumnsExist(columnInfo, loadedData);

    var inferredPurposes = PurposeInference.InferPurposes(context, loadedData, columnInfo);

    // Both sequences are assigned on every path below; the projections stay lazy.
    IEnumerable<TextLoader.Column> finalColumns;
    IEnumerable<(string, ColumnPurpose)> namedPurposes;

    if (!groupColumns)
    {
        // Ungrouped: keep the per-column loader columns and take names from the schema.
        finalColumns = typedColumns;
        namedPurposes = inferredPurposes.Select(
            purpose => (loadedData.Schema[purpose.ColumnIndex].Name, purpose.Purpose));
    }
    else
    {
        // Grouped: infer column grouping and generate column names.
        var groups = ColumnGroupingInference.InferGroupingAndNames(
            context, hasHeader, typeInference.Columns, inferredPurposes);
        finalColumns = groups.Select(group => group.GenerateTextLoaderColumn());
        namedPurposes = groups.Select(group => (group.SuggestedName, group.Purpose));
    }

    var finalOptions = new TextLoader.Options
    {
        Columns = finalColumns.ToArray(),
        Separators = new[] { separator },
        AllowSparse = splitInference.AllowSparse,
        AllowQuoting = splitInference.AllowQuote,
        ReadMultilines = splitInference.ReadMultilines,
        HasHeader = hasHeader,
        TrimWhitespace = trimWhitespace
    };

    return new ColumnInferenceResults
    {
        TextLoaderOptions = finalOptions,
        ColumnInformation = ColumnInformationUtil.BuildColumnInfo(namedPurposes)
    };
}
/// <summary>
/// Runs column type inference over <paramref name="sample"/> using the settings from
/// <paramref name="splitInference"/> and returns the result when inference succeeds.
/// </summary>
/// <param name="context">ML context passed through to the type inference routine.</param>
/// <param name="sample">Text file sample whose column types are inferred.</param>
/// <param name="splitInference">Source of the column count, separator, sparse and quoting settings.</param>
/// <param name="hasHeader">Forwarded to the inference arguments.</param>
/// <param name="labelColumnIndex">Optional label column index forwarded to the inference arguments.</param>
/// <param name="label">Label value forwarded to the inference arguments.</param>
/// <returns>The successful type inference result.</returns>
/// <exception cref="InferenceException">Thrown when the inference result reports failure.</exception>
private static ColumnTypeInference.InferenceResult InferColumnTypes(
    MLContext context,
    TextFileSample sample,
    TextFileContents.ColumnSplitResult splitInference,
    bool hasHeader,
    uint? labelColumnIndex,
    string label)
{
    var inferenceArgs = new ColumnTypeInference.Arguments
    {
        ColumnCount = splitInference.ColumnCount,
        Separator = splitInference.Separator.Value,
        AllowSparse = splitInference.AllowSparse,
        AllowQuote = splitInference.AllowQuote,
        HasHeader = hasHeader,
        LabelColumnIndex = labelColumnIndex,
        Label = label
    };

    var inferredTypes = ColumnTypeInference.InferTextFileColumnTypes(context, sample, inferenceArgs);
    if (inferredTypes.IsSuccess)
    {
        return inferredTypes;
    }

    throw new InferenceException(
        InferenceExceptionType.ColumnDataType,
        "Unable to infer column types of the file provided.");
}