/// <summary>
/// Infers the columns of the text file at <paramref name="path"/>, locating the label
/// column by the name held in <paramref name="columnInfo"/>.
/// </summary>
/// <remarks>
/// This overload always passes <c>true</c> for the header flag, both to
/// <c>InferColumnTypes</c> and to the downstream <c>InferColumns</c> overload.
/// </remarks>
/// <param name="context">ML context handed to every inference step.</param>
/// <param name="path">Path of the file to sample.</param>
/// <param name="columnInfo">Column information; its <c>LabelColumnName</c> is used as the label.</param>
/// <param name="separatorChar">Separator to use, or <c>null</c> to try the default candidates.</param>
/// <param name="allowQuotedStrings">Optional override for the inferred quoting setting.</param>
/// <param name="supportSparse">Optional override for the inferred sparse setting.</param>
/// <param name="trimWhitespace">Forwarded unchanged to the downstream overload.</param>
/// <param name="groupColumns">Forwarded unchanged to the downstream overload.</param>
/// <returns>The result produced by the downstream <c>InferColumns</c> overload.</returns>
/// <exception cref="InferenceException">
/// Thrown when the file cannot be split into columns or column types cannot be inferred.
/// </exception>
public static ColumnInferenceResults InferColumns(MLContext context, string path, ColumnInformation columnInfo, char? separatorChar, bool? allowQuotedStrings, bool? supportSparse, bool trimWhitespace, bool groupColumns)
{
    var fileSample = TextFileSample.CreateFromFullFile(path);

    // Step 1: work out how the file splits into columns.
    TextFileContents.ColumnSplitResult split =
        InferSplit(context, fileSample, separatorChar, allowQuotedStrings, supportSparse);

    // Step 2: infer column types; the label is identified by name, not by index.
    ColumnTypeInference.InferenceResult types = InferColumnTypes(
        context,
        fileSample,
        split,
        hasHeader: true,
        labelColumnIndex: null,
        label: columnInfo.LabelColumnName);

    // Step 3: hand both inference results to the overload that builds the final answer.
    return InferColumns(context, path, columnInfo, true, split, types, trimWhitespace, groupColumns);
}
/// <summary>
/// Infers the columns of the text file at <paramref name="path"/>, locating the label
/// column by its zero-based position in the inferred column list.
/// </summary>
/// <param name="context">ML context handed to every inference step.</param>
/// <param name="path">Path of the file to sample.</param>
/// <param name="labelColumnIndex">Index of the label column within the inferred columns.</param>
/// <param name="hasHeader">Whether the file has a header row.</param>
/// <param name="separatorChar">Separator to use, or <c>null</c> to try the default candidates.</param>
/// <param name="allowQuotedStrings">Optional override for the inferred quoting setting.</param>
/// <param name="supportSparse">Optional override for the inferred sparse setting.</param>
/// <param name="trimWhitespace">Forwarded unchanged to the downstream overload.</param>
/// <param name="groupColumns">Forwarded unchanged to the downstream overload.</param>
/// <returns>The result produced by the downstream <c>InferColumns</c> overload.</returns>
/// <exception cref="InferenceException">
/// Thrown when the file cannot be split into columns or column types cannot be inferred.
/// </exception>
public static ColumnInferenceResults InferColumns(MLContext context, string path, uint labelColumnIndex, bool hasHeader, char? separatorChar, bool? allowQuotedStrings, bool? supportSparse, bool trimWhitespace, bool groupColumns)
{
    var fileSample = TextFileSample.CreateFromFullFile(path);

    TextFileContents.ColumnSplitResult split =
        InferSplit(context, fileSample, separatorChar, allowQuotedStrings, supportSparse);

    // The label is identified by index here, so no label name is supplied.
    ColumnTypeInference.InferenceResult types = InferColumnTypes(
        context,
        fileSample,
        split,
        hasHeader,
        labelColumnIndex,
        label: null);

    // Without a header row, give the label column the default label name
    // (DefaultColumnNames.Label) as its suggested name.
    if (!hasHeader)
    {
        types.Columns[labelColumnIndex].SuggestedName = DefaultColumnNames.Label;
    }

    // Build the column information from whatever name the label column ended up with.
    var labelInfo = new ColumnInformation
    {
        LabelColumnName = types.Columns[labelColumnIndex].SuggestedName
    };

    return InferColumns(context, path, labelInfo, hasHeader, split, types, trimWhitespace, groupColumns);
}
/// <summary>
/// Runs column type inference over <paramref name="sample"/> using the layout described
/// by <paramref name="splitInference"/>.
/// </summary>
/// <param name="context">ML context passed through to the type inference call.</param>
/// <param name="sample">Sample of the text file to inspect.</param>
/// <param name="splitInference">
/// Split result supplying the column count, separator, and sparse/quote settings.
/// </param>
/// <param name="hasHeader">Whether the file has a header row.</param>
/// <param name="labelColumnIndex">Label column index, or <c>null</c> when not supplied.</param>
/// <param name="label">Label column name, or <c>null</c> when not supplied.</param>
/// <returns>The successful type inference result.</returns>
/// <exception cref="InferenceException">
/// Thrown with <see cref="InferenceExceptionType.ColumnDataType"/> when inference does not succeed.
/// </exception>
private static ColumnTypeInference.InferenceResult InferColumnTypes(MLContext context, TextFileSample sample, TextFileContents.ColumnSplitResult splitInference, bool hasHeader, uint? labelColumnIndex, string label)
{
    // Translate the split result plus label hints into the inference arguments.
    var inferenceArgs = new ColumnTypeInference.Arguments
    {
        ColumnCount = splitInference.ColumnCount,
        Separator = splitInference.Separator.Value,
        AllowSparse = splitInference.AllowSparse,
        AllowQuote = splitInference.AllowQuote,
        HasHeader = hasHeader,
        LabelColumnIndex = labelColumnIndex,
        Label = label
    };

    var inferred = ColumnTypeInference.InferTextFileColumnTypes(context, sample, inferenceArgs);
    if (inferred.IsSuccess)
    {
        return inferred;
    }

    throw new InferenceException(InferenceExceptionType.ColumnDataType, "Unable to infer column types of the file provided.");
}
/// <summary>
/// Determines how <paramref name="sample"/> splits into columns, then applies any
/// caller-supplied overrides for the quoting and sparse settings.
/// </summary>
/// <param name="context">ML context passed through to the split attempt.</param>
/// <param name="sample">Sample of the text file to inspect.</param>
/// <param name="separatorChar">
/// The only separator to try, or <c>null</c> to try <c>TextFileContents.DefaultSeparators</c>.
/// </param>
/// <param name="allowQuotedStrings">When non-null, replaces the result's <c>AllowQuote</c> value.</param>
/// <param name="supportSparse">When non-null, replaces the result's <c>AllowSparse</c> value.</param>
/// <returns>The successful split result, with overrides applied.</returns>
/// <exception cref="InferenceException">
/// Thrown with <see cref="InferenceExceptionType.ColumnSplit"/> when the split does not succeed.
/// </exception>
private static TextFileContents.ColumnSplitResult InferSplit(MLContext context, TextFileSample sample, char? separatorChar, bool? allowQuotedStrings, bool? supportSparse)
{
    // An explicit separator narrows the search to that single candidate.
    var candidates = separatorChar.HasValue
        ? new[] { separatorChar.Value }
        : TextFileContents.DefaultSeparators;

    var split = TextFileContents.TrySplitColumns(context, sample, candidates);

    // Caller-provided settings win over whatever was inferred.
    // These are applied before the success check below.
    if (allowQuotedStrings.HasValue)
    {
        split.AllowQuote = allowQuotedStrings.Value;
    }

    if (supportSparse.HasValue)
    {
        split.AllowSparse = supportSparse.Value;
    }

    if (split.IsSuccess)
    {
        return split;
    }

    throw new InferenceException(InferenceExceptionType.ColumnSplit, "Unable to split the file provided into multiple, consistent columns.");
}