/// <summary>
        /// Infers the columns of the text file at <paramref name="path"/>, passing the label column
        /// name taken from <paramref name="columnInfo"/> to type inference.
        /// </summary>
        /// <remarks>
        /// Samples the file, infers the column split and the column types, and then hands those
        /// results to the InferColumns overload that accepts precomputed inference. The header flag
        /// is always passed as <c>true</c> by this overload (presumably because a label given by
        /// name needs a header row to be resolved - confirm against callers).
        /// </remarks>
        /// <exception cref="InferenceException">
        /// Raised by the split or type inference helpers when their result does not report success.
        /// </exception>
        public static ColumnInferenceResults InferColumns(MLContext context, string path, ColumnInformation columnInfo,
                                                          char? separatorChar, bool? allowQuotedStrings, bool? supportSparse,
                                                          bool trimWhitespace, bool groupColumns)
        {
            // This overload always treats the file as having a header row.
            const bool hasHeader = true;

            var fileSample = TextFileSample.CreateFromFullFile(path);
            TextFileContents.ColumnSplitResult columnSplit =
                InferSplit(context, fileSample, separatorChar, allowQuotedStrings, supportSparse);

            // No label index is supplied here; the label column name is passed instead.
            ColumnTypeInference.InferenceResult columnTypes = InferColumnTypes(
                context, fileSample, columnSplit, hasHeader, labelColumnIndex: null, label: columnInfo.LabelColumnName);

            return InferColumns(context, path, columnInfo, hasHeader, columnSplit, columnTypes, trimWhitespace, groupColumns);
        }
        /// <summary>
        /// Infers the columns of the text file at <paramref name="path"/> when the label is identified
        /// by its zero-based position, <paramref name="labelColumnIndex"/>, in the inferred column list.
        /// </summary>
        /// <remarks>
        /// Samples the file, infers the column split and the column types, derives a
        /// <see cref="ColumnInformation"/> whose label name is the suggested name of the label column,
        /// and then hands everything to the InferColumns overload that accepts precomputed inference.
        /// </remarks>
        /// <exception cref="InferenceException">
        /// Raised by the split or type inference helpers when their result does not report success.
        /// </exception>
        public static ColumnInferenceResults InferColumns(MLContext context, string path, uint labelColumnIndex,
                                                          bool hasHeader, char? separatorChar, bool? allowQuotedStrings,
                                                          bool? supportSparse, bool trimWhitespace, bool groupColumns)
        {
            var fileSample = TextFileSample.CreateFromFullFile(path);
            TextFileContents.ColumnSplitResult columnSplit =
                InferSplit(context, fileSample, separatorChar, allowQuotedStrings, supportSparse);

            // The label is given by index, so no label name is supplied to type inference.
            ColumnTypeInference.InferenceResult columnTypes = InferColumnTypes(
                context, fileSample, columnSplit, hasHeader, labelColumnIndex, label: null);

            // Without a header row, name the label column with the default label name.
            if (hasHeader == false)
            {
                columnTypes.Columns[labelColumnIndex].SuggestedName = DefaultColumnNames.Label;
            }

            // Carry the (possibly just overwritten) suggested name forward as the label column name.
            var labelInfo = new ColumnInformation();
            labelInfo.LabelColumnName = columnTypes.Columns[labelColumnIndex].SuggestedName;

            return InferColumns(context, path, labelInfo, hasHeader, columnSplit, columnTypes, trimWhitespace, groupColumns);
        }
        /// <summary>
        /// Runs column type inference over <paramref name="sample"/>, configured from the values held
        /// by <paramref name="splitInference"/> plus the header and label settings supplied.
        /// </summary>
        /// <returns>The type inference result, returned only when it reports success.</returns>
        /// <exception cref="InferenceException">
        /// Thrown with <see cref="InferenceExceptionType.ColumnDataType"/> when the inference result
        /// does not report success.
        /// </exception>
        private static ColumnTypeInference.InferenceResult InferColumnTypes(MLContext context, TextFileSample sample,
                                                                            TextFileContents.ColumnSplitResult splitInference,
                                                                            bool hasHeader, uint? labelColumnIndex, string label)
        {
            // Translate the split inference outcome and caller settings into inference arguments.
            var inferenceArgs = new ColumnTypeInference.Arguments
            {
                ColumnCount = splitInference.ColumnCount,
                Separator = splitInference.Separator.Value,
                AllowSparse = splitInference.AllowSparse,
                AllowQuote = splitInference.AllowQuote,
                HasHeader = hasHeader,
                LabelColumnIndex = labelColumnIndex,
                Label = label
            };

            var inferred = ColumnTypeInference.InferTextFileColumnTypes(context, sample, inferenceArgs);
            if (inferred.IsSuccess)
            {
                return inferred;
            }

            throw new InferenceException(InferenceExceptionType.ColumnDataType, "Unable to infer column types of the file provided.");
        }
        /// <summary>
        /// Infers how <paramref name="sample"/> splits into columns, then applies any caller-supplied
        /// overrides for quoting and sparse support.
        /// </summary>
        /// <param name="separatorChar">
        /// When set, the only separator candidate tried; otherwise
        /// <see cref="TextFileContents.DefaultSeparators"/> are used.
        /// </param>
        /// <param name="allowQuotedStrings">When set, replaces the inferred AllowQuote value.</param>
        /// <param name="supportSparse">When set, replaces the inferred AllowSparse value.</param>
        /// <returns>The split result, returned only when it reports success.</returns>
        /// <exception cref="InferenceException">
        /// Thrown with <see cref="InferenceExceptionType.ColumnSplit"/> when the split result does not
        /// report success.
        /// </exception>
        private static TextFileContents.ColumnSplitResult InferSplit(MLContext context, TextFileSample sample,
                                                                     char? separatorChar, bool? allowQuotedStrings, bool? supportSparse)
        {
            var candidates = separatorChar.HasValue
                ? new[] { separatorChar.Value }
                : TextFileContents.DefaultSeparators;
            var split = TextFileContents.TrySplitColumns(context, sample, candidates);

            // Explicit caller settings take precedence over the inferred ones.
            if (allowQuotedStrings.HasValue)
            {
                split.AllowQuote = allowQuotedStrings.Value;
            }
            if (supportSparse.HasValue)
            {
                split.AllowSparse = supportSparse.Value;
            }

            if (split.IsSuccess)
            {
                return split;
            }

            throw new InferenceException(InferenceExceptionType.ColumnSplit, "Unable to split the file provided into multiple, consistent columns.");
        }