Esempio n. 1
0
        /// <summary>
        /// Configures this TextLoader by inferring the dataset schema from the public instance
        /// fields of <typeparamref name="TInput"/>.
        /// </summary>
        /// <remarks>
        /// Every inspected field must carry a ColumnAttribute. The attribute's Ordinal is parsed into
        /// the source range(s) of the column, and the column is named after the attribute's Name,
        /// falling back to the field name. For array-typed fields the element type decides the
        /// column's data kind. A field without the attribute, with an Ordinal that contains invalid
        /// characters or cannot be parsed, or with an unsupported type causes an exception, in which
        /// case the loader's Arguments are left unmodified.
        /// </remarks>
        /// <typeparam name="TInput">Type whose public instance fields describe the columns.</typeparam>
        /// <param name="useHeader">Whether the file contains a header row.</param>
        /// <param name="separator">Column separator character. Default is '\t'.</param>
        /// <param name="allowQuotedStrings">Whether the input may include quoted values,
        /// which can contain separator characters, colons,
        /// and distinguish empty values from missing values. When true, consecutive separators
        /// denote a missing value and an empty value is denoted by "".
        /// When false, consecutive separators denote an empty value.</param>
        /// <param name="supportSparse">Whether the input may include sparse representations, e.g.
        /// if one of the rows contains "5 2:6 4:3", that means there are 5 columns, all zero
        /// except for the 3rd and 5th columns, which have values 6 and 3.</param>
        /// <param name="trimWhitespace">Remove trailing whitespace from lines.</param>
        /// <returns>This TextLoader instance.</returns>
        public TextLoader CreateFrom<TInput>(bool useHeader = false,
                                             char separator = '\t',
                                             bool allowQuotedStrings = true,
                                             bool supportSparse = true,
                                             bool trimWhitespace = false)
        {
            // Only public instance fields can hold per-row values. The parameterless GetFields()
            // would also return public static and const fields.
            var fields = typeof(TInput).GetFields(BindingFlags.Public | BindingFlags.Instance);

            // Build the columns in a local array so that a validation failure part-way through
            // does not leave Arguments.Column replaced by a partially filled array.
            var columns = new TextLoaderColumn[fields.Length];
            for (int index = 0; index < fields.Length; index++)
            {
                var field = fields[index];
                var mappingAttr = field.GetCustomAttribute<ColumnAttribute>();
                if (mappingAttr == null)
                {
                    throw Contracts.Except($"{field.Name} is missing ColumnAttribute");
                }

                // NOTE(review): the character class below also admits '(', ')' and ',', which the
                // error message does not list - confirm whether that is intended.
                if (Regex.IsMatch(mappingAttr.Ordinal, @"[^(0-9,\*\-~)]+"))
                {
                    throw Contracts.Except($"{mappingAttr.Ordinal} contains invalid characters. " +
                                           $"Valid characters are 0-9, *, - and ~");
                }

                var name = mappingAttr.Name ?? field.Name;

                Runtime.Data.TextLoader.Range[] sources;
                if (!Runtime.Data.TextLoader.Column.TryParseSourceEx(mappingAttr.Ordinal, out sources))
                {
                    throw Contracts.Except($"{mappingAttr.Ordinal} could not be parsed.");
                }

                Contracts.Assert(sources != null);

                // For array fields the column kind is that of the array's element type.
                var valueType = field.FieldType.IsArray ? field.FieldType.GetElementType() : field.FieldType;
                DataKind dk;
                if (!TryGetDataKind(valueType, out dk))
                {
                    throw Contracts.Except($"{name} is of unsupported type.");
                }

                // Copy each parsed runtime range into its TextLoaderRange counterpart.
                var ranges = new TextLoaderRange[sources.Length];
                for (int indexLocal = 0; indexLocal < ranges.Length; indexLocal++)
                {
                    var source = sources[indexLocal];
                    ranges[indexLocal] = new TextLoaderRange
                    {
                        AllOther    = source.AllOther,
                        AutoEnd     = source.AutoEnd,
                        ForceVector = source.ForceVector,
                        VariableEnd = source.VariableEnd,
                        Max         = source.Max,
                        Min         = source.Min
                    };
                }

                var tlc = new TextLoaderColumn();
                tlc.Name   = name;
                tlc.Source = ranges;
                tlc.Type   = dk;

                columns[index] = tlc;
            }

            // Every field validated: publish the schema and the parsing options together.
            Arguments.Column         = columns;
            Arguments.HasHeader      = useHeader;
            Arguments.Separator      = new[] { separator };
            Arguments.AllowQuoting   = allowQuotedStrings;
            Arguments.AllowSparse    = supportSparse;
            Arguments.TrimWhitespace = trimWhitespace;

            return this;
        }
Esempio n. 2
0
        /// <summary>
        /// Configures this TextLoader by inferring the dataset schema from the public instance
        /// fields and properties of <typeparamref name="TInput"/>.
        /// </summary>
        /// <remarks>
        /// All public instance fields are inspected, together with the public instance properties
        /// that are not indexers and have both a public getter and a public setter. Every inspected
        /// member must carry a ColumnAttribute. The attribute's Ordinal is parsed into the source
        /// range(s) of the column. The column name is the ColumnAttribute's Name, then the
        /// ColumnNameAttribute's Name, then the member name, whichever is first non-null. For
        /// array-typed members the element type decides the column's data kind. A member without
        /// the attribute, with an Ordinal that contains invalid characters or cannot be parsed, or
        /// with an unsupported type causes an exception, in which case the loader's Arguments are
        /// left unmodified.
        /// </remarks>
        /// <typeparam name="TInput">Type whose public instance fields and properties describe the columns.</typeparam>
        /// <param name="useHeader">Whether the file contains a header row.</param>
        /// <param name="separator">Column separator character. Default is '\t'.</param>
        /// <param name="allowQuotedStrings">Whether the input may include quoted values,
        /// which can contain separator characters, colons,
        /// and distinguish empty values from missing values. When true, consecutive separators
        /// denote a missing value and an empty value is denoted by "".
        /// When false, consecutive separators denote an empty value.</param>
        /// <param name="supportSparse">Whether the input may include sparse representations, for example,
        /// if one of the rows contains "5 2:6 4:3", that means there are 5 columns, all zero
        /// except for the 3rd and 5th columns, which have values 6 and 3.</param>
        /// <param name="trimWhitespace">Remove trailing whitespace from lines.</param>
        /// <returns>This TextLoader instance.</returns>
        public TextLoader CreateFrom<TInput>(bool useHeader = false,
                                             char separator = '\t',
                                             bool allowQuotedStrings = true,
                                             bool supportSparse = true,
                                             bool trimWhitespace = false)
        {
            var userType = typeof(TInput);

            var fieldInfos = userType.GetFields(BindingFlags.Public | BindingFlags.Instance);

            // Keep only non-indexer properties that expose both a public getter and a public setter.
            var propertyInfos =
                userType
                .GetProperties(BindingFlags.Public | BindingFlags.Instance)
                .Where(x => x.CanRead && x.CanWrite && x.GetGetMethod() != null && x.GetSetMethod() != null && x.GetIndexParameters().Length == 0);

            // Fields first, then properties; this order determines the column order.
            var memberInfos = fieldInfos.Concat<MemberInfo>(propertyInfos).ToArray();

            // Build the columns in a local array so that a validation failure part-way through
            // does not leave Arguments.Column replaced by a partially filled array.
            var columns = new TextLoaderColumn[memberInfos.Length];
            for (int index = 0; index < memberInfos.Length; index++)
            {
                var memberInfo  = memberInfos[index];
                var mappingAttr = memberInfo.GetCustomAttribute<ColumnAttribute>();
                if (mappingAttr == null)
                {
                    throw Contracts.Except($"Field or property {memberInfo.Name} is missing ColumnAttribute");
                }

                // NOTE(review): the character class below also admits '(', ')' and ',', which the
                // error message does not list - confirm whether that is intended.
                if (Regex.IsMatch(mappingAttr.Ordinal, @"[^(0-9,\*\-~)]+"))
                {
                    throw Contracts.Except($"{mappingAttr.Ordinal} contains invalid characters. " +
                                           $"Valid characters are 0-9, *, - and ~");
                }

                // Name precedence: ColumnAttribute.Name, then ColumnNameAttribute.Name, then the member name.
                var mappingNameAttr = memberInfo.GetCustomAttribute<ColumnNameAttribute>();
                var name            = mappingAttr.Name ?? mappingNameAttr?.Name ?? memberInfo.Name;

                Runtime.Data.TextLoader.Range[] sources;
                if (!Runtime.Data.TextLoader.Column.TryParseSourceEx(mappingAttr.Ordinal, out sources))
                {
                    throw Contracts.Except($"{mappingAttr.Ordinal} could not be parsed.");
                }

                Contracts.Assert(sources != null);

                // For array members the column kind is that of the array's element type.
                DataKind dk;
                switch (memberInfo)
                {
                    case FieldInfo field:
                        var fieldType = field.FieldType;
                        if (!TryGetDataKind(fieldType.IsArray ? fieldType.GetElementType() : fieldType, out dk))
                        {
                            throw Contracts.Except($"Field {name} is of unsupported type.");
                        }

                        break;

                    case PropertyInfo property:
                        var propertyType = property.PropertyType;
                        if (!TryGetDataKind(propertyType.IsArray ? propertyType.GetElementType() : propertyType, out dk))
                        {
                            throw Contracts.Except($"Property {name} is of unsupported type.");
                        }

                        break;

                    default:
                        // Unreachable: memberInfos is built from fields and properties only.
                        Contracts.Assert(false);
                        throw Contracts.ExceptNotSupp("Expected a FieldInfo or a PropertyInfo");
                }

                // Copy each parsed runtime range into its TextLoaderRange counterpart.
                var ranges = new TextLoaderRange[sources.Length];
                for (int indexLocal = 0; indexLocal < ranges.Length; indexLocal++)
                {
                    var source = sources[indexLocal];
                    ranges[indexLocal] = new TextLoaderRange
                    {
                        AllOther    = source.AllOther,
                        AutoEnd     = source.AutoEnd,
                        ForceVector = source.ForceVector,
                        VariableEnd = source.VariableEnd,
                        Max         = source.Max,
                        Min         = source.Min
                    };
                }

                var tlc = new TextLoaderColumn();
                tlc.Name   = name;
                tlc.Source = ranges;
                tlc.Type   = dk;

                columns[index] = tlc;
            }

            // Every member validated: publish the schema and the parsing options together.
            Arguments.Column         = columns;
            Arguments.HasHeader      = useHeader;
            Arguments.Separator      = new[] { separator };
            Arguments.AllowQuoting   = allowQuotedStrings;
            Arguments.AllowSparse    = supportSparse;
            Arguments.TrimWhitespace = trimWhitespace;

            return this;
        }