/// <summary>
/// Configures this TextLoader by inferring the dataset schema from the public fields of
/// <typeparamref name="TInput"/>.
/// </summary>
/// <remarks>
/// Every public field of <typeparamref name="TInput"/> must carry a ColumnAttribute whose Ordinal
/// describes the source column range(s). The column name is the attribute's Name when set,
/// otherwise the field name. For array fields the element type determines the column's data kind.
/// An exception created through Contracts.Except is thrown when a field has no ColumnAttribute,
/// when the ordinal contains invalid characters or cannot be parsed, or when the field type is
/// not supported. In that case the loader's arguments are left untouched.
/// </remarks>
/// <typeparam name="TInput">Type whose public fields describe the columns to load.</typeparam>
/// <param name="useHeader">Whether the file contains a header.</param>
/// <param name="separator">Column separator character. Default is '\t'.</param>
/// <param name="allowQuotedStrings">Whether the input may include quoted values,
/// which can contain separator characters, colons,
/// and distinguish empty values from missing values. When true, consecutive separators
/// denote a missing value and an empty value is denoted by "".
/// When false, consecutive separators denote an empty value.</param>
/// <param name="supportSparse">Whether the input may include sparse representations. For example,
/// if one of the rows contains "5 2:6 4:3", that means there are 5 columns, all zero
/// except for the 3rd and 5th columns, which have values 6 and 3.</param>
/// <param name="trimWhitespace">Remove trailing whitespace from lines.</param>
/// <returns>This TextLoader instance.</returns>
public TextLoader CreateFrom<TInput>(bool useHeader = false, char separator = '\t', bool allowQuotedStrings = true, bool supportSparse = true, bool trimWhitespace = false)
{
    var fields = typeof(TInput).GetFields();

    // Build the columns in a local array and publish it only after every field has been
    // validated, so a failure half-way through does not leave Arguments.Column partly filled.
    var columns = new TextLoaderColumn[fields.Length];

    for (int index = 0; index < fields.Length; index++)
    {
        var field = fields[index];
        var mappingAttr = field.GetCustomAttribute<ColumnAttribute>();
        if (mappingAttr == null)
        {
            throw Contracts.Except($"{field.Name} is missing ColumnAttribute");
        }

        // Reject anything that is not a digit, ',', '*', '-' or '~'. Parentheses are literals
        // (not grouping) inside a character class, so they must not be listed in the set or
        // they would be accepted as valid ordinal characters.
        if (Regex.IsMatch(mappingAttr.Ordinal, @"[^0-9,\*\-~]"))
        {
            throw Contracts.Except($"{mappingAttr.Ordinal} contains invalid characters. " +
                $"Valid characters are 0-9, *, - and ~");
        }

        var name = mappingAttr.Name ?? field.Name;

        Runtime.Data.TextLoader.Range[] sources;
        if (!Runtime.Data.TextLoader.Column.TryParseSourceEx(mappingAttr.Ordinal, out sources))
        {
            throw Contracts.Except($"{mappingAttr.Ordinal} could not be parsed.");
        }
        Contracts.Assert(sources != null);

        // Array fields are typed by their element type.
        var valueType = field.FieldType.IsArray ? field.FieldType.GetElementType() : field.FieldType;
        DataKind dk;
        if (!TryGetDataKind(valueType, out dk))
        {
            throw Contracts.Except($"{name} is of unsupported type.");
        }

        var tlc = new TextLoaderColumn
        {
            Name = name,
            Type = dk,
            Source = new TextLoaderRange[sources.Length]
        };

        // Copy each parsed runtime range into the corresponding TextLoaderRange.
        for (int indexLocal = 0; indexLocal < tlc.Source.Length; indexLocal++)
        {
            var range = sources[indexLocal];
            tlc.Source[indexLocal] = new TextLoaderRange
            {
                AllOther = range.AllOther,
                AutoEnd = range.AutoEnd,
                ForceVector = range.ForceVector,
                VariableEnd = range.VariableEnd,
                Max = range.Max,
                Min = range.Min
            };
        }

        columns[index] = tlc;
    }

    Arguments.Column = columns;
    Arguments.HasHeader = useHeader;
    Arguments.Separator = new[] { separator };
    Arguments.AllowQuoting = allowQuotedStrings;
    Arguments.AllowSparse = supportSparse;
    Arguments.TrimWhitespace = trimWhitespace;

    return this;
}
/// <summary>
/// Configures this TextLoader by inferring the dataset schema from the public instance fields and
/// properties of <typeparamref name="TInput"/>.
/// </summary>
/// <remarks>
/// All public instance fields are considered, together with public instance properties that are
/// not indexers and have both a public getter and a public setter. Each such member must carry a
/// ColumnAttribute whose Ordinal describes the source column range(s). The column name is taken
/// from ColumnAttribute.Name, then from ColumnNameAttribute.Name, then from the member name.
/// For array members the element type determines the column's data kind.
/// An exception created through Contracts.Except is thrown when a member has no ColumnAttribute,
/// when the ordinal contains invalid characters or cannot be parsed, or when the member type is
/// not supported. In that case the loader's arguments are left untouched.
/// </remarks>
/// <typeparam name="TInput">Type whose public fields and properties describe the columns to load.</typeparam>
/// <param name="useHeader">Whether the file contains a header.</param>
/// <param name="separator">Column separator character. Default is '\t'.</param>
/// <param name="allowQuotedStrings">Whether the input may include quoted values,
/// which can contain separator characters, colons,
/// and distinguish empty values from missing values. When true, consecutive separators
/// denote a missing value and an empty value is denoted by "".
/// When false, consecutive separators denote an empty value.</param>
/// <param name="supportSparse">Whether the input may include sparse representations. For example,
/// if one of the rows contains "5 2:6 4:3", that means there are 5 columns, all zero
/// except for the 3rd and 5th columns, which have values 6 and 3.</param>
/// <param name="trimWhitespace">Remove trailing whitespace from lines.</param>
/// <returns>This TextLoader instance.</returns>
public TextLoader CreateFrom<TInput>(bool useHeader = false, char separator = '\t', bool allowQuotedStrings = true, bool supportSparse = true, bool trimWhitespace = false)
{
    var userType = typeof(TInput);

    var fieldInfos = userType.GetFields(BindingFlags.Public | BindingFlags.Instance);

    // Only non-indexer properties with a public getter and a public setter take part in the schema.
    var propertyInfos =
        userType
            .GetProperties(BindingFlags.Public | BindingFlags.Instance)
            .Where(x => x.CanRead && x.CanWrite && x.GetGetMethod() != null && x.GetSetMethod() != null && x.GetIndexParameters().Length == 0);

    // Fields first, then properties, matching the order columns are emitted in.
    var memberInfos = Enumerable.Concat<MemberInfo>(fieldInfos, propertyInfos).ToArray();

    // Build the columns in a local array and publish it only after every member has been
    // validated, so a failure half-way through does not leave Arguments.Column partly filled.
    var columns = new TextLoaderColumn[memberInfos.Length];

    for (int index = 0; index < memberInfos.Length; index++)
    {
        var memberInfo = memberInfos[index];
        var mappingAttr = memberInfo.GetCustomAttribute<ColumnAttribute>();
        if (mappingAttr == null)
        {
            throw Contracts.Except($"Field or property {memberInfo.Name} is missing ColumnAttribute");
        }

        // Reject anything that is not a digit, ',', '*', '-' or '~'. Parentheses are literals
        // (not grouping) inside a character class, so they must not be listed in the set or
        // they would be accepted as valid ordinal characters.
        if (Regex.IsMatch(mappingAttr.Ordinal, @"[^0-9,\*\-~]"))
        {
            throw Contracts.Except($"{mappingAttr.Ordinal} contains invalid characters. " +
                $"Valid characters are 0-9, *, - and ~");
        }

        var mappingNameAttr = memberInfo.GetCustomAttribute<ColumnNameAttribute>();
        var name = mappingAttr.Name ?? mappingNameAttr?.Name ?? memberInfo.Name;

        Runtime.Data.TextLoader.Range[] sources;
        if (!Runtime.Data.TextLoader.Column.TryParseSourceEx(mappingAttr.Ordinal, out sources))
        {
            throw Contracts.Except($"{mappingAttr.Ordinal} could not be parsed.");
        }
        Contracts.Assert(sources != null);

        // Extract the declared type and a label for error messages; the data-kind check
        // itself is identical for fields and properties.
        System.Type memberType;
        string memberKind;
        switch (memberInfo)
        {
            case FieldInfo field:
                memberType = field.FieldType;
                memberKind = "Field";
                break;

            case PropertyInfo property:
                memberType = property.PropertyType;
                memberKind = "Property";
                break;

            default:
                Contracts.Assert(false);
                throw Contracts.ExceptNotSupp("Expected a FieldInfo or a PropertyInfo");
        }

        // Array members are typed by their element type.
        var valueType = memberType.IsArray ? memberType.GetElementType() : memberType;
        DataKind dk;
        if (!TryGetDataKind(valueType, out dk))
        {
            throw Contracts.Except($"{memberKind} {name} is of unsupported type.");
        }

        var tlc = new TextLoaderColumn
        {
            Name = name,
            Type = dk,
            Source = new TextLoaderRange[sources.Length]
        };

        // Copy each parsed runtime range into the corresponding TextLoaderRange.
        for (int indexLocal = 0; indexLocal < tlc.Source.Length; indexLocal++)
        {
            var range = sources[indexLocal];
            tlc.Source[indexLocal] = new TextLoaderRange
            {
                AllOther = range.AllOther,
                AutoEnd = range.AutoEnd,
                ForceVector = range.ForceVector,
                VariableEnd = range.VariableEnd,
                Max = range.Max,
                Min = range.Min
            };
        }

        columns[index] = tlc;
    }

    Arguments.Column = columns;
    Arguments.HasHeader = useHeader;
    Arguments.Separator = new[] { separator };
    Arguments.AllowQuoting = allowQuotedStrings;
    Arguments.AllowSparse = supportSparse;
    Arguments.TrimWhitespace = trimWhitespace;

    return this;
}