/// <summary>
/// Creates a statically-typed reader for text files.
/// </summary>
/// <typeparam name="TShape">The shape type of the data being read, which must be a valid-schema shape. In
/// practice callers rarely spell it out; it is inferred from the return type of <paramref name="func"/>, which
/// receives a <see cref="Context"/> and uses it to compose a shape-type instance describing which columns
/// exist and how each one is loaded from the file.</typeparam>
/// <param name="env">The environment.</param>
/// <param name="func">The delegate describing which fields to read from the text file and what their input
/// types are. It is handed a <see cref="Context"/>, out of which the user composes a shape type made of
/// <see cref="PipelineColumn"/> instances. The columns of the resulting data are named after the
/// corresponding members of the shape type.</param>
/// <param name="files">Input files. May be <c>null</c>, in which case no files are read; note that options or
/// configurations that require input data for initialization (for example, <paramref name="hasHeader"/>, or
/// <see cref="Context.LoadFloat(int, int?)"/> with a <c>null</c> second argument) then have no data to
/// initialize from.</param>
/// <param name="hasHeader">Whether the data file has a header with feature names.</param>
/// <param name="separator">The text field separator.</param>
/// <param name="allowQuoting">Whether the input may include quoted values, which can contain separator
/// characters, colons, and distinguish empty values from missing values. When <c>true</c>, consecutive
/// separators denote a missing value and an empty value is denoted by <c>""</c>. When <c>false</c>,
/// consecutive separators denote an empty value.</param>
/// <param name="allowSparse">Whether the input may include sparse representations.</param>
/// <param name="trimWhitspace">Whether to remove trailing whitespace from lines.</param>
/// <returns>A configured statically-typed reader for text files.</returns>
public static DataReader<IMultiStreamSource, TShape> CreateReader<[IsShape] TShape>(
    IHostEnvironment env,
    Func<Context, TShape> func,
    IMultiStreamSource files = null,
    bool hasHeader = false,
    char separator = '\t',
    bool allowQuoting = true,
    bool allowSparse = true,
    bool trimWhitspace = false)
{
    // NOTE(review): "trimWhitspace" is misspelled, but it is part of the public signature and
    // renaming it would break callers that pass it as a named argument.
    Contracts.CheckValue(env, nameof(env));
    env.CheckValue(func, nameof(func));
    env.CheckValueOrNull(files);

    // Every loader argument is set here except the column definitions.
    var loaderArgs = new TextLoader.Arguments
    {
        AllowQuoting = allowQuoting,
        AllowSparse = allowSparse,
        HasHeader = hasHeader,
        Separators = new[] { separator },
        TrimWhitespace = trimWhitspace
    };

    var reconciler = new TextReconciler(loaderArgs, files);
    var context = new Context(reconciler);

    using (var channel = env.Start("Initializing " + nameof(TextLoader)))
    {
        var readerEstimator = StaticPipeUtils.ReaderEstimatorAnalyzerHelper(env, channel, context, reconciler, func);
        Contracts.AssertValue(readerEstimator);

        // Fitting against the (possibly null) input files yields the configured reader.
        return readerEstimator.Fit(files);
    }
}
/// <summary>
/// Builds a new statically-typed estimator by appending, to this estimator's dynamic form, the estimator
/// produced from analyzing <paramref name="mapper"/>.
/// </summary>
/// <typeparam name="TNewOutShape">The output shape type of the resulting estimator; it is the return type
/// of <paramref name="mapper"/>.</typeparam>
/// <param name="mapper">The delegate mapping an instance of the current output shape to an instance of the
/// new output shape. Must not be <c>null</c>.</param>
/// <returns>An estimator with the same input shape as this one and with the output shape derived from the
/// return parameter of <paramref name="mapper"/>.</returns>
public Estimator<TInShape, TNewOutShape, ITransformer> Append<[IsShape] TNewOutShape>(Func<TOutShape, TNewOutShape> mapper)
{
    Contracts.CheckValue(mapper, nameof(mapper));

    using (var channel = Env.Start(nameof(Append)))
    {
        var mapperMethod = mapper.Method;

        // Build a placeholder instance of the current output shape for the mapper to be analyzed against.
        var analysisInput = StaticPipeInternalUtils.MakeAnalysisInstance<TOutShape>(out var placeholderReconciler);
        KeyValuePair<string, PipelineColumn>[] namedInputs =
            StaticPipeInternalUtils.GetNamesValues(analysisInput, mapperMethod.GetParameters()[0]);

        // At the outset the only columns with known names are the inputs themselves.
        var nameByColumn = new Dictionary<PipelineColumn, string>();
        foreach (var pair in namedInputs)
        {
            nameByColumn[pair.Value] = pair.Key;
        }

        // Yields the known name for a column, or null when the column has not been named.
        string NameMap(PipelineColumn col)
            => nameByColumn.TryGetValue(col, out var knownName) ? knownName : null;

        var readerEstimator = StaticPipeUtils.GeneralFunctionAnalyzer(
            Env, channel, analysisInput, placeholderReconciler, mapper, out var tailEstimator, NameMap);

        // No reader estimator is expected from this analysis, only the estimator tail.
        channel.Assert(readerEstimator == null);
        channel.AssertValue(tailEstimator);

        return new Estimator<TInShape, TNewOutShape, ITransformer>(
            Env,
            AsDynamic.Append(tailEstimator),
            _inShape,
            StaticSchemaShape.Make<TNewOutShape>(mapperMethod.ReturnParameter));
    }
}
/// <summary>
/// Resolves the column picked by <paramref name="column"/> to its name and fetches that column from the
/// dynamic form of <paramref name="data"/>.
/// </summary>
/// <typeparam name="TOut">The item type requested for the column values.</typeparam>
/// <typeparam name="TShape">The shape type of the statically-typed data view.</typeparam>
/// <param name="data">The statically-typed data view to read from. Must not be <c>null</c>.</param>
/// <param name="column">The delegate selecting a column out of the shape. Must not be <c>null</c>.</param>
/// <returns>The result of <c>GetColumn</c> on the dynamic data view for the resolved column name.</returns>
private static IEnumerable<TOut> GetColumnCore<TOut, TShape>(DataView<TShape> data, Func<TShape, PipelineColumn> column)
{
    Contracts.CheckValue(data, nameof(data));

    // The environment is taken from the data view, so the selector can only be validated
    // against it once the view itself is known to be non-null.
    var host = StaticPipeUtils.GetEnvironment(data);
    Contracts.AssertValue(host);
    host.CheckValue(column, nameof(column));

    var shapeIndexer = StaticPipeUtils.GetIndexer(data);

    // Run the selector over the indexer's shape instance, then look up the name of the column it picked.
    PipelineColumn selected = column(shapeIndexer.Indices);
    string selectedName = shapeIndexer.Get(selected);

    return data.AsDynamic.GetColumn<TOut>(host, selectedName);
}