示例#1
0
        /// <summary>
        /// Appends the transformations described by <paramref name="mapper"/> to this estimator, yielding an
        /// estimator with the same input shape and <typeparamref name="TTupleNewOutShape"/> as its output shape.
        /// </summary>
        /// <typeparam name="TTupleNewOutShape">The shape type returned by <paramref name="mapper"/>.</typeparam>
        /// <param name="mapper">Delegate that maps the current output shape to the new output shape.
        /// Must not be <c>null</c>.</param>
        /// <returns>The estimator formed by appending the analyzed estimator chain to this one.</returns>
        public Estimator<TTupleInShape, TTupleNewOutShape, ITransformer> Append<[IsShape] TTupleNewOutShape>(Func<TTupleOutShape, TTupleNewOutShape> mapper)
        {
            Contracts.CheckValue(mapper, nameof(mapper));

            using (var channel = Env.Start(nameof(Append)))
            {
                var mapperInfo = mapper.Method;

                // Build a placeholder instance of the current output shape, and pair each of its columns
                // with the name derived from the mapper's single parameter.
                var analysisInput = StaticPipeInternalUtils.MakeAnalysisInstance<TTupleOutShape>(out var placeholderReconciler);
                KeyValuePair<string, PipelineColumn>[] namedInputs =
                    StaticPipeInternalUtils.GetNamesValues(analysisInput, mapperInfo.GetParameters()[0]);

                // At this point only the input columns have names. Assignment through the indexer means
                // that a column appearing more than once keeps the last name seen.
                var nameByColumn = new Dictionary<PipelineColumn, string>();
                foreach (var pair in namedInputs)
                    nameByColumn[pair.Value] = pair.Key;

                // Yields the known name for an input column, or null for a column that is not an input.
                string NameMap(PipelineColumn col)
                    => nameByColumn.TryGetValue(col, out var known) ? known : null;

                var readerEstimator = StaticPipeUtils.GeneralFunctionAnalyzer(
                    Env, channel, analysisInput, placeholderReconciler, mapper, out var tailEstimator, NameMap);
                channel.Assert(readerEstimator == null);
                channel.AssertValue(tailEstimator);

                var combined = AsDynamic.Append(tailEstimator);
                var outShape = StaticSchemaShape.Make<TTupleNewOutShape>(mapperInfo.ReturnParameter);
                var result = new Estimator<TTupleInShape, TTupleNewOutShape, ITransformer>(Env, combined, _inShape, outShape);
                channel.Done();
                return result;
            }
        }
        /// <summary>
        /// Creates a statically-typed reader for text files.
        /// </summary>
        /// <typeparam name="TShape">The shape type parameter, which must be a valid-schema shape. In practice
        /// callers rarely specify it explicitly; it is inferred from the return type of <paramref name="func"/>,
        /// which takes a <see cref="Context"/> and uses it to compose a shape-type instance describing what the
        /// columns are and how to load them from the file.</typeparam>
        /// <param name="env">The environment. Must not be <c>null</c>.</param>
        /// <param name="func">The delegate describing which fields to read from the text file and their input
        /// types. It is handed a <see cref="Context"/>, out of which the user composes a shape type made of
        /// <see cref="PipelineColumn"/> instances. The resulting data has columns named after the corresponding
        /// members of the shape type. Must not be <c>null</c>.</param>
        /// <param name="files">Input files. May be <c>null</c>, in which case no files are read; note that options
        /// or configurations that require input data for initialization (e.g., <paramref name="hasHeader"/>, or
        /// <see cref="Context.LoadFloat(int, int?)"/> with a <c>null</c> second argument) then have no input
        /// data to draw on.</param>
        /// <param name="hasHeader">Whether the data file has a header with feature names.</param>
        /// <param name="separator">The text field separator character.</param>
        /// <param name="allowQuoting">Whether the input may include quoted values, which can contain separator
        /// characters, colons, and distinguish empty values from missing values. When true, consecutive separators
        /// denote a missing value and an empty value is denoted by <c>""</c>. When false, consecutive separators
        /// denote an empty value.</param>
        /// <param name="allowSparse">Whether the input may include sparse representations.</param>
        /// <param name="trimWhitspace">Whether to remove trailing whitespace from lines.</param>
        /// <returns>A configured statically-typed reader for text files.</returns>
        public static DataReader<IMultiStreamSource, TShape> CreateReader<[IsShape] TShape>(
            IHostEnvironment env,
            Func<Context, TShape> func,
            IMultiStreamSource files = null,
            bool hasHeader = false,
            char separator = '\t',
            bool allowQuoting = true,
            bool allowSparse = true,
            bool trimWhitspace = false)
        {
            Contracts.CheckValue(env, nameof(env));
            env.CheckValue(func, nameof(func));
            env.CheckValueOrNull(files);

            // Every loader option except the columns is set here; the columns come from analyzing func below.
            var loaderArgs = new Arguments
            {
                AllowQuoting = allowQuoting,
                AllowSparse = allowSparse,
                HasHeader = hasHeader,
                SeparatorChars = new[] { separator },
                TrimWhitespace = trimWhitspace
            };

            var reconciler = new TextReconciler(loaderArgs, files);
            var context = new Context(reconciler);

            using (var channel = env.Start("Initializing " + nameof(TextLoader)))
            {
                var estimator = StaticPipeUtils.ReaderEstimatorAnalyzerHelper(env, channel, context, reconciler, func);
                Contracts.AssertValue(estimator);

                // Fitting on the (possibly null) files produces the reader that is handed back to the caller.
                var fitted = estimator.Fit(files);
                channel.Done();
                return fitted;
            }
        }
示例#3
0
        /// <summary>
        /// Resolves the column chosen by <paramref name="column"/> to its name via the indexer of
        /// <paramref name="data"/>, then fetches that column from the dynamic view.
        /// </summary>
        /// <param name="data">The statically-typed data view. Must not be <c>null</c>.</param>
        /// <param name="column">Selector that picks a column out of the shape's indices. Must not be <c>null</c>.</param>
        /// <returns>The values of the selected column, as returned by the dynamic view's <c>GetColumn</c>.</returns>
        private static IEnumerable<TOut> GetColumnCore<TOut, TShape>(DataView<TShape> data, Func<TShape, PipelineColumn> column)
        {
            Contracts.CheckValue(data, nameof(data));

            // The environment is taken from the data view itself and then used to validate the selector.
            var host = StaticPipeUtils.GetEnvironment(data);
            Contracts.AssertValue(host);
            host.CheckValue(column, nameof(column));

            var shapeIndexer = StaticPipeUtils.GetIndexer(data);
            var selected = column(shapeIndexer.Indices);
            string selectedName = shapeIndexer.Get(selected);

            return data.AsDynamic.GetColumn<TOut>(host, selectedName);
        }