/// <summary>
/// Saves this loader into <paramref name="ctx"/>: the tailing directory count, the schema,
/// the source directory indices, the sub-loader bytes and the file path parser.
/// </summary>
/// <param name="ctx">Model save context to write into; validated with <c>Contracts.CheckValue</c>.</param>
public void Save(ModelSaveContext ctx)
        {
            Contracts.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel();
            ctx.SetVersionInfo(GetVersionInfo());

            // *** Binary format ***
            // int: tailing directory count
            // Schema of the loader (stored through ctx.SaveBinaryStream under SchemaCtxName)
            // int[]: source directory indices
            // byte[]: sub-loader bytes
            // model: file path spec (stored through ctx.SaveModel under FilePathSpecCtxName)

            ctx.Writer.Write(_tailingDirCount);

            // The schema is written by running a silent BinarySaver over an EmptyDataView
            // built from Schema, selecting every column index.
            var schemaOnlyView = new EmptyDataView(_host, Schema);
            var binarySaver = new BinarySaver(_host, new BinarySaver.Arguments { Silent = true });

            using (var buffer = new MemoryStream())
            {
                var columnIndices = Enumerable.Range(0, Schema.Count).ToArray();
                binarySaver.SaveData(buffer, schemaOnlyView, columnIndices);
                ctx.SaveBinaryStream(SchemaCtxName, writer => writer.WriteByteArray(buffer.ToArray()));
            }

            ctx.Writer.WriteIntArray(_srcDirIndex);
            ctx.Writer.WriteByteArray(_subLoaderBytes);
            ctx.SaveModel(_pathParser, FilePathSpecCtxName);
        }
예제 #2
0
        /// <summary>
        /// Saves this loader into <paramref name="ctx"/>: the column chunk read size, the
        /// <c>TreatBigIntegersAsDates</c> option and the schema.
        /// </summary>
        /// <param name="ctx">Model save context to write into; validated with <c>Contracts.CheckValue</c>.</param>
        public void Save(ModelSaveContext ctx)
        {
            Contracts.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel();
            ctx.SetVersionInfo(GetVersionInfo());

            // *** Binary format ***
            // int: cached chunk size
            // bool: TreatBigIntegersAsDates flag
            // Schema of the loader (stored through ctx.SaveBinaryStream under SchemaCtxName)

            var modelWriter = ctx.Writer;
            modelWriter.Write(_columnChunkReadSize);
            modelWriter.Write(_parquetOptions.TreatBigIntegersAsDates);

            // The schema is written by running a silent BinarySaver over an EmptyDataView
            // built from Schema, selecting every column index.
            var schemaOnlyView = new EmptyDataView(_host, Schema);
            var binarySaver = new BinarySaver(_host, new BinarySaver.Arguments { Silent = true });

            using (var buffer = new MemoryStream())
            {
                var columnIndices = Enumerable.Range(0, Schema.ColumnCount).ToArray();
                binarySaver.SaveData(buffer, schemaOnlyView, columnIndices);
                ctx.SaveBinaryStream(SchemaCtxName, writer => writer.WriteByteArray(buffer.ToArray()));
            }
        }
        /// <summary>
        /// Returns the schema of the data view obtained by passing <c>_xf</c> and an
        /// <c>EmptyDataView</c> built from <paramref name="inputSchema"/> to
        /// <c>ApplyTransformUtils.ApplyAllTransformsToData</c>.
        /// </summary>
        /// <param name="inputSchema">Schema of the prospective input; validated with <c>_host.CheckValue</c>.</param>
        /// <returns>The <c>Schema</c> of the resulting data view.</returns>
        public ISchema GetOutputSchema(ISchema inputSchema)
        {
            _host.CheckValue(inputSchema, nameof(inputSchema));

            // Only the schema is needed, so the view handed to the helper is an EmptyDataView.
            var emptyInput = new EmptyDataView(_host, inputSchema);
            return ApplyTransformUtils.ApplyAllTransformsToData(_host, _xf, emptyInput).Schema;
        }
예제 #4
0
        /// <summary>
        /// Builds a <c>ConcatTransform</c> over <paramref name="input"/> and wraps the result of
        /// <c>ApplyTransformUtils.ApplyAllTransformsToData</c> in a <c>ConcatTransformer</c>.
        /// </summary>
        /// <param name="input">Data view to fit on; validated with <c>_host.CheckValue</c>.</param>
        /// <returns>A new <c>ConcatTransformer</c>.</returns>
        public ITransformer Fit(IDataView input)
        {
            _host.CheckValue(input, nameof(input));

            var concat = new ConcatTransform(_host, input, _name, _source);
            var schemaOnly = new EmptyDataView(_host, input.Schema);

            // NOTE(review): presumably this rebases the transform from 'input' onto the
            // EmptyDataView carrying the same schema - confirm against ApplyAllTransformsToData.
            return new ConcatTransformer(
                _host,
                ApplyTransformUtils.ApplyAllTransformsToData(_host, concat, schemaOnly, input));
        }
예제 #5
0
            /// <summary>
            /// Builds a <c>CompositeRowToRowMapper</c> for <paramref name="inputSchema"/> from the
            /// chain returned by <c>ApplyTransformUtils.ApplyAllTransformsToData</c> for <c>_xf</c>
            /// and an <c>EmptyDataView</c> of that schema.
            /// </summary>
            /// <param name="inputSchema">Schema of the prospective input; validated with <c>_host.CheckValue</c>.</param>
            /// <returns>A composite mapper over the chain's mappers, in reverse order of the walk.</returns>
            public IRowToRowMapper GetRowToRowMapper(ISchema inputSchema)
            {
                _host.CheckValue(inputSchema, nameof(inputSchema));

                var emptyInput = new EmptyDataView(_host, inputSchema);
                var mappers = new List<IRowToRowMapper>();

                // Walk backwards through the chain via Source, collecting each transform
                // until the current view is no longer an IDataTransform.
                IDataView current = ApplyTransformUtils.ApplyAllTransformsToData(_host, _xf, emptyInput);
                while (current is IDataTransform transform)
                {
                    // Everything in the chain ought to be a row mapper.
                    _host.Assert(transform is IRowToRowMapper);
                    mappers.Add((IRowToRowMapper)transform);
                    current = transform.Source;
                }

                // The walkback should have ended at the input.
                Contracts.Assert(current == emptyInput);

                // Mappers were gathered last-to-first; flip them before composing.
                mappers.Reverse();
                return new CompositeRowToRowMapper(inputSchema, mappers.ToArray());
            }