/// <summary>
/// Writes this loader's state into <paramref name="ctx"/>: the column chunk read size,
/// the TreatBigIntegersAsDates option, and the loader schema as a binary sub-stream.
/// </summary>
/// <param name="ctx">The model save context to write to; validated before any write.</param>
void ICanSaveModel.Save(ModelSaveContext ctx)
        {
            Contracts.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel();
            ctx.SetVersionInfo(GetVersionInfo());

            // *** Binary format ***
            // int: cached chunk size
            // bool: TreatBigIntegersAsDates flag
            // Schema of the loader

            ctx.Writer.Write(_columnChunkReadSize);
            ctx.Writer.Write(_parquetOptions.TreatBigIntegersAsDates);

            // The schema is stored by running a silent BinarySaver over an
            // EmptyDataView built from Schema, with every column selected.
            var schemaOnlyView = new EmptyDataView(_host, Schema);
            var schemaSaver = new BinarySaver(_host, new BinarySaver.Arguments { Silent = true });

            using (var buffer = new MemoryStream())
            {
                int[] columnIndices = Enumerable.Range(0, Schema.Count).ToArray();
                schemaSaver.SaveData(buffer, schemaOnlyView, columnIndices);

                byte[] schemaBytes = buffer.ToArray();
                ctx.SaveBinaryStream(SchemaCtxName, writer => writer.WriteByteArray(schemaBytes));
            }
        }
Example #2
0
        /// <summary>
        /// Writes this loader's state into <paramref name="ctx"/>: the tailing directory count,
        /// the loader schema (as a binary sub-stream), the source directory indices,
        /// the sub-loader bytes, and the path parser as a sub-model.
        /// </summary>
        /// <param name="ctx">The model save context to write to; validated before any write.</param>
        public void Save(ModelSaveContext ctx)
        {
            Contracts.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel();
            ctx.SetVersionInfo(GetVersionInfo());

            // ** Binary format **
            // int: tailing directory count
            // Schema of the loader
            // int[]: srcColumns
            // byte[]: subloader
            // model: file path spec

            ctx.Writer.Write(_tailingDirCount);

            // The schema is stored by running a silent BinarySaver over an
            // EmptyDataView built from Schema, with every column selected.
            var schemaOnlyView = new EmptyDataView(_host, Schema);
            var schemaSaver = new BinarySaver(_host, new BinarySaver.Arguments { Silent = true });

            using (var buffer = new MemoryStream())
            {
                int[] columnIndices = Enumerable.Range(0, Schema.Count).ToArray();
                schemaSaver.SaveData(buffer, schemaOnlyView, columnIndices);

                byte[] schemaBytes = buffer.ToArray();
                ctx.SaveBinaryStream(SchemaCtxName, writer => writer.WriteByteArray(schemaBytes));
            }

            // Remaining fields follow in the order listed in the format comment above.
            ctx.Writer.WriteIntArray(_srcDirIndex);
            ctx.Writer.WriteByteArray(_subLoaderBytes);
            ctx.SaveModel(_pathParser, FilePathSpecCtxName);
        }
        /// <summary>
        /// Returns the schema produced by applying the wrapped transform chain to an
        /// EmptyDataView that has <paramref name="inputSchema"/>.
        /// </summary>
        /// <param name="inputSchema">Schema of the prospective input; checked for null.</param>
        /// <returns>The schema of the transformed view.</returns>
        public DataViewSchema GetOutputSchema(DataViewSchema inputSchema)
        {
            _host.CheckValue(inputSchema, nameof(inputSchema));

            // Only the schema is needed, so the transforms are applied to an EmptyDataView.
            var emptyInput = new EmptyDataView(_host, inputSchema);
            return ApplyTransformUtils.ApplyAllTransformsToData(_host, _xf, emptyInput).Schema;
        }
        /// <summary>
        /// Builds a composite row-to-row mapper for <paramref name="inputSchema"/> by applying
        /// the transform chain to an EmptyDataView and collecting each link of the chain.
        /// </summary>
        /// <param name="inputSchema">Schema of the prospective input; checked for null.</param>
        /// <returns>A <c>CompositeRowToRowMapper</c> over the collected mappers, in source-to-output order.</returns>
        IRowToRowMapper ITransformer.GetRowToRowMapper(DataViewSchema inputSchema)
        {
            _host.CheckValue(inputSchema, nameof(inputSchema));

            var input = new EmptyDataView(_host, inputSchema);
            var mappers = new List<IRowToRowMapper>();

            // Walk from the end of the applied chain back towards the source,
            // so the mappers are gathered in reverse (output-to-input) order.
            IDataView current = ApplyTransformUtils.ApplyAllTransformsToData(_host, _xf, input);
            while (current is IDataTransform transform)
            {
                // Everything in the chain ought to be a row mapper.
                _host.Assert(transform is IRowToRowMapper);
                mappers.Add((IRowToRowMapper)transform);
                current = transform.Source;
            }

            // The walkback should have ended at the input.
            Contracts.Assert(current == input);

            // Flip into input-to-output order before composing.
            mappers.Reverse();
            return new CompositeRowToRowMapper(inputSchema, mappers.ToArray());
        }