Exemplo n.º 1
0
        /// <summary>
        /// Serializes this loader into the given model context: first the two scalar settings
        /// (<c>_columnChunkReadSize</c> and <c>_parquetOptions.TreatBigIntegersAsDates</c>),
        /// then the loader schema as a binary stream named by <c>SchemaCtxName</c>.
        /// </summary>
        /// <param name="ctx">The model save context to write into; validated before any write.</param>
        public void Save(ModelSaveContext ctx)
        {
            Contracts.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel();
            ctx.SetVersionInfo(GetVersionInfo());

            // *** Binary format ***
            // int: cached chunk size
            // bool: TreatBigIntegersAsDates flag
            // Schema of the loader

            ctx.Writer.Write(_columnChunkReadSize);
            ctx.Writer.Write(_parquetOptions.TreatBigIntegersAsDates);

            // The schema is persisted by pushing an EmptyDataView built from Schema through
            // a silent BinarySaver, selecting every column.
            var schemaOnlyView = new EmptyDataView(_host, Schema);
            var schemaSaver = new BinarySaver(_host, new BinarySaver.Arguments { Silent = true });

            using (var buffer = new MemoryStream())
            {
                int[] everyColumn = Enumerable.Range(0, Schema.ColumnCount).ToArray();
                schemaSaver.SaveData(buffer, schemaOnlyView, everyColumn);

                byte[] schemaBytes = buffer.ToArray();
                ctx.SaveBinaryStream(SchemaCtxName, writer => writer.WriteByteArray(schemaBytes));
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Explicit <c>ICanSaveModel</c> implementation. Writes, in order: the tailing directory
        /// count, the loader schema (as a binary stream named by <c>SchemaCtxName</c>), the
        /// <c>_srcDirIndex</c> array, the sub-loader bytes, and finally the path parser as a
        /// sub-model named by <c>FilePathSpecCtxName</c>.
        /// </summary>
        /// <param name="ctx">The model save context to write into; validated before any write.</param>
        void ICanSaveModel.Save(ModelSaveContext ctx)
        {
            Contracts.CheckValue(ctx, nameof(ctx));
            ctx.CheckAtModel();
            ctx.SetVersionInfo(GetVersionInfo());

            // ** Binary format **
            // int: tailing directory count
            // Schema of the loader
            // int[]: source directory indices (_srcDirIndex)
            // byte[]: subloader
            // model: file path spec

            ctx.Writer.Write(_tailingDirCount);

            // The schema is persisted by pushing an EmptyDataView built from Schema through
            // a silent BinarySaver, selecting every column.
            var schemaOnlyView = new EmptyDataView(_host, Schema);
            var schemaSaver = new BinarySaver(_host, new BinarySaver.Arguments { Silent = true });

            using (var buffer = new MemoryStream())
            {
                int[] everyColumn = Enumerable.Range(0, Schema.Count).ToArray();
                schemaSaver.SaveData(buffer, schemaOnlyView, everyColumn);

                byte[] schemaBytes = buffer.ToArray();
                ctx.SaveBinaryStream(SchemaCtxName, writer => writer.WriteByteArray(schemaBytes));
            }

            // The remaining pieces follow the schema in the order given by the format comment.
            ctx.Writer.WriteIntArray(_srcDirIndex);
            ctx.Writer.WriteByteArray(_subLoaderBytes);
            ctx.SaveModel(_pathParser, FilePathSpecCtxName);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Produces a cached view of <c>input.Data</c>. For <c>CachingType.Memory</c> the data is
        /// wrapped in a <c>CacheDataView</c>; for <c>CachingType.Disk</c> the savable columns are
        /// written to a temporary file with <c>BinarySaver</c> and read back through a
        /// <c>BinaryLoader</c>.
        /// </summary>
        /// <param name="env">The host environment; must not be null.</param>
        /// <param name="input">The cache request; it and its <c>Data</c> must not be null.</param>
        /// <returns>A <c>CacheOutput</c> whose <c>OutputData</c> is the cached view.</returns>
        public static CacheOutput CacheData(IHostEnvironment env, CacheInput input)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register("CreateCache");

            host.CheckValue(input, nameof(input));
            host.CheckValue(input.Data, nameof(input.Data));

            IDataView cached;

            switch (input.Caching)
            {
                case CachingType.Memory:
                {
                    cached = new CacheDataView(env, input.Data, null);
                    break;
                }

                case CachingType.Disk:
                {
                    var binarySaver = new BinarySaver(host, new BinarySaver.Arguments
                    {
                        Compression = CompressionKind.Default,
                        Silent = true
                    });
                    var inputSchema = input.Data.Schema;

                    // Only columns whose type the saver reports as savable are written out.
                    var savableColumns = new List<int>();
                    for (int col = 0; col < inputSchema.ColumnCount; col++)
                    {
                        if (saver_IsSavable(binarySaver, inputSchema.GetColumnType(col)))
                            savableColumns.Add(col);
                    }

#pragma warning disable CS0618 // This ought to be addressed. See #1287.
                    // The temp file handle is intentionally not disposed here: it has to stay around
                    // for the execution of the graph, and is disposed when the environment is disposed.
                    var tempFile = host.CreateTempFile();
#pragma warning restore CS0618

                    using (var output = tempFile.CreateWriteStream())
                    {
                        binarySaver.SaveData(output, input.Data, savableColumns.ToArray());
                    }

                    cached = new BinaryLoader(host, new BinaryLoader.Arguments(), tempFile.OpenReadStream());
                    break;
                }

                default:
                    throw host.ExceptValue(nameof(input.Caching), $"Unrecognized caching option '{input.Caching}'");
            }

            return new CacheOutput { OutputData = cached };
        }

        // Thin wrapper around BinarySaver.IsColumnSavable, kept private to the caching entry point.
        private static bool saver_IsSavable(BinarySaver saver, ColumnType type)
        {
            return saver.IsColumnSavable(type);
        }
            /// <summary>
            /// Writes the optional-column bindings into <paramref name="ctx"/>: the schema of the
            /// data view holding the optional columns (as the binary stream "Schema.idv"), the
            /// number of added columns, and for each added column its name followed by a
            /// description of its type.
            /// </summary>
            /// <param name="env">Environment used to create the empty data view and the savers.</param>
            /// <param name="ctx">The model save context to write into.</param>
            /// <exception cref="System.InvalidOperationException">
            /// A column type could not be serialized. Previously this failure was ignored, leaving
            /// a column name in the stream with no type description after it.
            /// </exception>
            public void Save(IHostEnvironment env, ModelSaveContext ctx)
            {
                Contracts.AssertValue(ctx);

                // *** Binary format ***
                // Schema of the data view containing the optional columns
                // int: number of added columns
                // for each added column
                //   int: id of output column name
                //   ColumnType: the type of the column

                var noRows = new EmptyDataView(env, _inputWithOptionalColumn);
                var schemaSaver = new BinarySaver(env, new BinarySaver.Arguments { Silent = true });

                using (var strm = new MemoryStream())
                {
                    schemaSaver.SaveData(strm, noRows, _srcColsWithOptionalColumn);
                    ctx.SaveBinaryStream("Schema.idv", w => w.WriteByteArray(strm.ToArray()));
                }

                int size = InfoCount;
                ctx.Writer.Write(size);

                // A second saver with default arguments is used only for the type descriptions.
                var typeSaver = new BinarySaver(env, new BinarySaver.Arguments());
                for (int i = 0; i < size; i++)
                {
                    string columnName = GetColumnNameCore(i);
                    ctx.SaveNonEmptyString(columnName);

                    var columnType = ColumnTypes[i];
                    int written;

                    // The format above requires a type description after every column name, so a
                    // failed write must not be skipped silently.
                    // NOTE(review): assumes a false return means nothing was written - confirm
                    // against BinarySaver.TryWriteTypeDescription.
                    if (!typeSaver.TryWriteTypeDescription(ctx.Writer.BaseStream, columnType, out written))
                    {
                        throw new System.InvalidOperationException(
                            "Could not serialize the type '" + columnType + "' of optional column '" + columnName + "'.");
                    }
                }
            }