/// <summary>
/// Creates the transform from user-supplied arguments: validates them, builds the saver
/// settings string (which must designate the binary saver), checks that a saver can be
/// created from it, and finally builds the inner pipeline.
/// </summary>
/// <param name="env">Host environment, forwarded to the base constructor and to the pipeline.</param>
/// <param name="args">Transform arguments, checked with Host.CheckValue.</param>
/// <param name="input">Input data view.</param>
public ExtendedCacheTransform(IHostEnvironment env, Arguments args, IDataView input)
            : base(env, RegistrationName, input)
        {
            Host.CheckValue(args, "args");
            Host.CheckUserArg(args.inDataFrame || !string.IsNullOrEmpty(args.cacheFile), "cacheFile cannot be empty if inDataFrame is false.");
            Host.CheckUserArg(!args.async || args.inDataFrame, "inDataFrame must be true if async is true.");
            Host.CheckUserArg(!args.numTheads.HasValue || args.numTheads > 0, "numThread must be > 0 if specified.");
            var saverSettings = args.saverSettings as ICommandLineComponentFactory;

            Host.CheckValue(saverSettings, nameof(saverSettings));

            // Rebuild "name{settings}" and drop the braces when there are no settings.
            _saverSettings = string.Format("{0}{{{1}}}", saverSettings.Name, saverSettings.GetSettingsString());
            _saverSettings = _saverSettings.Replace("{}", "");

            // Ordinal, case-insensitive comparison: ToLower() depends on the current culture
            // (e.g. "BINARY" lower-cases to "bınary" under Turkish cultures), which would
            // wrongly reject a valid saver name.
            if (!_saverSettings.StartsWith("binary", System.StringComparison.OrdinalIgnoreCase))
            {
                throw env.ExceptNotSupp("Only binary format is supported.");
            }
            _inDataFrame = args.inDataFrame;
            _cacheFile   = args.cacheFile;
            _reuse       = args.reuse;
            _async       = args.async;
            _numThreads  = args.numTheads;

            // The saver instance is only created to validate that the settings can be parsed.
            var saver = ComponentCreation.CreateSaver(Host, _saverSettings);

            if (saver == null)
            {
                throw Host.Except("Cannot parse '{0}'", _saverSettings);
            }

            _pipedTransform = CreatePipeline(env, input);
        }
Beispiel #2
0
 /// <summary>
 /// Initializes the state: registers a host named "SortInDataFrameState", checks the
 /// input view, and stores the sorting parameters.
 /// </summary>
 /// <param name="host">Environment used to register the host of this state.</param>
 /// <param name="input">Input data view, checked with CheckValue.</param>
 /// <param name="sortColumn">Sort column index; a negative value sets _canShuffle.</param>
 /// <param name="reverse">Stored in _reverse.</param>
 /// <param name="numThreads">Optional thread count, stored as is.</param>
 public SortInDataFrameState(IHostEnvironment host, IDataView input, int sortColumn, bool reverse, int? numThreads)
 {
     _host = host.Register("SortInDataFrameState");
     _host.CheckValue(input, nameof(input));

     // Values copied straight from the parameters.
     _input      = input;
     _sortColumn = sortColumn;
     _reverse    = reverse;
     _numThreads = numThreads;

     // Values derived from the parameters or starting empty.
     _canShuffle = sortColumn < 0;
     _autoView   = null;
     _lock       = new object();
 }
 /// <summary>
 /// Builds the inner transform from the fields _inDataFrame, _async, _numThreads,
 /// _cacheFile, _reuse and _saverSettings.
 /// </summary>
 /// <param name="env">Environment used to create the views, savers, loaders and transforms.</param>
 /// <param name="input">Data view to cache.</param>
 /// <returns>The transform serving the cached data.</returns>
 protected IDataTransform CreatePipeline(IHostEnvironment env, IDataView input)
 {
     // In-memory, asynchronous: a CacheDataView wrapped into a pass-through transform.
     if (_inDataFrame && _async)
     {
         var cachedView = new CacheDataView(env, input, null);
         return new PassThroughTransform(env, new PassThroughTransform.Arguments(), cachedView);
     }

     // In-memory, synchronous: a SortInDataFrameTransform without any sort column.
     if (_inDataFrame)
     {
         var sortArgs = new SortInDataFrameTransform.Arguments()
         {
             numThreads = _numThreads,
             sortColumn = null
         };
         return new SortInDataFrameTransform(env, sortArgs, input);
     }

     // On-disk cache: optional thread setting for the loader.
     string threadOption = string.Empty;
     if (_numThreads > 0)
     {
         threadOption = string.Format("{{t={0}}}", _numThreads);
     }

     using (var channel = Host.Start("Caching data..."))
     {
         bool reuseExisting = _reuse && File.Exists(_cacheFile);
         if (!reuseExisting)
         {
             channel.Info(MessageSensitivity.UserData, "Building cache '{0}'", _cacheFile);
             var saver = ComponentCreation.CreateSaver(env, _saverSettings);
             using (var output = Host.CreateOutputFile(_cacheFile))
             {
                 DataSaverUtils.SaveDataView(channel, saver, input, output, true);
             }
         }
         else
         {
             channel.Info(MessageSensitivity.UserData, "Reusing cache '{0}'", _cacheFile);
         }
     }

     // Load the cache file back and verify that it has the same schema as the input.
     string loaderSettings = string.Format("binary{{{0}}}", threadOption);
     var loader = ComponentCreation.CreateLoader(env, loaderSettings, new MultiFileSource(_cacheFile));
     SchemaHelper.CheckSchema(Host, input.Schema, loader.Schema);

     // Wrap the loader into a transform through the "skip{s=0}" component.
     return ComponentCreation.CreateTransform(env, "skip{s=0}", loader);
 }
Beispiel #4
0
        /// <summary>
        /// Creates the transform from user-supplied arguments. When a sort column is given,
        /// it is looked up in the input schema and must not be a vector column.
        /// </summary>
        /// <param name="env">Host environment, forwarded to the base constructor.</param>
        /// <param name="args">Transform arguments (sortColumn, reverse, numThreads).</param>
        /// <param name="input">Input data view whose schema is used to resolve the sort column.</param>
        public SortInDataFrameTransform(IHostEnvironment env, Arguments args, IDataView input)
            : base(env, RegistrationName, input)
        {
            Host.CheckValue(args, "args");
            // The condition rejects zero as well as negative values, so the message states
            // the actual constraint instead of "cannot be negative".
            Host.CheckUserArg(!args.numThreads.HasValue || args.numThreads.Value > 0, "numThreads must be > 0 if specified.");

            // An empty sort column is allowed; validation only applies when one is specified.
            if (!string.IsNullOrEmpty(args.sortColumn))
            {
                var schema = input.Schema;
                int index  = SchemaHelper.GetColumnIndex(schema, args.sortColumn);
                var type   = schema[index].Type;
                Host.Check(!type.IsVector(), "sortColumn cannot be a vector.");
            }

            _reverse    = args.reverse;
            _sortColumn = args.sortColumn;
            _numThreads = args.numThreads;
            _transform  = CreateTemplatedTransform();
        }
Beispiel #5
0
        /// <summary>
        /// Deserialization constructor. Reads, in this order, the sort column, the reverse
        /// flag and the thread count from <paramref name="ctx"/>; a negative thread count
        /// is mapped to null.
        /// </summary>
        /// <param name="host">Host forwarded to the base constructor.</param>
        /// <param name="ctx">Model context to read from.</param>
        /// <param name="input">Input data view whose schema is used to resolve the sort column.</param>
        private SortInDataFrameTransform(IHost host, ModelLoadContext ctx, IDataView input) : base(host, input)
        {
            Host.CheckValue(input, "input");
            Host.CheckValue(ctx, "ctx");

            _sortColumn = ctx.Reader.ReadString();
            Host.AssertValue(_sortColumn);

            // Same guard as the public constructor: a transform may have no sort column,
            // in which case there is nothing to resolve or validate in the schema.
            if (!string.IsNullOrEmpty(_sortColumn))
            {
                var schema = input.Schema;
                int index  = SchemaHelper.GetColumnIndex(schema, _sortColumn);
                var type   = schema[index].Type;

                Host.Check(!type.IsVector(), "sortColumn cannot be a vector.");
            }

            _reverse    = ctx.Reader.ReadBoolean();
            _numThreads = ctx.Reader.ReadInt32();
            if (_numThreads < 0)
            {
                _numThreads = null;
            }
            _transform = CreateTemplatedTransform();
        }
        /// <summary>
        /// Deserialization constructor. Reads, in this order: the in-data-frame flag, the
        /// async flag, the thread count, the saver settings and, only when the cache is not
        /// in memory, the cache file name and the reuse flag. Then builds the inner pipeline.
        /// </summary>
        /// <param name="host">Host forwarded to the base constructor and to the pipeline.</param>
        /// <param name="ctx">Model context to read from.</param>
        /// <param name="input">Input data view.</param>
        private ExtendedCacheTransform(IHost host, ModelLoadContext ctx, IDataView input) :
            base(host, input)
        {
            Host.CheckValue(input, "input");
            Host.CheckValue(ctx, "ctx");

            var reader = ctx.Reader;

            _inDataFrame = reader.ReadBoolean();
            _async       = reader.ReadBoolean();

            // Values below -1 are rejected; any remaining negative value means "not specified".
            int storedThreads = reader.ReadInt32();
            host.Check(storedThreads > -2, "_numThreads");
            _numThreads = storedThreads < 0 ? (int?)null : storedThreads;

            _saverSettings = reader.ReadString();

            if (!_inDataFrame)
            {
                // The cache file and reuse flag are only read for an on-disk cache.
                _cacheFile = reader.ReadString();
                _reuse     = reader.ReadBoolean();
                host.CheckValue(_cacheFile, "_cacheFile");
            }
            else
            {
                _cacheFile = null;
                _reuse     = false;
            }

            // The saver is only created to validate that the settings can be parsed.
            if (ComponentCreation.CreateSaver(Host, _saverSettings) == null)
            {
                throw Host.Except("Cannot parse '{0}'", _saverSettings);
            }

            _pipedTransform = CreatePipeline(host, input);
        }
Beispiel #7
0
        /// <summary>
        /// Loads a <see cref="SortInDataFrameTransform"/> from a model context after checking
        /// the arguments and the model version information.
        /// </summary>
        /// <param name="env">Environment used to register the host.</param>
        /// <param name="ctx">Model context to read from.</param>
        /// <param name="input">Input data view of the loaded transform.</param>
        /// <returns>The loaded transform.</returns>
        public static SortInDataFrameTransform Create(IHostEnvironment env, ModelLoadContext ctx, IDataView input)
        {
            Contracts.CheckValue(env, nameof(env));
            var host = env.Register(RegistrationName);

            host.CheckValue(ctx, nameof(ctx));
            host.CheckValue(input, nameof(input));
            ctx.CheckAtModel(GetVersionInfo());

            return host.Apply("Loading Model", channel => new SortInDataFrameTransform(host, ctx, input));
        }