Ejemplo n.º 1
0
        /// <summary>
        /// Builds the transform from user-supplied arguments: validates them, records the
        /// caching options, verifies that the saver settings can be instantiated and finally
        /// builds the inner pipeline through <c>CreatePipeline</c>.
        /// </summary>
        /// <param name="env">Host environment, forwarded to the base class and to the pipeline.</param>
        /// <param name="args">Caching options (in-memory or file, reuse, async, thread count, saver).</param>
        /// <param name="input">Data view to cache.</param>
        public ExtendedCacheTransform(IHostEnvironment env, Arguments args, IDataView input)
            : base(env, RegistrationName, input)
        {
            Host.CheckValue(args, "args");
            Host.CheckUserArg(args.inDataFrame || !string.IsNullOrEmpty(args.cacheFile), "cacheFile cannot be empty if inDataFrame is false.");
            Host.CheckUserArg(!args.async || args.inDataFrame, "inDataFrame must be true if async is true.");
            Host.CheckUserArg(!args.numTheads.HasValue || args.numTheads > 0, "numThread must be > 0 if specified.");
            var saverSettings = args.saverSettings as ICommandLineComponentFactory;

            Host.CheckValue(saverSettings, nameof(saverSettings));
            _saverSettings = string.Format("{0}{{{1}}}", saverSettings.Name, saverSettings.GetSettingsString());
            // With an empty settings string the format above yields "name{}"; strip the empty braces.
            _saverSettings = _saverSettings.Replace("{}", "");

            // Ordinal, case-insensitive comparison: the saver name is an identifier, not linguistic text.
            // A culture-sensitive ToLower() would turn "BINARY" into "bınary" under Turkish cultures and
            // wrongly reject a valid saver.
            if (!_saverSettings.StartsWith("binary", System.StringComparison.OrdinalIgnoreCase))
            {
                throw env.ExceptNotSupp("Only binary format is supported.");
            }
            _inDataFrame = args.inDataFrame;
            _cacheFile   = args.cacheFile;
            _reuse       = args.reuse;
            _async       = args.async;
            _numThreads  = args.numTheads;

            // The saver is instantiated only to fail early if the settings cannot be turned into one;
            // the instance itself is not kept.
            var saver = ComponentCreation.CreateSaver(Host, _saverSettings);

            if (saver == null)
            {
                throw Host.Except("Cannot parse '{0}'", _saverSettings);
            }

            _pipedTransform = CreatePipeline(env, input);
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Builds the inner transform that serves the cached data, driven by the fields
 /// _inDataFrame, _async, _numThreads, _cacheFile, _reuse and _saverSettings.
 /// In-memory mode wraps the input either in a <c>CacheDataView</c> (async) or in a
 /// <c>SortInDataFrameTransform</c> without sort column. File mode writes the input to
 /// _cacheFile (unless reuse is requested and the file already exists) and reads it back
 /// through a binary loader.
 /// </summary>
 /// <param name="env">Environment used to create the components of the pipeline.</param>
 /// <param name="input">Data view to cache.</param>
 /// <returns>The transform exposing the cached data.</returns>
 protected IDataTransform CreatePipeline(IHostEnvironment env, IDataView input)
 {
     if (_inDataFrame)
     {
         if (_async)
         {
             var cachedView = new CacheDataView(env, input, null);
             return new PassThroughTransform(env, new PassThroughTransform.Arguments(), cachedView);
         }

         var sortArgs = new SortInDataFrameTransform.Arguments()
         {
             numThreads = _numThreads,
             sortColumn = null
         };
         return new SortInDataFrameTransform(env, sortArgs, input);
     }

     // File-backed cache: make sure the file exists, then load it back.
     using (var channel = Host.Start("Caching data..."))
     {
         bool cacheAvailable = _reuse && File.Exists(_cacheFile);
         if (cacheAvailable)
         {
             channel.Info("Reusing cache '{0}'", _cacheFile);
         }
         else
         {
             channel.Info("Building cache '{0}'", _cacheFile);
             var cacheSaver = ComponentCreation.CreateSaver(env, _saverSettings);
             using (var output = Host.CreateOutputFile(_cacheFile))
             {
                 DataSaverUtils.SaveDataView(channel, cacheSaver, input, output, true);
             }
         }
     }

     // NOTE(review): when a thread count is set, the loader settings come out as
     // "binary{{t=N}}" (braces doubled) -- confirm the settings parser accepts this form.
     string threadSettings = string.Empty;
     if (_numThreads > 0)
     {
         threadSettings = string.Format("{{t={0}}}", _numThreads);
     }
     string loaderSettings = string.Format("binary{{{0}}}", threadSettings);

     var cacheLoader = ComponentCreation.CreateLoader(env, loaderSettings, new MultiFileSource(_cacheFile));

     // The reloaded cache must expose the same schema as the data it was built from.
     SchemaHelper.CheckSchema(Host, input.Schema, cacheLoader.Schema);
     return ComponentCreation.CreateTransform(env, "skip{s=0}", cacheLoader);
 }
Ejemplo n.º 3
0
        /// <summary>
        /// Rebuilds the transform from a saved model. Fields are read from the context in
        /// this order: in-memory flag, async flag, thread count, saver settings and, for a
        /// file-backed cache only, the cache file name and the reuse flag. The inner pipeline
        /// is then rebuilt through <c>CreatePipeline</c>.
        /// </summary>
        /// <param name="host">Host used for checks and to build the pipeline.</param>
        /// <param name="ctx">Model context to read the saved state from.</param>
        /// <param name="input">Data view to cache.</param>
        private ExtendedCacheTransform(IHost host, ModelLoadContext ctx, IDataView input)
            : base(host, input)
        {
            Host.CheckValue(input, "input");
            Host.CheckValue(ctx, "ctx");

            var reader = ctx.Reader;
            _inDataFrame = reader.ReadBoolean();
            _async = reader.ReadBoolean();

            // A stored value of -1 stands for "no thread count"; anything below is rejected.
            int storedThreads = reader.ReadInt32();
            host.Check(storedThreads > -2, "_numThreads");
            if (storedThreads < 0)
            {
                _numThreads = null;
            }
            else
            {
                _numThreads = storedThreads;
            }

            _saverSettings = reader.ReadString();

            if (!_inDataFrame)
            {
                // File name and reuse flag are only present for a file-backed cache.
                _cacheFile = reader.ReadString();
                _reuse = reader.ReadBoolean();
                host.CheckValue(_cacheFile, "_cacheFile");
            }
            else
            {
                _cacheFile = null;
                _reuse = false;
            }

            // Instantiate the saver only to validate the stored settings.
            if (ComponentCreation.CreateSaver(Host, _saverSettings) == null)
            {
                throw Host.Except("Cannot parse '{0}'", _saverSettings);
            }

            _pipedTransform = CreatePipeline(host, input);
        }