/// <summary>
/// Builds the transform from user arguments: validates them, stores the caching
/// configuration in the instance fields and creates the underlying pipeline.
/// </summary>
/// <param name="env">Host environment, forwarded to the base class and to <c>CreatePipeline</c>.</param>
/// <param name="args">Caching options (inDataFrame, cacheFile, reuse, async, numTheads, saverSettings).</param>
/// <param name="input">Data view to cache.</param>
/// <remarks>
/// Throws through the host when an argument is inconsistent, when the saver is not
/// the binary one, or when the saver settings cannot be turned into a saver.
/// </remarks>
public ExtendedCacheTransform(IHostEnvironment env, Arguments args, IDataView input)
    : base(env, RegistrationName, input)
{
    Host.CheckValue(args, "args");

    // A cache file is mandatory unless the data is kept in memory.
    Host.CheckUserArg(args.inDataFrame || !string.IsNullOrEmpty(args.cacheFile), "cacheFile cannot be empty if inDataFrame is false.");
    // Asynchronous caching is only available for the in-memory mode.
    Host.CheckUserArg(!args.async || args.inDataFrame, "inDataFrame must be true if async is true.");
    Host.CheckUserArg(!args.numTheads.HasValue || args.numTheads > 0, "numThread must be > 0 if specified.");

    var saverSettings = args.saverSettings as ICommandLineComponentFactory;
    Host.CheckValue(saverSettings, nameof(saverSettings));

    // Rebuild the "name{settings}" specification; a saver without settings
    // collapses from "name{}" to "name".
    _saverSettings = string.Format("{0}{{{1}}}", saverSettings.Name, saverSettings.GetSettingsString());
    _saverSettings = _saverSettings.Replace("{}", "");

    // Ordinal, case-insensitive comparison: a culture-sensitive ToLower()/StartsWith
    // pair rejects "BINARY" under cultures with special casing rules (e.g. Turkish
    // dotless i).
    if (!_saverSettings.StartsWith("binary", System.StringComparison.OrdinalIgnoreCase))
    {
        throw env.ExceptNotSupp("Only binary format is supported.");
    }

    _inDataFrame = args.inDataFrame;
    _cacheFile = args.cacheFile;
    _reuse = args.reuse;
    _async = args.async;
    _numThreads = args.numTheads;

    // The saver is instantiated here only to validate the settings string early.
    var saver = ComponentCreation.CreateSaver(Host, _saverSettings);
    if (saver == null)
    {
        throw Host.Except("Cannot parse '{0}'", _saverSettings);
    }

    _pipedTransform = CreatePipeline(env, input);
}
/// <summary>
/// Creation of the pipeline knowing parameters _inDataFrame, _cacheFile, _reuse
/// (and _async, _numThreads, _saverSettings).
/// </summary>
/// <param name="env">Environment used to create the views, saver, loader and transforms.</param>
/// <param name="input">Data view to cache.</param>
/// <returns>
/// In-memory mode: a <c>PassThroughTransform</c> over a <c>CacheDataView</c> when
/// <c>_async</c> is set, otherwise a <c>SortInDataFrameTransform</c> with no sort column.
/// File mode: a transform reading the binary cache file <c>_cacheFile</c>, which is
/// written first unless <c>_reuse</c> is set and the file already exists.
/// </returns>
protected IDataTransform CreatePipeline(IHostEnvironment env, IDataView input)
{
    if (_inDataFrame)
    {
        if (_async)
        {
            var view = new CacheDataView(env, input, null);
            return new PassThroughTransform(env, new PassThroughTransform.Arguments(), view);
        }

        var args = new SortInDataFrameTransform.Arguments() { numThreads = _numThreads, sortColumn = null };
        return new SortInDataFrameTransform(env, args, input);
    }

    using (var ch = Host.Start("Caching data..."))
    {
        if (_reuse && File.Exists(_cacheFile))
        {
            ch.Info("Reusing cache '{0}'", _cacheFile);
        }
        else
        {
            ch.Info("Building cache '{0}'", _cacheFile);
            var saver = ComponentCreation.CreateSaver(env, _saverSettings);
            using (var fs0 = Host.CreateOutputFile(_cacheFile))
            {
                DataSaverUtils.SaveDataView(ch, saver, input, fs0, true);
            }
        }
    }

    // The loader settings must be wrapped in a single pair of braces. Formatting
    // "{t=N}" first and then inserting it into "binary{...}" produced
    // "binary{{t=N}}" whenever a thread count was specified.
    string loaderSettings = _numThreads > 0
        ? string.Format("binary{{t={0}}}", _numThreads)
        : "binary{}";

    var loader = ComponentCreation.CreateLoader(env, loaderSettings, new MultiFileSource(_cacheFile));

    // The cache (possibly a reused file) must expose the same schema as the input.
    SchemaHelper.CheckSchema(Host, input.Schema, loader.Schema);

    // NOTE(review): "skip{s=0}" presumably skips zero rows and only serves to
    // expose the loader as an IDataTransform; confirm against the skip filter.
    return ComponentCreation.CreateTransform(env, "skip{s=0}", loader);
}
/// <summary>
/// Deserialization constructor: restores the caching configuration from a model
/// context and recreates the pipeline over <paramref name="input"/>.
/// </summary>
/// <param name="host">Host forwarded to the base class and used for validation.</param>
/// <param name="ctx">Model context whose reader supplies the stored fields.</param>
/// <param name="input">Data view to cache.</param>
/// <remarks>
/// Values are read in this order: inDataFrame (bool), async (bool), numThreads (int,
/// a negative value meaning "not specified"), saver settings (string), then, only
/// when the cache is not in memory, cache file (string) and reuse (bool).
/// </remarks>
private ExtendedCacheTransform(IHost host, ModelLoadContext ctx, IDataView input)
    : base(host, input)
{
    Host.CheckValue(input, "input");
    Host.CheckValue(ctx, "ctx");

    var reader = ctx.Reader;

    _inDataFrame = reader.ReadBoolean();
    _async = reader.ReadBoolean();

    // Any negative value accepted by the check (i.e. -1) encodes a missing thread count.
    int storedThreads = reader.ReadInt32();
    host.Check(storedThreads > -2, "_numThreads");
    _numThreads = storedThreads < 0 ? (int?)null : storedThreads;

    _saverSettings = reader.ReadString();

    if (!_inDataFrame)
    {
        // File-backed cache: the file name and the reuse flag were serialized too.
        _cacheFile = reader.ReadString();
        _reuse = reader.ReadBoolean();
        host.CheckValue(_cacheFile, "_cacheFile");
    }
    else
    {
        _cacheFile = null;
        _reuse = false;
    }

    // Instantiating the saver only validates the stored settings string.
    if (ComponentCreation.CreateSaver(Host, _saverSettings) == null)
    {
        throw Host.Except("Cannot parse '{0}'", _saverSettings);
    }

    _pipedTransform = CreatePipeline(host, input);
}