/// <summary>
/// Builds the transform returned for <paramref name="input"/>, choosing the strategy
/// from the fields _inDataFrame, _async, _reuse and _cacheFile.
/// <list type="bullet">
/// <item><description>_inDataFrame and _async: the input is wrapped in a CacheDataView
/// and exposed through a PassThroughTransform.</description></item>
/// <item><description>_inDataFrame only: a SortInDataFrameTransform is created over the input
/// with numThreads = _numThreads and no sort column.</description></item>
/// <item><description>otherwise: the input is saved to _cacheFile (unless _reuse is set and the
/// file already exists), read back with the binary loader, its schema is checked against
/// the input schema, and the loader is wrapped in a "skip{s=0}" transform.</description></item>
/// </list>
/// </summary>
/// <param name="env">Environment passed to every component created by this method.</param>
/// <param name="input">Data view to cache.</param>
/// <returns>The transform built by the selected strategy.</returns>
protected IDataTransform CreatePipeline(IHostEnvironment env, IDataView input)
{
    // In-memory, asynchronous variant.
    if (_inDataFrame && _async)
    {
        var cachedView = new CacheDataView(env, input, null);
        return new PassThroughTransform(env, new PassThroughTransform.Arguments(), cachedView);
    }

    // In-memory, synchronous variant.
    if (_inDataFrame)
    {
        var sortArgs = new SortInDataFrameTransform.Arguments();
        sortArgs.numThreads = _numThreads;
        sortArgs.sortColumn = null;
        return new SortInDataFrameTransform(env, sortArgs, input);
    }

    // File-backed variant: the thread option is only emitted for a positive thread count.
    string threadOption = string.Empty;
    if (_numThreads > 0)
    {
        threadOption = string.Format("{{t={0}}}", _numThreads);
    }

    using (var channel = Host.Start("Caching data..."))
    {
        // The cache file is rewritten unless reuse was requested and the file is already there.
        if (!_reuse || !File.Exists(_cacheFile))
        {
            channel.Info(MessageSensitivity.UserData, "Building cache '{0}'", _cacheFile);
            var dataSaver = ComponentCreation.CreateSaver(env, _saverSettings);
            using (var cacheOutput = Host.CreateOutputFile(_cacheFile))
            {
                DataSaverUtils.SaveDataView(channel, dataSaver, input, cacheOutput, true);
            }
        }
        else
        {
            channel.Info(MessageSensitivity.UserData, "Reusing cache '{0}'", _cacheFile);
        }
    }

    string loaderSettings = string.Format("binary{{{0}}}", threadOption);
    var cacheLoader = ComponentCreation.CreateLoader(env, loaderSettings, new MultiFileSource(_cacheFile));

    // A reused cache file may not match the current input, hence the schema check.
    SchemaHelper.CheckSchema(Host, input.Schema, cacheLoader.Schema);

    return ComponentCreation.CreateTransform(env, "skip{s=0}", cacheLoader);
}