/// <summary>
/// Run the job submission: unless told this is the same job as last time, build a fresh work
/// area for the job on the remote machine, and then execute the job's submit command.
/// We assume that any checking has already been done and we are just going to execute all
/// the commands required of this job request. Assume setupATLAS, rucio, and voms proxy init
/// have all been done before this is called!
/// </summary>
/// <param name="connection">The connection on which every command is executed.</param>
/// <param name="job">The job whose release, packages, commands, and submit command(s) are used.</param>
/// <param name="inputDataSet">Dataset name substituted for <c>*INPUTDS*</c> in the submit command (and used to select a submit pattern when the job has any).</param>
/// <param name="resultingDataSet">Dataset name substituted for <c>*OUTPUTDS*</c> in the submit command. Also names the build directory <c>/tmp/&lt;resultingDataSet&gt;</c>.</param>
/// <param name="statusUpdate">Optional callback that receives a short message before each step is run.</param>
/// <param name="failNow">Optional abort check; passed through unchanged to the individual steps.</param>
/// <param name="sameJobAsLastTime">If true, skip the work area setup and only run the submit command.</param>
/// <param name="credSet">Set of credentials to load. Default to CERN</param>
/// <param name="dumpOnly">Passed through unchanged to every command helper that is invoked.</param>
/// <returns>Whatever the final submit command's <c>ExecuteLinuxCommandAsync</c> call returns, so calls can be chained.</returns>
/// <exception cref="GRIDSubmitException">Supplied to <c>ThrowIfNull</c> for the case where no credential is found for <paramref name="credSet"/>.</exception>
/// <exception cref="ArgumentException">The work area must be built but <paramref name="resultingDataSet"/> is null, empty, or whitespace.</exception>
public static async Task<ISSHConnection> SubmitJobAsync(this ISSHConnection connection, AtlasJob job, string inputDataSet, string resultingDataSet, Action<string> statusUpdate = null, Func<bool> failNow = null, bool sameJobAsLastTime = false, string credSet = "CERN", bool dumpOnly = false)
{
    // Get the status update protected.
    Action<string> update = statusUpdate ?? (s => { });

    // Figure out the proper submit command template.
    string submitTemplate = job.SubmitPatternCommands.Length > 0
        ? MatchSubmitPattern(job.SubmitPatternCommands, inputDataSet)
        : job.SubmitCommand.SubmitCommand.CommandLine;

    // Substitute the dataset names directly rather than going through string.Format: a
    // command line is free to contain literal braces (e.g. shell "${VAR}"), which
    // string.Format would reject with a FormatException - and only after the whole
    // work area had already been built.
    string submitCmd = submitTemplate
        .Replace("*INPUTDS*", inputDataSet)
        .Replace("*OUTPUTDS*", resultingDataSet);

    var cernCred = new CredentialSet(credSet)
        .Load()
        .FirstOrDefault()
        .ThrowIfNull(() => new GRIDSubmitException($"Please create a windows generic credential with a target of '{credSet}' to allow access to kinit"));

    // If this is the first time through with a single job, then setup a directory we can use.
    if (!sameJobAsLastTime)
    {
        // The build directory is removed with "rm -rf" below. An empty name would turn
        // that into "rm -rf /tmp/", so refuse to go any further.
        if (string.IsNullOrWhiteSpace(resultingDataSet))
        {
            throw new ArgumentException("A non-empty resulting dataset name is required to set up the build directory.", nameof(resultingDataSet));
        }

        var linuxLocation = $"/tmp/{resultingDataSet}";

        await connection
            .Apply(() => update("Removing old build directory"))
            .ExecuteLinuxCommandAsync("rm -rf " + linuxLocation, dumpOnly: dumpOnly);

        await connection
            .Apply(() => update("Setting up panda"))
            .ExecuteLinuxCommandAsync("lsetup panda", dumpOnly: dumpOnly);

        await connection
            .Apply(() => update("Setting up release"))
            .SetupRcReleaseAsync(linuxLocation, job.Release.Name, dumpOnly: dumpOnly);

        await connection
            .Apply(() => update("Getting CERN credentials"))
            .KinitAsync(cernCred.Username, cernCred.Password, dumpOnly: dumpOnly);

        await connection
            .ApplyAsync(job.Packages, (c, j) => c.Apply(() => update("Checking out package " + j.Name)).CheckoutPackageAsync(j.Name, j.SCTag, failNow: failNow, dumpOnly: dumpOnly));

        await connection
            .ApplyAsync(job.Commands, (co, cm) => co.Apply(() => update("Running command " + cm.CommandLine)).ExecuteLinuxCommandAsync(cm.CommandLine, failNow: failNow, dumpOnly: dumpOnly));

        await connection
            .Apply(() => update("Compiling release"))
            .BuildWorkAreaAsync(failNow: failNow, dumpOnly: dumpOnly);
    }

    // We should now be in the directory where everything is - so submit!
    return await connection
        .Apply(() => update($"Running submit command ({inputDataSet})"))
        .ExecuteLinuxCommandAsync(submitCmd, failNow: failNow, dumpOnly: dumpOnly);
}
/// <summary>
/// Fetch a dataset from the grid using Rucio to a local directory.
/// </summary>
/// <param name="connection">A previously configured connection with everything ready to go for GRID access.</param>
/// <param name="dataSetName">The rucio dataset name</param>
/// <param name="localDirectory">The local directory (on Linux) where the file should be downloaded</param>
/// <param name="fileStatus">Receives progress messages from this method; it is also handed to the download step.</param>
/// <param name="fileNameFilter">Filter function to alter the files that are to be downloaded</param>
/// <param name="failNow">Checked periodically, if ever returns true, then bail out</param>
/// <param name="timeout">How long before we should timeout in seconds</param>
/// <returns>The connections used so you can chain</returns>
/// <exception cref="ArgumentException">
/// The <c>rucio ls</c> output contains no dataset or container matching <paramref name="dataSetName"/>,
/// or creating <paramref name="localDirectory"/> on the remote machine produced any output.
/// </exception>
public static async Task<ISSHConnection> DownloadFromGRIDAsync(this ISSHConnection connection, string dataSetName, string localDirectory, Action<string> fileStatus = null, Func<string[], string[]> fileNameFilter = null, Func<bool> failNow = null, int timeout = 3600)
{
    // Does the dataset exist?
    fileStatus?.Invoke("Checking the dataset exists");
    var response = new List<string>();
    await connection.ExecuteLinuxCommandAsync(string.Format("rucio ls {0}", dataSetName), l => response.Add(l), secondsTimeout: 60, failNow: failNow);

    // Only lines mentioning a dataset or container are of interest; the name is taken
    // from the second space-separated field of such a line.
    var sanitizedName = dataSetName.SantizeDSName();
    var datasetFound = response
        .Where(l => l.Contains("DATASET") || l.Contains("CONTAINER"))
        .Select(l => l.Split(' '))
        .Where(sl => sl.Length > 1)
        .Any(sl => sl[1].SantizeDSName() == sanitizedName);

    if (!datasetFound)
    {
        throw new ArgumentException(string.Format("Unable to find any datasets on the GRID (in rucuio) with the name '{0}'.", dataSetName));
    }

    // Get the complete list of files in the dataset.
    fileStatus?.Invoke("Getting the complete list of files from the dataset");
    var fileNameList = await connection.FilelistFromGRIDAsync(dataSetName, failNow: failNow);

    // Filter them if need be.
    var goodFiles = fileNameFilter != null
        ? fileNameFilter(fileNameList)
        : fileNameList;

    // Create a file that contains all the files we want to download up on the host.
    // failNow is passed along so that a long list of files can still be abandoned part way.
    var fileListName = string.Format("/tmp/{0}.filelist", sanitizedName);
    await connection.ExecuteLinuxCommandAsync("rm -rf " + fileListName, failNow: failNow);
    await connection.ApplyAsync(goodFiles, async (c, fname) => await c.ExecuteLinuxCommandAsync(string.Format("echo {0} >> {1}", fname, fileListName), failNow: failNow));

    // We good on creating the directory? Any line of output from mkdir is treated as a failure.
    await connection.ExecuteLinuxCommandAsync(
        string.Format("mkdir -p {0}", localDirectory),
        l => { throw new ArgumentException($"Error trying to create directory {localDirectory} for dataset on remote machine ({connection.MachineName}).", nameof(localDirectory)); },
        secondsTimeout: 20,
        failNow: failNow);

    // If we have no files to download, then we are totally done!
    // We do this after the directory is created, so if there are no files, a check still
    // works.
    if (goodFiles.Length == 0)
    {
        return connection;
    }

    // Next, do the download. A clock skew error is retried after a short wait, up to five times.
    fileStatus?.Invoke($"Starting GRID download of {dataSetName}...");

    await Policy
        .Handle<ClockSkewException>()
        .WaitAndRetryAsync(new[]
        {
            TimeSpan.FromSeconds(10),
            TimeSpan.FromMinutes(1),
            TimeSpan.FromMinutes(1),
            TimeSpan.FromMinutes(1),
            TimeSpan.FromMinutes(1)
        }, (e, ts) => { fileStatus?.Invoke("Clock Skew error - wait and re-try"); })
        .ExecuteAsync(() => DoRucioDownloadAsync(connection, localDirectory, fileStatus, failNow, timeout, fileListName));

    return connection;
}