Exemplo n.º 1
0
        /// <summary>
        /// Run a job submission on the remote host. No checking is done here: any validation of the
        /// job request is assumed to have happened already, and we simply execute the commands the job needs.
        /// Assumes setupATLAS, rucio and the voms proxy init have all been done on the connection
        /// before this is called.
        /// </summary>
        /// <param name="connection">The SSH connection the commands are executed on.</param>
        /// <param name="job">The job to submit. Supplies the release, the packages to check out, the commands to run and the submit command (or submit patterns).</param>
        /// <param name="inputDataSet">Dataset name substituted for <c>*INPUTDS*</c> in the submit command.</param>
        /// <param name="resultingDataSet">Dataset name substituted for <c>*OUTPUTDS*</c> in the submit command. Also names the build directory, <c>/tmp/&lt;resultingDataSet&gt;</c>.</param>
        /// <param name="statusUpdate">Optional callback that receives a short message before each step is started.</param>
        /// <param name="failNow">Optional callback forwarded to the checkout, command, build and submit steps (presumably polled so they can bail out early).</param>
        /// <param name="sameJobAsLastTime">When true the work area setup (directory cleanup, panda and release setup, kinit, package checkout, commands, build) is skipped and only the submit command is run.</param>
        /// <param name="credSet">Target name of the credential set to load. Defaults to CERN.</param>
        /// <param name="dumpOnly">Forwarded to every remote command (presumably the commands are then only dumped rather than executed).</param>
        /// <returns>The connection returned by the final submit command, so calls can be chained.</returns>
        /// <exception cref="GRIDSubmitException">No credential could be loaded for <paramref name="credSet"/>.</exception>
        /// <exception cref="ArgumentException">The work area has to be built and <paramref name="resultingDataSet"/> is null, empty or whitespace.</exception>
        public static async Task<ISSHConnection> SubmitJobAsync(this ISSHConnection connection, AtlasJob job, string inputDataSet, string resultingDataSet, Action<string> statusUpdate = null, Func<bool> failNow = null, bool sameJobAsLastTime = false, string credSet = "CERN", bool dumpOnly = false)
        {
            // Make sure there is always something safe to call for status updates.
            Action<string> update = statusUpdate ?? (s => { });

            // Figure out the proper submit command and turn its dataset markers into format slots.
            // NOTE(review): the result is fed to string.Format below, so a command line that contains
            // literal '{' or '}' characters will make string.Format throw unless they are doubled.
            string submitCmd = (job.SubmitPatternCommands.Length > 0
                ? MatchSubmitPattern(job.SubmitPatternCommands, inputDataSet)
                : job.SubmitCommand.SubmitCommand.CommandLine)
                               .Replace("*INPUTDS*", "{0}")
                               .Replace("*OUTPUTDS*", "{1}");

            // The credentials are looked up even when the setup below is skipped, so a missing
            // credential is always reported up front.
            var cernCred = new CredentialSet(credSet)
                           .Load()
                           .FirstOrDefault()
                           .ThrowIfNull(() => new GRIDSubmitException($"Please create a windows generic credential with a target of '{credSet}' to allow access to kinit"));

            // If this is the first time through with a single job, then setup a directory we can use.
            if (!sameJobAsLastTime)
            {
                // The build directory is wiped with "rm -rf": a blank name would turn that into "rm -rf /tmp/".
                if (string.IsNullOrWhiteSpace(resultingDataSet))
                {
                    throw new ArgumentException("A non-empty resulting dataset name is required to create the build directory.", nameof(resultingDataSet));
                }

                var linuxLocation = string.Format("/tmp/{0}", resultingDataSet);
                await connection.Apply(() => update("Removing old build directory"))
                .ExecuteLinuxCommandAsync("rm -rf " + linuxLocation, dumpOnly: dumpOnly);

                await connection
                .Apply(() => update("Setting up panda"))
                .ExecuteLinuxCommandAsync("lsetup panda", dumpOnly: dumpOnly);

                await connection.Apply(() => update("Setting up release"))
                .SetupRcReleaseAsync(linuxLocation, job.Release.Name, dumpOnly: dumpOnly);

                await connection.Apply(() => update("Getting CERN credentials"))
                .KinitAsync(cernCred.Username, cernCred.Password, dumpOnly: dumpOnly);

                // Check out every package the job asks for.
                await connection
                .ApplyAsync(job.Packages, (c, j) => c.Apply(() => update("Checking out package " + j.Name)).CheckoutPackageAsync(j.Name, j.SCTag, failNow: failNow, dumpOnly: dumpOnly));

                // Run the job's own commands before the build.
                await connection
                .ApplyAsync(job.Commands, (co, cm) => co.Apply(() => update("Running command " + cm.CommandLine)).ExecuteLinuxCommandAsync(cm.CommandLine, failNow: failNow, dumpOnly: dumpOnly));

                await connection
                .Apply(() => update("Compiling release"))
                .BuildWorkAreaAsync(failNow: failNow, dumpOnly: dumpOnly);
            }

            // We should now be in the directory where everything is - so submit!
            return await connection
                   .Apply(() => update($"Running submit command ({inputDataSet})"))
                   .ExecuteLinuxCommandAsync(string.Format(submitCmd, inputDataSet, resultingDataSet), failNow: failNow, dumpOnly: dumpOnly);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Fetch a dataset from the grid using Rucio to a local directory.
        /// The dataset is first looked up with <c>rucio ls</c>, its file list is fetched and optionally
        /// filtered, the wanted file names are written to <c>/tmp/&lt;sanitized name&gt;.filelist</c> on the host,
        /// the target directory is created, and finally the download itself is run.
        /// </summary>
        /// <param name="connection">A previously configured connection with everything ready to go for GRID access.</param>
        /// <param name="dataSetName">The rucio dataset name</param>
        /// <param name="localDirectory">The local directory (on Linux) where the file should be downloaded</param>
        /// <param name="fileStatus">Optional callback that receives progress messages. It is also handed to the download step (which, per the original documentation, reports each file name as it is downloaded).</param>
        /// <param name="fileNameFilter">Filter function to alter the files that are to be downloaded</param>
        /// <param name="failNow">Checked periodically, if ever returns true, then bail out. Forwarded to every remote command issued here.</param>
        /// <param name="timeout">How long before we should timeout in seconds. Forwarded to the download step.</param>
        /// <returns>The connections used so you can chain</returns>
        /// <exception cref="ArgumentException">
        /// No dataset or container with the requested name is listed by rucio, or the <c>mkdir</c> for
        /// <paramref name="localDirectory"/> produced any output.
        /// </exception>
        public static async Task<ISSHConnection> DownloadFromGRIDAsync(this ISSHConnection connection, string dataSetName, string localDirectory,
                                                                       Action<string> fileStatus = null,
                                                                       Func<string[], string[]> fileNameFilter = null,
                                                                       Func<bool> failNow = null,
                                                                       int timeout = 3600)
        {
            // Does the dataset exist?
            fileStatus?.Invoke("Checking the dataset exists");
            var response = new List<string>();
            await connection.ExecuteLinuxCommandAsync($"rucio ls {dataSetName}", l => response.Add(l), secondsTimeout: 60, failNow: failNow);

            // Only lines mentioning DATASET or CONTAINER are considered; the name is taken from
            // the second space-separated field of such a line.
            var sanitizedName = dataSetName.SantizeDSName();
            var datasetFound = response
                               .Where(l => l.Contains("DATASET") || l.Contains("CONTAINER"))
                               .Select(l => l.Split(' '))
                               .Where(sl => sl.Length > 1)
                               .Any(sl => sl[1].SantizeDSName() == sanitizedName);

            if (!datasetFound)
            {
                throw new ArgumentException($"Unable to find any datasets on the GRID (in rucuio) with the name '{dataSetName}'.");
            }

            // Get the complete list of files in the dataset.
            fileStatus?.Invoke("Getting the complete list of files from the dataset");
            var fileNameList = await connection.FilelistFromGRIDAsync(dataSetName, failNow: failNow);

            // Filter them if need be.
            var goodFiles = fileNameFilter != null
                ? fileNameFilter(fileNameList)
                : fileNameList;

            // Create a file that contains all the files we want to download up on the host.
            // Any stale list is removed first because the names are appended one at a time.
            var fileListName = $"/tmp/{sanitizedName}.filelist";
            await connection.ExecuteLinuxCommandAsync("rm -rf " + fileListName, failNow: failNow);

            await connection.ApplyAsync(goodFiles, async (c, fname) => await c.ExecuteLinuxCommandAsync($"echo {fname} >> {fileListName}", failNow: failNow));

            // We good on creating the directory? Any output at all from mkdir is treated as a failure.
            await connection.ExecuteLinuxCommandAsync($"mkdir -p {localDirectory}",
                                                      l => { throw new ArgumentException($"Error trying to create directory {localDirectory} for dataset on remote machine ({connection.MachineName}).", nameof(localDirectory)); },
                                                      secondsTimeout: 20, failNow: failNow);

            // If we have no files to download, then we are totally done!
            // We do this after the directory is created, so if there are no files, a check still
            // works.
            if (goodFiles.Length == 0)
            {
                return connection;
            }

            // Next, do the download. A clock skew failure is retried after a 10 second wait and
            // then up to four more times, one minute apart.
            fileStatus?.Invoke($"Starting GRID download of {dataSetName}...");

            await Policy
            .Handle<ClockSkewException>()
            .WaitAndRetryAsync(new[]
            {
                TimeSpan.FromSeconds(10),
                TimeSpan.FromMinutes(1),
                TimeSpan.FromMinutes(1),
                TimeSpan.FromMinutes(1),
                TimeSpan.FromMinutes(1)
            }, (e, ts) => { fileStatus?.Invoke("Clock Skew error - wait and re-try"); })
            .ExecuteAsync(() => DoRucioDownloadAsync(connection, localDirectory, fileStatus, failNow, timeout, fileListName));

            return connection;
        }