Пример #1
0
 /// <summary>
 /// Synchronous counterpart of <c>FilelistFromGRIDAsync</c>: starts the asynchronous file
 /// listing for a GRID dataset and blocks the calling thread until it has finished.
 /// </summary>
 /// <param name="connection">The already configured SSH connection the query is issued on</param>
 /// <param name="dataSetName">Name of the GRID dataset whose files are to be listed</param>
 /// <param name="failNow">Optional callback; return true if long-running commands should quit right away</param>
 /// <param name="dumpOnly">If true, only do a test-run rather than actually running</param>
 /// <returns>The file names that make up the GRID dataset</returns>
 public static string[] FilelistFromGRID(this ISSHConnection connection, string dataSetName, Func<bool> failNow = null, bool dumpOnly = false)
 {
     // All arguments are forwarded unchanged to the async implementation.
     var pendingListing = connection.FilelistFromGRIDAsync(dataSetName, failNow, dumpOnly);

     // Block here; the result (or failure) of the task is surfaced via WaitAndUnwrapException.
     return pendingListing.WaitAndUnwrapException();
 }
Пример #2
0
        /// <summary>
        /// Fetch a dataset from the GRID using Rucio into a directory on the remote (Linux) machine.
        /// </summary>
        /// <remarks>
        /// The steps, in order:
        /// 1. Run <c>rucio ls</c> and check that a DATASET or CONTAINER line matches the (sanitized) dataset name.
        /// 2. Get the dataset's file list and pass it through <paramref name="fileNameFilter"/> if one is given.
        /// 3. Write the selected file names, one per line, to <c>/tmp/&lt;sanitized name&gt;.filelist</c> on the remote host.
        /// 4. Create <paramref name="localDirectory"/> with <c>mkdir -p</c>.
        /// 5. If any files were selected, run the download, re-trying up to five times
        ///    (after 10 seconds, then one minute each) when a <c>ClockSkewException</c> is thrown.
        /// NOTE(review): <paramref name="dataSetName"/>, <paramref name="localDirectory"/> and the file names are
        /// inserted into shell command lines without quoting - confirm callers never pass untrusted values.
        /// </remarks>
        /// <param name="connection">A previously configured connection with everything ready to go for GRID access.</param>
        /// <param name="dataSetName">The rucio dataset name</param>
        /// <param name="localDirectory">The directory (on the Linux machine) where the files should be downloaded</param>
        /// <param name="fileStatus">Optional callback that receives progress messages as the download proceeds.</param>
        /// <param name="fileNameFilter">Optional filter that receives the full file list and returns the files to download</param>
        /// <param name="failNow">Checked periodically, if ever returns true, then bail out</param>
        /// <param name="timeout">How long before we should timeout, in seconds; passed on to the download step</param>
        /// <returns>The connection that was used, so calls can be chained</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when no dataset or container with the given name is found in the <c>rucio ls</c> output, or
        /// (from the output callback) when the <c>mkdir</c> command writes any output line.
        /// </exception>
        public static async Task<ISSHConnection> DownloadFromGRIDAsync(this ISSHConnection connection, string dataSetName, string localDirectory,
                                                                       Action<string> fileStatus = null,
                                                                       Func<string[], string[]> fileNameFilter = null,
                                                                       Func<bool> failNow = null,
                                                                       int timeout = 3600)
        {
            // Does the dataset exist?
            fileStatus?.Invoke("Checking the dataset exists");
            var response = new List<string>();
            await connection.ExecuteLinuxCommandAsync(string.Format("rucio ls {0}", dataSetName), l => response.Add(l), secondsTimeout: 60, failNow: failNow);

            // Only DATASET/CONTAINER lines are of interest; the name is taken from the second
            // space-separated token of such a line.
            var datasetFound = response
                               .Where(l => l.Contains("DATASET") || l.Contains("CONTAINER"))
                               .Select(l => l.Split(' '))
                               .Where(sl => sl.Length > 1)
                               .Any(sl => sl[1].SantizeDSName() == dataSetName.SantizeDSName());

            if (!datasetFound)
            {
                throw new ArgumentException(string.Format("Unable to find any datasets on the GRID (in rucuio) with the name '{0}'.", dataSetName));
            }

            // Get the complete list of files in the dataset.
            fileStatus?.Invoke("Getting the complete list of files from the dataset");
            var fileNameList = await connection.FilelistFromGRIDAsync(dataSetName, failNow: failNow);

            // Filter them if need be.
            var goodFiles = fileNameFilter != null
                ? fileNameFilter(fileNameList)
                : fileNameList;

            // Create a file that contains all the files we want to download up on the host.
            // Any file list left over from a previous run is removed first, since names are appended.
            var fileListName = string.Format("/tmp/{0}.filelist", dataSetName.SantizeDSName());
            await connection.ExecuteLinuxCommandAsync("rm -rf " + fileListName);

            await connection.ApplyAsync(goodFiles, async (c, fname) => await c.ExecuteLinuxCommandAsync(string.Format("echo {0} >> {1}", fname, fileListName)));

            // We good on creating the directory? Any output line from mkdir is treated as a failure.
            await connection.ExecuteLinuxCommandAsync(string.Format("mkdir -p {0}", localDirectory),
                                                      l => { throw new ArgumentException($"Error trying to create directory {localDirectory} for dataset on remote machine ({connection.MachineName}).", nameof(localDirectory)); },
                                                      secondsTimeout: 20);

            // If we have no files to download, then we are totally done!
            // We do this after the directory is created, so if there are no files, a check still
            // works.
            if (goodFiles.Length == 0)
            {
                return connection;
            }

            // Next, do the download
            fileStatus?.Invoke($"Starting GRID download of {dataSetName}...");

            await Policy
                .Handle<ClockSkewException>()
                .WaitAndRetryAsync(new[]
                {
                    TimeSpan.FromSeconds(10),
                    TimeSpan.FromMinutes(1),
                    TimeSpan.FromMinutes(1),
                    TimeSpan.FromMinutes(1),
                    TimeSpan.FromMinutes(1)
                }, (e, ts) => { fileStatus?.Invoke("Clock Skew error - wait and re-try"); })
                .ExecuteAsync(() => DoRucioDownloadAsync(connection, localDirectory, fileStatus, failNow, timeout, fileListName));

            return connection;
        }