Example #1
0
        /// <summary>
        /// Submits the jobs and waits for it to complete.
        /// When it submits, it create a log entry file in the cluster working directory, named according to the run name. This file is deleted
        /// when the job finishes successfully, so long as we're still waiting for it to finish. If SubmitAndWait is called and this file already
        /// exists, then it is assumed that the job we want to submit was already submitted, so we wait for it to finish rather than submit again.
        /// </summary>
        /// <param name="clusterArgs"></param>
        /// <param name="distributableObj"></param>
        /// <param name="maxSubmitAfterTasksFail"></param>
        /// <param name="OnSubmittedCallbackOrNull"></param>
        public static void SubmitAndWait(ClusterSubmitterArgs clusterArgs, IDistributable distributableObj, int maxSubmitAfterTasksFail = 0, Action OnSubmittedCallbackOrNull = null)
        {
            using (ParallelOptionsScope.Suspend())
            {
                FileInfo logEntryFile = HpcLibSettings.GetLogEntryFile(clusterArgs);
                if (logEntryFile.Exists)
                {
                    Console.WriteLine(Resource.Job_already_exists, logEntryFile.FullName);
                    clusterArgs = HpcLibSettings.LoadLogEntryFile(logEntryFile).ClusterArgs;
                }
                else
                {
                    Submit(clusterArgs, distributableObj);
                    Console.WriteLine(Resource.Wait_Writing_log);
                    HpcLibSettings.WriteLogEntryToClusterDirectory(clusterArgs);
                }

                if (OnSubmittedCallbackOrNull != null)
                {
                    OnSubmittedCallbackOrNull();
                }

                JobState jobState = WaitForJobInternal(clusterArgs, maxSubmitAfterTasksFail);

                logEntryFile.Delete();  // job finished successfully, so we can delete this. Even if failed or canceled, we assume that we'll want to overwrite in the future.

                if (jobState != JobState.Finished)
                {
                    throw new Exception("Job " + jobState);
                }
            }
        }
Example #2
0
        /// <summary>
        /// Refreshes the cluster or core
        /// </summary>
        /// <returns>refreshed or not</returns>
        public bool Refresh()
        {
            bool changed = false;
            ISchedulerCounters counters;

            using (ParallelOptionsScope.Suspend())
            {
                if (Connect() && null != (counters = GetCounters()))
                {
                    changed =
                        BusyCores != counters.BusyCores ||
                        IdleCores != counters.IdleCores ||
                        QueuedTasks != counters.QueuedTasks;


                    BusyCores   = counters.BusyCores;
                    IdleCores   = counters.IdleCores;
                    QueuedTasks = counters.QueuedTasks;
                }
                else
                {
                    changed = BusyCores != -1;

                    BusyCores   = -1;
                    IdleCores   = -1;
                    QueuedTasks = -1;
                }
            }
            return(changed);
        }
Example #3
0
        private static void SubmitInternal(ClusterSubmitterArgs clusterArgs, IDistributable distributableObj)
        {
            lock (_submitterLockObj)  // for now, just let one thread submit at a time.
            {
                if (string.IsNullOrEmpty(clusterArgs.Name))
                {
                    clusterArgs.Name = distributableObj.JobName;
                }

                CopyExes(clusterArgs);

                clusterArgs.StdErrDirName = CreateUniqueDirectory(clusterArgs.ExternalRemoteDirectoryName, "Stderr", distributableObj.JobName);
                clusterArgs.StdOutDirName = CreateUniqueDirectory(clusterArgs.ExternalRemoteDirectoryName, "Stdout", distributableObj.JobName);

                if (clusterArgs.CopyInputFiles != null)
                {
                    if (!(distributableObj is DistributableWrapper))
                    {
                        clusterArgs.CopyInputFiles.AddRange(ArgumentCollection.EnumerateValuesOfTypeFromParsable <InputFile>(distributableObj).Select(file => file.ToString()));
                    }

                    if (clusterArgs.CopyInputFiles.Count > 0)
                    {
                        CopyInputFiles(clusterArgs.CopyInputFiles, clusterArgs.ExternalRemoteDirectoryName);
                    }
                }

                using (ParallelOptionsScope.Suspend())
                {
                    switch (clusterArgs.Version)
                    {
                    case 3:
                        SubmitViaAPI3(clusterArgs, distributableObj);
                        break;

                    default:
                        throw new NotSupportedException(string.Format("Cluster version {0} is not supported.", clusterArgs.Version));
                    }
                }
                Console.WriteLine(Resource.Processed_job, clusterArgs.Cluster, clusterArgs.ExternalRemoteDirectoryName);


                Console.WriteLine(Resource.Writing_log_file);
                HpcLibSettings.TryWriteToLog(clusterArgs);

                Console.WriteLine(Resource.Done);
            }
            return;
        }
Example #4
0
        private bool Connect()
        {
            if (_scheduler != null)
            {
                return(true);
            }

            bool connected = false;

            using (ParallelOptionsScope.Suspend())
            {
                connected = HpcLib.TryConnect(Cluster, out _scheduler);
            }

            return(connected);
        }