/// <summary>
/// Builds the job preparation task for an Azure job. The preparation task
/// runs on each compute node (VM) before any other task is run there.
/// </summary>
/// <param name="job">Job parameters; supplies the ApsimX version whose resource files are fetched.</param>
/// <param name="sas">Shared access signature for the model files.</param>
private JobPreparationTask CreateJobPreparationTask(JobParameters job, string sas)
{
    var preparationTask = new JobPreparationTask();

    // The node-side preparation work is done by the jobprep.cmd script.
    preparationTask.CommandLine = "cmd.exe /c jobprep.cmd";
    preparationTask.ResourceFiles = GetJobPrepResourceFiles(sas, job.ApsimXVersion).ToList();
    preparationTask.WaitForSuccess = true;

    return preparationTask;
}
/// <summary>
/// Builds the job release task for an Azure job. The release task runs
/// on each compute node after the job has finished running.
/// </summary>
/// <param name="job">Job parameters; supplies the ApsimX version, display name and email recipient.</param>
/// <param name="sas">Shared access signature for the model files.</param>
private JobReleaseTask CreateJobReleaseTask(JobParameters job, string sas)
{
    // Storage credentials are read from the licence file named in the Azure settings.
    var licence = new Licence(AzureSettings.Default.LicenceFilePath);

    var releaseTask = new JobReleaseTask();
    releaseTask.CommandLine = "cmd.exe /c jobrelease.cmd";

    // The release task is given the same resource files as the preparation task.
    releaseTask.ResourceFiles = GetJobPrepResourceFiles(sas, job.ApsimXVersion).ToList();

    // Values handed to jobrelease.cmd through environment variables.
    releaseTask.EnvironmentSettings = new EnvironmentSetting[]
    {
        new EnvironmentSetting("APSIM_STORAGE_ACCOUNT", licence.StorageAccount),
        new EnvironmentSetting("APSIM_STORAGE_KEY", licence.StorageKey),
        new EnvironmentSetting("JOBNAME", job.DisplayName),
        new EnvironmentSetting("RECIPIENT", job.EmailRecipient)
    };

    return releaseTask;
}
/// <summary>
/// This function controls how the Azure pools/VMs are setup.
/// This is not really controllable by the user but probably should be.
/// </summary>
/// <remarks>
/// The number of compute nodes is the requested vCPU count divided by the
/// number of vCPUs per VM (integer division), but never less than one, so
/// that a request for fewer vCPUs than a single VM provides still gets a
/// node to run on.
/// </remarks>
/// <param name="job">Job parameters; supplies the requested CPU count and the low-priority flag.</param>
/// <returns>Pool information describing an auto pool whose lifetime is tied to the job.</returns>
private PoolInformation GetPoolInfo(JobParameters job)
{
    // We've hardcoded VM size to standard_d5_v2, which has 16 vCPUs.
    const int vCpusPerNode = 16;

    // We only use one pool, so number of nodes per pool will be total number of vCPUs
    // (as specified by the user) divided by number of vCPUs per VM. Integer division
    // truncates, so a request for fewer than 16 vCPUs would otherwise target zero
    // nodes and leave the job with nothing to run on; clamp to at least one node.
    int nodeCount = Math.Max(1, job.CpuCount / vCpusPerNode);

    var poolSpecification = new PoolSpecification
    {
        ResizeTimeout = TimeSpan.FromMinutes(15),

        // todo: look into using ComputeNodeFillType.Pack
        TaskSchedulingPolicy = new TaskSchedulingPolicy(ComputeNodeFillType.Spread),

        // This specifies the OS that our VM will be running.
        // OS Family 5 means .NET 4.6 will be installed.
        // For more info see:
        // https://docs.microsoft.com/en-us/azure/cloud-services/cloud-services-guestos-update-matrix#releases
        CloudServiceConfiguration = new CloudServiceConfiguration("5"),

        // For now, always use standard_d5_v2 VM type.
        // This VM has 16 vCPUs, 56 GiB of memory and 800 GiB temp (SSD) storage.
        // todo: should make this user-controllable
        // For other VM sizes, see:
        // https://docs.microsoft.com/azure/batch/batch-pool-vm-sizes
        // https://docs.microsoft.com/azure/virtual-machines/windows/sizes-general
        VirtualMachineSize = "standard_d5_v2",

        // Each task needs only one vCPU. Therefore number of tasks per VM will be number of vCPUs per VM.
        MaxTasksPerComputeNode = vCpusPerNode
    };

    if (job.LowPriority)
    {
        poolSpecification.TargetLowPriorityComputeNodes = nodeCount;
    }
    else
    {
        poolSpecification.TargetDedicatedComputeNodes = nodeCount;
    }

    var autoPoolSpecification = new AutoPoolSpecification
    {
        PoolLifetimeOption = PoolLifetimeOption.Job,
        PoolSpecification = poolSpecification
    };

    return new PoolInformation { AutoPoolSpecification = autoPoolSpecification };
}
/// <summary>
/// Submit a job to be run on Azure.
/// </summary>
/// <remarks>
/// Files are staged in a temporary working directory named after the job ID.
/// That directory is removed before the job is committed, and is also removed
/// (best effort) if submission is cancelled or fails part-way through, so that
/// temporary files such as the email settings file are not left on disk.
/// </remarks>
/// <param name="job">Job parameters. ApsimXPath, ApsimXVersion and ModelPath may be overwritten during submission.</param>
/// <param name="ct">Cancellation token. It is checked between steps; on cancellation the method reports "Cancelled" and returns without throwing.</param>
/// <param name="UpdateStatus">Action which will display job submission status to the user.</param>
/// <exception cref="Exception">Thrown if no credentials have been provided, or if the tools directory cannot be found.</exception>
public async Task SubmitJobAsync(JobParameters job, CancellationToken ct, Action<string> UpdateStatus)
{
    if (batchClient == null || storageClient == null)
    {
        throw new Exception("Unable to submit job to Azure: no credentials provided");
    }

    // Single place for the "has the user cancelled?" check that follows each step.
    // Reports the cancellation to the user and tells the caller to bail out.
    Func<bool> cancelled = () =>
    {
        if (!ct.IsCancellationRequested)
        {
            return false;
        }

        UpdateStatus("Cancelled");
        return true;
    };

    // Initialise a working directory.
    UpdateStatus("Initialising job environment...");
    string workingDirectory = Path.Combine(Path.GetTempPath(), job.ID.ToString());
    string modelZipFileSas;

    try
    {
        Directory.CreateDirectory(workingDirectory);

        // Set job owner.
        string owner = Environment.UserName.ToLower();
        await SetAzureMetaDataAsync("job-" + job.ID, "Owner", owner, ct);
        if (cancelled())
        {
            return;
        }

        // If the ApsimX path is a directory it will need to be compressed.
        if (Directory.Exists(job.ApsimXPath))
        {
            UpdateStatus("Compressing APSIM Next Generation...");

            string zipFile = Path.Combine(workingDirectory, $"Apsim-tmp-X-{owner}.zip");
            if (File.Exists(zipFile))
            {
                File.Delete(zipFile);
            }

            CreateApsimXZip(job.ApsimXPath, zipFile, ct);

            job.ApsimXPath = zipFile;

            // The version is everything after the first '-' in the zip file name.
            string zipFileName = Path.GetFileName(zipFile);
            job.ApsimXVersion = zipFileName.Substring(zipFileName.IndexOf('-') + 1);
        }

        if (cancelled())
        {
            return;
        }

        // Upload tools such as 7zip, AzCopy, CMail, etc.
        UpdateStatus("Uploading tools...");
        string executableDirectory = Path.GetDirectoryName(GetType().Assembly.Location);
        string toolsDir = Path.Combine(executableDirectory, "tools");
        if (!Directory.Exists(toolsDir))
        {
            throw new Exception("Tools Directory not found: " + toolsDir);
        }

        foreach (string filePath in Directory.EnumerateFiles(toolsDir))
        {
            await UploadFileIfNeededAsync("tools", filePath, ct);
            if (cancelled())
            {
                return;
            }
        }

        // Upload email config file.
        if (job.SendEmail)
        {
            Licence licence = new Licence(AzureSettings.Default.LicenceFilePath);
            StringBuilder config = new StringBuilder();
            config.AppendLine($"EmailRecipient={job.EmailRecipient}");
            config.AppendLine($"EmailSender={licence.EmailSender}");
            config.AppendLine($"EmailPW={licence.EmailPW}");

            // Write these settings to a temporary config file.
            string configFile = Path.Combine(workingDirectory, "settings.txt");
            File.WriteAllText(configFile, config.ToString());

            await UploadFileIfNeededAsync("job-" + job.ID, configFile, ct);

            // If the upload throws, the finally block below still removes this file
            // along with the rest of the working directory.
            File.Delete(configFile);
        }

        if (cancelled())
        {
            return;
        }

        // Upload job manager.
        UpdateStatus("Uploading job manager...");
        await UploadFileIfNeededAsync("jobmanager", Path.Combine(executableDirectory, "azure-apsim.exe"), ct);
        if (cancelled())
        {
            return;
        }

        // Upload apsim.
        UpdateStatus("Uploading APSIM Next Generation...");
        await UploadFileIfNeededAsync("apsim", job.ApsimXPath, ct);
        if (cancelled())
        {
            return;
        }

        // Generate model files.
        UpdateStatus("Generating model files...");

        // CreateDirectory is a no-op when the directory already exists.
        Directory.CreateDirectory(job.ModelPath);

        if (cancelled())
        {
            return;
        }

        // Generate .apsimx file for each simulation to be run.
        Runner run = new Runner(job.Model);
        GenerateApsimXFiles.Generate(run, job.ModelPath, p => { /* Don't bother with progress reporting */ }, collectExternalFiles: true);

        if (cancelled())
        {
            return;
        }

        // Compress model (.apsimx file) directory.
        UpdateStatus("Compressing model files...");
        string tmpZip = Path.Combine(workingDirectory, $"Model-{Guid.NewGuid()}.zip");
        ZipFile.CreateFromDirectory(job.ModelPath, tmpZip, CompressionLevel.Fastest, false);
        job.ModelPath = tmpZip;

        if (cancelled())
        {
            return;
        }

        // Upload models.
        UpdateStatus("Uploading model files...");
        modelZipFileSas = await UploadFileIfNeededAsync(job.ID.ToString(), job.ModelPath, ct);
        if (cancelled())
        {
            return;
        }

        // Clean up temp files.
        UpdateStatus("Deleting temp files...");
        Directory.Delete(workingDirectory, true);
    }
    finally
    {
        // Reached with the directory still present only when submission was cancelled
        // or failed part-way through. Cleanup here is best effort: a failure to delete
        // must not hide the exception (if any) that brought us here.
        if (Directory.Exists(workingDirectory))
        {
            try
            {
                Directory.Delete(workingDirectory, true);
            }
            catch (IOException)
            {
                // Deliberately ignored - see above.
            }
            catch (UnauthorizedAccessException)
            {
                // Deliberately ignored - see above.
            }
        }
    }

    // Submit job.
    UpdateStatus("Submitting Job...");
    CloudJob cloudJob = batchClient.JobOperations.CreateJob(job.ID.ToString(), GetPoolInfo(job));
    cloudJob.DisplayName = job.DisplayName;
    cloudJob.JobPreparationTask = CreateJobPreparationTask(job, modelZipFileSas);
    cloudJob.JobReleaseTask = CreateJobReleaseTask(job, modelZipFileSas);
    cloudJob.JobManagerTask = CreateJobManagerTask(job);

    await cloudJob.CommitAsync(cancellationToken: ct);
    UpdateStatus("Job Successfully submitted");
}