示例#1
0
        /// <summary>
        ///   Connects the SSH access object to the given server's public address, using the SSH key file name
        ///   stored on the server.
        /// </summary>
        /// <param name="server">Server to connect to; it is cast to <c>ExternalServer</c> to read the key name</param>
        /// <returns>The <c>sshAccess</c> object after <c>ConnectTo</c> has been called on it</returns>
        /// <exception cref="InvalidOperationException">If the server has no public address</exception>
        protected override IBaseSSHAccess ConnectWithSSH(BaseServer server)
        {
            var publicAddress = server.PublicAddress;

            if (publicAddress == null)
            {
                throw new InvalidOperationException("Can't connect to a server with no public address");
            }

            // The address is converted before the cast, matching the argument evaluation order of a single call
            var host    = publicAddress.ToString();
            var keyFile = ((ExternalServer)server).SSHKeyFileName;

            sshAccess.ConnectTo(host, keyFile);

            return sshAccess;
        }
示例#2
0
        /// <summary>
        ///   Handles a request to register a new external server. The visible part validates the request, test
        ///   connects to the server over SSH, rejects duplicate addresses and then adds the new server plus an
        ///   admin action record to the database context.
        /// </summary>
        /// <param name="request">Data of the server to add, read from the request body</param>
        /// <returns>A bad request result with an explanation when any of the checks below fails</returns>
        public async Task <IActionResult> Create([Required][FromBody] ExternalServerDTO request)
        {
            FailIfNotConfigured();

            if (request.PublicAddress == null)
            {
                return(BadRequest("Missing address"));
            }

            // Reject key names that are empty or contain ".." or "/" before the name is used any further
            if (string.IsNullOrEmpty(request.SSHKeyFileName) || request.SSHKeyFileName.Contains("..") ||
                request.SSHKeyFileName.Contains("/"))
            {
                return(BadRequest("Invalid SSH key name format"));
            }

            if (!serverSSHAccess.IsValidKey(request.SSHKeyFileName))
            {
                return(BadRequest("Invalid SSH key specified"));
            }

            // Test connection
            // Any exception from the connection attempt is logged and reported as a bad request
            try
            {
                serverSSHAccess.ConnectTo(request.PublicAddress.ToString(), request.SSHKeyFileName);
            }
            catch (Exception e)
            {
                logger.LogWarning("Failing to add a new external server due to connect failure: {@E}", e);
                return(BadRequest("Can't access the specified IP address with the specified key"));
            }

            // Don't allow duplicate IPs
            // NOTE(review): this check runs only after the SSH test connection above has succeeded
            if (await database.ExternalServers.FirstOrDefaultAsync(s =>
                                                                   s.PublicAddress != null && s.PublicAddress.Equals(request.PublicAddress)) != null)
            {
                return(BadRequest("There is already a server configured with that IP address"));
            }

            var server = new ExternalServer()
            {
                PublicAddress  = request.PublicAddress,
                SSHKeyFileName = request.SSHKeyFileName,
            };
            await database.ExternalServers.AddAsync(server);

            // Record which authenticated user added the server
            await database.AdminActions.AddAsync(new AdminAction()
            {
                Message       = $"New external server with IP {request.PublicAddress} added",
                PerformedById = HttpContext.AuthenticatedUser() !.Id,
            });
示例#3
0
        /// <summary>
        ///   Checks whether an external server has come up by trying to connect to it over SSH. On success the
        ///   server is marked as running, otherwise this job is scheduled to run again in 30 seconds.
        /// </summary>
        /// <param name="id">Id of the external server to check</param>
        /// <param name="cancellationToken">Cancels the database operations of this run</param>
        /// <exception cref="Exception">
        ///   If the server is in stopping status and its status was last checked less than 15 seconds ago, or if
        ///   the server has no usable public address
        /// </exception>
        public async Task Execute(long id, CancellationToken cancellationToken)
        {
            var server = await database.ExternalServers.FindAsync(new object[] { id }, cancellationToken);

            if (server == null)
            {
                logger.LogWarning("Server {Id} (external) not found for startup check", id);
                return;
            }

            if (server.Status == ServerStatus.Running || server.Status == ServerStatus.Provisioning)
            {
                logger.LogInformation("External server {Id} is already up, skipping check job", id);
                return;
            }

            // A server that was only just told to stop may still accept SSH connections, so don't trust a
            // successful connect yet (presumably the job system retries after this exception; confirm)
            if (server.Status == ServerStatus.Stopping &&
                DateTime.UtcNow - server.StatusLastChecked < TimeSpan.FromSeconds(15))
            {
                throw new Exception($"External server {id} has been in stopping status too short time");
            }

            if (server.PublicAddress == null || server.PublicAddress.Equals(IPAddress.None))
            {
                throw new Exception($"External server {id} doesn't have a public address set");
            }

            bool up = false;

            // Only the two "not reachable yet" failure types are treated as "server is down", any other
            // exception propagates out of this job
            try
            {
                sshAccess.ConnectTo(server.PublicAddress.ToString(), server.SSHKeyFileName);
                up = true;
            }
            catch (SocketException)
            {
                logger.LogInformation("Connection failed (socket exception), server is probably not up yet");
            }
            catch (SshOperationTimeoutException)
            {
                logger.LogInformation("Connection failed (ssh timed out), server is probably not up yet");
            }

            if (up)
            {
                server.Status = ServerStatus.Running;
            }

            // The check time is saved on both outcomes
            server.StatusLastChecked = DateTime.UtcNow;
            server.BumpUpdatedAt();
            await database.SaveChangesAsync(cancellationToken);

            if (!up)
            {
                logger.LogTrace("External server {Id} is not up currently", id);

                // The call is made through the lambda parameter so that the scheduled expression doesn't
                // capture this job instance
                jobClient.Schedule<WaitForExternalServerStartUpJob>(x => x.Execute(id, CancellationToken.None),
                                                                    TimeSpan.FromSeconds(30));
            }
            else
            {
                logger.LogInformation("External server {Id} is now up", id);
            }
        }
示例#4
0
        /// <summary>
        ///   Tries to start a CI job on a server reserved for it. Validates the job state and the reservation,
        ///   resolves the CI image, connects to the server over SSH, downloads the CI executor onto it and starts
        ///   the executor in the background with the build's data passed as environment variables.
        /// </summary>
        /// <param name="ciProjectId">Project part of the key identifying the CI job</param>
        /// <param name="ciBuildId">Build part of the key identifying the CI job</param>
        /// <param name="ciJobId">Job part of the key identifying the CI job</param>
        /// <param name="serverId">Id of the server to run the job on</param>
        /// <param name="serverIsExternal">
        ///   Selects whether the server is looked up from the external or the controlled servers
        /// </param>
        /// <param name="retries">Retry count; it is passed on reduced by one when the SSH connection fails</param>
        /// <param name="cancellationToken">Passed to the database queries and the hash calculation</param>
        /// <exception cref="ArgumentException">If the server can't be found</exception>
        /// <exception cref="NotLoadedModelNavigationException">
        ///   If the job's build or the build's project is not loaded
        /// </exception>
        /// <exception cref="Exception">
        ///   If the server has no usable public address or one of the SSH commands reports a failure
        /// </exception>
        public async Task Execute(long ciProjectId, long ciBuildId, long ciJobId, long serverId, bool serverIsExternal,
                                  int retries, CancellationToken cancellationToken)
        {
            // Includes are needed here to provide fully populated data for update notifications
            var job = await Database.CiJobs.Include(j => j.Build !).ThenInclude(b => b.CiProject)
                      .FirstOrDefaultAsync(
                j => j.CiProjectId == ciProjectId && j.CiBuildId == ciBuildId && j.CiJobId == ciJobId,
                cancellationToken);

            BaseServer?server;

            // External and controlled servers are stored separately, the flag picks where to look
            if (serverIsExternal)
            {
                server = await Database.ExternalServers.FindAsync(new object[] { serverId }, cancellationToken);
            }
            else
            {
                server = await Database.ControlledServers.FindAsync(new object[] { serverId }, cancellationToken);
            }

            if (server == null)
            {
                throw new ArgumentException($"Could not find server ({serverId}, external: {serverIsExternal}) " +
                                            "to run build on");
            }

            // Each of the refusal paths below calls ReleaseServerReservation on the server before returning
            if (job == null)
            {
                Logger.LogWarning("Skipping CI job as it doesn't exist");
                ReleaseServerReservation(server);
                return;
            }

            if (job.State != CIJobState.WaitingForServer)
            {
                Logger.LogWarning(
                    "CI job is not in waiting for server status, refusing to start running it on server: {ServerId}",
                    serverId);
                ReleaseServerReservation(server);
                return;
            }

            if (server.ReservedFor != job.CiJobId)
            {
                Logger.LogWarning(
                    "CI job id doesn't match reservation on server, refusing to start it on server: {ServerId}",
                    serverId);
                ReleaseServerReservation(server);
                return;
            }

            // Get the CI image for the job
            var imageFileName       = job.GetImageFileName();
            var serverSideImagePath = Path.Join("CI/Images", imageFileName);

            StorageItem?imageItem;

            // From here on failures mark the job as failed with a failure section and end it, instead of only
            // releasing the reservation
            try
            {
                imageItem = await StorageItem.FindByPath(Database, serverSideImagePath);
            }
            catch (Exception e)
            {
                // ReSharper disable once ExceptionPassedAsTemplateArgumentProblem
                Logger.LogError("Invalid image specified for CI job: {Image}, path parse exception: {@E}", job.Image,
                                e);
                job.SetFinishSuccess(false);
                await job.CreateFailureSection(Database, "Invalid image specified for job (invalid path)");
                await OnJobEnded(server, job);

                return;
            }

            if (string.IsNullOrEmpty(job.Image) || imageItem == null)
            {
                Logger.LogError("Invalid image specified for CI job: {Image}", job.Image);
                job.SetFinishSuccess(false);
                await job.CreateFailureSection(Database, "Invalid image specified for job (not found)");
                await OnJobEnded(server, job);

                return;
            }

            // The CI system uses the first valid image version. For future updates a different file name is needed
            // For example bumping the ":v1" to a ":v2" suffix
            var version = await imageItem.GetLowestUploadedVersion(Database);

            if (version == null || version.StorageFile == null)
            {
                Logger.LogError("Image with no uploaded version specified for CI job: {Image}", job.Image);
                job.SetFinishSuccess(false);
                await job.CreateFailureSection(Database, "Invalid image specified for job (not uploaded version)");
                await OnJobEnded(server, job);

                return;
            }

            // Queue a job to lock writing to the CI image if it isn't write protected yet
            if (imageItem.WriteAccess != FileAccess.Nobody)
            {
                Logger.LogInformation(
                    "Storage item {Id} used as CI image is not write locked, queuing a job to lock it", imageItem.Id);

                // To ensure the upload time is expired, this is upload time + 5 minutes
                JobClient.Schedule <LockCIImageItemJob>(x => x.Execute(imageItem.Id, CancellationToken.None),
                                                        AppInfo.RemoteStorageUploadExpireTime + TimeSpan.FromMinutes(5));
            }

            Logger.LogInformation("Trying to start job {CIProjectId}-{CIBuildId}-{CIJobId} on reserved " +
                                  "server ({Id}, {ServerIsExternal})", ciProjectId, ciBuildId, ciJobId, server.Id, serverIsExternal);

            if (server.PublicAddress == null || server.PublicAddress.Equals(IPAddress.None))
            {
                throw new Exception($"Server ({server.Id}, {serverIsExternal}) doesn't have a public address set");
            }

            // Try to start running the job, this can fail if the server is not actually really up yet
            IBaseSSHAccess sshAccess;

            // External servers connect with the key file stored on the server, controlled servers connect
            // without specifying one. The two connection failure types below lead to a requeue with one less retry
            try
            {
                if (serverIsExternal)
                {
                    externalSSHAccess.ConnectTo(server.PublicAddress.ToString(),
                                                ((ExternalServer)server).SSHKeyFileName);
                    sshAccess = externalSSHAccess;
                }
                else
                {
                    controlledSSHAccess.ConnectTo(server.PublicAddress.ToString());
                    sshAccess = controlledSSHAccess;
                }
            }
            catch (SocketException)
            {
                Logger.LogInformation("Connection failed (socket exception), server is probably not up (yet)");
                await Requeue(job, retries - 1, server, serverIsExternal);

                return;
            }
            catch (SshOperationTimeoutException)
            {
                Logger.LogInformation("Connection failed (ssh timed out), server is probably not up (yet)");
                await Requeue(job, retries - 1, server, serverIsExternal);

                return;
            }

            var imageDownloadUrl =
                remoteDownloadUrls.CreateDownloadFor(version.StorageFile, AppInfo.RemoteStorageDownloadExpireTime);

            // Connection success, so now we can run the job starting on the server
            job.RunningOnServerId         = serverId;
            job.RunningOnServerIsExternal = server.IsExternal;

            // TODO: permanently store on which server this job was ran on and how long since creation it took to get
            // here

            if (job.Build == null)
            {
                throw new NotLoadedModelNavigationException();
            }

            // Safe builds get the safe-only secrets and other builds the unsafe-only ones, secrets marked for
            // all build types are loaded in both cases
            CISecretType jobSpecificSecretType = job.Build.IsSafe ? CISecretType.SafeOnly : CISecretType.UnsafeOnly;

            var secrets = await Database.CiSecrets.Where(s => s.CiProjectId == job.CiProjectId &&
                                                         (s.UsedForBuildTypes == jobSpecificSecretType || s.UsedForBuildTypes == CISecretType.All))
                          .ToListAsync(cancellationToken);

            await PerformServerCleanUpIfNeeded(server, sshAccess);

            // Then move on to the build starting, first thing is to download the CI executor script
            // TODO: is there a possibility that this is not secure? Someone would need to do HTTPS MItM attack...

            var executorDownload         = GetUrlToDownloadCIExecutor();
            var executorResourceDownload = GetUrlToDownloadCIExecutorResource();
            var executorHash             = await hashCalculator.Sha256(executorDownload, cancellationToken);

            var posixHelperHash = await hashCalculator.Sha256(executorResourceDownload, cancellationToken);

            // TODO: using async would be nice for the run commands when supported
            // One command downloads both files and makes the executor runnable, "set -e" is put first in it
            var result1 = sshAccess
                          .RunCommand("set -e\n" +
                                      CreateDownloadCommand("~/CIExecutor", executorHash, executorDownload) +
                                      CreateDownloadCommand("~/libMonoPosixHelper.so", posixHelperHash,
                                                            executorResourceDownload) + "chmod +x ~/CIExecutor");

            if (!result1.Success)
            {
                throw new Exception($"Failed to run executor download step: {result1.Result}, error: {result1.Error}");
            }

            // This save is done here as the build status might get reported back to us before we finish with the ssh
            // commands
            job.State                = CIJobState.Running;
            job.RanOnServer          = serverIsExternal ? $"External server {serverId}" : $"Controlled server {serverId}";
            job.TimeWaitingForServer = DateTime.UtcNow - job.CreatedAt;
            await Database.SaveChangesAsync(cancellationToken);

            // and then run it with environment variables for this build

            // Remove all type secrets if there is one with the same name that is build specific
            var cleanedSecrets = secrets
                                 .Where(s => s.UsedForBuildTypes != CISecretType.All || !secrets.Any(s2 =>
                                                                                                     s2.SecretName == s.SecretName && s2.UsedForBuildTypes != s.UsedForBuildTypes))
                                 .Select(s => s.ToExecutorData());

            if (job.Build.CiProject == null)
            {
                throw new NotLoadedModelNavigationException();
            }

            // Build one line of "export NAME="value";" statements that is put in front of the executor command
            var env = new StringBuilder(250);

            env.Append("export CI_REF=\"");
            env.Append(BashEscape.EscapeForBash(job.Build.RemoteRef));
            env.Append("\"; export CI_COMMIT_HASH=\"");
            env.Append(BashEscape.EscapeForBash(job.Build.CommitHash));
            env.Append("\"; export CI_EARLIER_COMMIT=\"");
            env.Append(BashEscape.EscapeForBash(job.Build.PreviousCommit ?? AppInfo.NoCommitHash));
            env.Append("\"; export CI_BRANCH=\"");
            env.Append(BashEscape.EscapeForBash(job.Build.Branch ?? "unknown_branch"));
            env.Append("\"; export CI_DEFAULT_BRANCH=\"");
            env.Append(BashEscape.EscapeForBash(job.Build.CiProject.DefaultBranch));
            env.Append("\"; export CI_TRUSTED=\"");
            // NOTE(review): the only value appended without EscapeForBash, presumably a bool; confirm
            env.Append(job.Build.IsSafe);
            env.Append("\"; export CI_ORIGIN=\"");
            env.Append(BashEscape.EscapeForBash(job.Build.CiProject.RepositoryCloneUrl));
            env.Append("\"; export CI_IMAGE_DL_URL=\"");
            env.Append(BashEscape.EscapeForBash(imageDownloadUrl));
            env.Append("\"; export CI_IMAGE_NAME=\"");
            env.Append(BashEscape.EscapeForBash(job.Image));
            env.Append("\"; export CI_IMAGE_FILENAME=\"");
            env.Append(BashEscape.EscapeForBash(imageFileName));
            env.Append("\"; export CI_CACHE_OPTIONS=\"");
            env.Append(BashEscape.EscapeForBash(job.CacheSettingsJson ?? "{}"));
            env.Append("\"; export CI_SECRETS=\"");
            env.Append(BashEscape.EscapeForBash(JsonSerializer.Serialize(cleanedSecrets)));
            env.Append("\"; export CI_JOB_NAME=\"");
            env.Append(BashEscape.EscapeForBash(job.JobName));
            env.Append("\";");

            // The executor is started with nohup in the background, with its output redirected to a file
            var result2 =
                sshAccess.RunCommand($"{env} nohup ~/CIExecutor {GetConnectToUrl(job)} > " +
                                     "build_script_output.txt 2>&1 &");

            if (!result2.Success)
            {
                throw new Exception($"Failed to start running CI executor: {result2.Result}, error: {result2.Error}");
            }

            // Follow-up jobs: a connection check after 5 minutes and a stuck build check after 61 minutes
            JobClient.Schedule <CheckCIJobOutputHasConnectedJob>(
                x => x.Execute(ciProjectId, ciBuildId, ciJobId, serverId, CancellationToken.None),
                TimeSpan.FromMinutes(5));

            JobClient.Schedule <CancelCIBuildIfStuckJob>(
                x => x.Execute(ciProjectId, ciBuildId, ciJobId, serverId, server.IsExternal, CancellationToken.None),
                TimeSpan.FromMinutes(61));

            Logger.LogInformation(
                "CI job startup succeeded, now it's up to the executor to contact us with updates");
        }
        /// <summary>
        ///   Deals with a CI job that has not finished by the time this check runs. An external server is
        ///   rebooted over SSH and a start up wait job is scheduled for it, a controlled server has its instance
        ///   stopped. After that a job to set the CI job's status as finished (unsuccessfully) is enqueued.
        /// </summary>
        /// <param name="ciProjectId">Project part of the key identifying the CI job</param>
        /// <param name="ciBuildId">Build part of the key identifying the CI job</param>
        /// <param name="ciJobId">Job part of the key identifying the CI job</param>
        /// <param name="serverId">Id of the server the job was started on</param>
        /// <param name="externalServer">True when the server is an external one, false for a controlled one</param>
        /// <param name="cancellationToken">Cancels the lookups, the final save is done without it</param>
        /// <exception cref="ArgumentException">
        ///   If the server is not found, or a controlled server has no instance id
        /// </exception>
        /// <exception cref="InvalidOperationException">If an external server has no public address</exception>
        public async Task Execute(long ciProjectId, long ciBuildId, long ciJobId, long serverId, bool externalServer,
                                  CancellationToken cancellationToken)
        {
            var stuckJob = await database.CiJobs.FirstOrDefaultAsync(
                candidate => candidate.CiProjectId == ciProjectId && candidate.CiBuildId == ciBuildId &&
                    candidate.CiJobId == ciJobId,
                cancellationToken);

            if (stuckJob == null)
            {
                logger.LogWarning("Failed to check if a CI job is stuck, can't find the job");
                return;
            }

            // A job that already finished needs nothing done to it
            if (stuckJob.State == CIJobState.Finished)
            {
                return;
            }

            logger.LogError(
                "Detected CI job {CIProjectId}-{CIBuildId}-{CIJobId} as stuck running (total build time limit reached)",
                ciProjectId, ciBuildId, ciJobId);

            if (!externalServer)
            {
                await StopControlledServer();
            }
            else
            {
                await RebootExternalServer();
            }

            if (stuckJob.RunningOnServerId != serverId)
            {
                logger.LogError("Wrong RunningOnServerId in job (total runtime limit exceeded)");
                stuckJob.RunningOnServerId = serverId;
            }

            // Not cancellable done as the state to terminated is very important to save
            // ReSharper disable once MethodSupportsCancellation
            await database.SaveChangesAsync();

            jobClient.Enqueue <SetFinishedCIJobStatusJob>(x =>
                                                          x.Execute(ciProjectId, ciBuildId, ciJobId, false, CancellationToken.None));

            // Reboots the external server over SSH and queues a job that waits for it to come back up
            async Task RebootExternalServer()
            {
                var external =
                    await database.ExternalServers.FindAsync(new object[] { serverId }, cancellationToken);

                if (external == null)
                {
                    throw new ArgumentException("Could not find server to release for a stuck build");
                }

                cancellationToken.ThrowIfCancellationRequested();

                if (external.PublicAddress == null)
                {
                    throw new InvalidOperationException("Can't connect to a server with no public address");
                }

                var host = external.PublicAddress.ToString();
                externalServerSSHAccess.ConnectTo(host, external.SSHKeyFileName);
                externalServerSSHAccess.Reboot();

                var rebootEntry = new LogEntry()
                {
                    Message = $"External server {external.Id} timed out running CI job, force rebooting it",
                };
                await database.LogEntries.AddAsync(rebootEntry, cancellationToken);

                external.StatusLastChecked = DateTime.UtcNow;
                external.ReservationType   = ServerReservationType.None;
                external.Status            = ServerStatus.Stopping;
                external.BumpUpdatedAt();

                jobClient.Schedule <WaitForExternalServerStartUpJob>(
                    x => x.Execute(external.Id, CancellationToken.None), TimeSpan.FromSeconds(20));
                logger.LogInformation("Successfully commanded reboot on: {ServerId}", external.Id);
            }

            // Stops the controlled server's instance and adds the time it was running to its total runtime
            async Task StopControlledServer()
            {
                var controlled =
                    await database.ControlledServers.FindAsync(new object[] { serverId }, cancellationToken);

                if (controlled == null)
                {
                    throw new ArgumentException("Could not find server to release for a stuck build");
                }

                cancellationToken.ThrowIfCancellationRequested();

                // The log entry is added before the instance id check, so it is added even when that check throws
                var stopEntry = new LogEntry()
                {
                    Message =
                        $"Server {controlled.Id} ({controlled.InstanceId}) timed out running CI job, stopping it, running " +
                        $"since {controlled.UpdatedAt}"
                };
                await database.LogEntries.AddAsync(stopEntry, cancellationToken);

                if (string.IsNullOrEmpty(controlled.InstanceId))
                {
                    throw new ArgumentException("Stuck server has no InstanceId, can't stop it");
                }

                await ec2Controller.StopInstance(controlled.InstanceId, false);

                controlled.Status = ServerStatus.Stopping;

                if (controlled.RunningSince != null)
                {
                    var ranFor = DateTime.UtcNow - controlled.RunningSince.Value;
                    controlled.TotalRuntime += ranFor.TotalSeconds;
                }

                controlled.RunningSince = null;

                logger.LogInformation("Successfully signaled stop on: {InstanceId}", controlled.InstanceId);
            }
        }