Example #1
0
        /// <summary>
        /// Keeps the lock on a job request alive by renewing it in a loop. The loop ends when
        /// <paramref name="token"/> is cancelled, when the server reports that the job or its
        /// token is no longer valid, or when the retry budget is used up.
        /// </summary>
        /// <param name="poolId">Pool id forwarded to the renew call.</param>
        /// <param name="requestId">Id of the job request whose lock is renewed.</param>
        /// <param name="lockToken">Lock token forwarded to the renew call.</param>
        /// <param name="firstJobRequestRenewed">Completed with result 0 after the first successful renewal.</param>
        /// <param name="token">Cancellation token that stops the renewal loop.</param>
        public async Task RenewJobRequestAsync(int poolId, long requestId, Guid lockToken, TaskCompletionSource<int> firstJobRequestRenewed, CancellationToken token)
        {
            var server = HostContext.GetService<IAgentServer>();
            TaskAgentJobRequest lastRenewed = null;
            int firstRenewAttemptsLeft = 5;
            int failureStreak = 0;

            // Keep renewing while the job runs; only cancellation, an invalid job,
            // or an exhausted retry budget ends the loop.
            while (!token.IsCancellationRequested)
            {
                try
                {
                    lastRenewed = await server.RenewAgentRequestAsync(poolId, requestId, lockToken, token);

                    Trace.Info($"Successfully renew job request {requestId}, job is valid till {lastRenewed.LockedUntil.Value}");

                    // Signal the first successful renewal exactly once.
                    if (!firstJobRequestRenewed.Task.IsCompleted)
                    {
                        firstJobRequestRenewed.TrySetResult(0);
                    }

                    // Coming back from a failure streak: restore the 60s connection timeout.
                    if (failureStreak > 0)
                    {
                        failureStreak = 0;
                        server.SetConnectionTimeout(AgentConnectionType.JobRequest, TimeSpan.FromSeconds(60));
                        HostContext.WritePerfCounter("JobRenewRecovered");
                    }

                    // Next renewal is due in 60 seconds.
                    await HostContext.Delay(TimeSpan.FromSeconds(60), token);
                }
                catch (TaskAgentJobNotFoundException)
                {
                    // The job is gone; retrying cannot help.
                    Trace.Info($"TaskAgentJobNotFoundException received when renew job request {requestId}, job is no longer valid, stop renew job request.");
                    return;
                }
                catch (TaskAgentJobTokenExpiredException)
                {
                    // The job token expired; retrying cannot help.
                    Trace.Info($"TaskAgentJobTokenExpiredException received renew job request {requestId}, job is no longer valid, stop renew job request.");
                    return;
                }
                catch (OperationCanceledException) when (token.IsCancellationRequested)
                {
                    // An OperationCanceledException can also come from an http timeout;
                    // only stop when our own token was the one that fired.
                    Trace.Info($"job renew has been canceled, stop renew job request {requestId}.");
                    return;
                }
                catch (Exception ex)
                {
                    Trace.Error($"Catch exception during renew agent jobrequest {requestId}.");
                    Trace.Error(ex);
                    failureStreak++;

                    // Work out how much retry budget is left.
                    TimeSpan remaining;
                    if (firstJobRequestRenewed.Task.IsCompleted)
                    {
                        // After the first success: retry until LockedUntil plus a 5 minute buffer.
                        remaining = lastRenewed.LockedUntil.Value + TimeSpan.FromMinutes(5) - DateTime.UtcNow;
                    }
                    else
                    {
                        // Before the first success: at most 5 retries.
                        remaining = firstRenewAttemptsLeft-- > 0 ? TimeSpan.FromSeconds(10) : TimeSpan.Zero;
                    }

                    if (remaining <= TimeSpan.Zero)
                    {
                        Trace.Info($"Lock renewal has run out of retry, stop renew lock for jobrequest {requestId}.");
                        HostContext.WritePerfCounter("JobRenewReachLimit");
                        return;
                    }

                    TimeSpan backoff;
                    if (firstJobRequestRenewed.Task.IsCompleted)
                    {
                        Trace.Info($"Retrying lock renewal for jobrequest {requestId}. Job is valid until {lastRenewed.LockedUntil.Value}.");

                        // Back off harder once more than 5 failures have piled up.
                        bool longStreak = failureStreak > 5;
                        backoff = BackoffTimerHelper.GetRandomBackoff(
                            TimeSpan.FromSeconds(longStreak ? 15 : 5),
                            TimeSpan.FromSeconds(longStreak ? 30 : 15));
                    }
                    else
                    {
                        Trace.Info($"Retrying lock renewal for jobrequest {requestId}. The first job renew request has failed.");
                        backoff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromSeconds(1), TimeSpan.FromSeconds(10));
                    }

                    // Re-establish the connection to avoid affinity with one server,
                    // using a shorter 30s timeout (down from 60s) while recovering.
                    HostContext.WritePerfCounter("ResetJobRenewConnection");
                    await server.RefreshConnectionAsync(AgentConnectionType.JobRequest, TimeSpan.FromSeconds(30));

                    try
                    {
                        // Wait out the backoff before the next attempt.
                        await HostContext.Delay(backoff, token);
                    }
                    catch (OperationCanceledException) when (token.IsCancellationRequested)
                    {
                        // The loop condition sees the cancelled token and exits.
                        Trace.Info($"job renew has been canceled, stop renew job request {requestId}.");
                    }
                }
            }
        }
Example #2
0
        /// <summary>
        /// Polls the server for the next message of the current session and returns the first
        /// non-null one. Retriable failures are retried after a randomized backoff; an expired
        /// session is recovered by recreating it unless <c>_settings.SkipSessionRecover</c> is set.
        /// </summary>
        /// <param name="token">Cancellation token observed before every poll and during waits.</param>
        /// <returns>The next message, after it has been passed through <c>DecryptMessage</c>.</returns>
        /// <exception cref="OperationCanceledException">The token was cancelled.</exception>
        /// <exception cref="TaskAgentAccessTokenExpiredException">Rethrown without retry.</exception>
        public async Task<TaskAgentMessage> GetNextMessageAsync(CancellationToken token)
        {
            Trace.Entering();
            ArgUtil.NotNull(_session, nameof(_session));
            ArgUtil.NotNull(_settings, nameof(_settings));
            bool encounteringError = false;
            int continuousError = 0;

            // Used to emit an info-level "still alive" trace at most once every 30 minutes.
            Stopwatch heartbeat = Stopwatch.StartNew();
            while (true)
            {
                token.ThrowIfCancellationRequested();
                TaskAgentMessage message = null;
                try
                {
                    message = await _runnerServer.GetAgentMessageAsync(_settings.PoolId,
                                                                       _session.SessionId,
                                                                       _lastMessageId,
                                                                       token);

                    // Decrypt the message body if the session is using encryption
                    message = DecryptMessage(message);

                    if (message != null)
                    {
                        _lastMessageId = message.MessageId;
                    }

                    if (encounteringError) //print the message once only if there was an error
                    {
                        _term.WriteLine($"{DateTime.UtcNow:u}: Runner reconnected.");
                        encounteringError = false;
                        continuousError = 0;
                    }
                }
                catch (OperationCanceledException) when (token.IsCancellationRequested)
                {
                    Trace.Info("Get next message has been cancelled.");
                    throw;
                }
                catch (TaskAgentAccessTokenExpiredException)
                {
                    Trace.Info("Runner OAuth token has been revoked. Unable to pull message.");
                    throw;
                }
                catch (Exception ex)
                {
                    Trace.Error("Catch exception during get next message.");
                    Trace.Error(ex);

                    // don't retry if SkipSessionRecover = true, DT service will delete agent session to stop agent from taking more jobs.
                    if (ex is TaskAgentSessionExpiredException && !_settings.SkipSessionRecover && await CreateSessionAsync(token))
                    {
                        Trace.Info($"{nameof(TaskAgentSessionExpiredException)} received, recovered by recreate session.");
                    }
                    else if (!IsGetNextMessageExceptionRetriable(ex))
                    {
                        throw;
                    }
                    else
                    {
                        continuousError++;
                        // Retry after a random backoff to avoid service throttling when a service
                        // error kicks every agent off the long poll and they all reconnect at once.
                        if (continuousError <= 5)
                        {
                            // random backoff [15, 30]
                            _getNextMessageRetryInterval = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromSeconds(15), TimeSpan.FromSeconds(30), _getNextMessageRetryInterval);
                        }
                        else
                        {
                            // more aggressive backoff [30, 60]
                            _getNextMessageRetryInterval = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromSeconds(30), TimeSpan.FromSeconds(60), _getNextMessageRetryInterval);
                        }

                        if (!encounteringError)
                        {
                            //print error only on the first consecutive error
                            _term.WriteError($"{DateTime.UtcNow:u}: Runner connect error: {ex.Message}. Retrying until reconnected.");
                            encounteringError = true;
                        }

                        // re-create VssConnection before next retry
                        await _runnerServer.RefreshConnectionAsync(RunnerConnectionType.MessageQueue, TimeSpan.FromSeconds(60));

                        Trace.Info("Sleeping for {0} seconds before retrying.", _getNextMessageRetryInterval.TotalSeconds);
                        await HostContext.Delay(_getNextMessageRetryInterval, token);
                    }
                }

                if (message == null)
                {
                    if (heartbeat.Elapsed > TimeSpan.FromMinutes(30))
                    {
                        Trace.Info($"No message retrieved from session '{_session.SessionId}' within last 30 minutes.");
                        heartbeat.Restart();
                    }
                    else
                    {
                        Trace.Verbose($"No message retrieved from session '{_session.SessionId}'.");
                    }

                    continue;
                }

                Trace.Info($"Message '{message.MessageId}' received from session '{_session.SessionId}'.");
                return message;
            }
        }
Example #3
0
        /// <summary>
        /// Loads settings and credentials, connects to the runner server and creates an agent
        /// session, retrying every <c>_sessionCreationRetryInterval</c> while the failure is
        /// considered retriable by <c>IsSessionCreationExceptionRetriable</c>.
        /// </summary>
        /// <param name="token">Cancellation token observed before each attempt and during waits.</param>
        /// <returns>
        /// <c>true</c> once the session has been created; <c>false</c> when the failure is not
        /// retriable or the OAuth validation reports <c>invalid_client</c>.
        /// </returns>
        /// <exception cref="OperationCanceledException">The token was cancelled.</exception>
        /// <exception cref="TaskAgentAccessTokenExpiredException">Rethrown without retry.</exception>
        public async Task<Boolean> CreateSessionAsync(CancellationToken token)
        {
            Trace.Entering();

            // Settings
            var configManager = HostContext.GetService<IConfigurationManager>();

            _settings = configManager.LoadSettings();
            var serverUrl = _settings.ServerUrl;

            Trace.Info(_settings);

            // Create connection.
            Trace.Info("Loading Credentials");
            var credMgr = HostContext.GetService<ICredentialManager>();
            VssCredentials creds = credMgr.LoadCredentials();

            var agent = new TaskAgentReference
            {
                Id = _settings.AgentId,
                Name = _settings.AgentName,
                Version = BuildConstants.RunnerPackage.Version,
                OSDescription = RuntimeInformation.OSDescription,
            };
            string sessionName = Environment.MachineName ?? "RUNNER";
            var taskAgentSession = new TaskAgentSession(sessionName, agent);

            // Tracks whether the connect error has already been printed to the terminal.
            bool encounteringError = false;

            while (true)
            {
                token.ThrowIfCancellationRequested();
                Trace.Info("Attempt to create session.");
                try
                {
                    Trace.Info("Connecting to the Runner Server...");
                    await _runnerServer.ConnectAsync(new Uri(serverUrl), creds);

                    Trace.Info("VssConnection created");

                    _term.WriteLine();
                    _term.WriteSuccessMessage("Connected to GitHub");
                    _term.WriteLine();

                    _session = await _runnerServer.CreateAgentSessionAsync(
                        _settings.PoolId,
                        taskAgentSession,
                        token);

                    Trace.Info("Session created.");
                    if (encounteringError)
                    {
                        _term.WriteLine($"{DateTime.UtcNow:u}: Runner reconnected.");
                        _sessionCreationExceptionTracker.Clear();
                        encounteringError = false;
                    }

                    return true;
                }
                catch (OperationCanceledException) when (token.IsCancellationRequested)
                {
                    Trace.Info("Session creation has been cancelled.");
                    throw;
                }
                catch (TaskAgentAccessTokenExpiredException)
                {
                    Trace.Info("Runner OAuth token has been revoked. Session creation failed.");
                    throw;
                }
                catch (Exception ex)
                {
                    Trace.Error("Catch exception during create session.");
                    Trace.Error(ex);

                    if (ex is VssOAuthTokenRequestException && creds.Federated is VssOAuthCredential vssOAuthCred)
                    {
                        // Check whether we get 401 because the runner registration already removed by the service.
                        // If the runner registration get deleted, we can't exchange oauth token.
                        Trace.Error("Test oauth app registration.");
                        var oauthTokenProvider = new VssOAuthTokenProvider(vssOAuthCred, new Uri(serverUrl));
                        var authError = await oauthTokenProvider.ValidateCredentialAsync(token);

                        if (string.Equals(authError, "invalid_client", StringComparison.OrdinalIgnoreCase))
                        {
                            _term.WriteError("Failed to create a session. The runner registration has been deleted from the server, please re-configure.");
                            return false;
                        }
                    }

                    if (!IsSessionCreationExceptionRetriable(ex))
                    {
                        _term.WriteError($"Failed to create session. {ex.Message}");
                        return false;
                    }

                    if (!encounteringError) //print the message only on the first error
                    {
                        _term.WriteError($"{DateTime.UtcNow:u}: Runner connect error: {ex.Message}. Retrying until reconnected.");
                        encounteringError = true;
                    }

                    Trace.Info("Sleeping for {0} seconds before retrying.", _sessionCreationRetryInterval.TotalSeconds);
                    await HostContext.Delay(_sessionCreationRetryInterval, token);
                }
            }
        }
Example #4
0
        /// <summary>
        /// Polls the agent server for the next message of the current session and returns the
        /// first non-null one. An expired session is recovered by recreating it; other retriable
        /// failures are retried after <c>_getNextMessageRetryInterval</c>.
        /// </summary>
        /// <param name="token">Cancellation token observed before every poll and during waits.</param>
        /// <returns>The next message, after it has been passed through <c>DecryptMessage</c>.</returns>
        /// <exception cref="OperationCanceledException">The token was cancelled.</exception>
        public async Task<TaskAgentMessage> GetNextMessageAsync(CancellationToken token)
        {
            Trace.Entering();
            ArgUtil.NotNull(_session, nameof(_session));
            ArgUtil.NotNull(_settings, nameof(_settings));
            bool encounteringError = false;

            // Used to emit an info-level "still alive" trace at most once every 30 minutes.
            Stopwatch heartbeat = Stopwatch.StartNew();
            while (true)
            {
                token.ThrowIfCancellationRequested();
                TaskAgentMessage message = null;
                try
                {
                    message = await _agentServer.GetAgentMessageAsync(_settings.PoolId,
                                                                      _session.SessionId,
                                                                      _lastMessageId,
                                                                      token);

                    // Decrypt the message body if the session is using encryption
                    message = DecryptMessage(message);

                    if (message != null)
                    {
                        _lastMessageId = message.MessageId;
                    }

                    if (encounteringError) //print the message once only if there was an error
                    {
                        _term.WriteLine(StringUtil.Loc("QueueConnected", DateTime.UtcNow));
                        encounteringError = false;
                    }
                }
                catch (OperationCanceledException) when (token.IsCancellationRequested)
                {
                    Trace.Info("Get next message has been cancelled.");
                    throw;
                }
                catch (Exception ex)
                {
                    Trace.Error("Catch exception during get next message.");
                    Trace.Error(ex);

                    if (ex is TaskAgentSessionExpiredException && await CreateSessionAsync(token))
                    {
                        Trace.Info($"{nameof(TaskAgentSessionExpiredException)} received, recoverd by recreate session.");
                    }
                    else if (!IsGetNextMessageExceptionRetriable(ex))
                    {
                        throw;
                    }
                    else
                    {
                        //retry after a delay
                        if (!encounteringError)
                        {
                            //print error only on the first consecutive error
                            _term.WriteError(StringUtil.Loc("QueueConError", DateTime.UtcNow, ex.Message, _getNextMessageRetryInterval.TotalSeconds));
                            encounteringError = true;
                        }

                        Trace.Info("Sleeping for {0} seconds before retrying.", _getNextMessageRetryInterval.TotalSeconds);
                        await HostContext.Delay(_getNextMessageRetryInterval, token);
                    }
                }

                if (message == null)
                {
                    if (heartbeat.Elapsed > TimeSpan.FromMinutes(30))
                    {
                        Trace.Info($"No message retrieved from session '{_session.SessionId}' within last 30 minutes.");
                        heartbeat.Restart();
                    }
                    else
                    {
                        Trace.Verbose($"No message retrieved from session '{_session.SessionId}'.");
                    }

                    continue;
                }

                Trace.Info($"Message '{message.MessageId}' received from session '{_session.SessionId}'.");
                return message;
            }
        }
Example #5
0
        /// <summary>
        /// Loads settings, gathers system capabilities, connects to the agent server and creates
        /// an agent session, retrying every <c>_sessionCreationRetryInterval</c> while the failure
        /// is considered retriable by <c>IsSessionCreationExceptionRetriable</c>.
        /// </summary>
        /// <param name="token">Cancellation token observed before each attempt and during waits.</param>
        /// <returns><c>true</c> once the session has been created; <c>false</c> when the failure is not retriable.</returns>
        /// <exception cref="OperationCanceledException">The token was cancelled.</exception>
        public async Task<Boolean> CreateSessionAsync(CancellationToken token)
        {
            Trace.Entering();

            // Settings
            var configManager = HostContext.GetService<IConfigurationManager>();

            _settings = configManager.LoadSettings();
            var serverUrl = _settings.ServerUrl;

            Trace.Info(_settings);

            // Capabilities.
            _term.WriteLine(StringUtil.Loc("ScanToolCapabilities"));
            Dictionary<string, string> systemCapabilities = await HostContext.GetService<ICapabilitiesManager>().GetCapabilitiesAsync(_settings, token);

            // Create connection.
            Trace.Verbose("Loading Credentials");
            var credMgr = HostContext.GetService<ICredentialManager>();
            VssCredentials creds = credMgr.LoadCredentials();
            Uri uri = new Uri(serverUrl);
            VssConnection conn = ApiUtil.CreateConnection(uri, creds);

            var agent = new TaskAgentReference
            {
                Id = _settings.AgentId,
                Name = _settings.AgentName,
                Version = Constants.Agent.Version,
            };
            string sessionName = Environment.MachineName ?? "AGENT";
            var taskAgentSession = new TaskAgentSession(sessionName, agent, systemCapabilities);

            // Tracks whether the connect error has already been printed to the terminal.
            bool encounteringError = false;

            _term.WriteLine(StringUtil.Loc("ConnectToServer"));
            while (true)
            {
                token.ThrowIfCancellationRequested();
                Trace.Info("Attempt to create session.");
                try
                {
                    Trace.Info("Connecting to the Agent Server...");
                    await _agentServer.ConnectAsync(conn);

                    _session = await _agentServer.CreateAgentSessionAsync(
                        _settings.PoolId,
                        taskAgentSession,
                        token);

                    Trace.Info("Session created.");
                    if (encounteringError)
                    {
                        _term.WriteLine(StringUtil.Loc("QueueConnected", DateTime.UtcNow));
                        _sessionCreationExceptionTracker.Clear();
                        encounteringError = false;
                    }

                    return true;
                }
                catch (OperationCanceledException) when (token.IsCancellationRequested)
                {
                    Trace.Info("Session creation has been cancelled.");
                    throw;
                }
                catch (Exception ex)
                {
                    Trace.Error("Catch exception during create session.");
                    Trace.Error(ex);

                    if (!IsSessionCreationExceptionRetriable(ex))
                    {
                        _term.WriteError(StringUtil.Loc("SessionCreateFailed", ex.Message));
                        return false;
                    }

                    if (!encounteringError) //print the message only on the first error
                    {
                        _term.WriteError(StringUtil.Loc("QueueConError", DateTime.UtcNow, ex.Message, _sessionCreationRetryInterval.TotalSeconds));
                        encounteringError = true;
                    }

                    Trace.Info("Sleeping for {0} seconds before retrying.", _sessionCreationRetryInterval.TotalSeconds);
                    await HostContext.Delay(_sessionCreationRetryInterval, token);
                }
            }
        }
Example #6
0
        /// <summary>
        /// Background loop that, after a random 30-45 minute delay per iteration, asks the server
        /// for a migrated OAuth authorization url. When a new url is returned it is tested by
        /// connecting with credentials built from it, saved through
        /// <c>_configStore.SaveMigratedCredential</c>, and the new credential is returned.
        /// </summary>
        /// <param name="token">Cancellation token that ends the loop.</param>
        /// <returns>The credential built from the migrated authorization url.</returns>
        /// <exception cref="OperationCanceledException">The token was cancelled.</exception>
        private async Task<VssCredentials> GetNewOAuthAuthorizationSetting(CancellationToken token)
        {
            Trace.Info("Start checking oauth authorization url update.");
            while (true)
            {
                var backoff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromMinutes(30), TimeSpan.FromMinutes(45));
                await HostContext.Delay(backoff, token);

                try
                {
                    var migratedAuthorizationUrl = await _runnerServer.GetRunnerAuthUrlAsync(_settings.PoolId, _settings.AgentId);

                    if (!string.IsNullOrEmpty(migratedAuthorizationUrl))
                    {
                        var credData = _configStore.GetCredentials();
                        var clientId = credData.Data.GetValueOrDefault("clientId", null);
                        var currentAuthorizationUrl = credData.Data.GetValueOrDefault("authorizationUrl", null);
                        Trace.Info($"Current authorization url: {currentAuthorizationUrl}, new authorization url: {migratedAuthorizationUrl}");

                        if (string.Equals(currentAuthorizationUrl, migratedAuthorizationUrl, StringComparison.OrdinalIgnoreCase))
                        {
                            // We don't need to update credentials.
                            Trace.Info("No needs to update authorization url");

                            // A delay of -1 ms never elapses, so this parks the task until
                            // the token is cancelled.
                            await Task.Delay(TimeSpan.FromMilliseconds(-1), token);
                        }

                        var keyManager = HostContext.GetService<IRSAKeyManager>();
                        var signingCredentials = VssSigningCredentials.Create(() => keyManager.GetKey());

                        var migratedClientCredential = new VssOAuthJwtBearerClientCredential(clientId, migratedAuthorizationUrl, signingCredentials);
                        var migratedRunnerCredential = new VssOAuthCredential(new Uri(migratedAuthorizationUrl, UriKind.Absolute), VssOAuthGrant.ClientCredentials, migratedClientCredential);

                        // Verify the new url works on a separate server instance before saving it.
                        Trace.Info("Try connect service with Token Service OAuth endpoint.");
                        var runnerServer = HostContext.CreateService<IRunnerServer>();
                        await runnerServer.ConnectAsync(new Uri(_settings.ServerUrl), migratedRunnerCredential);

                        await runnerServer.GetAgentPoolsAsync();

                        Trace.Info("Successfully connected service with new authorization url.");

                        var migratedCredData = new CredentialData
                        {
                            Scheme = Constants.Configuration.OAuth,
                            Data =
                            {
                                { "clientId", clientId },
                                { "authorizationUrl", migratedAuthorizationUrl },
                                { "oauthEndpointUrl", migratedAuthorizationUrl },
                            },
                        };

                        _configStore.SaveMigratedCredential(migratedCredData);
                        return migratedRunnerCredential;
                    }
                    else
                    {
                        Trace.Verbose("No authorization url updates");
                    }
                }
                catch (OperationCanceledException) when (token.IsCancellationRequested)
                {
                    // Cancellation is a normal stop, not a migration failure: rethrow instead of
                    // falling into the generic handler, which would report it to the server as an error.
                    Trace.Info("Authorization url update check has been cancelled.");
                    throw;
                }
                catch (Exception ex)
                {
                    Trace.Error("Fail to get/test new authorization url.");
                    Trace.Error(ex);

                    try
                    {
                        await _runnerServer.ReportRunnerAuthUrlErrorAsync(_settings.PoolId, _settings.AgentId, ex.ToString());
                    }
                    catch (Exception e)
                    {
                        // best effort
                        Trace.Error("Fail to report the migration error");
                        Trace.Error(e);
                    }
                }
            }
        }
Example #7
0
        /// <summary>
        /// Loads settings and credentials (migrated ones unless the
        /// <c>GITHUB_ACTIONS_RUNNER_SPSAUTHURL</c> environment variable converts to true), connects
        /// to the runner server and creates an agent session, retrying every
        /// <c>_sessionCreationRetryInterval</c>. On a non-retriable failure while migrated
        /// credentials are in use, it falls back to the original credentials and tries again.
        /// </summary>
        /// <param name="token">Cancellation token observed before each attempt and during waits.</param>
        /// <returns>
        /// <c>true</c> once the session has been created; <c>false</c> when the failure is not
        /// retriable and migrated credentials are not in use.
        /// </returns>
        /// <exception cref="OperationCanceledException">The token was cancelled.</exception>
        /// <exception cref="TaskAgentAccessTokenExpiredException">Rethrown without retry.</exception>
        public async Task<Boolean> CreateSessionAsync(CancellationToken token)
        {
            Trace.Entering();

            // Settings
            var configManager = HostContext.GetService<IConfigurationManager>();

            _settings = configManager.LoadSettings();
            var serverUrl = _settings.ServerUrl;

            Trace.Info(_settings);

            // Create connection.
            Trace.Info("Loading Credentials");
            _useMigratedCredentials = !StringUtil.ConvertToBoolean(Environment.GetEnvironmentVariable("GITHUB_ACTIONS_RUNNER_SPSAUTHURL"));
            VssCredentials creds = _credMgr.LoadCredentials(_useMigratedCredentials);

            var agent = new TaskAgentReference
            {
                Id = _settings.AgentId,
                Name = _settings.AgentName,
                Version = BuildConstants.RunnerPackage.Version,
                OSDescription = RuntimeInformation.OSDescription,
            };
            string sessionName = Environment.MachineName ?? "RUNNER";
            var taskAgentSession = new TaskAgentSession(sessionName, agent);

            // Tracks whether the connect error has already been printed to the terminal.
            bool encounteringError = false;

            var originalCreds = _configStore.GetCredentials();
            var migratedCreds = _configStore.GetMigratedCredentials();

            if (migratedCreds == null)
            {
                // Nothing has been migrated yet; for OAuth, schedule a background check for a new url.
                _useMigratedCredentials = false;
                if (originalCreds.Scheme == Constants.Configuration.OAuth)
                {
                    _needToCheckAuthorizationUrlUpdate = true;
                }
            }

            while (true)
            {
                token.ThrowIfCancellationRequested();
                Trace.Info("Attempt to create session.");
                try
                {
                    Trace.Info("Connecting to the Runner Server...");
                    await _runnerServer.ConnectAsync(new Uri(serverUrl), creds);

                    Trace.Info("VssConnection created");

                    _term.WriteLine();
                    _term.WriteSuccessMessage("Connected to GitHub");
                    _term.WriteLine();

                    _session = await _runnerServer.CreateAgentSessionAsync(
                        _settings.PoolId,
                        taskAgentSession,
                        token);

                    Trace.Info("Session created.");
                    if (encounteringError)
                    {
                        _term.WriteLine($"{DateTime.UtcNow:u}: Runner reconnected.");
                        _sessionCreationExceptionTracker.Clear();
                        encounteringError = false;
                    }

                    if (_needToCheckAuthorizationUrlUpdate)
                    {
                        // start background task try to get new authorization url
                        _authorizationUrlMigrationBackgroundTask = GetNewOAuthAuthorizationSetting(token);
                    }

                    return true;
                }
                catch (OperationCanceledException) when (token.IsCancellationRequested)
                {
                    Trace.Info("Session creation has been cancelled.");
                    throw;
                }
                catch (TaskAgentAccessTokenExpiredException)
                {
                    Trace.Info("Runner OAuth token has been revoked. Session creation failed.");
                    throw;
                }
                catch (Exception ex)
                {
                    Trace.Error("Catch exception during create session.");
                    Trace.Error(ex);

                    if (!IsSessionCreationExceptionRetriable(ex))
                    {
                        if (_useMigratedCredentials)
                        {
                            // migrated credentials might cause lose permission during permission check,
                            // we will force to use original credential and try again
                            _useMigratedCredentials = false;
                            var reattemptBackoff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromHours(24), TimeSpan.FromHours(36));
                            _authorizationUrlRollbackReattemptDelayBackgroundTask = HostContext.Delay(reattemptBackoff, token); // retry migrated creds in 24-36 hours.
                            creds = _credMgr.LoadCredentials(false);
                            Trace.Error("Fallback to original credentials and try again.");
                        }
                        else
                        {
                            _term.WriteError($"Failed to create session. {ex.Message}");
                            return false;
                        }
                    }

                    if (!encounteringError) //print the message only on the first error
                    {
                        _term.WriteError($"{DateTime.UtcNow:u}: Runner connect error: {ex.Message}. Retrying until reconnected.");
                        encounteringError = true;
                    }

                    Trace.Info("Sleeping for {0} seconds before retrying.", _sessionCreationRetryInterval.TotalSeconds);
                    await HostContext.Delay(_sessionCreationRetryInterval, token);
                }
            }
        }
Example #8
0
        /// <summary>
        /// Polls the server for the next message on the current session and returns the first non-null one.
        /// Each loop iteration calls <c>GetAgentMessageAsync</c>, passes the result through <c>DecryptMessage</c>,
        /// records its id in <c>_lastMessageId</c>, and opportunistically switches between migrated and original
        /// credentials. Retriable failures are followed by a randomized backoff before the next attempt.
        /// </summary>
        /// <param name="token">Cancellation token checked at the top of every iteration and passed to the server call and delays.</param>
        /// <returns>The first non-null message obtained from the server (after <c>DecryptMessage</c>).</returns>
        /// <exception cref="OperationCanceledException">Rethrown when <paramref name="token"/> has been cancelled.</exception>
        /// <exception cref="TaskAgentAccessTokenExpiredException">Rethrown without retry.</exception>
        /// <remarks>
        /// Any other exception is rethrown when <c>IsGetNextMessageExceptionRetriable</c> returns false and
        /// migrated credentials are not in use.
        /// </remarks>
        public async Task <TaskAgentMessage> GetNextMessageAsync(CancellationToken token)
        {
            Trace.Entering();
            ArgUtil.NotNull(_session, nameof(_session));
            ArgUtil.NotNull(_settings, nameof(_settings));
            // true once a retriable error has been reported to the terminal; cleared on the next successful call
            bool      encounteringError = false;
            // number of retriable errors seen since the last recovery; selects the backoff range below
            int       continuousError   = 0;
            // NOTE(review): errorMessage is assigned here but never read anywhere in this method.
            string    errorMessage      = string.Empty;
            // throttles the "no message" Info trace at the bottom of the loop to once per 30 minutes
            Stopwatch heartbeat         = new Stopwatch();

            heartbeat.Restart();
            while (true)
            {
                token.ThrowIfCancellationRequested();
                TaskAgentMessage message = null;
                try
                {
                    message = await _runnerServer.GetAgentMessageAsync(_settings.PoolId,
                                                                       _session.SessionId,
                                                                       _lastMessageId,
                                                                       token);

                    // Decrypt the message body if the session is using encryption
                    message = DecryptMessage(message);

                    // remember the id of the message just received; it is passed to the next GetAgentMessageAsync call
                    if (message != null)
                    {
                        _lastMessageId = message.MessageId;
                    }

                    if (encounteringError) //print the message once only if there was an error
                    {
                        _term.WriteLine($"{DateTime.UtcNow:u}: Runner reconnected.");
                        encounteringError = false;
                        continuousError   = 0;
                    }

                    // The background task that fetches the new authorization setting has finished:
                    // try to reconnect with the migrated credential, unless a job or self-update is in progress.
                    if (_needToCheckAuthorizationUrlUpdate &&
                        _authorizationUrlMigrationBackgroundTask?.IsCompleted == true)
                    {
                        if (HostContext.GetService <IJobDispatcher>().Busy ||
                            HostContext.GetService <ISelfUpdater>().Busy)
                        {
                            Trace.Info("Job or runner updates in progress, update credentials next time.");
                        }
                        else
                        {
                            try
                            {
                                // awaiting the completed task surfaces its result, or rethrows its failure into the catch below
                                var newCred = await _authorizationUrlMigrationBackgroundTask;
                                await _runnerServer.ConnectAsync(new Uri(_settings.ServerUrl), newCred);

                                Trace.Info("Updated connection to use migrated credential for next GetMessage call.");
                                _useMigratedCredentials = true;
                                _authorizationUrlMigrationBackgroundTask = null;
                                _needToCheckAuthorizationUrlUpdate       = false;
                            }
                            catch (Exception ex)
                            {
                                // best effort: the failure is only traced; the flags above stay unchanged,
                                // so this branch is entered again on a later iteration
                                Trace.Error("Fail to refresh connection with new authorization url.");
                                Trace.Error(ex);
                            }
                        }
                    }

                    // The rollback delay (started in the catch block below with a 24-36 hour backoff) has elapsed.
                    if (_authorizationUrlRollbackReattemptDelayBackgroundTask?.IsCompleted == true)
                    {
                        try
                        {
                            // we rolled back to the original creds 24-36 hours earlier, now it's a good time to try migrated creds again.
                            Trace.Info("Re-attempt to use migrated credential");
                            var migratedCreds = _credMgr.LoadCredentials();
                            await _runnerServer.ConnectAsync(new Uri(_settings.ServerUrl), migratedCreds);

                            _useMigratedCredentials = true;
                            _authorizationUrlRollbackReattemptDelayBackgroundTask = null;
                        }
                        catch (Exception ex)
                        {
                            // best effort: the delay task is left in place, so the re-attempt repeats on a later iteration
                            Trace.Error("Fail to refresh connection with new authorization url on rollback reattempt.");
                            Trace.Error(ex);
                        }
                    }
                }
                catch (OperationCanceledException) when(token.IsCancellationRequested)
                {
                    // only treated as cancellation when our own token was signalled;
                    // any other OperationCanceledException falls through to the general handler below
                    Trace.Info("Get next message has been cancelled.");
                    throw;
                }
                catch (TaskAgentAccessTokenExpiredException)
                {
                    Trace.Info("Runner OAuth token has been revoked. Unable to pull message.");
                    throw;
                }
                catch (Exception ex)
                {
                    Trace.Error("Catch exception during get next message.");
                    Trace.Error(ex);

                    // don't retry if SkipSessionRecover = true, DT service will delete agent session to stop agent from taking more jobs.
                    // CreateSessionAsync is only invoked for an expired session (short-circuit evaluation);
                    // if it returns false, control falls through to the retriable check below.
                    if (ex is TaskAgentSessionExpiredException && !_settings.SkipSessionRecover && await CreateSessionAsync(token))
                    {
                        Trace.Info($"{nameof(TaskAgentSessionExpiredException)} received, recovered by recreate session.");
                    }
                    else if (!IsGetNextMessageExceptionRetriable(ex))
                    {
                        if (_useMigratedCredentials)
                        {
                            // migrated credentials might cause lose permission during permission check,
                            // we will force to use original credential and try again
                            // NOTE(review): this branch has no delay; the loop retries immediately with the original credentials.
                            _useMigratedCredentials = false;
                            var reattemptBackoff = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromHours(24), TimeSpan.FromHours(36));
                            _authorizationUrlRollbackReattemptDelayBackgroundTask = HostContext.Delay(reattemptBackoff, token); // retry migrated creds in 24-36 hours.
                            var originalCreds = _credMgr.LoadCredentials(false);
                            await _runnerServer.ConnectAsync(new Uri(_settings.ServerUrl), originalCreds);

                            Trace.Error("Fallback to original credentials and try again.");
                        }
                        else
                        {
                            // non-retriable and nothing to fall back to: surface the original exception to the caller
                            throw;
                        }
                    }
                    else
                    {
                        continuousError++;
                        //retry after a random backoff to avoid service throttling
                        //in case of there is a service error happened and all agents get kicked off of the long poll and all agent try to reconnect back at the same time.
                        // the previous interval is passed as the third argument — presumably so the helper can vary the next value; confirm against BackoffTimerHelper
                        if (continuousError <= 5)
                        {
                            // random backoff [15, 30]
                            _getNextMessageRetryInterval = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromSeconds(15), TimeSpan.FromSeconds(30), _getNextMessageRetryInterval);
                        }
                        else
                        {
                            // more aggressive backoff [30, 60]
                            _getNextMessageRetryInterval = BackoffTimerHelper.GetRandomBackoff(TimeSpan.FromSeconds(30), TimeSpan.FromSeconds(60), _getNextMessageRetryInterval);
                        }

                        if (!encounteringError)
                        {
                            //print error only on the first consecutive error
                            _term.WriteError($"{DateTime.UtcNow:u}: Runner connect error: {ex.Message}. Retrying until reconnected.");
                            encounteringError = true;
                        }

                        // re-create VssConnection before next retry
                        await _runnerServer.RefreshConnectionAsync(RunnerConnectionType.MessageQueue, TimeSpan.FromSeconds(60));

                        Trace.Info("Sleeping for {0} seconds before retrying.", _getNextMessageRetryInterval.TotalSeconds);
                        await HostContext.Delay(_getNextMessageRetryInterval, token);
                    }
                }

                // message is null both when the server returned nothing and when an exception was handled above
                if (message == null)
                {
                    if (heartbeat.Elapsed > TimeSpan.FromMinutes(30))
                    {
                        Trace.Info($"No message retrieved from session '{_session.SessionId}' within last 30 minutes.");
                        heartbeat.Restart();
                    }
                    else
                    {
                        Trace.Verbose($"No message retrieved from session '{_session.SessionId}'.");
                    }

                    continue;
                }

                Trace.Info($"Message '{message.MessageId}' received from session '{_session.SessionId}'.");
                return(message);
            }
        }
Example #9
0
        /// <summary>
        /// Loads the runner settings and credentials, connects to the server and creates the agent session,
        /// storing it in <c>_session</c>. Retriable failures are retried every
        /// <c>_sessionCreationRetryInterval</c> with no attempt limit.
        /// </summary>
        /// <param name="token">Cancellation token checked before every attempt and passed to the server call and the retry delay.</param>
        /// <returns>
        /// <c>true</c> once the session has been created; <c>false</c> when
        /// <c>IsSessionCreationExceptionRetriable</c> rejects the failure.
        /// </returns>
        /// <exception cref="OperationCanceledException">Rethrown when <paramref name="token"/> has been cancelled.</exception>
        /// <exception cref="TaskAgentAccessTokenExpiredException">Rethrown without retry.</exception>
        public async Task<Boolean> CreateSessionAsync(CancellationToken token)
        {
            Trace.Entering();

            // Settings
            var configManager = HostContext.GetService<IConfigurationManager>();

            _settings = configManager.LoadSettings();
            var serverUrl = _settings.ServerUrl;

            Trace.Info(_settings);

            // Create connection.
            Trace.Info("Loading Credentials");
            var            credMgr = HostContext.GetService<ICredentialManager>();
            VssCredentials creds   = credMgr.LoadCredentials();

            var agent = new TaskAgentReference
            {
                Id            = _settings.AgentId,
                Name          = _settings.AgentName,
                Version       = BuildConstants.RunnerPackage.Version,
                OSDescription = RuntimeInformation.OSDescription,
            };
            string sessionName      = Environment.MachineName ?? "RUNNER";
            var    taskAgentSession = new TaskAgentSession(sessionName, agent);

            // true once a connect error has been reported to the terminal, so it is printed only once
            bool encounteringError = false;

            while (true)
            {
                token.ThrowIfCancellationRequested();
                Trace.Info("Attempt to create session.");
                try
                {
                    Trace.Info("Connecting to the Runner Server...");
                    // the Uri is built inside the try so that a malformed server url goes through the same error handling
                    await _runnerServer.ConnectAsync(new Uri(serverUrl), creds);

                    Trace.Info("VssConnection created");

                    _term.WriteLine();
                    _term.WriteSuccessMessage("Connected to GitHub");
                    _term.WriteLine();

                    _session = await _runnerServer.CreateAgentSessionAsync(
                        _settings.PoolId,
                        taskAgentSession,
                        token);

                    Trace.Info("Session created.");
                    if (encounteringError)
                    {
                        // we reported an error earlier: tell the user we are back and reset the tracked failures
                        _term.WriteLine($"{DateTime.UtcNow:u}: Runner reconnected.");
                        _sessionCreationExceptionTracker.Clear();
                    }

                    return true;
                }
                catch (OperationCanceledException) when (token.IsCancellationRequested)
                {
                    Trace.Info("Session creation has been cancelled.");
                    throw;
                }
                catch (TaskAgentAccessTokenExpiredException)
                {
                    Trace.Info("Runner OAuth token has been revoked. Session creation failed.");
                    throw;
                }
                catch (Exception ex)
                {
                    Trace.Error("Catch exception during create session.");
                    Trace.Error(ex);

                    if (!IsSessionCreationExceptionRetriable(ex))
                    {
                        _term.WriteError($"Failed to create session. {ex.Message}");
                        return false;
                    }

                    if (!encounteringError) //print the message only on the first error
                    {
                        _term.WriteError($"{DateTime.UtcNow:u}: Runner connect error: {ex.Message}. Retrying until reconnected.");
                        encounteringError = true;
                    }

                    Trace.Info("Sleeping for {0} seconds before retrying.", _sessionCreationRetryInterval.TotalSeconds);
                    await HostContext.Delay(_sessionCreationRetryInterval, token);
                }
            }
        }
Example #10
0
        /// <summary>
        /// Loads the agent settings, credentials and capabilities, connects to the agent server and creates
        /// the agent session, storing it in <c>Session</c>. Makes up to ten attempts, 30 seconds apart.
        /// </summary>
        /// <param name="token">Cancellation token passed to the capability scan, the server call and the retry delay.</param>
        /// <returns>
        /// <c>true</c> once the session has been created; <c>false</c> when the server reports
        /// <c>TaskAgentNotFoundException</c> or all attempts have failed.
        /// </returns>
        /// <exception cref="OperationCanceledException">Rethrown when <paramref name="token"/> has been cancelled.</exception>
        public async Task<Boolean> CreateSessionAsync(CancellationToken token)
        {
            Trace.Entering();
            const int MaxAttempts   = 10;
            TimeSpan  retryInterval = TimeSpan.FromSeconds(30);
            int       attempt       = 0;

            // Settings
            var configManager = HostContext.GetService<IConfigurationManager>();

            _settings = configManager.LoadSettings();
            var serverUrl = _settings.ServerUrl;

            Trace.Info(_settings);

            // Load Credentials
            Trace.Verbose("Loading Credentials");
            var            credMgr     = HostContext.GetService<ICredentialManager>();
            VssCredentials creds       = credMgr.LoadCredentials();
            Uri            uri         = new Uri(serverUrl);
            VssConnection  conn        = ApiUtil.CreateConnection(uri, creds);
            // a fresh GUID makes the session name unique for every call
            string         sessionName = $"{Environment.MachineName ?? string.Empty}_{Guid.NewGuid()}";
            var            capProvider = HostContext.GetService<ICapabilitiesProvider>();
            Dictionary<string, string> agentSystemCapabilities = await capProvider.GetCapabilitiesAsync(_settings.AgentName, token);

            var agent = new TaskAgentReference
            {
                Id      = _settings.AgentId,
                Name    = _settings.AgentName,
                Version = Constants.Agent.Version,
                Enabled = true
            };
            var taskAgentSession = new TaskAgentSession(sessionName, agent, agentSystemCapabilities);

            var agentSvr = HostContext.GetService<IAgentServer>();

            while (++attempt <= MaxAttempts)
            {
                Trace.Info("Create session attempt {0} of {1}.", attempt, MaxAttempts);
                try
                {
                    Trace.Info("Connecting to the Agent Server...");
                    await agentSvr.ConnectAsync(conn);

                    Session = await agentSvr.CreateAgentSessionAsync(
                        _settings.PoolId,
                        taskAgentSession,
                        token);

                    return true;
                }
                catch (OperationCanceledException) when (token.IsCancellationRequested)
                {
                    // Only a signalled token means the caller asked us to stop. An OperationCanceledException
                    // raised for any other reason (e.g. an HTTP client timeout) falls through to the retry path below.
                    Trace.Info("Cancelled");
                    throw;
                }
                catch (Exception ex)
                {
                    Trace.Error("Failed to create session.");
                    if (ex is TaskAgentNotFoundException)
                    {
                        // retrying cannot help: the server no longer knows this agent
                        Trace.Error("The agent no longer exists on the server. Stopping the agent.");
                        Trace.Error(ex);
                        return false;
                    }
                    else if (ex is TaskAgentSessionConflictException)
                    {
                        Trace.Error("The session for this agent already exists.");
                    }
                    else
                    {
                        Trace.Error(ex);
                    }

                    if (attempt >= MaxAttempts)
                    {
                        // skip the pointless final sleep once the attempt budget is used up
                        Trace.Error("Retries exhausted. Terminating the agent.");
                        return false;
                    }

                    Trace.Info("Sleeping for {0} seconds before retrying.", retryInterval.TotalSeconds);
                    await HostContext.Delay(retryInterval, token);
                }
            }

            return false;
        }
Example #11
0
        /// <summary>
        /// Loads the agent settings, scans capabilities, connects to the agent server and creates the agent
        /// session, storing it in <c>Session</c>. Non-fatal failures are retried every 30 seconds with no
        /// attempt limit; the connection error is written to the terminal only for the first failure.
        /// </summary>
        /// <param name="token">Cancellation token passed to the capability scan, the server call and the retry delay.</param>
        /// <returns><c>true</c> once the session has been created; <c>false</c> when <c>IsFatalException</c> accepts the failure.</returns>
        /// <exception cref="OperationCanceledException">Rethrown when <paramref name="token"/> has been cancelled.</exception>
        public async Task<Boolean> CreateSessionAsync(CancellationToken token)
        {
            Trace.Entering();
            int attempt = 0;

            // Settings
            var configManager = HostContext.GetService<IConfigurationManager>();

            _settings = configManager.LoadSettings();
            var serverUrl = _settings.ServerUrl;

            Trace.Info(_settings);

            // Capabilities.
            // TODO: LOC
            _term.WriteLine("Scanning for tool capabilities.");
            Dictionary<string, string> systemCapabilities = await HostContext.GetService<ICapabilitiesManager>().GetCapabilitiesAsync(_settings, token);

            // Create connection.
            Trace.Verbose("Loading Credentials");
            var            credMgr = HostContext.GetService<ICredentialManager>();
            VssCredentials creds   = credMgr.LoadCredentials();
            Uri            uri     = new Uri(serverUrl);
            VssConnection  conn    = ApiUtil.CreateConnection(uri, creds);

            var agent = new TaskAgentReference
            {
                Id      = _settings.AgentId,
                Name    = _settings.AgentName,
                Version = Constants.Agent.Version,
                Enabled = true
            };
            string sessionName      = Environment.MachineName ?? "AGENT";
            var    taskAgentSession = new TaskAgentSession(sessionName, agent, systemCapabilities);

            var      agentSvr          = HostContext.GetService<IAgentServer>();
            TimeSpan retryInterval     = TimeSpan.FromSeconds(30);
            string   errorMessage      = string.Empty;
            bool     encounteringError = false; // true once the connection error has been shown to the user

            // TODO: LOC
            _term.WriteLine("Connecting to the server.");
            while (true)
            {
                attempt++;
                Trace.Info($"Create session attempt {attempt}.");
                try
                {
                    Trace.Info("Connecting to the Agent Server...");
                    await agentSvr.ConnectAsync(conn);

                    Session = await agentSvr.CreateAgentSessionAsync(
                        _settings.PoolId,
                        taskAgentSession,
                        token);

                    if (encounteringError)
                    {
                        // an error was reported earlier, so let the user know the connection is back
                        _term.WriteLine(StringUtil.Loc("QueueConnected", DateTime.UtcNow));
                    }

                    return true;
                }
                catch (OperationCanceledException) when (token.IsCancellationRequested)
                {
                    // our own token was signalled: this is a real cancellation, not a timeout
                    Trace.Info("Cancelled");
                    throw;
                }
                catch (OperationCanceledException ex)
                {
                    // the token is not signalled, so the operation was cancelled for another reason
                    // (e.g. a request timeout); record it and retry after the delay
                    Trace.Info("Create session request was cancelled without a cancellation request; retrying.");
                    errorMessage = ex.Message;
                }
                catch (Exception ex)
                {
                    Trace.Error("Failed to create session.");
                    if (ex is TaskAgentNotFoundException)
                    {
                        Trace.Error("The agent no longer exists on the server. Stopping the agent.");
                        _term.WriteError(StringUtil.Loc("MissingAgent"));
                    }

                    if (ex is TaskAgentSessionConflictException)
                    {
                        Trace.Error("The session for this agent already exists.");
                        _term.WriteError(StringUtil.Loc("SessionExist"));
                    }

                    Trace.Error(ex);
                    if (IsFatalException(ex))
                    {
                        _term.WriteError(StringUtil.Loc("SessionCreateFailed"));
                        return false;
                    }

                    errorMessage = ex.Message;
                }

                if (!encounteringError) //print the message only on the first error
                {
                    _term.WriteError(StringUtil.Loc("QueueConError", DateTime.UtcNow, errorMessage, retryInterval.TotalSeconds));
                    encounteringError = true;
                }

                Trace.Info("Sleeping for {0} seconds before retrying.", retryInterval.TotalSeconds);
                await HostContext.Delay(retryInterval, token);
            }
        }
Example #12
0
        /// <summary>
        /// Polls the agent server for the next message on the current <c>Session</c> and returns the first
        /// non-null one. Non-fatal failures are followed by a 15 second delay; an expired session is
        /// re-created through <c>CreateSessionAsync</c>.
        /// </summary>
        /// <param name="token">Cancellation token checked at the top of every iteration and passed to the server call and delays.</param>
        /// <returns>The first non-null message returned by the server.</returns>
        /// <exception cref="OperationCanceledException">Thrown when <paramref name="token"/> has been cancelled.</exception>
        public async Task<TaskAgentMessage> GetNextMessageAsync(CancellationToken token)
        {
            Trace.Entering();
            ArgUtil.NotNull(Session, nameof(Session));
            ArgUtil.NotNull(_settings, nameof(_settings));

            var      server        = HostContext.GetService<IAgentServer>();
            TimeSpan retryDelay    = TimeSpan.FromSeconds(15);
            string   lastError     = string.Empty;
            int      failureStreak = 0; // consecutive failed polls; bumped before each call, zeroed on success

            for (;;)
            {
                token.ThrowIfCancellationRequested();

                TaskAgentMessage received = null;
                try
                {
                    // count this poll as failed up front; the counter is cleared below once the call returns
                    failureStreak += 1;
                    received = await server.GetAgentMessageAsync(_settings.PoolId,
                                                                 Session.SessionId,
                                                                 _lastMessageId,
                                                                 token);

                    if (received != null)
                    {
                        _lastMessageId = received.MessageId;
                    }

                    // a streak above one means at least one earlier poll failed, so announce the recovery once
                    if (failureStreak > 1)
                    {
                        _term.WriteLine(StringUtil.Loc("QueueConnected", DateTime.UtcNow));
                    }

                    failureStreak = 0;
                }
                catch (TimeoutException timeout)
                {
                    Trace.Verbose($"{nameof(TimeoutException)} received.");
                    // fall through to the delayed retry
                    lastError = timeout.Message;
                }
                catch (TaskAgentSessionExpiredException)
                {
                    Trace.Verbose($"{nameof(TaskAgentSessionExpiredException)} received.");
                    bool sessionRecreated = await CreateSessionAsync(token);
                    if (!sessionRecreated)
                    {
                        throw;
                    }

                    // new session in place: poll again right away, without the error delay
                    failureStreak = 0;
                }
                catch (OperationCanceledException cancelled)
                {
                    Trace.Verbose($"{nameof(OperationCanceledException)} received.");
                    // reached on CTRL-C, service stop, or an HttpClient timeout;
                    // only a signalled token counts as a real cancellation
                    if (token.IsCancellationRequested)
                    {
                        throw;
                    }

                    // fall through to the delayed retry
                    lastError = cancelled.Message;
                }
                catch (Exception failure)
                {
                    Trace.Error(failure);
                    if (IsFatalException(failure))
                    {
                        throw;
                    }

                    // fall through to the delayed retry
                    lastError = failure.Message;
                }

                // a non-zero streak here means the poll above failed: report (first time only) and wait
                if (failureStreak > 0)
                {
                    if (failureStreak == 1)
                    {
                        _term.WriteError(StringUtil.Loc("QueueConError", DateTime.UtcNow, lastError, retryDelay.TotalSeconds));
                    }

                    Trace.Info("Sleeping for {0} seconds before retrying.", retryDelay.TotalSeconds);
                    await HostContext.Delay(retryDelay, token);
                }

                if (received != null)
                {
                    Trace.Verbose($"Message '{received.MessageId}' received from session '{Session.SessionId}'.");
                    return received;
                }

                Trace.Verbose($"No message retrieved from session '{Session.SessionId}'.");
            }
        }