private async Task <RpcCallResult <T> > CallCore <T>( Func <TrackedConnection, BondCallTracker, Task <T> > callAsync, CancellationToken cancellationToken, string functionName, BondCallTracker callTracker, bool allowInactive = false, Func <T, bool> shouldRetry = null, uint maxTryCount = 0) { Contract.Requires(functionName != null); callTracker = callTracker ?? CreateLoggingCallTracker(functionName); TimeSpan waitForConnectionDuration = default(TimeSpan); Failure lastFailure = null; m_outstandingCalls.AddOrUpdate(functionName, 1, (k, i) => i + 1); // For heartbeat only try once if (maxTryCount == 0) { maxTryCount = DefaultMaxRetryCount; } for (uint retryCount = 0; retryCount < maxTryCount; retryCount++) { callTracker.TryCount = retryCount; if (retryCount != 0) { // For retries, log a call start with the call tracker's updated retry count callTracker.OnStateChanged(BondCallState.Started); // Yield after first iteration to ensure // we don't overflow the stack with async continuations await Task.Yield(); } TrackedConnection connection = null; try { var startWaitForConnection = m_stopwatch.Elapsed; callTracker.OnStateChanged(BondCallState.WaitingForConnection); // Wait for a connection to become active via the a successful heartbeat using (var connectionScope = await WaitForConnectionAsync(callTracker, allowInactive, cancellationToken)) { // Log wait for connection success var iterationWaitForConnectionDuration = GetElapsed(startWaitForConnection); waitForConnectionDuration += iterationWaitForConnectionDuration; callTracker.OnStateChanged(BondCallState.CompletedWaitForConnection); // connection is not returned in the case that the proxy is shutting down or timed out // other case is that this is a failed heartbeat call. In which case, just continue. if (connectionScope.Connection == null) { if (m_isShuttingDown || m_exceededInactivityTimeout) { // Log the failure lastFailure = new RecoverableExceptionFailure(new BuildXLException(m_isShuttingDown ? "Bond RPC Call failure: Proxy is shutting down" : "Bond RPC Call failure: Proxy timed out")); callTracker.LogMessage("Could not retrieve connection. Failure={0}", lastFailure.DescribeIncludingInnerFailures()); callTracker.OnStateChanged(BondCallState.Failed); return(new RpcCallResult <T>(RpcCallResultState.Failed, retryCount + 1, callTracker.TotalDuration, waitForConnectionDuration, lastFailure)); } continue; } connection = connectionScope.Connection; // Make the actual call var result = await callAsync(connection, callTracker); // Check if call should be retried if (shouldRetry != null && shouldRetry(result)) { continue; } // Log the call completion callTracker.OnStateChanged(BondCallState.Succeeded); m_proxyLogger.LogSuccessfulCall(m_loggingContext, functionName, retryCount); connectionScope.MarkSucceeded(); m_services.Counters.AddToCounter(DistributionCounter.SendPipBuildRequestCallDurationMs, (long)callTracker.TotalDuration.TotalMilliseconds); return(new RpcCallResult <T>(result, retryCount + 1, callTracker.TotalDuration, waitForConnectionDuration)); } } catch (OperationCanceledException) { callTracker.OnStateChanged(BondCallState.Canceled); return(new RpcCallResult <T>(RpcCallResultState.Cancelled, retryCount + 1, callTracker.TotalDuration, waitForConnectionDuration)); } catch (Exception ex) { // If shutting down just return the failed result if (ex is ObjectDisposedException && m_isShuttingDown) { lastFailure = new RecoverableExceptionFailure(new BuildXLException("Bond RPC Call failure: Proxy is shutting down", ex)); callTracker.LogMessage("{0}", lastFailure.DescribeIncludingInnerFailures()); callTracker.OnStateChanged(BondCallState.Failed); return(new RpcCallResult <T>(RpcCallResultState.Failed, retryCount + 1, callTracker.TotalDuration, waitForConnectionDuration)); } if (DistributionServices.IsBuildIdMismatchException(ex)) { m_proxyLogger.LogCallException(m_loggingContext, functionName, retryCount, ex); // If a message with different build is received, it means that the sender has participated in a different distributed build. // Then, we need to lose the connection with the sender. OnConnectionTimeOut?.Invoke(this, EventArgs.Empty); return(new RpcCallResult <T>(RpcCallResultState.Failed, retryCount + 1, callTracker.TotalDuration, waitForConnectionDuration)); } // If not a transient exception, log and throw if (!DistributionHelpers.IsTransientBondException(ex, m_services.Counters) && !m_services.IsChecksumMismatchException(ex)) { m_proxyLogger.LogCallException(m_loggingContext, functionName, retryCount, ex); throw; } // Otherwise, the exception is transient, so log exception and try again lastFailure = new RecoverableExceptionFailure(new BuildXLException("Failed Bond RPC call", ex)); callTracker.LogMessage("{0}", lastFailure.DescribeIncludingInnerFailures()); // Deactivate connection so subsequent calls on the proxy will wait for heartbeat before trying to make call. DeactivateConnection(connection); m_services.Counters.AddToCounter(DistributionCounter.FailedSendPipBuildRequestCallDurationMs, (long)callTracker.TotalDuration.TotalMilliseconds); m_services.Counters.IncrementCounter(DistributionCounter.FailedSendPipBuildRequestCount); m_proxyLogger.LogFailedCall(m_loggingContext, functionName, retryCount, lastFailure); } finally { m_outstandingCalls.AddOrUpdate(functionName, 0, (k, i) => i - 1); } } // Exceeded retry count. callTracker.LogMessage("Call failed and exhausted allowed retries. LastFailure={0}", lastFailure?.DescribeIncludingInnerFailures() ?? string.Empty); callTracker.OnStateChanged(BondCallState.Failed); return(new RpcCallResult <T>(RpcCallResultState.Failed, DefaultMaxRetryCount, callTracker.TotalDuration, waitForConnectionDuration, lastFailure)); }
public async Task <RpcCallResult <Unit> > CallAsync( Func <CallOptions, AsyncUnaryCall <RpcResponse> > func, string operation, CancellationToken cancellationToken = default(CancellationToken), bool waitForConnection = false) { var watch = Stopwatch.StartNew(); TimeSpan waitForConnectionDuration = TimeSpan.Zero; TimeSpan totalCallDuration = TimeSpan.Zero; if (waitForConnection) { bool connectionSucceeded = await TryConnectChannelAsync(GrpcSettings.InactiveTimeout, operation, watch); waitForConnectionDuration = watch.Elapsed; if (!connectionSucceeded) { return(new RpcCallResult <Unit>(RpcCallResultState.Cancelled, attempts: 1, duration: TimeSpan.Zero, waitForConnectionDuration)); } } Guid traceId = Guid.NewGuid(); var headers = new Metadata(); headers.Add(GrpcSettings.TraceIdKey, traceId.ToByteArray()); headers.Add(GrpcSettings.BuildIdKey, m_buildId); headers.Add(GrpcSettings.SenderKey, DistributionHelpers.MachineName); RpcCallResultState state = RpcCallResultState.Succeeded; Failure failure = null; uint numTry = 0; while (numTry < GrpcSettings.MaxRetry) { numTry++; watch.Restart(); try { var callOptions = new CallOptions( deadline: DateTime.UtcNow.Add(GrpcSettings.CallTimeout), cancellationToken: cancellationToken, headers: headers).WithWaitForReady(); Logger.Log.GrpcTrace(m_loggingContext, GenerateLog(traceId.ToString(), "Call", numTry, operation)); await func(callOptions); Logger.Log.GrpcTrace(m_loggingContext, GenerateLog(traceId.ToString(), "Sent", numTry, $"Duration: {watch.ElapsedMilliseconds}ms")); state = RpcCallResultState.Succeeded; break; } catch (RpcException e) { state = e.Status.StatusCode == StatusCode.Cancelled ? RpcCallResultState.Cancelled : RpcCallResultState.Failed; failure = state == RpcCallResultState.Failed ? new RecoverableExceptionFailure(new BuildXLException(e.Message)) : null; Logger.Log.GrpcTrace(m_loggingContext, GenerateFailLog(traceId.ToString(), numTry, watch.ElapsedMilliseconds, e.Message)); // If the call is cancelled or channel is shutdown, then do not retry the call. if (state == RpcCallResultState.Cancelled || m_isShutdownInitiated) { break; } if (numTry == GrpcSettings.MaxRetry - 1) { // If this is the last retry, try to attempt reconnecting. If the connection fails, do not attempt to retry the call. bool connectionSucceeded = await TryConnectChannelAsync(GrpcSettings.CallTimeout, operation); if (!connectionSucceeded) { break; } } } catch (ObjectDisposedException e) { state = RpcCallResultState.Failed; failure = new RecoverableExceptionFailure(new BuildXLException(e.Message)); Logger.Log.GrpcTrace(m_loggingContext, GenerateFailLog(traceId.ToString(), numTry, watch.ElapsedMilliseconds, e.Message)); // If stream is already disposed, we cannot retry call. break; } finally { totalCallDuration += watch.Elapsed; } } if (state == RpcCallResultState.Succeeded) { return(new RpcCallResult <Unit>(Unit.Void, attempts: numTry, duration: totalCallDuration, waitForConnectionDuration: waitForConnectionDuration)); } return(new RpcCallResult <Unit>( state, attempts: numTry, duration: totalCallDuration, waitForConnectionDuration: waitForConnectionDuration, lastFailure: failure)); }