/// <summary>
/// Creates the given blob container if it is not there yet, running the
/// attempt through <c>RetryHelper</c> so failures are reported to a
/// per-failure callback.
/// </summary>
/// <param name="container">blob container to create</param>
public static void CreateContainerWithRetry(CloudBlobContainer container)
{
    RetryHelper<object>.InvokeOperation(
        () =>
        {
            bool created = container.CreateIfNotExists();
            if (created)
            {
                BrokerTracing.TraceInfo("[AzureQueueManager].CreateContainerWithRetry: Create the container {0}", container.Name);
            }

            // The operation has no meaningful result.
            return null;
        },
        (e, count) =>
        {
            BrokerTracing.TraceError("Failed to create the container {0}: {1}. Retry Count = {2}", container.Name, e, count);

            StorageException storageError = e as StorageException;
            if (storageError == null)
            {
                // Only storage failures get the extra delay below.
                return;
            }

            // Historical note from the original author: in testing the
            // service reported ResourceAlreadyExists, which is not
            // documented, so ContainerAlreadyExists was added as well.
            // TODO: Azure storage SDK 2.0 - the ResourceAlreadyExists
            // comparison is currently disabled; only
            // ContainerAlreadyExists triggers the one-minute wait.
            string errorCode = BurstUtility.GetStorageErrorCode(storageError);
            if (errorCode == StorageErrorCodeStrings.ContainerAlreadyExists)
            {
                Thread.Sleep(TimeSpan.FromMinutes(1));
            }
        });
}
/// <summary>
/// Creates the given storage queue if it is not there yet, running the
/// attempt through <c>RetryHelper</c>.
/// </summary>
/// <param name="queue">storage queue to create</param>
/// <remarks>
/// Per the original author's note, the create call fails with a storage
/// exception while a queue of the same name is still being deleted, so
/// the failure callback waits one minute when the error code is
/// QueueAlreadyExists.
/// </remarks>
public static void CreateQueueWithRetry(CloudQueue queue)
{
    RetryHelper<object>.InvokeOperation(
        () =>
        {
            bool created = queue.CreateIfNotExists();
            if (created)
            {
                BrokerTracing.TraceInfo("[AzureQueueManager].CreateQueueWithRetry: Create the queue {0}", queue.Name);
            }

            // The operation has no meaningful result.
            return null;
        },
        (e, count) =>
        {
            BrokerTracing.TraceError("Failed to create the queue {0}: {1}. Retry Count = {2}", queue.Name, e, count);

            StorageException storageError = e as StorageException;
            bool queueStillPresent =
                storageError != null &&
                BurstUtility.GetStorageErrorCode(storageError) == QueueErrorCodeStrings.QueueAlreadyExists;

            if (queueStillPresent)
            {
                Thread.Sleep(TimeSpan.FromMinutes(1));
            }
        });
}
/// <summary>
/// Completion callback for CloudQueue.BeginAddMessage.
/// </summary>
/// <param name="ar">
/// async result; its AsyncState is read as a
/// ReliableQueueClient.ReliableState whose State is a QueueAsyncResult
/// </param>
/// <remarks>
/// Exceptions raised while completing the add are not rethrown. They are
/// traced and handed to the manager's CompleteCallback instead.
/// </remarks>
private void BeginAddMessageCallback(IAsyncResult ar)
{
    BrokerTracing.TraceVerbose("[AzureServiceClient].BeginAddMessageCallback: Enter callback method of BeginAddMessage.");

    ReliableQueueClient.ReliableState state = ar.AsyncState as ReliableQueueClient.ReliableState;
    var pendingAdd = state.State as QueueAsyncResult;
    Debug.Assert(pendingAdd != null, "reliableState.State must be a QueueAsyncResult.");

    try
    {
        BrokerTracing.TraceVerbose(
            "[AzureServiceClient].BeginAddMessageCallback: Try to complete adding message {0}",
            pendingAdd.MessageId);

        pendingAdd.StorageClient.EndAddMessage(ar);
    }
    catch (StorageException storageError)
    {
        BrokerTracing.TraceError(
            "[AzureServiceClient].BeginAddMessageCallback: Failed to complete adding message {0}, {1}",
            pendingAdd.MessageId,
            storageError.ToString());

        if (BurstUtility.IsQueueNotFound(storageError))
        {
            // This path only adds request messages, so a missing queue
            // here means the request queue is gone. Let the manager deal
            // with the outstanding messages that were already sent to
            // that queue but may not have been picked up by the proxy.
            // The queue name is passed along because there can be more
            // than one request queue when there are multiple Azure
            // deployments.
            this.manager.HandleInvalidRequestQueue(
                new RequestStorageException(storageError),
                this.requestStorageClient.QueueName);
        }

        this.manager.CompleteCallback(pendingAdd, null, new RequestStorageException(storageError));
    }
    catch (Exception error)
    {
        BrokerTracing.TraceError(
            "[AzureServiceClient].BeginAddMessageCallback: Failed to complete adding message {0}, {1}",
            pendingAdd.MessageId,
            error.ToString());

        this.manager.CompleteCallback(pendingAdd, null, error);
    }
}
/// <summary>
/// Reacts to a storage failure on the response queue: always traces a
/// warning, and when the failure means the queue was not found, logs the
/// ETW event and notifies the Azure queue manager.
/// </summary>
/// <param name="e">
/// exception raised while accessing the response queue
/// </param>
private void HandleInvalidResponseQueue(StorageException e)
{
    BrokerTracing.TraceWarning(
        "[ResponseQueueManager].HandleInvalidResponseQueue: Exception occurs when access response queue, {0}, {1}, {2}",
        BurstUtility.GetStorageErrorCode(e),
        e,
        this.responseStorageName);

    if (!BurstUtility.IsQueueNotFound(e))
    {
        // Nothing beyond the warning for other kinds of failure.
        return;
    }

    // Original author's note: this method is called once, so the ETW
    // event below is emitted once.
    BrokerTracing.EtwTrace.LogQueueNotExist(this.sessionId, this.responseStorageName);

    ResponseStorageException queueMissing = new ResponseStorageException(e, this.responseStorageName);
    this.azureQueueManager.TriggerCallbackForInvalidResponseQueue(queueMissing);
}
/// <summary>
/// Reacts to a storage failure on a request queue: always traces a
/// warning, and when the failure means the queue was not found, triggers
/// the invalid-request-queue callback.
/// </summary>
/// <param name="e">
/// exception raised while accessing the request queue
/// </param>
/// <param name="requestQueueName">
/// name of the request queue that was being accessed
/// </param>
public void HandleInvalidRequestQueue(StorageException e, string requestQueueName)
{
    BrokerTracing.TraceWarning(
        "[AzureQueueManager].HandleInvalidRequestQueue: Exception occurs when access request queue, {0}, {1}",
        BurstUtility.GetStorageErrorCode(e),
        e);

    if (!BurstUtility.IsQueueNotFound(e))
    {
        return;
    }

    // Flip the flag from 1 to 0 atomically. Only the caller that observes
    // the old value 1 writes the ETW event, so it is logged a single time
    // even if this method runs again.
    int previousFlag = Interlocked.CompareExchange(ref this.requestQueueExist, 0, 1);
    if (previousFlag == 1)
    {
        BrokerTracing.EtwTrace.LogQueueNotExist(this.sessionId, requestQueueName);
    }

    this.TriggerCallbackForInvalidRequestQueue(requestQueueName, e);
}
/// <summary>
/// Convert StorageException to DataErrorCode
/// </summary>
/// <param name="e">Storage exception</param>
/// <returns>
/// Data error code. The HTTP status code is mapped first; when it is not
/// one of the recognised values the storage error code is inspected.
/// <c>DataErrorCode.Unknown</c> is returned when the exception carries no
/// request information or nothing matches.
/// </returns>
public static int ConvertToDataServiceErrorCode(StorageException e)
{
    if (e.RequestInformation == null)
    {
        return DataErrorCode.Unknown;
    }

    var statusCode = e.RequestInformation.HttpStatusCode;

    if (statusCode == (int)HttpStatusCode.Forbidden)
    {
        return DataErrorCode.DataNoPermission;
    }

    if (statusCode == (int)HttpStatusCode.BadGateway ||
        statusCode == (int)HttpStatusCode.BadRequest ||
        statusCode == (int)HttpStatusCode.RequestedRangeNotSatisfiable)
    {
        return DataErrorCode.DataServerUnreachable;
    }

    if (statusCode == (int)HttpStatusCode.Conflict)
    {
        return DataErrorCode.DataClientAlreadyExists;
    }

    if (statusCode == (int)HttpStatusCode.NotFound)
    {
        return DataErrorCode.DataClientNotFound;
    }

    // Fall back to the storage error code. The static string.Equals is
    // used so that a null error code (should GetStorageErrorCode ever
    // yield one) maps to Unknown instead of raising a
    // NullReferenceException.
    string errorCode = BurstUtility.GetStorageErrorCode(e);

    if (string.Equals(errorCode, StorageErrorCodeStrings.AuthenticationFailed, StringComparison.OrdinalIgnoreCase))
    {
        return DataErrorCode.DataNoPermission;
    }

    if (string.Equals(errorCode, BlobErrorCodeStrings.BlobAlreadyExists, StringComparison.OrdinalIgnoreCase))
    {
        return DataErrorCode.DataClientAlreadyExists;
    }

    if (string.Equals(errorCode, BlobErrorCodeStrings.BlobNotFound, StringComparison.OrdinalIgnoreCase) ||
        string.Equals(errorCode, StorageErrorCodeStrings.ResourceNotFound, StringComparison.OrdinalIgnoreCase))
    {
        return DataErrorCode.DataClientNotFound;
    }

    return DataErrorCode.Unknown;
}
/// <summary>
/// Refresh blob's attributes
/// </summary>
/// <param name="blob">target data blob</param>
/// <returns>
/// true when the attributes were fetched. The method never returns false:
/// every failure is traced and surfaced as an exception.
/// </returns>
/// <exception cref="DataException">
/// Thrown with <c>DataErrorCode.DataClientDeleted</c> when the storage
/// error code says the resource, blob or container was not found, and
/// with <c>DataErrorCode.Unknown</c> for non-storage failures. Other
/// storage failures throw whatever
/// <c>DataUtility.ConvertToDataException</c> produces.
/// </exception>
private static bool RefreshBlobAttributes(CloudBlockBlob blob)
{
    try
    {
        blob.FetchAttributes();
        return true;
    }
    catch (StorageException ex)
    {
        // Notice: Azure storage SDK 2.0 removes StorageServerException
        // and StorageClientException
        string errorCode = BurstUtility.GetStorageErrorCode(ex);

        TraceHelper.TraceSource.TraceEvent(
            TraceEventType.Error,
            0,
            "[BlobDataContainer] .WaitUntilTransferComplete: failed to fetch blob attributes. blob name={0}, error code={1}, exception={2}",
            blob.Name,
            errorCode,
            ex);

        // Static string.Equals keeps this handler from throwing a
        // NullReferenceException (which would replace the intended
        // DataException) should the error code ever be null.
        bool targetGone =
            string.Equals(errorCode, StorageErrorCodeStrings.ResourceNotFound, StringComparison.OrdinalIgnoreCase) ||
            string.Equals(errorCode, BlobErrorCodeStrings.BlobNotFound, StringComparison.OrdinalIgnoreCase) ||
            string.Equals(errorCode, BlobErrorCodeStrings.ContainerNotFound, StringComparison.OrdinalIgnoreCase);

        if (targetGone)
        {
            throw new DataException(DataErrorCode.DataClientDeleted, ex);
        }

        throw DataUtility.ConvertToDataException(ex);
    }
    catch (Exception ex)
    {
        TraceHelper.TraceSource.TraceEvent(
            TraceEventType.Error,
            0,
            "[BlobDataContainer] .WaitUntilTransferComplete: failed to fetch blob attributes. blob name={0}, exception={1}",
            blob.Name,
            ex);

        throw new DataException(DataErrorCode.Unknown, ex);
    }
}
/// <summary>
/// Traces a storage failure on the worker's queue and, when the failure
/// means the queue was not found, forwards it to invalidQueueHandler.
/// </summary>
/// <remarks>
/// Only the queue-not-found case is acted on at present. Other
/// queue-specific failures are traced and otherwise ignored.
/// </remarks>
/// <param name="e">exception raised while accessing the queue</param>
private void HandleInvalidQueue(StorageException e)
{
    TraceUtils.TraceError(
        "MessageRetriever.Worker",
        "HandleInvalidQueue",
        "StorageException, worker {0}, queue {1}, error code {2}, {3}",
        this.workerId,
        this.queue.Name,
        BurstUtility.GetStorageErrorCode(e),
        e);

    // Notify only for a missing queue, and only when a handler is set.
    if (BurstUtility.IsQueueNotFound(e) && this.invalidQueueHandler != null)
    {
        this.invalidQueueHandler(e);
    }
}
/// <summary>
/// Traces a storage failure on the worker's queue and, when the failure
/// means the queue was not found, forwards it to invalidQueueHandler.
/// </summary>
/// <remarks>
/// Only the queue-not-found case is acted on at present. Other
/// queue-specific failures are traced and otherwise ignored.
/// </remarks>
/// <param name="e">exception raised while accessing the queue</param>
private void HandleInvalidQueue(StorageException e)
{
    SessionBase.TraceSource.TraceEvent(
        TraceEventType.Error,
        0,
        "StorageException, worker {0}, queue {1}, error code {2}, {3}",
        this.workerId,
        this.queue.Name,
        BurstUtility.GetStorageErrorCode(e),
        e);

    // Notify only for a missing queue, and only when a handler is set.
    if (BurstUtility.IsQueueNotFound(e) && this.invalidQueueHandler != null)
    {
        this.invalidQueueHandler(e);
    }
}
/// <summary>
/// Completion callback for the queue's BeginGetMessages call. Ends the
/// async call, passes any retrieved messages to messageHandler and then
/// either starts the next retrieval at once (messages were returned) or
/// falls back to the timer (nothing returned, or an error occurred).
/// </summary>
/// <param name="ar">
/// async result; its AsyncState is the Guid call id used in trace output
/// </param>
private void GetMessagesCallback(IAsyncResult ar)
{
    Guid callId = (Guid)ar.AsyncState;

    try
    {
        IEnumerable<CloudQueueMessage> batch = null;

        try
        {
            TraceUtils.TraceVerbose(
                "MessageRetriever.Worker",
                "GetMessagesCallback",
                "Call EndGetMessages, worker {0}, call Id {1}, queue {2}",
                this.workerId,
                callId,
                this.queue.Name);

            batch = this.queue.EndGetMessages(ar);

            TraceUtils.TraceVerbose(
                "MessageRetriever.Worker",
                "GetMessagesCallback",
                "EndGetMessages returns, worker {0}, call Id {1}, queue {2}",
                this.workerId,
                callId,
                this.queue.Name);
        }
        catch (StorageException storageError)
        {
            TraceUtils.TraceError(
                "MessageRetriever.Worker",
                "GetMessagesCallback",
                "EndGetMessages failed, worker {0}, call Id {1}, queue {2}, error code {3}, {4}",
                this.workerId,
                callId,
                this.queue.Name,
                BurstUtility.GetStorageErrorCode(storageError),
                storageError);

            this.HandleInvalidQueue(storageError);
        }
        catch (Exception error)
        {
            TraceUtils.TraceError(
                "MessageRetriever.Worker",
                "GetMessagesCallback",
                "EndGetMessages failed, worker {0}, call Id {1}, queue {2}, {3}",
                this.workerId,
                callId,
                this.queue.Name,
                error);
        }
        finally
        {
            // Signal that the outstanding call is finished, whatever the
            // outcome of EndGetMessages.
            this.waitHandler.Set();
        }

        // A failed EndGetMessages leaves the batch null; treat that like
        // an empty batch.
        int received = batch == null ? 0 : batch.Count();

        if (received <= 0)
        {
            TraceUtils.TraceVerbose(
                "MessageRetriever.Worker",
                "GetMessagesCallback",
                "Get 0 message from the queue, worker {0}, call Id {1}, queue {2}",
                this.workerId,
                callId,
                this.queue.Name);

            this.TriggerTimer();
            return;
        }

        TraceUtils.TraceVerbose(
            "MessageRetriever.Worker",
            "GetMessagesCallback",
            "Get {0} messages from the queue, worker {1}, call Id {2}, queue {3}",
            received,
            this.workerId,
            callId,
            this.queue.Name);

        this.sleepPeriod = 0;

        // messageHandler is expected to be fast. It is invoked before the
        // next retrieval is started, because the next BeginGetMessages
        // callback may run on this same thread and would otherwise delay
        // the handler.
        if (this.messageHandler != null)
        {
            try
            {
                this.messageHandler(batch);
            }
            catch (Exception handlerError)
            {
                TraceUtils.TraceError(
                    "MessageRetriever.Worker",
                    "GetMessagesCallback",
                    "Message handler throws exception, worker {0}, call Id {1}, queue {2}, {3}",
                    this.workerId,
                    callId,
                    this.queue.Name,
                    handlerError);
            }
        }

        this.InternalBeginGetMessages(null);
    }
    catch (Exception unexpected)
    {
        TraceUtils.TraceError(
            "MessageRetriever.Worker",
            "GetMessagesCallback",
            "Error occurs, worker {0}, call Id {1}, queue {2}, {3}",
            this.workerId,
            callId,
            this.queue.Name,
            unexpected);

        this.TriggerTimer();
    }
}
/// <summary>
/// Starts an asynchronous retrieval of messages from the cloud queue,
/// with GetMessagesCallback as the completion callback. Does nothing
/// once the worker has been stopped.
/// </summary>
/// <param name="state">state object; not read by this method</param>
private void InternalBeginGetMessages(object state)
{
    // A stopped worker issues no further requests.
    if (this.stop)
    {
        return;
    }

    // Fresh id per call; it appears only in trace output, to correlate
    // the begin and end of each BeginGetMessages call.
    Guid callId = Guid.NewGuid();

    try
    {
        TraceUtils.TraceVerbose(
            "MessageRetriever.Worker",
            "InternalGetMessages",
            "Call BeginGetMessages, worker {0}, call Id {1}, queue {2}",
            this.workerId,
            callId,
            this.queue.Name);

        // Mark a call as outstanding before it is issued.
        this.waitHandler.Reset();

        this.queue.BeginGetMessages(
            Constant.GetQueueMessageBatchSize,
            this.visibleTimeout,
            null,
            null,
            this.GetMessagesCallback,
            callId);

        TraceUtils.TraceVerbose(
            "MessageRetriever.Worker",
            "InternalGetMessages",
            "BeginGetMessages returns, worker {0}, call Id {1}, queue {2}",
            this.workerId,
            callId,
            this.queue.Name);
    }
    catch (StorageException storageError)
    {
        // Signal the handle again, since the begin call failed.
        this.waitHandler.Set();

        TraceUtils.TraceError(
            "MessageRetriever.Worker",
            "InternalGetMessages",
            "BeginGetMessages failed, worker {0}, call Id {1}, queue {2}, error code {3}, {4}",
            this.workerId,
            callId,
            this.queue.Name,
            BurstUtility.GetStorageErrorCode(storageError),
            storageError);

        this.HandleInvalidQueue(storageError);
        this.TriggerTimer();
    }
    catch (Exception error)
    {
        // Signal the handle again, since the begin call failed.
        this.waitHandler.Set();

        TraceUtils.TraceError(
            "MessageRetriever.Worker",
            "InternalGetMessages",
            "Error occurs, worker {0}, call Id {1}, queue {2}, {3}",
            this.workerId,
            callId,
            this.queue.Name,
            error);

        this.TriggerTimer();
    }
}
/// <summary>
/// Check if the Azure storage connection string is valid.
/// </summary>
/// <remarks>
/// Sets <c>connectionStringValid</c> as follows:
/// false when the connection string is missing, when the storage service
/// answers with the AuthenticationFailed error code, or when the request
/// fails with HTTP 502 (Bad Gateway). In those cases the job's progress
/// message is updated as well.
/// true when listing queues succeeds, and also when a storage error of
/// any other kind occurs (that error is only traced as a warning).
/// Only StorageException is handled here; any other exception, for
/// example one raised while parsing the connection string, propagates to
/// the caller.
/// </remarks>
/// <returns>task that completes when validation has finished</returns>
private async Task ValidateConnectionString()
{
    int sessionId = this.sharedData.BrokerInfo.SessionId;
    string sessionNode = await this.context.ResolveSessionLauncherNodeAsync();
    string certThumbprint = await this.context.GetSSLThumbprint();
    string storageConnectionString = this.sharedData.BrokerInfo.AzureStorageConnectionString;

    if (string.IsNullOrEmpty(storageConnectionString))
    {
        this.connectionStringValid = false;

        BrokerTracing.TraceError(
            "[DispatcherManager].ValidateConnectionString: Azure storage connection string is missed.");

        // set job's progress message if Azure connection string is missed
        using (HpcSchedulerAdapterInternalClient client = new HpcSchedulerAdapterInternalClient(sessionNode, certThumbprint))
        {
            await client.SetJobProgressMessage(sessionId, SR.MissAzureStorageConnectionString);
        }

        return;
    }

    try
    {
        this.azureQueueManager.StorageConnectionString = storageConnectionString;

        CloudStorageAccount storageAccount = CloudStorageAccount.Parse(storageConnectionString);
        CloudQueueClient client = storageAccount.CreateCloudQueueClient();

        // The result is irrelevant; enumerating forces a round trip to
        // the storage service so bad credentials surface here.
        client.ListQueues().Any<CloudQueue>();

        this.connectionStringValid = true;
    }
    catch (StorageException e)
    {
        string errorCode = BurstUtility.GetStorageErrorCode(e);

        if (errorCode == StorageErrorCodeStrings.AuthenticationFailed)
        {
            this.connectionStringValid = false;

            BrokerTracing.TraceError(
                "[DispatcherManager].ValidateConnectionString: Access key in Azure storage connection string is invalid.");

            // set job's progress message if Azure connection string is invalid
            using (HpcSchedulerAdapterInternalClient client = new HpcSchedulerAdapterInternalClient(sessionNode, certThumbprint))
            {
                await client.SetJobProgressMessage(sessionId, SR.InvalidAzureStorageConnectionString);
            }
        }
        else if (e.RequestInformation != null &&
                 e.RequestInformation.HttpStatusCode == (int)HttpStatusCode.BadGateway)
        {
            // RequestInformation is checked for null first so that an
            // exception without request details falls through to the
            // generic branch below instead of throwing from this handler.
            this.connectionStringValid = false;

            BrokerTracing.TraceError(
                "[DispatcherManager].ValidateConnectionString: Account name in Azure storage connection string is invalid.");

            // set job's progress message if Azure connection string is invalid
            using (HpcSchedulerAdapterInternalClient client = new HpcSchedulerAdapterInternalClient(sessionNode, certThumbprint))
            {
                await client.SetJobProgressMessage(sessionId, SR.InvalidAzureStorageConnectionString);
            }
        }
        else
        {
            // Any other storage error does not mark the connection string
            // as invalid; it is only traced.
            this.connectionStringValid = true;

            BrokerTracing.TraceWarning(
                "[DispatcherManager].ValidateConnectionString: Error occurs when check storage connection string, {0}",
                e);
        }
    }
}
/// <summary>
/// Serialises a data item into memory, uploads it to the data blob and
/// then marks the blob as completed with the resulting error code.
/// </summary>
/// <param name="data">data content to be written</param>
/// <remarks>
/// A transfer failure is translated into a DataException, recorded on the
/// blob through MarkBlobAsCompleted and then thrown. A failure to mark
/// the blob as completed is only traced.
/// </remarks>
public void AddDataAndFlush(DataContent data)
{
    TraceHelper.TraceSource.TraceEvent(TraceEventType.Verbose, 0, "[BlobDataContainer] .AddDataAndFlush");

    using (MemoryStream buffer = new MemoryStream())
    {
        // Serialise the whole item into memory first.
        data.Dump(buffer);

        // While the upload runs, a timer keeps invoking
        // MarkBlobAsBeingUploaded at a fixed interval (the original
        // comment says this refreshes the "CommonDataLastUpdateTime"
        // metadata).
        TimeSpan refreshInterval = TimeSpan.FromMilliseconds(Constant.LastUpdateTimeUpdateIntervalInMilliseconds);
        Timer updateMetadataTimer = new Timer(this.MarkBlobAsBeingUploaded, null, refreshInterval, refreshInterval);

        // Upload; the completion callback records any failure.
        Exception transferException = null;
        try
        {
            BlobTransferOptions transferOptions = new BlobTransferOptions
            {
                Concurrency = Environment.ProcessorCount * 8,
            };

            using (BlobTransferManager transferManager = new BlobTransferManager(transferOptions))
            {
                transferManager.QueueUpload(
                    this.dataBlob,
                    buffer,
                    null,
                    (object userData, double speed, double progress) =>
                    {
                        TraceHelper.TraceSource.TraceEvent(TraceEventType.Verbose, 0, "[BlobDataContainer] .AddDataAndFlush: progress={0}%", progress);
                    },
                    (object userData, Exception ex) =>
                    {
                        if (ex != null)
                        {
                            transferException = ex;
                        }
                    },
                    null);

                transferManager.WaitForCompletion();
            }
        }
        finally
        {
            updateMetadataTimer.Dispose();
        }

        TraceHelper.TraceSource.TraceEvent(TraceEventType.Verbose, 0, "[BlobDataContainer] .AddDataAndFlush: data transfer done");

        DataException dataException = transferException != null
            ? TranslateTransferExceptionToDataException(transferException)
            : null;

        try
        {
            // Record the outcome on the blob: success, or the translated
            // failure's code and message.
            int errorCode = dataException != null ? dataException.ErrorCode : DataErrorCode.Success;
            string errorMessage = dataException != null ? dataException.Message : string.Empty;

            AzureBlobHelper.MarkBlobAsCompleted(this.dataBlob, errorCode.ToString(), errorMessage);
        }
        catch (StorageException ex)
        {
            TraceHelper.TraceSource.TraceEvent(
                TraceEventType.Error,
                0,
                "[BlobDataContainer] .AddDataAndFlush: failed to mark blob as completed. blob url={0}. error code={1}, exception={2}",
                this.dataBlob.Uri.AbsoluteUri,
                BurstUtility.GetStorageErrorCode(ex),
                ex);
        }
        catch (Exception ex)
        {
            TraceHelper.TraceSource.TraceEvent(
                TraceEventType.Error,
                0,
                "[BlobDataContainer] .AddDataAndFlush: failed to mark blob as completed. blob url={0}. Exception={1}",
                this.dataBlob.Uri.AbsoluteUri,
                ex);
        }

        // Surface the transfer failure only after the blob was marked.
        if (dataException != null)
        {
            throw dataException;
        }
    }
}