/// <summary>
/// Produces one map-mode <see cref="JobChunk"/> for every block blob listed under the
/// blob directory named by <c>info.DataSource</c>.
/// </summary>
/// <remarks>
/// This method is an iterator: the logging, the directory lookup, the validation and the
/// blob listing all run lazily, when the returned sequence is enumerated, not when the
/// method is called. Listed items that are not block blobs are logged and skipped.
/// </remarks>
/// <param name="info">Job description supplying the data source, handler, job id and job name.</param>
/// <returns>A lazily evaluated sequence of map chunks, one per block blob.</returns>
/// <exception cref="ArgumentNullException"><paramref name="info"/> is null.</exception>
/// <exception cref="JobProcessorException">The blob directory reference could not be obtained.</exception>
public IEnumerable<JobChunk> SplitJob(JobInfo info)
{
    if (info == null)
    {
        throw new ArgumentNullException("info");
    }

    Logger.Log.Instance.Info(string.Format("DefaultJobChunksProvider. Split job. JobId '{0}'", info.JobId));

    var blobDirectory = AzureClient.Instance.BlobClient.GetBlobDirectoryReference(info.DataSource.ToString());
    if (blobDirectory == null)
    {
        throw new JobProcessorException("Data source is not available");
    }

    foreach (var blob in blobDirectory.ListBlobs())
    {
        // The listing may contain items that are not block blobs; an unchecked
        // "as" cast on those would fail with a NullReferenceException and abort the split.
        var blockBlob = blob as CloudBlockBlob;
        if (blockBlob == null)
        {
            Logger.Log.Instance.Info(string.Format("DefaultJobChunksProvider. Skip non-block blob item '{0}' for JobId '{1}'", blob.Uri, info.JobId));
            continue;
        }

        var chunk = new JobChunk()
        {
            Data = blockBlob.Uri.ToString(),
            IsBlob = true,
            BlobContainer = blockBlob.Container.Name,
            BlobName = blockBlob.Name,
            Handler = info.Handler,
            Mode = ProcessingMode.Map,
            ResponseQueueName = JobProcessor.RoleSettings.ChunkResponseQueue
        };

        // Each chunk gets a fresh unique id and inherits the job identity.
        chunk.ChunkUid.ChunkId = Guid.NewGuid().ToString();
        chunk.ChunkUid.JobId = info.JobId;
        chunk.ChunkUid.JobName = info.JobName;

        Logger.Log.Instance.Info(string.Format("DefaultJobChunksProvider. Create new map chunk for JobId '{0}'. ChunkId: '{1}'", chunk.ChunkUid.JobId, chunk.ChunkUid.ChunkId));

        yield return chunk;
    }
}
/// <summary>
/// Groups the collected map results into JSON batches, uploads each batch to a blob and
/// yields one reduce-mode <see cref="JobChunk"/> per uploaded batch.
/// </summary>
/// <remarks>
/// This method is an iterator, so its body runs lazily as the sequence is enumerated.
/// The batch size is checked when the key index is a multiple of 500 and on the last key.
/// A batch is uploaded when its serialized length exceeds <c>MaxReduceChunkLength</c>,
/// and always on the last key. <c>_results</c> is cleared only after the loop has run to completion.
/// </remarks>
/// <returns>A lazily evaluated sequence of reduce chunks.</returns>
public IEnumerable<JobChunk> SplittedMappedData()
{
    Logger.Log.Instance.Info(string.Format("MapResultsCollector. SplittedMappedData called for job {0}", JobId));

    var pending = new List<KeyValueMessage>();

    for (var index = 0; index < _results.Keys.Count; index++)
    {
        var currentKey = _results.Keys[index];
        pending.Add(new KeyValueMessage() { key = currentKey, value = _results[currentKey].ToArray() });

        var isLastKey = index == _results.Keys.Count - 1;

        // Only inspect the accumulated batch at a checkpoint or at the very end.
        if (index % 500 != 0 && !isLastKey)
        {
            continue;
        }

        var serialized = Newtonsoft.Json.JsonConvert.SerializeObject(pending);

        // Keep accumulating while the batch is within the size limit, unless this is the last key.
        if (serialized.Length <= MaxReduceChunkLength && !isLastKey)
        {
            continue;
        }

        var uploaded = UploadToBlob(string.Format("rchunk{0}.json", index), serialized);

        var reduceChunk = new JobChunk();
        reduceChunk.Mode = ProcessingMode.Reduce;
        reduceChunk.Handler = _jobInfo.Handler;
        reduceChunk.Data = uploaded.Uri.ToString();
        reduceChunk.IsBlob = true;
        reduceChunk.BlobContainer = uploaded.Container.Name;
        reduceChunk.BlobName = uploaded.Name;
        reduceChunk.ResponseQueueName = JobProcessor.RoleSettings.ChunkResponseQueue;

        reduceChunk.ChunkUid.JobId = JobId;
        reduceChunk.ChunkUid.JobName = _jobInfo.JobName;
        reduceChunk.ChunkUid.ChunkId = Guid.NewGuid().ToString();

        yield return reduceChunk;

        // Start a new batch once the consumer resumes the iterator.
        pending.Clear();
    }

    _results.Clear();
}
/// <summary>
/// Serializes the chunk to JSON, logs it, enqueues it as a queue message and then records
/// the send with the chunk registrator according to the chunk's processing mode.
/// </summary>
/// <param name="chunk">The chunk to dispatch.</param>
public void Dispatch(JobChunk chunk)
{
    var payload = chunk.ToJson();
    var uid = chunk.ChunkUid;

    var logLine = string.Format(
        "QueueJobChunkDispatcher. Dispatch chunk. JobId '{0}', ChunkId '{1}', JSON:\n{2}",
        uid.JobId,
        uid.ChunkId,
        payload);
    Logger.Log.Instance.Info(logLine);

    var queueMessage = new CloudQueueMessage(payload);
    _queue.AddMessage(queueMessage);

    // The registrator is updated only after the message has been added to the queue.
    if (chunk.Mode == ProcessingMode.Map)
    {
        _jobChunkRegistrator.UpdateChunkMapSent(chunk.ChunkUid);
        return;
    }

    if (chunk.Mode == ProcessingMode.Reduce)
    {
        _jobChunkRegistrator.UpdateChunkReduceSent(chunk.ChunkUid);
    }
}