/// <summary>Removes every blob that belongs to a job, whatever the job status is.</summary>
/// <param name="jobName">The name of the job.</param>
/// <remarks>Messages that are already enqueued cannot be deleted, but they cause no harm.</remarks>
public void DeleteJobData(string jobName)
{
    // Job configuration blob.
    var configurationBlob = MapReduceConfigurationName.Create(jobName);
    _blobStorage.DeleteBlobIfExist(configurationBlob);

    // Every input blob of the job (prefix delete).
    var inputPrefix = InputBlobName.GetPrefix(jobName);
    _blobStorage.DeleteAllBlobs(inputPrefix);

    // Every reduced blob of the job (prefix delete).
    var reducedPrefix = ReducedBlobName.GetPrefix(jobName);
    _blobStorage.DeleteAllBlobs(reducedPrefix);

    // Aggregated result blob.
    var aggregatedBlob = AggregatedBlobName.Create(jobName);
    _blobStorage.DeleteBlobIfExist(aggregatedBlob);

    // Blob counter of the job.
    var counterBlob = BlobCounterName.Create(jobName);
    _blobStorage.DeleteBlobIfExist(counterBlob);
}
/// <summary>Performs map/reduce operations on a blobset.</summary>
/// <param name="jobName">The name of the job.</param>
/// <param name="blobSetId">The blobset ID.</param>
/// <remarks>
/// This method should be called from <see cref="T:MapReduceService"/>.
/// Steps: load the job configuration, map every blob of the blobset, fold the mapped
/// items pairwise into a single value, store that value as the reduced blob, increment
/// the job counter, enqueue an aggregation message when the counter equals the configured
/// blobset count, and finally delete the blobs of the blobset.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// The blobset produced no mapped item (no blob listed under its prefix, or none could be loaded),
/// so there is nothing to reduce or store.
/// </exception>
public void PerformMapReduce(string jobName, int blobSetId)
{
    // 1. Load config
    var config = GetJobConfig(jobName).Value;
    var blobsetPrefix = InputBlobName.GetPrefix(jobName, blobSetId);
    var mapReduceFunctions = GetMapReduceFunctions(config.MapReduceFunctionsImplementor);
    var mapIn = Type.GetType(config.TMapInType);
    var mapOut = Type.GetType(config.TMapOutType);

    // FIFO queue of items still to be reduced. A queue gives O(1) removal at the head,
    // whereas removing index 0 from a list shifts every remaining element.
    var pending = new Queue<object>();

    // 2. Do map for all blobs in the blobset
    string ignored;
    foreach (var blobName in _blobStorage.ListBlobNames(blobsetPrefix))
    {
        var inputBlob = _blobStorage.GetBlob(blobName.ContainerName, blobName.ToString(), mapIn, out ignored);
        if (!inputBlob.HasValue)
        {
            // The blob was listed but could not be retrieved: skip it.
            continue;
        }

        object mapResult = InvokeAsDelegate(mapReduceFunctions.GetMapper(), inputBlob.Value);
        pending.Enqueue(mapResult);
    }

    // Fail with an explicit message instead of an out-of-range index further down.
    // Nothing has been written and the counter has not been touched at this point.
    if (pending.Count == 0)
    {
        throw new InvalidOperationException(
            "No input could be mapped for blobset " + blobSetId + " of job '" + jobName + "'.");
    }

    // 3. Do reduce for all mapped results:
    // take the two oldest items, reduce them, append the result at the end,
    // and repeat until a single item remains.
    while (pending.Count > 1)
    {
        object item1 = pending.Dequeue();
        object item2 = pending.Dequeue();

        object reduceResult = InvokeAsDelegate(mapReduceFunctions.GetReducer(), item1, item2);
        pending.Enqueue(reduceResult);
    }

    // 4. Store reduced result (the only item left in the queue)
    var reducedBlobName = ReducedBlobName.Create(jobName, blobSetId);
    _blobStorage.PutBlob(reducedBlobName.ContainerName, reducedBlobName.ToString(), pending.Peek(), mapOut, false, out ignored);

    // 5. Update counter
    var counterName = BlobCounterName.Create(jobName);
    var counter = new BlobCounter(_blobStorage, counterName);
    var totalCompletedBlobSets = (int)counter.Increment(1);

    // 6. Queue aggregation if appropriate
    // NOTE(review): the original step list also mentioned "aggregator != null" as a condition,
    // but only the counter is checked here - confirm whether that check is done elsewhere.
    if (totalCompletedBlobSets == config.BlobSetCount)
    {
        _queueStorage.Put(
            JobsQueueName,
            new JobMessage { JobName = jobName, BlobSetId = null, Type = MessageType.ReducedDataToAggregate });
    }

    // 7. Delete blobset's blobs
    _blobStorage.DeleteAllBlobs(blobsetPrefix);
}