/// <summary>
/// Removes every blob that belongs to a job, whatever state the job is in.
/// </summary>
/// <param name="jobName">The name of the job.</param>
/// <remarks>
/// Messages that are already enqueued cannot be deleted, but they cause no harm.
/// </remarks>
public void DeleteJobData(string jobName)
{
    // Job configuration blob.
    var configurationBlob = MapReduceConfigurationName.Create(jobName);
    _blobStorage.DeleteBlobIfExist(configurationBlob);

    // Input blobs of the job, addressed by their common prefix.
    var inputPrefix = InputBlobName.GetPrefix(jobName);
    _blobStorage.DeleteAllBlobs(inputPrefix);

    // Reduced blobs of the job, addressed by their common prefix.
    var reducedPrefix = ReducedBlobName.GetPrefix(jobName);
    _blobStorage.DeleteAllBlobs(reducedPrefix);

    // Aggregated result blob.
    var aggregatedBlob = AggregatedBlobName.Create(jobName);
    _blobStorage.DeleteBlobIfExist(aggregatedBlob);

    // Blob backing the job's counter.
    var counterBlob = BlobCounterName.Create(jobName);
    _blobStorage.DeleteBlobIfExist(counterBlob);
}
/// <summary>
/// Combines the reduced results of a job into a single aggregated result.
/// </summary>
/// <param name="jobName">The name of the job.</param>
/// <remarks>
/// Sequence of operations:
/// 1. load the job configuration;
/// 2. read every reduced blob of the job and fold the values pairwise with the
///    job's reducer until a single value remains;
/// 3. store that value under the aggregated blob name;
/// 4. delete the reduced blobs.
/// </remarks>
public void PerformAggregate(string jobName)
{
    // 1. Load config
    var jobConfig = GetJobConfig(jobName).Value;
    var reducedPrefix = ReducedBlobName.GetPrefix(jobName);
    Type resultType = Type.GetType(jobConfig.TMapOutType);

    // 2a. Gather the reduced items; listed names whose blob has no value are skipped
    var pending = new List<object>();
    string discarded; // receives an 'out' value this method never inspects
    foreach (var reducedName in _blobStorage.ListBlobNames(reducedPrefix))
    {
        var reduced = _blobStorage.GetBlob(reducedName.ContainerName, reducedName.ToString(), resultType, out discarded);
        if (reduced.HasValue)
        {
            pending.Add(reduced.Value);
        }
    }

    // 2b. Aggregate: take the two items at the head of the list, reduce them,
    //     and append the outcome at the tail, until at most one item is left
    IMapReduceFunctions functions = GetMapReduceFunctions(jobConfig.MapReduceFunctionsImplementor);
    while (pending.Count >= 2)
    {
        object left = pending[0];
        object right = pending[1];
        pending.RemoveRange(0, 2);

        pending.Add(InvokeAsDelegate(functions.GetReducer(), left, right));
    }

    // 3. Store aggregated result (the single item remaining in the list)
    var targetName = AggregatedBlobName.Create(jobName);
    _blobStorage.PutBlob(targetName.ContainerName, targetName.ToString(), pending[0], resultType, false, out discarded);

    // 4. Delete reduced data
    _blobStorage.DeleteAllBlobs(reducedPrefix);
}
/// <summary>
/// Runs the map step and then the reduce step over one blobset of a job.
/// </summary>
/// <param name="jobName">The name of the job.</param>
/// <param name="blobSetId">The blobset ID.</param>
/// <remarks>
/// This method should be called from <see cref="T:MapReduceService"/>.
/// Sequence of operations:
/// 1. load the job configuration;
/// 2. map every input blob of the blobset;
/// 3. fold the mapped values pairwise with the reducer until one remains;
/// 4. store that value as the reduced blob of the blobset;
/// 5. increment the job's blob counter;
/// 6. when the counter equals the configured blobset count, enqueue an
///    aggregation message;
/// 7. delete the input blobs of the blobset.
/// </remarks>
public void PerformMapReduce(string jobName, int blobSetId)
{
    // 1. Load config
    var jobConfig = GetJobConfig(jobName).Value;
    var inputPrefix = InputBlobName.GetPrefix(jobName, blobSetId);
    var functions = GetMapReduceFunctions(jobConfig.MapReduceFunctionsImplementor);
    var inputType = Type.GetType(jobConfig.TMapInType);
    var outputType = Type.GetType(jobConfig.TMapOutType);

    // 2. Map each input blob; listed names whose blob has no value are skipped
    var pending = new List<object>();
    string discarded; // receives an 'out' value this method never inspects
    foreach (var inputName in _blobStorage.ListBlobNames(inputPrefix))
    {
        var input = _blobStorage.GetBlob(inputName.ContainerName, inputName.ToString(), inputType, out discarded);
        if (input.HasValue)
        {
            pending.Add(InvokeAsDelegate(functions.GetMapper(), input.Value));
        }
    }

    // 3. Reduce: take the two items at the head of the list, reduce them,
    //    and append the outcome at the tail, until at most one item is left
    while (pending.Count >= 2)
    {
        object left = pending[0];
        object right = pending[1];
        pending.RemoveRange(0, 2);

        pending.Add(InvokeAsDelegate(functions.GetReducer(), left, right));
    }

    // 4. Store reduced result (the single item remaining in the list)
    var reducedName = ReducedBlobName.Create(jobName, blobSetId);
    _blobStorage.PutBlob(reducedName.ContainerName, reducedName.ToString(), pending[0], outputType, false, out discarded);

    // 5. Update counter
    var blobCounter = new BlobCounter(_blobStorage, BlobCounterName.Create(jobName));
    var completedBlobSets = (int)blobCounter.Increment(1);

    // 6. Queue aggregation once the counter reaches the configured blobset count
    if (completedBlobSets == jobConfig.BlobSetCount)
    {
        _queueStorage.Put(
            JobsQueueName,
            new JobMessage
            {
                JobName = jobName,
                BlobSetId = null,
                Type = MessageType.ReducedDataToAggregate
            });
    }

    // 7. Delete blobset's blobs
    _blobStorage.DeleteAllBlobs(inputPrefix);
}