Exemple #1
0
        /// <summary>Fetches the final, aggregated output of a map/reduce job.</summary>
        /// <typeparam name="T">The type the stored result is cast to before it is returned.</typeparam>
        /// <param name="jobName">The name of the job.</param>
        /// <returns>The aggregated result, cast to <typeparamref name="T"/>.</returns>
        /// <exception cref="InvalidOperationException">If the job is not yet complete: either fewer blobsets
        /// than the configured total have been counted as completed, or the aggregated result is not
        /// available in blob storage.</exception>
        /// <exception cref="ArgumentException">If <paramref name="jobName"/> does not refer to an existing job.</exception>
        public T GetAggregatedResult <T>(string jobName)
        {
            var jobConfig = GetJobConfig(jobName);
            if (jobConfig.HasValue == false)
            {
                throw new ArgumentException("Unknown job", "jobName");
            }

            var settings = jobConfig.Value;

            // The job counter is compared against the configured number of blobsets:
            // anything below the total means some blobsets are still pending.
            var progress          = new BlobCounter(_blobStorage, BlobCounterName.Create(jobName));
            var processedBlobsets = (int)progress.GetValue();
            if (settings.BlobSetCount > processedBlobsets)
            {
                throw new InvalidOperationException("Job is not complete (there still are blobsets to process)");
            }

            // The aggregated blob is read using the type recorded in the job configuration.
            Type storedType     = Type.GetType(settings.TMapOutType);
            var  aggregatedName = AggregatedBlobName.Create(jobName);

            string unused;
            var    stored = _blobStorage.GetBlob(aggregatedName.ContainerName, aggregatedName.ToString(), storedType, out unused);
            if (stored.HasValue)
            {
                return((T)stored.Value);
            }

            // All blobsets are processed, but the aggregation output is not there.
            throw new InvalidOperationException("Job is not complete (reduced items must still be aggregated)");
        }
Exemple #2
0
        /// <summary>Removes every blob that belongs to a job, whatever state the job is in.</summary>
        /// <param name="jobName">The name of the job.</param>
        /// <remarks>Only blob storage is touched. Messages already enqueued for the job cannot be
        /// deleted, but they cause no harm.</remarks>
        public void DeleteJobData(string jobName)
        {
            // Job configuration blob.
            var configurationName = MapReduceConfigurationName.Create(jobName);
            _blobStorage.DeleteBlobIfExist(configurationName);

            // Everything stored under the job's input prefix.
            var inputPrefix = InputBlobName.GetPrefix(jobName);
            _blobStorage.DeleteAllBlobs(inputPrefix);

            // Everything stored under the job's reduced-results prefix.
            var reducedPrefix = ReducedBlobName.GetPrefix(jobName);
            _blobStorage.DeleteAllBlobs(reducedPrefix);

            // Aggregated result blob.
            var aggregatedName = AggregatedBlobName.Create(jobName);
            _blobStorage.DeleteBlobIfExist(aggregatedName);

            // Counter blob tracking completed blobsets.
            var counterName = BlobCounterName.Create(jobName);
            _blobStorage.DeleteBlobIfExist(counterName);
        }
Exemple #3
0
        /// <summary>Reports the progress of a job, expressed in blobsets.</summary>
        /// <param name="jobName">The name of the job.</param>
        /// <returns>A tuple whose <c>Item1</c> is the number of completed blobsets and whose <c>Item2</c>
        /// is the total number of blobsets of the job.</returns>
        /// <exception cref="ArgumentException">If <paramref name="jobName"/> does not refer to an existing job.</exception>
        public System.Tuple <int, int> GetCompletedBlobSets(string jobName)
        {
            var jobConfig = GetJobConfig(jobName);

            if (jobConfig.HasValue)
            {
                // Completed count comes from the job's counter, the total from its configuration.
                var progress  = new BlobCounter(_blobStorage, BlobCounterName.Create(jobName));
                var completed = (int)progress.GetValue();
                var total     = jobConfig.Value.BlobSetCount;

                return(new System.Tuple <int, int>(completed, total));
            }

            throw new ArgumentException("Unknown job", "jobName");
        }
Exemple #4
0
        /// <summary>Performs map/reduce operations on a blobset.</summary>
        /// <param name="jobName">The name of the job.</param>
        /// <param name="blobSetId">The blobset ID.</param>
        /// <exception cref="InvalidOperationException">If no input blob of the blobset could be loaded,
        /// so there is no result to store. Nothing is stored, counted or deleted in that case.</exception>
        /// <remarks>This method should be called from <see cref="T:MapReduceService"/>.</remarks>
        public void PerformMapReduce(string jobName, int blobSetId)
        {
            // 1. Load config
            // 2. For all blobs in blobset, do map (output N)
            // 3. For all mapped items, do reduce (output 1)
            // 4. Store reduce result
            // 5. Update counter
            // 6. If blobsets are all processed --> enqueue aggregation message
            // 7. Delete blobset

            // 1. Load config
            var config = GetJobConfig(jobName).Value;

            var blobsetPrefix = InputBlobName.GetPrefix(jobName, blobSetId);

            // FIFO queue: the reduce phase takes items from the front and appends results at the back.
            var pendingResults = new Queue <object>();

            var mapReduceFunctions = GetMapReduceFunctions(config.MapReduceFunctionsImplementor);
            var mapIn  = Type.GetType(config.TMapInType);
            var mapOut = Type.GetType(config.TMapOutType);

            // 2. Do map for all blobs in the blobset
            string ignored;

            foreach (var blobName in _blobStorage.ListBlobNames(blobsetPrefix))
            {
                var inputBlob = _blobStorage.GetBlob(blobName.ContainerName, blobName.ToString(), mapIn, out ignored);
                if (!inputBlob.HasValue)
                {
                    // The blob was listed but could not be read: skip it.
                    continue;
                }

                object mapResult = InvokeAsDelegate(mapReduceFunctions.GetMapper(), inputBlob.Value);

                pendingResults.Enqueue(mapResult);
            }

            // Without any mapped item there is nothing to reduce or store. Fail explicitly, before any
            // write, instead of hitting an out-of-range access when the reduced result is read.
            if (pendingResults.Count == 0)
            {
                throw new InvalidOperationException(
                          "No input blob could be loaded for blobset " + blobSetId + " of job '" + jobName + "'");
            }

            // 3. Do reduce for all mapped results: combine the two oldest items and append the
            //    outcome, until a single item remains.
            while (pendingResults.Count > 1)
            {
                object item1 = pendingResults.Dequeue();
                object item2 = pendingResults.Dequeue();

                object reduceResult = InvokeAsDelegate(mapReduceFunctions.GetReducer(), item1, item2);
                pendingResults.Enqueue(reduceResult);
            }

            object reducedItem = pendingResults.Dequeue();

            // 4. Store reduced result
            var reducedBlobName = ReducedBlobName.Create(jobName, blobSetId);

            _blobStorage.PutBlob(reducedBlobName.ContainerName, reducedBlobName.ToString(), reducedItem, mapOut, false, out ignored);

            // 5. Update counter
            var counterName            = BlobCounterName.Create(jobName);
            var counter                = new BlobCounter(_blobStorage, counterName);
            var totalCompletedBlobSets = (int)counter.Increment(1);

            // 6. Queue aggregation if appropriate: only the call whose increment brings the counter
            //    exactly to the configured total enqueues the message.
            if (totalCompletedBlobSets == config.BlobSetCount)
            {
                _queueStorage.Put(JobsQueueName,
                                  new JobMessage {
                    JobName = jobName, BlobSetId = null, Type = MessageType.ReducedDataToAggregate
                });
            }

            // 7. Delete blobset's blobs
            _blobStorage.DeleteAllBlobs(blobsetPrefix);
        }