Пример #1
0
        /// <summary>Removes every blob that belongs to a job, whatever the status of that job is.</summary>
        /// <param name="jobName">The name of the job.</param>
        /// <remarks>
        /// Only blob storage is touched here. Messages that are already enqueued cannot be deleted,
        /// but they cause no harm.
        /// </remarks>
        public void DeleteJobData(string jobName)
        {
            // Job configuration (single blob).
            var configurationBlob = MapReduceConfigurationName.Create(jobName);
            _blobStorage.DeleteBlobIfExist(configurationBlob);

            // Input blobs of the job (everything under the input prefix).
            var inputPrefix = InputBlobName.GetPrefix(jobName);
            _blobStorage.DeleteAllBlobs(inputPrefix);

            // Per-blobset reduced results (everything under the reduced prefix).
            var reducedPrefix = ReducedBlobName.GetPrefix(jobName);
            _blobStorage.DeleteAllBlobs(reducedPrefix);

            // Final aggregated result (single blob).
            var aggregatedBlob = AggregatedBlobName.Create(jobName);
            _blobStorage.DeleteBlobIfExist(aggregatedBlob);

            // Counter of completed blobsets (single blob).
            var counterBlob = BlobCounterName.Create(jobName);
            _blobStorage.DeleteBlobIfExist(counterBlob);
        }
Пример #2
0
        /// <summary>Aggregates the reduced results of a job into a single aggregated blob.</summary>
        /// <param name="jobName">The name of the job.</param>
        /// <remarks>
        /// Every readable blob under the job's reduced-blob prefix is loaded, the loaded values are
        /// combined pairwise with the job's reducer until one value remains, that value is stored as
        /// the aggregated blob, and the reduced blobs are then deleted.
        /// </remarks>
        /// <exception cref="InvalidOperationException">
        /// No reduced blob could be loaded for the job, so there is nothing to aggregate.
        /// </exception>
        public void PerformAggregate(string jobName)
        {
            // 1. Load config
            // 2. Do aggregation
            // 3. Store result
            // 4. Delete reduced data

            // 1. Load config
            var config = GetJobConfig(jobName).Value;

            var reducedBlobPrefix = ReducedBlobName.GetPrefix(jobName);

            // A queue gives the same first-in/first-out pairing as removing the two leading
            // elements of a list, without shifting the remaining elements on every step.
            var pendingResults = new Queue<object>();

            Type mapOut = Type.GetType(config.TMapOutType);

            // 2. Load reduced items and do aggregation
            string ignored;

            foreach (var blobName in _blobStorage.ListBlobNames(reducedBlobPrefix))
            {
                var blob = _blobStorage.GetBlob(blobName.ContainerName, blobName.ToString(), mapOut, out ignored);
                if (!blob.HasValue)
                {
                    // Blobs that cannot be read are skipped.
                    continue;
                }

                pendingResults.Enqueue(blob.Value);
            }

            IMapReduceFunctions mapReduceFunctions = GetMapReduceFunctions(config.MapReduceFunctionsImplementor);

            // Fail with an explicit message instead of an index-out-of-range error later on;
            // nothing has been written or deleted at this point.
            if (pendingResults.Count == 0)
            {
                throw new InvalidOperationException(
                    "No reduced results could be loaded for job '" + jobName + "'; there is nothing to aggregate.");
            }

            // Combine the two oldest values and append the outcome until a single value is left.
            while (pendingResults.Count > 1)
            {
                object item1 = pendingResults.Dequeue();
                object item2 = pendingResults.Dequeue();

                object aggregResult = InvokeAsDelegate(mapReduceFunctions.GetReducer(), item1, item2);
                pendingResults.Enqueue(aggregResult);
            }

            object aggregated = pendingResults.Dequeue();

            // 3. Store aggregated result
            var aggregatedBlobName = AggregatedBlobName.Create(jobName);

            _blobStorage.PutBlob(aggregatedBlobName.ContainerName, aggregatedBlobName.ToString(), aggregated, mapOut, false, out ignored);

            // 4. Delete reduced data (only after the aggregated result has been stored)
            _blobStorage.DeleteAllBlobs(reducedBlobPrefix);
        }
Пример #3
0
        /// <summary>Performs map/reduce operations on a blobset.</summary>
        /// <param name="jobName">The name of the job.</param>
        /// <param name="blobSetId">The blobset ID.</param>
        /// <remarks>
        /// This method should be called from <see cref="T:MapReduceService"/>.
        /// Each readable input blob of the blobset is mapped, the mapped values are combined pairwise
        /// with the job's reducer until one value remains, and that value is stored as the reduced blob
        /// of the blobset. The job's blob counter is then incremented, an aggregation message is enqueued
        /// when the counter equals the configured blobset count, and the blobset's input blobs are deleted.
        /// </remarks>
        /// <exception cref="InvalidOperationException">
        /// No input blob could be loaded for the blobset, so there is nothing to map or reduce.
        /// </exception>
        public void PerformMapReduce(string jobName, int blobSetId)
        {
            // 1. Load config
            // 2. For all blobs in blobset, do map (output N)
            // 3. For all mapped items, do reduce (output 1)
            // 4. Store reduce result
            // 5. Update counter
            // 6. If blobsets are all processed --> enqueue aggregation message
            // 7. Delete blobset

            // 1. Load config
            var config = GetJobConfig(jobName).Value;

            var blobsetPrefix = InputBlobName.GetPrefix(jobName, blobSetId);

            // A queue gives the same first-in/first-out pairing as removing the two leading
            // elements of a list, without shifting the remaining elements on every step.
            var pendingResults = new Queue<object>();

            var mapReduceFunctions = GetMapReduceFunctions(config.MapReduceFunctionsImplementor);
            var mapIn  = Type.GetType(config.TMapInType);
            var mapOut = Type.GetType(config.TMapOutType);

            // 2. Do map for all blobs in the blobset
            string ignored;

            foreach (var blobName in _blobStorage.ListBlobNames(blobsetPrefix))
            {
                var inputBlob = _blobStorage.GetBlob(blobName.ContainerName, blobName.ToString(), mapIn, out ignored);
                if (!inputBlob.HasValue)
                {
                    // Blobs that cannot be read are skipped.
                    continue;
                }

                object mapResult = InvokeAsDelegate(mapReduceFunctions.GetMapper(), inputBlob.Value);

                pendingResults.Enqueue(mapResult);
            }

            // Fail with an explicit message instead of an index-out-of-range error later on;
            // nothing has been written, counted or deleted at this point.
            if (pendingResults.Count == 0)
            {
                throw new InvalidOperationException(
                    "No input blob could be loaded for blobset " + blobSetId + " of job '" + jobName
                    + "'; there is nothing to map or reduce.");
            }

            // 3. Do reduce for all mapped results: combine the two oldest values and append
            //    the outcome until a single value is left.
            while (pendingResults.Count > 1)
            {
                object item1 = pendingResults.Dequeue();
                object item2 = pendingResults.Dequeue();

                object reduceResult = InvokeAsDelegate(mapReduceFunctions.GetReducer(), item1, item2);
                pendingResults.Enqueue(reduceResult);
            }

            object reduced = pendingResults.Dequeue();

            // 4. Store reduced result
            var reducedBlobName = ReducedBlobName.Create(jobName, blobSetId);

            _blobStorage.PutBlob(reducedBlobName.ContainerName, reducedBlobName.ToString(), reduced, mapOut, false, out ignored);

            // 5. Update counter
            var counterName            = BlobCounterName.Create(jobName);
            var counter                = new BlobCounter(_blobStorage, counterName);
            var totalCompletedBlobSets = (int)counter.Increment(1);

            // 6. Queue aggregation once the counter reaches the configured number of blobsets
            if (totalCompletedBlobSets == config.BlobSetCount)
            {
                _queueStorage.Put(JobsQueueName,
                                  new JobMessage {
                    JobName = jobName, BlobSetId = null, Type = MessageType.ReducedDataToAggregate
                });
            }

            // 7. Delete blobset's blobs
            _blobStorage.DeleteAllBlobs(blobsetPrefix);
        }