Beispiel #1
0
        /// <summary>Deletes all the data related to a job, regardless of the job status.</summary>
        /// <param name="jobName">The name of the job.</param>
        /// <remarks>Messages enqueued cannot be deleted but they cause no harm.</remarks>
        public void DeleteJobData(string jobName)
        {
            _blobStorage.DeleteBlobIfExist(MapReduceConfigurationName.Create(jobName));

            _blobStorage.DeleteAllBlobs(InputBlobName.GetPrefix(jobName));
            _blobStorage.DeleteAllBlobs(ReducedBlobName.GetPrefix(jobName));

            _blobStorage.DeleteBlobIfExist(AggregatedBlobName.Create(jobName));
            _blobStorage.DeleteBlobIfExist(BlobCounterName.Create(jobName));
        }
Beispiel #2
0
        /// <summary>Computes a hash code from every field that differs from its default value.</summary>
        /// <returns>The accumulated hash code (seed 1, fields folded in via XOR).</returns>
        public override int GetHashCode()
        {
            // Protobuf-style convention: fields still at their default value
            // (0, 0F, false, or empty string) do not contribute to the hash.
            var hashCode = 1;

            if (MinCropSize != 0) { hashCode ^= MinCropSize.GetHashCode(); }
            if (CropMethod != 0) { hashCode ^= CropMethod.GetHashCode(); }
            if (MeanB != 0F) { hashCode ^= MeanB.GetHashCode(); }
            if (MeanG != 0F) { hashCode ^= MeanG.GetHashCode(); }
            if (MeanR != 0F) { hashCode ^= MeanR.GetHashCode(); }
            if (IsBgr) { hashCode ^= IsBgr.GetHashCode(); }
            if (CropScale != 0F) { hashCode ^= CropScale.GetHashCode(); }
            if (InputBlobName.Length != 0) { hashCode ^= InputBlobName.GetHashCode(); }
            if (ImParamBlobName.Length != 0) { hashCode ^= ImParamBlobName.GetHashCode(); }
            if (OutputBlobName.Length != 0) { hashCode ^= OutputBlobName.GetHashCode(); }
            if (ModelName.Length != 0) { hashCode ^= ModelName.GetHashCode(); }
            if (ModelType.Length != 0) { hashCode ^= ModelType.GetHashCode(); }
            if (ProtoFile.Length != 0) { hashCode ^= ProtoFile.GetHashCode(); }
            if (WeightFile.Length != 0) { hashCode ^= WeightFile.GetHashCode(); }
            if (MaxBatchSize != 0) { hashCode ^= MaxBatchSize.GetHashCode(); }

            return hashCode;
        }
Beispiel #3
0
        /// <summary>Generates the blob sets that are required to run cloud-based map/reduce operations.</summary>
        /// <param name="jobName">The name of the job (should be unique).</param>
        /// <param name="items">The items that must be processed (at least two).</param>
        /// <param name="functions">The map/reduce/aggregate functions (aggregate is optional).</param>
        /// <param name="workerCount">The number of workers to use.</param>
        /// <param name="mapIn">The type of the map input.</param>
        /// <param name="mapOut">The type of the map output.</param>
        /// <remarks>This method should be called from <see cref="T:MapReduceJob"/>.</remarks>
        public void GenerateBlobSets(string jobName, IList <object> items, IMapReduceFunctions functions, int workerCount, Type mapIn, Type mapOut)
        {
            // Note: items is IList and not IEnumerable because the number of items must be known up-front

            // 1. Store config
            // 2. Put blobs and queue job messages
            // 3. Put messages in the work queue

            int itemCount = items.Count;

            // Note: each blobset should contain at least two elements

            int   blobSetCount = Math.Min(workerCount, itemCount);
            float blobsPerSet  = (float)itemCount / (float)blobSetCount;

            string ignored;

            // 1. Store configuration
            var configBlobName = MapReduceConfigurationName.Create(jobName);
            var config         = new MapReduceConfiguration()
            {
                TMapInType  = mapIn.AssemblyQualifiedName,
                TMapOutType = mapOut.AssemblyQualifiedName,
                MapReduceFunctionsImplementor = functions.GetType().AssemblyQualifiedName,
                BlobSetCount = blobSetCount
            };

            _blobStorage.PutBlob(configBlobName.ContainerName, configBlobName.ToString(),
                                 config, typeof(MapReduceConfiguration), false, out ignored);

            // 2.1. Allocate blobsets
            var allNames       = new InputBlobName[blobSetCount][];
            int processedBlobs = 0;

            for (int currSet = 0; currSet < blobSetCount; currSet++)
            {
                // Last blobset might be smaller
                int thisSetSize = currSet != blobSetCount - 1 ? (int)Math.Ceiling(blobsPerSet) : itemCount - processedBlobs;
                allNames[currSet] = new InputBlobName[thisSetSize];

                processedBlobs += thisSetSize;
            }

            if (processedBlobs != itemCount)
            {
                throw new InvalidOperationException("Processed Blobs are less than the number of items");
            }

            // 2.2. Store input data (separate cycle for clarity)
            processedBlobs = 0;
            for (int currSet = 0; currSet < blobSetCount; currSet++)
            {
                for (int i = 0; i < allNames[currSet].Length; i++)
                {
                    // BlobSet and Blob IDs start from zero
                    allNames[currSet][i] = InputBlobName.Create(jobName, currSet, i);

                    var item = items[processedBlobs];
                    _blobStorage.PutBlob(allNames[currSet][i].ContainerName, allNames[currSet][i].ToString(), item, mapIn, false, out ignored);
                    processedBlobs++;
                }

                _queueStorage.Put(JobsQueueName, new JobMessage()
                {
                    Type = MessageType.BlobSetToProcess, JobName = jobName, BlobSetId = currSet
                });
            }
        }
Beispiel #4
0
        /// <summary>Performs map/reduce operations on a blobset.</summary>
        /// <param name="jobName">The name of the job.</param>
        /// <param name="blobSetId">The blobset ID.</param>
        /// <remarks>This method should be called from <see cref="T:MapReduceService"/>.</remarks>
        public void PerformMapReduce(string jobName, int blobSetId)
        {
            // 1. Load config
            // 2. For all blobs in blobset, do map (output N)
            // 3. For all mapped items, do reduce (output 1)
            // 4. Store reduce result
            // 5. Update counter
            // 6. If aggregator != null && blobsets are all processed --> enqueue aggregation message
            // 7. Delete blobset

            // 1. Load config
            var config = GetJobConfig(jobName).Value;

            var blobsetPrefix = InputBlobName.GetPrefix(jobName, blobSetId);
            var mapResults    = new List <object>();

            var mapReduceFunctions = GetMapReduceFunctions(config.MapReduceFunctionsImplementor);
            var mapIn  = Type.GetType(config.TMapInType);
            var mapOut = Type.GetType(config.TMapOutType);

            // 2. Do map for all blobs in the blobset
            string ignored;

            foreach (var blobName in _blobStorage.ListBlobNames(blobsetPrefix))
            {
                var inputBlob = _blobStorage.GetBlob(blobName.ContainerName, blobName.ToString(), mapIn, out ignored);
                if (!inputBlob.HasValue)
                {
                    continue;
                }

                object mapResult = InvokeAsDelegate(mapReduceFunctions.GetMapper(), inputBlob.Value);

                mapResults.Add(mapResult);
            }

            // 3. Do reduce for all mapped results
            while (mapResults.Count > 1)
            {
                object item1 = mapResults[0];
                object item2 = mapResults[1];
                mapResults.RemoveAt(0);
                mapResults.RemoveAt(0);

                object reduceResult = InvokeAsDelegate(mapReduceFunctions.GetReducer(), item1, item2);
                mapResults.Add(reduceResult);
            }

            // 4. Store reduced result
            var reducedBlobName = ReducedBlobName.Create(jobName, blobSetId);

            _blobStorage.PutBlob(reducedBlobName.ContainerName, reducedBlobName.ToString(), mapResults[0], mapOut, false, out ignored);

            // 5. Update counter
            var counterName            = BlobCounterName.Create(jobName);
            var counter                = new BlobCounter(_blobStorage, counterName);
            var totalCompletedBlobSets = (int)counter.Increment(1);

            // 6. Queue aggregation if appropriate
            if (totalCompletedBlobSets == config.BlobSetCount)
            {
                _queueStorage.Put(JobsQueueName,
                                  new JobMessage {
                    JobName = jobName, BlobSetId = null, Type = MessageType.ReducedDataToAggregate
                });
            }

            // 7. Delete blobset's blobs
            _blobStorage.DeleteAllBlobs(blobsetPrefix);
        }