Example #1
0
 /// <summary>
 /// Keeps a reference to the partitioned dataset and records the Id of every
 /// partition descriptor it enumerates, in enumeration order.
 /// </summary>
 /// <param name="dataset">Partitioned input dataset whose descriptors are enumerated</param>
 internal ServiceAndContextConfigurationProvider(IPartitionedInputDataSet dataset)
 {
     _dataset = dataset;

     // _dataset and dataset are the same reference at this point;
     // each descriptor contributes exactly one Id to the stack.
     foreach (var partition in dataset)
     {
         var partitionId = partition.Id;
         _partitionDescriptorIds.Push(partitionId);
     }
 }
        /// <summary>
        /// Constructs the provider and pairs every partition descriptor of the dataset
        /// with a sequentially numbered context id ("DataLoadingContext-0", "DataLoadingContext-1", ...).
        /// The pairs are pushed onto the stack of available partition/context bundles.
        /// Context ids are numbered in dataset enumeration order; the intent is that the same
        /// context id is always assigned the same data partition, so that adding tasks to the
        /// topology in context id order gives a deterministic result.
        /// </summary>
        /// <param name="dataset">Partitioned input dataset</param>
        /// <param name="configurationManager">Configuration manager that holds configurations for context and tasks</param>
        internal ServiceAndContextConfigurationProvider(IPartitionedInputDataSet dataset, ConfigurationManager configurationManager)
        {
            _dataset = dataset;

            int nextContextIndex = 0;
            foreach (var partition in dataset)
            {
                // Build the id from the current index first, then advance the index.
                var contextId = string.Format("DataLoadingContext-{0}", nextContextIndex);
                nextContextIndex++;

                var bundle = new PartitionDescriptorContextIdBundle(partition.Id, contextId);
                _availablePartitionDescriptorContextIds.Push(bundle);
            }

            _configurationManager = configurationManager;
        }
Example #3
0
        /// <summary>
        /// Initializes the driver state: stores the injected collaborators, creates the
        /// active context manager and evaluator manager sized from the dataset's partition
        /// count, creates the system state machine and the service/context configuration
        /// provider, and logs the resource settings.
        /// </summary>
        /// <param name="dataSet">Partitioned input dataset; its Count is used as the number of mappers</param>
        /// <param name="perMapperConfigs">Per-mapper configuration generators</param>
        /// <param name="configurationManager">Configuration manager passed on to the configuration provider</param>
        /// <param name="evaluatorRequestor">Evaluator requestor handed to the evaluator manager</param>
        /// <param name="coresPerMapper">Cores for each mapper evaluator specification</param>
        /// <param name="coresForUpdateTask">Cores for the update evaluator specification</param>
        /// <param name="memoryPerMapper">Memory for each mapper evaluator specification</param>
        /// <param name="memoryForUpdateTask">Memory for the update evaluator specification</param>
        /// <param name="failedEvaluatorsFraction">Fraction of mappers allowed to fail; multiplied by the mapper count and truncated to an int</param>
        /// <param name="maxRetryNumberInRecovery">Maximum retries in recovery; values that are not positive are replaced by the default</param>
        /// <param name="invokeGC">Stored as the driver's GC flag</param>
        /// <param name="groupCommDriver">Group communication driver</param>
        /// <param name="nameServer">Name server</param>
        private IMRUDriver(
            IPartitionedInputDataSet dataSet,
            [Parameter(typeof(PerMapConfigGeneratorSet))] ISet<IPerMapperConfigGenerator> perMapperConfigs,
            ConfigurationManager configurationManager,
            IEvaluatorRequestor evaluatorRequestor,
            [Parameter(typeof(CoresPerMapper))] int coresPerMapper,
            [Parameter(typeof(CoresForUpdateTask))] int coresForUpdateTask,
            [Parameter(typeof(MemoryPerMapper))] int memoryPerMapper,
            [Parameter(typeof(MemoryForUpdateTask))] int memoryForUpdateTask,
            [Parameter(typeof(AllowedFailedEvaluatorsFraction))] double failedEvaluatorsFraction,
            [Parameter(typeof(MaxRetryNumberInRecovery))] int maxRetryNumberInRecovery,
            [Parameter(typeof(InvokeGC))] bool invokeGC,
            IGroupCommDriver groupCommDriver,
            INameServer nameServer)
        {
            _configurationManager = configurationManager;
            _groupCommDriver = groupCommDriver;
            _nameServer = nameServer;
            _perMapperConfigs = perMapperConfigs;
            _totalMappers = dataSet.Count;
            _invokeGC = invokeGC;

            // A retry setting that is zero or negative falls back to the default.
            if (maxRetryNumberInRecovery > 0)
            {
                _maxRetryNumberForFaultTolerant = maxRetryNumberInRecovery;
            }
            else
            {
                _maxRetryNumberForFaultTolerant = DefaultMaxNumberOfRetryInRecovery;
            }

            // Sized for all mappers plus one more
            // (NOTE(review): the extra one is presumably the update task - confirm).
            _contextManager = new ActiveContextManager(_totalMappers + 1);
            _contextManager.Subscribe(this);

            // Update specification is created before the mapper specification.
            var updateEvaluatorSpec = new EvaluatorSpecification(memoryForUpdateTask, coresForUpdateTask);
            var mapperEvaluatorSpec = new EvaluatorSpecification(memoryPerMapper, coresPerMapper);

            // Truncating cast: a fractional result is rounded toward zero.
            var allowedFailedEvaluators = (int)(failedEvaluatorsFraction * _totalMappers);

            _evaluatorManager = new EvaluatorManager(
                _totalMappers + 1,
                allowedFailedEvaluators,
                evaluatorRequestor,
                updateEvaluatorSpec,
                mapperEvaluatorSpec);

            _systemState = new SystemStateMachine();
            _serviceAndContextConfigurationProvider =
                new ServiceAndContextConfigurationProvider<TMapInput, TMapOutput, TPartitionType>(dataSet, configurationManager);

            Logger.Log(
                Level.Info,
                string.Format(
                    CultureInfo.InvariantCulture,
                    "map task memory:{0}, update task memory:{1}, map task cores:{2}, update task cores:{3}, maxRetry {4}, allowedFailedEvaluators {5}.",
                    memoryPerMapper,
                    memoryForUpdateTask,
                    coresPerMapper,
                    coresForUpdateTask,
                    _maxRetryNumberForFaultTolerant,
                    allowedFailedEvaluators));
        }
Example #4
0
        /// <summary>
        /// Initializes the driver: stores the injected collaborators and resource settings,
        /// registers the group communication operators, creates the task starter and the
        /// task id / per-mapper configuration / partition descriptor stacks, builds the
        /// service and context configuration provider, and logs the resource settings.
        /// </summary>
        /// <param name="dataSet">Partitioned input dataset; its Count sizes the task starter and provider</param>
        /// <param name="perMapperConfigs">Per-mapper configuration generators</param>
        /// <param name="configurationManager">Configuration manager passed on to the configuration provider</param>
        /// <param name="evaluatorRequestor">Evaluator requestor</param>
        /// <param name="coresPerMapper">Cores per mapper</param>
        /// <param name="coresForUpdateTask">Cores for the update task</param>
        /// <param name="memoryPerMapper">Memory per mapper</param>
        /// <param name="memoryForUpdateTask">Memory for the update task</param>
        /// <param name="failedEvaluatorsFraction">Fraction of the dataset count allowed to fail; truncated to an int</param>
        /// <param name="invokeGC">Stored as the driver's GC flag</param>
        /// <param name="groupCommDriver">Group communication driver</param>
        private IMRUDriver(
            IPartitionedInputDataSet dataSet,
            [Parameter(typeof(PerMapConfigGeneratorSet))] ISet<IPerMapperConfigGenerator> perMapperConfigs,
            ConfigurationManager configurationManager,
            IEvaluatorRequestor evaluatorRequestor,
            [Parameter(typeof(CoresPerMapper))] int coresPerMapper,
            [Parameter(typeof(CoresForUpdateTask))] int coresForUpdateTask,
            [Parameter(typeof(MemoryPerMapper))] int memoryPerMapper,
            [Parameter(typeof(MemoryForUpdateTask))] int memoryForUpdateTask,
            [Parameter(typeof(AllowedFailedEvaluatorsFraction))] double failedEvaluatorsFraction,
            [Parameter(typeof(InvokeGC))] bool invokeGC,
            IGroupCommDriver groupCommDriver)
        {
            // Plain field capture. Kept ahead of the helper calls below, which are
            // defined elsewhere and may read these fields.
            _dataSet = dataSet;
            _configurationManager = configurationManager;
            _evaluatorRequestor = evaluatorRequestor;
            _groupCommDriver = groupCommDriver;
            _coresPerMapper = coresPerMapper;
            _coresForUpdateTask = coresForUpdateTask;
            _memoryPerMapper = memoryPerMapper;
            _memoryForUpdateTask = memoryForUpdateTask;
            _perMapperConfigs = perMapperConfigs;
            _completedTasks = new ConcurrentBag<ICompletedTask>();

            // Truncating cast: a fractional result is rounded toward zero.
            _allowedFailedEvaluators = (int)(failedEvaluatorsFraction * dataSet.Count);
            _invokeGC = invokeGC;

            AddGroupCommunicationOperators();
            _groupCommTaskStarter = new TaskStarter(_groupCommDriver, _dataSet.Count + 1);

            // The stacks must exist before ConstructTaskIdAndPartitionDescriptorStack runs.
            _taskIdStack = new ConcurrentStack<string>();
            _perMapperConfiguration = new ConcurrentStack<IConfiguration>();
            _partitionDescriptorStack = new Stack<IPartitionDescriptor>();
            ConstructTaskIdAndPartitionDescriptorStack();

            _serviceAndContextConfigurationProvider =
                new ServiceAndContextConfigurationProvider<TMapInput, TMapOutput>(
                    dataSet.Count + 1,
                    groupCommDriver,
                    _configurationManager,
                    _partitionDescriptorStack);

            var resourceSummary = string.Format(
                "map task memory:{0}, update task memory:{1}, map task cores:{2}, update task cores:{3}",
                _memoryPerMapper,
                _memoryForUpdateTask,
                _coresPerMapper,
                _coresForUpdateTask);

            Logger.Log(Level.Info, resourceSummary);
        }
Example #5
0
        /// <summary>
        /// Creates one map function per partition descriptor of the partitioned dataset.
        /// Each map function is injected from the user's map configuration, the
        /// descriptor's partition configuration and the merged per-mapper configuration.
        /// </summary>
        /// <param name="mapConfiguration">Map configuration given by user</param>
        /// <param name="partitionedDataSetConfig">Partitioned dataset configuration</param>
        /// <param name="perMapConfigGenerators">Per map configuration generators</param>
        /// <returns>The set of map functions created, wrapped in a MapFunctions instance</returns>
        private MapFunctions<TMapInput, TMapOutput> MakeMapFunctions<TMapInput, TMapOutput>(IConfiguration mapConfiguration, IConfiguration partitionedDataSetConfig, ISet<IPerMapperConfigGenerator> perMapConfigGenerators)
        {
            IPartitionedInputDataSet dataset = TangFactory.GetTang()
                .NewInjector(partitionedDataSetConfig)
                .GetInstance<IPartitionedInputDataSet>();

            ISet<IMapFunction<TMapInput, TMapOutput>> mapFunctions = new HashSet<IMapFunction<TMapInput, TMapOutput>>();

            // Zero-based position of the current partition; passed to every generator.
            int mapperIndex = 0;

            foreach (var partition in dataset)
            {
                // Start from an empty configuration and merge in each generator's
                // contribution for this mapper, in the set's enumeration order.
                var seedConfig = TangFactory.GetTang().NewConfigurationBuilder().Build();
                IConfiguration perMapConfig = perMapConfigGenerators.Aggregate(
                    seedConfig,
                    (merged, generator) =>
                    {
                        return Configurations.Merge(merged, generator.GetMapperConfiguration(mapperIndex, dataset.Count));
                    });

                var mapInjector = TangFactory.GetTang().NewInjector(
                    mapConfiguration,
                    partition.GetPartitionConfiguration(),
                    perMapConfig);

                mapFunctions.Add(mapInjector.GetInstance<IMapFunction<TMapInput, TMapOutput>>());
                mapperIndex++;
            }

            return new MapFunctions<TMapInput, TMapOutput>(mapFunctions);
        }