/// <summary>
/// Creates the provider for the given dataset and records the id of every
/// partition descriptor the dataset enumerates.
/// </summary>
/// <param name="dataset">Partitioned input dataset whose descriptors are enumerated</param>
internal ServiceAndContextConfigurationProvider(IPartitionedInputDataSet dataset)
{
    _dataset = dataset;

    foreach (var partition in _dataset)
    {
        var partitionId = partition.Id;
        _partitionDescriptorIds.Push(partitionId);
    }
}
/// <summary>
/// Constructs the object which maintains partition descriptor ids so that it can provide the proper data load configuration.
/// It also maintains the mapping between partition descriptor ids and context ids to ensure that the same context id is
/// always assigned the same data partition.
/// This ensures that, if tasks are added to the topology in context id sequence, the result is deterministic.
/// </summary>
/// <param name="dataset">Partitioned input dataset</param>
/// <param name="configurationManager">Configuration manager that holds configurations for context and tasks</param>
internal ServiceAndContextConfigurationProvider(IPartitionedInputDataSet dataset, ConfigurationManager configurationManager)
{
    _dataset = dataset;

    int contextSequenceNumber = 0;
    foreach (var descriptor in _dataset)
    {
        // Context ids are machine-readable identifiers, so they are formatted with the
        // invariant culture rather than whatever culture the current thread happens to use.
        var contextId = string.Format(
            System.Globalization.CultureInfo.InvariantCulture,
            "DataLoadingContext-{0}",
            contextSequenceNumber);
        contextSequenceNumber++;

        // Pair each partition descriptor id with the context id generated for it.
        _availablePartitionDescriptorContextIds.Push(
            new PartitionDescriptorContextIdBundle(descriptor.Id, contextId));
    }

    _configurationManager = configurationManager;
}
/// <summary>
/// Builds the driver: stores the injected collaborators, creates the active context manager,
/// the evaluator manager, the system state machine and the service/context configuration
/// provider, then logs the resource settings in use.
/// </summary>
/// <param name="dataSet">Partitioned input dataset; its Count is taken as the total number of mappers</param>
/// <param name="perMapperConfigs">Per mapper configuration generators</param>
/// <param name="configurationManager">Configuration manager, also handed to the service/context configuration provider</param>
/// <param name="evaluatorRequestor">Evaluator requestor handed to the evaluator manager</param>
/// <param name="coresPerMapper">Cores in the mapper evaluator specification</param>
/// <param name="coresForUpdateTask">Cores in the update evaluator specification</param>
/// <param name="memoryPerMapper">Memory in the mapper evaluator specification</param>
/// <param name="memoryForUpdateTask">Memory in the update evaluator specification</param>
/// <param name="failedEvaluatorsFraction">Multiplied by the number of mappers and truncated to obtain the allowed number of failed evaluators</param>
/// <param name="maxRetryNumberInRecovery">Used as the retry limit when positive; otherwise DefaultMaxNumberOfRetryInRecovery applies</param>
/// <param name="invokeGC">Stored in _invokeGC</param>
/// <param name="groupCommDriver">Group communication driver</param>
/// <param name="nameServer">Name server</param>
private IMRUDriver(
    IPartitionedInputDataSet dataSet,
    [Parameter(typeof(PerMapConfigGeneratorSet))] ISet<IPerMapperConfigGenerator> perMapperConfigs,
    ConfigurationManager configurationManager,
    IEvaluatorRequestor evaluatorRequestor,
    [Parameter(typeof(CoresPerMapper))] int coresPerMapper,
    [Parameter(typeof(CoresForUpdateTask))] int coresForUpdateTask,
    [Parameter(typeof(MemoryPerMapper))] int memoryPerMapper,
    [Parameter(typeof(MemoryForUpdateTask))] int memoryForUpdateTask,
    [Parameter(typeof(AllowedFailedEvaluatorsFraction))] double failedEvaluatorsFraction,
    [Parameter(typeof(MaxRetryNumberInRecovery))] int maxRetryNumberInRecovery,
    [Parameter(typeof(InvokeGC))] bool invokeGC,
    IGroupCommDriver groupCommDriver,
    INameServer nameServer)
{
    // Injected collaborators and simple settings.
    _configurationManager = configurationManager;
    _groupCommDriver = groupCommDriver;
    _nameServer = nameServer;
    _perMapperConfigs = perMapperConfigs;
    _totalMappers = dataSet.Count;
    _invokeGC = invokeGC;

    // A non-positive retry count falls back to the default.
    if (maxRetryNumberInRecovery > 0)
    {
        _maxRetryNumberForFaultTolerant = maxRetryNumberInRecovery;
    }
    else
    {
        _maxRetryNumberForFaultTolerant = DefaultMaxNumberOfRetryInRecovery;
    }

    // Sized for one more than the number of mappers
    // (presumably the extra slot is for the update task - confirm).
    _contextManager = new ActiveContextManager(_totalMappers + 1);
    _contextManager.Subscribe(this);

    var maxFailedEvaluators = (int)(failedEvaluatorsFraction * _totalMappers);
    _evaluatorManager = new EvaluatorManager(
        _totalMappers + 1,
        maxFailedEvaluators,
        evaluatorRequestor,
        new EvaluatorSpecification(memoryForUpdateTask, coresForUpdateTask),
        new EvaluatorSpecification(memoryPerMapper, coresPerMapper));

    _systemState = new SystemStateMachine();
    _serviceAndContextConfigurationProvider =
        new ServiceAndContextConfigurationProvider<TMapInput, TMapOutput, TPartitionType>(dataSet, configurationManager);

    var resourceSummary = string.Format(
        CultureInfo.InvariantCulture,
        "map task memory:{0}, update task memory:{1}, map task cores:{2}, update task cores:{3}, maxRetry {4}, allowedFailedEvaluators {5}.",
        memoryPerMapper,
        memoryForUpdateTask,
        coresPerMapper,
        coresForUpdateTask,
        _maxRetryNumberForFaultTolerant,
        maxFailedEvaluators);
    Logger.Log(Level.Info, resourceSummary);
}
/// <summary>
/// Builds the driver: stores the injected collaborators and resource settings, registers the
/// group communication operators, prepares the task id / partition descriptor stacks, creates
/// the service/context configuration provider and logs the resource settings in use.
/// </summary>
/// <param name="dataSet">Partitioned input dataset</param>
/// <param name="perMapperConfigs">Per mapper configuration generators</param>
/// <param name="configurationManager">Configuration manager, also handed to the service/context configuration provider</param>
/// <param name="evaluatorRequestor">Evaluator requestor</param>
/// <param name="coresPerMapper">Cores per mapper</param>
/// <param name="coresForUpdateTask">Cores for the update task</param>
/// <param name="memoryPerMapper">Memory per mapper</param>
/// <param name="memoryForUpdateTask">Memory for the update task</param>
/// <param name="failedEvaluatorsFraction">Multiplied by the dataset count and truncated to obtain the allowed number of failed evaluators</param>
/// <param name="invokeGC">Stored in _invokeGC</param>
/// <param name="groupCommDriver">Group communication driver</param>
private IMRUDriver(
    IPartitionedInputDataSet dataSet,
    [Parameter(typeof(PerMapConfigGeneratorSet))] ISet<IPerMapperConfigGenerator> perMapperConfigs,
    ConfigurationManager configurationManager,
    IEvaluatorRequestor evaluatorRequestor,
    [Parameter(typeof(CoresPerMapper))] int coresPerMapper,
    [Parameter(typeof(CoresForUpdateTask))] int coresForUpdateTask,
    [Parameter(typeof(MemoryPerMapper))] int memoryPerMapper,
    [Parameter(typeof(MemoryForUpdateTask))] int memoryForUpdateTask,
    [Parameter(typeof(AllowedFailedEvaluatorsFraction))] double failedEvaluatorsFraction,
    [Parameter(typeof(InvokeGC))] bool invokeGC,
    IGroupCommDriver groupCommDriver)
{
    // Injected collaborators and resource settings.
    _dataSet = dataSet;
    _configurationManager = configurationManager;
    _evaluatorRequestor = evaluatorRequestor;
    _groupCommDriver = groupCommDriver;
    _coresPerMapper = coresPerMapper;
    _coresForUpdateTask = coresForUpdateTask;
    _memoryPerMapper = memoryPerMapper;
    _memoryForUpdateTask = memoryForUpdateTask;
    _perMapperConfigs = perMapperConfigs;
    _completedTasks = new ConcurrentBag<ICompletedTask>();
    _allowedFailedEvaluators = (int)(failedEvaluatorsFraction * dataSet.Count);
    _invokeGC = invokeGC;

    // Runs only after the fields above have been assigned.
    AddGroupCommunicationOperators();

    // Sized for one more than the number of partitions
    // (presumably the extra slot is for the update task - confirm).
    _groupCommTaskStarter = new TaskStarter(_groupCommDriver, _dataSet.Count + 1);

    // The stacks must exist before ConstructTaskIdAndPartitionDescriptorStack is called.
    _taskIdStack = new ConcurrentStack<string>();
    _perMapperConfiguration = new ConcurrentStack<IConfiguration>();
    _partitionDescriptorStack = new Stack<IPartitionDescriptor>();
    ConstructTaskIdAndPartitionDescriptorStack();

    _serviceAndContextConfigurationProvider =
        new ServiceAndContextConfigurationProvider<TMapInput, TMapOutput>(
            dataSet.Count + 1,
            groupCommDriver,
            _configurationManager,
            _partitionDescriptorStack);

    var resourceSummary = string.Format(
        "map task memory:{0}, update task memory:{1}, map task cores:{2}, update task cores:{3}",
        _memoryPerMapper,
        _memoryForUpdateTask,
        _coresPerMapper,
        _coresForUpdateTask);
    Logger.Log(Level.Info, resourceSummary);
}
/// <summary>
/// Instantiates one map function per partition descriptor of the partitioned dataset.
/// Each map function is injected from the user's map configuration, the descriptor's
/// partition configuration (so the input partition is available to the map function)
/// and the merged per-mapper configurations.
/// </summary>
/// <param name="mapConfiguration">Map configuration given by user</param>
/// <param name="partitionedDataSetConfig">Partitioned dataset configuration</param>
/// <param name="perMapConfigGenerators">Per map configuration generators</param>
/// <returns>The set of created map functions wrapped in a MapFunctions instance</returns>
private MapFunctions<TMapInput, TMapOutput> MakeMapFunctions<TMapInput, TMapOutput>(
    IConfiguration mapConfiguration,
    IConfiguration partitionedDataSetConfig,
    ISet<IPerMapperConfigGenerator> perMapConfigGenerators)
{
    IPartitionedInputDataSet dataset = TangFactory.GetTang()
        .NewInjector(partitionedDataSetConfig)
        .GetInstance<IPartitionedInputDataSet>();

    ISet<IMapFunction<TMapInput, TMapOutput>> mappers = new HashSet<IMapFunction<TMapInput, TMapOutput>>();

    int mapperIndex = 0;
    foreach (var partition in dataset)
    {
        // Start from an empty configuration and fold in what every generator
        // contributes for this mapper index.
        var seedConfig = TangFactory.GetTang().NewConfigurationBuilder().Build();
        IConfiguration perMapConfig = perMapConfigGenerators.Aggregate(
            seedConfig,
            (merged, generator) =>
                Configurations.Merge(merged, generator.GetMapperConfiguration(mapperIndex, dataset.Count)));

        var mapperInjector = TangFactory.GetTang().NewInjector(
            mapConfiguration,
            partition.GetPartitionConfiguration(),
            perMapConfig);

        mappers.Add(mapperInjector.GetInstance<IMapFunction<TMapInput, TMapOutput>>());
        mapperIndex++;
    }

    return new MapFunctions<TMapInput, TMapOutput>(mappers);
}