Ejemplo n.º 1
0
        /// <summary>
        /// Starts the core runtime components (scheduler, message center, incoming message agents and local
        /// grain directory), initializes the implicit stream subscriber table, sets up the statistics table
        /// data managers, and finally creates the system targets and injects their dependencies.
        /// </summary>
        /// <param name="ct">Cancellation token; it is not referenced anywhere in this method body.</param>
        /// <returns>A task that completes once all of the awaited startup steps have finished.</returns>
        private async Task OnRuntimeServicesStart(CancellationToken ct)
        {
            //TODO: Set up all (or as many as possible) of the classes started in this call to work directly with the lifecycle
            // A single stopwatch is handed to every *WithPerfAnalysis helper call below.
            var stopWatch = Stopwatch.StartNew();

            // The order of these 4 is pretty much arbitrary.
            StartTaskWithPerfAnalysis("Start Scheduler", scheduler.Start, stopWatch);
            StartTaskWithPerfAnalysis("Start Message center", messageCenter.Start, stopWatch);
            StartTaskWithPerfAnalysis("Start Incoming message agents", IncomingMessageAgentsStart, stopWatch);
            // Local function: starts the three incoming-message agents (ping, system, application) as one step.
            void IncomingMessageAgentsStart()
            {
                incomingPingAgent.Start();
                incomingSystemAgent.Start();
                incomingAgent.Start();
            }

            StartTaskWithPerfAnalysis("Start local grain directory", LocalGrainDirectory.Start, stopWatch);

            // Set up an execution context for this thread so that the target creation steps can use async values.
            RuntimeContext.InitializeMainThread();

            StartTaskWithPerfAnalysis("Init implicit stream subscribe table", InitImplicitStreamSubscribeTable, stopWatch);
            // Local function: resolves the subscriber table and type manager from the service provider and
            // feeds the table every grain class type known to the type manager.
            void InitImplicitStreamSubscribeTable()
            {
                // Initialize the implicit stream subscribers table.
                var implicitStreamSubscriberTable = Services.GetRequiredService <ImplicitStreamSubscriberTable>();
                var grainTypeManager = Services.GetRequiredService <GrainTypeManager>();

                implicitStreamSubscriberTable.InitImplicitStreamSubscribers(grainTypeManager.GrainClassTypeData.Select(t => t.Value.Type).ToArray());
            }

            var siloProviderRuntime = Services.GetRequiredService <SiloProviderRuntime>();
            SiloStatisticsOptions statisticsOptions = Services.GetRequiredService <IOptions <SiloStatisticsOptions> >().Value;

            // Make the silo provider runtime the stream provider runtime used by the runtime client.
            runtimeClient.CurrentStreamProviderRuntime = siloProviderRuntime;
            await StartAsyncTaskWithPerfAnalysis("Load StatisticProviders", LoadStatsProvider, stopWatch);

            // Local function: sets the stats and metrics table data managers one after the other,
            // each call bounded by initTimeout via WithTimeout.
            async Task LoadStatsProvider()
            {
                // can call SetSiloMetricsTableDataManager only after MessageCenter is created (dependency on this.SiloAddress).
                await siloStatistics.SetSiloStatsTableDataManager(this, statisticsOptions).WithTimeout(initTimeout, $"SiloStatistics Setting SiloStatsTableDataManager failed due to timeout {initTimeout}");

                await siloStatistics.SetSiloMetricsTableDataManager(this, statisticsOptions).WithTimeout(initTimeout,
                                                                                                         $"SiloStatistics Setting SiloMetricsTableDataManager failed due to timeout {initTimeout}");
            }

            // This has to follow the above steps that start the runtime components
            await StartAsyncTaskWithPerfAnalysis("Create system targets and inject dependencies", () =>
            {
                // CreateSystemTargets runs synchronously first; the task returned by InjectDependencies is what gets awaited.
                CreateSystemTargets();
                return(InjectDependencies());
            }, stopWatch);

            // Validate the configuration.
            // TODO - refactor validation - jbragg (the validation call below is currently disabled)
            //GlobalConfig.Application.ValidateConfiguration(logger);
        }
Ejemplo n.º 2
0
 /// <summary>
 /// Initializes a flow controller with the given load shedding limit and statistics options.
 /// </summary>
 /// <param name="loadSheddingLimit">
 /// Limit in the range 0-100. A value of 0 is stored as <see cref="int.MaxValue"/>
 /// (presumably so that the limit can never be reached - confirm against the code that reads the field).
 /// </param>
 /// <param name="options">The silo statistics options kept by this controller.</param>
 /// <exception cref="ArgumentOutOfRangeException">
 /// Thrown when <paramref name="loadSheddingLimit"/> is below 0 or above 100.
 /// </exception>
 private LoadShedQueueFlowController(int loadSheddingLimit, SiloStatisticsOptions options)
 {
     // Reject anything outside the 0-100 range before touching any state.
     bool limitOutOfRange = loadSheddingLimit < 0 || loadSheddingLimit > 100;
     if (limitOutOfRange)
     {
         throw new ArgumentOutOfRangeException(nameof(loadSheddingLimit), "Value must be between 0-100");
     }

     this.options = options;

     // Zero is a special value that is replaced by the largest possible limit.
     if (loadSheddingLimit == 0)
     {
         this.loadSheddingLimit = int.MaxValue;
     }
     else
     {
         this.loadSheddingLimit = loadSheddingLimit;
     }
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Creates a flow controller triggered when the CPU reaches a percentage of the cluster load shedding limit.
 /// This is intended to reduce queue read rate prior to causing the silo to shed load.
 /// Note:  Triggered only when load shedding is enabled.
 /// </summary>
 /// <param name="options">The silo statistics options; its LoadSheddingLimit is the value that gets scaled. Must not be null.</param>
 /// <param name="percentOfSiloSheddingLimit">Percentage (0-100) of the load shed limit which triggers a reduction of queue read rate.</param>
 /// <returns>A new flow controller whose limit is the scaled load shedding limit, truncated to an integer.</returns>
 /// <exception cref="ArgumentOutOfRangeException">
 /// Thrown when <paramref name="percentOfSiloSheddingLimit"/> is below 0 or above 100.
 /// </exception>
 /// <exception cref="ArgumentNullException">Thrown when <paramref name="options"/> is null.</exception>
 public static IQueueFlowController CreateAsPercentOfLoadSheddingLimit(SiloStatisticsOptions options, int percentOfSiloSheddingLimit = SiloStatisticsOptions.DEFAULT_LOAD_SHEDDING_LIMIT)
 {
     // The percentage is an int, so compare against integer bounds rather than double literals.
     if (percentOfSiloSheddingLimit < 0 || percentOfSiloSheddingLimit > 100)
     {
         throw new ArgumentOutOfRangeException(nameof(percentOfSiloSheddingLimit), "Percent value must be between 0-100");
     }

     // Fail with a descriptive argument exception instead of a NullReferenceException on the dereference below.
     // Checked after the range test so inputs that used to raise ArgumentOutOfRangeException still do.
     if (options == null)
     {
         throw new ArgumentNullException(nameof(options));
     }

     // Start shedding before silo reaches shedding limit.
     // The division by 100.0 forces floating-point math; the cast truncates the product toward zero.
     int scaledLimit = (int)(options.LoadSheddingLimit * (percentOfSiloSheddingLimit / 100.0));
     return new LoadShedQueueFlowController(scaledLimit, options);
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Creates a receiver for a single EventHub partition.
        /// </summary>
        /// <param name="settings">Partition settings; its Hub.Path and Partition values are used in the logger name.</param>
        /// <param name="cacheFactory">Factory stored for creating the queue cache.</param>
        /// <param name="checkpointerFactory">Factory stored for creating the stream queue checkpointer.</param>
        /// <param name="loggerFactory">Logger factory; also used here to create this receiver's logger.</param>
        /// <param name="monitor">Receiver monitor stored by this instance.</param>
        /// <param name="statisticsOptions">Silo statistics options stored by this instance.</param>
        /// <param name="telemetryProducer">Telemetry producer stored by this instance.</param>
        /// <param name="eventHubReceiverFactory">
        /// Optional factory for the underlying receiver; when null, <c>EventHubAdapterReceiver.CreateReceiver</c> is used.
        /// </param>
        /// <exception cref="ArgumentNullException">
        /// Thrown when any argument other than <paramref name="eventHubReceiverFactory"/> is null.
        /// </exception>
        public EventHubAdapterReceiver(EventHubPartitionSettings settings,
                                       Func <string, IStreamQueueCheckpointer <string>, ILoggerFactory, ITelemetryProducer, IEventHubQueueCache> cacheFactory,
                                       Func <string, Task <IStreamQueueCheckpointer <string> > > checkpointerFactory,
                                       ILoggerFactory loggerFactory,
                                       IQueueAdapterReceiverMonitor monitor,
                                       SiloStatisticsOptions statisticsOptions,
                                       ITelemetryProducer telemetryProducer,
                                       Func <EventHubPartitionSettings, string, ILogger, ITelemetryProducer, Task <IEventHubReceiver> > eventHubReceiverFactory = null)
        {
            // Validate and store the mandatory collaborators; the first null argument (in this order) is the one reported.
            this.settings = settings ?? throw new ArgumentNullException(nameof(settings));
            this.cacheFactory = cacheFactory ?? throw new ArgumentNullException(nameof(cacheFactory));
            this.checkpointerFactory = checkpointerFactory ?? throw new ArgumentNullException(nameof(checkpointerFactory));
            this.loggerFactory = loggerFactory ?? throw new ArgumentNullException(nameof(loggerFactory));
            this.monitor = monitor ?? throw new ArgumentNullException(nameof(monitor));
            this.statisticsOptions = statisticsOptions ?? throw new ArgumentNullException(nameof(statisticsOptions));
            this.telemetryProducer = telemetryProducer ?? throw new ArgumentNullException(nameof(telemetryProducer));

            // The logger is only created once every argument has passed validation.
            string loggerName = $"{this.GetType().FullName}.{settings.Hub.Path}.{settings.Partition}";
            this.logger = this.loggerFactory.CreateLogger(loggerName);

            // Fall back to the built-in receiver factory when the caller did not supply one.
            if (eventHubReceiverFactory != null)
            {
                this.eventHubReceiverFactory = eventHubReceiverFactory;
            }
            else
            {
                this.eventHubReceiverFactory = EventHubAdapterReceiver.CreateReceiver;
            }
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Creates the silo performance metrics object, stores its collaborators, resets the overload
 /// state and report frequency, and registers a string statistic that reports <c>IsOverloaded</c>.
 /// </summary>
 /// <param name="hostEnvironmentStatistics">Host environment statistics stored by this instance.</param>
 /// <param name="appEnvironmentStatistics">Application environment statistics stored by this instance.</param>
 /// <param name="loggerFactory">Logger factory; stored and used to create this type's logger.</param>
 /// <param name="statisticsOptions">Options wrapper whose <c>Value</c> is stored by this instance.</param>
 internal SiloPerformanceMetrics(
     IHostEnvironmentStatistics hostEnvironmentStatistics,
     IAppEnvironmentStatistics appEnvironmentStatistics,
     ILoggerFactory loggerFactory,
     IOptions <SiloStatisticsOptions> statisticsOptions)
 {
     // Collaborators supplied by the caller.
     this.hostEnvironmentStatistics = hostEnvironmentStatistics;
     this.appEnvironmentStatistics = appEnvironmentStatistics;
     this.loggerFactory = loggerFactory;

     // Initial state: nothing latched, not overloaded, zero report frequency.
     this.overloadValue = false;
     this.overloadLatched = false;
     this.reportFrequency = TimeSpan.Zero;

     // The logger is created before the options wrapper is unwrapped.
     this.logger = loggerFactory.CreateLogger <SiloPerformanceMetrics>();
     SiloStatisticsOptions resolvedOptions = statisticsOptions.Value;
     this.statisticsOptions = resolvedOptions;

     // Register the overload statistic; its value is produced by the lambda, which reads IsOverloaded.
     StringValueStatistic.FindOrCreate(StatisticNames.RUNTIME_IS_OVERLOADED, () => IsOverloaded.ToString());
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Starts the grain-level runtime services in sequence: the transaction agent, grain services,
        /// the membership oracle, the type manager, the multi-cluster oracle (when one is configured),
        /// silo statistics, the deployment load publisher and the platform watchdog.
        /// </summary>
        /// <param name="ct">Cancellation token; it is not referenced anywhere in this method body.</param>
        /// <returns>A task that completes once all of the awaited startup steps have finished.</returns>
        private async Task OnRuntimeGrainServicesStart(CancellationToken ct)
        {
            // A single stopwatch is handed to every *WithPerfAnalysis helper call below.
            var stopWatch = Stopwatch.StartNew();

            await StartAsyncTaskWithPerfAnalysis("Init transaction agent", InitTransactionAgent, stopWatch);

            // Local function: queues the transaction agent's Start on the scheduler, bounded by initTimeout.
            async Task InitTransactionAgent()
            {
                ITransactionAgent  transactionAgent        = this.Services.GetRequiredService <ITransactionAgent>();
                // The context is null when the agent is not a SystemTarget.
                ISchedulingContext transactionAgentContext = (transactionAgent as SystemTarget)?.SchedulingContext;
                await scheduler.QueueTask(transactionAgent.Start, transactionAgentContext)
                .WithTimeout(initTimeout, $"Starting TransactionAgent failed due to timeout {initTimeout}");
            }

            // Load and init grain services before silo becomes active.
            await StartAsyncTaskWithPerfAnalysis("Init grain services",
                                                 () => CreateGrainServices(), stopWatch);

            // Use the oracle's own scheduling context when it is a SystemTarget, otherwise the fallback scheduler's.
            this.membershipOracleContext = (this.membershipOracle as SystemTarget)?.SchedulingContext ??
                                           this.fallbackScheduler.SchedulingContext;

            await StartAsyncTaskWithPerfAnalysis("Starting local silo status oracle", StartMembershipOracle, stopWatch);

            // Local function: queues the membership oracle's Start on the context chosen above, bounded by initTimeout.
            async Task StartMembershipOracle()
            {
                await scheduler.QueueTask(() => this.membershipOracle.Start(), this.membershipOracleContext)
                .WithTimeout(initTimeout, $"Starting MembershipOracle failed due to timeout {initTimeout}");

                logger.Debug("Local silo status oracle created successfully.");
            }

            // NOTE(review): GetService is used here rather than GetRequiredService, so versionStore is
            // presumably optional and may be null - confirm against typeManager.Initialize.
            var versionStore = Services.GetService <IVersionStore>();

            await StartAsyncTaskWithPerfAnalysis("Init type manager", () => scheduler
                                                 .QueueTask(() => this.typeManager.Initialize(versionStore), this.typeManager.SchedulingContext)
                                                 .WithTimeout(this.initTimeout, $"TypeManager Initializing failed due to timeout {initTimeout}"), stopWatch);

            // If running in a multi-cluster scenario, start the MultiClusterNetwork Oracle
            if (this.multiClusterOracle != null)
            {
                await StartAsyncTaskWithPerfAnalysis("Start multicluster oracle", StartMultiClusterOracle, stopWatch);

                // Local function: picks the oracle's scheduling context (falling back to the fallback
                // scheduler's) and queues its Start, bounded by initTimeout.
                async Task StartMultiClusterOracle()
                {
                    logger.Info("Starting multicluster oracle with my ServiceId={0} and ClusterId={1}.",
                                this.clusterOptions.ServiceId, this.clusterOptions.ClusterId);

                    this.multiClusterOracleContext = (multiClusterOracle as SystemTarget)?.SchedulingContext ??
                                                     this.fallbackScheduler.SchedulingContext;
                    await scheduler.QueueTask(() => multiClusterOracle.Start(), multiClusterOracleContext)
                    .WithTimeout(initTimeout, $"Starting MultiClusterOracle failed due to timeout {initTimeout}");

                    logger.Debug("multicluster oracle created successfully.");
                }
            }

            // Any failure in the remaining steps is logged and then rethrown to the caller.
            try
            {
                SiloStatisticsOptions statisticsOptions = Services.GetRequiredService <IOptions <SiloStatisticsOptions> >().Value;
                StartTaskWithPerfAnalysis("Start silo statistics", () => this.siloStatistics.Start(statisticsOptions), stopWatch);
                logger.Debug("Silo statistics manager started successfully.");

                // Finally, initialize the deployment load collector, for grains with load-based placement
                await StartAsyncTaskWithPerfAnalysis("Start deployment load collector", StartDeploymentLoadCollector, stopWatch);

                // Local function: queues the deployment load publisher's Start on its own scheduling context, bounded by initTimeout.
                async Task StartDeploymentLoadCollector()
                {
                    var deploymentLoadPublisher = Services.GetRequiredService <DeploymentLoadPublisher>();

                    await this.scheduler.QueueTask(deploymentLoadPublisher.Start, deploymentLoadPublisher.SchedulingContext)
                    .WithTimeout(this.initTimeout, $"Starting DeploymentLoadPublisher failed due to timeout {initTimeout}");

                    logger.Debug("Silo deployment load publisher started successfully.");
                }


                // Start background timer tick to watch for platform execution stalls, such as when GC kicks in
                this.platformWatchdog = new Watchdog(statisticsOptions.LogWriteInterval, this.healthCheckParticipants, this.executorService, this.loggerFactory);
                this.platformWatchdog.Start();
                if (this.logger.IsEnabled(LogLevel.Debug))
                {
                    logger.Debug("Silo platform watchdog started successfully.");
                }
            }
            catch (Exception exc)
            {
                // The message mentions FastKill(), but this handler itself only logs (via SafeExecute) and rethrows.
                this.SafeExecute(() => this.logger.Error(ErrorCode.Runtime_Error_100330, String.Format("Error starting silo {0}. Going to FastKill().", this.SiloAddress), exc));
                throw;
            }
            if (logger.IsEnabled(LogLevel.Debug))
            {
                logger.Debug("Silo.Start complete: System status = {0}", this.SystemStatus);
            }
        }
Ejemplo n.º 7
0
 /// <summary>
 /// Builds a flow controller that is triggered once CPU usage reaches the given limit.
 /// Note:  Triggered only when load shedding is enabled.
 /// </summary>
 /// <param name="loadSheddingLimit">Percentage of CPU (0-100) which triggers queue read rate reduction.</param>
 /// <param name="options">The silo statistics options passed through to the controller.</param>
 /// <returns>A new flow controller configured with <paramref name="loadSheddingLimit"/>.</returns>
 /// <exception cref="ArgumentOutOfRangeException">
 /// Thrown when <paramref name="loadSheddingLimit"/> is below 0 or above 100.
 /// </exception>
 public static IQueueFlowController CreateAsPercentageOfCPU(int loadSheddingLimit, SiloStatisticsOptions options)
 {
     // Accept only limits inside the inclusive 0-100 range.
     bool limitInRange = loadSheddingLimit >= 0 && loadSheddingLimit <= 100;
     if (!limitInRange)
     {
         throw new ArgumentOutOfRangeException(nameof(loadSheddingLimit), "Value must be between 0-100");
     }

     IQueueFlowController controller = new LoadShedQueueFlowController(loadSheddingLimit, options);
     return controller;
 }