/// <summary>
        /// Method removes DPC from powered on servers
        /// </summary>
        private static void RemoveAllBladeDpc()
        {
            for (int bladeIndex = 0; bladeIndex < ConfigLoaded.Population; bladeIndex++)
            {
                byte bladeId = (byte)(bladeIndex + 1);

                if (ChassisState.BladePower[bladeIndex].GetCachedBladePowerState().BladePowerState == 0x01) // PowerState.On
                {
                    // Spec 1.86 adds optimization there by current state of PsuAlert
                    // does not need to be queried to disable
                    PsuAlert psuAlert = WcsBladeFacade.GetPsuAlert(bladeId);

                    if (psuAlert.CompletionCode == 0x00)
                    {
                        BmcPsuAlertAction bmcAction = BmcPsuAlertAction.DpcOnly;

                        if (psuAlert.BmcProchotEnabled)
                        {
                            bmcAction = BmcPsuAlertAction.ProcHotAndDpc;
                        }

                        // disable DPC.
                        WcsBladeFacade.ActivatePsuAlert(bladeId, psuAlert.AutoProchotEnabled, bmcAction, true);
                    }
                }
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Send Smbus payload to Fpga Mezz via MasterWriteRead
        /// </summary>
        /// <param name="bladeId"></param>
        /// <param name="channel"></param>
        /// <param name="slaveId"></param>
        /// <param name="count"></param>
        /// <param name="writeData"></param>
        /// <param name="cmd"></param>
        /// <returns>SmbusWriteRead</returns>
        private static Ipmi.SmbusWriteRead SendSmbusPayload(int bladeId, byte channel, byte slaveId, byte count, byte[] writeData, string cmd)
        {
            Ipmi.SmbusWriteRead sendSmbusPayloadResponse = WcsBladeFacade.MasterWriteRead((byte)bladeId, channel, slaveId, count, writeData);
            if (sendSmbusPayloadResponse.CompletionCode != (byte)CompletionCode.Success)
            {
                Tracer.WriteError(string.Format("{0}(): MasterWriteRead failed for bladeId {1} with completion code {2}",
                                                cmd, bladeId, sendSmbusPayloadResponse.CompletionCode));
            }

            return(sendSmbusPayloadResponse);
        }
Exemplo n.º 3
0
        internal static Contracts.StartSerialResponse StartBladeSerialSession(int bladeId, int timeoutInSecs)
        {
            Contracts.StartSerialResponse response = new Contracts.StartSerialResponse();
            response.completionCode     = Contracts.CompletionCode.Failure;
            response.serialSessionToken = null;
            Tracer.WriteInfo("BladeSerialSessionMetadata StartBladeSerialSession({0})", bladeId);

            // If there is an already existing Blade serial session (indicated by a valid bladeId and a valid sessionToken), return failure with appropriate completion code
            if (CompareAndSwapMetadata(ConfigLoaded.InactiveBladePortId, ConfigLoaded.InactiveBladeSerialSessionToken, ConfigLoaded.InactiveBladePortId, ConfigLoaded.InactiveBladeSerialSessionToken) != CompletionCode.Success)
            {
                Tracer.WriteError("StartBladeSerialSession({0}): Start failed because of already active session.", bladeId);
                response.completionCode = Contracts.CompletionCode.SerialSessionActive;
                return(response);
            }

            // Ipmi command to indicate start of serial session
            // This has to be executed before Enabling comm. dev. safe mode otherwise this will fail
            Ipmi.SerialMuxSwitch sms = WcsBladeFacade.SetSerialMuxSwitch((byte)bladeId, Ipmi.MuxSwtich.SwitchSystem);

            // If set serial mux fails - reverse all operations
            if (sms.CompletionCode != (byte)CompletionCode.Success)
            {
                Tracer.WriteError("BladeSerialSessionMetadata.StartBladeSerialSession({0}): Ipmi SetSerialMuxSwitch Failed", bladeId);
                if (!CommunicationDevice.DisableSafeMode())
                {
                    Tracer.WriteError("BladeSerialSessionMetadata.StartBladeSerialSession({0}): Unable to disable comm.dev. safe mode", bladeId);
                }
                // Whenever we disable safe mode, make sure that no more serial session activity may be performed - by reseting metadata
                if (!BladeSerialSessionMetadata.ResetMetadata())
                {
                    Tracer.WriteError("BladeSerialSessionMetadata.StopBladeSerialSession({0}): Unable to reset metadata", bladeId);
                }
                return(response);
            }

            byte[] randomNumber = new byte[8];
            new System.Security.Cryptography.RNGCryptoServiceProvider().GetNonZeroBytes(randomNumber);

            // Initialize Blade Serial Session MetaData - Init function does this automically
            // This function acts as a serialization point - only one active thread can proceed beyond this
            if (CompareAndSwapMetadata(ConfigLoaded.InactiveBladePortId, ConfigLoaded.InactiveBladeSerialSessionToken, bladeId, BitConverter.ToString(randomNumber), DateTime.Now) != CompletionCode.Success)
            {
                response.completionCode = Contracts.CompletionCode.SerialSessionActive;
                return(response);
            }

            response.completionCode     = Contracts.CompletionCode.Success;
            response.serialSessionToken = BitConverter.ToString(randomNumber);
            // Initializing TimeoutBladeSerialSessionInSecs with user defined session timeout
            ConfigLoaded.TimeoutBladeSerialSessionInSecs = timeoutInSecs;
            return(response);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Reinitialize the sled and set chassis state
        /// </summary>
        private void ReInitialize(byte sledId)
        {
            // Serialize initialize and power behavior per sled
            lock (ChassisState._lock[sledId - 1])
            {
                ChassisState.FailCount[sledId - 1] = 0;                // reset fail count since we are going to reinitialize the blade

                bool status = WcsBladeFacade.InitializeClient(sledId); // TODO: no completion code, only byte status returned

                if (status != true)
                {
                    // Initialization failed - move to fail state before retrying again
                    Tracer.WriteInfo("Reinitialization failed with code: {0} for Sled: {1}", status, sledId);
                    Tracer.WriteInfo("State Transition for Sled {0}: {1} -> Fail", sledId,
                                     ChassisState.GetStateName(sledId));

                    ChassisState.SetBladeState((byte)sledId, (byte)BladeState.Fail);

                    // check power status to see if the blade was manually switched off or removed
                    BladePowerStatePacket response = ChassisState.BladePower[sledId - 1].GetBladePowerState();

                    // If the blade was turned off, set correct status / TODO: do we need this here?
                    if (response.BladePowerState == (byte)Contracts.PowerState.OFF)
                    {
                        Tracer.WriteInfo("SledId {0} is in hard power off state", sledId);
                        Tracer.WriteInfo("State Transition for Sled {0}: {1} -> HardPowerOff", sledId,
                                         ChassisState.GetStateName(sledId));

                        ChassisState.SetBladeState(sledId, (byte)BladeState.HardPowerOff);
                    }
                }
                else
                {
                    // State change: I -> P
                    Tracer.WriteInfo("Reinitialization of Sled: {0} succeeded with status {1}", sledId, status);

                    Tracer.WriteInfo("State Transition for Sled {0}: {1} -> Probation", sledId,
                                     ChassisState.GetStateName(sledId));
                    ChassisState.SetBladeState(sledId, (byte)BladeState.Probation);

                    // Initialize Blade Type (Type might have changed when Blades were reinserted)
                    if (WcsBladeFacade.clients.ContainsKey(sledId))
                    {
                        ChassisState.BladeTypeCache[sledId - 1] = (byte)WcsBladeFacade.clients[sledId].BladeClassification;
                    }
                    else
                    {
                        ChassisState.BladeTypeCache[sledId - 1] = (byte)BladeType.Unknown;
                    }
                }
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// BladeOff commands switches off blade through IPMI (soft blade off)
        /// </summary>
        /// <param name="bladeId"></param>
        /// <returns></returns>
        internal static bool BladeOff(int bladeId)
        {
            bool powerOffStatus = false;

            // Soft power enable
            byte softStatus = WcsBladeFacade.SetPowerState((byte)bladeId, Ipmi.IpmiPowerState.Off);

            Tracer.WriteInfo("Soft poweroff status " + softStatus);

            if (softStatus != (byte)CompletionCode.Success)
            {
                Tracer.WriteWarning("Blade Soft Power Off Failed with Completion Code {0:X}", softStatus);
            }
            else
            {
                powerOffStatus = true;
            }
            return(powerOffStatus);
        }
        /// <summary>
        /// Initialize blades state (powered?) and type (compute/JBOD)
        /// </summary>
        private static void BladeInitialize()
        {
            // Get power status of enable pin for each blade and update blade state
            for (byte deviceId = 1; deviceId <= MaxSledCount; deviceId++)
            {
                CheckPowerEnableState(deviceId);
            }

            // Initialize Wcs Blade - TODO: This initialize should return some status
            WcsBladeFacade.Initialize();  // This method just creates IPMI Client Class for each blade.

            Tracer.WriteInfo("BladeInitialize: IPMI Facade Initialized, Number of blades initialized: {0}", WcsBladeFacade.Initialized);

            // check all client initialization status and update state
            Tracer.WriteInfo("BladeInitialize: Checking client status for {0} blades", MaxSledCount);
            for (byte deviceId = 1; deviceId <= MaxSledCount; deviceId++)
            {
                // TODO: How to check initialized status, now that this has become a function
                if (WcsBladeFacade.clients[deviceId].Initialize()) // This method logs on to an IPMI session.
                {
                    // If initialized is true, change state to probation
                    Tracer.WriteInfo("BladeInitialize: State Transition for blade {0}: {1} -> Probation",
                                     deviceId, ChassisState.GetStateName(deviceId));

                    ChassisState.SetBladeState(deviceId, (byte)BladeState.Probation);
                }
                else
                {
                    Tracer.WriteInfo("BladeInitialize: Blade not initialized: Blade {0}", deviceId);
                }
            }

            if (WcsBladeFacade.Initialized > 0)
            {
                // Identify what kind of sleds these are
                for (byte loop = 1; loop <= MaxSledCount; loop++)
                {
                    byte deviceId = WcsBladeFacade.clients[loop].DeviceId;
                    ChassisState.BladeTypeCache[deviceId - 1] = (byte)WcsBladeFacade.clients[loop].BladeClassification;
                }
            }
        }
        private static void EnableDisableDefaultBladeOperations(int bladeId)
        {
            // TODO: Check blade type etc and Kill any serial session
            // TODO: Add trace log messages

            // Check to see if the blade is hard powered off
            BladePowerStatePacket response = ChassisState.BladePower[bladeId - 1].GetBladePowerState();

            if (response.CompletionCode != CompletionCode.Success)
            {
                // Log error here, and proceed to check blade state since we still want to check BMC soft power status
                // even if blade enable read failed for whatever reason
                Tracer.WriteError("EnableDisableDefaultBladeOperations: Blade {0} Power Enable state read failed (Completion Code: {1:X})",
                                  bladeId, response.CompletionCode);
            }
            else if (response.BladePowerState == (byte)Contracts.PowerState.OFF)
            {
                // If blade is hard powered off, no further processing is necessary
                return;
            }

            // If the blade is a Jbod, return since the operations done in this method do not apply for Jbods
            if (ChassisState.GetBladeType((byte)bladeId) == (byte)BladeType.Jbod)
            {
                Tracer.WriteInfo("EnableDisableDefaultBladeOperations (Blade#{0}): Ignoring since it is a Jbod", bladeId);
                return;
            }

            DatasafeOperationSupport.ProcessDatasafeAction(bladeId, ConfigLoaded.DatasafeOperationsEnabled ? DatasafeActions.EnableDatasafe :
                                                           DatasafeActions.DisableDatasafe);

            if (ConfigLoaded.PsuAlertMonitorEnabled)
            {
                WcsBladeFacade.ActivatePsuAlert((byte)bladeId, true, BmcPsuAlertAction.ProcHotAndDpc, true);
            }
            else
            {
                WcsBladeFacade.ActivatePsuAlert((byte)bladeId, false, BmcPsuAlertAction.NoAction, true);
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Internal method to Power off blade
        /// </summary>
        /// <param name="bladeId">Blade ID(1-48)</param>
        /// <returns>true/false if operation was success/failure</returns>
        internal static bool PowerOff(int bladeId)
        {
            Tracer.WriteInfo("Received poweroff({0})", bladeId);
            bool powerOffStatus = false;

            BladePowerStatePacket bladePowerSwitchStatePacket = new BladePowerStatePacket();

            // Serialize power off and power on, on the same lock variable per blade, so we prevent inconsistent power state behavior
            lock (ChassisState.locker[bladeId - 1])
            {
                bladePowerSwitchStatePacket = ChassisState.BladePower[bladeId - 1].SetBladePowerState((byte)PowerState.OFF);
                CompletionCode status = bladePowerSwitchStatePacket.CompletionCode;

                // Sleep for specified amount of time after blade hard power off to prevent hardware inconsistent state
                // - hot-swap controller not completely draining its capacitance leading to inconsistent power state issues
                Thread.Sleep(ConfigLoaded.WaitTimeAfterBladeHardPowerOffInMsecs);

                Tracer.WriteInfo("PowerOff: Return: {0}", status);

                if (status != CompletionCode.Success)
                {
                    Tracer.WriteError("PowerOff: Blade Hard Power Off Failed with Completion code {0:X}", status);
                    powerOffStatus = false;
                }
                else
                {
                    powerOffStatus = true;
                    // set state to Hard Power Off
                    Tracer.WriteInfo("PowerOff: State Transition for blade {0}: {1} -> HardPowerOff", bladeId,
                                     ChassisState.GetStateName((byte)bladeId));

                    ChassisState.SetBladeState((byte)bladeId, (byte)BladeState.HardPowerOff);
                    ChassisState.PowerFailCount[bladeId - 1] = 0;
                    // Clear blade type and cache
                    ChassisState.BladeTypeCache[bladeId - 1] = (byte)BladeType.Unknown;
                    WcsBladeFacade.ClearBladeClassification((byte)bladeId);
                }
            }
            return(powerOffStatus);
        }
        protected override void OnStop()
        {
            if (serviceHost != null)
            {
                serviceHost.Close();
                serviceHost = null;
            }

            Tracer.WriteInfo("OnStop: Service closed");

            RequestAdditionalTime(60 * 1000); // This is to prevent Windows service from timeouts

            // Release Chassis Manager threads
            this.Release();
            Tracer.WriteInfo("OnStop: Chassis Manager threads stopped");

            // Try to gracefully Close Open Ipmi sessions
            WcsBladeFacade.Release();
            Tracer.WriteInfo("OnStop: WcsBladeFacade released");

            // Release the communication device layer holds
            CommunicationDevice.Release();
            Tracer.WriteInfo("OnStop: Communication Device released");
        }
Exemplo n.º 10
0
        internal static Contracts.ChassisResponse StopBladeSerialSession(int bladeId, string sessionToken, bool forceKill = false)
        {
            Contracts.ChassisResponse response = new Contracts.ChassisResponse();
            response.completionCode = Contracts.CompletionCode.Failure;
            Tracer.WriteInfo("BladeSerialSessionMetadata.Received Stopbladeserialsession({0})", bladeId);

            // If there is NOT an already existing Blade serial session (indicated by a invalid bladeId and a invalid sessionToken), return failure with appropriate completion code
            if (CompareAndSwapMetadata(ConfigLoaded.InactiveBladePortId, ConfigLoaded.InactiveBladeSerialSessionToken,
                                       ConfigLoaded.InactiveBladePortId, ConfigLoaded.InactiveBladeSerialSessionToken) == CompletionCode.Success)
            {
                Tracer.WriteError("StopBladeSerialSession({0}): Stop failed because of no active session.", bladeId);
                response.completionCode = Contracts.CompletionCode.NoActiveSerialSession;
                return(response);
            }

            // Normal scenario when forcekill option is not true.. check for bladeid correctness and if it currently holds the serial session
            if (!forceKill)
            {
                if (ChassisManagerUtil.CheckBladeId((byte)bladeId) != (byte)CompletionCode.Success)
                {
                    response.completionCode = Contracts.CompletionCode.ParameterOutOfRange;
                    return(response);
                }

                // If this bladeid do not currently hold the serial session, return failure
                if (CompareAndSwapMetadata(bladeId, sessionToken, bladeId, sessionToken) != CompletionCode.Success)
                {
                    response.completionCode = Contracts.CompletionCode.SerialSessionActive;
                    return(response);
                }
            }

            // Communication device has to come out of safe mode - should allow IPMI commands to go to the BMC
            if (!CommunicationDevice.DisableSafeMode())
            {
                Tracer.WriteError(
                    "BladeSerialSessionMetadata.StopBladeSerialSession({0}): CommunicationDevice.DisableSafeMode Failed",
                    bladeId);
            }

            Ipmi.SerialMuxSwitch rms;
            // If forcekill parameter is false, then use the bladeid that is passed by the user
            if (!forceKill)
            {
                rms = WcsBladeFacade.ResetSerialMux((byte)bladeId);
            }
            // If forcekill parameter is true, then use the bladeid that currently holds the serial session
            else
            {
                rms = WcsBladeFacade.ResetSerialMux((byte)BladeSerialSessionMetadata.bladeId);
            }

            if (rms.CompletionCode != (byte)CompletionCode.Success)
            {
                Tracer.WriteError("BladeSerialSessionMetadata.StopBladeSerialSession({0}): Ipmi ReSetSerialMuxSwitch Failed", bladeId);
            }

            if (!BladeSerialSessionMetadata.ResetMetadata())
            {
                Tracer.WriteError("BladeSerialSessionMetadata.StopBladeSerialSession: Unable to reset metadata");
            }
            response.completionCode = Contracts.CompletionCode.Success;
            // Resetting TimeoutBladeSerialSessionInSecs to 0 to account for default or user provided session timeout value
            ConfigLoaded.TimeoutBladeSerialSessionInSecs = 0;
            return(response);
        }
        /// <summary>
        /// Attempt to resolve Psu Faults
        /// </summary>
        private static Dictionary <byte, PsuAlertFaultStatus> PsuAlertRemediate(Dictionary <byte, PsuAlertFaultType> psuFailures)
        {
            Dictionary <byte, PsuAlertFaultStatus> failedPsu = new Dictionary <byte, PsuAlertFaultStatus>();

            foreach (KeyValuePair <byte, PsuAlertFaultType> psu in psuFailures)
            {
                // If firmware update is in progress, skip this PSU
                if (ChassisState.PsuFwUpdateInProgress[psu.Key - 1])
                {
                    continue;
                }

                lock (ChassisState.psuLock[psu.Key - 1])
                {
                    // Log PSU faults
                    ChassisState.Psu[psu.Key - 1].LogPsuFaultStatus();

                    // Clear PSU faults, which will clear PSU_ALERT
                    CompletionCode clearAlert = ClearPsuFault(psu.Key);
                    if (clearAlert != CompletionCode.Success)
                    {
                        // PSU clear faults failed. Log failure and continue to next PSU.
                        failedPsu.Add(psu.Key, PsuAlertFaultStatus.PsuClearFaultFailed);
                        Tracer.WriteError("PsuAlertRemediate: ClearPsuFault failed on PsuId: {0}", psu.Key);
                        continue;
                    }

                    if (psu.Value == PsuAlertFaultType.PsuFailure)
                    {
                        // Check that the PSU is on
                        PsuStatusPacket psuStatus = ChassisState.Psu[psu.Key - 1].GetPsuStatus();
                        if (psuStatus.CompletionCode != CompletionCode.Success)
                        {
                            failedPsu.Add(psu.Key, PsuAlertFaultStatus.PsuFault);
                            Tracer.WriteError("PsuAlertRemediate: GetPsuStatus on PSU ({0}) failed with return code {1}",
                                              psu.Key, psuStatus.CompletionCode);
                        }
                        else
                        {
                            if (psuStatus.PsuStatus == (byte)Contracts.PowerState.ON)
                            {
                                // Check PSU power output
                                PsuPowerPacket power = ChassisState.Psu[psu.Key - 1].GetPsuPower();
                                if ((power.CompletionCode == CompletionCode.Success) && (power.PsuPower != 0))
                                {
                                    Tracer.WriteInfo("PsuStatus clear faults succeeded.  Psu: {0} drawing power: {1} Watts",
                                                     psu.Key, power.PsuPower);
                                }
                                else
                                {
                                    // PSU is not outputting power.
                                    failedPsu.Add(psu.Key, PsuAlertFaultStatus.PsuNoOutputPower);
                                    Tracer.WriteError("PsuAlertRemediate failed Psu.  PsuId: {0} Psu Error State: {1}",
                                                      psu.Key, PsuAlertFaultStatus.PsuNoOutputPower.ToString());
                                }
                            }
                            else
                            {
                                // PSU is turned off.
                                failedPsu.Add(psu.Key, PsuAlertFaultStatus.PsuPowerOff);
                                Tracer.WriteError("PsuAlertRemediate failed Psu.  PsuId: {0} Psu Error State: {1}",
                                                  psu.Key, PsuAlertFaultStatus.PsuPowerOff.ToString());
                            }
                        }
                    }
                    else if ((ConfigLoaded.BatteryMonitoringEnabled) && (ChassisState.Psu[(psu.Key - 1)] is EmersonPsu))
                    {
                        // convert psu from base class object
                        EmersonPsu emersonPsu = (EmersonPsu)ChassisState.Psu[(psu.Key - 1)];

                        if (psu.Value == PsuAlertFaultType.BatteryFault)
                        {
                            // clear battery fault status
                            CompletionCode clearFault = emersonPsu.ClearBatteryFaultIndicator();

                            if (clearFault == CompletionCode.Success)
                            {
                                EmersonPsu.BatteryFaultIndicatorPacket faultIndicator = emersonPsu.GetBatteryFaultIndicator();

                                if (faultIndicator.BatteryFault == 1)
                                {
                                    if (!failedPsu.ContainsKey(emersonPsu.PsuId))
                                    {
                                        // Psu Clear faults did not succeed.
                                        failedPsu.Add(psu.Key, PsuAlertFaultStatus.BatteryFault);
                                    }
                                    Tracer.WriteError("PsuAlertRemediate failed to clear battery fault. PsuId: {0} Battery Error State: {1}",
                                                      psu.Key, PsuAlertFaultStatus.BatteryFault.ToString());
                                }
                            }
                        }
                        else if (psu.Value == PsuAlertFaultType.OnBattery && ConfigLoaded.NumBatteries > 0)
                        {
                            // Check if we need to trigger delegate to process battery status
                            if (ConfigLoaded.ProcessBatteryStatus)
                            {
                                double sumBatteryChargeLevel      = 0;
                                ChassisEnergyStorageStatus status = null;

                                // list to store battery charge levels
                                List <string> batteryStates = new List <string>();

                                // battery present or not, set to true if even one battery is present.
                                // default to false
                                bool isBatteryPresent = false;

                                // Calculate average battery charge level
                                for (int index = 1; index <= ConfigLoaded.NumBatteries; index++)
                                {
                                    status = ChassisState.GetEnergyStorageStatus((byte)index);

                                    // Add to the list battery charge levels
                                    batteryStates.Add(status.State.ToString());

                                    // If even one battery is present, set flag to true
                                    if (status.Present)
                                    {
                                        isBatteryPresent = true;
                                    }

                                    // If battery state is not unknown, add up the charge level.
                                    if (status.State != EnergyStorageState.Unknown)
                                    {
                                        sumBatteryChargeLevel += status.PercentCharge;
                                    }
                                }

                                double avgChargeLevel = (sumBatteryChargeLevel / ConfigLoaded.NumBatteries);

                                // Process battery status if battery discharge time is greater than the allowed discharge time
                                // from app.config( default 35 seconds) or Average battery charge level is below a given threshold value.
                                if (BatteryDischargeTimer.Elapsed > new System.TimeSpan(0, 0, ConfigLoaded.BatteryDischargeTimeInSecs) ||
                                    avgChargeLevel < ConfigLoaded.BatteryChargeLevelThreshold)
                                {
                                    // Invoke method to trigger NVDIMM backup for critical battery status
                                    ThreadPool.QueueUserWorkItem(new WaitCallback(ChassisManagerInternal.ProcessCriticalBatteryStatus));
                                }

                                // Calculate backup energy available per blade and per NVDIMM
                                double bladeEnergy = (ConfigLoaded.NumPsus * BATT_POUT_MAX * BATT_OP_TIME_100_LOAD * avgChargeLevel) /
                                                     ConfigLoaded.Population;
                                double nvdimmEnergy = (ConfigLoaded.NumPsus * BATT_POUT_EXTENDED * BATT_OP_TIME_75W_LOAD) /
                                                      (ConfigLoaded.Population * ConfigLoaded.NvDimmPerBlade);
                                // Scale the values
                                bladeEnergy  = bladeEnergy / ENERGY_STORAGE_SCALING_JOULES;
                                nvdimmEnergy = nvdimmEnergy / ENERGY_STORAGE_SCALING_JOULES;

                                // Send battery status to BMC, check returned completion code for success
                                Dictionary <byte, CompletionCode> results = WcsBladeFacade.BroadcastSetEnergyStorage
                                                                                (isBatteryPresent, GetBatteryStateToBroadcast(batteryStates), ENERGY_STORAGE_SCALING_JOULES, (ushort)bladeEnergy, (byte)nvdimmEnergy);

                                // Check if broadcast failed for any blade, if yes log error.
                                for (int index = 1; index <= ConfigLoaded.Population; index++)
                                {
                                    CompletionCode code;

                                    if (results.TryGetValue((byte)index, out code))
                                    {
                                        // If completion code returned is not success
                                        if (code != CompletionCode.Success)
                                        {
                                            Tracer.WriteError("PsuMonitor: ProcessBatteryStatus: " +
                                                              "Failed to update battery status to BMC for blade: " + index +
                                                              ", completion code returned: " + code);
                                        }
                                    }
                                    else
                                    {
                                        // If blade entry does not exist.
                                        Tracer.WriteError("PsuMonitor: ProcessBatteryStatus : " +
                                                          "Failed to update battery status to BMC for blade: " + index);
                                    }
                                }
                            }
                        }
                    }
                } // lock...
            }     // foreach...

            return(failedPsu);
        }
        /// <summary>
        /// Function that gets fan speed requirements
        /// from all blades. It also updates the blade states.
        /// </summary>
        private static void GetAllBladePwmRequirements()
        {
            // Rate is required to timestep over each individual Blade call
            double rate     = (double)getBladePwmReqtTimePeriodInMilliseconds / (double)MaxSledCount;
            double timeDiff = 0;

            for (byte blade = 1; blade <= MaxSledCount; blade++)
            {
                // Handle shutdown state
                if (ChassisState.ShutDown)
                {
                    return;
                }

                // default PWM setting
                byte PWM = (byte)ConfigLoaded.MinPWM;

                // Query blade type from IPMI layer
                ChassisState.BladeTypeCache[blade - 1] = (byte)WcsBladeFacade.clients[blade].BladeClassification;

                // wait for rate limiter which includes the previous time difference for sensor get, and then issue get fan requirement

                double sleepTime = rate - timeDiff;

                if (sleepTime > rate)
                {
                    sleepTime = rate;
                }
                if (sleepTime > 0)
                {
                    Thread.Sleep(TimeSpan.FromMilliseconds(sleepTime));
                }

                Tracer.WriteInfo("GetBladeRequirement called at {0} for BladeId {1} (state: {2})", DateTime.Now, blade,
                                 ChassisState.GetStateName(blade));

                // Check for the condition where known state is hardpoweroff, but someone plugged a new blade in
                if (ChassisState.GetBladeState(blade) == (byte)BladeState.HardPowerOff)
                {
                    CheckPowerEnableState(blade);
                }

                // Log Start time
                DateTime startTime = DateTime.Now;

                #region Check fail State -> Initialize

                // If blade was in Fail state
                if (ChassisState.GetBladeState(blade) == (byte)BladeState.Fail)
                {
                    // If failed count is greater than a maximum value, we move it to Initialization state
                    if (ChassisState.FailCount[blade - 1] > ConfigLoaded.MaxFailCount)
                    {
                        // Move to Initialization state so that this blade could be reinitialized
                        Tracer.WriteInfo("GetAllBladePwmRequirements: State Transition for blade {0}: {1} -> Initialization", blade,
                                         ChassisState.GetStateName(blade));
                        ChassisState.SetBladeState(blade, (byte)BladeState.Initialization);
                    }
                    else
                    {
                        // Moving out of Fail state - First we use a light-weight get GUID to check whether the blade is there.
                        // do not allow retries on Get System Guid
                        DeviceGuid guid = WcsBladeFacade.GetSystemGuid(blade, false);
                        if (guid.CompletionCode == (byte)CompletionCode.Success)
                        {
                            Tracer.WriteInfo("GetAllBladePwmRequirements: GUID present for blade {0}, GUID: {1}", blade, guid.Guid.ToString());

                            DeviceGuid cachedGuid = WcsBladeFacade.GetCachedGuid(blade);

                            if (guid.Guid == cachedGuid.Guid)
                            {
                                // Change state to Probation and assume the system was in fail due to timeout.
                                Tracer.WriteInfo("GetAllBladePwmRequirements: State Transition for blade {0}: {1} -> Probation", blade,
                                                 ChassisState.GetStateName(blade));
                                ChassisState.SetBladeState(blade, (byte)BladeState.Probation);
                            }
                            else
                            {
                                // Change state to Initialization as the device has changed.
                                Tracer.WriteInfo("GetAllBladePwmRequirements: State Transition for blade {0}: {1} -> Probation", blade,
                                                 ChassisState.GetStateName(blade));
                                ChassisState.SetBladeState(blade, (byte)BladeState.Initialization);
                            }
                        }
                        else
                        {
                            Tracer.WriteInfo("GetAllBladePwmRequirements: Get System GUID returns a bad completion status: {0}", guid.CompletionCode);
                        }
                    }

                    // Increase time spent in Fail state everytime we are in this state
                    ChassisState.FailCount[blade - 1]++;
                }

                #endregion

                #region Move Initialize -> Probation

                // Handles Initialization
                if (ChassisState.GetBladeState(blade) == (byte)BladeState.Initialization)
                {
                    BladePowerStatePacket powerstate = ChassisState.BladePower[blade - 1].GetCachedBladePowerState();

                    if (powerstate.CompletionCode == 0)
                    {
                        if (powerstate.DecompressionTime == 0)
                        {
                            // Will result in Hard Power off or Probation
                            ReInitialize(blade);
                        }
                    }
                }

                #endregion


                // Normal operation - possible states are probation or healthy
                if (ChassisState.GetBladeState(blade) == (byte)BladeState.Probation ||
                    ChassisState.GetBladeState(blade) == (byte)BladeState.Healthy)
                {
                    #region Jbod (no sensor reading)

                    if (ChassisState.GetBladeType(blade) == (byte)BladeType.Jbod)
                    {
                        // Do not allow retries on system guid.
                        DeviceGuid guid = WcsBladeFacade.GetSystemGuid(blade, false);
                        if (guid.CompletionCode == (byte)CompletionCode.Success)
                        {
                            Tracer.WriteInfo("GetAllBladePwmRequirements: GUID present for JBOD {0}, GUID: {1}",
                                             blade, guid.Guid.ToString());

                            // Change state to Healthy
                            if (ChassisState.GetBladeState(blade) == (byte)BladeState.Probation)
                            {
                                Tracer.WriteInfo("GetAllBladePwmRequirements: State Transition for JBOD {0}: {1} -> Healthy",
                                                 blade, ChassisState.GetStateName(blade));
                                ChassisState.SetBladeState(blade, (byte)BladeState.Healthy);
                            }
                        }
                        else
                        {
                            Tracer.WriteInfo("GetAllBladePwmRequirements: Get System GUID for JBOD {0} failed with status {1}",
                                             blade, guid.CompletionCode);
                            // Set it to failed state, where we will retry guids and reinitialize if needed
                            Tracer.WriteInfo("GetAllBladePwmRequirements: State Transition for JBOD {0}: {1} -> Fail",
                                             blade, ChassisState.GetStateName(blade));
                            ChassisState.SetBladeState(blade, (byte)BladeState.Fail);
                        }

                        // No need to check for sensor reading, just continue
                        continue;
                    }

                    #endregion

                    #region Server -> Get PWM move to Healthy or move to Fail

                    // Call temperature reading list command
                    SensorReading Temps = WcsBladeFacade.GetSensorReading((byte)blade, (byte)ConfigLoaded.InputSensor, PriorityLevel.System);

                    if (Temps.CompletionCode != (byte)CompletionCode.Success)
                    {
                        Tracer.WriteWarning("GetAllBladePwmRequirements: BladeId: {0} - GetSensorReading for temperature failed with code {1:X}",
                                            blade, Temps.CompletionCode);

                        // Move to Fail state if no readings were obtained
                        Tracer.WriteInfo("GetAllBladePwmRequirements: State Transition for blade {0}: {1} -> Fail", blade,
                                         ChassisState.GetStateName(blade));

                        ChassisState.SetBladeState(blade, (byte)BladeState.Fail);
                    }
                    else
                    {
                        Tracer.WriteInfo("GetAllBladePwmRequirements: #### BladeId = " + blade + " Sensor id= " + ConfigLoaded.InputSensor +
                                         " Sensor reading = " + Temps.Reading + " Raw = " + Temps.RawReading +
                                         ", LowerNonCritical= " + ConfigLoaded.SensorLowThreshold + ", UpperNonCritical= " + ConfigLoaded.SensorHighThreshold);

                        // Handle state logic if needed
                        // Probation state should be shifted to Healthy since there was no timeout, & sensorread succeeded
                        if (ChassisState.GetBladeState(blade) == (byte)BladeState.Probation)
                        {
                            // Change state to healthy
                            Tracer.WriteInfo("GetAllBladePwmRequirements: State Transition for blade {0}: {1} -> Healthy",
                                             blade, ChassisState.GetStateName(blade));

                            ChassisState.SetBladeState(blade, (byte)BladeState.Healthy);
                            ChassisState.FailCount[blade - 1] = 0; // reset the fail count

                            // When a blade transitions to 'Healthy' state, enable/disable default blade operations
                            EnableDisableDefaultBladeOperations(blade);
                        }

                        if (ConfigLoaded.InputSensor != 1) // Non-PWM sensor.
                        {
                            PWM = GetPwmFromTemperature(Temps.Reading,
                                                        ConfigLoaded.SensorLowThreshold,
                                                        ConfigLoaded.SensorHighThreshold);
                        }
                        else
                        {
                            // PWM should never be higher or lower than the threshold.
                            if (Temps.Reading < ConfigLoaded.MinPWM || Temps.Reading > ConfigLoaded.MaxPWM)
                            {
                                Tracer.WriteWarning("PWM value " + Temps.Reading + " on blade " + blade +
                                                    " is out of range (lowThreshold: " + ConfigLoaded.MinPWM +
                                                    " - highThreshold: " + ConfigLoaded.MaxPWM);

                                PWM = (byte)ConfigLoaded.MinPWM;
                            }
                            else
                            {
                                PWM = (byte)Temps.Reading;
                            }
                        }

                        Tracer.WriteInfo("PWM value on blade {0} for Sensor {1} = {2}", blade, InputSensor, PWM);
                    }

                    #endregion
                }

                // write value into requirements table
                BladeRequirementTable[blade - 1] = PWM;

                // Log end time and capture time of execution for sensor get command
                DateTime endTime = DateTime.Now;
                timeDiff = endTime.Subtract(startTime).TotalMilliseconds; // convert time difference into milliseconds
            }
        }
Exemplo n.º 13
0
        /// <summary>
        /// Function that gets all the fan speed requirements
        /// from the Blade.  It also updates the balde state
        /// </summary>
        private void GetAllBladePwmRequirements()
        {
            // Rate is required to timestep over each individual Blade call
            double rate     = (double)GetTimePeriod / (double)MaxSledCount;
            double timeDiff = 0;

            for (byte blade = 1; blade <= MaxSledCount; blade++)
            {
                // Handle shutdown state
                if (ChassisState.ShutDown)
                {
                    return;
                }

                // default PWM setting
                byte PWM = (byte)ConfigLoaded.MinPWM;

                // Query blade type from IPMI layer
                ChassisState.BladeTypeCache[blade - 1] = (byte)WcsBladeFacade.clients[blade].BladeClassification;

                // wait for rate limiter which includes the previous time difference for sensor get, and then issue get fan requirement

                double sleepTime = rate - timeDiff;

                if (sleepTime > rate)
                {
                    sleepTime = rate;
                }
                if (sleepTime > 0)
                {
                    Thread.Sleep(TimeSpan.FromMilliseconds(sleepTime));
                }
                if (CommunicationDevice.IsSafeMode())
                {
                    // Do not perform any sensor reading - continue in the for loop
                    Tracer.WriteInfo("Monitoring thread: Safe Mode, Skipping sensor read");
                    continue;
                }
                Tracer.WriteInfo("GetBladeRequirement called at {0} for sledId {1} (state: {2})", DateTime.Now, blade,
                                 ChassisState.GetStateName(blade));

                // Check for the condition where known state is hardpoweroff, but someone plugged a new blade in
                if (ChassisState.GetBladeState(blade) == (byte)BladeState.HardPowerOff)
                {
                    ChassisState.PowerFailCount[blade - 1]++;
                    // TODO: identify if this period is sufficient to do this check
                    if (ChassisState.PowerFailCount[blade - 1] > (ConfigLoaded.MaxRetries * ConfigLoaded.Population))
                    {
                        CheckPowerEnableState(blade);
                        ChassisState.PowerFailCount[blade - 1] = 0;
                    }
                }
                // Log Start time
                DateTime startTime = DateTime.Now;

                // If blade was in Fail state
                if (ChassisState.GetBladeState(blade) == (byte)BladeState.Fail)
                {
                    // If failed count is greater than a maximum value, we move it to Initialization state

                    if (ChassisState.FailCount[blade - 1] > ConfigLoaded.MaxFailCount)
                    {
                        // Move to Initialization state so that this sled could be reinitialized
                        Tracer.WriteInfo("State Transition for Sled {0}: {1} -> Initialization", blade,
                                         ChassisState.GetStateName(blade));
                        ChassisState.SetBladeState(blade, (byte)BladeState.Initialization);
                    }
                    else
                    {
                        // Moving out of Fail state - First we use a light-weight get GUID to check whether the blade is there
                        DeviceGuid guid = WcsBladeFacade.GetSystemGuid(blade);
                        if (guid.CompletionCode == (byte)CompletionCode.Success)
                        {
                            Tracer.WriteInfo("GUID present for sled {0}, GUID: {1}", blade, guid.Guid.ToString());

                            // Change state to Probation
                            Tracer.WriteInfo("State Transition for Sled {0}: {1} -> Probation", blade,
                                             ChassisState.GetStateName(blade));
                            ChassisState.SetBladeState(blade, (byte)BladeState.Probation);
                        }
                        else
                        {
                            Tracer.WriteInfo("Get System GUID returns a bad completion status: {0}", guid.CompletionCode);
                        }
                    }
                    // Increase time spent in Fail state everytime we are in this state
                    ChassisState.FailCount[blade - 1]++;
                }

                // Handles Initialization
                if (ChassisState.GetBladeState(blade) == (byte)BladeState.Initialization)
                {
                    this.ReInitialize(blade);
                }

                // Normal operation - possible states are probation or healthy
                if (ChassisState.GetBladeState(blade) == (byte)BladeState.Probation ||
                    ChassisState.GetBladeState(blade) == (byte)BladeState.Healthy)
                {
                    if (ChassisState.GetBladeType(blade) == (byte)BladeType.Jbod)
                    {
                        DeviceGuid guid = WcsBladeFacade.GetSystemGuid(blade);
                        if (guid.CompletionCode == (byte)CompletionCode.Success)
                        {
                            Tracer.WriteInfo("GUID present for jbod {0}, GUID: {1}", blade, guid.Guid.ToString());

                            // Change state to Probation
                            Tracer.WriteInfo("State Transition for jbod {0}: {1} -> Healthy", blade, ChassisState.GetStateName(blade));
                            ChassisState.SetBladeState(blade, (byte)BladeState.Healthy);
                        }
                        else
                        {
                            Tracer.WriteInfo("Get System GUID for jbod {0} failed with status {1}", blade, guid.CompletionCode);
                            // Set it to failed state, where we will retry guids and reinitialize if needed
                            Tracer.WriteInfo("State Transition for jbod {0}: {1} -> Fail", blade, ChassisState.GetStateName(blade));
                            ChassisState.SetBladeState(blade, (byte)BladeState.Fail);
                        }

                        // No need to check for sensor reading, just continue
                        continue;
                    }
                    // Call temperature reading list command
                    SensorReading Temps = WcsBladeFacade.GetSensorReading((byte)blade, (byte)ConfigLoaded.InputSensor, PriorityLevel.System);

                    if (Temps.CompletionCode != (byte)CompletionCode.Success)
                    {
                        Tracer.WriteWarning("SledId: {0} - getTempSensorReading failed with code {1:X}", blade, Temps.CompletionCode);

                        // Move to Fail state if no readings were obtained
                        Tracer.WriteInfo("State Transition for Sled {0}: {1} -> Fail", blade,
                                         ChassisState.GetStateName(blade));

                        ChassisState.SetBladeState(blade, (byte)BladeState.Fail);
                    }
                    else
                    {
                        Tracer.WriteInfo("#### Sledid= " + blade + " Sensor id= " + ConfigLoaded.InputSensor + " Sensor reading= " +
                                         Temps.Reading + " Raw= " + Temps.RawReading + ", LowerNonCritical= " +
                                         ConfigLoaded.SensorLowThreshold + ", UpperNonCritical= " + ConfigLoaded.SensorHighThreshold);

                        // Handle state logic if needed
                        // Probation state should be shifted to Healthy since there was no timeout, & sensorread succeeded
                        if (ChassisState.GetBladeState(blade) == (byte)BladeState.Probation)
                        {
                            // Change state to healthy
                            Tracer.WriteInfo("State Transition for Sled {0}: {1} -> Healthy", blade,
                                             ChassisState.GetStateName(blade));

                            ChassisState.SetBladeState(blade, (byte)BladeState.Healthy);
                            ChassisState.FailCount[blade - 1] = 0; // reset the fail count
                        }

                        PWM = GetPwmFromTemperature(Temps.Reading,
                                                    ConfigLoaded.SensorLowThreshold,
                                                    ConfigLoaded.SensorHighThreshold);

                        Tracer.WriteInfo("PWM value for Sensor {0} = {1}", InputSensor, PWM);
                    }
                }

                // write value into requirements table
                BladeRequirementTable[blade - 1] = PWM;

                // Log end time and capture time of execution for sensor get command
                DateTime endTime = DateTime.Now;
                timeDiff = endTime.Subtract(startTime).TotalMilliseconds; // convert time difference into milliseconds
            }
        }
Exemplo n.º 14
0
        /// <summary>
        /// Initialize Chassis constants and configs
        /// </summary>
        internal byte Initialize()
        {
            Tracer.WriteInfo("Initializing state");

            byte status = (byte)CompletionCode.UnspecifiedError;

            ChassisState.Initialize();

            Tracer.WriteInfo("Initializing Communication Device");
            // Initializer lower layer communication device
            CompletionCode completionCode = CommunicationDevice.Init();

            if (CompletionCodeChecker.Failed(completionCode))
            {
                Tracer.WriteWarning("Initialization failed: {0}", completionCode);
                int loop = 0;

                // Retry 3 times before failing completely
                for (loop = 0; loop < ConfigLoaded.MaxRetries; loop++)
                {
                    Tracer.WriteInfo("Initialization Retry: {0}", loop);

                    completionCode = CommunicationDevice.Init();
                    if (CompletionCodeChecker.Succeeded(completionCode))
                    {
                        break;
                    }
                }

                if (loop == ConfigLoaded.MaxRetries)
                {
                    Tracer.WriteError("Re-attempt at Communication Device Initialization failed with code: {0}", completionCode);
                    return(status);
                }
            }
            if (CompletionCodeChecker.Succeeded(completionCode))
            {
                Tracer.WriteInfo("Communication Device Initialized");
                status = (byte)CompletionCode.Success;
            }

            // Get power status of enable pin for each blade and update blade state
            for (byte deviceId = 1; deviceId <= MaxSledCount; deviceId++)
            {
                CheckPowerEnableState(deviceId);
            }

            // Initialize Wcs Blade - TODO: This initialize should return some status
            WcsBladeFacade.Initialize();  // This method just creates IPMI Client Class for each blade.
            Tracer.WriteInfo("IPMI Facade Initialized, Number of blades initialized: {0}", WcsBladeFacade.Initialized);

            // check all client initialization status and update state
            Tracer.WriteInfo("Checking client status for {0} blades", MaxSledCount);
            for (byte deviceId = 1; deviceId <= MaxSledCount; deviceId++)
            {
                // TODO: How to check initialized status, now that this has become a function
                if (WcsBladeFacade.clients[deviceId].Initialize()) // This method logs on to an IPMI session.
                {
                    // If initialized is true, change state to probation
                    Tracer.WriteInfo("State Transition for Sled {0}: {1} -> Probation", deviceId,
                                     ChassisState.GetStateName(deviceId));

                    ChassisState.SetBladeState(deviceId, (byte)BladeState.Probation);
                }
                else
                {
                    Tracer.WriteInfo("Blade not initialized: Blade ", +deviceId);
                }
            }

            Tracer.WriteInfo("Initializing Watchdog Timer");

            // Initialize WatchDog Timer
            ChassisState.Wdt.EnableWatchDogTimer();

            Tracer.WriteInfo("Watchdog timer initialized");

            // Initialize internal chassis manager tables
            this.ChassisInternalInitialize();

            return(status);
        }
Exemplo n.º 15
0
        /// <summary>
        /// Internal method to power cycle specified blade
        /// </summary>
        /// <param name="bladeId">Blade ID(1-48)</param>
        /// <param name="offTime">time for which the blades will be powered off in seconds</param>
        /// <returns>true/false indicating if blade operation was success/failure</returns>
        internal static bool PowerCycle(int bladeId, uint offTime)
        {
            Tracer.WriteInfo("Received PowerCycle({0},{1})", bladeId, offTime);
            bool powerStatus = false;

            bool intervalStatus = WcsBladeFacade.SetPowerCycleInterval((byte)bladeId, (byte)offTime);

            if (intervalStatus != true)
            {
                Tracer.WriteWarning("Blade PowerCycle Interval setting failed with Completion code {0:X}", intervalStatus);
                return(powerStatus);
            }

            byte status = WcsBladeFacade.SetPowerState((byte)bladeId, Ipmi.IpmiPowerState.Cycle);

            Tracer.WriteInfo("PowerCycle: SetPowerState Return: {0}", status);

            // We want the blade to always power on when it receives a Power Cycle command.
            // Some BMC implementations may not turn on the blade for Power Cycle if the blade
            // is in the OFF state, and will return 0xD5 (Request parameter(s) not supported
            // in present state) as recommended in the IPMI standard.
            // Check for 0xD5 and manually turn on the blade.
            if (status == 0xD5)
            {
                // Check that the blade is actually off
                Ipmi.SystemStatus powerState = WcsBladeFacade.GetChassisState((byte)bladeId);

                Tracer.WriteInfo("PowerCycle: GetChassisState Return: {0}, Blade State: {1}", powerState.CompletionCode, powerState.PowerState.ToString());

                if (powerState.CompletionCode != 0)
                {
                    Tracer.WriteError("PowerCycle: GetChassisState Failed with Completion code {0:X}", powerState.CompletionCode);
                }
                else
                {
                    if (powerState.PowerState == Ipmi.IpmiPowerState.Off)
                    {
                        // Set blade on
                        if (BladeOn(bladeId))
                        {
                            Tracer.WriteInfo("PowerCycle: {0} SetBladeOn(): Blade soft power set to ON", bladeId);
                            powerStatus = true;
                        }
                        else
                        {
                            Tracer.WriteError("PowerCycle: {0} SetBladeOn(): Failed to set Blade soft power ON", bladeId);
                        }
                    }
                    else
                    {
                        // The blade is ON but the BMC returns 0xD5. Return error.
                        Tracer.WriteError("SetPowerState returned 0xD5 but blade is not Off. Blade state: {0}", powerState.PowerState);
                    }
                }
            }
            else if (status != 0)
            {
                Tracer.WriteWarning("Blade PowerCycle Failed with Completion code {0:X}", status);
            }
            else
            {
                powerStatus = true;
            }

            return(powerStatus);
        }