/// <summary>
/// Writes a BIOS configuration to a blade: copies the deployment files (including the BIOS XML) to it,
/// runs applyBIOS.sh on it, stores the outcome in <paramref name="state"/> and powers the blade off.
/// </summary>
/// <param name="state">
/// Per-operation state. Supplies the blade, BIOS XML, node IP and connect deadline; receives
/// <c>result</c> and has <c>isFinished</c> set once the method completes without throwing.
/// </param>
private void _SetBIOS(biosThreadState state)
{
    // Push the files the script needs onto the blade before we try to run anything.
    copyDeploymentFilesToBlade(state.blade, state.biosxml, state.connectDeadline);

    using (hypervisor ltspHyp = _hostManager.makeHypervisorForBlade_LTSP(state.blade))
    {
        executionResult scriptOutcome = ltspHyp.startExecutable("bash", "~/applyBIOS.sh");

        if (scriptOutcome.resultCode == 0)
        {
            _hostManager.addLogEvent(string.Format("Deployed BIOS successfully to {0}", state.nodeIP));

            // Recording the newly-written BIOS needs the BIOS write lock for the duration of the update.
            using (new tempLockElevation(state.blade, bladeLockType.lockNone, bladeLockType.lockBIOS))
            {
                _hostManager.markLastKnownBIOS(state.blade, state.biosxml);
            }

            state.result = new result(resultCode.success);
        }
        else
        {
            // Include the script's output so the failure can be diagnosed from the log alone.
            string failureText =
                string.Format("Executing applyBIOS.sh on {0} resulted in error code {1}", state.nodeIP, scriptOutcome.resultCode)
                + "stdout: " + scriptOutcome.stdout
                + "stderr: " + scriptOutcome.stderr;

            _hostManager.addLogEvent(failureText);
            state.result = new result(resultCode.genericFail, failureText);
        }

        // Success or failure, the blade is powered off before we return.
        ltspHyp.powerOff(state.connectDeadline);
    }

    state.isFinished = true;
}
/// <summary>
/// Registers a new BIOS operation for a blade and starts the thread that reboots the blade into LTSP and
/// then invokes <paramref name="onCompletion"/>. The overall sequence is:
/// 1) set the blade to boot into LTSP, 2) start the blade, 3) wait for it to boot,
/// 4) SSH into it and run conrep to configure the BIOS.
/// </summary>
/// <param name="bladeIP">IP address of the blade; also the key in <c>_currentlyDeployingNodes</c>.</param>
/// <param name="biosxml">BIOS XML handed to the new thread state.</param>
/// <param name="onCompletion">Callback stored as the thread state's <c>onBootFinish</c>.</param>
/// <param name="signalOnStartComplete">Event stored as the thread state's <c>isStarted</c>; this method
/// blocks on it before returning.</param>
/// <returns>
/// <c>resultCode.bladeInUse</c> if an unfinished operation is already registered for the blade, otherwise
/// <c>resultCode.pending</c> once the boot thread has signalled that it started.
/// </returns>
private result rebootAndStartPerformingBIOSOperation(string bladeIP, string biosxml, Action<biosThreadState> onCompletion, ManualResetEvent signalOnStartComplete)
{
    // Register a new thread state only if the blade is idle (no entry, a null entry, or a finished entry).
    //
    // The factories may be invoked more than once if the dictionary is modified concurrently, so each
    // invocation (re)writes failedToAdd; the value left behind is the one from the invocation that won.
    bool failedToAdd = false;
    biosThreadState newState = _currentlyDeployingNodes.AddOrUpdate(
        bladeIP,
        key =>
        {
            // No entry at all: the blade is idle. Creating the state here (rather than adding a null
            // placeholder first) means we can never end up holding a null state.
            failedToAdd = false;
            return new biosThreadState(bladeIP, biosxml, bladeIP);
        },
        (key, oldVal) =>
        {
            // If the previously-existing deploy status is finished, we can just replace it.
            if (oldVal == null || oldVal.isFinished)
            {
                failedToAdd = false;
                return new biosThreadState(bladeIP, biosxml, bladeIP);
            }

            // Otherwise another BIOS operation is still in progress. Keep its state in the dictionary
            // untouched - returning null here would erase the record of the running operation.
            failedToAdd = true;
            return oldVal;
        });

    // If we failed to add, then abort the request.
    if (failedToAdd)
    {
        return new result(resultCode.bladeInUse, "Adding to _currentlyDeployingNodes failed");
    }

    // Now, go ahead and spin up a new thread to handle this update, and start it.
    newState.onBootFinish = onCompletion;
    newState.onBootFailure = handleReadOrWriteBIOSError;
    newState.isStarted = signalOnStartComplete;
    newState.rebootThread = new Thread(ltspBootThread)
    {
        Name = "Booting " + bladeIP + " to LTSP"
    };
    newState.rebootThread.Start(newState);

    // Block until the boot thread reports that it has started, so this call is effectively synchronous.
    signalOnStartComplete.WaitOne();

    return new result(resultCode.pending, "LTSP thread created");
}
/// <summary>
/// Thread entry point for a BIOS operation. Marks the operation as pending, runs
/// <c>_ltspBootThreadStart</c>, converts any exception into a <c>genericFail</c> result and always marks
/// the operation as finished.
/// </summary>
/// <param name="o">The <c>biosThreadState</c> for this operation, boxed as required by <c>Thread.Start</c>.</param>
private void ltspBootThread(Object o)
{
    biosThreadState param = (biosThreadState)o;
    bool failed = false;

    try
    {
        param.result = new result(resultCode.pending);
        _ltspBootThreadStart(param);
    }
    catch (Exception e)
    {
        param.result = new result(resultCode.genericFail, e.Message);
        failed = true;
    }
    finally
    {
        param.isFinished = true;
    }

    // The code that started this thread blocks on isStarted. If we failed before that event was signalled,
    // nothing else will ever signal it, so release the waiter here (after the failure has been recorded).
    // Signalling an already-set event is harmless.
    if (failed && param.isStarted != null)
    {
        try
        {
            param.isStarted.Set();
        }
        catch (ObjectDisposedException)
        {
            // The start had already been signalled and the owner has since disposed the event, so there
            // is nobody left to wake.
        }
    }
}
/// <summary>
/// Cancels any pending BIOS read/write for the given blade and does not return until the operation has
/// finished, then removes the blade's entry from <c>_currentlyDeployingNodes</c>.
/// </summary>
/// <param name="bladeIP">IP address of the blade whose BIOS operation should be cancelled.</param>
public void cancelOperationsForBlade(string bladeIP)
{
    result res = checkBIOSOperationProgress(bladeIP);
    if (res.code != resultCode.pending)
    {
        // None in progress.
        return;
    }

    // Okay, this blade has a pending BIOS read/write. We need to request that the relevant thread exits, and
    // not return until it has. The entry may have been removed or cleared since the progress check above, so
    // look it up defensively rather than indexing.
    biosThreadState toCancel;
    if (!_currentlyDeployingNodes.TryGetValue(bladeIP, out toCancel) || toCancel == null)
    {
        // Nothing left to cancel.
        return;
    }

    if (!toCancel.isFinished)
    {
        toCancel.connectDeadline.markCancelled();

        // If we can't cancel within 30 seconds, we write a crashdump so that an operator can work out why.
        // UTC is used so that a local clock change (eg, DST) cannot distort the 30-second window.
        DateTime dumpTime = DateTime.UtcNow + TimeSpan.FromSeconds(30);
        while (!toCancel.isFinished)
        {
            _hostManager.addLogEvent("Waiting for BIOS operation on " + bladeIP + " to cancel");

            if (DateTime.UtcNow > dumpTime)
            {
                string dumpPath = Path.Combine(Settings.Default.internalErrorDumpPath, "slow_bios_" + Guid.NewGuid().ToString() + ".dmp");
                _hostManager.addLogEvent(string.Format("Cancel has taken more than 30 seconds; writing dump '{0}'", dumpPath));
                miniDumpUtils.dumpSelf(dumpPath);

                // Only ever write one dump per cancellation.
                dumpTime = DateTime.MaxValue;
            }

            Thread.Sleep(TimeSpan.FromSeconds(1));
        }
    }

    biosThreadState removed;
    _currentlyDeployingNodes.TryRemove(bladeIP, out removed);
}
/// <summary>
/// Exception-safe wrapper around <c>_SetBIOS</c>. If the write throws, the failure is logged and stored in
/// <paramref name="state"/>, the blade's last-known BIOS is recorded as "unknown", and the operation is
/// marked as finished.
/// </summary>
/// <param name="state">Per-operation state describing the blade and the BIOS to write.</param>
private void setBIOS(biosThreadState state)
{
    try
    {
        _SetBIOS(state);
    }
    catch (Exception writeFailure)
    {
        // The full exception (not just its message) goes into the log and the result.
        string failureText = string.Format("Writing BIOS to {0} resulted in exception {1}", state.nodeIP, writeFailure);
        _hostManager.addLogEvent(failureText);
        state.result = new result(resultCode.genericFail, failureText);

        // We can no longer say what BIOS config the blade holds, so record that explicitly.
        using (new tempLockElevation(state.blade, bladeLockType.lockNone, bladeLockType.lockBIOS))
        {
            _hostManager.markLastKnownBIOS(state.blade, "unknown");
        }

        state.isFinished = true;
    }
}
/// <summary>
/// Body of the LTSP boot thread: flags the blade as having its BIOS deployed, signals that the operation
/// has started, power-cycles the blade, and then waits for the boot to succeed or fail, invoking the
/// corresponding callback on <paramref name="param"/>.
/// </summary>
/// <param name="param">
/// Per-operation state. <c>connectDeadline</c> and <c>blade</c> are assigned here; <c>isStarted</c> is
/// signalled once the deadline has been created.
/// </param>
private void _ltspBootThreadStart(biosThreadState param)
{
    // Flag the blade as being mid-BIOS-deployment. This needs the BIOS lock, which we release straight away.
    using (lockableBladeSpec blade = _hostManager.db.getBladeByIP(param.nodeIP,
        bladeLockType.lockBIOS, bladeLockType.lockBIOS,
        permitAccessDuringBIOS: true, permitAccessDuringDeployment: true))
    {
        blade.spec.currentlyHavingBIOSDeployed = true;
    }

    // The whole operation gets five minutes, and can be cancelled via this deadline.
    param.connectDeadline = new cancellableDateTime(TimeSpan.FromMinutes(5));

    // Tell whoever started us that the operation is now under way.
    param.isStarted.Set();

    using (lockableBladeSpec blade = _hostManager.db.getBladeByIP(param.nodeIP,
        bladeLockType.lockOwnership | bladeLockType.lockSnapshot, bladeLockType.lockNone,
        permitAccessDuringBIOS: true, permitAccessDuringDeployment: true))
    {
        // Power cycle it
        _hostManager.startBladePowerOff(blade, param.connectDeadline);
        _hostManager.startBladePowerOn(blade, param.connectDeadline);

        // The callbacks below read the blade from the thread state; they run inside this using block.
        param.blade = blade;

        // Wait for it to boot. Note that we don't ping the client repeatedly here - since the Ping class can
        // cause a BSoD.. ;_; Instead, we wait for port 22 (SSH) to be open.
        _hostManager.setCallbackOnTCPPortOpen(22, param.onBootFinishEvent, param.onBootFailureEvent, param.connectDeadline, param);

        // Wait for the boot to either complete or to fail, polling each event in turn.
        // WaitOne returns true when the event HAS been signalled, so the callback for an event must only run
        // when its wait returns true.
        while (true)
        {
            if (param.onBootFinishEvent.WaitOne(TimeSpan.FromMilliseconds(500)))
            {
                param.onBootFinish(param);
                break;
            }

            if (param.onBootFailureEvent.WaitOne(TimeSpan.FromMilliseconds(500)))
            {
                param.onBootFailure(param);
                break;
            }
        }
    }
}
/// <summary>
/// Reads the BIOS configuration from a blade: copies the deployment files to it, runs getBIOS.sh on it,
/// fetches the resulting currentbios.xml into <paramref name="state"/>, powers the blade off and records
/// the fetched XML as the blade's last-known BIOS.
/// </summary>
/// <param name="state">
/// Per-operation state. Supplies the blade, node IP and connect deadline; receives <c>result</c> and
/// <c>biosxml</c>, and has <c>isFinished</c> set once the method completes without throwing.
/// </param>
private void _GetBIOS(biosThreadState state)
{
    // No BIOS XML is needed for a read, hence the null.
    copyDeploymentFilesToBlade(state.blade, null, state.connectDeadline);

    using (hypervisor hyp = _hostManager.makeHypervisorForBlade_LTSP(state.blade))
    {
        executionResult res = hyp.startExecutable("bash", "~/getBIOS.sh");
        if (res.resultCode != 0)
        {
            string msg = string.Format("Executing getBIOS.sh on {0} resulted in error code {1}", state.nodeIP, res.resultCode);
            msg += "stdout: " + res.stdout;
            msg += "stderr: " + res.stderr;
            _hostManager.addLogEvent(msg);
            state.result = new result(resultCode.genericFail, msg);
        }
        else
        {
            // This is a read, not a deployment, so say so in the log and in the result.
            string msg = string.Format("Retrieved BIOS successfully from {0}", state.nodeIP);
            _hostManager.addLogEvent(msg);
            state.result = new result(resultCode.success, msg);
        }

        // Retrieve the output.
        // NOTE(review): this also runs when getBIOS.sh failed, as it always has - confirm that is intended.
        state.biosxml = hyp.getFileFromGuest("currentbios.xml", state.connectDeadline);

        // All done, now we can power off and return.
        hyp.powerOff(state.connectDeadline);
    }

    // Recording the BIOS we just read needs the BIOS write lock for the duration of the update.
    using (var tmp = new tempLockElevation(state.blade, bladeLockType.lockNone, bladeLockType.lockBIOS))
    {
        _hostManager.markLastKnownBIOS(state.blade, state.biosxml);
    }

    state.isFinished = true;
}
/// <summary>
/// Starts an asynchronous TCP connect to the given port on the node described by <paramref name="state"/>,
/// and registers it in <c>inProgressTCPConnects</c> so that <c>TCPCallback</c> can report the outcome.
/// </summary>
/// <param name="port">TCP port to connect to on the node.</param>
/// <param name="onCompletion">Event stored as the connect's completion event.</param>
/// <param name="onError">Event stored as the connect's timeout event.</param>
/// <param name="deadline">Deadline stored with the in-progress connect.</param>
/// <param name="state">Thread state of the BIOS operation; its <c>nodeIP</c> is the target and the registry key.</param>
/// <exception cref="Exception">A connect is already registered for this node.</exception>
public override void setCallbackOnTCPPortOpen(int port, ManualResetEvent onCompletion, ManualResetEvent onError, cancellableDateTime deadline, biosThreadState state)
{
    lock (inProgressTCPConnects)
    {
        if (inProgressTCPConnects.ContainsKey(state.nodeIP))
        {
            throw new Exception("operation already in progress");
        }

        // The endpoint is parsed before the socket is created, so a malformed IP cannot leak a socket.
        hostStateDBInProgressTCPConnect newInProg = new hostStateDBInProgressTCPConnect
        {
            biosUpdateEndpoint = new IPEndPoint(IPAddress.Parse(state.nodeIP), port),
            biosUpdateConnectionEvent = onCompletion,
            biosUpdateDeadline = deadline,
            biosUpdateTimeoutEvent = onError,
            biosCurrentThreadState = state,
            biosUpdateSocket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp)
        };

        inProgressTCPConnects.GetOrAdd(state.nodeIP, newInProg);

        try
        {
            newInProg.biosUpdateSocket.BeginConnect(newInProg.biosUpdateEndpoint, TCPCallback, newInProg);
        }
        catch
        {
            // The connect never started, so no callback will ever clean this up. Undo the registration and
            // release the socket ourselves; otherwise every later attempt for this node would be rejected
            // as "already in progress".
            hostStateDBInProgressTCPConnect removed;
            inProgressTCPConnects.TryRemove(state.nodeIP, out removed);
            newInProg.biosUpdateSocket.Close();
            throw;
        }
    }
}
/// <summary>
/// Stand-in implementation that performs no network activity: it reports the "port open" condition
/// immediately by signalling <paramref name="onCompletion"/>.
/// </summary>
/// <param name="nodePort">Unused.</param>
/// <param name="onCompletion">Event that is signalled before this method returns.</param>
/// <param name="onError">Unused; this implementation never signals an error.</param>
/// <param name="deadline">Unused.</param>
/// <param name="biosThreadState">Unused.</param>
public override void setCallbackOnTCPPortOpen(int nodePort, ManualResetEvent onCompletion, ManualResetEvent onError, cancellableDateTime deadline, biosThreadState biosThreadState)
{
    // An earlier variant asked an onTCPConnectionAttempt hook whether the connect should succeed, and
    // signalled onError when it said no. That hook is currently disabled, so the connect always succeeds.
    onCompletion.Set();
}
/// <summary>
/// Failure callback for a BIOS read or write: records a <c>genericFail</c> result, clears the blade's
/// "currently having BIOS deployed" flag and marks the operation as finished.
/// </summary>
/// <param name="state">Per-operation state of the BIOS operation that failed.</param>
private static void handleReadOrWriteBIOSError(biosThreadState state)
{
    state.result = new result(resultCode.genericFail, "handleReadOrWriteBIOSError called");

    try
    {
        state.blade.spec.currentlyHavingBIOSDeployed = false;
    }
    finally
    {
        // Publish completion last, so that anything polling isFinished only sees it once the blade flag has
        // been dealt with - and still sees it if clearing the flag throws.
        state.isFinished = true;
    }
}