예제 #1
0
        /// <summary>
        /// Copies the deployment files to the blade, runs applyBIOS.sh on it, stores the outcome in
        /// <paramref name="state"/>, powers the blade off and finally flags the operation as finished.
        /// </summary>
        /// <param name="state">Operation state: supplies the blade, BIOS XML and deadline, and receives the result.</param>
        private void _SetBIOS(biosThreadState state)
        {
            // The script and the BIOS XML must be on the blade before anything can be executed there.
            copyDeploymentFilesToBlade(state.blade, state.biosxml, state.connectDeadline);

            // Run the deployment script on the blade.
            using (hypervisor ltspHost = _hostManager.makeHypervisorForBlade_LTSP(state.blade))
            {
                executionResult scriptOutcome = ltspHost.startExecutable("bash", "~/applyBIOS.sh");

                if (scriptOutcome.resultCode == 0)
                {
                    _hostManager.addLogEvent(string.Format("Deployed BIOS successfully to {0}", state.nodeIP));

                    // Record the BIOS the blade now carries, inside a tempLockElevation scope (lockNone / lockBIOS).
                    using (var elevatedLock = new tempLockElevation(state.blade, bladeLockType.lockNone, bladeLockType.lockBIOS))
                    {
                        _hostManager.markLastKnownBIOS(state.blade, state.biosxml);
                    }

                    state.result = new result(resultCode.success);
                }
                else
                {
                    // A non-zero exit code is a failure; keep the script's output for diagnosis.
                    string failureText = string.Format("Executing applyBIOS.sh on {0} resulted in error code {1}", state.nodeIP, scriptOutcome.resultCode)
                                         + "stdout: " + scriptOutcome.stdout
                                         + "stderr: " + scriptOutcome.stderr;
                    _hostManager.addLogEvent(failureText);
                    state.result = new result(resultCode.genericFail, failureText);
                }

                // Whatever the outcome, the blade is powered off before returning.
                ltspHost.powerOff(state.connectDeadline);
            }

            state.isFinished = true;
        }
예제 #2
0
        /// <summary>
        /// Registers a new BIOS operation for a blade and starts the thread that boots it to LTSP.
        /// </summary>
        /// <param name="bladeIP">IP address of the blade; also the key into _currentlyDeployingNodes.</param>
        /// <param name="biosxml">BIOS XML handed to the new <c>biosThreadState</c>.</param>
        /// <param name="onCompletion">Callback stored as the state's <c>onBootFinish</c>.</param>
        /// <param name="signalOnStartComplete">Event stored as the state's <c>isStarted</c>; this method blocks on it before returning.</param>
        /// <returns>
        /// <c>resultCode.bladeInUse</c> if an unfinished operation is already registered for the blade,
        /// otherwise <c>resultCode.pending</c> once the boot thread has signalled that it has started.
        /// </returns>
        private result rebootAndStartPerformingBIOSOperation(string bladeIP, string biosxml, Action <biosThreadState> onCompletion, ManualResetEvent signalOnStartComplete)
        {
            //  We need to:
            //  1) set this blade to boot into LTSP
            //  2) start the blade
            //  3) wait for it to boot
            //  4) SSH into it, and run conrep to configure the BIOS.

            // Register a new bios thread state if the blade is idle (no entry, a null entry, or a finished entry).
            // The captured flag is only referenced by the lambda for the duration of the AddOrUpdate call.
            bool            failedToAdd = false;
            biosThreadState newState    = _currentlyDeployingNodes.AddOrUpdate(
                bladeIP,
                // No entry at all: the blade is idle, so create the state directly rather than storing null.
                key => new biosThreadState(bladeIP, biosxml, bladeIP),
                (key, oldVal) =>
                {
                    // The update factory can be invoked more than once if the dictionary changes underneath us,
                    // so every invocation must (re)set the flag rather than only ever raising it.
                    if (oldVal == null || oldVal.isFinished)
                    {
                        failedToAdd = false;
                        return(new biosThreadState(bladeIP, biosxml, bladeIP));
                    }

                    // Another BIOS operation is still in progress. Keep its state in the dictionary: returning
                    // null here would overwrite (and so orphan) the operation that is still running.
                    failedToAdd = true;
                    return(oldVal);
                });

            // If we failed to add, then abort the request
            if (failedToAdd)
            {
                return(new result(resultCode.bladeInUse, "Adding to _currentlyDeployingNodes failed"));
            }

            // Now, go ahead and spin up a new thread to handle this update, and start it.
            newState.onBootFinish  = onCompletion;
            newState.onBootFailure = handleReadOrWriteBIOSError;
            newState.isStarted     = signalOnStartComplete;
            newState.rebootThread  = new Thread(ltspBootThread)
            {
                Name = "Booting " + bladeIP + " to LTSP"
            };
            newState.rebootThread.Start(newState);

            // Block until the boot thread signals isStarted, so this method is effectively synchronous.
            signalOnStartComplete.WaitOne();

            return(new result(resultCode.pending, "LTSP thread created"));
        }
예제 #3
0
        /// <summary>
        /// Thread entry point for a BIOS operation. Runs <c>_ltspBootThreadStart</c>, converts any exception
        /// into a <c>genericFail</c> result, and always marks the state as finished.
        /// </summary>
        /// <param name="o">The <c>biosThreadState</c> for this operation, passed via <c>Thread.Start</c>.</param>
        private void ltspBootThread(Object o)
        {
            biosThreadState param = (biosThreadState)o;
            bool faulted = false;

            try
            {
                param.result = new result(resultCode.pending);
                _ltspBootThreadStart(param);
            }
            catch (Exception e)
            {
                faulted = true;
                param.result = new result(resultCode.genericFail, e.Message);
            }
            finally
            {
                param.isFinished = true;

                // If the start routine threw before it signalled isStarted, whoever spawned this thread may
                // still be blocked waiting on that event. Signal it on the failure path so they are released;
                // the failure itself is reported through param.result.
                if (faulted && param.isStarted != null)
                {
                    try
                    {
                        param.isStarted.Set();
                    }
                    catch (ObjectDisposedException)
                    {
                        // The event was already signalled and disposed by its owner; nobody is waiting on it.
                    }
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Cancels any pending BIOS operation for a blade and blocks until its thread reports that it has
        /// finished, then removes the blade's entry from _currentlyDeployingNodes.
        /// </summary>
        /// <param name="bladeIP">IP address of the blade whose operation should be cancelled.</param>
        public void cancelOperationsForBlade(string bladeIP)
        {
            result res = checkBIOSOperationProgress(bladeIP);

            if (res.code != resultCode.pending)
            {
                // None in progress.
                return;
            }

            // Okay, this blade has a pending BIOS read/write. We need to request that the relevant thread exits, and
            // not return until it has. The entry may have been removed (or be null) since the progress check above,
            // so look it up defensively instead of indexing.
            biosThreadState toCancel;
            if (!_currentlyDeployingNodes.TryGetValue(bladeIP, out toCancel) || toCancel == null)
            {
                // Nothing left to cancel.
                return;
            }

            while (!toCancel.isFinished)
            {
                toCancel.connectDeadline.markCancelled();

                // If we can't cancel within 30 seconds, we write a crashdump so that an operator can work out why.
                // UTC is used so that local clock/DST adjustments cannot stretch or shrink the window.
                DateTime dumpTime = DateTime.UtcNow + TimeSpan.FromSeconds(30);

                while (!toCancel.isFinished)
                {
                    _hostManager.addLogEvent("Waiting for BIOS operation on " + bladeIP + " to cancel");

                    if (DateTime.UtcNow > dumpTime)
                    {
                        string dumpPath = Path.Combine(Settings.Default.internalErrorDumpPath, "slow_bios_" + Guid.NewGuid().ToString() + ".dmp");
                        _hostManager.addLogEvent(string.Format("Cancel has taken more than 30 seconds; writing dump '{0}'", dumpPath));
                        miniDumpUtils.dumpSelf(dumpPath);

                        // Only ever write one dump per cancellation.
                        dumpTime = DateTime.MaxValue;
                    }

                    Thread.Sleep(TimeSpan.FromSeconds(1));
                }
            }

            biosThreadState removed;

            _currentlyDeployingNodes.TryRemove(bladeIP, out removed);
        }
예제 #5
0
        /// <summary>
        /// Exception-safe wrapper around <c>_SetBIOS</c>. On any exception the failure is logged and stored in
        /// <paramref name="state"/>, the blade's last known BIOS is recorded as "unknown", and the operation is
        /// marked finished.
        /// </summary>
        /// <param name="state">State of the BIOS write being performed.</param>
        private void setBIOS(biosThreadState state)
        {
            try
            {
                _SetBIOS(state);
            }
            catch (Exception writeFailure)
            {
                // Report the failure, including the full exception text.
                string failureText = string.Format("Writing BIOS to {0} resulted in exception {1}", state.nodeIP, writeFailure);
                _hostManager.addLogEvent(failureText);
                state.result = new result(resultCode.genericFail, failureText);

                // The write did not complete normally, so the recorded BIOS is reset to "unknown".
                using (var elevatedLock = new tempLockElevation(state.blade, bladeLockType.lockNone, bladeLockType.lockBIOS))
                {
                    _hostManager.markLastKnownBIOS(state.blade, "unknown");
                }

                state.isFinished = true;
            }
        }
예제 #6
0
        /// <summary>
        /// Body of the BIOS operation thread: flags the blade as having a BIOS operation in progress, signals
        /// that the operation has started, power-cycles the blade, and then waits for either the boot-finished
        /// or boot-failed event before invoking the matching callback.
        /// </summary>
        /// <param name="param">State of the operation; supplies the node IP, events and callbacks.</param>
        private void _ltspBootThreadStart(biosThreadState param)
        {
            using (lockableBladeSpec blade = _hostManager.db.getBladeByIP(param.nodeIP, bladeLockType.lockBIOS, bladeLockType.lockBIOS,
                                                                          permitAccessDuringBIOS: true, permitAccessDuringDeployment: true))
            {
                blade.spec.currentlyHavingBIOSDeployed = true;
            }
            // The deadline must exist before isStarted is signalled, since it is what cancellation acts on.
            param.connectDeadline = new cancellableDateTime(TimeSpan.FromMinutes(5));
            param.isStarted.Set();

            using (lockableBladeSpec blade = _hostManager.db.getBladeByIP(param.nodeIP,
                                                                          bladeLockType.lockOwnership | bladeLockType.lockSnapshot,
                                                                          bladeLockType.lockNone, permitAccessDuringBIOS: true, permitAccessDuringDeployment: true))
            {
                // Power cycle it
                _hostManager.startBladePowerOff(blade, param.connectDeadline);
                _hostManager.startBladePowerOn(blade, param.connectDeadline);

                param.blade = blade;

                // Wait for it to boot.  Note that we don't ping the client repeatedly here - since the Ping class can cause
                // a BSoD.. ;_; Instead, we wait for port 22 (SSH) to be open.
                _hostManager.setCallbackOnTCPPortOpen(22, param.onBootFinishEvent, param.onBootFailureEvent, param.connectDeadline, param);

                // Wait for the boot to either complete or to fail. WaitOne returns true when the event has been
                // signalled and false on timeout, so a callback is only invoked once its event is actually set;
                // on timeout we simply poll the other event.
                while (true)
                {
                    if (param.onBootFinishEvent.WaitOne(TimeSpan.FromMilliseconds(500)))
                    {
                        param.onBootFinish(param);
                        break;
                    }
                    if (param.onBootFailureEvent.WaitOne(TimeSpan.FromMilliseconds(500)))
                    {
                        param.onBootFailure(param);
                        break;
                    }
                }
            }
        }
예제 #7
0
        /// <summary>
        /// Copies the deployment files to the blade, runs getBIOS.sh on it, fetches currentbios.xml into
        /// <c>state.biosxml</c>, powers the blade off, records the fetched BIOS as the blade's last known BIOS
        /// and marks the operation as finished.
        /// </summary>
        /// <param name="state">Operation state: supplies the blade and deadline, and receives the result and BIOS XML.</param>
        private void _GetBIOS(biosThreadState state)
        {
            // No BIOS XML is supplied when reading.
            copyDeploymentFilesToBlade(state.blade, null, state.connectDeadline);

            using (hypervisor hyp = _hostManager.makeHypervisorForBlade_LTSP(state.blade))
            {
                executionResult res = hyp.startExecutable("bash", "~/getBIOS.sh");
                if (res.resultCode != 0)
                {
                    string msg = string.Format("Executing getBIOS.sh on {0} resulted in error code {1}", state.nodeIP, res.resultCode);
                    msg += "stdout: " + res.stdout;
                    msg += "stderr: " + res.stderr;
                    _hostManager.addLogEvent(msg);
                    state.result = new result(resultCode.genericFail, msg);
                }
                else
                {
                    // This is the read path, so report a read rather than a deployment.
                    string msg = string.Format("Read BIOS successfully from {0}", state.nodeIP);
                    _hostManager.addLogEvent(msg);
                    state.result = new result(resultCode.success, msg);
                }

                // Retrieve the output. Note that this is attempted regardless of the script's exit code.
                state.biosxml = hyp.getFileFromGuest("currentbios.xml", state.connectDeadline);

                // All done, now we can power off and return.
                hyp.powerOff(state.connectDeadline);
            }

            // Record what was read as the blade's last known BIOS, inside a tempLockElevation scope.
            using (var tmp = new tempLockElevation(state.blade, bladeLockType.lockNone, bladeLockType.lockBIOS))
            {
                _hostManager.markLastKnownBIOS(state.blade, state.biosxml);
            }

            state.isFinished = true;
        }
예제 #8
0
        /// <summary>
        /// Starts an asynchronous TCP connect to the given port on the node and registers it as in progress,
        /// so that <c>TCPCallback</c> can act on the supplied events when the connect attempt completes.
        /// </summary>
        /// <param name="port">TCP port to connect to on the node.</param>
        /// <param name="onCompletion">Event stored as the connection event of the in-progress record.</param>
        /// <param name="onError">Event stored as the timeout event of the in-progress record.</param>
        /// <param name="deadline">Deadline stored with the in-progress record.</param>
        /// <param name="state">BIOS operation state; its <c>nodeIP</c> is both the connect target and the registry key.</param>
        /// <exception cref="Exception">A connect for this node is already in progress.</exception>
        public override void setCallbackOnTCPPortOpen(int port, ManualResetEvent onCompletion, ManualResetEvent onError, cancellableDateTime deadline, biosThreadState state)
        {
            lock (inProgressTCPConnects)
            {
                if (inProgressTCPConnects.ContainsKey(state.nodeIP))
                {
                    throw new Exception("operation already in progress");
                }

                hostStateDBInProgressTCPConnect newInProg = new hostStateDBInProgressTCPConnect
                {
                    biosUpdateEndpoint        = new IPEndPoint(IPAddress.Parse(state.nodeIP), port),
                    biosUpdateConnectionEvent = onCompletion,
                    biosUpdateDeadline        = deadline,
                    biosUpdateTimeoutEvent    = onError,
                    biosCurrentThreadState    = state,
                    biosUpdateSocket          = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp)
                };

                inProgressTCPConnects.GetOrAdd(state.nodeIP, newInProg);

                try
                {
                    newInProg.biosUpdateSocket.BeginConnect(newInProg.biosUpdateEndpoint, TCPCallback, newInProg);
                }
                catch
                {
                    // The connect never started, so no callback will ever run for this record. Unregister it and
                    // release the socket; otherwise the node would be stuck as "operation already in progress".
                    hostStateDBInProgressTCPConnect discarded;
                    inProgressTCPConnects.TryRemove(state.nodeIP, out discarded);
                    newInProg.biosUpdateSocket.Close();
                    throw;
                }
            }
        }
예제 #9
0
 /// <summary>
 /// Override that makes no connection attempt: it signals <paramref name="onCompletion"/> immediately and
 /// never signals <paramref name="onError"/>. The remaining arguments are unused.
 /// </summary>
 public override void setCallbackOnTCPPortOpen(int nodePort, ManualResetEvent onCompletion, ManualResetEvent onError, cancellableDateTime deadline, biosThreadState biosThreadState)
 {
     // A call to onTCPConnectionAttempt used to choose between onCompletion and onError here; it is disabled,
     // so completion is reported unconditionally.
     onCompletion.Set();
 }
예제 #10
0
 /// <summary>
 /// Failure handler for a BIOS read or write: stores a <c>genericFail</c> result, marks the operation as
 /// finished and clears the blade's <c>currentlyHavingBIOSDeployed</c> flag.
 /// </summary>
 /// <param name="state">State of the BIOS operation that failed.</param>
 private static void handleReadOrWriteBIOSError(biosThreadState state)
 {
     result failure = new result(resultCode.genericFail, "handleReadOrWriteBIOSError called");

     state.result     = failure;
     state.isFinished = true;

     // The blade no longer has a BIOS operation in progress.
     state.blade.spec.currentlyHavingBIOSDeployed = false;
 }