コード例 #1
0
        /// <summary>
        /// Handles node state change notifications and evicts a node from the node info cache
        /// once it reports the offline state.
        /// </summary>
        /// <param name="sender">The node that raised the event; cast to <c>ISchedulerNode</c>.</param>
        /// <param name="e">Event data carrying the node's new state.</param>
        private static void Node_OnStateChange(object sender, NodeStateEventArg e)
        {
            // Only the transition to Offline requires any work.
            if (e.NewState != NodeState.Offline)
            {
                return;
            }

            // The cache holds references to ISchedulerNode instances, so an offline node is
            // dropped from it; it is expected to be added back when the node is online again.
            LockNodeInfoCache.AcquireWriterLock(Timeout.Infinite);
            try
            {
                ISchedulerNode offlineNode = (ISchedulerNode)sender;
                ISchedulerNode cachedNode;
                bool isCached = NodeInfoCache.TryGetValue(offlineNode.Name, out cachedNode);

                if (isCached)
                {
                    // Step 1: stop listening to state events of the node.
                    offlineNode.OnNodeState -= Node_OnStateChange;

                    // Step 2: drop the node from the cache.
                    NodeInfoCache.Remove(offlineNode.Name);
                }
                else
                {
                    TraceHelper.TraceWarning(0, "[JobMonitorEntry] Node info cache inconsistent.");
                }
            }
            finally
            {
                LockNodeInfoCache.ReleaseLock();
            }
        }
コード例 #2
0
ファイル: CPU.cs プロジェクト: sebastianzillessen/MISD-OWL
        /// <summary>
        /// Looks up the scheduler node that corresponds to a monitored system.
        /// </summary>
        /// <param name="monitoredSystemName">
        /// Name of the monitored system; only the part before the first '.' is used as the node name.
        /// </param>
        /// <returns>
        /// The first node whose name matches the node name (ignoring case), or <c>null</c> when
        /// no node in the scheduler's node list matches.
        /// </returns>
        private ISchedulerNode GetSchedulerNode(string monitoredSystemName)
        {
            string nodeName = monitoredSystemName.Split('.')[0];

            checkClusterConnection();
            IScheduler scheduler = (IScheduler)(((HpcClusterConnection)clusterConnection.CopyConnection()).GetConnection());

            foreach (ISchedulerNode node in scheduler.GetNodeList(null, null))
            {
                // Host names are not case-sensitive, so compare ordinally while ignoring case,
                // and stop scanning as soon as the node is found.
                if (string.Equals(node.Name, nodeName, System.StringComparison.OrdinalIgnoreCase))
                {
                    return node;
                }
            }

            return null;
        }
コード例 #3
0
        /// <summary>
        /// Sample: submits a service-task job on "NodeGroup1" and, when that group has no idle
        /// cores left, adds a node from "NodeGroup2" to "NodeGroup1" so that the job can grow.
        /// </summary>
        /// <remarks>
        /// For best results, run this sample code in queued scheduling mode.
        /// The cluster name is read from the CCP_SCHEDULER environment variable.
        /// </remarks>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            string clusterName = Environment.GetEnvironmentVariable("CCP_SCHEDULER");
            if (string.IsNullOrEmpty(clusterName))
            {
                // Without a head node name there is nothing to connect to.
                Console.WriteLine("The CCP_SCHEDULER environment variable is not set");
                return;
            }

            using (IScheduler scheduler = new Scheduler())
            {
                Console.WriteLine("Connecting to {0}", clusterName);
                scheduler.Connect(clusterName);

                // Assume you have two node groups, NodeGroup1 and NodeGroup2.
                IStringCollection nodeGroup1 = scheduler.GetNodesInNodeGroup("NodeGroup1");
                IStringCollection nodeGroup2 = scheduler.GetNodesInNodeGroup("NodeGroup2");
                if (nodeGroup1.Count == 0 || nodeGroup2.Count == 0)
                {
                    Console.WriteLine("Node groups are not set up correctly");
                    return;
                }

                // Pick a node that is in NodeGroup2 but not yet in NodeGroup1.
                string nodeToMove = "";
                foreach (string node in nodeGroup2)
                {
                    if (!nodeGroup1.Contains(node))
                    {
                        nodeToMove = node;
                        break;
                    }
                }
                if (string.IsNullOrEmpty(nodeToMove))
                {
                    Console.WriteLine("No eligible nodes to move");
                    return;
                }

                // Create a job to run on NodeGroup1.
                ISchedulerJob job = scheduler.CreateJob();
                job.NodeGroups.Add("NodeGroup1");
                // Set unit type to node, but let it autocalculate resources.
                job.UnitType = JobUnitType.Node;

                ISchedulerTask task = job.CreateTask();
                task.CommandLine = "ver";
                task.Type        = TaskType.Service;
                job.AddTask(task);

                job.OnTaskState += new EventHandler <TaskStateEventArg>(job_OnTaskState);
                Console.WriteLine("Submitting job on NodeGroup1");
                scheduler.SubmitJob(job, null, null);
                Console.WriteLine("Job {0} Submitted", job.Id);

                // Wait for the job to start running (signalled through the 'running' event).
                running.WaitOne();

                job.Refresh();
                int allocationCount = job.AllocatedNodes.Count;
                Console.WriteLine("Number of allocated nodes: {0}", allocationCount);

                // Check the status of the NodeGroup1 nodes.
                int idleCores = 0;
                foreach (string nodename in nodeGroup1)
                {
                    ISchedulerNode node = scheduler.OpenNodeByName(nodename);
                    idleCores += node.GetCounters().IdleCoreCount;
                }

                if (idleCores != 0)
                {
                    Console.WriteLine("There are still idle cores in the nodegroup");
                    return;
                }

                // There are no more idle cores remaining in this node group,
                // so add one of the nodes from NodeGroup2 to allow the job to grow.
                running.Reset();

                // Changing node groups is available through the UI or PowerShell.
                string powershellScript = String.Format("add-pssnapin microsoft.hpc; " +
                                                        "add-hpcgroup -scheduler {0} -name {1} -nodename {2}",
                                                        clusterName, "NodeGroup1", nodeToMove);
                using (PowerShell ps = PowerShell.Create())
                {
                    ps.AddScript(powershellScript, true);
                    ps.Invoke();

                    // If the script reported errors the node group was not changed, so the
                    // job cannot grow; waiting on 'running' below could then block forever.
                    if (ps.Streams.Error.Count > 0)
                    {
                        Console.WriteLine("Failed to add node {0} to NodeGroup1:", nodeToMove);
                        foreach (ErrorRecord error in ps.Streams.Error)
                        {
                            Console.WriteLine(error);
                        }
                        return;
                    }
                }

                running.WaitOne();
                Console.WriteLine("(Waiting 5 seconds for job to update the scheduler)");
                Thread.Sleep(5 * 1000);
                job.Refresh();
                int newAllocationCount = job.AllocatedNodes.Count;

                // Verify that the job has grown.
                if (newAllocationCount > allocationCount)
                {
                    Console.WriteLine("Job has grown to {0} nodes", newAllocationCount);
                }
                else
                {
                    Console.WriteLine("Job did not grow; it still has {0} nodes", newAllocationCount);
                }
            }
        }
コード例 #4
0
        /// <summary>
        /// Entry point of the sweep-setup utility. Parses the command line, locates the node-side
        /// client executable in the current directory, connects to the HPC scheduler, selects the
        /// reachable compute nodes and submits the jobs that (unless -updateOnly is given) recreate
        /// the node working directory and copy the client to it, and finally run the client on
        /// the nodes.
        /// </summary>
        /// <param name="args">Raw command-line arguments; interpreted by <c>parse_cmd_args</c>.</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Utility to setup a sweep run\n");
            Console.WriteLine("-- sets up nodeDir (removes if existing)\n");
            Console.WriteLine("-- copies client utility to nodes\n");
            Console.WriteLine("-- makes a single (localMaster) copy of files on nodes\n");

            // Required arguments.
            string filePath = null;     // path to the run files
            string nodeDir  = null;     // working dir on the compute nodes
            string userName = null;
            string password = null;

            // Name of the node-side client executable.
            string clientExe = "hpc_client_util.exe";

            // Optional arguments.
            string nodeFile    = null;  // file with the node names to use
            string clusterName = null;  // cluster UNC name
            bool   updateOnly  = false;

            if (parse_cmd_args(args, ref filePath, ref nodeFile, ref nodeDir,
                               ref clusterName, ref userName, ref password, ref updateOnly) == false)
            {
                Console.WriteLine("parse cmd args fail...");
                Console.WriteLine("required commandline args: -filePath:path to folder with complete set of files");
                Console.WriteLine("                           -nodeDir:working dir on the compute nodes");
                Console.WriteLine("                           -userName: domain user name");
                //Console.WriteLine("                           -password: domain password");
                Console.WriteLine("\noptional (default)       -nodeFile:file with node UNC name(s) to use(all)");
                Console.WriteLine("                              negative for numCores less than max ");
                Console.WriteLine("                           -clusterName:cluster UNC name (babeshn010)");
                Console.WriteLine("                           -updateOnly:updates existing dir structure with new files");
                return;
            }

            // Fall back to the default cluster when none was given.
            if (clusterName == null)
            {
                clusterName = "IGSBABESHN010";
            }

            // Listing filePath verifies that the folder is accessible; the file list itself
            // is not used any further in this method.
            try
            {
                Directory.GetFiles(filePath);
            }
            catch (Exception e)
            {
                Console.WriteLine("Unable to get file list for filePath:\n    " + filePath);
                Console.WriteLine(e);
                return;
            }

            // Try to find a copy of the node-side client in the current directory.
            string   clientPath = null;
            string[] localFiles;
            try
            {
                localFiles = Directory.GetFiles(".\\");
            }
            catch (Exception e)
            {
                Console.WriteLine("Unable to get local file list");
                Console.WriteLine(e);
                return;
            }
            foreach (string file in localFiles)
            {
                if (Path.GetFileName(file) == clientExe)
                {
                    clientPath = Path.GetFullPath(file);
                }
            }
            if (clientPath == null)
            {
                Console.WriteLine("could not find client in  local folder: " + clientExe);
                return;
            }

            //
            //**********************HPC portion*************************
            //

            IScheduler scheduler = null;
            try
            {
                try
                {
                    // Make the scheduler and connect to the cluster.
                    scheduler = new Scheduler();
                    scheduler.Connect(clusterName);
                }
                catch (Exception e)
                {
                    Console.WriteLine("Unable to connect to cluster:\n   " + clusterName);
                    Console.WriteLine(e);
                    return;
                }

                // Get all the nodes in the compute node group.
                List<string> clusterNodes = null;
                try
                {
                    clusterNodes = convert(scheduler.GetNodesInNodeGroup("ComputeNodes"));
                }
                catch (Exception e)
                {
                    Console.WriteLine("Unable to get cluster node list:\n   " + clusterName);
                    Console.WriteLine(e);
                    return;
                }

                // Get the nodes listed in the nodeFile; without a nodeFile all cluster nodes are used.
                List<string> fileNodes = null;
                if (nodeFile != null)
                {
                    try
                    {
                        fileNodes = get_node_list(nodeFile);
                    }
                    catch (Exception e)
                    {
                        Console.WriteLine("Unable to get node list from nodeFile:\n   " + nodeFile);
                        Console.WriteLine(e);
                        return;
                    }
                }
                else
                {
                    fileNodes = clusterNodes;
                }

                // Build requestedNodes: nodes that are requested, known to the cluster and reachable.
                List<string> requestedNodes = new List<string>();
                foreach (string fnode in fileNodes)
                {
                    foreach (string cnode in clusterNodes)
                    {
                        // Node names are compared ordinally, ignoring case, so the result does
                        // not depend on the current culture.
                        if (!string.Equals(fnode, cnode, StringComparison.OrdinalIgnoreCase))
                        {
                            continue;
                        }

                        ISchedulerNode node = scheduler.OpenNodeByName(fnode);
                        if (node.Reachable)
                        {
                            requestedNodes.Add(fnode);
                            Console.WriteLine("compute node added: " + fnode);
                        }
                        else
                        {
                            Console.WriteLine("compute node not reachable: " + fnode);
                        }
                    }
                }
                if (requestedNodes.Count == 0)
                {
                    Console.WriteLine("no usable compute nodes found");
                    return;
                }

                string task;
                bool   success;
                string localHost = Environment.MachineName;
                string[] oneDirLevelUp = get_up_level_dir(nodeDir);

                if (!updateOnly)
                {
                    // First remove the (possibly) existing node dir.
                    task = @"rmdir " + oneDirLevelUp[1] + " /S /Q";
                    Console.WriteLine("removing (possibly) existing nodeDir");
                    success = submit_job(scheduler, task, oneDirLevelUp[0], requestedNodes, userName, password, true);
                    if (success == false)
                    {
                        return;
                    }

                    // Now make the dir.
                    task = @"mkdir " + oneDirLevelUp[1];
                    Console.WriteLine("making new nodeDir");
                    success = submit_job(scheduler, task, oneDirLevelUp[0], requestedNodes, userName, password, true);
                    if (success == false)
                    {
                        return;
                    }

                    // Now copy the client to the nodes.
                    string clientUnc = get_master_unc(localHost, clientPath);
                    task = @"copy " + clientUnc;
                    Console.WriteLine("Copying client to nodes");
                    success = submit_job(scheduler, task, nodeDir, requestedNodes, userName, password, true);
                    if (success == false)
                    {
                        return;
                    }
                }
                else
                {
                    Console.WriteLine("using existing directory structure and hpc_client_util, updating files only...");
                }

                // Finally run the client on the nodes, pointing it at the UNC path of filePath.
                string masterUnc = get_master_unc(localHost, Path.GetFullPath(filePath));

                Console.WriteLine("starting client");

                task = clientExe + " -src:" + masterUnc + " ";
                task = task + " -n:0";
                if (updateOnly)
                {
                    task = task + " -updateOnly";
                }
                // The outcome of this last job does not change what happens next: the method ends either way.
                submit_job(scheduler, task, nodeDir, requestedNodes, userName, password, false);
            }
            finally
            {
                // Release the scheduler connection on every exit path. The cast keeps this
                // compiling regardless of whether IScheduler itself declares IDisposable.
                IDisposable disposableScheduler = scheduler as IDisposable;
                if (disposableScheduler != null)
                {
                    disposableScheduler.Dispose();
                }
            }
        }
コード例 #5
0
        /// <summary>
        /// Entry point of the launcher. Parses the command line, applies defaults, checks that the
        /// executable and the pestCase .pst file exist among the filePath files, prepares the master
        /// directory, locates the node-side client, optionally starts the master process through
        /// <c>run_wait</c>, connects to the HPC scheduler, selects the reachable compute nodes and
        /// submits the rmdir / mkdir / copy / run-client jobs (for all nodes at once, or node by node
        /// when staggerFlag is set). Finally it loops until <c>master.HasExited</c> is true or throws.
        /// </summary>
        /// <param name="args">Raw command-line arguments; interpreted by <c>parse_cmd_args</c>.</param>
        static void Main(string[] args)
        {
            //required args
            //path to runfiles
            string filePath = null;
            //root dir on slaves
            string nodeDir = null;
            //pest case
            string pestCase = null;
            //username
            string userName = null;
            //password
            string password = null;

            //optional
            //master dir
            string masterDir = null;
            //node file
            string nodeFile = null;
            //exec name
            string execName = null;
            //execArgs
            // NOTE(review): execArgs is not passed to parse_cmd_args below, so within this method
            // it is only ever set by the default assignment before the client job is submitted.
            string execArgs = null;
            //client command
            string clientExe = null;
            //client command line
            string clientArgs = null;
            //cluster UNC name
            string clusterName = null;
            //number of cores to use on each node (-999 is the "not given" sentinel checked below)
            int numCores = -999;
            //port number (-999 is the "not given" sentinel checked below)
            int portNum = -999;
            //master start delay
            int delay = 0;
            //flag to potentially not starting a master
            bool masterFlag = true;
            //flag to potentially stagger start nodes
            bool staggerFlag = false;

            if (parse_cmd_args(args, ref filePath, ref masterDir, ref nodeFile, ref nodeDir, ref execName,
                               ref numCores, ref pestCase, ref portNum, ref clientExe, ref clientArgs,
                               ref clusterName, ref userName, ref password, ref delay, ref masterFlag, ref staggerFlag) == false)
            {
                // Parsing failed: print the usage text and stop.
                Console.WriteLine("parse cmd args fail...");
                Console.WriteLine("required commandline args: -filePath:path to folder with complete set of files");
                Console.WriteLine("                           -nodeDir:root dir on the compute nodes");
                Console.WriteLine("                           -pestCase: pest case name");
                Console.WriteLine("                           -userName: domain user name");
                Console.WriteLine("\noptional (default)       -nodeFile:file with node UNC name(s) to use(all)");
                Console.WriteLine("                           -masterDir:directory to run for master(.\\master)");
                Console.WriteLine("                             if not passed existing, \".\\master\" is removed!");
                Console.WriteLine("                           -execName:executable name (\"beopest64.exe\")");
                Console.WriteLine("                           -numCores:number of cores per node (processor count)");
                Console.WriteLine("                             negative for numCores less than max ");
                Console.WriteLine("                           -portNum:TCP/IP port number (4004)");
                Console.WriteLine("                           -clientExe:node side client (hpc_client_util.exe)");
                Console.WriteLine("                           -clientCmdLine:passed only if client passed");
                Console.WriteLine("                             use \" \" if clientCmdLine contains spaces");
                Console.WriteLine("                           -clusterName:cluster UNC name (babeshn010)");
                Console.WriteLine("                           -delay:time to wait after master start (0 seconds)");
                Console.WriteLine("                           -noMaster:if passed, no master started, only slaves");
                //Console.WriteLine("                           -stagger:if passed, each node will be start sequentially");
                return;
            }
            //set clusterName
            if (clusterName == null)
            {
                clusterName = "IGSBABESHN010";
            }

            //set execName
            if (execName == null)
            {
                execName = "beopest64.exe";
            }

            //set clientExe
            if (clientExe == null)
            {
                clientExe = "hpc_client_util.exe";
            }

            //set port number
            if (portNum == -999)
            {
                portNum = 4004;
            }

            // NOTE(review): the WriteLine statement below appears garbled in this copy: the text
            // between the password prompt and the "Unable to get file list" message seems to have
            // been replaced by asterisks. The declaration of dataFiles (used further down) and the
            // start of the try/catch whose tail follows are therefore not visible here. Restore
            // this region from the original source before building.
            if (password == null)
            {
                Console.WriteLine("Enter network password:"******"Unable to get file list for filePath:\n    " + filePath);
                Console.WriteLine(e);
                return;
            }

            // make sure pestCase.pst exists and execCmd exists
            bool execFlag = false, pstFlag = false;

            foreach (string file in dataFiles)
            {
                if (Path.GetFileName(file) == execName)
                {
                    execFlag = true;
                }
                else if (Path.GetFileNameWithoutExtension(file) == pestCase && Path.GetExtension(file) == ".pst")
                {
                    pstFlag = true;
                }
            }
            if (execFlag == false)
            {
                Console.WriteLine("executable not found in file path folder:\n    " + execName);
                return;
            }
            if (pstFlag == false)
            {
                Console.WriteLine("pestCase.pst not found in file path folder:\n    " + pestCase);
                return;
            }

            //set numCores
            // NOTE(review): in the negative branch, subtracting a negative numCores yields a value
            // larger than ProcessorCount, while the usage text says "negative for numCores less
            // than max" - confirm the intended sign. numCores is also not read again in this method.
            if (numCores == -999)
            {
                numCores = Environment.ProcessorCount;
            }
            else if (numCores < 0)
            {
                numCores = Environment.ProcessorCount - numCores;
            }

            //setup master dir
            string currentDir = Directory.GetCurrentDirectory();
            bool   newMaster  = false;

            // NOTE(review): the default master dir is only created when masterFlag is false (no
            // master requested). With the defaults (masterFlag true, masterDir null) the else
            // branch runs and checks Directory.Exists on a null path - confirm the condition is
            // not inverted.
            if ((masterDir == null) && (masterFlag == false))
            {
                masterDir = currentDir + @"\" + "master";
                newMaster = true;

                // An existing default master dir is deleted recursively and recreated empty.
                if (Directory.Exists(masterDir))
                {
                    Console.WriteLine("master dir already exists:\n    " + masterDir + "...removing...");
                    try
                    {
                        Directory.Delete(masterDir, true);
                    }
                    catch (Exception e)
                    {
                        Console.WriteLine("Unable to remove default master dir:\n    " + masterDir);
                        Console.WriteLine(e);
                        return;
                    }
                }
                try
                {
                    Directory.CreateDirectory(masterDir);
                }
                catch (Exception e)
                {
                    Console.WriteLine("Unable to create master dir:\n   " + masterDir);
                    Console.WriteLine(e);
                    return;
                }
            }
            else
            {
                if (Directory.Exists(masterDir) == false)
                {
                    Console.WriteLine("Unable to find existing master dir:\n   " + masterDir);
                    return;
                }
            }

            //try to find a copy of the node-side client
            string clientPath = null;

            //first filePath files
            foreach (string file in dataFiles)
            {
                if (Path.GetFileName(file) == clientExe)
                {
                    clientPath = Path.GetFullPath(file);
                }
            }
            //next look in the current dir
            if (clientPath == null)
            {
                string[] localFiles = null;
                try
                {
                    localFiles = Directory.GetFiles(".\\");
                }
                catch (Exception e)
                {
                    Console.WriteLine("Unable to get local file list");
                    Console.WriteLine(e);
                    return;
                }
                foreach (string file in localFiles)
                {
                    if (Path.GetFileName(file) == clientExe)
                    {
                        clientPath = Path.GetFullPath(file);
                    }
                }
            }
            //if still not found, give up
            if (clientPath == null)
            {
                Console.WriteLine("could not find client in filePath folder\n or local folder: " + clientExe);
                return;
            }

            //copy files to master (only when the default master dir was created above)
            if (newMaster == true)
            {
                try
                {
                    copy_folder(filePath, masterDir);
                }
                catch (Exception e)
                {
                    Console.WriteLine("Unable to copy files to master dir:\n   " + masterDir);
                    Console.WriteLine(e);
                    return;
                }
            }

            //start beopest master
            // 'master' starts out as a Process object that has not been started; it is replaced
            // by the result of run_wait only when masterFlag is set.
            Process master = new Process();

            if (masterFlag)
            {
                string masterCmd = " " + pestCase + " /h  :" + portNum;
                try
                {
                    master = run_wait(masterDir, execName, masterCmd, delay);
                    Console.WriteLine(master.Id);
                }
                catch (Exception e)
                {
                    // First attempt failed: retry once with masterDir prefixed by the current directory.
                    Console.WriteLine("Unable to start master successfully,\n  adding full path to masterDir and retrying");
                    Console.WriteLine(e);
                    masterDir = currentDir + @"\" + masterDir;
                    try
                    {
                        master = run_wait(masterDir, execName, masterCmd, delay);
                        Console.WriteLine(master.Id);
                    }
                    catch (Exception e2)
                    {
                        Console.WriteLine("Still unable to start master successfully");
                        Console.WriteLine(e2);

                        return;
                    }
                }
                Console.WriteLine("Master started successfully in " + masterDir);
            }
            else
            {
                // NOTE(review): when the default master dir was built above, masterDir already
                // starts with currentDir, so this prefixes it a second time - confirm intent.
                Console.WriteLine("No master started.  Adding current path to masterDir");
                masterDir = currentDir + @"\" + masterDir;
            }

            //
            //
            //**********************HPC portion*************************
            //
            //

            IScheduler scheduler = null;

            // NOTE(review): the three catch blocks below call master.Kill() without checking
            // masterFlag, unlike the later failure paths; when no master was started, 'master' is
            // the never-started Process created above - confirm Kill() is safe in that case.
            try
            {
                // Make the scheduler and connect to the cluster named by clusterName.
                scheduler = new Scheduler();
                scheduler.Connect(clusterName);
            }
            catch (Exception e)
            {
                Console.WriteLine("Unable to connect to cluster:\n   " + clusterName);
                Console.WriteLine(e);
                master.Kill();
                return;
            }


            List <string> clusterNodes = new List <string>();

            // Get all the nodes in the compute node group.

            try
            {
                clusterNodes = convert(scheduler.GetNodesInNodeGroup("ComputeNodes"));
            }
            catch (Exception e)
            {
                Console.WriteLine("Unable to get cluster node list:\n   " + clusterName);
                Console.WriteLine(e);
                master.Kill();
                return;
            }

            //get the nodes in the nodeFile (all cluster nodes when no nodeFile was given)
            List <string> fileNodes = new List <string>();

            if (nodeFile != null)
            {
                try
                {
                    fileNodes = get_node_list(nodeFile);
                }
                catch (Exception e)
                {
                    Console.WriteLine("Unable to get node list from nodeFile:\n   " + nodeFile);
                    Console.WriteLine(e);
                    master.Kill();
                    return;
                }
            }
            else
            {
                fileNodes = clusterNodes;
            }

            //build requestedNodes: requested nodes that are in the cluster list and reachable
            List <string> requestedNodes = new List <string>();

            foreach (string fnode in fileNodes)
            {
                foreach (string cnode in clusterNodes)
                {
                    // Names are matched case-insensitively by upper-casing both sides.
                    if (fnode.ToUpper() == cnode.ToUpper())
                    {
                        ISchedulerNode node = scheduler.OpenNodeByName(fnode);
                        if (node.Reachable == true)
                        {
                            requestedNodes.Add(fnode);
                            Console.WriteLine("compute node added: " + fnode);
                        }
                        else
                        {
                            Console.WriteLine("compute node not reachable: " + fnode);
                        }
                    }
                }
            }
            if (requestedNodes.Count == 0)
            {
                Console.WriteLine("no usable compute nodes found");
                if (masterFlag)
                {
                    master.Kill();
                }
                return;
            }


            // Non-staggered mode: each job is submitted once for the whole requestedNodes list.
            if (!staggerFlag)
            {
                //first remove existing node dir
                //
                bool     success       = false;
                string[] oneDirLevelUp = get_up_level_dir(nodeDir);
                string   task          = @"rmdir " + oneDirLevelUp[1] + " /S /Q";
                Console.WriteLine("removing (possibly) existing nodeDir");
                success = submit_job(scheduler, task, oneDirLevelUp[0], requestedNodes, userName, password, true);
                if (success == false)
                {
                    if (masterFlag)
                    {
                        master.Kill();
                    }
                    return;
                }

                //now make the dir
                //

                task = @"mkdir " + oneDirLevelUp[1];
                Console.WriteLine("making new nodeDir");
                success = submit_job(scheduler, task, oneDirLevelUp[0], requestedNodes, userName, password, true);
                if (success == false)
                {
                    if (masterFlag)
                    {
                        master.Kill();
                    }
                    return;
                }

                //now copy slaveCopyRun to slaves
                //
                string localHost = Environment.MachineName;
                string clientUnc = get_master_unc(localHost, clientPath);
                task = @"copy " + clientUnc;
                Console.WriteLine("Copying client to nodes");
                success = submit_job(scheduler, task, nodeDir, requestedNodes, userName, password, true);
                if (success == false)
                {
                    if (masterFlag)
                    {
                        master.Kill();
                    }
                    return;
                }

                //now finally run slaveCopyRun
                //
                string masterUnc = get_master_unc(localHost, masterDir);
                Console.WriteLine("starting client util");

                // Default arguments for the executable: pest case plus "/h <this host>:<port>".
                if (execArgs == null)
                {
                    execArgs = " " + pestCase + " /h " + localHost + ":" + portNum;
                }

                task    = clientExe + " -src:" + masterUnc + " ";
                task    = task + " -cmdExec:" + execName + " -cmdArgs:\"" + execArgs + "\"";
                success = submit_job(scheduler, task, nodeDir, requestedNodes, userName, password, false);
                if (success == false)
                {
                    // NOTE(review): unlike the failure paths above, this Kill() is not guarded by masterFlag.
                    master.Kill();
                    return;
                }
            }

            //if nodes are stagger started
            else
            {
                // Staggered mode: the same four jobs are submitted for one node at a time.
                for (int i = 0; i < requestedNodes.Count; i++)
                {
                    List <string> rnode = new List <string> {
                        requestedNodes[i]
                    };

                    //first remove existing node dir
                    //
                    bool     success       = false;
                    string[] oneDirLevelUp = get_up_level_dir(nodeDir);
                    string   task          = @"rmdir " + oneDirLevelUp[1] + " /S /Q";
                    Console.WriteLine("removing (possibly) existing nodeDir");
                    success = submit_job(scheduler, task, oneDirLevelUp[0], rnode, userName, password, true);
                    if (success == false)
                    {
                        if (masterFlag)
                        {
                            master.Kill();
                        }
                        return;
                    }

                    //now make the dir
                    //

                    task = @"mkdir " + oneDirLevelUp[1];
                    Console.WriteLine("making new nodeDir");
                    success = submit_job(scheduler, task, oneDirLevelUp[0], rnode, userName, password, true);
                    if (success == false)
                    {
                        if (masterFlag)
                        {
                            master.Kill();
                        }
                        return;
                    }

                    //now copy slaveCopyRun to slaves
                    //
                    string localHost = Environment.MachineName;
                    string clientUnc = get_master_unc(localHost, clientPath);
                    task = @"copy " + clientUnc;
                    Console.WriteLine("Copying client to nodes");
                    success = submit_job(scheduler, task, nodeDir, rnode, userName, password, true);
                    if (success == false)
                    {
                        if (masterFlag)
                        {
                            master.Kill();
                        }
                        return;
                    }

                    //now finally run slaveCopyRun
                    //
                    string masterUnc = get_master_unc(localHost, masterDir);
                    Console.WriteLine("starting slaveCopyRun");

                    // Default arguments for the executable: pest case plus "/h <this host>:<port>".
                    if (execArgs == null)
                    {
                        execArgs = " " + pestCase + " /h " + localHost + ":" + portNum;
                    }

                    task    = clientExe + " -src:" + masterUnc + " ";
                    task    = task + " -cmdExec:" + execName + " -cmdArgs:\"" + execArgs + "\"";
                    success = submit_job(scheduler, task, nodeDir, rnode, userName, password, false);
                    if (success == false)
                    {
                        // NOTE(review): unlike the failure paths above, this Kill() is not guarded by masterFlag.
                        master.Kill();
                        return;
                    }
                }
            }
            // Poll master.HasExited in a tight loop (no sleep between checks) until the master
            // has exited; any exception thrown by the check also ends the loop.
            while (true)
            {
                try
                {
                    if (master.HasExited)
                    {
                        break;
                    }
                }
                catch (Exception e)
                {
                    break;
                }
            }



            return;
        }